[Date Prev][Date Next][Thread Prev][Thread Next][Date Index][Thread Index]
Re: [PATCH 4/5] zfs com.delphix:embedded_data feature support
From: |
Toomas Soome |
Subject: |
Re: [PATCH 4/5] zfs com.delphix:embedded_data feature support |
Date: |
Sun, 19 Apr 2015 20:43:01 +0300 |
> On 19.04.2015, at 18:37, Andrei Borzenkov <address@hidden> wrote:
>
> В Thu, 16 Apr 2015 08:23:22 +0300
> Toomas Soome <address@hidden> пишет:
>
>>
>> ---
>> grub-core/fs/zfs/zfs.c | 84
>> ++++++++++++++++++++++++++++++++++++++++--------
>> include/grub/zfs/spa.h | 27 +++++++++++++---
>> 2 files changed, 93 insertions(+), 18 deletions(-)
>>
>> diff --git a/grub-core/fs/zfs/zfs.c b/grub-core/fs/zfs/zfs.c
>> index a731c3d..da44131 100644
>> --- a/grub-core/fs/zfs/zfs.c
>> +++ b/grub-core/fs/zfs/zfs.c
>> @@ -282,6 +282,7 @@ grub_crypto_cipher_handle_t (*grub_zfs_load_key) (const
>> struct grub_zfs_key *key
>> static const char *spa_feature_names[] = {
>> "org.illumos:lz4_compress",
>> "com.delphix:hole_birth",
>> + "com.delphix:embedded_data",
>> NULL
>> };
>>
>> @@ -1803,6 +1804,39 @@ zio_read_data (blkptr_t * bp, grub_zfs_endian_t
>> endian, void *buf,
>> }
>>
>> /*
>> + * buf must be at least BPE_GET_PSIZE(bp) bytes long (which will never be
>> + * more than BPE_PAYLOAD_SIZE bytes).
>> + */
>> +static grub_err_t
>> +decode_embedded_bp_compressed(const blkptr_t *bp, void *buf)
>> +{
>> + grub_size_t psize, i;
>> + grub_uint8_t *buf8 = buf;
>> + grub_uint64_t w = 0;
>> + const grub_uint64_t *bp64 = (const grub_uint64_t *)bp;
>> +
>> + psize = BPE_GET_PSIZE(bp);
>> +
>
> This needs check that it is not more than BPE_PAYLOAD_SIZE bytes.
In theory, yes. In practice the BP is protected by a checksum that is verified
when the BP is read, so the values should already be valid at this point.
>
>> + /*
>> + * Decode the words of the block pointer into the byte array.
>> + * Low bits of first word are the first byte (little endian).
>> + */
>> + for (i = 0; i < psize; i++)
>> + {
>> + if (i % sizeof (w) == 0)
>> + {
>> + /* beginning of a word */
>> + w = *bp64;
>> + bp64++;
>> + if (!BPE_IS_PAYLOADWORD(bp, bp64))
>> + bp64++;
>> + }
>> + buf8[i] = BF64_GET(w, (i % sizeof (w)) * 8, 8);
>> + }
>> + return GRUB_ERR_NONE;
>> +}
>> +
>> +/*
>> * Read in a block of data, verify its checksum, decompress if needed,
>> * and put the uncompressed data in buf.
>> */
>> @@ -1820,12 +1854,26 @@ zio_read (blkptr_t *bp, grub_zfs_endian_t endian,
>> void **buf,
>> *buf = NULL;
>>
>> checksum = (grub_zfs_to_cpu64((bp)->blk_prop, endian) >> 40) & 0xff;
>> - comp = (grub_zfs_to_cpu64((bp)->blk_prop, endian)>>32) & 0xff;
>> + comp = (grub_zfs_to_cpu64((bp)->blk_prop, endian)>>32) & 0x7f;
>> encrypted = ((grub_zfs_to_cpu64((bp)->blk_prop, endian) >> 60) & 3);
>> - lsize = (BP_IS_HOLE(bp) ? 0 :
>> - (((grub_zfs_to_cpu64 ((bp)->blk_prop, endian) & 0xffff) + 1)
>> - << SPA_MINBLOCKSHIFT));
>> - psize = get_psize (bp, endian);
>> + if (BP_IS_EMBEDDED(bp))
>> + {
>> + if (BPE_GET_ETYPE(bp) != BP_EMBEDDED_TYPE_DATA)
>> + return grub_error (GRUB_ERR_NOT_IMPLEMENTED_YET,
>> + "unsupported embedded BP (type=%u)\n",
>> + BPE_GET_ETYPE(bp));
>> + lsize = BPE_GET_LSIZE(bp);
>> + psize = BF64_GET_SB(grub_zfs_to_cpu64 ((bp)->blk_prop, endian), 25,
>> 7, 0, 1);
>> + }
>> + else
>> + {
>> + lsize = (BP_IS_HOLE(bp) ? 0 :
>> + (((grub_zfs_to_cpu64 ((bp)->blk_prop, endian) & 0xffff) + 1)
>> + << SPA_MINBLOCKSHIFT));
>> + psize = get_psize (bp, endian);
>> + }
>> + grub_dprintf("zfs", "zio_read: E %d: size %" PRIdGRUB_SSIZE "/%"
>> + PRIdGRUB_SSIZE "\n", (int)BP_IS_EMBEDDED(bp), lsize, psize);
>>
>> if (size)
>> *size = lsize;
>> @@ -1849,23 +1897,31 @@ zio_read (blkptr_t *bp, grub_zfs_endian_t endian,
>> void **buf,
>> compbuf = *buf = grub_malloc (lsize);
>>
>
> I'll commit NULL check
ok.
>
>> grub_dprintf ("zfs", "endian = %d\n", endian);
>> - err = zio_read_data (bp, endian, compbuf, data);
>> + if (BP_IS_EMBEDDED(bp))
>> + err = decode_embedded_bp_compressed(bp, compbuf);
>> + else
>> + {
>> + err = zio_read_data (bp, endian, compbuf, data);
>> + grub_memset (compbuf, 0, ALIGN_UP (psize, 16) - psize);
>> + }
>
> Something is fishy around this place (it is not only about your patch but
> about the existing code as well). It allocates compbuf but never checks for
> error, it allocates lsize but reads psize and never really verifies that
> lsize is not smaller than psize.
>
Again, lsize cannot be smaller than psize: lsize is the uncompressed size and
psize is the compressed size, and both values come from disk already verified.
Of course, it does not hurt to have such checks in the code anyhow.
> What do you say about attached patch? Is there any reason it should be
> complicated by allocating different sizes?
>
> It also sounds like grub_memset should really be
>
> grub_memset (compbuf + psize, 0, ALIGN_UP (psize, 16) - psize);
>
> but I'm not sure here.
TBH, I was thinking it should use grub_zalloc instead; my only concern was that
it may slow things down too much.
Well, if compression is off, then lsize == psize, so in that sense the only
overhead is the possible alignment. I'm not even sure where the ALIGN_UP to 16
is coming from; the only alignment that can happen is to 1<<vdev_ashift in the
case of raidz (to split the block between child devices on write and to collect
the pieces on read). But again, it shouldn't harm to have a few extra bytes :D
Even zeroing out shouldn't matter there, as the checksum is located at the end
of the psize-sized memory area - so you get a memory block with data+zc, and
the block size is psize.
The extra patches should be tested with different pool types, though.
rgds,
toomas
>
>> if (err)
>> {
>> grub_free (compbuf);
>> *buf = NULL;
>> return err;
>> }
>> - grub_memset (compbuf, 0, ALIGN_UP (psize, 16) - psize);
>>
>> - err = zio_checksum_verify (zc, checksum, endian,
>> - compbuf, psize);
>> - if (err)
>> + if (!BP_IS_EMBEDDED(bp))
>> {
>> - grub_dprintf ("zfs", "incorrect checksum\n");
>> - grub_free (compbuf);
>> - *buf = NULL;
>> - return err;
>> + err = zio_checksum_verify (zc, checksum, endian,
>> + compbuf, psize);
>> + if (err)
>> + {
>> + grub_dprintf ("zfs", "incorrect checksum\n");
>> + grub_free (compbuf);
>> + *buf = NULL;
>> + return err;
>> + }
>> }
>>
>> if (encrypted)
>> diff --git a/include/grub/zfs/spa.h b/include/grub/zfs/spa.h
>> index df43b6b..5d89250 100644
>> --- a/include/grub/zfs/spa.h
>> +++ b/include/grub/zfs/spa.h
>> @@ -126,7 +126,7 @@ typedef struct zio_cksum {
>> * +-------+-------+-------+-------+-------+-------+-------+-------+
>> * 5 |G| offset3 |
>> * +-------+-------+-------+-------+-------+-------+-------+-------+
>> - * 6 |BDX|lvl| type | cksum | comp | PSIZE | LSIZE
>> |
>> + * 6 |BDX|lvl| type | cksum |E| comp| PSIZE | LSIZE
>> |
>> * +-------+-------+-------+-------+-------+-------+-------+-------+
>> * 7 | padding |
>> * +-------+-------+-------+-------+-------+-------+-------+-------+
>> @@ -160,7 +160,8 @@ typedef struct zio_cksum {
>> * G gang block indicator
>> * B byteorder (endianness)
>> * D dedup
>> - * X unused
>> + * X encryption
>> + * E blkptr_t contains embedded data
>> * lvl level of indirection
>> * type DMU object type
>> * phys birth txg of block allocation; zero if same as logical birth
>> txg
>> @@ -203,8 +204,8 @@ typedef struct blkptr {
>> #define BP_SET_LSIZE(bp, x) \
>> BF64_SET_SB((bp)->blk_prop, 0, 16, SPA_MINBLOCKSHIFT, 1, x)
>>
>> -#define BP_GET_COMPRESS(bp) BF64_GET((bp)->blk_prop, 32, 8)
>> -#define BP_SET_COMPRESS(bp, x) BF64_SET((bp)->blk_prop, 32, 8,
>> x)
>> +#define BP_GET_COMPRESS(bp) BF64_GET((bp)->blk_prop, 32, 7)
>> +#define BP_SET_COMPRESS(bp, x) BF64_SET((bp)->blk_prop, 32, 7,
>> x)
>>
>> #define BP_GET_CHECKSUM(bp) BF64_GET((bp)->blk_prop, 40, 8)
>> #define BP_SET_CHECKSUM(bp, x) BF64_SET((bp)->blk_prop, 40, 8,
>> x)
>> @@ -215,6 +216,8 @@ typedef struct blkptr {
>> #define BP_GET_LEVEL(bp) BF64_GET((bp)->blk_prop, 56, 5)
>> #define BP_SET_LEVEL(bp, x) BF64_SET((bp)->blk_prop, 56, 5,
>> x)
>>
>> +#define BP_IS_EMBEDDED(bp) BF64_GET((bp)->blk_prop, 39, 1)
>> +
>> #define BP_GET_PROP_BIT_61(bp) BF64_GET((bp)->blk_prop, 61, 1)
>> #define BP_SET_PROP_BIT_61(bp, x) BF64_SET((bp)->blk_prop, 61, 1,
>> x)
>>
>> @@ -277,6 +280,22 @@ typedef struct blkptr {
>> (zcp)->zc_word[3] = w3; \
>> }
>>
>> +#define BPE_GET_ETYPE(bp) BP_GET_CHECKSUM(bp)
>> +#define BPE_GET_LSIZE(bp) \
>> + BF64_GET_SB((bp)->blk_prop, 0, 25, 0, 1)
>> +#define BPE_GET_PSIZE(bp) \
>> + BF64_GET_SB((bp)->blk_prop, 25, 7, 0, 1)
>> +
>> +typedef enum bp_embedded_type {
>> + BP_EMBEDDED_TYPE_DATA,
>> + NUM_BP_EMBEDDED_TYPES
>> +} bp_embedded_type_t;
>> +
>> +#define BPE_NUM_WORDS 14
>> +#define BPE_PAYLOAD_SIZE (BPE_NUM_WORDS * sizeof(grub_uint64_t))
>> +#define BPE_IS_PAYLOADWORD(bp, wp) \
>> + ((wp) != &(bp)->blk_prop && (wp) != &(bp)->blk_birth)
>> +
>> #define BP_IDENTITY(bp) (&(bp)->blk_dva[0])
>> #define BP_IS_GANG(bp) DVA_GET_GANG(BP_IDENTITY(bp))
>> #define DVA_IS_EMPTY(dva) ((dva)->dva_word[0] == 0ULL && \
>
> <zio_read_lsize_vs_psize.patch>