--- block-raw-posix.c | 240 +++++++++++++++++++++++++++++++++++++++++++++++++++++- 1 file changed, 238 insertions(+), 2 deletions(-) Index: qemu/block-raw-posix.c =================================================================== --- qemu.orig/block-raw-posix.c 2008-05-16 17:08:13.000000000 +0200 +++ qemu/block-raw-posix.c 2008-05-20 10:31:59.000000000 +0200 @@ -70,6 +70,8 @@ #define FTYPE_CD 1 #define FTYPE_FD 2 +#define ALIGNED_BUFFER_SIZE (32 * 512) + /* if the FD is not accessed during that time (in ms), we try to reopen it to see if the disk has been changed */ #define FD_OPEN_TIMEOUT 1000 @@ -86,6 +88,9 @@ typedef struct BDRVRawState { int fd_got_error; int fd_media_changed; #endif +#if defined(O_DIRECT) && !defined(QEMU_IMG) + uint8_t* aligned_buf; +#endif } BDRVRawState; static int fd_open(BlockDriverState *bs); @@ -121,6 +126,17 @@ static int raw_open(BlockDriverState *bs return ret; } s->fd = fd; +#if defined(O_DIRECT) && !defined(QEMU_IMG) + s->aligned_buf = NULL; + if (flags & BDRV_O_DIRECT) { + s->aligned_buf = qemu_memalign(512, ALIGNED_BUFFER_SIZE); + if (s->aligned_buf == NULL) { + ret = -errno; + close(fd); + return ret; + } + } +#endif return 0; } @@ -141,7 +157,14 @@ static int raw_open(BlockDriverState *bs #endif */ -static int raw_pread(BlockDriverState *bs, int64_t offset, +/* + * offset and count are in bytes, but must be multiples of 512 for files + * opened with O_DIRECT. buf must be aligned to 512 bytes then. + * + * This function may be called without alignment if the caller ensures + * that O_DIRECT is not in effect. + */ +static int raw_pread_aligned(BlockDriverState *bs, int64_t offset, uint8_t *buf, int count) { BDRVRawState *s = bs->opaque; @@ -194,7 +217,14 @@ label__raw_read__success: return ret; } -static int raw_pwrite(BlockDriverState *bs, int64_t offset, +/* + * offset and count are in bytes, but must be multiples of 512 for files + * opened with O_DIRECT. buf must be aligned to 512 bytes then. + * + * This function may be called without alignment if the caller ensures + * that O_DIRECT is not in effect. + */ +static int raw_pwrite_aligned(BlockDriverState *bs, int64_t offset, const uint8_t *buf, int count) { BDRVRawState *s = bs->opaque; @@ -230,6 +260,164 @@ label__raw_write__success: return ret; } + +#if defined(O_DIRECT) && !defined(QEMU_IMG) +/* + * offset and count are in bytes and possibly not aligned. For files opened + * with O_DIRECT, necessary alignments are ensured before calling + * raw_pread_aligned to do the actual read. + */ +static int raw_pread(BlockDriverState *bs, int64_t offset, + uint8_t *buf, int count) +{ + BDRVRawState *s = bs->opaque; + int size, ret, shift, sum; + + sum = 0; + + if (s->aligned_buf != NULL) { + + if (offset & 0x1ff) { + /* align offset on a 512 bytes boundary */ + + shift = offset & 0x1ff; + size = (shift + count + 0x1ff) & ~0x1ff; + if (size > ALIGNED_BUFFER_SIZE) + size = ALIGNED_BUFFER_SIZE; + ret = raw_pread_aligned(bs, offset - shift, s->aligned_buf, size); + if (ret < 0) + return ret; + + size = 512 - shift; + if (size > count) + size = count; + memcpy(buf, s->aligned_buf + shift, size); + + buf += size; + offset += size; + count -= size; + sum += size; + + if (count == 0) + return sum; + } + if (count & 0x1ff || (uintptr_t) buf & 0x1ff) { + + /* read on aligned buffer */ + + while (count) { + + size = (count + 0x1ff) & ~0x1ff; + if (size > ALIGNED_BUFFER_SIZE) + size = ALIGNED_BUFFER_SIZE; + + ret = raw_pread_aligned(bs, offset, s->aligned_buf, size); + if (ret < 0) + return ret; + + size = ret; + if (size > count) + size = count; + + memcpy(buf, s->aligned_buf, size); + + buf += size; + offset += size; + count -= size; + sum += size; + } + + return sum; + } + } + + return raw_pread_aligned(bs, offset, buf, count) + sum; +} + +/* + * offset and count are in bytes and possibly not aligned. For files opened + * with O_DIRECT, necessary alignments are ensured before calling + * raw_pwrite_aligned to do the actual write. + */ +static int raw_pwrite(BlockDriverState *bs, int64_t offset, + const uint8_t *buf, int count) +{ + BDRVRawState *s = bs->opaque; + int size, ret, shift, sum; + + sum = 0; + + if (s->aligned_buf != NULL) { + + if (offset & 0x1ff) { + /* align offset on a 512 bytes boundary */ + shift = offset & 0x1ff; + ret = raw_pread_aligned(bs, offset - shift, s->aligned_buf, 512); + if (ret < 0) + return ret; + + size = 512 - shift; + if (size > count) + size = count; + memcpy(s->aligned_buf + shift, buf, size); + + ret = raw_pwrite_aligned(bs, offset - shift, s->aligned_buf, 512); + if (ret < 0) + return ret; + + buf += size; + offset += size; + count -= size; + sum += size; + + if (count == 0) + return sum; + } + if (count & 0x1ff || (uintptr_t) buf & 0x1ff) { + + while ((size = (count & ~0x1ff)) != 0) { + + if (size > ALIGNED_BUFFER_SIZE) + size = ALIGNED_BUFFER_SIZE; + + memcpy(s->aligned_buf, buf, size); + + ret = raw_pwrite_aligned(bs, offset, s->aligned_buf, size); + if (ret < 0) + return ret; + + buf += ret; + offset += ret; + count -= ret; + sum += ret; + } + /* here, count < 512 because (count & ~0x1ff) == 0 */ + if (count) { + ret = raw_pread_aligned(bs, offset, s->aligned_buf, 512); + if (ret < 0) + return ret; + memcpy(s->aligned_buf, buf, count); + + ret = raw_pwrite_aligned(bs, offset, s->aligned_buf, 512); + if (ret < 0) + return ret; + if (count < ret) + ret = count; + + sum += ret; + } + return sum; + } + } + return raw_pwrite_aligned(bs, offset, buf, count) + sum; +} + +#else +#define raw_pread raw_pread_aligned +#define raw_pwrite raw_pwrite_aligned +#endif + + /***********************************************************/ /* Unix AIO using POSIX AIO */ @@ -237,6 +425,7 @@ typedef struct RawAIOCB { BlockDriverAIOCB common; struct aiocb aiocb; struct RawAIOCB *next; + int ret; } RawAIOCB; static int aio_sig_num = SIGUSR2; @@ -397,12 +586,38 @@ static RawAIOCB *raw_aio_setup(BlockDriv return acb; } +#ifndef QEMU_IMG +static void raw_aio_em_cb(void* opaque) +{ + RawAIOCB *acb = opaque; + acb->common.cb(acb->common.opaque, acb->ret); + qemu_aio_release(acb); +} +#endif + static BlockDriverAIOCB *raw_aio_read(BlockDriverState *bs, int64_t sector_num, uint8_t *buf, int nb_sectors, BlockDriverCompletionFunc *cb, void *opaque) { RawAIOCB *acb; + /* + * If O_DIRECT is used and the buffer is not aligned fall back + * to synchronous IO. + */ +#if defined(O_DIRECT) && !defined(QEMU_IMG) + BDRVRawState *s = bs->opaque; + + if (unlikely(s->aligned_buf != NULL && ((uintptr_t) buf % 512))) { + QEMUBH *bh; + acb = qemu_aio_get(bs, cb, opaque); + acb->ret = raw_pread(bs, 512 * sector_num, buf, 512 * nb_sectors); + bh = qemu_bh_new(raw_aio_em_cb, acb); + qemu_bh_schedule(bh); + return &acb->common; + } +#endif + acb = raw_aio_setup(bs, sector_num, buf, nb_sectors, cb, opaque); if (!acb) return NULL; @@ -419,6 +634,23 @@ static BlockDriverAIOCB *raw_aio_write(B { RawAIOCB *acb; + /* + * If O_DIRECT is used and the buffer is not aligned fall back + * to synchronous IO. + */ +#if defined(O_DIRECT) && !defined(QEMU_IMG) + BDRVRawState *s = bs->opaque; + + if (unlikely(s->aligned_buf != NULL && ((uintptr_t) buf % 512))) { + QEMUBH *bh; + acb = qemu_aio_get(bs, cb, opaque); + acb->ret = raw_pwrite(bs, 512 * sector_num, buf, 512 * nb_sectors); + bh = qemu_bh_new(raw_aio_em_cb, acb); + qemu_bh_schedule(bh); + return &acb->common; + } +#endif + acb = raw_aio_setup(bs, sector_num, (uint8_t*)buf, nb_sectors, cb, opaque); if (!acb) return NULL; @@ -462,6 +694,10 @@ static void raw_close(BlockDriverState * if (s->fd >= 0) { close(s->fd); s->fd = -1; +#if defined(O_DIRECT) && !defined(QEMU_IMG) + if (s->aligned_buf != NULL) + qemu_free(s->aligned_buf); +#endif } }