[Date Prev][Date Next][Thread Prev][Thread Next][Date Index][Thread Index]
[Qemu-devel] [PATCH v3 44/44] nbd: Implement NBD_OPT_BLOCK_SIZE on clien
From: |
Eric Blake |
Subject: |
[Qemu-devel] [PATCH v3 44/44] nbd: Implement NBD_OPT_BLOCK_SIZE on client |
Date: |
Fri, 22 Apr 2016 17:40:52 -0600 |
The upstream NBD Protocol has defined a new extension to allow
the server to advertise block sizes to the client, as well as
a way for the client to inform the server that it intends to
obey block sizes.
Pass any received sizes on to the block layer.
Use the minimum block size as the sector size we pass to the
kernel - which also has the nice effect of cooperating with
(non-qemu) servers that don't do read-modify-write when exposing
a block device with 4k sectors; it can also allow us to visit a
file larger than 2T on a 32-bit kernel.
Signed-off-by: Eric Blake <address@hidden>
---
include/block/nbd.h | 3 +++
block/nbd-client.c | 3 +++
block/nbd.c | 17 +++++++++---
nbd/client.c | 74 ++++++++++++++++++++++++++++++++++++++++++++++++-----
4 files changed, 87 insertions(+), 10 deletions(-)
diff --git a/include/block/nbd.h b/include/block/nbd.h
index a5c68df..27a6854 100644
--- a/include/block/nbd.h
+++ b/include/block/nbd.h
@@ -133,6 +133,9 @@ enum {
struct NbdExportInfo {
uint64_t size;
uint16_t flags;
+ uint32_t min_block;
+ uint32_t opt_block;
+ uint32_t max_block;
};
typedef struct NbdExportInfo NbdExportInfo;
diff --git a/block/nbd-client.c b/block/nbd-client.c
index 2b6ac27..602a8ab 100644
--- a/block/nbd-client.c
+++ b/block/nbd-client.c
@@ -443,6 +443,9 @@ int nbd_client_init(BlockDriverState *bs,
logout("Failed to negotiate with the NBD server\n");
return ret;
}
+ if (client->info.min_block > bs->request_alignment) {
+ bs->request_alignment = client->info.min_block;
+ }
qemu_co_mutex_init(&client->send_mutex);
qemu_co_mutex_init(&client->free_sema);
diff --git a/block/nbd.c b/block/nbd.c
index 5172039..bb7df55 100644
--- a/block/nbd.c
+++ b/block/nbd.c
@@ -407,9 +407,20 @@ static int nbd_co_flush(BlockDriverState *bs)
static void nbd_refresh_limits(BlockDriverState *bs, Error **errp)
{
- bs->bl.max_discard = UINT32_MAX >> BDRV_SECTOR_BITS;
- bs->bl.max_write_zeroes = UINT32_MAX >> BDRV_SECTOR_BITS;
- bs->bl.max_transfer_length = UINT32_MAX >> BDRV_SECTOR_BITS;
+ NbdClientSession *s = nbd_get_client_session(bs);
+ int max = UINT32_MAX >> BDRV_SECTOR_BITS;
+
+ if (s->info.max_block) {
+ max = s->info.max_block >> BDRV_SECTOR_BITS;
+ }
+ bs->bl.max_discard = max;
+ bs->bl.max_write_zeroes = max;
+ bs->bl.max_transfer_length = max;
+
+ if (s->info.opt_block &&
+ s->info.opt_block >> BDRV_SECTOR_BITS > bs->bl.opt_transfer_length) {
+ bs->bl.opt_transfer_length = s->info.opt_block >> BDRV_SECTOR_BITS;
+ }
}
static int nbd_co_discard(BlockDriverState *bs, int64_t sector_num,
diff --git a/nbd/client.c b/nbd/client.c
index dac4f29..24f6b0b 100644
--- a/nbd/client.c
+++ b/nbd/client.c
@@ -232,6 +232,11 @@ static int nbd_handle_reply_err(QIOChannel *ioc,
nbd_opt_reply *reply,
reply->option);
break;
+ case NBD_REP_ERR_BLOCK_SIZE_REQD:
+ error_setg(errp, "Server wants OPT_BLOCK_SIZE before option %" PRIx32,
+ reply->option);
+ break;
+
default:
error_setg(errp, "Unknown error code when asking for option %" PRIx32,
reply->option);
@@ -333,6 +338,21 @@ static int nbd_opt_go(QIOChannel *ioc, const char
*wantname,
* flags still 0 is a witness of a broken server. */
info->flags = 0;
+ /* Some servers use NBD_OPT_GO to advertise non-default block
+ * sizes, and require that we first use NBD_OPT_BLOCK_SIZE to
+ * agree to that. */
+ TRACE("Attempting NBD_OPT_BLOCK_SIZE");
+ if (nbd_send_option_request(ioc, NBD_OPT_BLOCK_SIZE, 0, NULL, errp) < 0) {
+ return -1;
+ }
+ if (nbd_receive_option_reply(ioc, NBD_OPT_BLOCK_SIZE, &reply, errp) < 0) {
+ return -1;
+ }
+ error = nbd_handle_reply_err(ioc, &reply, errp);
+ if (error < 0) {
+ return error;
+ }
+
TRACE("Attempting NBD_OPT_GO for export '%s'", wantname);
if (nbd_send_option_request(ioc, NBD_OPT_GO, -1, wantname, errp) < 0) {
return -1;
@@ -402,6 +422,45 @@ static int nbd_opt_go(QIOChannel *ioc, const char
*wantname,
info->size, info->flags);
break;
+ case NBD_INFO_BLOCK_SIZE:
+ if (len != sizeof(info->min_block) * 3) {
+ error_setg(errp, "remaining export info len %" PRIu32
+ " is unexpected size", len);
+ return -1;
+ }
+ if (read_sync(ioc, &info->min_block, sizeof(info->min_block)) !=
+ sizeof(info->min_block)) {
+ error_setg(errp, "failed to read info minimum block size");
+ return -1;
+ }
+ be32_to_cpus(&info->min_block);
+ if (!is_power_of_2(info->min_block)) {
+ error_setg(errp, "server minimum block size %" PRId32
+ "is not a power of two", info->min_block);
+ return -1;
+ }
+ if (read_sync(ioc, &info->opt_block, sizeof(info->opt_block)) !=
+ sizeof(info->opt_block)) {
+ error_setg(errp, "failed to read info preferred block size");
+ return -1;
+ }
+ be32_to_cpus(&info->opt_block);
+ if (!is_power_of_2(info->opt_block) ||
+ info->opt_block < info->min_block) {
+ error_setg(errp, "server preferred block size %" PRId32
+ "is not valid", info->opt_block);
+ return -1;
+ }
+ if (read_sync(ioc, &info->max_block, sizeof(info->max_block)) !=
+ sizeof(info->max_block)) {
+ error_setg(errp, "failed to read info maximum block size");
+ return -1;
+ }
+ be32_to_cpus(&info->max_block);
+ TRACE("Block sizes are 0x%" PRIx32 ", 0x%" PRIx32 ", 0x%" PRIx32,
+ info->min_block, info->opt_block, info->max_block);
+ break;
+
default:
TRACE("ignoring unknown export info %" PRIu16, type);
if (drop_sync(ioc, len) != len) {
@@ -710,8 +769,9 @@ fail:
#ifdef __linux__
int nbd_init(int fd, QIOChannelSocket *sioc, NbdExportInfo *info)
{
- unsigned long sectors = info->size / BDRV_SECTOR_SIZE;
- if (info->size / BDRV_SECTOR_SIZE != sectors) {
+ unsigned long sector_size = MAX(BDRV_SECTOR_SIZE, info->min_block);
+ unsigned long sectors = info->size / sector_size;
+ if (info->size / sector_size != sectors) {
LOG("Export size %" PRId64 " too large for 32-bit kernel", info->size);
return -E2BIG;
}
@@ -724,18 +784,18 @@ int nbd_init(int fd, QIOChannelSocket *sioc,
NbdExportInfo *info)
return -serrno;
}
- TRACE("Setting block size to %lu", (unsigned long)BDRV_SECTOR_SIZE);
+ TRACE("Setting block size to %lu", sector_size);
- if (ioctl(fd, NBD_SET_BLKSIZE, (unsigned long)BDRV_SECTOR_SIZE) < 0) {
+ if (ioctl(fd, NBD_SET_BLKSIZE, sector_size) < 0) {
int serrno = errno;
LOG("Failed setting NBD block size");
return -serrno;
}
TRACE("Setting size to %lu block(s)", sectors);
- if (size % BDRV_SECTOR_SIZE) {
- TRACE("Ignoring trailing %d bytes of export",
- (int) (size % BDRV_SECTOR_SIZE));
+ if (info->size % sector_size) {
+ TRACE("Ignoring trailing %" PRId64 " bytes of export",
+ info->size % sector_size);
}
if (ioctl(fd, NBD_SET_SIZE_BLOCKS, sectors) < 0) {
--
2.5.5
- [Qemu-devel] [PATCH v3 34/44] nbd: Less allocation during NBD_OPT_LIST, (continued)
- [Qemu-devel] [PATCH v3 34/44] nbd: Less allocation during NBD_OPT_LIST, Eric Blake, 2016/04/22
- [Qemu-devel] [PATCH v3 35/44] nbd: Support shorter handshake, Eric Blake, 2016/04/22
- [Qemu-devel] [PATCH v3 32/44] nbd: Share common option-sending code in client, Eric Blake, 2016/04/22
- [Qemu-devel] [PATCH v3 36/44] nbd: Improve handling of shutdown requests, Eric Blake, 2016/04/22
- [Qemu-devel] [PATCH v3 40/44] nbd: Implement NBD_OPT_GO on client, Eric Blake, 2016/04/22
- [Qemu-devel] [PATCH v3 39/44] nbd: Implement NBD_OPT_GO on server, Eric Blake, 2016/04/22
- [Qemu-devel] [PATCH v3 44/44] nbd: Implement NBD_OPT_BLOCK_SIZE on client,
Eric Blake <=