[Date Prev][Date Next][Thread Prev][Thread Next][Date Index][Thread Index]
[PULL 11/38] hw/block/nvme: add simple copy command
From: |
Klaus Jensen |
Subject: |
[PULL 11/38] hw/block/nvme: add simple copy command |
Date: |
Mon, 8 Mar 2021 13:22:46 +0100 |
From: Klaus Jensen <k.jensen@samsung.com>
Add support for TP 4065a ("Simple Copy Command"), v2020.05.04
("Ratified").
The implementation uses a bounce buffer to first read in the source
logical blocks, then issue a write of that bounce buffer. The default
maximum number of source logical blocks is 128, translating to 512 KiB
for 4k logical blocks which aligns with the default value of MDTS.
Signed-off-by: Klaus Jensen <k.jensen@samsung.com>
Reviewed-by: Keith Busch <kbusch@kernel.org>
---
hw/block/nvme-ns.h | 4 +
hw/block/nvme.h | 1 +
hw/block/nvme-ns.c | 8 ++
hw/block/nvme.c | 252 +++++++++++++++++++++++++++++++++++++++++-
hw/block/trace-events | 6 +
5 files changed, 270 insertions(+), 1 deletion(-)
diff --git a/hw/block/nvme-ns.h b/hw/block/nvme-ns.h
index 929e78861903..7af6884862b5 100644
--- a/hw/block/nvme-ns.h
+++ b/hw/block/nvme-ns.h
@@ -29,6 +29,10 @@ typedef struct NvmeNamespaceParams {
uint32_t nsid;
QemuUUID uuid;
+ uint16_t mssrl;
+ uint32_t mcl;
+ uint8_t msrc;
+
bool zoned;
bool cross_zone_read;
uint64_t zone_size_bs;
diff --git a/hw/block/nvme.h b/hw/block/nvme.h
index b8f5f2d6ffb8..cb2b5175f1a1 100644
--- a/hw/block/nvme.h
+++ b/hw/block/nvme.h
@@ -69,6 +69,7 @@ static inline const char *nvme_io_opc_str(uint8_t opc)
case NVME_CMD_COMPARE: return "NVME_NVM_CMD_COMPARE";
case NVME_CMD_WRITE_ZEROES: return "NVME_NVM_CMD_WRITE_ZEROES";
case NVME_CMD_DSM: return "NVME_NVM_CMD_DSM";
+ case NVME_CMD_COPY: return "NVME_NVM_CMD_COPY";
case NVME_CMD_ZONE_MGMT_SEND: return "NVME_ZONED_CMD_MGMT_SEND";
case NVME_CMD_ZONE_MGMT_RECV: return "NVME_ZONED_CMD_MGMT_RECV";
case NVME_CMD_ZONE_APPEND: return "NVME_ZONED_CMD_ZONE_APPEND";
diff --git a/hw/block/nvme-ns.c b/hw/block/nvme-ns.c
index 64b6a491adc3..fd73d0321109 100644
--- a/hw/block/nvme-ns.c
+++ b/hw/block/nvme-ns.c
@@ -67,6 +67,11 @@ static int nvme_ns_init(NvmeNamespace *ns, Error **errp)
id_ns->nmic |= NVME_NMIC_NS_SHARED;
}
+ /* simple copy */
+ id_ns->mssrl = cpu_to_le16(ns->params.mssrl);
+ id_ns->mcl = cpu_to_le32(ns->params.mcl);
+ id_ns->msrc = ns->params.msrc;
+
return 0;
}
@@ -384,6 +389,9 @@ static Property nvme_ns_props[] = {
NvmeSubsystem *),
DEFINE_PROP_UINT32("nsid", NvmeNamespace, params.nsid, 0),
DEFINE_PROP_UUID("uuid", NvmeNamespace, params.uuid),
+ DEFINE_PROP_UINT16("mssrl", NvmeNamespace, params.mssrl, 128),
+ DEFINE_PROP_UINT32("mcl", NvmeNamespace, params.mcl, 128),
+ DEFINE_PROP_UINT8("msrc", NvmeNamespace, params.msrc, 127),
DEFINE_PROP_BOOL("zoned", NvmeNamespace, params.zoned, false),
DEFINE_PROP_SIZE("zoned.zone_size", NvmeNamespace, params.zone_size_bs,
NVME_DEFAULT_ZONE_SIZE),
diff --git a/hw/block/nvme.c b/hw/block/nvme.c
index 4d6cd7755986..59faebce28f9 100644
--- a/hw/block/nvme.c
+++ b/hw/block/nvme.c
@@ -186,6 +186,7 @@ static const uint32_t nvme_cse_iocs_nvm[256] = {
[NVME_CMD_WRITE] = NVME_CMD_EFF_CSUPP | NVME_CMD_EFF_LBCC,
[NVME_CMD_READ] = NVME_CMD_EFF_CSUPP,
[NVME_CMD_DSM] = NVME_CMD_EFF_CSUPP | NVME_CMD_EFF_LBCC,
+ [NVME_CMD_COPY] = NVME_CMD_EFF_CSUPP | NVME_CMD_EFF_LBCC,
[NVME_CMD_COMPARE] = NVME_CMD_EFF_CSUPP,
};
@@ -195,6 +196,7 @@ static const uint32_t nvme_cse_iocs_zoned[256] = {
[NVME_CMD_WRITE] = NVME_CMD_EFF_CSUPP | NVME_CMD_EFF_LBCC,
[NVME_CMD_READ] = NVME_CMD_EFF_CSUPP,
[NVME_CMD_DSM] = NVME_CMD_EFF_CSUPP | NVME_CMD_EFF_LBCC,
+ [NVME_CMD_COPY] = NVME_CMD_EFF_CSUPP | NVME_CMD_EFF_LBCC,
[NVME_CMD_COMPARE] = NVME_CMD_EFF_CSUPP,
[NVME_CMD_ZONE_APPEND] = NVME_CMD_EFF_CSUPP | NVME_CMD_EFF_LBCC,
[NVME_CMD_ZONE_MGMT_SEND] = NVME_CMD_EFF_CSUPP | NVME_CMD_EFF_LBCC,
@@ -1523,6 +1525,136 @@ static void nvme_aio_zone_reset_cb(void *opaque, int
ret)
nvme_enqueue_req_completion(nvme_cq(req), req);
}
+struct nvme_copy_ctx {
+ int copies;
+ uint8_t *bounce;
+ uint32_t nlb;
+};
+
+struct nvme_copy_in_ctx {
+ NvmeRequest *req;
+ QEMUIOVector iov;
+};
+
+static void nvme_copy_cb(void *opaque, int ret)
+{
+ NvmeRequest *req = opaque;
+ NvmeNamespace *ns = req->ns;
+ struct nvme_copy_ctx *ctx = req->opaque;
+
+ trace_pci_nvme_copy_cb(nvme_cid(req));
+
+ if (ns->params.zoned) {
+ NvmeCopyCmd *copy = (NvmeCopyCmd *)&req->cmd;
+ uint64_t sdlba = le64_to_cpu(copy->sdlba);
+ NvmeZone *zone = nvme_get_zone_by_slba(ns, sdlba);
+
+ __nvme_advance_zone_wp(ns, zone, ctx->nlb);
+ }
+
+ if (!ret) {
+ block_acct_done(blk_get_stats(ns->blkconf.blk), &req->acct);
+ } else {
+ block_acct_failed(blk_get_stats(ns->blkconf.blk), &req->acct);
+ nvme_aio_err(req, ret);
+ }
+
+ g_free(ctx->bounce);
+ g_free(ctx);
+
+ nvme_enqueue_req_completion(nvme_cq(req), req);
+}
+
+static void nvme_copy_in_complete(NvmeRequest *req)
+{
+ NvmeNamespace *ns = req->ns;
+ NvmeCopyCmd *copy = (NvmeCopyCmd *)&req->cmd;
+ struct nvme_copy_ctx *ctx = req->opaque;
+ uint64_t sdlba = le64_to_cpu(copy->sdlba);
+ uint16_t status;
+
+ trace_pci_nvme_copy_in_complete(nvme_cid(req));
+
+ block_acct_done(blk_get_stats(ns->blkconf.blk), &req->acct);
+
+ status = nvme_check_bounds(ns, sdlba, ctx->nlb);
+ if (status) {
+ trace_pci_nvme_err_invalid_lba_range(sdlba, ctx->nlb, ns->id_ns.nsze);
+ goto invalid;
+ }
+
+ if (ns->params.zoned) {
+ NvmeZone *zone = nvme_get_zone_by_slba(ns, sdlba);
+
+ status = nvme_check_zone_write(ns, zone, sdlba, ctx->nlb);
+ if (status) {
+ goto invalid;
+ }
+
+ status = nvme_zrm_auto(ns, zone);
+ if (status) {
+ goto invalid;
+ }
+
+ zone->w_ptr += ctx->nlb;
+ }
+
+ qemu_iovec_init(&req->iov, 1);
+ qemu_iovec_add(&req->iov, ctx->bounce, nvme_l2b(ns, ctx->nlb));
+
+ block_acct_start(blk_get_stats(ns->blkconf.blk), &req->acct, 0,
+ BLOCK_ACCT_WRITE);
+
+ req->aiocb = blk_aio_pwritev(ns->blkconf.blk, nvme_l2b(ns, sdlba),
+ &req->iov, 0, nvme_copy_cb, req);
+
+ return;
+
+invalid:
+ req->status = status;
+
+ g_free(ctx->bounce);
+ g_free(ctx);
+
+ nvme_enqueue_req_completion(nvme_cq(req), req);
+}
+
+static void nvme_aio_copy_in_cb(void *opaque, int ret)
+{
+ struct nvme_copy_in_ctx *in_ctx = opaque;
+ NvmeRequest *req = in_ctx->req;
+ NvmeNamespace *ns = req->ns;
+ struct nvme_copy_ctx *ctx = req->opaque;
+
+ qemu_iovec_destroy(&in_ctx->iov);
+ g_free(in_ctx);
+
+ trace_pci_nvme_aio_copy_in_cb(nvme_cid(req));
+
+ if (ret) {
+ nvme_aio_err(req, ret);
+ }
+
+ ctx->copies--;
+
+ if (ctx->copies) {
+ return;
+ }
+
+ if (req->status) {
+ block_acct_failed(blk_get_stats(ns->blkconf.blk), &req->acct);
+
+ g_free(ctx->bounce);
+ g_free(ctx);
+
+ nvme_enqueue_req_completion(nvme_cq(req), req);
+
+ return;
+ }
+
+ nvme_copy_in_complete(req);
+}
+
struct nvme_compare_ctx {
QEMUIOVector iov;
uint8_t *bounce;
@@ -1641,6 +1773,121 @@ static uint16_t nvme_dsm(NvmeCtrl *n, NvmeRequest *req)
return status;
}
+static uint16_t nvme_copy(NvmeCtrl *n, NvmeRequest *req)
+{
+ NvmeNamespace *ns = req->ns;
+ NvmeCopyCmd *copy = (NvmeCopyCmd *)&req->cmd;
+ g_autofree NvmeCopySourceRange *range = NULL;
+
+ uint16_t nr = copy->nr + 1;
+ uint8_t format = copy->control[0] & 0xf;
+ uint32_t nlb = 0;
+
+ uint8_t *bounce = NULL, *bouncep = NULL;
+ struct nvme_copy_ctx *ctx;
+ uint16_t status;
+ int i;
+
+ trace_pci_nvme_copy(nvme_cid(req), nvme_nsid(ns), nr, format);
+
+ if (!(n->id_ctrl.ocfs & (1 << format))) {
+ trace_pci_nvme_err_copy_invalid_format(format);
+ return NVME_INVALID_FIELD | NVME_DNR;
+ }
+
+ if (nr > ns->id_ns.msrc + 1) {
+ return NVME_CMD_SIZE_LIMIT | NVME_DNR;
+ }
+
+ range = g_new(NvmeCopySourceRange, nr);
+
+ status = nvme_dma(n, (uint8_t *)range, nr * sizeof(NvmeCopySourceRange),
+ DMA_DIRECTION_TO_DEVICE, req);
+ if (status) {
+ return status;
+ }
+
+ for (i = 0; i < nr; i++) {
+ uint64_t slba = le64_to_cpu(range[i].slba);
+ uint32_t _nlb = le16_to_cpu(range[i].nlb) + 1;
+
+ if (_nlb > le16_to_cpu(ns->id_ns.mssrl)) {
+ return NVME_CMD_SIZE_LIMIT | NVME_DNR;
+ }
+
+ status = nvme_check_bounds(ns, slba, _nlb);
+ if (status) {
+ trace_pci_nvme_err_invalid_lba_range(slba, _nlb, ns->id_ns.nsze);
+ return status;
+ }
+
+ if (NVME_ERR_REC_DULBE(ns->features.err_rec)) {
+ status = nvme_check_dulbe(ns, slba, _nlb);
+ if (status) {
+ return status;
+ }
+ }
+
+ if (ns->params.zoned) {
+ status = nvme_check_zone_read(ns, slba, _nlb);
+ if (status) {
+ return status;
+ }
+ }
+
+ nlb += _nlb;
+ }
+
+ if (nlb > le32_to_cpu(ns->id_ns.mcl)) {
+ return NVME_CMD_SIZE_LIMIT | NVME_DNR;
+ }
+
+ bounce = bouncep = g_malloc(nvme_l2b(ns, nlb));
+
+ block_acct_start(blk_get_stats(ns->blkconf.blk), &req->acct, 0,
+ BLOCK_ACCT_READ);
+
+ ctx = g_new(struct nvme_copy_ctx, 1);
+
+ ctx->bounce = bounce;
+ ctx->nlb = nlb;
+ ctx->copies = 1;
+
+ req->opaque = ctx;
+
+ for (i = 0; i < nr; i++) {
+ uint64_t slba = le64_to_cpu(range[i].slba);
+ uint32_t nlb = le16_to_cpu(range[i].nlb) + 1;
+
+ size_t len = nvme_l2b(ns, nlb);
+ int64_t offset = nvme_l2b(ns, slba);
+
+ trace_pci_nvme_copy_source_range(slba, nlb);
+
+ struct nvme_copy_in_ctx *in_ctx = g_new(struct nvme_copy_in_ctx, 1);
+ in_ctx->req = req;
+
+ qemu_iovec_init(&in_ctx->iov, 1);
+ qemu_iovec_add(&in_ctx->iov, bouncep, len);
+
+ ctx->copies++;
+
+ blk_aio_preadv(ns->blkconf.blk, offset, &in_ctx->iov, 0,
+ nvme_aio_copy_in_cb, in_ctx);
+
+ bouncep += len;
+ }
+
+ /* account for the 1-initialization */
+ ctx->copies--;
+
+ if (!ctx->copies) {
+ nvme_copy_in_complete(req);
+ }
+
+ return NVME_NO_COMPLETE;
+}
+
static uint16_t nvme_compare(NvmeCtrl *n, NvmeRequest *req)
{
NvmeRwCmd *rw = (NvmeRwCmd *)&req->cmd;
@@ -2378,6 +2625,8 @@ static uint16_t nvme_io_cmd(NvmeCtrl *n, NvmeRequest *req)
return nvme_compare(n, req);
case NVME_CMD_DSM:
return nvme_dsm(n, req);
+ case NVME_CMD_COPY:
+ return nvme_copy(n, req);
case NVME_CMD_ZONE_MGMT_SEND:
return nvme_zone_mgmt_send(n, req);
case NVME_CMD_ZONE_MGMT_RECV:
@@ -4475,9 +4724,10 @@ static void nvme_init_ctrl(NvmeCtrl *n, PCIDevice
*pci_dev)
id->nn = cpu_to_le32(n->num_namespaces);
id->oncs = cpu_to_le16(NVME_ONCS_WRITE_ZEROES | NVME_ONCS_TIMESTAMP |
NVME_ONCS_FEATURES | NVME_ONCS_DSM |
- NVME_ONCS_COMPARE);
+ NVME_ONCS_COMPARE | NVME_ONCS_COPY);
id->vwc = (0x2 << 1) | 0x1;
+ id->ocfs = cpu_to_le16(NVME_OCFS_COPY_FORMAT_0);
id->sgls = cpu_to_le32(NVME_CTRL_SGLS_SUPPORT_NO_ALIGN |
NVME_CTRL_SGLS_BITBUCKET);
diff --git a/hw/block/trace-events b/hw/block/trace-events
index d32475c3989e..4b5ee04024f4 100644
--- a/hw/block/trace-events
+++ b/hw/block/trace-events
@@ -43,12 +43,17 @@ pci_nvme_admin_cmd(uint16_t cid, uint16_t sqid, uint8_t
opcode, const char *opna
pci_nvme_read(uint16_t cid, uint32_t nsid, uint32_t nlb, uint64_t count,
uint64_t lba) "cid %"PRIu16" nsid %"PRIu32" nlb %"PRIu32" count %"PRIu64" lba
0x%"PRIx64""
pci_nvme_write(uint16_t cid, const char *verb, uint32_t nsid, uint32_t nlb,
uint64_t count, uint64_t lba) "cid %"PRIu16" opname '%s' nsid %"PRIu32" nlb
%"PRIu32" count %"PRIu64" lba 0x%"PRIx64""
pci_nvme_rw_cb(uint16_t cid, const char *blkname) "cid %"PRIu16" blk '%s'"
+pci_nvme_copy(uint16_t cid, uint32_t nsid, uint16_t nr, uint8_t format) "cid
%"PRIu16" nsid %"PRIu32" nr %"PRIu16" format 0x%"PRIx8""
+pci_nvme_copy_source_range(uint64_t slba, uint32_t nlb) "slba 0x%"PRIx64" nlb
%"PRIu32""
+pci_nvme_copy_in_complete(uint16_t cid) "cid %"PRIu16""
+pci_nvme_copy_cb(uint16_t cid) "cid %"PRIu16""
pci_nvme_block_status(int64_t offset, int64_t bytes, int64_t pnum, int ret,
bool zeroed) "offset %"PRId64" bytes %"PRId64" pnum %"PRId64" ret 0x%x zeroed
%d"
pci_nvme_dsm(uint16_t cid, uint32_t nsid, uint32_t nr, uint32_t attr) "cid
%"PRIu16" nsid %"PRIu32" nr %"PRIu32" attr 0x%"PRIx32""
pci_nvme_dsm_deallocate(uint16_t cid, uint32_t nsid, uint64_t slba, uint32_t
nlb) "cid %"PRIu16" nsid %"PRIu32" slba %"PRIu64" nlb %"PRIu32""
pci_nvme_compare(uint16_t cid, uint32_t nsid, uint64_t slba, uint32_t nlb)
"cid %"PRIu16" nsid %"PRIu32" slba 0x%"PRIx64" nlb %"PRIu32""
pci_nvme_compare_cb(uint16_t cid) "cid %"PRIu16""
pci_nvme_aio_discard_cb(uint16_t cid) "cid %"PRIu16""
+pci_nvme_aio_copy_in_cb(uint16_t cid) "cid %"PRIu16""
pci_nvme_aio_zone_reset_cb(uint16_t cid, uint64_t zslba) "cid %"PRIu16" zslba
0x%"PRIx64""
pci_nvme_create_sq(uint64_t addr, uint16_t sqid, uint16_t cqid, uint16_t
qsize, uint16_t qflags) "create submission queue, addr=0x%"PRIx64",
sqid=%"PRIu16", cqid=%"PRIu16", qsize=%"PRIu16", qflags=%"PRIu16""
pci_nvme_create_cq(uint64_t addr, uint16_t cqid, uint16_t vector, uint16_t
size, uint16_t qflags, int ien) "create completion queue, addr=0x%"PRIx64",
cqid=%"PRIu16", vector=%"PRIu16", qsize=%"PRIu16", qflags=%"PRIu16", ien=%d"
@@ -113,6 +118,7 @@ pci_nvme_err_addr_read(uint64_t addr) "addr 0x%"PRIx64""
pci_nvme_err_addr_write(uint64_t addr) "addr 0x%"PRIx64""
pci_nvme_err_cfs(void) "controller fatal status"
pci_nvme_err_aio(uint16_t cid, const char *errname, uint16_t status) "cid
%"PRIu16" err '%s' status 0x%"PRIx16""
+pci_nvme_err_copy_invalid_format(uint8_t format) "format 0x%"PRIx8""
pci_nvme_err_invalid_sgld(uint16_t cid, uint8_t typ) "cid %"PRIu16" type
0x%"PRIx8""
pci_nvme_err_invalid_num_sgld(uint16_t cid, uint8_t typ) "cid %"PRIu16" type
0x%"PRIx8""
pci_nvme_err_invalid_sgl_excess_length(uint16_t cid) "cid %"PRIu16""
--
2.30.1
- [PULL 03/38] hw/block/nvme: add CMIC enum value for Identify Controller, (continued)
- [PULL 03/38] hw/block/nvme: add CMIC enum value for Identify Controller, Klaus Jensen, 2021/03/08
- [PULL 05/38] hw/block/nvme: add NMIC enum value for Identify Namespace, Klaus Jensen, 2021/03/08
- [PULL 07/38] hw/block/nvme: remove unused parameter in check zone write, Klaus Jensen, 2021/03/08
- [PULL 12/38] hw/block/nvme: fix Close Zone, Klaus Jensen, 2021/03/08
- [PULL 13/38] hw/block/nvme: add missing mor/mar constraint checks, Klaus Jensen, 2021/03/08
- [PULL 06/38] hw/block/nvme: support for shared namespace in subsystem, Klaus Jensen, 2021/03/08
- [PULL 09/38] hw/block/nvme: pull write pointer advancement to separate function, Klaus Jensen, 2021/03/08
- [PULL 08/38] hw/block/nvme: refactor zone resource management, Klaus Jensen, 2021/03/08
- [PULL 10/38] nvme: updated shared header for copy command, Klaus Jensen, 2021/03/08
- [PULL 14/38] hw/block/nvme: improve invalid zasl value reporting, Klaus Jensen, 2021/03/08
- [PULL 11/38] hw/block/nvme: add simple copy command,
Klaus Jensen <=
- [PULL 15/38] hw/block/nvme: use locally assigned QEMU IEEE OUI, Klaus Jensen, 2021/03/08
- [PULL 16/38] hw/block/nvme: add broadcast nsid support flush command, Klaus Jensen, 2021/03/08
- [PULL 17/38] hw/block/nvme: document 'mdts' nvme device parameter, Klaus Jensen, 2021/03/08
- [PULL 18/38] hw/block/nvme: deduplicate bad mdts trace event, Klaus Jensen, 2021/03/08
- [PULL 20/38] hw/block/nvme: remove unnecessary endian conversion, Klaus Jensen, 2021/03/08
- [PULL 19/38] hw/block/nvme: align zoned.zasl with mdts, Klaus Jensen, 2021/03/08
- [PULL 28/38] hw/block/nvme: try to deal with the iov/qsg duality, Klaus Jensen, 2021/03/08
- [PULL 29/38] hw/block/nvme: remove the req dependency in map functions, Klaus Jensen, 2021/03/08
- [PULL 30/38] hw/block/nvme: refactor nvme_dma, Klaus Jensen, 2021/03/08
- [PULL 21/38] hw/block/nvme: add identify trace event, Klaus Jensen, 2021/03/08