qemu-devel
[Top][All Lists]
Advanced

[Date Prev][Date Next][Thread Prev][Thread Next][Date Index][Thread Index]

Re: [PATCH 2/4] hw/block/nvme: add zone descriptor changed log page


From: Klaus Jensen
Subject: Re: [PATCH 2/4] hw/block/nvme: add zone descriptor changed log page
Date: Fri, 21 Oct 2022 08:26:06 +0200

On Okt 20 17:18, clay.mayers@kioxia.com wrote:
> From: Clay Mayers <clay.mayers@kioxia.com>
> 
> Zones marked with ZONE_FINISH_RECOMMENDED are added to the zone
> descriptor changed log page.  Once read with RAE cleared, they are
> removed from the list.
> 
> Zones stay in the list regardless of what other states the zones may
> go through so applications must be aware of ABA issues where finish
> may be recommended, the zone freed and re-opened and now the attribute
> is now clear.
> 
> Signed-off-by: Clay Mayers <clay.mayers@kioxia.com>
> ---
>  hw/nvme/ctrl.c       | 50 ++++++++++++++++++++++++++++++++++++++++++++
>  hw/nvme/ns.c         |  6 ++++++
>  hw/nvme/nvme.h       |  8 +++++++
>  hw/nvme/trace-events |  1 +
>  include/block/nvme.h |  8 +++++++
>  5 files changed, 73 insertions(+)
> 
> diff --git a/hw/nvme/ctrl.c b/hw/nvme/ctrl.c
> index d7e9fae0b0..3ffd0fb469 100644
> --- a/hw/nvme/ctrl.c
> +++ b/hw/nvme/ctrl.c
> @@ -1516,15 +1516,42 @@ static void nvme_clear_events(NvmeCtrl *n, uint8_t 
> event_type)
>      }
>  }
>  
> +static void nvme_zdc_list(NvmeNamespace *ns, NvmeZoneIdList *zlist, bool 
> reset)
> +{
> +    NvmeZdc *zdc;
> +    NvmeZdc *next;
> +    int index = 0;
> +
> +    QTAILQ_FOREACH_SAFE(zdc, &ns->zdc_list, entry, next) {
> +        if (index >= ARRAY_SIZE(zlist->zids)) {
> +            break;
> +        }

I could've sweared that a previous revision of the spec stated that this
required `nzid` to be set to 0xffff and the log page be zeroed in this
case.

However, upon reading it again, that doesnt seem to be the case, so this
seems to be just fine.

> +        zlist->zids[index++] = zdc->zone->d.zslba;
> +        if (reset) {
> +            QTAILQ_REMOVE(&ns->zdc_list, zdc, entry);
> +            zdc->zone->zdc_entry = NULL;
> +            g_free(zdc);
> +        }
> +    }
> +    zlist->nzid = cpu_to_le16(index);
> +}
> +
>  static void nvme_check_finish(NvmeNamespace *ns, NvmeZoneListHead *list)
>  {
>      int64_t now = qemu_clock_get_ms(QEMU_CLOCK_VIRTUAL);
>      NvmeZone *zone;
> +    NvmeZdc  *zdc;
>  
>      QTAILQ_FOREACH(zone, list, entry) {
>          if (zone->finish_ms <= now) {
>              zone->finish_ms = INT64_MAX;
>              zone->d.za |= NVME_ZA_FINISH_RECOMMENDED;
> +            if (!zone->zdc_entry) {
> +                zdc = g_malloc0(sizeof(*zdc));
> +                zdc->zone = zone;
> +                zone->zdc_entry = zdc;
> +                QTAILQ_INSERT_TAIL(&ns->zdc_list, zdc, entry);
> +            }
>          } else if (zone->finish_ms != INT64_MAX) {
>              timer_mod_anticipate(ns->active_timer, zone->finish_ms);
>          }
> @@ -4675,6 +4702,27 @@ static uint16_t nvme_cmd_effects(NvmeCtrl *n, uint8_t 
> csi, uint32_t buf_len,
>      return nvme_c2h(n, ((uint8_t *)&log) + off, trans_len, req);
>  }
>  
> +static uint16_t nvme_changed_zones(NvmeCtrl *n, uint8_t rae, uint32_t 
> buf_len,
> +                                    uint64_t off, NvmeRequest *req)
> +{
> +    NvmeNamespace *ns;
> +    NvmeCmd *cmd = &req->cmd;
> +    uint32_t nsid = le32_to_cpu(cmd->nsid);
> +    NvmeZoneIdList zlist = { };
> +    uint32_t trans_len = MIN(sizeof(zlist) - off, buf_len);

I believe this is unsafe if off >= sizeof(zlist).

> +
> +    nsid = le32_to_cpu(cmd->nsid);
> +    trace_pci_nvme_changed_zones(nsid);
> +
> +    ns = nvme_ns(n, nsid);
> +    if (!ns) {
> +        return NVME_INVALID_NSID | NVME_DNR;
> +    }
> +    nvme_zdc_list(ns, &zlist, !rae);
> +
> +    return nvme_c2h(n, ((uint8_t *)&zlist) + off, trans_len, req);
> +}
> +
>  static uint16_t nvme_get_log(NvmeCtrl *n, NvmeRequest *req)
>  {
>      NvmeCmd *cmd = &req->cmd;
> @@ -4722,6 +4770,8 @@ static uint16_t nvme_get_log(NvmeCtrl *n, NvmeRequest 
> *req)
>          return nvme_changed_nslist(n, rae, len, off, req);
>      case NVME_LOG_CMD_EFFECTS:
>          return nvme_cmd_effects(n, csi, len, off, req);
> +    case NVME_LOG_CHANGED_ZONE:
> +        return nvme_changed_zones(n, rae, len, off, req);
>      default:
>          trace_pci_nvme_err_invalid_log_page(nvme_cid(req), lid);
>          return NVME_INVALID_FIELD | NVME_DNR;
> diff --git a/hw/nvme/ns.c b/hw/nvme/ns.c
> index b577f2d8e0..25cd490c99 100644
> --- a/hw/nvme/ns.c
> +++ b/hw/nvme/ns.c
> @@ -240,6 +240,7 @@ static void nvme_ns_zoned_init_state(NvmeNamespace *ns)
>      QTAILQ_INIT(&ns->imp_open_zones);
>      QTAILQ_INIT(&ns->closed_zones);
>      QTAILQ_INIT(&ns->full_zones);
> +    QTAILQ_INIT(&ns->zdc_list);
>  
>      zone = ns->zone_array;
>      for (i = 0; i < ns->num_zones; i++, zone++) {
> @@ -526,8 +527,13 @@ void nvme_ns_shutdown(NvmeNamespace *ns)
>  
>  void nvme_ns_cleanup(NvmeNamespace *ns)
>  {
> +    NvmeZdc *zdc;
> +
>      if (ns->params.zoned) {
>          timer_free(ns->active_timer);
> +        while ((zdc = QTAILQ_FIRST(&ns->zdc_list))) {
> +            g_free(zdc);
> +        }
>          g_free(ns->id_ns_zoned);
>          g_free(ns->zone_array);
>          g_free(ns->zd_extensions);
> diff --git a/hw/nvme/nvme.h b/hw/nvme/nvme.h
> index 9a54dcdb32..ae65226150 100644
> --- a/hw/nvme/nvme.h
> +++ b/hw/nvme/nvme.h
> @@ -32,6 +32,7 @@ QEMU_BUILD_BUG_ON(NVME_MAX_NAMESPACES > NVME_NSID_BROADCAST 
> - 1);
>  
>  typedef struct NvmeCtrl NvmeCtrl;
>  typedef struct NvmeNamespace NvmeNamespace;
> +typedef struct NvmeZone NvmeZone;
>  
>  #define TYPE_NVME_BUS "nvme-bus"
>  OBJECT_DECLARE_SIMPLE_TYPE(NvmeBus, NVME_BUS)
> @@ -90,10 +91,16 @@ static inline NvmeNamespace *nvme_subsys_ns(NvmeSubsystem 
> *subsys,
>  #define NVME_NS(obj) \
>      OBJECT_CHECK(NvmeNamespace, (obj), TYPE_NVME_NS)
>  
> +typedef struct NvmeZdc {
> +    QTAILQ_ENTRY(NvmeZdc) entry;
> +    NvmeZone *zone;
> +} NvmeZdc;
> +
>  typedef struct NvmeZone {
>      NvmeZoneDescr   d;
>      uint64_t        w_ptr;
>      int64_t         finish_ms;
> +    NvmeZdc         *zdc_entry;
>      QTAILQ_ENTRY(NvmeZone) entry;
>  } NvmeZone;
>  
> @@ -172,6 +179,7 @@ typedef struct NvmeNamespace {
>  
>      int64_t         fto_ms;
>      QEMUTimer       *active_timer;
> +    QTAILQ_HEAD(, NvmeZdc) zdc_list;
>  
>      NvmeNamespaceParams params;
>  
> diff --git a/hw/nvme/trace-events b/hw/nvme/trace-events
> index fccb79f489..337927e607 100644
> --- a/hw/nvme/trace-events
> +++ b/hw/nvme/trace-events
> @@ -64,6 +64,7 @@ pci_nvme_identify_nslist(uint32_t ns) "nsid %"PRIu32""
>  pci_nvme_identify_nslist_csi(uint16_t ns, uint8_t csi) "nsid=%"PRIu16", 
> csi=0x%"PRIx8""
>  pci_nvme_identify_cmd_set(void) "identify i/o command set"
>  pci_nvme_identify_ns_descr_list(uint32_t ns) "nsid %"PRIu32""
> +pci_nvme_changed_zones(uint32_t ns) "nsid %"PRIu32""
>  pci_nvme_get_log(uint16_t cid, uint8_t lid, uint8_t lsp, uint8_t rae, 
> uint32_t len, uint64_t off) "cid %"PRIu16" lid 0x%"PRIx8" lsp 0x%"PRIx8" rae 
> 0x%"PRIx8" len %"PRIu32" off %"PRIu64""
>  pci_nvme_getfeat(uint16_t cid, uint32_t nsid, uint8_t fid, uint8_t sel, 
> uint32_t cdw11) "cid %"PRIu16" nsid 0x%"PRIx32" fid 0x%"PRIx8" sel 0x%"PRIx8" 
> cdw11 0x%"PRIx32""
>  pci_nvme_setfeat(uint16_t cid, uint32_t nsid, uint8_t fid, uint8_t save, 
> uint32_t cdw11) "cid %"PRIu16" nsid 0x%"PRIx32" fid 0x%"PRIx8" save 
> 0x%"PRIx8" cdw11 0x%"PRIx32""
> diff --git a/include/block/nvme.h b/include/block/nvme.h
> index 8027b7126b..c747cc4948 100644
> --- a/include/block/nvme.h
> +++ b/include/block/nvme.h
> @@ -1010,6 +1010,7 @@ enum NvmeLogIdentifier {
>      NVME_LOG_FW_SLOT_INFO   = 0x03,
>      NVME_LOG_CHANGED_NSLIST = 0x04,
>      NVME_LOG_CMD_EFFECTS    = 0x05,
> +    NVME_LOG_CHANGED_ZONE   = 0xbf,
>  };
>  
>  typedef struct QEMU_PACKED NvmePSD {
> @@ -1617,6 +1618,12 @@ typedef enum NvmeVirtualResourceType {
>      NVME_VIRT_RES_INTERRUPT     = 0x01,
>  } NvmeVirtualResourceType;
>  
> +typedef struct QEMU_PACKED NvmeZoneIdList {
> +    uint16_t nzid;
> +    uint16_t rsvd2[3];
> +    uint64_t zids[511];
> +} NvmeZoneIdList;
> +
>  static inline void _nvme_check_size(void)
>  {
>      QEMU_BUILD_BUG_ON(sizeof(NvmeBar) != 4096);
> @@ -1655,5 +1662,6 @@ static inline void _nvme_check_size(void)
>      QEMU_BUILD_BUG_ON(sizeof(NvmePriCtrlCap) != 4096);
>      QEMU_BUILD_BUG_ON(sizeof(NvmeSecCtrlEntry) != 32);
>      QEMU_BUILD_BUG_ON(sizeof(NvmeSecCtrlList) != 4096);
> +    QEMU_BUILD_BUG_ON(sizeof(NvmeZoneIdList) != 4096);
>  }
>  #endif
> -- 
> 2.27.0
> 

-- 
One of us - No more doubt, silence or taboo about mental illness.

Attachment: signature.asc
Description: PGP signature


reply via email to

[Prev in Thread] Current Thread [Next in Thread]