qemu-block
[Top][All Lists]
Advanced

[Date Prev][Date Next][Thread Prev][Thread Next][Date Index][Thread Index]

Re: [PATCH v2 17/18] hw/block/nvme: Use zone metadata file for persistence


From: Klaus Jensen
Subject: Re: [PATCH v2 17/18] hw/block/nvme: Use zone metadata file for persistence
Date: Wed, 1 Jul 2020 19:26:35 +0200

On Jun 18 06:34, Dmitry Fomichev wrote:
> A ZNS drive that is emulated by this driver is currently initialized
> with all zones Empty upon startup. However, actual ZNS SSDs save the
> state and condition of all zones in their internal NVRAM in the event
> of power loss. When such a drive is powered up again, it closes or
> finishes all zones that were open at the moment of shutdown. Besides
> that, the write pointer position as well as the state and condition
> of all zones is preserved across power-downs.
> 
> This commit adds support for persistent zone metadata to the driver.
> A new optional driver property, "zone_file", is introduced. If given
> on the command line, this property specifies the name of the file that
> stores the zone metadata. If "zone_file" is omitted, the driver will
> initialize with all zones empty, the same as before.
> 
> If zone metadata is configured to be persistent, then zone descriptor
> extensions also persist across controller shutdowns.
> 
> Signed-off-by: Dmitry Fomichev <dmitry.fomichev@wdc.com>

Stefan, before I review this in depth, can you comment on whether
mmap'ing a file from a device model and issuing regular msync calls is
an acceptable approach to storing state persistently across QEMU
invocations?

I could not find any examples of this in hw/, so I am unsure. I
implemented something similar using an additional blockdev on the
device and issuing blk_aio requests. Just mmap'ing a file seems much
simpler, though perhaps at the cost of portability? On the other hand,
I can't find any examples of using an additional blockdev either.

Can you shed any light on the preferred approach?

> ---
>  hw/block/nvme.c | 371 +++++++++++++++++++++++++++++++++++++++++++++---
>  hw/block/nvme.h |  38 +++++
>  2 files changed, 388 insertions(+), 21 deletions(-)
> 
> diff --git a/hw/block/nvme.c b/hw/block/nvme.c
> index 14d5f1d155..63e7a6352e 100644
> --- a/hw/block/nvme.c
> +++ b/hw/block/nvme.c
> @@ -69,6 +69,8 @@
>      } while (0)
>  
>  static void nvme_process_sq(void *opaque);
> +static void nvme_sync_zone_file(NvmeCtrl *n, NvmeNamespace *ns,
> +    NvmeZone *zone, int len);
>  
>  /*
>   * Add a zone to the tail of a zone list.
> @@ -90,6 +92,7 @@ static void nvme_add_zone_tail(NvmeCtrl *n, NvmeNamespace 
> *ns, NvmeZoneList *zl,
>          zl->tail = idx;
>      }
>      zl->size++;
> +    nvme_set_zone_meta_dirty(n, ns, true);
>  }
>  
>  /*
> @@ -106,12 +109,15 @@ static void nvme_remove_zone(NvmeCtrl *n, NvmeNamespace 
> *ns, NvmeZoneList *zl,
>      if (zl->size == 0) {
>          zl->head = NVME_ZONE_LIST_NIL;
>          zl->tail = NVME_ZONE_LIST_NIL;
> +        nvme_set_zone_meta_dirty(n, ns, true);
>      } else if (idx == zl->head) {
>          zl->head = zone->next;
>          ns->zone_array[zl->head].prev = NVME_ZONE_LIST_NIL;
> +        nvme_set_zone_meta_dirty(n, ns, true);
>      } else if (idx == zl->tail) {
>          zl->tail = zone->prev;
>          ns->zone_array[zl->tail].next = NVME_ZONE_LIST_NIL;
> +        nvme_set_zone_meta_dirty(n, ns, true);
>      } else {
>          ns->zone_array[zone->next].prev = zone->prev;
>          ns->zone_array[zone->prev].next = zone->next;
> @@ -138,6 +144,7 @@ static NvmeZone *nvme_remove_zone_head(NvmeCtrl *n, 
> NvmeNamespace *ns,
>              ns->zone_array[zl->head].prev = NVME_ZONE_LIST_NIL;
>          }
>          zone->prev = zone->next = 0;
> +        nvme_set_zone_meta_dirty(n, ns, true);
>      }
>  
>      return zone;
> @@ -476,6 +483,7 @@ static void nvme_assign_zone_state(NvmeCtrl *n, 
> NvmeNamespace *ns,
>      case NVME_ZONE_STATE_READ_ONLY:
>          zone->tstamp = 0;
>      }
> +    nvme_sync_zone_file(n, ns, zone, sizeof(NvmeZone));
>  }
>  
>  static bool nvme_addr_is_cmb(NvmeCtrl *n, hwaddr addr)
> @@ -2976,9 +2984,114 @@ static const MemoryRegionOps nvme_cmb_ops = {
>      },
>  };
>  
> -static int nvme_init_zone_meta(NvmeCtrl *n, NvmeNamespace *ns,
> +static int nvme_validate_zone_file(NvmeCtrl *n, NvmeNamespace *ns,
>      uint64_t capacity)
>  {
> +    NvmeZoneMeta *meta = ns->zone_meta;
> +    NvmeZone *zone = ns->zone_array;
> +    uint64_t start = 0, zone_size = n->params.zone_size;
> +    int i, n_imp_open = 0, n_exp_open = 0, n_closed = 0, n_full = 0;
> +
> +    if (meta->magic != NVME_ZONE_META_MAGIC) {
> +        return 1;
> +    }
> +    if (meta->version != NVME_ZONE_META_VER) {
> +        return 2;
> +    }
> +    if (meta->zone_size != zone_size) {
> +        return 3;
> +    }
> +    if (meta->zone_capacity != n->params.zone_capacity) {
> +        return 4;
> +    }
> +    if (meta->nr_offline_zones != n->params.nr_offline_zones) {
> +        return 5;
> +    }
> +    if (meta->nr_rdonly_zones != n->params.nr_rdonly_zones) {
> +        return 6;
> +    }
> +    if (meta->lba_size != n->conf.logical_block_size) {
> +        return 7;
> +    }
> +    if (meta->zd_extension_size != n->params.zd_extension_size) {
> +        return 8;
> +    }
> +
> +    for (i = 0; i < n->num_zones; i++, zone++) {
> +        if (start + zone_size > capacity) {
> +            zone_size = capacity - start;
> +        }
> +        if (zone->d.zt != NVME_ZONE_TYPE_SEQ_WRITE) {
> +            return 9;
> +        }
> +        if (zone->d.zcap != n->params.zone_capacity) {
> +            return 10;
> +        }
> +        if (zone->d.zslba != start) {
> +            return 11;
> +        }
> +        switch (nvme_get_zone_state(zone)) {
> +        case NVME_ZONE_STATE_EMPTY:
> +        case NVME_ZONE_STATE_OFFLINE:
> +        case NVME_ZONE_STATE_READ_ONLY:
> +            if (zone->d.wp != start) {
> +                return 12;
> +            }
> +            break;
> +        case NVME_ZONE_STATE_IMPLICITLY_OPEN:
> +            if (zone->d.wp < start ||
> +                zone->d.wp >= zone->d.zslba + zone->d.zcap) {
> +                return 13;
> +            }
> +            n_imp_open++;
> +            break;
> +        case NVME_ZONE_STATE_EXPLICITLY_OPEN:
> +            if (zone->d.wp < start ||
> +                zone->d.wp >= zone->d.zslba + zone->d.zcap) {
> +                return 13;
> +            }
> +            n_exp_open++;
> +            break;
> +        case NVME_ZONE_STATE_CLOSED:
> +            if (zone->d.wp < start ||
> +                zone->d.wp >= zone->d.zslba + zone->d.zcap) {
> +                return 13;
> +            }
> +            n_closed++;
> +            break;
> +        case NVME_ZONE_STATE_FULL:
> +            if (zone->d.wp != zone->d.zslba + zone->d.zcap) {
> +                return 14;
> +            }
> +            n_full++;
> +            break;
> +        default:
> +            return 15;
> +        }
> +
> +        start += zone_size;
> +    }
> +
> +    if (n_imp_open != nvme_zone_list_size(ns->exp_open_zones)) {
> +        return 16;
> +    }
> +    if (n_exp_open != nvme_zone_list_size(ns->imp_open_zones)) {
> +        return 17;
> +    }
> +    if (n_closed != nvme_zone_list_size(ns->closed_zones)) {
> +        return 18;
> +    }
> +    if (n_full != nvme_zone_list_size(ns->full_zones)) {
> +        return 19;
> +    }
> +
> +    return 0;
> +}
> +
> +static int nvme_init_zone_file(NvmeCtrl *n, NvmeNamespace *ns,
> +    uint64_t capacity)
> +{
> +    NvmeZoneMeta *meta = ns->zone_meta;
>      NvmeZone *zone;
>      Error *err;
>      uint64_t start = 0, zone_size = n->params.zone_size;
> @@ -2986,18 +3099,33 @@ static int nvme_init_zone_meta(NvmeCtrl *n, 
> NvmeNamespace *ns,
>      int i;
>      uint16_t zs;
>  
> -    ns->zone_array = g_malloc0(n->zone_array_size);
> -    ns->exp_open_zones = g_malloc0(sizeof(NvmeZoneList));
> -    ns->imp_open_zones = g_malloc0(sizeof(NvmeZoneList));
> -    ns->closed_zones = g_malloc0(sizeof(NvmeZoneList));
> -    ns->full_zones = g_malloc0(sizeof(NvmeZoneList));
> -    ns->zd_extensions = g_malloc0(n->params.zd_extension_size * 
> n->num_zones);
> +    if (n->params.zone_file) {
> +        meta->magic = NVME_ZONE_META_MAGIC;
> +        meta->version = NVME_ZONE_META_VER;
> +        meta->zone_size = zone_size;
> +        meta->zone_capacity = n->params.zone_capacity;
> +        meta->lba_size = n->conf.logical_block_size;
> +        meta->nr_offline_zones = n->params.nr_offline_zones;
> +        meta->nr_rdonly_zones = n->params.nr_rdonly_zones;
> +        meta->zd_extension_size = n->params.zd_extension_size;
> +    } else {
> +        ns->zone_array = g_malloc0(n->zone_array_size);
> +        ns->exp_open_zones = g_malloc0(sizeof(NvmeZoneList));
> +        ns->imp_open_zones = g_malloc0(sizeof(NvmeZoneList));
> +        ns->closed_zones = g_malloc0(sizeof(NvmeZoneList));
> +        ns->full_zones = g_malloc0(sizeof(NvmeZoneList));
> +        ns->zd_extensions =
> +            g_malloc0(n->params.zd_extension_size * n->num_zones);
> +    }
>      zone = ns->zone_array;
>  
>      nvme_init_zone_list(ns->exp_open_zones);
>      nvme_init_zone_list(ns->imp_open_zones);
>      nvme_init_zone_list(ns->closed_zones);
>      nvme_init_zone_list(ns->full_zones);
> +    if (n->params.zone_file) {
> +        nvme_set_zone_meta_dirty(n, ns, true);
> +    }
>  
>      for (i = 0; i < n->num_zones; i++, zone++) {
>          if (start + zone_size > capacity) {
> @@ -3048,7 +3176,189 @@ static int nvme_init_zone_meta(NvmeCtrl *n, 
> NvmeNamespace *ns,
>      return 0;
>  }
>  
> -static void nvme_zoned_init_ctrl(NvmeCtrl *n, Error **errp)
> +static int nvme_open_zone_file(NvmeCtrl *n, bool *init_meta)
> +{
> +    struct stat statbuf;
> +    size_t fsize;
> +    int ret;
> +
> +    ret = stat(n->params.zone_file, &statbuf);
> +    if (ret && errno == ENOENT) {
> +        *init_meta = true;
> +    } else if (!S_ISREG(statbuf.st_mode)) {
> +        fprintf(stderr, "%s is not a regular file\n", strerror(errno));
> +        return -1;
> +    }
> +
> +    n->zone_file_fd = open(n->params.zone_file,
> +                           O_RDWR | O_LARGEFILE | O_BINARY | O_CREAT, 644);
> +    if (n->zone_file_fd < 0) {
> +            fprintf(stderr, "failed to create zone file %s, err %s\n",
> +                    n->params.zone_file, strerror(errno));
> +            return -1;
> +    }
> +
> +    fsize = n->meta_size * n->num_namespaces;
> +
> +    if (stat(n->params.zone_file, &statbuf)) {
> +        fprintf(stderr, "can't stat zone file %s, err %s\n",
> +                n->params.zone_file, strerror(errno));
> +        return -1;
> +    }
> +    if (statbuf.st_size != fsize) {
> +        ret = ftruncate(n->zone_file_fd, fsize);
> +        if (ret < 0) {
> +            fprintf(stderr, "can't truncate zone file %s, err %s\n",
> +                    n->params.zone_file, strerror(errno));
> +            return -1;
> +        }
> +        *init_meta = true;
> +    }
> +
> +    return 0;
> +}
> +
> +static int nvme_map_zone_file(NvmeCtrl *n, NvmeNamespace *ns, bool 
> *init_meta)
> +{
> +    off_t meta_ofs = n->meta_size * (ns->nsid - 1);
> +
> +    ns->zone_meta = mmap(0, n->meta_size, PROT_READ | PROT_WRITE,
> +                         MAP_SHARED, n->zone_file_fd, meta_ofs);
> +    if (ns->zone_meta == MAP_FAILED) {
> +        fprintf(stderr, "failed to map zone file %s, ofs %lu, err %s\n",
> +                n->params.zone_file, meta_ofs, strerror(errno));
> +        return -1;
> +    }
> +
> +    ns->zone_array = (NvmeZone *)(ns->zone_meta + 1);
> +    ns->exp_open_zones = &ns->zone_meta->exp_open_zones;
> +    ns->imp_open_zones = &ns->zone_meta->imp_open_zones;
> +    ns->closed_zones = &ns->zone_meta->closed_zones;
> +    ns->full_zones = &ns->zone_meta->full_zones;
> +
> +    if (n->params.zd_extension_size) {
> +        ns->zd_extensions = (uint8_t *)(ns->zone_meta + 1);
> +        ns->zd_extensions += n->zone_array_size;
> +    }
> +
> +    return 0;
> +}
> +
> +static void nvme_sync_zone_file(NvmeCtrl *n, NvmeNamespace *ns,
> +    NvmeZone *zone, int len)
> +{
> +    uintptr_t addr, zd = (uintptr_t)zone;
> +
> +    addr = zd & qemu_real_host_page_mask;
> +    len += zd - addr;
> +    if (msync((void *)addr, len, MS_ASYNC) < 0)
> +        fprintf(stderr, "msync: failed to sync zone descriptors, file %s\n",
> +                strerror(errno));
> +
> +    if (nvme_zone_meta_dirty(n, ns)) {
> +        nvme_set_zone_meta_dirty(n, ns, false);
> +        if (msync(ns->zone_meta, sizeof(NvmeZoneMeta), MS_ASYNC) < 0)
> +            fprintf(stderr, "msync: failed to sync zone meta, file %s\n",
> +                    strerror(errno));
> +    }
> +}
> +
> +/*
> + * Close or finish all the zones that might be still open after power-down.
> + */
> +static void nvme_prepare_zones(NvmeCtrl *n, NvmeNamespace *ns)
> +{
> +    NvmeZone *zone;
> +    uint32_t set_state;
> +    int i;
> +
> +    assert(!ns->nr_active_zones);
> +    assert(!ns->nr_open_zones);
> +
> +    zone = ns->zone_array;
> +    for (i = 0; i < n->num_zones; i++, zone++) {
> +        zone->flags = 0;
> +        zone->tstamp = 0;
> +
> +        switch (nvme_get_zone_state(zone)) {
> +        case NVME_ZONE_STATE_IMPLICITLY_OPEN:
> +        case NVME_ZONE_STATE_EXPLICITLY_OPEN:
> +            break;
> +        case NVME_ZONE_STATE_CLOSED:
> +            nvme_aor_inc_active(n, ns);
> +            /* pass through */
> +        default:
> +            continue;
> +        }
> +
> +        if (zone->d.za & NVME_ZA_ZD_EXT_VALID) {
> +            set_state = NVME_ZONE_STATE_CLOSED;
> +        } else if (zone->d.wp == zone->d.zslba) {
> +            set_state = NVME_ZONE_STATE_EMPTY;
> +        } else if (n->params.max_active_zones == 0 ||
> +                   ns->nr_active_zones < n->params.max_active_zones) {
> +            set_state = NVME_ZONE_STATE_CLOSED;
> +        } else {
> +            set_state = NVME_ZONE_STATE_FULL;
> +        }
> +
> +        switch (set_state) {
> +        case NVME_ZONE_STATE_CLOSED:
> +            trace_pci_nvme_power_on_close(nvme_get_zone_state(zone),
> +                                          zone->d.zslba);
> +            nvme_aor_inc_active(n, ns);
> +            nvme_add_zone_tail(n, ns, ns->closed_zones, zone);
> +        break;
> +        case NVME_ZONE_STATE_EMPTY:
> +            trace_pci_nvme_power_on_reset(nvme_get_zone_state(zone),
> +                                          zone->d.zslba);
> +        break;
> +        case NVME_ZONE_STATE_FULL:
> +            trace_pci_nvme_power_on_full(nvme_get_zone_state(zone),
> +                                         zone->d.zslba);
> +            zone->d.wp = nvme_zone_wr_boundary(zone);
> +        }
> +
> +        nvme_set_zone_state(zone, set_state);
> +    }
> +}
> +
> +static int nvme_load_zone_meta(NvmeCtrl *n, NvmeNamespace *ns,
> +    uint64_t capacity, bool init_meta)
> +{
> +    int ret = 0;
> +
> +    if (n->params.zone_file) {
> +        ret = nvme_map_zone_file(n, ns, &init_meta);
> +        trace_pci_nvme_mapped_zone_file(n->params.zone_file, ret);
> +        if (ret < 0) {
> +            return ret;
> +        }
> +
> +        if (!init_meta) {
> +            ret = nvme_validate_zone_file(n, ns, capacity);
> +            if (ret) {
> +                trace_pci_nvme_err_zone_file_invalid(ret);
> +                init_meta = true;
> +            }
> +        }
> +    } else {
> +        init_meta = true;
> +    }
> +
> +    if (init_meta) {
> +        ret = nvme_init_zone_file(n, ns, capacity);
> +    } else {
> +        nvme_prepare_zones(n, ns);
> +    }
> +    if (!ret && n->params.zone_file) {
> +        nvme_sync_zone_file(n, ns, ns->zone_array, n->zone_array_size);
> +    }
> +
> +    return ret;
> +}
> +
> +static void nvme_zoned_init_ctrl(NvmeCtrl *n, bool *init_meta, Error **errp)
>  {
>      uint64_t zone_size = 0, capacity;
>      uint32_t nz;
> @@ -3084,6 +3394,9 @@ static void nvme_zoned_init_ctrl(NvmeCtrl *n, Error 
> **errp)
>      nz = DIV_ROUND_UP(capacity, zone_size);
>      n->num_zones = nz;
>      n->zone_array_size = sizeof(NvmeZone) * nz;
> +    n->meta_size = sizeof(NvmeZoneMeta) + n->zone_array_size +
> +                          nz * n->params.zd_extension_size;
> +    n->meta_size = ROUND_UP(n->meta_size, qemu_real_host_page_size);
>  
>      n->params.rzr_delay_usec *= SCALE_MS;
>      n->params.rrl_usec *= SCALE_MS;
> @@ -3119,6 +3432,13 @@ static void nvme_zoned_init_ctrl(NvmeCtrl *n, Error 
> **errp)
>          }
>      }
>  
> +    if (n->params.zone_file) {
> +        if (nvme_open_zone_file(n, init_meta) < 0) {
> +            error_setg(errp, "cannot open zone metadata file");
> +            return;
> +        }
> +    }
> +
>      if (n->params.zone_async_events) {
>          n->ae_cfg |= NVME_AEN_CFG_ZONE_DESCR_CHNGD_NOTICES;
>      }
> @@ -3127,13 +3447,14 @@ static void nvme_zoned_init_ctrl(NvmeCtrl *n, Error 
> **errp)
>  }
>  
>  static int nvme_zoned_init_ns(NvmeCtrl *n, NvmeNamespace *ns, int lba_index,
> -    Error **errp)
> +    bool init_meta, Error **errp)
>  {
>      int ret;
>  
> -    ret = nvme_init_zone_meta(n, ns, n->num_zones * n->params.zone_size);
> +    ret = nvme_load_zone_meta(n, ns, n->num_zones * n->params.zone_size,
> +                              init_meta);
>      if (ret) {
> -        error_setg(errp, "could not init zone metadata");
> +        error_setg(errp, "could not load/init zone metadata");
>          return -1;
>      }
>  
> @@ -3164,15 +3485,20 @@ static void nvme_zoned_clear(NvmeCtrl *n)
>  {
>      int i;
>  
> +    if (n->params.zone_file)  {
> +        close(n->zone_file_fd);
> +    }
>      for (i = 0; i < n->num_namespaces; i++) {
>          NvmeNamespace *ns = &n->namespaces[i];
>          g_free(ns->id_ns_zoned);
> -        g_free(ns->zone_array);
> -        g_free(ns->exp_open_zones);
> -        g_free(ns->imp_open_zones);
> -        g_free(ns->closed_zones);
> -        g_free(ns->full_zones);
> -        g_free(ns->zd_extensions);
> +        if (!n->params.zone_file) {
> +            g_free(ns->zone_array);
> +            g_free(ns->exp_open_zones);
> +            g_free(ns->imp_open_zones);
> +            g_free(ns->closed_zones);
> +            g_free(ns->full_zones);
> +            g_free(ns->zd_extensions);
> +        }
>      }
>  }
>  
> @@ -3258,7 +3584,8 @@ static void nvme_init_blk(NvmeCtrl *n, Error **errp)
>      n->ns_size = bs_size;
>  }
>  
> -static void nvme_init_namespace(NvmeCtrl *n, NvmeNamespace *ns, Error **errp)
> +static void nvme_init_namespace(NvmeCtrl *n, NvmeNamespace *ns, bool 
> init_meta,
> +    Error **errp)
>  {
>      NvmeIdNs *id_ns = &ns->id_ns;
>      int lba_index;
> @@ -3272,7 +3599,7 @@ static void nvme_init_namespace(NvmeCtrl *n, 
> NvmeNamespace *ns, Error **errp)
>      if (n->params.zoned) {
>          ns->csi = NVME_CSI_ZONED;
>          id_ns->ncap = cpu_to_le64(n->params.zone_capacity * n->num_zones);
> -        if (nvme_zoned_init_ns(n, ns, lba_index, errp) != 0) {
> +        if (nvme_zoned_init_ns(n, ns, lba_index, init_meta, errp) != 0) {
>              return;
>          }
>      } else {
> @@ -3429,6 +3756,7 @@ static void nvme_realize(PCIDevice *pci_dev, Error 
> **errp)
>      NvmeCtrl *n = NVME(pci_dev);
>      NvmeNamespace *ns;
>      Error *local_err = NULL;
> +    bool init_meta = false;
>  
>      int i;
>  
> @@ -3452,7 +3780,7 @@ static void nvme_realize(PCIDevice *pci_dev, Error 
> **errp)
>      }
>  
>      if (n->params.zoned) {
> -        nvme_zoned_init_ctrl(n, &local_err);
> +        nvme_zoned_init_ctrl(n, &init_meta, &local_err);
>          if (local_err) {
>              error_propagate(errp, local_err);
>              return;
> @@ -3463,7 +3791,7 @@ static void nvme_realize(PCIDevice *pci_dev, Error 
> **errp)
>      ns = n->namespaces;
>      for (i = 0; i < n->num_namespaces; i++, ns++) {
>          ns->nsid = i + 1;
> -        nvme_init_namespace(n, ns, &local_err);
> +        nvme_init_namespace(n, ns, init_meta, &local_err);
>          if (local_err) {
>              error_propagate(errp, local_err);
>              return;
> @@ -3506,6 +3834,7 @@ static Property nvme_props[] = {
>      DEFINE_PROP_UINT64("zone_size", NvmeCtrl, params.zone_size, 512),
>      DEFINE_PROP_UINT64("zone_capacity", NvmeCtrl, params.zone_capacity, 512),
>      DEFINE_PROP_UINT32("zone_append_max_size", NvmeCtrl, params.zamds_bs, 0),
> +    DEFINE_PROP_STRING("zone_file", NvmeCtrl, params.zone_file),
>      DEFINE_PROP_UINT32("zone_descr_ext_size", NvmeCtrl,
>                         params.zd_extension_size, 0),
>      DEFINE_PROP_INT32("max_active", NvmeCtrl, params.max_active_zones, 0),
> diff --git a/hw/block/nvme.h b/hw/block/nvme.h
> index 900fc54809..5e9a3a62f7 100644
> --- a/hw/block/nvme.h
> +++ b/hw/block/nvme.h
> @@ -14,6 +14,7 @@ typedef struct NvmeParams {
>      uint16_t msix_qsize;
>      uint32_t cmb_size_mb;
>  
> +    char        *zone_file;
>      bool        zoned;
>      bool        cross_zone_read;
>      bool        zone_async_events;
> @@ -114,6 +115,27 @@ typedef struct NvmeZoneList {
>      uint8_t         rsvd12[4];
>  } NvmeZoneList;
>  
> +#define NVME_ZONE_META_MAGIC 0x3aebaa70
> +#define NVME_ZONE_META_VER  1
> +
> +typedef struct NvmeZoneMeta {
> +    uint32_t        magic;
> +    uint32_t        version;
> +    uint64_t        zone_size;
> +    uint64_t        zone_capacity;
> +    uint32_t        nr_offline_zones;
> +    uint32_t        nr_rdonly_zones;
> +    uint32_t        lba_size;
> +    uint32_t        rsvd40;
> +    NvmeZoneList    exp_open_zones;
> +    NvmeZoneList    imp_open_zones;
> +    NvmeZoneList    closed_zones;
> +    NvmeZoneList    full_zones;
> +    uint8_t         zd_extension_size;
> +    uint8_t         dirty;
> +    uint8_t         rsvd594[3990];
> +} NvmeZoneMeta;
> +
>  typedef struct NvmeNamespace {
>      NvmeIdNs        id_ns;
>      uint32_t        nsid;
> @@ -122,6 +144,7 @@ typedef struct NvmeNamespace {
>  
>      NvmeIdNsZoned   *id_ns_zoned;
>      NvmeZone        *zone_array;
> +    NvmeZoneMeta    *zone_meta;
>      NvmeZoneList    *exp_open_zones;
>      NvmeZoneList    *imp_open_zones;
>      NvmeZoneList    *closed_zones;
> @@ -174,6 +197,7 @@ typedef struct NvmeCtrl {
>  
>      int             zone_file_fd;
>      uint32_t        num_zones;
> +    size_t          meta_size;
>      uint64_t        zone_size_bs;
>      uint64_t        zone_array_size;
>      uint8_t         zamds;
> @@ -282,6 +306,19 @@ static inline NvmeZone 
> *nvme_next_zone_in_list(NvmeNamespace *ns, NvmeZone *z,
>      return &ns->zone_array[z->next];
>  }
>  
> +static inline bool nvme_zone_meta_dirty(NvmeCtrl *n, NvmeNamespace *ns)
> +{
> +    return n->params.zone_file ? ns->zone_meta->dirty : false;
> +}
> +
> +static inline void nvme_set_zone_meta_dirty(NvmeCtrl *n, NvmeNamespace *ns,
> +    bool yesno)
> +{
> +    if (n->params.zone_file) {
> +        ns->zone_meta->dirty = yesno;
> +    }
> +}
> +
>  static inline int nvme_ilog2(uint64_t i)
>  {
>      int log = -1;
> @@ -295,6 +332,7 @@ static inline int nvme_ilog2(uint64_t i)
>  
>  static inline void _hw_nvme_check_size(void)
>  {
> +    QEMU_BUILD_BUG_ON(sizeof(NvmeZoneMeta) != 4096);
>      QEMU_BUILD_BUG_ON(sizeof(NvmeZoneList) != 16);
>      QEMU_BUILD_BUG_ON(sizeof(NvmeZone) != 88);
>  }
> -- 
> 2.21.0
> 
> 



reply via email to

[Prev in Thread] Current Thread [Next in Thread]