Re: [RFC 1/3] util/vfio-helpers: Collect IOVA reserved regions


From: Fam Zheng
Subject: Re: [RFC 1/3] util/vfio-helpers: Collect IOVA reserved regions
Date: Fri, 25 Sep 2020 15:43:07 +0100

On 2020-09-25 15:48, Eric Auger wrote:
> The IOVA allocator currently ignores host reserved regions.
> As a result some chosen IOVAs may collide with some of them,
> resulting in VFIO MAP_DMA errors later on. This happens on ARM,
> where the MSI reserved window [0x8000000, 0x8100000] is quickly
> encountered. Since kernel 5.4, VFIO returns the usable IOVA
> regions, so let's enumerate them with the prospect of avoiding
> them later on.
> 
> Signed-off-by: Eric Auger <eric.auger@redhat.com>
> ---
>  util/vfio-helpers.c | 75 +++++++++++++++++++++++++++++++++++++++++++--
>  1 file changed, 73 insertions(+), 2 deletions(-)
> 
> diff --git a/util/vfio-helpers.c b/util/vfio-helpers.c
> index 583bdfb36f..8e91beba95 100644
> --- a/util/vfio-helpers.c
> +++ b/util/vfio-helpers.c
> @@ -40,6 +40,11 @@ typedef struct {
>      uint64_t iova;
>  } IOVAMapping;
>  
> +struct IOVARange {
> +    uint64_t start;
> +    uint64_t end;
> +};
> +
>  struct QEMUVFIOState {
>      QemuMutex lock;
>  
> @@ -49,6 +54,8 @@ struct QEMUVFIOState {
>      int device;
>      RAMBlockNotifier ram_notifier;
>      struct vfio_region_info config_region_info, bar_region_info[6];
> +    struct IOVARange *usable_iova_ranges;
> +    uint8_t nb_iova_ranges;
>  
>      /* These fields are protected by @lock */
>      /* VFIO's IO virtual address space is managed by splitting into a few
> @@ -236,6 +243,36 @@ static int qemu_vfio_pci_write_config(QEMUVFIOState *s, void *buf, int size, int
>      return ret == size ? 0 : -errno;
>  }
>  
> +static void collect_usable_iova_ranges(QEMUVFIOState *s, void *first_cap)
> +{
> +    struct vfio_iommu_type1_info_cap_iova_range *cap_iova_range;
> +    struct vfio_info_cap_header *cap = first_cap;
> +    void *offset = first_cap;
> +    int i;
> +
> +    while (cap->id != VFIO_IOMMU_TYPE1_INFO_CAP_IOVA_RANGE) {
> +        if (cap->next) {

This check looks reversed.

> +            return;
> +        }
> +        offset += cap->next;

@offset is unused.

> +        cap = (struct vfio_info_cap_header *)cap;

This assignment is a no-op.
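Something along these lines (untested sketch) would address all three
points. Note it assumes cap->next is an offset relative to first_cap,
as the current code implies; if the kernel reports it relative to the
start of the iommu info buffer, that base pointer would have to be
passed in and used instead:

    struct vfio_info_cap_header *cap = first_cap;

    /* walk the capability chain until the IOVA range capability is found */
    while (cap->id != VFIO_IOMMU_TYPE1_INFO_CAP_IOVA_RANGE) {
        if (!cap->next) {
            /* end of chain reached without finding the capability */
            return;
        }
        cap = (struct vfio_info_cap_header *)((void *)first_cap + cap->next);
    }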

> +    }
> +
> +    cap_iova_range = (struct vfio_iommu_type1_info_cap_iova_range *)cap;
> +
> +    s->nb_iova_ranges = cap_iova_range->nr_iovas;
> +    if (s->nb_iova_ranges > 1) {
> +        s->usable_iova_ranges =
> +            g_realloc(s->usable_iova_ranges,
> +                      s->nb_iova_ranges * sizeof(struct IOVARange));
> +    }
> +
> +    for (i = 0; i <  s->nb_iova_ranges; i++) {

s/  / /

> +        s->usable_iova_ranges[i].start = cap_iova_range->iova_ranges[i].start;
> +        s->usable_iova_ranges[i].end = cap_iova_range->iova_ranges[i].end;
> +    }
> +}
> +
>  static int qemu_vfio_init_pci(QEMUVFIOState *s, const char *device,
>                                Error **errp)
>  {
> @@ -243,10 +280,13 @@ static int qemu_vfio_init_pci(QEMUVFIOState *s, const char *device,
>      int i;
>      uint16_t pci_cmd;
>      struct vfio_group_status group_status = { .argsz = sizeof(group_status) };
> -    struct vfio_iommu_type1_info iommu_info = { .argsz = sizeof(iommu_info) };
> +    struct vfio_iommu_type1_info *iommu_info = NULL;
> +    size_t iommu_info_size = sizeof(*iommu_info);
>      struct vfio_device_info device_info = { .argsz = sizeof(device_info) };
>      char *group_file = NULL;
>  
> +    s->usable_iova_ranges = NULL;
> +
>      /* Create a new container */
>      s->container = open("/dev/vfio/vfio", O_RDWR);
>  
> @@ -310,13 +350,38 @@ static int qemu_vfio_init_pci(QEMUVFIOState *s, const char *device,
>          goto fail;
>      }
>  
> +    iommu_info = g_malloc0(iommu_info_size);
> +    iommu_info->argsz = iommu_info_size;
> +
>      /* Get additional IOMMU info */
> -    if (ioctl(s->container, VFIO_IOMMU_GET_INFO, &iommu_info)) {
> +    if (ioctl(s->container, VFIO_IOMMU_GET_INFO, iommu_info)) {
>          error_setg_errno(errp, errno, "Failed to get IOMMU info");
>          ret = -errno;
>          goto fail;
>      }
>  
> +    /*
> +     * if the kernel does not report usable IOVA regions, choose
> +     * the legacy [QEMU_VFIO_IOVA_MIN, QEMU_VFIO_IOVA_MAX -1] region
> +     */
> +    s->nb_iova_ranges = 1;
> +    s->usable_iova_ranges = g_new0(struct IOVARange, 1);
> +    s->usable_iova_ranges[0].start = QEMU_VFIO_IOVA_MIN;
> +    s->usable_iova_ranges[0].end = QEMU_VFIO_IOVA_MAX - 1;
> +
> +    if (iommu_info->argsz > iommu_info_size) {
> +        void *first_cap;
> +
> +        iommu_info_size = iommu_info->argsz;
> +        iommu_info = g_realloc(iommu_info, iommu_info_size);
> +        if (ioctl(s->container, VFIO_IOMMU_GET_INFO, iommu_info)) {
> +            ret = -errno;
> +            goto fail;
> +        }
> +        first_cap = (void *)iommu_info + iommu_info->cap_offset;
> +        collect_usable_iova_ranges(s, first_cap);
> +    }
> +
>      s->device = ioctl(s->group, VFIO_GROUP_GET_DEVICE_FD, device);
>  
>      if (s->device < 0) {
> @@ -365,8 +430,12 @@ static int qemu_vfio_init_pci(QEMUVFIOState *s, const char *device,
>      if (ret) {
>          goto fail;
>      }
> +    g_free(iommu_info);
>      return 0;
>  fail:
> +    g_free(s->usable_iova_ranges);

Set s->usable_iova_ranges to NULL here to avoid a double free?
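I.e. the fail path could become:

    fail:
        g_free(s->usable_iova_ranges);
        s->usable_iova_ranges = NULL; /* avoid freeing it again later */
        s->nb_iova_ranges = 0;
        g_free(iommu_info);
        close(s->group);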

> +    s->nb_iova_ranges = 0;
> +    g_free(iommu_info);
>      close(s->group);
>  fail_container:
>      close(s->container);
> @@ -716,6 +785,8 @@ void qemu_vfio_close(QEMUVFIOState *s)
>          qemu_vfio_undo_mapping(s, &s->mappings[i], NULL);
>      }
>      ram_block_notifier_remove(&s->ram_notifier);
> +    g_free(s->usable_iova_ranges);
> +    s->nb_iova_ranges = 0;
>      qemu_vfio_reset(s);
>      close(s->device);
>      close(s->group);
> -- 
> 2.21.3
> 
> 

Fam


