qemu-devel
[Top][All Lists]
Advanced

[Date Prev][Date Next][Thread Prev][Thread Next][Date Index][Thread Index]

RE: [RFC v9 16/29] vfio: Pass stage 1 MSI bindings to the host


From: Shameerali Kolothum Thodi
Subject: RE: [RFC v9 16/29] vfio: Pass stage 1 MSI bindings to the host
Date: Fri, 15 Oct 2021 10:54:06 +0000

Hi Eric,

> -----Original Message-----
> From: Eric Auger [mailto:eric.auger@redhat.com]
> Sent: 11 April 2021 13:09
> To: eric.auger.pro@gmail.com; eric.auger@redhat.com;
> qemu-devel@nongnu.org; qemu-arm@nongnu.org;
> alex.williamson@redhat.com
> Cc: peter.maydell@linaro.org; jean-philippe@linaro.org; peterx@redhat.com;
> jacob.jun.pan@linux.intel.com; yi.l.liu@intel.com; Shameerali Kolothum Thodi
> <shameerali.kolothum.thodi@huawei.com>; tn@semihalf.com;
> nicoleotsuka@gmail.com; yuzenghui <yuzenghui@huawei.com>;
> zhangfei.gao@gmail.com; vivek.gautam@arm.com; jiangkunkun
> <jiangkunkun@huawei.com>; vdumpa@nvidia.com; chenxiang (M)
> <chenxiang66@hisilicon.com>; zhukeqian <zhukeqian1@huawei.com>
> Subject: [RFC v9 16/29] vfio: Pass stage 1 MSI bindings to the host
> 
> We register the stage1 MSI bindings when enabling the vectors
> and we unregister them on msi disable.
> 
> Signed-off-by: Eric Auger <eric.auger@redhat.com>
> 
> ---
> 
> v7 -> v8:
> - add unregistration on msix_disable
> - remove vfio_container_unbind_msis()
> 
> v4 -> v5:
> - use VFIO_IOMMU_SET_MSI_BINDING
> 
> v2 -> v3:
> - only register the notifier if the IOMMU translates MSIs
> - record the msi bindings in a container list and unregister on
>   container release
> ---
>  include/hw/vfio/vfio-common.h | 12 ++++++
>  hw/vfio/common.c              | 59 +++++++++++++++++++++++++++
>  hw/vfio/pci.c                 | 76
> ++++++++++++++++++++++++++++++++++-
>  hw/vfio/trace-events          |  2 +
>  4 files changed, 147 insertions(+), 2 deletions(-)
> 
> diff --git a/include/hw/vfio/vfio-common.h b/include/hw/vfio/vfio-common.h
> index 6141162d7a..f30133b2a3 100644
> --- a/include/hw/vfio/vfio-common.h
> +++ b/include/hw/vfio/vfio-common.h
> @@ -74,6 +74,14 @@ typedef struct VFIOAddressSpace {
>      QLIST_ENTRY(VFIOAddressSpace) list;
>  } VFIOAddressSpace;
> 
> +typedef struct VFIOMSIBinding {
> +    int index;
> +    hwaddr iova;
> +    hwaddr gpa;
> +    hwaddr size;
> +    QLIST_ENTRY(VFIOMSIBinding) next;
> +} VFIOMSIBinding;
> +
>  struct VFIOGroup;
> 
>  typedef struct VFIOContainer {
> @@ -91,6 +99,7 @@ typedef struct VFIOContainer {
>      QLIST_HEAD(, VFIOGuestIOMMU) giommu_list;
>      QLIST_HEAD(, VFIOHostDMAWindow) hostwin_list;
>      QLIST_HEAD(, VFIOGroup) group_list;
> +    QLIST_HEAD(, VFIOMSIBinding) msibinding_list;
>      QLIST_ENTRY(VFIOContainer) next;
>  } VFIOContainer;
> 
> @@ -200,6 +209,9 @@ VFIOGroup *vfio_get_group(int groupid,
> AddressSpace *as, Error **errp);
>  void vfio_put_group(VFIOGroup *group);
>  int vfio_get_device(VFIOGroup *group, const char *name,
>                      VFIODevice *vbasedev, Error **errp);
> +int vfio_iommu_set_msi_binding(VFIOContainer *container, int n,
> +                               IOMMUTLBEntry *entry);
> +int vfio_iommu_unset_msi_binding(VFIOContainer *container, int n);
> 
>  extern const MemoryRegionOps vfio_region_ops;
>  typedef QLIST_HEAD(VFIOGroupList, VFIOGroup) VFIOGroupList;
> diff --git a/hw/vfio/common.c b/hw/vfio/common.c
> index e369d451e7..970a5a7be7 100644
> --- a/hw/vfio/common.c
> +++ b/hw/vfio/common.c
> @@ -662,6 +662,65 @@ static void
> vfio_iommu_unmap_notify(IOMMUNotifier *n, IOMMUTLBEntry *iotlb)
>      }
>  }
> 
> +int vfio_iommu_set_msi_binding(VFIOContainer *container, int n,
> +                               IOMMUTLBEntry *iotlb)
> +{
> +    struct vfio_iommu_type1_set_msi_binding ustruct;
> +    VFIOMSIBinding *binding;
> +    int ret;
> +
> +    QLIST_FOREACH(binding, &container->msibinding_list, next) {
> +        if (binding->index == n) {
> +            return 0;
> +        }
> +    }
> +
> +    ustruct.argsz = sizeof(struct vfio_iommu_type1_set_msi_binding);
> +    ustruct.iova = iotlb->iova;
> +    ustruct.flags = VFIO_IOMMU_BIND_MSI;
> +    ustruct.gpa = iotlb->translated_addr;
> +    ustruct.size = iotlb->addr_mask + 1;
> +    ret = ioctl(container->fd, VFIO_IOMMU_SET_MSI_BINDING , &ustruct);
> +    if (ret) {
> +        error_report("%s: failed to register the stage1 MSI binding (%m)",
> +                     __func__);
> +        return ret;
> +    }
> +    binding =  g_new0(VFIOMSIBinding, 1);
> +    binding->iova = ustruct.iova;
> +    binding->gpa = ustruct.gpa;
> +    binding->size = ustruct.size;
> +    binding->index = n;
> +
> +    QLIST_INSERT_HEAD(&container->msibinding_list, binding, next);
> +    return 0;
> +}
> +
> +int vfio_iommu_unset_msi_binding(VFIOContainer *container, int n)
> +{
> +    struct vfio_iommu_type1_set_msi_binding ustruct;
> +    VFIOMSIBinding *binding, *tmp;
> +    int ret;
> +
> +    ustruct.argsz = sizeof(struct vfio_iommu_type1_set_msi_binding);
> +    QLIST_FOREACH_SAFE(binding, &container->msibinding_list, next, tmp) {
> +        if (binding->index != n) {
> +            continue;
> +        }
> +        ustruct.flags = VFIO_IOMMU_UNBIND_MSI;
> +        ustruct.iova = binding->iova;
> +        ret = ioctl(container->fd, VFIO_IOMMU_SET_MSI_BINDING ,
> &ustruct);
> +        if (ret) {
> +            error_report("Failed to unregister the stage1 MSI binding "
> +                         "for iova=0x%"PRIx64" (%m)", binding->iova);
> +        }
> +        QLIST_REMOVE(binding, next);
> +        g_free(binding);
> +        return ret;
> +    }
> +    return 0;
> +}
> +
>  static void vfio_iommu_map_notify(IOMMUNotifier *n, IOMMUTLBEntry
> *iotlb)
>  {
>      VFIOGuestIOMMU *giommu = container_of(n, VFIOGuestIOMMU, n);
> diff --git a/hw/vfio/pci.c b/hw/vfio/pci.c
> index cad7deec71..a49029dfa4 100644
> --- a/hw/vfio/pci.c
> +++ b/hw/vfio/pci.c
> @@ -366,6 +366,65 @@ static void vfio_msi_interrupt(void *opaque)
>      notify(&vdev->pdev, nr);
>  }
> 
> +static bool vfio_iommu_require_msi_binding(IOMMUMemoryRegion
> *iommu_mr)
> +{
> +    bool msi_translate = false, nested = false;
> +
> +    memory_region_iommu_get_attr(iommu_mr,
> IOMMU_ATTR_MSI_TRANSLATE,
> +                                 (void *)&msi_translate);
> +    memory_region_iommu_get_attr(iommu_mr,
> IOMMU_ATTR_VFIO_NESTED,
> +                                 (void *)&nested);
> +    if (!nested || !msi_translate) {
> +        return false;
> +    }
> +   return true;
> +}
> +
> +static int vfio_register_msi_binding(VFIOPCIDevice *vdev,
> +                                     int vector_n, bool set)
> +{
> +    VFIOContainer *container = vdev->vbasedev.group->container;
> +    PCIDevice *dev = &vdev->pdev;
> +    AddressSpace *as = pci_device_iommu_address_space(dev);
> +    IOMMUMemoryRegionClass *imrc;
> +    IOMMUMemoryRegion *iommu_mr;
> +    IOMMUTLBEntry entry;
> +    MSIMessage msg;
> +
> +    if (as == &address_space_memory) {
> +        return 0;
> +    }
> +
> +    iommu_mr = IOMMU_MEMORY_REGION(as->root);
> +    if (!vfio_iommu_require_msi_binding(iommu_mr)) {
> +        return 0;
> +    }
> +
> +    /* MSI doorbell address is translated by an IOMMU */
> +
> +    if (!set) { /* unregister */
> +        trace_vfio_unregister_msi_binding(vdev->vbasedev.name,
> vector_n);
> +
> +        return vfio_iommu_unset_msi_binding(container, vector_n);
> +    }
> +
> +    msg = pci_get_msi_message(dev, vector_n);
> +    imrc = memory_region_get_iommu_class_nocheck(iommu_mr);
> +
> +    rcu_read_lock();
> +    entry = imrc->translate(iommu_mr, msg.address, IOMMU_WO, 0);
> +    rcu_read_unlock();
> +
> +    if (entry.perm == IOMMU_NONE) {
> +        return -ENOENT;
> +    }
> +
> +    trace_vfio_register_msi_binding(vdev->vbasedev.name, vector_n,
> +                                    msg.address,
> entry.translated_addr);
> +
> +    return vfio_iommu_set_msi_binding(container, vector_n, &entry);
> +}
> +
>  static int vfio_enable_vectors(VFIOPCIDevice *vdev, bool msix)
>  {
>      struct vfio_irq_set *irq_set;
> @@ -383,7 +442,7 @@ static int vfio_enable_vectors(VFIOPCIDevice *vdev,
> bool msix)
>      fds = (int32_t *)&irq_set->data;
> 
>      for (i = 0; i < vdev->nr_vectors; i++) {
> -        int fd = -1;
> +        int ret, fd = -1;
> 
>          /*
>           * MSI vs MSI-X - The guest has direct access to MSI mask and
> pending
> @@ -392,6 +451,12 @@ static int vfio_enable_vectors(VFIOPCIDevice *vdev,
> bool msix)
>           * KVM signaling path only when configured and unmasked.
>           */
>          if (vdev->msi_vectors[i].use) {
> +            ret = vfio_register_msi_binding(vdev, i, true);

Just a heads up if you have plans to re-spin this series soon.

With the recent kernel commit 438553958ba1 ("PCI/MSI: Enable and mask
MSI-X early")[1], MSI-X is enabled early but the vectors remain masked.

This looks like it creates a problem here when you try to call
pci_get_msi_message() above, as it returns msg.address as 0. Hence the
stage 1 MSI bind fails for devices with the MSI-X capability.

I have a temporary fix for the issue here[2], where we effectively wait
until MASKALL is cleared before enabling MSI-X. Otherwise, we would need
to delay this binding to a later stage for MSI-X.

Please take a look and let me know.

Thanks,
Shameer
[1] https://lore.kernel.org/all/20210729222542.344136412@linutronix.de/
[2] 
https://github.com/Linaro/qemu/commit/568820e409417473eb6f16dfdf8e9075f5a5feaf
 
 
> +            if (ret) {
> +                error_report("%s failed to register S1 MSI binding "
> +                             "for vector %d(%d)", vdev->vbasedev.name,
> i, ret);
> +                goto out;
> +            }
>              if (vdev->msi_vectors[i].virq < 0 ||
>                  (msix && msix_is_masked(&vdev->pdev, i))) {
>                  fd =
> event_notifier_get_fd(&vdev->msi_vectors[i].interrupt);
> @@ -405,6 +470,7 @@ static int vfio_enable_vectors(VFIOPCIDevice *vdev,
> bool msix)
> 
>      ret = ioctl(vdev->vbasedev.fd, VFIO_DEVICE_SET_IRQS, irq_set);
> 
> +out:
>      g_free(irq_set);
> 
>      return ret;
> @@ -719,7 +785,8 @@ static void vfio_msi_disable_common(VFIOPCIDevice
> *vdev)
> 
>  static void vfio_msix_disable(VFIOPCIDevice *vdev)
>  {
> -    int i;
> +    int ret, i;
> +
> 
>      msix_unset_vector_notifiers(&vdev->pdev);
> 
> @@ -731,6 +798,11 @@ static void vfio_msix_disable(VFIOPCIDevice *vdev)
>          if (vdev->msi_vectors[i].use) {
>              vfio_msix_vector_release(&vdev->pdev, i);
>              msix_vector_unuse(&vdev->pdev, i);
> +            ret = vfio_register_msi_binding(vdev, i, false);
> +            if (ret) {
> +                error_report("%s: failed to unregister S1 MSI binding "
> +                             "for vector %d(%d)", vdev->vbasedev.name,
> i, ret);
> +            }
>          }
>      }
> 
> diff --git a/hw/vfio/trace-events b/hw/vfio/trace-events
> index 43696afc15..5c1b28d0d4 100644
> --- a/hw/vfio/trace-events
> +++ b/hw/vfio/trace-events
> @@ -122,6 +122,8 @@ vfio_get_dev_region(const char *name, int index,
> uint32_t type, uint32_t subtype
>  vfio_dma_unmap_overflow_workaround(void) ""
>  vfio_iommu_addr_inv_iotlb(int asid, uint64_t addr, uint64_t size, uint64_t
> nb_granules, bool leaf) "nested IOTLB invalidate asid=%d, addr=0x%"PRIx64"
> granule_size=0x%"PRIx64" nb_granules=0x%"PRIx64" leaf=%d"
>  vfio_iommu_asid_inv_iotlb(int asid) "nested IOTLB invalidate asid=%d"
> +vfio_register_msi_binding(const char *name, int vector, uint64_t giova,
> uint64_t gdb) "%s: register vector %d gIOVA=0x%"PRIx64 "-> gDB=0x%"PRIx64"
> stage 1 mapping"
> +vfio_unregister_msi_binding(const char *name, int vector) "%s: unregister
> vector %d stage 1 mapping"
> 
>  # platform.c
>  vfio_platform_base_device_init(char *name, int groupid) "%s belongs to
> group #%d"
> --
> 2.26.3




reply via email to

[Prev in Thread] Current Thread [Next in Thread]