qemu-devel
[Top][All Lists]
Advanced

[Date Prev][Date Next][Thread Prev][Thread Next][Date Index][Thread Index]

Re: [PATCH v4 12/15] vfio/common: Support device dirty page tracking with vIOMMU


From: Duan, Zhenzhong
Subject: Re: [PATCH v4 12/15] vfio/common: Support device dirty page tracking with vIOMMU
Date: Fri, 8 Sep 2023 14:11:07 +0800
User-agent: Mozilla/5.0 (Windows NT 10.0; Win64; x64; rv:102.0) Gecko/20100101 Thunderbird/102.15.0

Hi Joao,

On 6/23/2023 5:48 AM, Joao Martins wrote:
Currently, device dirty page tracking with vIOMMU is not supported,
and a blocker is added and the migration is prevented.

When vIOMMU is used, IOVA ranges are DMA mapped/unmapped on the fly as
requested by the vIOMMU. These IOVA ranges can potentially be mapped
anywhere in the vIOMMU IOVA space as advertised by the VMM.

To support device dirty tracking when vIOMMU is enabled, instead create the
dirty ranges based on the vIOMMU-provided limits, which leads to the
tracking of the whole IOVA space regardless of what the devices use.

Signed-off-by: Avihai Horon <avihaih@nvidia.com>
Signed-off-by: Joao Martins <joao.m.martins@oracle.com>
---
  include/hw/vfio/vfio-common.h |  1 +
  hw/vfio/common.c              | 58 +++++++++++++++++++++++++++++------
  hw/vfio/pci.c                 |  7 +++++
  3 files changed, 56 insertions(+), 10 deletions(-)

diff --git a/include/hw/vfio/vfio-common.h b/include/hw/vfio/vfio-common.h
index f41860988d6b..c4bafad084b4 100644
--- a/include/hw/vfio/vfio-common.h
+++ b/include/hw/vfio/vfio-common.h
@@ -71,6 +71,7 @@ typedef struct VFIOMigration {
  typedef struct VFIOAddressSpace {
      AddressSpace *as;
      bool no_dma_translation;
+    hwaddr max_iova;
      QLIST_HEAD(, VFIOContainer) containers;
      QLIST_ENTRY(VFIOAddressSpace) list;
  } VFIOAddressSpace;
diff --git a/hw/vfio/common.c b/hw/vfio/common.c
index ecfb9afb3fb6..85fddef24026 100644
--- a/hw/vfio/common.c
+++ b/hw/vfio/common.c
@@ -428,6 +428,25 @@ static bool vfio_viommu_preset(void)
      return false;
  }
+static int vfio_viommu_get_max_iova(hwaddr *max_iova)
+{
+    VFIOAddressSpace *space;
+
+    *max_iova = 0;
+
+    QLIST_FOREACH(space, &vfio_address_spaces, list) {
+        if (space->as == &address_space_memory) {
+            continue;
+        }

Just curious why address_space_memory is bypassed?

Imagine two vfio devices linked to two host bridges, one with bypass_iommu set

and the other not. Don't we need to include the guest memory ranges in

address_space_memory?

+
+        if (*max_iova < space->max_iova) {
+            *max_iova = space->max_iova;
+        }
+    }
+
+    return *max_iova == 0;
+}
+
  int vfio_block_giommu_migration(Error **errp)
  {
      int ret;
@@ -1464,10 +1483,11 @@ static const MemoryListener 
vfio_dirty_tracking_listener = {
      .region_add = vfio_listener_dirty_tracking_update,
  };
-static void vfio_dirty_tracking_init(VFIOContainer *container,
+static int vfio_dirty_tracking_init(VFIOContainer *container,
                                       VFIODirtyRanges *ranges)
  {
      VFIODirtyRangesListener dirty;
+    int ret;

      memset(&dirty, 0, sizeof(dirty));
      dirty.ranges.min32 = UINT32_MAX;
@@ -1475,17 +1495,29 @@ static void vfio_dirty_tracking_init(VFIOContainer 
*container,
      dirty.listener = vfio_dirty_tracking_listener;
      dirty.container = container;
-    memory_listener_register(&dirty.listener,
-                             container->space->as);
+    if (vfio_viommu_preset()) {
+        hwaddr iommu_max_iova;
+
+        ret = vfio_viommu_get_max_iova(&iommu_max_iova);
+        if (ret) {
+            return -EINVAL;
+        }
+
+        vfio_dirty_tracking_update(0, iommu_max_iova, &dirty.ranges);
+    } else {
+        memory_listener_register(&dirty.listener,
+                                 container->space->as);
+        /*
+         * The memory listener is synchronous, and used to calculate the range
+         * to dirty tracking. Unregister it after we are done as we are not
+         * interested in any follow-up updates.
+         */
+        memory_listener_unregister(&dirty.listener);
+    }
      *ranges = dirty.ranges;

-    /*
-     * The memory listener is synchronous, and used to calculate the range
-     * to dirty tracking. Unregister it after we are done as we are not
-     * interested in any follow-up updates.
-     */
-    memory_listener_unregister(&dirty.listener);
+    return 0;
  }
static void vfio_devices_dma_logging_stop(VFIOContainer *container)
@@ -1590,7 +1622,13 @@ static int vfio_devices_dma_logging_start(VFIOContainer 
*container)
      VFIOGroup *group;
      int ret = 0;
-    vfio_dirty_tracking_init(container, &ranges);
+    ret = vfio_dirty_tracking_init(container, &ranges);
+    if (ret) {
+        error_report("Failed to init DMA logging ranges, err %d",
+                      ret);
+        return -EOPNOTSUPP;
+    }
+
      feature = vfio_device_feature_dma_logging_start_create(container,
                                                             &ranges);

It's not clear how much the dirty range size could impact device dirty tracking.

Maybe some devices linking to a vIOMMU with small aw_bits, or bypassing the vIOMMU

with small guest memory, could benefit if we used per-address-space dirty ranges.

Thanks

Zhenzhong




reply via email to

[Prev in Thread] Current Thread [Next in Thread]