qemu-devel
[Top][All Lists]
Advanced

[Date Prev][Date Next][Thread Prev][Thread Next][Date Index][Thread Index]

RE: [PATCH v3 5/5] intel_iommu: Optimize out some unnecessary UNMAP call


From: Duan, Zhenzhong
Subject: RE: [PATCH v3 5/5] intel_iommu: Optimize out some unnecessary UNMAP calls
Date: Wed, 14 Jun 2023 09:47:05 +0000

>-----Original Message-----
>From: Peter Xu <peterx@redhat.com>
>Sent: Saturday, June 10, 2023 5:26 AM
>Subject: Re: [PATCH v3 5/5] intel_iommu: Optimize out some unnecessary
>UNMAP calls
>
>On Fri, Jun 09, 2023 at 05:49:06AM +0000, Duan, Zhenzhong wrote:
>> Seems vtd_page_walk_one() already works in above way, checking mapping
>> changes and calling kernel for changed entries?
>
>Agreed in most cases, but the path this patch modified is not?  E.g. it happens
>in rare cases where we simply want to unmap everything (e.g. on a system
>reset, or invalid context entry)?
>
>That's also why I'm curious whether perf of this path matters at all (and
>assuming now we all agree that's the only goal now..), because afaiu it didn't
>really trigger in common paths.
I used below changes to collect time spent with iommufd backend during system 
reset.
Enable macro TEST_UNMAP to test unmap iova tree entries one by one.
Disable TEST_UNMAP to use unmap_ALL

--- a/hw/i386/intel_iommu.c
+++ b/hw/i386/intel_iommu.c
@@ -3736,16 +3736,44 @@ VTDAddressSpace *vtd_find_add_as(IntelIOMMUState *s, 
PCIBus *bus,
     return vtd_dev_as;
 }

+static gboolean iova_tree_iterator1(DMAMap *map)
+{
+    static int cnt;
+    printf("**********dump iova tree %d: iova %lx, size %lx\n", ++cnt, 
map->iova, map->size);
+    return false;
+}
+
+//#define TEST_UNMAP
+#ifdef TEST_UNMAP
+static gboolean vtd_unmap_single(DMAMap *map, gpointer *private)
+{
+    IOMMUTLBEvent event;
+
+    event.type = IOMMU_NOTIFIER_UNMAP;
+    event.entry.iova = map->iova;
+    event.entry.addr_mask = map->size;
+    event.entry.target_as = &address_space_memory;
+    event.entry.perm = IOMMU_NONE;
+    event.entry.translated_addr = 0;
+
+    memory_region_notify_iommu_one((IOMMUNotifier *)private, &event);
+    return false;
+}
+#endif
+
 /* Unmap the whole range in the notifier's scope. */
 static void vtd_address_space_unmap(VTDAddressSpace *as, IOMMUNotifier *n)
 {
+    int64_t start_tv, delta_tv;
     hwaddr size, remain;
     hwaddr start = n->start;
     hwaddr end = n->end;
     IntelIOMMUState *s = as->iommu_state;
-    IOMMUTLBEvent event;
     DMAMap map;

+    iova_tree_foreach(as->iova_tree, iova_tree_iterator1);
+
+    start_tv = qemu_clock_get_us(QEMU_CLOCK_REALTIME);
     /*
      * Note: all the codes in this function has a assumption that IOVA
      * bits are no more than VTD_MGAW bits (which is restricted by
@@ -3763,6 +3791,13 @@ static void vtd_address_space_unmap(VTDAddressSpace *as, 
IOMMUNotifier *n)
     assert(start <= end);
     size = remain = end - start + 1;

+#ifdef TEST_UNMAP
+    map.iova = n->start;
+    map.size = size - 1;
+    iova_tree_foreach_range_data(as->iova_tree, &map, vtd_unmap_single,
+                                 (gpointer *)n);
+#else
+    IOMMUTLBEvent event;
     event.type = IOMMU_NOTIFIER_UNMAP;
     event.entry.target_as = &address_space_memory;
     event.entry.perm = IOMMU_NONE;
@@ -3788,6 +3823,7 @@ static void vtd_address_space_unmap(VTDAddressSpace *as, 
IOMMUNotifier *n)
     }

     assert(!remain);
+#endif

     trace_vtd_as_unmap_whole(pci_bus_num(as->bus),
                              VTD_PCI_SLOT(as->devfn),
@@ -3797,6 +3833,9 @@ static void vtd_address_space_unmap(VTDAddressSpace *as, 
IOMMUNotifier *n)
     map.iova = n->start;
     map.size = size - 1; /* Inclusive */
     iova_tree_remove(as->iova_tree, map);
+
+    delta_tv = qemu_clock_get_us(QEMU_CLOCK_REALTIME) - start_tv;
+    printf("************ delta_tv %lu us **************\n", delta_tv);
 }


RESULT:
[   14.825015] reboot: Power down
**********dump iova tree 1: iova fffbe000, size fff
...
**********dump iova tree 66: iova fffff000, size fff
...
With TEST_UNMAP:
************ delta_tv 393 us **************
************ delta_tv 0 us **************

Without TEST_UNMAP:
************ delta_tv 364 us **************
************ delta_tv 2 us **************

It looks no explicit difference, unmap_ALL is a little better.
I also tried legacy container, result is similar as above:

With TEST_UNMAP:
************ delta_tv 325 us **************
************ delta_tv 0 us **************

Without TEST_UNMAP:
************ delta_tv 317 us **************
************ delta_tv 1 us **************

Thanks
Zhenzhong

reply via email to

[Prev in Thread] Current Thread [Next in Thread]