
Re: [RFC] hw/nvme: Use irqfd to send interrupts


From: Stefan Hajnoczi
Subject: Re: [RFC] hw/nvme: Use irqfd to send interrupts
Date: Wed, 20 Jul 2022 06:21:49 -0400



On Sat, Jul 9, 2022, 00:36 Jinhao Fan <fanjinhao21s@ict.ac.cn> wrote:
Use irqfd to directly notify KVM to inject interrupts. This is done by
registering a virtual IRQ (virq) in KVM and associating the virq with an
irqfd, so that KVM can directly inject the interrupt when it receives
notification from the irqfd. This approach is supposed to improve
performance because it bypasses QEMU's MSI interrupt emulation logic.
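
In QEMU terms the flow is roughly the following (a simplified sketch of
what the patch below does, error handling omitted; pci_dev and vector
stand in for the controller's PCI device and the CQ's MSI-X vector):

    /* 1. Allocate a KVM routing entry (virq) for the MSI-X vector. */
    KVMRouteChange c = kvm_irqchip_begin_route_changes(kvm_state);
    int virq = kvm_irqchip_add_msi_route(&c, vector, pci_dev);
    kvm_irqchip_commit_route_changes(&c);

    /* 2. Create an eventfd and attach it to the virq as an irqfd. */
    EventNotifier notifier;
    event_notifier_init(&notifier, 0);
    kvm_irqchip_add_irqfd_notifier_gsi(kvm_state, &notifier, NULL, virq);

    /* 3. Signalling the eventfd now makes KVM inject the interrupt
     * directly, bypassing QEMU's MSI emulation.
     */
    event_notifier_set(&notifier);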

However, I did not see an obvious improvement in the emulated device's KIOPS:

QD      1   4  16  64
QEMU   38 123 210 329
irqfd  40 129 219 328

I found this problem quite hard to diagnose since irqfd's workflow
involves both QEMU and the in-kernel KVM.

Could you help me figure out the following questions:

1. How much performance improvement can I expect from using irqfd?

Hi Jinhao,
Thanks for working on this!

irqfd is not necessarily faster than KVM ioctl interrupt injection.

There are at least two non-performance reasons for irqfd:
1. It avoids QEMU emulation code, which historically was not thread safe and needed the Big QEMU Lock. IOThreads don't hold the BQL and therefore cannot safely call the regular interrupt emulation code in QEMU. I think this is still true today although parts of the code may now be less reliant on the BQL.
2. The eventfd interface decouples interrupt injection from the KVM ioctl interface. Vhost kernel and vhost-user device emulation have no dependency on KVM thanks to irqfd: they work with any eventfd, including irqfd (see the sketch below).
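
To illustrate the decoupling (a minimal sketch, not the actual vhost
code; the userspace handler name is made up):

    /* The device side only deals with a generic eventfd. */
    EventNotifier irq_notifier;
    event_notifier_init(&irq_notifier, 0);

    /* Backend A: hand the eventfd to KVM as an irqfd, so the kernel
     * injects the interrupt directly.
     */
    kvm_irqchip_add_irqfd_notifier_gsi(kvm_state, &irq_notifier, NULL, virq);

    /* Backend B: or handle it in QEMU userspace instead, with no KVM
     * involvement at all.
     */
    event_notifier_set_handler(&irq_notifier, my_userspace_irq_handler);

    /* Either way, raising the interrupt is the same call: */
    event_notifier_set(&irq_notifier);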

2. How can I debug this kind of cross QEMU-KVM problem?

perf(1) is good at observing both kernel and userspace activity together. What is it that you want to debug?
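
For example, something like:

    perf record -a -g -- sleep 10
    perf report

records call stacks system-wide, so the QEMU userspace path and the
in-kernel KVM path show up in the same profile. Adding the kvm:*
tracepoints with -e also lets you see individual exits and interrupt
injections.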


Signed-off-by: Jinhao Fan <fanjinhao21s@ict.ac.cn>
---
 hw/nvme/ctrl.c | 67 ++++++++++++++++++++++++++++++++++++++++++++++++-
 hw/nvme/nvme.h |  3 +++
 2 files changed, 68 insertions(+), 1 deletions(-)

diff --git a/hw/nvme/ctrl.c b/hw/nvme/ctrl.c
index 4b75c5f549..59768c4586 100644
--- a/hw/nvme/ctrl.c
+++ b/hw/nvme/ctrl.c
@@ -159,6 +159,7 @@
 #include "sysemu/sysemu.h"
 #include "sysemu/block-backend.h"
 #include "sysemu/hostmem.h"
+#include "sysemu/kvm.h"
 #include "hw/pci/msix.h"
 #include "migration/vmstate.h"

@@ -484,12 +485,70 @@ static void nvme_irq_check(NvmeCtrl *n)
     }
 }

+static int nvme_kvm_msix_vector_use(NvmeCtrl *n,
+                                    NvmeCQueue *cq,
+                                    int vector)
+{
+    int ret;
+
+    KVMRouteChange c = kvm_irqchip_begin_route_changes(kvm_state);
+    ret = kvm_irqchip_add_msi_route(&c, vector, &n->parent_obj);
+    if (ret < 0) {
+        return ret;
+    }
+    kvm_irqchip_commit_route_changes(&c);
+    cq->virq = ret;
+    return 0;
+}
+
+static int nvme_init_cq_irqfd(NvmeCQueue *cq)
+{
+    int ret;
+   
+    ret = nvme_kvm_msix_vector_use(cq->ctrl, cq, (int)cq->vector);
+    if (ret < 0) {
+        goto fail;
+    }
+
+    ret = event_notifier_init(&cq->irq_notifier, 0);
+    if (ret < 0) {
+        goto fail_notifier;
+    }
+
+    ret = kvm_irqchip_add_irqfd_notifier_gsi(kvm_state, &cq->irq_notifier,
+                                              NULL, cq->virq);
+    if (ret < 0) {
+        goto fail_kvm;
+    }
+
+    return 0;
+                       
+fail_kvm:
+    event_notifier_cleanup(&cq->irq_notifier);
+fail_notifier:
+    kvm_irqchip_release_virq(kvm_state, cq->virq);
+fail:
+    return ret;
+}
+
 static void nvme_irq_assert(NvmeCtrl *n, NvmeCQueue *cq)
 {
     if (cq->irq_enabled) {
         if (msix_enabled(&(n->parent_obj))) {
+            /* Initialize CQ irqfd */
+            if (!cq->irqfd_enabled && n->params.ioeventfd && cq->cqid != 0) {
+                int ret = nvme_init_cq_irqfd(cq);
+                if (ret == 0) {
+                    cq->irqfd_enabled = true;
+                }
+            }
+
             trace_pci_nvme_irq_msix(cq->vector);
-            msix_notify(&(n->parent_obj), cq->vector);
+            if (cq->irqfd_enabled) {
+                event_notifier_set(&cq->irq_notifier);

What happens when the MSI-X vector is masked?

I remember the VIRTIO code having masking support. I'm on my phone and can't check now, but I think it registers a temporary eventfd and buffers irqs while the vector is masked.
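
Roughly, one way to handle it would be (a hand-wavy sketch with made-up
helper names, not the actual virtio-pci code):

    /* On mask: stop KVM from injecting directly. */
    static void nvme_cq_vector_mask(NvmeCQueue *cq)
    {
        kvm_irqchip_remove_irqfd_notifier_gsi(kvm_state, &cq->irq_notifier,
                                              cq->virq);
    }

    /* On unmask: deliver anything signalled while masked, then hand the
     * eventfd back to KVM.
     */
    static void nvme_cq_vector_unmask(NvmeCtrl *n, NvmeCQueue *cq)
    {
        if (event_notifier_test_and_clear(&cq->irq_notifier)) {
            msix_notify(&n->parent_obj, cq->vector);
        }
        kvm_irqchip_add_irqfd_notifier_gsi(kvm_state, &cq->irq_notifier,
                                           NULL, cq->virq);
    }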

This makes me wonder if the VIRTIO and NVMe IOThread irqfd code can be unified. Maybe IOThread support can be built into the core device emulation code (e.g. irq APIs) so that it's not necessary to duplicate it.

+            } else {
+                msix_notify(&(n->parent_obj), cq->vector);
+            }
         } else {
             trace_pci_nvme_irq_pin();
             assert(cq->vector < 32);
@@ -4670,6 +4729,12 @@ static void nvme_free_cq(NvmeCQueue *cq, NvmeCtrl *n)
         event_notifier_cleanup(&cq->notifier);
     }
     if (msix_enabled(&n->parent_obj)) {
+        if (cq->irqfd_enabled) {
+            kvm_irqchip_remove_irqfd_notifier_gsi(kvm_state, &cq->irq_notifier,
+                                                  cq->virq);
+            kvm_irqchip_release_virq(kvm_state, cq->virq);
+            event_notifier_cleanup(&cq->irq_notifier);
+        }
         msix_vector_unuse(&n->parent_obj, cq->vector);
     }
     if (cq->cqid) {
diff --git a/hw/nvme/nvme.h b/hw/nvme/nvme.h
index 2a9beea0c8..84e5b00fe3 100644
--- a/hw/nvme/nvme.h
+++ b/hw/nvme/nvme.h
@@ -391,7 +391,10 @@ typedef struct NvmeCQueue {
     uint64_t    ei_addr;
     QEMUTimer   *timer;
     EventNotifier notifier;
+    EventNotifier irq_notifier;
+    int         virq;
     bool        ioeventfd_enabled;
+    bool        irqfd_enabled;
     QTAILQ_HEAD(, NvmeSQueue) sq_list;
     QTAILQ_HEAD(, NvmeRequest) req_list;
 } NvmeCQueue;
--
2.25.1


