qemu-devel
[Top][All Lists]
Advanced

[Date Prev][Date Next][Thread Prev][Thread Next][Date Index][Thread Index]

[RFC] hw/nvme: Use irqfd to send interrupts


From: Jinhao Fan
Subject: [RFC] hw/nvme: Use irqfd to send interrupts
Date: Sat, 9 Jul 2022 12:35:03 +0800

Use irqfd to directly notify KVM to inject interrupts. This is done by
registering a virtual IRQ(virq) in KVM and associate the virq with an
irqfd, so that KVM can directly inject the interrupt when it receives
notification from the irqfd. This approach is supposed to improve 
performance because it bypasses QEMU's MSI interrupt emulation logic.

However, I did not see an obvious improvement of the emulation KIOPS:

QD      1   4  16  64 
QEMU   38 123 210 329
irqfd  40 129 219 328

I found this problem quite hard to diagnose since irqfd's workflow
involves both QEMU and the in-kernel KVM. 

Could you help me figure out the following questions:

1. How much performance improvement can I expect from using irqfd?
2. How can I debug this kind of cross QEMU-KVM problems?

Signed-off-by: Jinhao Fan <fanjinhao21s@ict.ac.cn>
---
 hw/nvme/ctrl.c | 67 ++++++++++++++++++++++++++++++++++++++++++++++++-
 hw/nvme/nvme.h |  3 +++
 2 files changed, 68 insertions(+), 1 deletions(-)

diff --git a/hw/nvme/ctrl.c b/hw/nvme/ctrl.c
index 4b75c5f549..59768c4586 100644
--- a/hw/nvme/ctrl.c
+++ b/hw/nvme/ctrl.c
@@ -159,6 +159,7 @@
 #include "sysemu/sysemu.h"
 #include "sysemu/block-backend.h"
 #include "sysemu/hostmem.h"
+#include "sysemu/kvm.h"
 #include "hw/pci/msix.h"
 #include "migration/vmstate.h"
 
@@ -484,12 +485,70 @@ static void nvme_irq_check(NvmeCtrl *n)
     }
 }
 
+static int nvme_kvm_msix_vector_use(NvmeCtrl *n,
+                                    NvmeCQueue *cq,
+                                    int vector)
+{
+    int ret;
+
+    KVMRouteChange c = kvm_irqchip_begin_route_changes(kvm_state);
+    ret = kvm_irqchip_add_msi_route(&c, vector, &n->parent_obj);
+    if (ret < 0) {
+        return ret;
+    }
+    kvm_irqchip_commit_route_changes(&c);
+    cq->virq = ret;
+    return 0;
+}
+
+static int nvme_init_cq_irqfd(NvmeCQueue *cq)
+{
+    int ret;
+    
+    ret = nvme_kvm_msix_vector_use(cq->ctrl, cq, (int)cq->vector);
+    if (ret < 0) {
+        goto fail;
+    }
+
+    ret = event_notifier_init(&cq->irq_notifier, 0);
+    if (ret < 0) {
+        goto fail_notifier;
+    }
+
+    ret = kvm_irqchip_add_irqfd_notifier_gsi(kvm_state, &cq->irq_notifier,
+                                              NULL, cq->virq);
+    if (ret < 0) {
+        goto fail_kvm;
+    }
+
+    return 0;
+                        
+fail_kvm:
+    event_notifier_cleanup(&cq->irq_notifier);
+fail_notifier:
+    kvm_irqchip_release_virq(kvm_state, cq->virq);
+fail:
+    return ret;
+}
+
 static void nvme_irq_assert(NvmeCtrl *n, NvmeCQueue *cq)
 {
     if (cq->irq_enabled) {
         if (msix_enabled(&(n->parent_obj))) {
+            /* Initialize CQ irqfd */
+            if (!cq->irqfd_enabled && n->params.ioeventfd && cq->cqid != 0) {
+                int ret = nvme_init_cq_irqfd(cq);
+                if (ret == 0) {
+                    cq->irqfd_enabled = true;
+                }
+            }
+
             trace_pci_nvme_irq_msix(cq->vector);
-            msix_notify(&(n->parent_obj), cq->vector);
+            if (cq->irqfd_enabled) {
+                event_notifier_set(&cq->irq_notifier);
+            } else {
+                msix_notify(&(n->parent_obj), cq->vector);
+            }
         } else {
             trace_pci_nvme_irq_pin();
             assert(cq->vector < 32);
@@ -4670,6 +4729,12 @@ static void nvme_free_cq(NvmeCQueue *cq, NvmeCtrl *n)
         event_notifier_cleanup(&cq->notifier);
     }
     if (msix_enabled(&n->parent_obj)) {
+        if (cq->irqfd_enabled) {
+            kvm_irqchip_remove_irqfd_notifier_gsi(kvm_state, &cq->irq_notifier,
+                                                  cq->virq);
+            kvm_irqchip_release_virq(kvm_state, cq->virq);
+            event_notifier_cleanup(&cq->irq_notifier);
+        }
         msix_vector_unuse(&n->parent_obj, cq->vector);
     }
     if (cq->cqid) {
diff --git a/hw/nvme/nvme.h b/hw/nvme/nvme.h
index 2a9beea0c8..84e5b00fe3 100644
--- a/hw/nvme/nvme.h
+++ b/hw/nvme/nvme.h
@@ -391,7 +391,10 @@ typedef struct NvmeCQueue {
     uint64_t    ei_addr;
     QEMUTimer   *timer;
     EventNotifier notifier;
+    EventNotifier irq_notifier;
+    int         virq;
     bool        ioeventfd_enabled;
+    bool        irqfd_enabled;
     QTAILQ_HEAD(, NvmeSQueue) sq_list;
     QTAILQ_HEAD(, NvmeRequest) req_list;
 } NvmeCQueue;
-- 
2.25.1




reply via email to

[Prev in Thread] Current Thread [Next in Thread]