qemu-devel
[Top][All Lists]
Advanced

[Date Prev][Date Next][Thread Prev][Thread Next][Date Index][Thread Index]

Re: [PATCH V3 24/42] migration: close kvm after cpr


From: Steven Sistare
Subject: Re: [PATCH V3 24/42] migration: close kvm after cpr
Date: Fri, 16 May 2025 15:17:47 -0400
User-agent: Mozilla Thunderbird

On 5/16/2025 1:14 PM, Peter Xu wrote:
On Fri, May 16, 2025 at 10:35:44AM +0200, Cédric Le Goater wrote:
On 5/12/25 17:32, Steve Sistare wrote:
cpr-transfer breaks vfio network connectivity to and from the guest, and
the host system log shows:
    irq bypass consumer (token 00000000a03c32e5) registration fails: -16
which is EBUSY.  This occurs because KVM descriptors are still open in
the old QEMU process.  Close them.


[1]

Signed-off-by: Steve Sistare <steven.sistare@oracle.com>

This patch doesn't build.

/usr/bin/ld: libcommon.a.p/migration_cpr.c.o: in function `cpr_kvm_close':
./build/../migration/cpr.c:260: undefined reference to `kvm_close'

And it'll be also good if this patch can keep copying the kvm maintainer
(Paolo)..  I have Paolo copied.  So this patch is more of a kvm change not
migration, afaict.  Maybe we should split this into two patches.

Steve, you could attach a cc line in this patch to make sure it won't be
forgotten when you repost (at [1] above, I think git-send-email would
remember that then):

Cc: Paolo Bonzini <pbonzini@redhat.com>

Righto.  My intention was to cc him on this specific patch and not the whole
series, which I did in V2 but forgot in V3.  Thanks for the tip on embedding
cc in the patch.

Some other questions below.

---
   accel/kvm/kvm-all.c           | 28 ++++++++++++++++++++++++++++
   hw/vfio/helpers.c             | 10 ++++++++++
   include/hw/vfio/vfio-device.h |  2 ++
   include/migration/cpr.h       |  2 ++
   include/qemu/vfio-helpers.h   |  1 -
   include/system/kvm.h          |  1 +
   migration/cpr-transfer.c      | 18 ++++++++++++++++++
   migration/cpr.c               |  8 ++++++++
   migration/migration.c         |  1 +
   9 files changed, 70 insertions(+), 1 deletion(-)

diff --git a/accel/kvm/kvm-all.c b/accel/kvm/kvm-all.c
index 278a506..d619448 100644
--- a/accel/kvm/kvm-all.c
+++ b/accel/kvm/kvm-all.c
@@ -512,16 +512,23 @@ static int do_kvm_destroy_vcpu(CPUState *cpu)
           goto err;
       }
+    /* If I am the CPU that created coalesced_mmio_ring, then discard it */

Are these "reset to NULL" below required or cleanup?  It's not yet clear to
me when coalesced_mmio_ring isn't owned by the current CPU state.  Maybe
also better to split this chunk with some commit message?

The pointers are not valid after this point.  Setting to NULL is cleanup, but
a best practice IMO.  The vcpus are paused, so nothing should be touching
coalesced_mmio_ring, and it does not matter in which order we destroy vcpus.

- Steve

+    if (s->coalesced_mmio_ring == (void *)cpu->kvm_run + PAGE_SIZE) {
+        s->coalesced_mmio_ring = NULL;
+    }
+
       ret = munmap(cpu->kvm_run, mmap_size);
       if (ret < 0) {
           goto err;
       }
+    cpu->kvm_run = NULL;
       if (cpu->kvm_dirty_gfns) {
           ret = munmap(cpu->kvm_dirty_gfns, s->kvm_dirty_ring_bytes);
           if (ret < 0) {
               goto err;
           }
+        cpu->kvm_dirty_gfns = NULL;
       }
       kvm_park_vcpu(cpu);
@@ -600,6 +607,27 @@ err:
       return ret;
   }
+void kvm_close(void)
+{
+    CPUState *cpu;
+
+    CPU_FOREACH(cpu) {
+        cpu_remove_sync(cpu);
+        close(cpu->kvm_fd);
+        cpu->kvm_fd = -1;
+        close(cpu->kvm_vcpu_stats_fd);
+        cpu->kvm_vcpu_stats_fd = -1;
+    }
+
+    if (kvm_state && kvm_state->fd != -1) {
+        close(kvm_state->vmfd);
+        kvm_state->vmfd = -1;
+        close(kvm_state->fd);
+        kvm_state->fd = -1;
+    }
+    kvm_state = NULL;
+}
+
   /*
    * dirty pages logging control
    */
diff --git a/hw/vfio/helpers.c b/hw/vfio/helpers.c
index d0dbab1..af1db2f 100644
--- a/hw/vfio/helpers.c
+++ b/hw/vfio/helpers.c
@@ -117,6 +117,16 @@ bool vfio_get_info_dma_avail(struct vfio_iommu_type1_info 
*info,
   int vfio_kvm_device_fd = -1;
   #endif
+void vfio_kvm_device_close(void)
+{
+#ifdef CONFIG_KVM
+    if (vfio_kvm_device_fd != -1) {
+        close(vfio_kvm_device_fd);
+        vfio_kvm_device_fd = -1;
+    }
+#endif
+}
+
   int vfio_kvm_device_add_fd(int fd, Error **errp)
   {
   #ifdef CONFIG_KVM
diff --git a/include/hw/vfio/vfio-device.h b/include/hw/vfio/vfio-device.h
index 4e4d0b6..6eb6f21 100644
--- a/include/hw/vfio/vfio-device.h
+++ b/include/hw/vfio/vfio-device.h
@@ -231,4 +231,6 @@ void vfio_device_set_fd(VFIODevice *vbasedev, const char 
*str, Error **errp);
   void vfio_device_init(VFIODevice *vbasedev, int type, VFIODeviceOps *ops,
                         DeviceState *dev, bool ram_discard);
   int vfio_device_get_aw_bits(VFIODevice *vdev);
+
+void vfio_kvm_device_close(void);
   #endif /* HW_VFIO_VFIO_COMMON_H */
diff --git a/include/migration/cpr.h b/include/migration/cpr.h
index fc6aa33..5f1ff10 100644
--- a/include/migration/cpr.h
+++ b/include/migration/cpr.h
@@ -31,7 +31,9 @@ void cpr_state_close(void);
   struct QIOChannel *cpr_state_ioc(void);
   bool cpr_needed_for_reuse(void *opaque);
+void cpr_kvm_close(void);
+void cpr_transfer_init(void);
   QEMUFile *cpr_transfer_output(MigrationChannel *channel, Error **errp);
   QEMUFile *cpr_transfer_input(MigrationChannel *channel, Error **errp);
diff --git a/include/qemu/vfio-helpers.h b/include/qemu/vfio-helpers.h
index bde9495..a029036 100644
--- a/include/qemu/vfio-helpers.h
+++ b/include/qemu/vfio-helpers.h
@@ -28,5 +28,4 @@ void qemu_vfio_pci_unmap_bar(QEMUVFIOState *s, int index, 
void *bar,
                                uint64_t offset, uint64_t size);
   int qemu_vfio_pci_init_irq(QEMUVFIOState *s, EventNotifier *e,
                              int irq_type, Error **errp);
-
   #endif
diff --git a/include/system/kvm.h b/include/system/kvm.h
index b690dda..cfaa94c 100644
--- a/include/system/kvm.h
+++ b/include/system/kvm.h
@@ -194,6 +194,7 @@ bool kvm_has_sync_mmu(void);
   int kvm_has_vcpu_events(void);
   int kvm_max_nested_state_length(void);
   int kvm_has_gsi_routing(void);
+void kvm_close(void);
   /**
    * kvm_arm_supports_user_irq
diff --git a/migration/cpr-transfer.c b/migration/cpr-transfer.c
index e1f1403..396558f 100644
--- a/migration/cpr-transfer.c
+++ b/migration/cpr-transfer.c
@@ -17,6 +17,24 @@
   #include "migration/vmstate.h"
   #include "trace.h"
+static int cpr_transfer_notifier(NotifierWithReturn *notifier,
+                                 MigrationEvent *e,
+                                 Error **errp)
+{
+    if (e->type == MIG_EVENT_PRECOPY_DONE) {
+        cpr_kvm_close();
+    }
+    return 0;
+}
+
+void cpr_transfer_init(void)
+{
+    static NotifierWithReturn notifier;
+
+    migration_add_notifier_mode(&notifier, cpr_transfer_notifier,
+                                MIG_MODE_CPR_TRANSFER);
+}
+
   QEMUFile *cpr_transfer_output(MigrationChannel *channel, Error **errp)
   {
       MigrationAddress *addr = channel->addr;
diff --git a/migration/cpr.c b/migration/cpr.c
index 0b01e25..6102d04 100644
--- a/migration/cpr.c
+++ b/migration/cpr.c
@@ -7,12 +7,14 @@
   #include "qemu/osdep.h"
   #include "qapi/error.h"
+#include "hw/vfio/vfio-device.h"
   #include "migration/cpr.h"
   #include "migration/misc.h"
   #include "migration/options.h"
   #include "migration/qemu-file.h"
   #include "migration/savevm.h"
   #include "migration/vmstate.h"
+#include "system/kvm.h"
   #include "system/runstate.h"
   #include "trace.h"
@@ -252,3 +254,9 @@ bool cpr_needed_for_reuse(void *opaque)
       MigMode mode = migrate_mode();
       return mode == MIG_MODE_CPR_TRANSFER;
   }
+
+void cpr_kvm_close(void)
+{
+    kvm_close();
+    vfio_kvm_device_close();
+}
diff --git a/migration/migration.c b/migration/migration.c
index 4697732..89e2026 100644
--- a/migration/migration.c
+++ b/migration/migration.c
@@ -337,6 +337,7 @@ void migration_object_init(void)
       ram_mig_init();
       dirty_bitmap_mig_init();
+    cpr_transfer_init();
       /* Initialize cpu throttle timers */
       cpu_throttle_init();






reply via email to

[Prev in Thread] Current Thread [Next in Thread]