[Date Prev][Date Next][Thread Prev][Thread Next][Date Index][Thread Index]
[PATCH V9 22/46] cpr: exec mode
From: |
Steve Sistare |
Subject: |
[PATCH V9 22/46] cpr: exec mode |
Date: |
Tue, 26 Jul 2022 09:10:19 -0700 |
Add the cpr-exec migration mode. Usage:
qemu-system-$arch -migrate-mode-enable cpr-exec ...
migrate_set_parameter mode cpr-exec
migrate_set_parameter cpr-exec-args <arg1> <arg2> ... -incoming defer
migrate -d file:<filename>
... poll for runstate inmigrate ...
migrate_incoming file:<filename>
In this mode, the migrate command saves state to a file, directly exec's a
new version of qemu on the same host, replacing the original process while
retaining its PID, and loads the file via the migrate-incoming command.
The caller must specify a migration URI that writes to and reads from a
file. Arguments for the new qemu process are taken from the @cpr-exec-args
parameter. The first argument should be the path of a new qemu binary, or
a prefix command that exec's the new qemu binary. The arguments must match
those used to initially start qemu, plus the -incoming option.
Guest RAM must be backed by a memory backend with share=on, but cannot
be memory-backend-ram, and the '-migrate-mode-enable cpr-exec' option
is required. This causes secondary guest RAM blocks (those not specified
on the command line) to be allocated by mmap'ing a memfd. The memfds
are kept open across exec, their values are saved in special cpr state
which is retrieved after exec, and they are re-mmap'd. Hence guest RAM is
preserved in place, albeit with new virtual addresses in the qemu process.
Since guest RAM is not copied, and storage blocks are not migrated, the
caller must disable all capabilities related to page and block copy, and
the implementation ignores all related parameters.
Support for memory-backend-memfd, memory-backend-epc, and vfio devices in
cpr-exec mode is added in subsequent patches.
Signed-off-by: Steve Sistare <steven.sistare@oracle.com>
---
MAINTAINERS | 2 ++
include/migration/cpr.h | 18 +++++++++++
migration/cpr.c | 85 +++++++++++++++++++++++++++++++++++++++++++++++++
migration/meson.build | 1 +
migration/migration.c | 9 ++++++
migration/ram.c | 4 ++-
qapi/migration.json | 25 ++++++++++++---
softmmu/physmem.c | 48 +++++++++++++++++++++++++++-
softmmu/runstate.c | 4 ++-
trace-events | 1 +
10 files changed, 190 insertions(+), 7 deletions(-)
create mode 100644 include/migration/cpr.h
create mode 100644 migration/cpr.c
diff --git a/MAINTAINERS b/MAINTAINERS
index 122af30..42f6f4a 100644
--- a/MAINTAINERS
+++ b/MAINTAINERS
@@ -3168,6 +3168,8 @@ F: tests/unit/test-strlist.c
F: include/migration/cpr-state.h
F: migration/cpr-state.c
F: stubs/cpr-state.c
+F: include/migration/cpr.h
+F: migration/cpr.c
Record/replay
M: Pavel Dovgalyuk <pavel.dovgaluk@ispras.ru>
diff --git a/include/migration/cpr.h b/include/migration/cpr.h
new file mode 100644
index 0000000..c48be2d
--- /dev/null
+++ b/include/migration/cpr.h
@@ -0,0 +1,18 @@
+/*
+ * Copyright (c) 2021, 2022 Oracle and/or its affiliates.
+ *
+ * This work is licensed under the terms of the GNU GPL, version 2.
+ * See the COPYING file in the top-level directory.
+ */
+
+#ifndef MIGRATION_CPR_H
+#define MIGRATION_CPR_H
+
+extern bool only_cpr_capable;
+
+void cpr_init(void);
+void cpr_exec(void);
+void cpr_exec_failed(Error *err);
+void cpr_preserve_fds(void);
+
+#endif
diff --git a/migration/cpr.c b/migration/cpr.c
new file mode 100644
index 0000000..698baa4
--- /dev/null
+++ b/migration/cpr.c
@@ -0,0 +1,85 @@
+/*
+ * Copyright (c) 2021, 2022 Oracle and/or its affiliates.
+ *
+ * This work is licensed under the terms of the GNU GPL, version 2.
+ * See the COPYING file in the top-level directory.
+ */
+
+#include "qemu/osdep.h"
+#include "qapi/error.h"
+#include "migration/migration.h"
+#include "migration/misc.h"
+#include "migration/cpr.h"
+#include "migration/cpr-state.h"
+#include "sysemu/runstate.h"
+
+bool only_cpr_capable;
+static Notifier cpr_fd_notifier;
+
+/*
+ * Callback handed to cpr_walk_fd() by cpr_preserve_fds(): clear the
+ * close-on-exec flag on @fd.  @name, @id and @opaque are not used.
+ *
+ * Returns: always 0.
+ */
+static int preserve_fd(const char *name, int id, int fd, void *opaque)
+{
+    qemu_clear_cloexec(fd);
+
+    return 0;
+}
+
+/*
+ * Callback handed to cpr_walk_fd() by the migration notifier: set the
+ * close-on-exec flag on @fd again.  @name, @id and @opaque are not used.
+ *
+ * Returns: always 0.
+ */
+static int unpreserve_fd(const char *name, int id, int fd, void *opaque)
+{
+    qemu_set_cloexec(fd);
+
+    return 0;
+}
+
+/*
+ * Migration notifier callback.  @data is the MigrationState.  If the
+ * migration is in cpr-exec mode and has failed, walk the cpr fds with
+ * unpreserve_fd() so that close-on-exec is set on each of them again.
+ * In every other case this does nothing.
+ */
+static void cpr_fd_notifier_func(Notifier *notifier, void *data)
+{
+    MigrationState *ms = data;
+
+    if (migrate_mode_of(ms) != MIG_MODE_CPR_EXEC) {
+        return;
+    }
+    if (!migration_has_failed(ms)) {
+        return;
+    }
+
+    cpr_walk_fd(unpreserve_fd, NULL);
+}
+
+/*
+ * Walk the cpr fds with preserve_fd(), clearing close-on-exec on each.
+ * Called immediately before execvp() in the main loop.
+ */
+void cpr_preserve_fds(void)
+{
+    cpr_walk_fd(preserve_fd, NULL);
+}
+
+/*
+ * cpr setup, called from migration_object_init().  Loads cpr state,
+ * passing &error_fatal as the error destination, then registers
+ * cpr_fd_notifier_func() as a migration notifier.
+ */
+void cpr_init(void)
+{
+    cpr_state_load(&error_fatal);
+
+    migration_add_notifier(&cpr_fd_notifier, cpr_fd_notifier_func);
+}
+
+/*
+ * Request an exec of the new qemu once a cpr-exec migration is done.
+ *
+ * Does nothing unless the current migration is in cpr-exec mode and has
+ * not failed.  Otherwise the migration must have finished and cpr state
+ * must save successfully; if either check fails the error is handed to
+ * cpr_exec_failed().  On success, an exec is requested with the
+ * cpr-exec-args migration parameter as the argument list.
+ */
+void cpr_exec(void)
+{
+    MigrationState *ms = migrate_get_current();
+    Error *local_err = NULL;
+
+    if (migrate_mode_of(ms) != MIG_MODE_CPR_EXEC ||
+        migration_has_failed(ms)) {
+        return;
+    }
+
+    if (!migration_has_finished(ms)) {
+        error_setg(&local_err, "cannot exec: migration status is '%s', "
+                               "but must be 'completed'",
+                   MigrationStatus_str(ms->state));
+        cpr_exec_failed(local_err);
+        return;
+    }
+
+    if (cpr_state_save(&local_err)) {
+        cpr_exec_failed(local_err);
+        return;
+    }
+
+    qemu_system_exec_request(ms->parameters.cpr_exec_args);
+}
+
+/*
+ * Handle a failed cpr exec attempt.
+ *
+ * Moves the current migration to MIGRATION_STATUS_FAILED, records @err on
+ * the migration state, reports it, runs the migration notifiers, and
+ * finally calls cpr_state_unsave().
+ *
+ * @err is passed to error_report_err(); per QEMU's Error API that call is
+ * expected to free it, so callers should not touch @err afterwards.  The
+ * error must be recorded on the migration state before it is reported.
+ */
+void cpr_exec_failed(Error *err)
+{
+    MigrationState *ms = migrate_get_current();
+
+    migrate_set_state(&ms->state, ms->state, MIGRATION_STATUS_FAILED);
+    migrate_set_error(ms, err);
+    error_report_err(err);
+
+    migration_call_notifiers(ms);
+    cpr_state_unsave();
+}
diff --git a/migration/meson.build b/migration/meson.build
index f7d130d..5b65561 100644
--- a/migration/meson.build
+++ b/migration/meson.build
@@ -15,6 +15,7 @@ softmmu_ss.add(files(
'channel-block.c',
'colo-failover.c',
'colo.c',
+ 'cpr.c',
'cpr-state.c',
'exec.c',
'fd.c',
diff --git a/migration/migration.c b/migration/migration.c
index d7c6902..1a8a6ff 100644
--- a/migration/migration.c
+++ b/migration/migration.c
@@ -32,6 +32,7 @@
#include "migration.h"
#include "savevm.h"
#include "qemu-file.h"
+#include "migration/cpr.h"
#include "migration/vmstate.h"
#include "block/block.h"
#include "qapi/error.h"
@@ -231,6 +232,7 @@ void migration_object_init(void)
blk_mig_init();
ram_mig_init();
dirty_bitmap_mig_init();
+ cpr_init();
}
void migration_cancel(const Error *error)
@@ -1964,6 +1966,7 @@ static void migrate_fd_cleanup(MigrationState *s)
}
migration_call_notifiers(s);
block_cleanup_parameters(s);
+ cpr_exec();
yank_unregister_instance(MIGRATION_YANK_INSTANCE);
}
@@ -2489,6 +2492,12 @@ static bool migrate_prepare(MigrationState *s, bool blk,
bool blk_inc,
return false;
}
+ if (migrate_mode_of(s) == MIG_MODE_CPR_EXEC &&
+ !s->parameters.has_cpr_exec_args) {
+ error_setg(errp, "cpr-exec mode requires setting cpr-exec-args");
+ return false;
+ }
+
if (migration_is_blocked(errp)) {
return false;
}
diff --git a/migration/ram.c b/migration/ram.c
index 3ea3b41..4c868d2 100644
--- a/migration/ram.c
+++ b/migration/ram.c
@@ -165,6 +165,7 @@ out:
bool ramblock_is_ignored(RAMBlock *block)
{
return !qemu_ram_is_migratable(block) ||
+ migrate_mode() == MIG_MODE_CPR_EXEC ||
(migrate_ignore_shared() && qemu_ram_is_shared(block) &&
ramblock_is_named_file(block));
}
@@ -3058,7 +3059,8 @@ static void ram_init_bitmaps(RAMState *rs)
WITH_RCU_READ_LOCK_GUARD() {
ram_list_init_bitmaps();
/* We don't use dirty log with background snapshots */
- if (!migrate_background_snapshot()) {
+ if (!migrate_background_snapshot() &&
+ migrate_mode() == MIG_MODE_NORMAL) {
memory_global_dirty_log_start(GLOBAL_DIRTY_MIGRATION);
migration_bitmap_sync_precopy(rs);
}
diff --git a/qapi/migration.json b/qapi/migration.json
index 839fcd4..a068d4c 100644
--- a/qapi/migration.json
+++ b/qapi/migration.json
@@ -586,10 +586,21 @@
# arguments must match those used to initially start qemu, plus
# the -incoming option.
#
+# @cpr-exec: The migrate command saves state to a file, directly exec's a
+# new version of qemu on the same host, replacing the original
+# process while retaining its PID, and loads the file via the
+# migrate-incoming command. The caller must specify a migration URI
+# that writes to and reads from a file. Guest RAM must be backed by
+# a memory backend with share=on, and cannot be memory-backend-ram.
+# Guest RAM is not copied, and storage blocks are not migrated, so
+# all capabilities related to page and block copy must be disabled,
+# and all related parameters are ignored. Arguments for the new
+# qemu process are taken from the @cpr-exec-args parameter.
+#
# Since: 7.1
##
{ 'enum': 'MigMode',
- 'data': [ 'normal', 'cpr-reboot' ] }
+ 'data': [ 'normal', 'cpr-reboot', 'cpr-exec' ] }
##
# @BitmapMigrationBitmapAliasTransform:
@@ -712,7 +723,11 @@
# at tail stage.
# The default value is false. (Since 5.1)
#
-# @cpr-exec-args: defined in a subsequent patch.
+# @cpr-exec-args: arguments passed to new qemu for cpr-exec mode. The first
+# argument should be the path of a new qemu binary, or a prefix
+# command that exec's the new qemu binary. The arguments must
+# match those used to initially start qemu, plus the -incoming
+# option. (Since 7.1)
#
# @tls-creds: ID of the 'tls-creds' object that provides credentials for
# establishing a TLS connection over the migration data channel.
@@ -885,7 +900,8 @@
# at tail stage.
# The default value is false. (Since 5.1)
#
-# @cpr-exec-args: defined in a subsequent patch.
+# @cpr-exec-args: Arguments passed to new qemu for cpr-exec mode.
+# See description in @MigrationParameter. (Since 7.1)
#
# @tls-creds: ID of the 'tls-creds' object that provides credentials
# for establishing a TLS connection over the migration data
@@ -1090,7 +1106,8 @@
# at tail stage.
# The default value is false. (Since 5.1)
#
-# @cpr-exec-args: defined in a subsequent patch.
+# @cpr-exec-args: Arguments passed to new qemu for cpr-exec mode.
+# See description in @MigrationParameter. (Since 7.1)
#
# @tls-creds: ID of the 'tls-creds' object that provides credentials
# for establishing a TLS connection over the migration data
diff --git a/softmmu/physmem.c b/softmmu/physmem.c
index cce721a..29baa0f 100644
--- a/softmmu/physmem.c
+++ b/softmmu/physmem.c
@@ -44,6 +44,7 @@
#include "qemu/qemu-print.h"
#include "qemu/log.h"
#include "qemu/memalign.h"
+#include "qemu/memfd.h"
#include "exec/memory.h"
#include "exec/ioport.h"
#include "sysemu/dma.h"
@@ -66,6 +67,8 @@
#include "qemu/pmem.h"
+#include "migration/cpr-state.h"
+#include "migration/misc.h"
#include "migration/vmstate.h"
#include "qemu/range.h"
@@ -1971,6 +1974,40 @@ static void dirty_memory_extend(ram_addr_t old_ram_size,
}
}
+/*
+ * Returns true when the QOM parent of @mr can be cast to
+ * TYPE_MEMORY_BACKEND, false otherwise.
+ */
+static bool memory_region_is_backend(MemoryRegion *mr)
+{
+    Object *parent = OBJECT(mr)->parent;
+
+    return object_dynamic_cast(parent, TYPE_MEMORY_BACKEND) != NULL;
+}
+
+/*
+ * Back RAM block @rb with a memfd and map it.
+ *
+ * The memfd is looked up in cpr state by the memory region name.  If one
+ * is found, the recorded used length, max length and alignment replace
+ * the values in @rb and its memory region.  Otherwise a new memfd of
+ * rb->max_length plus the alignment is created and recorded in cpr state.
+ * In both cases the incoming @maxlen value is overwritten before use.
+ *
+ * The block is marked RAM_SHARED and close-on-exec is set on the memfd.
+ *
+ * Returns: the mapped host address, or NULL with @errp set.  If the
+ * mapping fails, the memfd is dropped from cpr state and closed.  The
+ * caller abandons the block on failure, so nothing else would ever
+ * release them.
+ */
+static void *qemu_anon_memfd_alloc(RAMBlock *rb, size_t maxlen, Error **errp)
+{
+    size_t len, align;
+    void *addr;
+    struct MemoryRegion *mr = rb->mr;
+    const char *name = memory_region_name(mr);
+    int mfd = cpr_find_memfd(name, &len, &maxlen, &align);
+
+    if (mfd >= 0) {
+        rb->used_length = len;
+        rb->max_length = maxlen;
+        mr->align = align;
+    } else {
+        len = rb->used_length;
+        maxlen = rb->max_length;
+        mr->align = QEMU_VMALLOC_ALIGN;
+        mfd = qemu_memfd_create(name, maxlen + mr->align, 0, 0, 0, errp);
+        if (mfd < 0) {
+            return NULL;
+        }
+        cpr_save_memfd(name, mfd, len, maxlen, mr->align);
+    }
+    rb->flags |= RAM_SHARED;
+    qemu_set_cloexec(mfd);
+    addr = file_ram_alloc(rb, maxlen, mfd, false, false, 0, errp);
+    trace_anon_memfd_alloc(name, maxlen, addr, mfd);
+    if (!addr) {
+        /* Do not leak the fd or leave a stale entry in cpr state. */
+        cpr_delete_memfd(name);
+        close(mfd);
+        return NULL;
+    }
+    return addr;
+}
+
static void ram_block_add(RAMBlock *new_block, Error **errp)
{
const bool noreserve = qemu_ram_is_noreserve(new_block);
@@ -1994,6 +2031,14 @@ static void ram_block_add(RAMBlock *new_block, Error
**errp)
qemu_mutex_unlock_ramlist();
return;
}
+        } else if (migrate_mode_enabled(MIG_MODE_CPR_EXEC) &&
+                   !memory_region_is_backend(new_block->mr)) {
+            new_block->host = qemu_anon_memfd_alloc(new_block,
+                                                    new_block->max_length,
+                                                    errp);
+            if (!new_block->host) {
+                /*
+                 * The ramlist mutex is held here; release it before
+                 * bailing out, as the other failure paths do.
+                 */
+                qemu_mutex_unlock_ramlist();
+                return;
+            }
} else {
new_block->host = qemu_anon_ram_alloc(new_block->max_length,
&new_block->mr->align,
@@ -2005,8 +2050,8 @@ static void ram_block_add(RAMBlock *new_block, Error
**errp)
qemu_mutex_unlock_ramlist();
return;
}
- memory_try_enable_merging(new_block->host, new_block->max_length);
}
+ memory_try_enable_merging(new_block->host, new_block->max_length);
}
new_ram_size = MAX(old_ram_size,
@@ -2239,6 +2284,7 @@ void qemu_ram_free(RAMBlock *block)
}
qemu_mutex_lock_ramlist();
+ cpr_delete_memfd(memory_region_name(block->mr));
QLIST_REMOVE_RCU(block, next);
ram_list.mru_block = NULL;
/* Write list before version */
diff --git a/softmmu/runstate.c b/softmmu/runstate.c
index 14b43df..fb86740 100644
--- a/softmmu/runstate.c
+++ b/softmmu/runstate.c
@@ -32,6 +32,7 @@
#include "exec/cpu-common.h"
#include "exec/gdbstub.h"
#include "hw/boards.h"
+#include "migration/cpr.h"
#include "migration/misc.h"
#include "migration/postcopy-ram.h"
#include "monitor/monitor.h"
@@ -692,9 +693,10 @@ static bool main_loop_should_exit(void)
if (qemu_exec_requested()) {
Error *err = NULL;
+ cpr_preserve_fds();
execvp(exec_argv[0], exec_argv);
error_setg_errno(&err, errno, "execvp %s failed", exec_argv[0]);
- error_report_err(err);
+ cpr_exec_failed(err);
g_strfreev(exec_argv);
exec_argv = NULL;
return false;
diff --git a/trace-events b/trace-events
index bc71006..07369bb 100644
--- a/trace-events
+++ b/trace-events
@@ -45,6 +45,7 @@ ram_block_discard_range(const char *rbname, void *hva, size_t
length, bool need_
# accel/tcg/cputlb.c
memory_notdirty_write_access(uint64_t vaddr, uint64_t ram_addr, unsigned size)
"0x%" PRIx64 " ram_addr 0x%" PRIx64 " size %u"
memory_notdirty_set_dirty(uint64_t vaddr) "0x%" PRIx64
+anon_memfd_alloc(const char *name, size_t size, void *ptr, int fd) "%s size
%zu ptr %p fd %d"
# gdbstub.c
gdbstub_op_start(const char *device) "Starting gdbstub using device %s"
--
1.8.3.1
- [PATCH V9 24/46] cpr: ram block blockers, (continued)
- [PATCH V9 24/46] cpr: ram block blockers, Steve Sistare, 2022/07/26
- [PATCH V9 08/46] cpr: relax some blockers, Steve Sistare, 2022/07/26
- [PATCH V9 28/46] hostmem-epc: cpr support, Steve Sistare, 2022/07/26
- [PATCH V9 33/46] vfio-pci: cpr part 3 (intx), Steve Sistare, 2022/07/26
- [PATCH V9 06/46] migration: simplify blockers, Steve Sistare, 2022/07/26
- [PATCH V9 10/46] qdev-properties: strList, Steve Sistare, 2022/07/26
- [PATCH V9 29/46] pci: export msix_is_pending, Steve Sistare, 2022/07/26
- [PATCH V9 11/46] qapi: strList_from_string, Steve Sistare, 2022/07/26
- [PATCH V9 07/46] migration: per-mode blockers, Steve Sistare, 2022/07/26
- [PATCH V9 35/46] vhost: reset vhost devices for cpr, Steve Sistare, 2022/07/26
- [PATCH V9 22/46] cpr: exec mode,
Steve Sistare <=
- [PATCH V9 16/46] migration: simplify notifiers, Steve Sistare, 2022/07/26
- [PATCH V9 26/46] cpr: Mismatched GPAs fix, Steve Sistare, 2022/07/26
- [PATCH V9 37/46] chardev: cpr for simple devices, Steve Sistare, 2022/07/26
- [PATCH V9 38/46] chardev: cpr for pty, Steve Sistare, 2022/07/26
- [PATCH V9 32/46] vfio-pci: cpr part 2 (msi), Steve Sistare, 2022/07/26
- [PATCH V9 34/46] vfio-pci: recover from unmap-all-vaddr failure, Steve Sistare, 2022/07/26
- [PATCH V9 40/46] python/machine: QEMUMachine full_args, Steve Sistare, 2022/07/26
- [PATCH V9 17/46] migration: check mode in notifiers, Steve Sistare, 2022/07/26
- [PATCH V9 45/46] migration: notifier error reporting, Steve Sistare, 2022/07/26