Re: [PATCH v1 3/4] migration: refactor ram_save_target_page functions
From: Prasad Pandit
Subject: Re: [PATCH v1 3/4] migration: refactor ram_save_target_page functions
Date: Thu, 28 Nov 2024 15:48:36 +0530
On Wed, 27 Nov 2024 at 19:42, Fabiano Rosas <farosas@suse.de> wrote:
> From e9110360eb0efddf6945f37c518e3cc38d12b600 Mon Sep 17 00:00:00 2001
> From: Fabiano Rosas <farosas@suse.de>
> Date: Wed, 27 Nov 2024 11:03:04 -0300
> Subject: [PATCH] migration: Rationalize multifd flushes from ram code
>
> We currently have a mess of conditionals to achieve the correct
> combination of multifd local flushes, where we sync the local
> (send/recv) multifd threads between themselves, and multifd remote
> flushes, where we put a flag on the stream to inform the recv side to
> perform a local flush.
>
> On top of that we also have the multifd_flush_after_each_section
> property, which is there to provide backward compatibility from when
> we used to flush after every vmstate section.
>
> And lastly, there's the mapped-ram feature which always wants the
> non-backward compatible behavior and also does not support extraneous
> flags on the stream (such as the MULTIFD_FLUSH flag).
>
> Move the conditionals into a separate function that encapsulates and
> explains the whole situation.
>
> Signed-off-by: Fabiano Rosas <farosas@suse.de>
> ---
> migration/ram.c | 198 ++++++++++++++++++++++++++++++++++++++----------
> 1 file changed, 157 insertions(+), 41 deletions(-)
>
> diff --git a/migration/ram.c b/migration/ram.c
> index 05ff9eb328..caaaae6fdc 100644
> --- a/migration/ram.c
> +++ b/migration/ram.c
> @@ -1277,6 +1277,149 @@ static int ram_save_page(RAMState *rs, PageSearchStatus *pss)
> return pages;
> }
>
> +enum RamMultifdFlushSpots {
> + FLUSH_SAVE_SETUP,
> + FLUSH_SAVE_ITER,
> + FLUSH_DIRTY_BLOCK,
> + FLUSH_SAVE_COMPLETE,
> +
> + FLUSH_LOAD_POSTCOPY_EOS,
> + FLUSH_LOAD_POSTCOPY_FLUSH,
> + FLUSH_LOAD_PRECOPY_EOS,
> + FLUSH_LOAD_PRECOPY_FLUSH,
> +};
> +
> +static int ram_multifd_flush(QEMUFile *f, enum RamMultifdFlushSpots spot)
> +{
> + int ret;
> + bool always_flush, do_local_flush, do_remote_flush;
> + bool mapped_ram = migrate_mapped_ram();
> +
> + if (!migrate_multifd()) {
> + return 0;
> + }
> +
> + /*
> + * For backward compatibility, whether to flush multifd after each
> + * section is sent. This is mutually exclusive with a
> + * RAM_SAVE_FLAG_MULTIFD_FLUSH on the stream
> + */
> + always_flush = migrate_multifd_flush_after_each_section();
> +
> + /*
> + * Save side flushes
> + */
> +
> + do_local_flush = false;
> +
> + switch (spot) {
> + case FLUSH_SAVE_SETUP:
> + assert(!bql_locked());
> + do_local_flush = true;
> + break;
> +
> + case FLUSH_SAVE_ITER:
> + /*
> + * This flush is not necessary, only do for backward
> + * compatibility. Mapped-ram assumes the new scheme.
> + */
> + do_local_flush = always_flush && !mapped_ram;
> + break;
> +
> + case FLUSH_DIRTY_BLOCK:
> + /*
> + * This is the flush that's actually required, always do it
> + * unless we're emulating the old behavior.
> + */
> + do_local_flush = !always_flush || mapped_ram;
> + break;
> +
> + case FLUSH_SAVE_COMPLETE:
> + do_local_flush = true;
> + break;
> +
> + default:
> + break;
> + }
> +
> + if (do_local_flush) {
> + ret = multifd_ram_flush_and_sync();
> + if (ret < 0) {
> + return ret;
> + }
> + }
> +
> + /*
> + * There's never a remote flush with mapped-ram because any flags
> + * put on the stream (aside from RAM_SAVE_FLAG_MEM_SIZE and
> + * RAM_SAVE_FLAG_EOS) break mapped-ram's assumption that ram pages
> + * can be read contiguously from the stream.
> + *
> + * On the recv side, there's no local flush, even at EOS because
> + * mapped-ram does its own flush after loading the ramblock.
> + */
> + if (mapped_ram) {
> + return 0;
> + }
> +
> + do_remote_flush = false;
> +
> + /* Save side remote flush */
> + switch (spot) {
> + case FLUSH_SAVE_SETUP:
> + do_remote_flush = !always_flush;
> + break;
> +
> + case FLUSH_SAVE_ITER:
> + do_remote_flush = false;
> + break;
> +
> + case FLUSH_DIRTY_BLOCK:
> + do_remote_flush = do_local_flush;
> + break;
> +
> + case FLUSH_SAVE_COMPLETE:
> + do_remote_flush = false;
> + break;
> +
> + default:
> + break;
> + }
> +
> + /* Put a flag on the stream to trigger a remote flush */
> + if (do_remote_flush) {
> + qemu_put_be64(f, RAM_SAVE_FLAG_MULTIFD_FLUSH);
> + qemu_fflush(f);
> + }
> +
> + /*
> + * Load side flushes.
> + */
> +
> + do_local_flush = false;
> +
> + switch (spot) {
> + case FLUSH_LOAD_PRECOPY_EOS:
> + case FLUSH_LOAD_POSTCOPY_EOS:
> + do_local_flush = always_flush;
> + break;
> +
> + case FLUSH_LOAD_PRECOPY_FLUSH:
> + case FLUSH_LOAD_POSTCOPY_FLUSH:
> + do_local_flush = true;
> + break;
> +
> + default:
> + break;
> + }
> +
> + if (do_local_flush) {
> + multifd_recv_sync_main();
> + }
> +
> + return 0;
> +}
> +
> static int ram_save_multifd_page(RAMBlock *block, ram_addr_t offset)
> {
> if (!multifd_queue_page(block, offset)) {
> @@ -1323,19 +1466,10 @@ static int find_dirty_block(RAMState *rs, PageSearchStatus *pss)
> pss->page = 0;
> pss->block = QLIST_NEXT_RCU(pss->block, next);
> if (!pss->block) {
> - if (migrate_multifd() &&
> - (!migrate_multifd_flush_after_each_section() ||
> - migrate_mapped_ram())) {
> - QEMUFile *f = rs->pss[RAM_CHANNEL_PRECOPY].pss_channel;
> - int ret = multifd_ram_flush_and_sync();
> - if (ret < 0) {
> - return ret;
> - }
> -
> - if (!migrate_mapped_ram()) {
> - qemu_put_be64(f, RAM_SAVE_FLAG_MULTIFD_FLUSH);
> - qemu_fflush(f);
> - }
> + int ret = ram_multifd_flush(rs->pss[RAM_CHANNEL_PRECOPY].pss_channel,
> + FLUSH_DIRTY_BLOCK);
> + if (ret < 0) {
> + return ret;
> }
>
> /* Hit the end of the list */
> @@ -3065,18 +3199,13 @@ static int ram_save_setup(QEMUFile *f, void *opaque, Error **errp)
> }
>
> bql_unlock();
> - ret = multifd_ram_flush_and_sync();
> + ret = ram_multifd_flush(f, FLUSH_SAVE_SETUP);
> bql_lock();
> if (ret < 0) {
> error_setg(errp, "%s: multifd synchronization failed", __func__);
> return ret;
> }
>
> - if (migrate_multifd() && !migrate_multifd_flush_after_each_section()
> - && !migrate_mapped_ram()) {
> - qemu_put_be64(f, RAM_SAVE_FLAG_MULTIFD_FLUSH);
> - }
> -
> qemu_put_be64(f, RAM_SAVE_FLAG_EOS);
> ret = qemu_fflush(f);
> if (ret < 0) {
> @@ -3209,12 +3338,10 @@ static int ram_save_iterate(QEMUFile *f, void *opaque)
>
> out:
> if (ret >= 0 && migration_is_running()) {
> - if (migrate_multifd() && migrate_multifd_flush_after_each_section() &&
> - !migrate_mapped_ram()) {
> - ret = multifd_ram_flush_and_sync();
> - if (ret < 0) {
> - return ret;
> - }
> +
> + ret = ram_multifd_flush(f, FLUSH_SAVE_ITER);
> + if (ret < 0) {
> + return ret;
> }
>
> qemu_put_be64(f, RAM_SAVE_FLAG_EOS);
> @@ -3283,7 +3410,7 @@ static int ram_save_complete(QEMUFile *f, void *opaque)
> }
> }
>
> - ret = multifd_ram_flush_and_sync();
> + ret = ram_multifd_flush(f, FLUSH_SAVE_COMPLETE);
> if (ret < 0) {
> return ret;
> }
> @@ -3797,14 +3924,11 @@ int ram_load_postcopy(QEMUFile *f, int channel)
> }
> break;
> case RAM_SAVE_FLAG_MULTIFD_FLUSH:
> - multifd_recv_sync_main();
> + ram_multifd_flush(f, FLUSH_LOAD_POSTCOPY_FLUSH);
> break;
> case RAM_SAVE_FLAG_EOS:
> /* normal exit */
> - if (migrate_multifd() &&
> - migrate_multifd_flush_after_each_section()) {
> - multifd_recv_sync_main();
> - }
> + ram_multifd_flush(f, FLUSH_LOAD_POSTCOPY_EOS);
> break;
> default:
> error_report("Unknown combination of migration flags: 0x%x"
> @@ -4237,19 +4361,11 @@ static int ram_load_precopy(QEMUFile *f)
> }
> break;
> case RAM_SAVE_FLAG_MULTIFD_FLUSH:
> - multifd_recv_sync_main();
> + ram_multifd_flush(f, FLUSH_LOAD_PRECOPY_FLUSH);
> break;
> case RAM_SAVE_FLAG_EOS:
> /* normal exit */
> - if (migrate_multifd() &&
> - migrate_multifd_flush_after_each_section() &&
> - /*
> - * Mapped-ram migration flushes once and for all after
> - * parsing ramblocks. Always ignore EOS for it.
> - */
> - !migrate_mapped_ram()) {
> - multifd_recv_sync_main();
> - }
> + ram_multifd_flush(f, FLUSH_LOAD_PRECOPY_EOS);
> break;
> case RAM_SAVE_FLAG_HOOK:
> ret = rdma_registration_handle(f);
> --
> 2.35.3
>
* This does not seem to reduce the complexity. When reading/following
the code, it is easier to look at 3-4 conditions and work through them
to check whether the full expression is 'true' or 'false'; that is not
doable here. (A small sketch of what I mean is below.)
* fflush() just writes buffered content into (or out of) the stream,
IIUC, so why do we have to tie it to a specific spot? At any point it
does the same thing: flush the available data to (or out of) the
stream.
* Could we separate the send side flush from the receive side flush
operations? Writing a flag into the stream on the send side to trigger
a flush on the receive side is weird; the data stream should not need
to say when to flush. Let the sender and the receiver each decide when
they want to flush and sync their data, no? Maybe that would help to
reduce the complexity of the long conditionals, i.e. if we are able to
flush and sync without any condition? (A rough sketch follows at the
end.)
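On the first point, for illustration only (just a sketch, the helper
name is invented), even wrapping today's condition in a named
predicate keeps those 3-4 checks visible in one place:

    static bool multifd_block_flush_needed(void)
    {
        /* same condition find_dirty_block() uses today */
        return migrate_multifd() &&
               (!migrate_multifd_flush_after_each_section() ||
                migrate_mapped_ram());
    }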
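On the last point, a rough, untested sketch of the separation I have
in mind; the helper names are made up, only the multifd calls already
used in the patch appear here:

    /* send side: local flush only, the caller picks the spot */
    static int ram_multifd_send_flush(void)
    {
        if (!migrate_multifd()) {
            return 0;
        }
        return multifd_ram_flush_and_sync();
    }

    /* receive side: sync the recv threads, no stream flag involved */
    static void ram_multifd_recv_flush(void)
    {
        if (!migrate_multifd()) {
            return;
        }
        multifd_recv_sync_main();
    }

Each side then decides on its own when to call its helper, instead of
one function having to know about every spot on both sides.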
Thank you.
---
- Prasad