[Date Prev][Date Next][Thread Prev][Thread Next][Date Index][Thread Index]
Re: [PATCH v17 6/7] migration: Include migration support for machine che
From: |
David Gibson |
Subject: |
Re: [PATCH v17 6/7] migration: Include migration support for machine check handling |
Date: |
Tue, 19 Nov 2019 13:45:14 +1100 |
User-agent: |
Mutt/1.12.1 (2019-06-15) |
On Thu, Oct 24, 2019 at 01:13:06PM +0530, Ganesh Goudar wrote:
> From: Aravinda Prasad <address@hidden>
>
> This patch includes migration support for machine check
> handling. Especially this patch blocks VM migration
> requests until the machine check error handling is
> complete as these errors are specific to the source
> hardware and is irrelevant on the target hardware.
>
> [Do not set FWNMI cap in post_load, now its done in .apply hook]
> Signed-off-by: Ganesh Goudar <address@hidden>
> Signed-off-by: Aravinda Prasad <address@hidden>
> ---
> hw/ppc/spapr.c | 41 +++++++++++++++++++++++++++++++++++++++++
> hw/ppc/spapr_events.c | 16 +++++++++++++++-
> hw/ppc/spapr_rtas.c | 2 ++
> include/hw/ppc/spapr.h | 2 ++
> 4 files changed, 60 insertions(+), 1 deletion(-)
>
> diff --git a/hw/ppc/spapr.c b/hw/ppc/spapr.c
> index 346ec5ba6c..e0d0f95ec0 100644
> --- a/hw/ppc/spapr.c
> +++ b/hw/ppc/spapr.c
> @@ -46,6 +46,7 @@
> #include "migration/qemu-file-types.h"
> #include "migration/global_state.h"
> #include "migration/register.h"
> +#include "migration/blocker.h"
> #include "mmu-hash64.h"
> #include "mmu-book3s-v3.h"
> #include "cpu-models.h"
> @@ -1751,6 +1752,8 @@ static void spapr_machine_reset(MachineState *machine)
>
> /* Signal all vCPUs waiting on this condition */
> qemu_cond_broadcast(&spapr->mc_delivery_cond);
> +
> + migrate_del_blocker(spapr->fwnmi_migration_blocker);
> }
>
> static void spapr_create_nvram(SpaprMachineState *spapr)
> @@ -2041,6 +2044,43 @@ static const VMStateDescription vmstate_spapr_dtb = {
> },
> };
>
> +static bool spapr_fwnmi_needed(void *opaque)
> +{
> + SpaprMachineState *spapr = (SpaprMachineState *)opaque;
> +
> + return spapr->guest_machine_check_addr != -1;
> +}
> +
> +static int spapr_fwnmi_pre_save(void *opaque)
> +{
> + SpaprMachineState *spapr = (SpaprMachineState *)opaque;
> +
> + /*
> + * With -only-migratable QEMU option, we cannot block migration.
> + * Hence check if machine check handling is in progress and print
> + * a warning message.
> + */
IIUC the logic below this could also occur in the case where the fwnmi
event occurs after a migration has started, but before it completes,
not just with -only-migratable. Is that correct?
> + if (spapr->mc_status != -1) {
> + warn_report("A machine check is being handled during migration. The"
> + "handler may run and log hardware error on the destination");
> + }
> +
> + return 0;
> +}
> +
> +static const VMStateDescription vmstate_spapr_machine_check = {
> + .name = "spapr_machine_check",
> + .version_id = 1,
> + .minimum_version_id = 1,
> + .needed = spapr_fwnmi_needed,
> + .pre_save = spapr_fwnmi_pre_save,
> + .fields = (VMStateField[]) {
> + VMSTATE_UINT64(guest_machine_check_addr, SpaprMachineState),
> + VMSTATE_INT32(mc_status, SpaprMachineState),
> + VMSTATE_END_OF_LIST()
> + },
> +};
> +
> static const VMStateDescription vmstate_spapr = {
> .name = "spapr",
> .version_id = 3,
> @@ -2075,6 +2115,7 @@ static const VMStateDescription vmstate_spapr = {
> &vmstate_spapr_cap_large_decr,
> &vmstate_spapr_cap_ccf_assist,
> &vmstate_spapr_cap_fwnmi,
> + &vmstate_spapr_machine_check,
> NULL
> }
> };
> diff --git a/hw/ppc/spapr_events.c b/hw/ppc/spapr_events.c
> index db44e09154..30d9371c88 100644
> --- a/hw/ppc/spapr_events.c
> +++ b/hw/ppc/spapr_events.c
> @@ -43,6 +43,7 @@
> #include "qemu/main-loop.h"
> #include "hw/ppc/spapr_ovec.h"
> #include <libfdt.h>
> +#include "migration/blocker.h"
>
> #define RTAS_LOG_VERSION_MASK 0xff000000
> #define RTAS_LOG_VERSION_6 0x06000000
> @@ -842,6 +843,8 @@ void spapr_mce_req_event(PowerPCCPU *cpu, bool recovered)
> {
> SpaprMachineState *spapr = SPAPR_MACHINE(qdev_get_machine());
> CPUState *cs = CPU(cpu);
> + int ret;
> + Error *local_err = NULL;
>
> if (spapr->guest_machine_check_addr == -1) {
> /*
> @@ -871,8 +874,19 @@ void spapr_mce_req_event(PowerPCCPU *cpu, bool recovered)
> return;
> }
> }
> - spapr->mc_status = cpu->vcpu_id;
>
> + ret = migrate_add_blocker(spapr->fwnmi_migration_blocker, &local_err);
> + if (ret == -EBUSY) {
> + /*
> + * We don't want to abort so we let the migration to continue.
> + * In a rare case, the machine check handler will run on the target.
> + * Though this is not preferable, it is better than aborting
> + * the migration or killing the VM.
> + */
> + warn_report_err(local_err);
I suspect the error message in local_err won't be particularly
meaningful on its own. Perhaps you need to add a prefix to clarify
that the problem is you've received a fwnmi after migration has
commenced?
> + }
> +
> + spapr->mc_status = cpu->vcpu_id;
> spapr_mce_dispatch_elog(cpu, recovered);
> }
>
> diff --git a/hw/ppc/spapr_rtas.c b/hw/ppc/spapr_rtas.c
> index 0328b1f341..c78d96ee7e 100644
> --- a/hw/ppc/spapr_rtas.c
> +++ b/hw/ppc/spapr_rtas.c
> @@ -50,6 +50,7 @@
> #include "hw/ppc/fdt.h"
> #include "target/ppc/mmu-hash64.h"
> #include "target/ppc/mmu-book3s-v3.h"
> +#include "migration/blocker.h"
>
> static void rtas_display_character(PowerPCCPU *cpu, SpaprMachineState *spapr,
> uint32_t token, uint32_t nargs,
> @@ -446,6 +447,7 @@ static void rtas_ibm_nmi_interlock(PowerPCCPU *cpu,
> */
> spapr->mc_status = -1;
> qemu_cond_signal(&spapr->mc_delivery_cond);
> + migrate_del_blocker(spapr->fwnmi_migration_blocker);
Oh... damn. I suggested using a static fwnmi_migration_blocker
instance, but I just realized there's a problem with it.
If we do receive multiple fwnmi events on different cpus at roughly
the same time, this will break: I think we'll try to add the same
migration blocker instance multiple times, which won't be good. Even
if that doesn't do anything *too* bad, then we'll unblock the
migration on the first interlock, rather than waiting for all pending
fwnmi events to complete.
> rtas_st(rets, 0, RTAS_OUT_SUCCESS);
> }
>
> diff --git a/include/hw/ppc/spapr.h b/include/hw/ppc/spapr.h
> index 86f0fc8fdd..290abd6328 100644
> --- a/include/hw/ppc/spapr.h
> +++ b/include/hw/ppc/spapr.h
> @@ -215,6 +215,8 @@ struct SpaprMachineState {
>
> unsigned gpu_numa_id;
> SpaprTpmProxy *tpm_proxy;
> +
> + Error *fwnmi_migration_blocker;
> };
>
> #define H_SUCCESS 0
--
David Gibson | I'll have my music baroque, and my code
david AT gibson.dropbear.id.au | minimalist, thank you. NOT _the_ _other_
| _way_ _around_!
http://www.ozlabs.org/~dgibson
signature.asc
Description: PGP signature
[Prev in Thread] |
Current Thread |
[Next in Thread] |
- Re: [PATCH v17 6/7] migration: Include migration support for machine check handling,
David Gibson <=