[Date Prev][Date Next][Thread Prev][Thread Next][Date Index][Thread Index]
Re: [Qemu-devel] [Qemu-ppc] [PATCH v9 6/6] migration: Include migration
From: |
Greg Kurz |
Subject: |
Re: [Qemu-devel] [Qemu-ppc] [PATCH v9 6/6] migration: Include migration support for machine check handling |
Date: |
Tue, 4 Jun 2019 22:11:51 +0200 |
On Tue, 4 Jun 2019 22:04:21 +0200
Greg Kurz <address@hidden> wrote:
> On Tue, 4 Jun 2019 12:34:37 +0530
> Aravinda Prasad <address@hidden> wrote:
>
> > On Monday 03 June 2019 09:10 PM, Greg Kurz wrote:
> > > On Wed, 29 May 2019 11:10:57 +0530
> > > Aravinda Prasad <address@hidden> wrote:
> > >
> > >> This patch includes migration support for machine check
> > >> handling. Specifically, this patch blocks VM migration
> > >> requests until the machine check error handling is
> > >> complete, as (i) these errors are specific to the source
> > >> hardware and are irrelevant on the target hardware, and
> > >> (ii) these errors cause data corruption and should
> > >> be handled before migration.
> > >>
> > >> Signed-off-by: Aravinda Prasad <address@hidden>
> > >> ---
> > >
> > > LGTM, just one issue: machine reset should del and free the blocker as
> > > well,
> > > otherwise QEMU would crash if spapr_mce_req_event() is called again.
> >
> > Sure.
> >
> >
> > >
> > >> hw/ppc/spapr.c | 20 ++++++++++++++++++++
> > >> hw/ppc/spapr_events.c | 17 +++++++++++++++++
> > >> hw/ppc/spapr_rtas.c | 4 ++++
> > >> include/hw/ppc/spapr.h | 2 ++
> > >> 4 files changed, 43 insertions(+)
> > >>
> > >> diff --git a/hw/ppc/spapr.c b/hw/ppc/spapr.c
> > >> index e8a77636..31c4850 100644
> > >> --- a/hw/ppc/spapr.c
> > >> +++ b/hw/ppc/spapr.c
> > >> @@ -2104,6 +2104,25 @@ static const VMStateDescription vmstate_spapr_dtb
> > >> = {
> > >> },
> > >> };
> > >>
> > >> +static bool spapr_fwnmi_needed(void *opaque)
> > >> +{
> > >> + SpaprMachineState *spapr = (SpaprMachineState *)opaque;
> > >> +
> > >> + return (spapr->guest_machine_check_addr == -1) ? 0 : 1;
>
> And also you can drop the parens since == has higher precedence than ?:
>
... or even better make it spapr->guest_machine_check_addr != -1 :)
> > >> +}
> > >> +
> > >> +static const VMStateDescription vmstate_spapr_machine_check = {
> > >> + .name = "spapr_machine_check",
> > >> + .version_id = 1,
> > >> + .minimum_version_id = 1,
> > >> + .needed = spapr_fwnmi_needed,
> > >> + .fields = (VMStateField[]) {
> > >> + VMSTATE_UINT64(guest_machine_check_addr, SpaprMachineState),
> > >> + VMSTATE_INT32(mc_status, SpaprMachineState),
> > >> + VMSTATE_END_OF_LIST()
> > >> + },
> > >> +};
> > >> +
> > >> static const VMStateDescription vmstate_spapr = {
> > >> .name = "spapr",
> > >> .version_id = 3,
> > >> @@ -2137,6 +2156,7 @@ static const VMStateDescription vmstate_spapr = {
> > >> &vmstate_spapr_dtb,
> > >> &vmstate_spapr_cap_large_decr,
> > >> &vmstate_spapr_cap_ccf_assist,
> > >> + &vmstate_spapr_machine_check,
> > >> NULL
> > >> }
> > >> };
> > >> diff --git a/hw/ppc/spapr_events.c b/hw/ppc/spapr_events.c
> > >> index 573c0b7..35e21e4 100644
> > >> --- a/hw/ppc/spapr_events.c
> > >> +++ b/hw/ppc/spapr_events.c
> > >> @@ -41,6 +41,7 @@
> > >> #include "qemu/bcd.h"
> > >> #include "hw/ppc/spapr_ovec.h"
> > >> #include <libfdt.h>
> > >> +#include "migration/blocker.h"
> > >>
> > >> #define RTAS_LOG_VERSION_MASK 0xff000000
> > >> #define RTAS_LOG_VERSION_6 0x06000000
> > >> @@ -855,6 +856,22 @@ static void spapr_mce_dispatch_elog(PowerPCCPU
> > >> *cpu, bool recovered)
> > >> void spapr_mce_req_event(PowerPCCPU *cpu, bool recovered)
> > >> {
> > >> SpaprMachineState *spapr = SPAPR_MACHINE(qdev_get_machine());
> > >> + int ret;
> > >> + Error *local_err = NULL;
> > >> +
> > >> + error_setg(&spapr->fwnmi_migration_blocker,
> > >> + "Live migration not supported during machine check
> > >> handling");
> > >> + ret = migrate_add_blocker(spapr->fwnmi_migration_blocker,
> > >> &local_err);
> > >> + if (ret < 0) {
> > >> + /*
> > >> + * We don't want to abort and let the migration to continue. In
> > >> a
> > >> + * rare case, the machine check handler will run on the target
> > >> + * hardware. Though this is not preferable, it is better than
> > >> aborting
> > >> + * the migration or killing the VM.
> > >> + */
> > >> + error_free(spapr->fwnmi_migration_blocker);
> > >> + warn_report_err(local_err);
> > >> + }
> > >>
> > >> while (spapr->mc_status != -1) {
> > >> /*
> > >> diff --git a/hw/ppc/spapr_rtas.c b/hw/ppc/spapr_rtas.c
> > >> index 91a7ab9..c849223 100644
> > >> --- a/hw/ppc/spapr_rtas.c
> > >> +++ b/hw/ppc/spapr_rtas.c
> > >> @@ -50,6 +50,7 @@
> > >> #include "target/ppc/mmu-hash64.h"
> > >> #include "target/ppc/mmu-book3s-v3.h"
> > >> #include "kvm_ppc.h"
> > >> +#include "migration/blocker.h"
> > >>
> > >> static void rtas_display_character(PowerPCCPU *cpu, SpaprMachineState
> > >> *spapr,
> > >> uint32_t token, uint32_t nargs,
> > >> @@ -404,6 +405,9 @@ static void rtas_ibm_nmi_interlock(PowerPCCPU *cpu,
> > >> spapr->mc_status = -1;
> > >> qemu_cond_signal(&spapr->mc_delivery_cond);
> > >> rtas_st(rets, 0, RTAS_OUT_SUCCESS);
> > >> + migrate_del_blocker(spapr->fwnmi_migration_blocker);
> > >> + error_free(spapr->fwnmi_migration_blocker);
> > >> + spapr->fwnmi_migration_blocker = NULL;
> > >> }
> > >> }
> > >>
> > >> diff --git a/include/hw/ppc/spapr.h b/include/hw/ppc/spapr.h
> > >> index bd75d4b..6c0cfd8 100644
> > >> --- a/include/hw/ppc/spapr.h
> > >> +++ b/include/hw/ppc/spapr.h
> > >> @@ -214,6 +214,8 @@ struct SpaprMachineState {
> > >> SpaprCapabilities def, eff, mig;
> > >>
> > >> unsigned gpu_numa_id;
> > >> +
> > >> + Error *fwnmi_migration_blocker;
> > >> };
> > >>
> > >> #define H_SUCCESS 0
> > >>
> > >>
> > >
> >
>
>