[Date Prev][Date Next][Thread Prev][Thread Next][Date Index][Thread Index]
Re: [PATCH v2 9/9] spapr: implement nested-hv capability for the virtual hypervisor
From: |
Nicholas Piggin |
Subject: |
Re: [PATCH v2 9/9] spapr: implement nested-hv capability for the virtual hypervisor |
Date: |
Wed, 16 Feb 2022 22:30:05 +1000 |
Excerpts from Nicholas Piggin's message of February 16, 2022 9:38 pm:
> Excerpts from Cédric Le Goater's message of February 16, 2022 8:52 pm:
>> On 2/16/22 11:25, Nicholas Piggin wrote:
>>> This implements the Nested KVM HV hcall API for spapr under TCG.
>>>
>>> The L2 is switched in when the H_ENTER_NESTED hcall is made, and the
>>> L1 is switched back in, returning from the hcall, when a HV exception
>>> is sent to the vhyp. Register state is copied in and out according to
>>> the nested KVM HV hcall API specification.
>>>
>>> The hdecr timer is started when the L2 is switched in, and it provides
>>> the HDEC / 0x980 return to L1.
>>>
>>> The MMU re-uses the bare metal radix 2-level page table walker by
>>> using the get_pate method to point the MMU to the nested partition
>>> table entry. MMU faults due to partition scope errors raise HV
>>> exceptions and accordingly are routed back to the L1.
>>>
>>> The MMU does not tag translations for the L1 (direct) vs L2 (nested)
>>> guests, so the TLB is flushed on any L1<->L2 transition (hcall entry
>>> and exit).
>>>
>>> Reviewed-by: Fabiano Rosas <farosas@linux.ibm.com>
>>> Signed-off-by: Nicholas Piggin <npiggin@gmail.com>
>>
>> Reviewed-by: Cédric Le Goater <clg@kaod.org>
>>
>> Some last comments below,
>
> [...]
>
>>> diff --git a/include/hw/ppc/spapr.h b/include/hw/ppc/spapr.h
>>> index edbf3eeed0..852fe61b36 100644
>>> --- a/include/hw/ppc/spapr.h
>>> +++ b/include/hw/ppc/spapr.h
>>> @@ -199,6 +199,9 @@ struct SpaprMachineState {
>>> bool has_graphics;
>>> uint32_t vsmt; /* Virtual SMT mode (KVM's "core stride") */
>>>
>>> + /* Nested HV support (TCG only) */
>>> + uint64_t nested_ptcr;
>>> +
>>
>> this is new state to migrate.
>>
>
> [...]
>
>>> +/* Linux 64-bit powerpc pt_regs struct, used by nested HV */
>>> +struct kvmppc_pt_regs {
>>> + uint64_t gpr[32];
>>> + uint64_t nip;
>>> + uint64_t msr;
>>> + uint64_t orig_gpr3; /* Used for restarting system calls */
>>> + uint64_t ctr;
>>> + uint64_t link;
>>> + uint64_t xer;
>>> + uint64_t ccr;
>>> + uint64_t softe; /* Soft enabled/disabled */
>>> + uint64_t trap; /* Reason for being here */
>>> + uint64_t dar; /* Fault registers */
>>> + uint64_t dsisr; /* on 4xx/Book-E used for ESR */
>>> + uint64_t result; /* Result of a system call */
>>> +};
>>
>> I think we need to start moving all the spapr hcall definitions under
>> spapr_hcall.h. It can come later.
>
> Sure.
>
> [...]
>
>>> diff --git a/include/hw/ppc/spapr_cpu_core.h
>>> b/include/hw/ppc/spapr_cpu_core.h
>>> index dab3dfc76c..b560514560 100644
>>> --- a/include/hw/ppc/spapr_cpu_core.h
>>> +++ b/include/hw/ppc/spapr_cpu_core.h
>>> @@ -48,6 +48,11 @@ typedef struct SpaprCpuState {
>>> bool prod; /* not migrated, only used to improve dispatch latencies */
>>> struct ICPState *icp;
>>> struct XiveTCTX *tctx;
>>> +
>>> + /* Fields for nested-HV support */
>>> + bool in_nested; /* true while the L2 is executing */
>>> + CPUPPCState *nested_host_state; /* holds the L1 state while L2 executes */
>>> + int64_t nested_tb_offset; /* L1->L2 TB offset */
>>
>> This needs a new vmstate.
>
> How about, instead of the vmstate (we would need all the L1 state in
> nested_host_state as well), we just add a migration blocker in the
> L2 entry path? We could limit the max hdecr to, say, 1 second to
> ensure it unblocks before long.
>
> I know migration blockers are not preferred but in this case it gives
> us some iterations to debug and optimise first, which might change
> the data to migrate.
This should be roughly the incremental patch to do this.
Thanks,
Nick
--
diff --git a/hw/ppc/spapr.c b/hw/ppc/spapr.c
index 87e68da77f..14e41b7d31 100644
--- a/hw/ppc/spapr.c
+++ b/hw/ppc/spapr.c
@@ -2882,6 +2882,13 @@ static void spapr_machine_init(MachineState *machine)
"may run and log hardware error on the destination");
}
+ if (spapr_get_cap(spapr, SPAPR_CAP_NESTED_KVM_HV) == SPAPR_CAP_ON) {
+ /* Create the error string for live migration blocker */
+ error_setg(&spapr->nested_hv_migration_blocker,
+ "A nested-hv L2 guest is running. Migration is blocked until it "
+ "exits to the L1.");
+ }
+
if (mc->nvdimm_supported) {
spapr_create_nvdimm_dr_connectors(spapr);
}
diff --git a/hw/ppc/spapr_hcall.c b/hw/ppc/spapr_hcall.c
index e183892287..89295bc723 100644
--- a/hw/ppc/spapr_hcall.c
+++ b/hw/ppc/spapr_hcall.c
@@ -21,6 +21,7 @@
#include "hw/ppc/spapr_numa.h"
#include "mmu-book3s-v3.h"
#include "hw/mem/memory-device.h"
+#include "migration/blocker.h"
bool is_ram_address(SpaprMachineState *spapr, hwaddr addr)
{
@@ -1565,7 +1566,7 @@ static target_ulong h_enter_nested(PowerPCCPU *cpu,
SpaprCpuState *spapr_cpu = spapr_cpu_state(cpu);
target_ulong hv_ptr = args[0];
target_ulong regs_ptr = args[1];
- target_ulong hdec, now = cpu_ppc_load_tbl(env);
+ target_ulong hdec, now;
target_ulong lpcr, lpcr_mask;
struct kvmppc_hv_guest_state *hvstate;
struct kvmppc_hv_guest_state hv_state;
@@ -1578,11 +1579,16 @@ static target_ulong h_enter_nested(PowerPCCPU *cpu,
return H_NOT_AVAILABLE;
}
+ if (migrate_add_blocker(spapr->nested_hv_migration_blocker, NULL)) {
+ return 0; /* This returns nothing to the L1, essentially an EAGAIN */
+ }
+
len = sizeof(*hvstate);
hvstate = address_space_map(CPU(cpu)->as, hv_ptr, &len, false,
MEMTXATTRS_UNSPECIFIED);
if (len != sizeof(*hvstate)) {
address_space_unmap(CPU(cpu)->as, hvstate, len, 0, false);
+ migrate_del_blocker(spapr->nested_hv_migration_blocker);
return H_PARAMETER;
}
@@ -1590,16 +1596,36 @@ static target_ulong h_enter_nested(PowerPCCPU *cpu,
address_space_unmap(CPU(cpu)->as, hvstate, len, len, false);
+ spapr_cpu->nested_tb_offset = hv_state.tb_offset;
+ spapr_cpu->nested_hdec_expiry = hv_state.hdec_expiry;
+
+ now = cpu_ppc_load_tbl(env);
+ if (now >= hv_state.hdec_expiry) {
+ migrate_del_blocker(spapr->nested_hv_migration_blocker);
+ return env->excp_vectors[POWERPC_EXCP_HDECR];
+ }
+
+ hdec = hv_state.hdec_expiry - now;
+ if (hdec > env->tb_env->tb_freq) {
+ /*
+ * Limit hdecr to 1 second to prevent the L1 blocking migration for
+ * too long with a large hdecr value.
+ */
+ hdec = env->tb_env->tb_freq;
+ }
+
/*
* We accept versions 1 and 2. Version 2 fields are unused because TCG
* does not implement DAWR*.
*/
if (hv_state.version > HV_GUEST_STATE_VERSION) {
+ migrate_del_blocker(spapr->nested_hv_migration_blocker);
return H_PARAMETER;
}
spapr_cpu->nested_host_state = g_try_malloc(sizeof(CPUPPCState));
if (!spapr_cpu->nested_host_state) {
+ migrate_del_blocker(spapr->nested_hv_migration_blocker);
return H_NO_MEM;
}
@@ -1611,6 +1637,7 @@ static target_ulong h_enter_nested(PowerPCCPU *cpu,
if (!regs || len != sizeof(*regs)) {
address_space_unmap(CPU(cpu)->as, regs, len, 0, false);
g_free(spapr_cpu->nested_host_state);
+ migrate_del_blocker(spapr->nested_hv_migration_blocker);
return H_P2;
}
@@ -1648,8 +1675,6 @@ static target_ulong h_enter_nested(PowerPCCPU *cpu,
/* hv_state.amor is not used */
env->spr[SPR_DPDES] = hv_state.dpdes;
env->spr[SPR_HFSCR] = hv_state.hfscr;
- hdec = hv_state.hdec_expiry - now;
- spapr_cpu->nested_tb_offset = hv_state.tb_offset;
/* TCG does not implement DAWR*, CIABR, PURR, SPURR, IC, VTB, HEIR SPRs*/
env->spr[SPR_SRR0] = hv_state.srr0;
env->spr[SPR_SRR1] = hv_state.srr1;
@@ -1693,6 +1718,7 @@ static target_ulong h_enter_nested(PowerPCCPU *cpu,
void spapr_exit_nested(PowerPCCPU *cpu, int excp)
{
+ SpaprMachineState *spapr = SPAPR_MACHINE(qdev_get_machine());
CPUState *cs = CPU(cpu);
CPUPPCState *env = &cpu->env;
SpaprCpuState *spapr_cpu = spapr_cpu_state(cpu);
@@ -1781,6 +1807,19 @@ void spapr_exit_nested(PowerPCCPU *cpu, int excp)
/* Is it okay to specify write length larger than actual data written? */
address_space_unmap(CPU(cpu)->as, regs, len, len, true);
+ /*
+ * hdecr is capped at entry, so we may exit here with a HDECR exception
+ * without having exceeded the guest's limit. Clear the HDECR interrupt
+ * return in this case.
+ */
+ if (excp == POWERPC_EXCP_HDECR) {
+ target_ulong now;
+ now = cpu_ppc_load_tbl(env) - spapr_cpu->nested_tb_offset;
+ if (now < spapr_cpu->nested_hdec_expiry) {
+ r3_return = 0;
+ }
+ }
+
out_restore_l1:
memcpy(env->gpr, spapr_cpu->nested_host_state->gpr, sizeof(env->gpr));
env->lr = spapr_cpu->nested_host_state->lr;
@@ -1825,6 +1864,8 @@ out_restore_l1:
g_free(spapr_cpu->nested_host_state);
spapr_cpu->nested_host_state = NULL;
+
+ migrate_del_blocker(spapr->nested_hv_migration_blocker);
}
static void hypercall_register_types(void)
diff --git a/include/hw/ppc/spapr.h b/include/hw/ppc/spapr.h
index 852fe61b36..70b330ef9a 100644
--- a/include/hw/ppc/spapr.h
+++ b/include/hw/ppc/spapr.h
@@ -266,6 +266,7 @@ struct SpaprMachineState {
uint32_t FORM2_assoc_array[NUMA_NODES_MAX_NUM][FORM2_NUMA_ASSOC_SIZE];
Error *fwnmi_migration_blocker;
+ Error *nested_hv_migration_blocker;
};
#define H_SUCCESS 0
diff --git a/include/hw/ppc/spapr_cpu_core.h b/include/hw/ppc/spapr_cpu_core.h
index b560514560..09da577ca1 100644
--- a/include/hw/ppc/spapr_cpu_core.h
+++ b/include/hw/ppc/spapr_cpu_core.h
@@ -53,6 +53,7 @@ typedef struct SpaprCpuState {
bool in_nested; /* true while the L2 is executing */
CPUPPCState *nested_host_state; /* holds the L1 state while L2 executes */
int64_t nested_tb_offset; /* L1->L2 TB offset */
+ uint64_t nested_hdec_expiry; /* L1 hdec expiry in absolute L1 TB */
} SpaprCpuState;
static inline SpaprCpuState *spapr_cpu_state(PowerPCCPU *cpu)
- [PATCH v2 3/9] ppc: allow the hdecr timer to be created/destroyed, (continued)
- [PATCH v2 3/9] ppc: allow the hdecr timer to be created/destroyed, Nicholas Piggin, 2022/02/16
- [PATCH v2 5/9] target/ppc: make vhyp get_pate method take lpid and return success, Nicholas Piggin, 2022/02/16
- [PATCH v2 4/9] target/ppc: add vhyp addressing mode helper for radix MMU, Nicholas Piggin, 2022/02/16
- [PATCH v2 6/9] target/ppc: add helper for books vhyp hypercall handler, Nicholas Piggin, 2022/02/16
- [PATCH v2 7/9] target/ppc: Add powerpc_reset_excp_state helper, Nicholas Piggin, 2022/02/16
- [PATCH v2 8/9] target/ppc: Introduce a vhyp framework for nested HV support, Nicholas Piggin, 2022/02/16
- [PATCH v2 9/9] spapr: implement nested-hv capability for the virtual hypervisor, Nicholas Piggin, 2022/02/16
Re: [PATCH v2 0/9] ppc: nested KVM HV for spapr virtual hypervisor, Cédric Le Goater, 2022/02/18