[Date Prev][Date Next][Thread Prev][Thread Next][Date Index][Thread Index]
Re: [Qemu-ppc] [PATCH] spapr/irq: Only claim VALID interrupts at the KVM level
From: |
Greg Kurz |
Subject: |
Re: [Qemu-ppc] [PATCH] spapr/irq: Only claim VALID interrupts at the KVM level |
Date: |
Tue, 10 Sep 2019 21:36:25 +0200 |
On Tue, 10 Sep 2019 08:13:26 +0200
Cédric Le Goater <address@hidden> wrote:
> A typical pseries VM with 16 vCPUs, one disk, one network adapter
> uses less than 100 interrupts but the whole IRQ number space of the
> QEMU machine is allocated at reset time and it is 8K wide. This is
> wasting considerably the global IRQ space of the overall system which
> has 1M interrupts per socket on a POWER9.
>
> To optimise the HW resources, only request at the KVM level interrupts
> which have been claimed. This will help us increase the maximum number
> of VMs per system.
>
> To keep migration compatibility, we introduce a machine class
> attribute to adapt the reset behavior on older pseries machines.
>
This can be achieved in a simpler way, see below.
> Signed-off-by: Cédric Le Goater <address@hidden>
> ---
> include/hw/ppc/spapr.h | 1 +
> include/hw/ppc/spapr_xive.h | 1 +
> include/hw/ppc/xics.h | 6 ++++++
> hw/intc/spapr_xive.c | 1 +
> hw/intc/spapr_xive_kvm.c | 34 +++++++++++++++++++++++++++++++---
> hw/intc/xics.c | 1 +
> hw/intc/xics_kvm.c | 14 ++++++++++++++
> hw/ppc/spapr.c | 1 +
> hw/ppc/spapr_irq.c | 13 +++++++------
> 9 files changed, 63 insertions(+), 9 deletions(-)
>
> diff --git a/include/hw/ppc/spapr.h b/include/hw/ppc/spapr.h
> index 03111fd55bc8..6c622954a60c 100644
> --- a/include/hw/ppc/spapr.h
> +++ b/include/hw/ppc/spapr.h
> @@ -131,6 +131,7 @@ struct SpaprMachineClass {
> SpaprResizeHpt resize_hpt_default;
> SpaprCapabilities default_caps;
> SpaprIrq *irq;
> + bool irq_reset_all;
> };
>
> /**
> diff --git a/include/hw/ppc/spapr_xive.h b/include/hw/ppc/spapr_xive.h
> index bfd40f01d882..b33913eb0f28 100644
> --- a/include/hw/ppc/spapr_xive.h
> +++ b/include/hw/ppc/spapr_xive.h
> @@ -45,6 +45,7 @@ typedef struct SpaprXive {
> void *tm_mmap;
> MemoryRegion tm_mmio_kvm;
> VMChangeStateEntry *change;
> + bool reset_all;
> } SpaprXive;
>
> /*
> diff --git a/include/hw/ppc/xics.h b/include/hw/ppc/xics.h
> index f2a8d6a4b4f9..856815362406 100644
> --- a/include/hw/ppc/xics.h
> +++ b/include/hw/ppc/xics.h
> @@ -117,6 +117,7 @@ struct ICSState {
> DeviceState parent_obj;
> /*< public >*/
> uint32_t nr_irqs;
> + bool reset_all;
> uint32_t offset;
> ICSIRQState *irqs;
> XICSFabric *xics;
> @@ -179,6 +180,11 @@ void ics_simple_write_xive(ICSState *ics, int nr, int
> server,
> uint8_t priority, uint8_t saved_priority);
> void ics_simple_set_irq(void *opaque, int srcno, int val);
>
> +static inline bool ics_irq_free(ICSState *ics, uint32_t srcno)
> +{
> + return !(ics->irqs[srcno].flags & XICS_FLAGS_IRQ_MASK);
> +}
> +
> void ics_set_irq_type(ICSState *ics, int srcno, bool lsi);
> void icp_pic_print_info(ICPState *icp, Monitor *mon);
> void ics_pic_print_info(ICSState *ics, Monitor *mon);
> diff --git a/hw/intc/spapr_xive.c b/hw/intc/spapr_xive.c
> index c1c97192a7d2..b717d9e09314 100644
> --- a/hw/intc/spapr_xive.c
> +++ b/hw/intc/spapr_xive.c
> @@ -492,6 +492,7 @@ static Property spapr_xive_properties[] = {
> DEFINE_PROP_UINT32("nr-ends", SpaprXive, nr_ends, 0),
> DEFINE_PROP_UINT64("vc-base", SpaprXive, vc_base, SPAPR_XIVE_VC_BASE),
> DEFINE_PROP_UINT64("tm-base", SpaprXive, tm_base, SPAPR_XIVE_TM_BASE),
> + DEFINE_PROP_BOOL("reset-all", ICSState, reset_all, false),
s/ICSState/SpaprXive
> DEFINE_PROP_END_OF_LIST(),
> };
>
> diff --git a/hw/intc/spapr_xive_kvm.c b/hw/intc/spapr_xive_kvm.c
> index 17af4d19f54e..225abce36270 100644
> --- a/hw/intc/spapr_xive_kvm.c
> +++ b/hw/intc/spapr_xive_kvm.c
> @@ -253,13 +253,23 @@ void kvmppc_xive_source_reset_one(XiveSource *xsrc, int
> srcno, Error **errp)
> true, errp);
> }
>
> +static bool xive_source_skip_reset(SpaprXive *xive, int srcno)
> +{
> + return !xive->reset_all && !xive_eas_is_valid(&xive->eat[srcno]);
> +}
> +
> static void kvmppc_xive_source_reset(XiveSource *xsrc, Error **errp)
> {
> + SpaprXive *xive = SPAPR_XIVE(xsrc->xive);
> int i;
>
> for (i = 0; i < xsrc->nr_irqs; i++) {
> Error *local_err = NULL;
>
> + if (xive_source_skip_reset(xive, i)) {
> + continue;
> + }
> +
> kvmppc_xive_source_reset_one(xsrc, i, &local_err);
> if (local_err) {
> error_propagate(errp, local_err);
> @@ -328,11 +338,18 @@ uint64_t kvmppc_xive_esb_rw(XiveSource *xsrc, int
> srcno, uint32_t offset,
>
> static void kvmppc_xive_source_get_state(XiveSource *xsrc)
> {
> + SpaprXive *xive = SPAPR_XIVE(xsrc->xive);
> int i;
>
> for (i = 0; i < xsrc->nr_irqs; i++) {
> + uint8_t pq;
> +
> + if (xive_source_skip_reset(xive, i)) {
It looks a bit weird to "skip reset" in a function that isn't
supposed to reset anything... Is it actually necessary to check the
reset_all flag here? I guess checking that the EAS is valid should
be enough.
> + continue;
> + }
> +
> /* Perform a load without side effect to retrieve the PQ bits */
> - uint8_t pq = xive_esb_read(xsrc, i, XIVE_ESB_GET);
> + pq = xive_esb_read(xsrc, i, XIVE_ESB_GET);
>
> /* and save PQ locally */
> xive_source_esb_set(xsrc, i, pq);
> @@ -521,9 +538,14 @@ static void kvmppc_xive_change_state_handler(void
> *opaque, int running,
> */
> if (running) {
> for (i = 0; i < xsrc->nr_irqs; i++) {
> - uint8_t pq = xive_source_esb_get(xsrc, i);
> + uint8_t pq;
> uint8_t old_pq;
>
> + if (xive_source_skip_reset(xive, i)) {
Same here ?
> + continue;
> + }
> +
> + pq = xive_source_esb_get(xsrc, i);
> old_pq = xive_esb_read(xsrc, i, XIVE_ESB_SET_PQ_00 + (pq << 8));
>
> /*
> @@ -545,7 +567,13 @@ static void kvmppc_xive_change_state_handler(void
> *opaque, int running,
> * migration is in progress.
> */
> for (i = 0; i < xsrc->nr_irqs; i++) {
> - uint8_t pq = xive_esb_read(xsrc, i, XIVE_ESB_GET);
> + uint8_t pq;
> +
> + if (xive_source_skip_reset(xive, i)) {
and here ?
> + continue;
> + }
> +
> + pq = xive_esb_read(xsrc, i, XIVE_ESB_GET);
>
> /*
> * PQ is set to PENDING to possibly catch a triggered
> diff --git a/hw/intc/xics.c b/hw/intc/xics.c
> index b2fca2975cc4..ae3f83cf0aad 100644
> --- a/hw/intc/xics.c
> +++ b/hw/intc/xics.c
> @@ -746,6 +746,7 @@ static const VMStateDescription vmstate_ics_base = {
>
> static Property ics_base_properties[] = {
> DEFINE_PROP_UINT32("nr-irqs", ICSState, nr_irqs, 0),
> + DEFINE_PROP_BOOL("reset-all", ICSState, reset_all, false),
> DEFINE_PROP_END_OF_LIST(),
> };
>
> diff --git a/hw/intc/xics_kvm.c b/hw/intc/xics_kvm.c
> index a4d2e876cc5f..aa017b99801c 100644
> --- a/hw/intc/xics_kvm.c
> +++ b/hw/intc/xics_kvm.c
> @@ -177,6 +177,12 @@ void icp_kvm_realize(DeviceState *dev, Error **errp)
> /*
> * ICS-KVM
> */
> +
> +static bool ics_irq_skip_reset(ICSState *ics, int srcno)
> +{
> + return !ics->reset_all && ics_irq_free(ics, srcno);
> +}
> +
> void ics_get_kvm_state(ICSState *ics)
> {
> uint64_t state;
> @@ -190,6 +196,10 @@ void ics_get_kvm_state(ICSState *ics)
> for (i = 0; i < ics->nr_irqs; i++) {
> ICSIRQState *irq = &ics->irqs[i];
>
> + if (ics_irq_skip_reset(ics, i)) {
And here ?
> + continue;
> + }
> +
> kvm_device_access(kernel_xics_fd, KVM_DEV_XICS_GRP_SOURCES,
> i + ics->offset, &state, false, &error_fatal);
>
> @@ -301,6 +311,10 @@ int ics_set_kvm_state(ICSState *ics, Error **errp)
> Error *local_err = NULL;
> int ret;
>
> + if (ics_irq_skip_reset(ics, i)) {
> + continue;
> + }
> +
> ret = ics_set_kvm_state_one(ics, i, &local_err);
> if (ret < 0) {
> error_propagate(errp, local_err);
> diff --git a/hw/ppc/spapr.c b/hw/ppc/spapr.c
> index 7124053b43b7..af89f3b6c698 100644
> --- a/hw/ppc/spapr.c
> +++ b/hw/ppc/spapr.c
> @@ -4594,6 +4594,7 @@ static void
> spapr_machine_4_1_class_options(MachineClass *mc)
> smc->linux_pci_probe = false;
> compat_props_add(mc->compat_props, hw_compat_4_1, hw_compat_4_1_len);
> compat_props_add(mc->compat_props, compat, G_N_ELEMENTS(compat));
> + smc->irq_reset_all = true;
Drop this machine class attribute and instead add two lines to the static compat[] array:
static GlobalProperty compat[] = {
/* Only allow 4kiB and 64kiB IOMMU pagesizes */
{ TYPE_SPAPR_PCI_HOST_BRIDGE, "pgsz", "0x11000" },
{ TYPE_SPAPR_XIVE, "reset-all", "on" },
{ TYPE_ICS_BASE, "reset-all", "on" },
};
This ensures that any instance of SpaprXive or ICSState created by
a pseries-4.1 machine has reset_all set to true.
> }
>
> DEFINE_SPAPR_MACHINE(4_1, "4.1", false);
> diff --git a/hw/ppc/spapr_irq.c b/hw/ppc/spapr_irq.c
> index 06fe2432bae5..8c3682613404 100644
> --- a/hw/ppc/spapr_irq.c
> +++ b/hw/ppc/spapr_irq.c
> @@ -97,11 +97,13 @@ static void spapr_irq_init_xics(SpaprMachineState *spapr,
> int nr_irqs,
> {
> Object *obj;
> Error *local_err = NULL;
> + bool reset_all = SPAPR_MACHINE_GET_CLASS(spapr)->irq_reset_all;
>
... and you can drop all the propagation code here...
> obj = object_new(TYPE_ICS_SIMPLE);
> object_property_add_child(OBJECT(spapr), "ics", obj, &error_abort);
> object_property_add_const_link(obj, ICS_PROP_XICS, OBJECT(spapr),
> &error_fatal);
> + object_property_set_bool(obj, reset_all, "reset-all", &error_fatal);
> object_property_set_int(obj, nr_irqs, "nr-irqs", &error_fatal);
> object_property_set_bool(obj, true, "realized", &local_err);
> if (local_err) {
> @@ -114,9 +116,6 @@ static void spapr_irq_init_xics(SpaprMachineState *spapr,
> int nr_irqs,
> xics_spapr_init(spapr);
> }
>
> -#define ICS_IRQ_FREE(ics, srcno) \
> - (!((ics)->irqs[(srcno)].flags & (XICS_FLAGS_IRQ_MASK)))
> -
> static int spapr_irq_claim_xics(SpaprMachineState *spapr, int irq, bool lsi,
> Error **errp)
> {
> @@ -129,7 +128,7 @@ static int spapr_irq_claim_xics(SpaprMachineState *spapr,
> int irq, bool lsi,
> return -1;
> }
>
> - if (!ICS_IRQ_FREE(ics, irq - ics->offset)) {
> + if (!ics_irq_free(ics, irq - ics->offset)) {
> error_setg(errp, "IRQ %d is not free", irq);
> return -1;
> }
> @@ -147,7 +146,7 @@ static void spapr_irq_free_xics(SpaprMachineState *spapr,
> int irq, int num)
> if (ics_valid_irq(ics, irq)) {
> trace_spapr_irq_free(0, irq, num);
> for (i = srcno; i < srcno + num; ++i) {
> - if (ICS_IRQ_FREE(ics, i)) {
> + if (ics_irq_free(ics, i)) {
> trace_spapr_irq_free_warn(0, i);
> }
> memset(&ics->irqs[i], 0, sizeof(ICSIRQState));
> @@ -270,9 +269,11 @@ static void spapr_irq_init_xive(SpaprMachineState
> *spapr, int nr_irqs,
> {
> uint32_t nr_servers = spapr_max_server_number(spapr);
> DeviceState *dev;
> + bool reset_all = SPAPR_MACHINE_GET_CLASS(spapr)->irq_reset_all;
... and here.
> int i;
>
> dev = qdev_create(NULL, TYPE_SPAPR_XIVE);
> + object_property_set_bool(OBJECT(dev), reset_all, "reset-all",
> &error_fatal);
> qdev_prop_set_uint32(dev, "nr-irqs", nr_irqs);
> /*
> * 8 XIVE END structures per CPU. One for each available priority
> @@ -767,7 +768,7 @@ static int ics_find_free_block(ICSState *ics, int num,
> int alignnum)
> return -1;
> }
> for (i = first; i < first + num; ++i) {
> - if (!ICS_IRQ_FREE(ics, i)) {
> + if (!ics_irq_free(ics, i)) {
> break;
> }
> }