[Date Prev][Date Next][Thread Prev][Thread Next][Date Index][Thread Index]
Re: [Qemu-ppc] [PATCH 12/25] spapr: introduce a XIVE interrupt presenter
From: |
Cédric Le Goater |
Subject: |
Re: [Qemu-ppc] [PATCH 12/25] spapr: introduce a XIVE interrupt presenter model |
Date: |
Wed, 29 Nov 2017 10:55:34 +0100 |
User-agent: |
Mozilla/5.0 (X11; Linux x86_64; rv:52.0) Gecko/20100101 Thunderbird/52.4.0 |
On 11/29/2017 06:11 AM, David Gibson wrote:
> On Thu, Nov 23, 2017 at 02:29:42PM +0100, Cédric Le Goater wrote:
>> The XIVE interrupt presenter exposes a set of rings, also called
>> Thread Interrupt Management Areas (TIMA), to handle priority
>> management and interrupt acknowledgment among other things. There is
>> one ring per level of privilege, four in all. The one we are
>> interested in for the sPAPR machine is the OS ring.
>>
>> The TIMA is mapped at the same address for each CPU. 'current_cpu' is
>> used to retrieve the targeted interrupt presenter object holding the
>> cache data of the registers the model use.
>>
>> Signed-off-by: Cédric Le Goater <address@hidden>
>> ---
>> hw/intc/spapr_xive.c | 271
>> ++++++++++++++++++++++++++++++++++++++++++++
>> hw/intc/xive-internal.h | 89 +++++++++++++++
>> include/hw/ppc/spapr_xive.h | 11 ++
>> 3 files changed, 371 insertions(+)
>>
>> diff --git a/hw/intc/spapr_xive.c b/hw/intc/spapr_xive.c
>> index b1e3f8710cff..554b25e0884c 100644
>> --- a/hw/intc/spapr_xive.c
>> +++ b/hw/intc/spapr_xive.c
>> @@ -23,9 +23,166 @@
>> #include "sysemu/dma.h"
>> #include "monitor/monitor.h"
>> #include "hw/ppc/spapr_xive.h"
>> +#include "hw/ppc/xics.h"
>>
>> #include "xive-internal.h"
>>
>> +struct sPAPRXiveICP {
>
> I'd really prefer to avoid calling anything in xive "icp" to avoid
> confusion with xics.
OK.
The specs refers to the whole as an IVPE : Interrupt Virtualization
Presentation Engine. In our model, we use the TIMA cached values of
the OS ring and the qemu_irq for the CPU line.
Would 'sPAPRXivePresenter' be fine ?
>> + DeviceState parent_obj;
>> +
>> + CPUState *cs;
>> + uint8_t tima[TM_RING_COUNT * 0x10];
>
> What does the 0x10 represent? #define for clarity, maybe.
yes.
> Do we need to model the whole range as memory, or just the relevant
> pieces with read/write meaning?
Yes. we could limit the TIMA and MMIO region to what sPAPR only needs :
the OS ring.
>
>> + uint8_t *tima_os;
>> + qemu_irq output;
>> +};
>> +
>> +static uint64_t spapr_xive_icp_accept(sPAPRXiveICP *icp)
>> +{
>> + return 0;
>> +}
>> +
>> +static void spapr_xive_icp_set_cppr(sPAPRXiveICP *icp, uint8_t cppr)
>> +{
>> + if (cppr > XIVE_PRIORITY_MAX) {
>> + cppr = 0xff;
>> + }
>> +
>> + icp->tima_os[TM_CPPR] = cppr;
>> +}
>> +
>> +/*
>> + * Thread Interrupt Management Area MMIO
>> + */
>> +static uint64_t spapr_xive_tm_read_special(sPAPRXiveICP *icp, hwaddr offset,
>> + unsigned size)
>> +{
>> + uint64_t ret = -1;
>> +
>> + if (offset == TM_SPC_ACK_OS_REG && size == 2) {
>> + ret = spapr_xive_icp_accept(icp);
>> + } else {
>> + qemu_log_mask(LOG_GUEST_ERROR, "XIVE: invalid TIMA read @%"
>> + HWADDR_PRIx" size %d\n", offset, size);
>> + }
>> +
>> + return ret;
>> +}
>> +
>> +static uint64_t spapr_xive_tm_read(void *opaque, hwaddr offset, unsigned
>> size)
>> +{
>> + PowerPCCPU *cpu = POWERPC_CPU(current_cpu);
>
> So, strictly speaking this could be handled by setting each of the
> CPUs address spaces separately, to something with their own TIMA
> superimposed on address_space_memory.
Ah. I didn't know we could do that.
> What you have might be more practical though.
well, you will see at the end of the patchset how cpu->intc is assigned.
>> + sPAPRXiveICP *icp = SPAPR_XIVE_ICP(cpu->intc);
>> + uint64_t ret = -1;
>> + int i;
>> +
>> + if (offset >= TM_SPC_ACK_EBB) {
>> + return spapr_xive_tm_read_special(icp, offset, size);
>> + }
>> +
>> + if ((offset & 0xf0) == TM_QW1_OS) {
>> + switch (size) {
>> + case 1:
>> + case 2:
>> + case 4:
>> + case 8:
>> + if (QEMU_IS_ALIGNED(offset, size)) {
>
> Hm, the MR subsystem doesn't already split unaligned accesses?
euh. yes, I might be doing a little too much.
>> + ret = 0;
>> + for (i = 0; i < size; i++) {
>> + ret |= icp->tima[offset + i] << (8 * i);
>> + }
>> + } else {
>> + qemu_log_mask(LOG_GUEST_ERROR,
>> + "XIVE: invalid TIMA read alignment @%"
>> + HWADDR_PRIx" size %d\n", offset, size);
>> + }
>> + break;
>> + default:
>> + g_assert_not_reached();
>> + }
>> + } else {
>> + qemu_log_mask(LOG_UNIMP, "XIVE: does handle non-OS TIMA ring @%"
>> + HWADDR_PRIx"\n", offset);
>> + }
>> +
>> + return ret;
>> +}
>> +
>> +static bool spapr_xive_tm_is_readonly(uint8_t offset)
>> +{
>> + /* Let's be optimistic and prepare ground for HV mode support */
>> + switch (offset) {
>> + case TM_QW1_OS + TM_CPPR:
>> + return false;
>> + default:
>> + return true;
>> + }
>> +}
>> +
>> +static void spapr_xive_tm_write_special(sPAPRXiveICP *icp, hwaddr offset,
>> + uint64_t value, unsigned size)
>> +{
>> + /* TODO: support TM_SPC_SET_OS_PENDING */
>> +
>> + /* TODO: support TM_SPC_ACK_OS_EL */
>> +}
>> +
>> +static void spapr_xive_tm_write(void *opaque, hwaddr offset,
>> + uint64_t value, unsigned size)
>> +{
>> + PowerPCCPU *cpu = POWERPC_CPU(current_cpu);
>> + sPAPRXiveICP *icp = SPAPR_XIVE_ICP(cpu->intc);
>> + int i;
>> +
>> + if (offset >= TM_SPC_ACK_EBB) {
>> + spapr_xive_tm_write_special(icp, offset, value, size);
>> + return;
>> + }
>> +
>> + if ((offset & 0xf0) == TM_QW1_OS) {
>> + switch (size) {
>> + case 1:
>> + if (offset == TM_QW1_OS + TM_CPPR) {
>> + spapr_xive_icp_set_cppr(icp, value & 0xff);
>> + }
>> + break;
>> + case 4:
>> + case 8:
>> + if (QEMU_IS_ALIGNED(offset, size)) {
>> + for (i = 0; i < size; i++) {
>> + if (!spapr_xive_tm_is_readonly(offset + i)) {
>> + icp->tima[offset + i] = (value >> (8 * i)) & 0xff;
>> + }
>> + }
>> + } else {
>> + qemu_log_mask(LOG_GUEST_ERROR, "XIVE: invalid TIMA write @%"
>> + HWADDR_PRIx" size %d\n", offset, size);
>> + }
>> + break;
>> + default:
>> + qemu_log_mask(LOG_GUEST_ERROR, "XIVE: invalid TIMA write @%"
>> + HWADDR_PRIx" size %d\n", offset, size);
>> + }
>> + } else {
>> + qemu_log_mask(LOG_UNIMP, "XIVE: does handle non-OS TIMA ring @%"
>> + HWADDR_PRIx"\n", offset);
>
> The many qemu_log()s worry me a little. They're not ratelimited, so
> the guest could in principle chew through the host's log space.
>
> IIUC these are very unlikely to be hit in practice, so maybe
> tracepoints would be more suitable.
ok.
>> + }
>> +}
>> +
>> +
>> +static const MemoryRegionOps spapr_xive_tm_ops = {
>> + .read = spapr_xive_tm_read,
>> + .write = spapr_xive_tm_write,
>> + .endianness = DEVICE_BIG_ENDIAN,
>> + .valid = {
>> + .min_access_size = 1,
>> + .max_access_size = 8,
>> + },
>> + .impl = {
>> + .min_access_size = 1,
>> + .max_access_size = 8,
>> + },
>> +};
>> +
>> static void spapr_xive_irq(sPAPRXive *xive, int lisn)
>> {
>>
>> @@ -287,6 +444,11 @@ static void spapr_xive_source_set_irq(void *opaque, int
>> lisn, int val)
>> #define VC_BAR_SIZE 0x08000000000ull
>> #define ESB_SHIFT 16 /* One 64k page. OPAL has two */
>>
>> +/* Thread Interrupt Management Area MMIO */
>> +#define TM_BAR_DEFAULT 0x30203180000ull
>> +#define TM_SHIFT 16
>> +#define TM_BAR_SIZE (TM_RING_COUNT * (1 << TM_SHIFT))
>> +
>> static uint64_t spapr_xive_esb_default_read(void *p, hwaddr offset,
>> unsigned size)
>> {
>> @@ -392,6 +554,14 @@ static void spapr_xive_realize(DeviceState *dev, Error
>> **errp)
>> (1ull << xive->esb_shift) * xive->nr_irqs);
>> memory_region_add_subregion(&xive->esb_mr, 0, &xive->esb_iomem);
>>
>> + /* TM BAR. Same address for each chip */
>> + xive->tm_base = (P9_MMIO_BASE | TM_BAR_DEFAULT);
>> + xive->tm_shift = TM_SHIFT;
>
> Any reason for this to be a variable?
no, we could just use TM_SHIFT. I will look into it.
>> +
>> + memory_region_init_io(&xive->tm_iomem, OBJECT(xive), &spapr_xive_tm_ops,
>> + xive, "xive.tm", TM_BAR_SIZE);
>> + sysbus_init_mmio(SYS_BUS_DEVICE(dev), &xive->tm_iomem);
>> +
>> qemu_register_reset(spapr_xive_reset, dev);
>> }
>>
>> @@ -448,9 +618,110 @@ static const TypeInfo spapr_xive_info = {
>> .class_init = spapr_xive_class_init,
>> };
>>
>> +void spapr_xive_icp_pic_print_info(sPAPRXiveICP *xicp, Monitor *mon)
>> +{
>> + int cpu_index = xicp->cs ? xicp->cs->cpu_index : -1;
>> +
>> + monitor_printf(mon, "CPU %d CPPR=%02x IPB=%02x PIPR=%02x NSR=%02x\n",
>> + cpu_index, xicp->tima_os[TM_CPPR], xicp->tima_os[TM_IPB],
>> + xicp->tima_os[TM_PIPR], xicp->tima_os[TM_NSR]);
>> +}
>> +
>> +static void spapr_xive_icp_reset(void *dev)
>> +{
>> + sPAPRXiveICP *xicp = SPAPR_XIVE_ICP(dev);
>> +
>> + memset(xicp->tima, 0, sizeof(xicp->tima));
>> +}
>> +
>> +static void spapr_xive_icp_realize(DeviceState *dev, Error **errp)
>> +{
>> + sPAPRXiveICP *xicp = SPAPR_XIVE_ICP(dev);
>> + PowerPCCPU *cpu;
>> + CPUPPCState *env;
>> + Object *obj;
>> + Error *err = NULL;
>> +
>> + obj = object_property_get_link(OBJECT(dev), ICP_PROP_CPU, &err);
>> + if (!obj) {
>> + error_propagate(errp, err);
>> + error_prepend(errp, "required link '" ICP_PROP_CPU "' not found: ");
>> + return;
>> + }
>> +
>> + cpu = POWERPC_CPU(obj);
>> + xicp->cs = CPU(obj);
>> +
>> + env = &cpu->env;
>> + switch (PPC_INPUT(env)) {
>> + case PPC_FLAGS_INPUT_POWER7:
>> + xicp->output = env->irq_inputs[POWER7_INPUT_INT];
>> + break;
>> +
>> + case PPC_FLAGS_INPUT_970:
>> + xicp->output = env->irq_inputs[PPC970_INPUT_INT];
>> + break;
>
> I really don't think we need to implement XIVE for 970.
Indeed. This is a left over from a copy/paste of the ICPState
realize routine.
>> +
>> + default:
>> + error_setg(errp, "XIVE interrupt controller does not support "
>> + "this CPU bus model");
>> + return;
>> + }
>> +
>> + qemu_register_reset(spapr_xive_icp_reset, dev);
>> +}
>> +
>> +static void spapr_xive_icp_unrealize(DeviceState *dev, Error **errp)
>> +{
>> + qemu_unregister_reset(spapr_xive_icp_reset, dev);
>> +}
>> +
>> +static void spapr_xive_icp_init(Object *obj)
>> +{
>> + sPAPRXiveICP *xicp = SPAPR_XIVE_ICP(obj);
>> +
>> + xicp->tima_os = &xicp->tima[TM_QW1_OS];
>
> This is a fixed offset, so why store it as a pointer. For the PAPR
> guest case, do we even need to model the other rings?
No we don't. I will simplify.
Thanks,
C.
>
>> +}
>> +
>> +static bool vmstate_spapr_xive_icp_needed(void *opaque)
>> +{
>> + /* TODO check machine XIVE support */
>> + return true;
>> +}
>> +
>> +static const VMStateDescription vmstate_spapr_xive_icp = {
>> + .name = TYPE_SPAPR_XIVE_ICP,
>> + .version_id = 1,
>> + .minimum_version_id = 1,
>> + .needed = vmstate_spapr_xive_icp_needed,
>> + .fields = (VMStateField[]) {
>> + VMSTATE_BUFFER(tima, sPAPRXiveICP),
>> + VMSTATE_END_OF_LIST()
>> + },
>> +};
>> +
>> +static void spapr_xive_icp_class_init(ObjectClass *klass, void *data)
>> +{
>> + DeviceClass *dc = DEVICE_CLASS(klass);
>> +
>> + dc->realize = spapr_xive_icp_realize;
>> + dc->unrealize = spapr_xive_icp_unrealize;
>> + dc->desc = "sPAPR XIVE Interrupt Presenter";
>> + dc->vmsd = &vmstate_spapr_xive_icp;
>> +}
>> +
>> +static const TypeInfo xive_icp_info = {
>> + .name = TYPE_SPAPR_XIVE_ICP,
>> + .parent = TYPE_DEVICE,
>> + .instance_size = sizeof(sPAPRXiveICP),
>> + .instance_init = spapr_xive_icp_init,
>> + .class_init = spapr_xive_icp_class_init,
>> +};
>> +
>> static void spapr_xive_register_types(void)
>> {
>> type_register_static(&spapr_xive_info);
>> + type_register_static(&xive_icp_info);
>> }
>>
>> type_init(spapr_xive_register_types)
>> diff --git a/hw/intc/xive-internal.h b/hw/intc/xive-internal.h
>> index bea88d82992c..7d329f203a9b 100644
>> --- a/hw/intc/xive-internal.h
>> +++ b/hw/intc/xive-internal.h
>> @@ -24,6 +24,93 @@
>> #define PPC_BITMASK32(bs, be) ((PPC_BIT32(bs) - PPC_BIT32(be)) | \
>> PPC_BIT32(bs))
>>
>> +/*
>> + * Thread Management (aka "TM") registers
>
> Because "TM" didn't stand for enough things already :/.
>
>> + */
>> +
>> +/* Number of Thread Management Interrupt Areas */
>> +#define TM_RING_COUNT 4
>> +
>> +/* TM register offsets */
>> +#define TM_QW0_USER 0x000 /* All rings */
>> +#define TM_QW1_OS 0x010 /* Ring 0..2 */
>> +#define TM_QW2_HV_POOL 0x020 /* Ring 0..1 */
>> +#define TM_QW3_HV_PHYS 0x030 /* Ring 0..1 */
>> +
>> +/* Byte offsets inside a QW QW0 QW1 QW2 QW3 */
>> +#define TM_NSR 0x0 /* + + - + */
>> +#define TM_CPPR 0x1 /* - + - + */
>> +#define TM_IPB 0x2 /* - + + + */
>> +#define TM_LSMFB 0x3 /* - + + + */
>> +#define TM_ACK_CNT 0x4 /* - + - - */
>> +#define TM_INC 0x5 /* - + - + */
>> +#define TM_AGE 0x6 /* - + - + */
>> +#define TM_PIPR 0x7 /* - + - + */
>> +
>> +#define TM_WORD0 0x0
>> +#define TM_WORD1 0x4
>> +
>> +/*
>> + * QW word 2 contains the valid bit at the top and other fields
>> + * depending on the QW.
>> + */
>> +#define TM_WORD2 0x8
>> +#define TM_QW0W2_VU PPC_BIT32(0)
>> +#define TM_QW0W2_LOGIC_SERV PPC_BITMASK32(1, 31) /* XX 2,31 ? */
>> +#define TM_QW1W2_VO PPC_BIT32(0)
>> +#define TM_QW1W2_OS_CAM PPC_BITMASK32(8, 31)
>> +#define TM_QW2W2_VP PPC_BIT32(0)
>> +#define TM_QW2W2_POOL_CAM PPC_BITMASK32(8, 31)
>> +#define TM_QW3W2_VT PPC_BIT32(0)
>> +#define TM_QW3W2_LP PPC_BIT32(6)
>> +#define TM_QW3W2_LE PPC_BIT32(7)
>> +#define TM_QW3W2_T PPC_BIT32(31)
>> +
>> +/*
>> + * In addition to normal loads to "peek" and writes (only when invalid)
>> + * using 4 and 8 bytes accesses, the above registers support these
>> + * "special" byte operations:
>> + *
>> + * - Byte load from QW0[NSR] - User level NSR (EBB)
>> + * - Byte store to QW0[NSR] - User level NSR (EBB)
>> + * - Byte load/store to QW1[CPPR] and QW3[CPPR] - CPPR access
>> + * - Byte load from QW3[TM_WORD2] - Read VT||00000||LP||LE on thrd 0
>> + * otherwise VT||0000000
>> + * - Byte store to QW3[TM_WORD2] - Set VT bit (and LP/LE if present)
>> + *
>> + * Then we have all these "special" CI ops at these offset that trigger
>> + * all sorts of side effects:
>> + */
>> +#define TM_SPC_ACK_EBB 0x800 /* Load8 ack EBB to reg*/
>> +#define TM_SPC_ACK_OS_REG 0x810 /* Load16 ack OS irq to reg */
>> +#define TM_SPC_PUSH_USR_CTX 0x808 /* Store32 Push/Validate user
>> context */
>> +#define TM_SPC_PULL_USR_CTX 0x808 /* Load32 Pull/Invalidate user
>> + * context */
>> +#define TM_SPC_SET_OS_PENDING 0x812 /* Store8 Set OS irq pending bit */
>> +#define TM_SPC_PULL_OS_CTX 0x818 /* Load32/Load64 Pull/Invalidate OS
>> + * context to reg */
>> +#define TM_SPC_PULL_POOL_CTX 0x828 /* Load32/Load64 Pull/Invalidate
>> Pool
>> + * context to reg*/
>> +#define TM_SPC_ACK_HV_REG 0x830 /* Load16 ack HV irq to reg */
>> +#define TM_SPC_PULL_USR_CTX_OL 0xc08 /* Store8 Pull/Inval usr ctx to odd
>> + * line */
>> +#define TM_SPC_ACK_OS_EL 0xc10 /* Store8 ack OS irq to even line */
>> +#define TM_SPC_ACK_HV_POOL_EL 0xc20 /* Store8 ack HV evt pool to even
>> + * line */
>> +#define TM_SPC_ACK_HV_EL 0xc30 /* Store8 ack HV irq to even line */
>> +/* XXX more... */
>> +
>> +/* NSR fields for the various QW ack types */
>> +#define TM_QW0_NSR_EB PPC_BIT8(0)
>> +#define TM_QW1_NSR_EO PPC_BIT8(0)
>> +#define TM_QW3_NSR_HE PPC_BITMASK8(0, 1)
>> +#define TM_QW3_NSR_HE_NONE 0
>> +#define TM_QW3_NSR_HE_POOL 1
>> +#define TM_QW3_NSR_HE_PHYS 2
>> +#define TM_QW3_NSR_HE_LSI 3
>> +#define TM_QW3_NSR_I PPC_BIT8(2)
>> +#define TM_QW3_NSR_GRP_LVL PPC_BIT8(3, 7)
>> +
>> /* IVE/EAS
>> *
>> * One per interrupt source. Targets that interrupt to a given EQ
>> @@ -44,6 +131,8 @@ typedef struct XiveIVE {
>> #define IVE_EQ_DATA PPC_BITMASK(33, 63) /* Data written to the EQ
>> */
>> } XiveIVE;
>>
>> +#define XIVE_PRIORITY_MAX 7
>> +
>> void spapr_xive_reset(void *dev);
>> XiveIVE *spapr_xive_get_ive(sPAPRXive *xive, uint32_t lisn);
>>
>> diff --git a/include/hw/ppc/spapr_xive.h b/include/hw/ppc/spapr_xive.h
>> index 7a308fb4db2b..6e8a189e723f 100644
>> --- a/include/hw/ppc/spapr_xive.h
>> +++ b/include/hw/ppc/spapr_xive.h
>> @@ -23,10 +23,15 @@
>>
>> typedef struct sPAPRXive sPAPRXive;
>> typedef struct XiveIVE XiveIVE;
>> +typedef struct sPAPRXiveICP sPAPRXiveICP;
>>
>> #define TYPE_SPAPR_XIVE "spapr-xive"
>> #define SPAPR_XIVE(obj) OBJECT_CHECK(sPAPRXive, (obj), TYPE_SPAPR_XIVE)
>>
>> +#define TYPE_SPAPR_XIVE_ICP "spapr-xive-icp"
>> +#define SPAPR_XIVE_ICP(obj) \
>> + OBJECT_CHECK(sPAPRXiveICP, (obj), TYPE_SPAPR_XIVE_ICP)
>> +
>> struct sPAPRXive {
>> SysBusDevice parent;
>>
>> @@ -57,6 +62,11 @@ struct sPAPRXive {
>> hwaddr esb_base;
>> MemoryRegion esb_mr;
>> MemoryRegion esb_iomem;
>> +
>> + /* TIMA memory region */
>> + uint32_t tm_shift;
>> + hwaddr tm_base;
>> + MemoryRegion tm_iomem;
>> };
>>
>> static inline bool spapr_xive_irq_is_lsi(sPAPRXive *xive, int lisn)
>> @@ -67,5 +77,6 @@ static inline bool spapr_xive_irq_is_lsi(sPAPRXive *xive,
>> int lisn)
>> bool spapr_xive_irq_set(sPAPRXive *xive, uint32_t lisn, bool lsi);
>> bool spapr_xive_irq_unset(sPAPRXive *xive, uint32_t lisn);
>> void spapr_xive_pic_print_info(sPAPRXive *xive, Monitor *mon);
>> +void spapr_xive_icp_pic_print_info(sPAPRXiveICP *xicp, Monitor *mon);
>>
>> #endif /* PPC_SPAPR_XIVE_H */
>
[Qemu-ppc] [PATCH 11/25] spapr: describe the XIVE interrupt source flags, Cédric Le Goater, 2017/11/23
[Qemu-ppc] [PATCH 12/25] spapr: introduce a XIVE interrupt presenter model, Cédric Le Goater, 2017/11/23
[Qemu-ppc] [PATCH 13/25] spapr: introduce the XIVE Event Queues, Cédric Le Goater, 2017/11/23
Re: [Qemu-ppc] [PATCH 13/25] spapr: introduce the XIVE Event Queues, David Gibson, 2017/11/30
[Qemu-ppc] [PATCH 14/25] spapr: push the XIVE EQ data in OS event queue, Cédric Le Goater, 2017/11/23