[Date Prev][Date Next][Thread Prev][Thread Next][Date Index][Thread Index]
Re: [Qemu-ppc] [PATCH] spapr-pci: rework MSI/MSIX
From: |
Alexey Kardashevskiy |
Subject: |
Re: [Qemu-ppc] [PATCH] spapr-pci: rework MSI/MSIX |
Date: |
Tue, 30 Jul 2013 12:56:15 +1000 |
User-agent: |
Mozilla/5.0 (X11; Linux x86_64; rv:17.0) Gecko/20130514 Thunderbird/17.0.6 |
Ping, anyone?
On 07/23/2013 12:54 PM, Alexey Kardashevskiy wrote:
> Michael, could you please review the patch as it is about PCI? Thanks!
>
>
> On 07/12/2013 05:38 PM, Alexey Kardashevskiy wrote:
>> On the sPAPR platform a guest allocates MSI/MSIX vectors via RTAS
>> hypercalls which return global IRQ numbers to a guest so it only
>> operates with those and never touches MSIMessage.
>>
>> Therefore MSIMessage handling is completely hidden in QEMU.
>>
>> Previously every sPAPR PCI host bridge implemented its own MSI window
>> to catch msi_notify()/msix_notify() calls from QEMU devices (virtio-pci
>> or vfio) and route them to the guest via qemu_pulse_irq().
>> MSIMessage used to be encoded as:
>> .addr - address within the PHB MSI window;
>> .data - the device index on PHB plus vector number.
>> The MSI MR write function translated this MSIMessage to a global IRQ
>> number and called qemu_pulse_irq().
>>
>> However the total number of IRQs is not really big (at the moment it is
>> 1024 IRQs starting from 4096) and even 16bit data field of MSIMessage
>> seems to be enough to store an IRQ number there.
>>
>> This simplifies MSI handling in sPAPR PHB. Specifically, this does:
>> 1. remove a MSI window from a PHB;
>> 2. add a single memory region for all MSIs to sPAPREnvironment
>> and spapr_pci_msi_init() to initialize it;
>> 3. encode MSIMessage as:
>> * .addr - a fixed address of SPAPR_PCI_MSI_WINDOW==0x40000000000ULL;
>> * .data as an IRQ number.
>> 4. change IRQ allocator to align first IRQ number in a block for MSI.
>> MSI uses lower bits to specify the vector number so the first IRQ has to
>> be aligned. MSIX does not need any special allocator though.
>>
>> Signed-off-by: Alexey Kardashevskiy <address@hidden>
>> ---
>> hw/ppc/spapr.c | 29 ++++++++++++++++++---
>> hw/ppc/spapr_pci.c | 61
>> ++++++++++++++++++++-------------------------
>> include/hw/pci-host/spapr.h | 8 +++---
>> include/hw/ppc/spapr.h | 4 ++-
>> 4 files changed, 60 insertions(+), 42 deletions(-)
>>
>> diff --git a/hw/ppc/spapr.c b/hw/ppc/spapr.c
>> index 671bbd9..6b21191 100644
>> --- a/hw/ppc/spapr.c
>> +++ b/hw/ppc/spapr.c
>> @@ -88,6 +88,9 @@ int spapr_allocate_irq(int hint, bool lsi)
>>
>> if (hint) {
>> irq = hint;
>> + if (hint >= spapr->next_irq) {
>> + spapr->next_irq = hint + 1;
>> + }
>> /* FIXME: we should probably check for collisions somehow */
>> } else {
>> irq = spapr->next_irq++;
>> @@ -103,22 +106,39 @@ int spapr_allocate_irq(int hint, bool lsi)
>> return irq;
>> }
>>
>> -/* Allocate block of consequtive IRQs, returns a number of the first */
>> -int spapr_allocate_irq_block(int num, bool lsi)
>> +/*
>> + * Allocate block of consequtive IRQs, returns a number of the first.
>> + * If msi==true, aligns the first IRQ number to num.
>> + */
>> +int spapr_allocate_irq_block(int num, bool lsi, bool msi)
>> {
>> int first = -1;
>> - int i;
>> + int i, hint = 0;
>> +
>> + /*
>> + * MSIMesage::data is used for storing VIRQ so
>> + * it has to be aligned to num to support multiple
>> + * MSI vectors. MSI-X is not affected by this.
>> + * The hint is used for the first IRQ, the rest should
>> + * be allocated continously.
>> + */
>> + if (msi) {
>> + assert((num == 1) || (num == 2) || (num == 4) ||
>> + (num == 8) || (num == 16) || (num == 32));
>> + hint = (spapr->next_irq + num - 1) & ~(num - 1);
>> + }
>>
>> for (i = 0; i < num; ++i) {
>> int irq;
>>
>> - irq = spapr_allocate_irq(0, lsi);
>> + irq = spapr_allocate_irq(hint, lsi);
>> if (!irq) {
>> return -1;
>> }
>>
>> if (0 == i) {
>> first = irq;
>> + hint = 0;
>> }
>>
>> /* If the above doesn't create a consecutive block then that's
>> @@ -1127,6 +1147,7 @@ static void ppc_spapr_init(QEMUMachineInitArgs *args)
>> spapr_create_nvram(spapr);
>>
>> /* Set up PCI */
>> + spapr_pci_msi_init(spapr, SPAPR_PCI_MSI_WINDOW);
>> spapr_pci_rtas_init();
>>
>> phb = spapr_create_phb(spapr, 0);
>> diff --git a/hw/ppc/spapr_pci.c b/hw/ppc/spapr_pci.c
>> index dfe4d04..c8ea777 100644
>> --- a/hw/ppc/spapr_pci.c
>> +++ b/hw/ppc/spapr_pci.c
>> @@ -258,11 +258,11 @@ static int spapr_msicfg_find(sPAPRPHBState *phb,
>> uint32_t config_addr,
>> * This is required for msi_notify()/msix_notify() which
>> * will write at the addresses via spapr_msi_write().
>> */
>> -static void spapr_msi_setmsg(PCIDevice *pdev, hwaddr addr,
>> - bool msix, unsigned req_num)
>> +static void spapr_msi_setmsg(PCIDevice *pdev, hwaddr addr, bool msix,
>> + unsigned first_irq, unsigned req_num)
>> {
>> unsigned i;
>> - MSIMessage msg = { .address = addr, .data = 0 };
>> + MSIMessage msg = { .address = addr, .data = first_irq };
>>
>> if (!msix) {
>> msi_set_message(pdev, msg);
>> @@ -270,8 +270,7 @@ static void spapr_msi_setmsg(PCIDevice *pdev, hwaddr
>> addr,
>> return;
>> }
>>
>> - for (i = 0; i < req_num; ++i) {
>> - msg.address = addr | (i << 2);
>> + for (i = 0; i < req_num; ++i, ++msg.data) {
>> msix_set_message(pdev, i, msg);
>> trace_spapr_pci_msi_setup(pdev->name, i, msg.address);
>> }
>> @@ -351,7 +350,8 @@ static void rtas_ibm_change_msi(PowerPCCPU *cpu,
>> sPAPREnvironment *spapr,
>>
>> /* There is no cached config, allocate MSIs */
>> if (!phb->msi_table[ndev].nvec) {
>> - irq = spapr_allocate_irq_block(req_num, false);
>> + irq = spapr_allocate_irq_block(req_num, false,
>> + ret_intr_type == RTAS_TYPE_MSI);
>> if (irq < 0) {
>> fprintf(stderr, "Cannot allocate MSIs for device#%d", ndev);
>> rtas_st(rets, 0, -1); /* Hardware error */
>> @@ -363,8 +363,8 @@ static void rtas_ibm_change_msi(PowerPCCPU *cpu,
>> sPAPREnvironment *spapr,
>> }
>>
>> /* Setup MSI/MSIX vectors in the device (via cfgspace or MSIX BAR) */
>> - spapr_msi_setmsg(pdev, phb->msi_win_addr | (ndev << 16),
>> - ret_intr_type == RTAS_TYPE_MSIX, req_num);
>> + spapr_msi_setmsg(pdev, spapr->msi_win_addr, ret_intr_type ==
>> RTAS_TYPE_MSIX,
>> + phb->msi_table[ndev].irq, req_num);
>>
>> rtas_st(rets, 0, 0);
>> rtas_st(rets, 1, req_num);
>> @@ -487,10 +487,7 @@ static const MemoryRegionOps spapr_io_ops = {
>> static void spapr_msi_write(void *opaque, hwaddr addr,
>> uint64_t data, unsigned size)
>> {
>> - sPAPRPHBState *phb = opaque;
>> - int ndev = addr >> 16;
>> - int vec = ((addr & 0xFFFF) >> 2) | data;
>> - uint32_t irq = phb->msi_table[ndev].irq + vec;
>> + uint32_t irq = data;
>>
>> trace_spapr_pci_msi_write(addr, data, irq);
>>
>> @@ -504,6 +501,23 @@ static const MemoryRegionOps spapr_msi_ops = {
>> .endianness = DEVICE_LITTLE_ENDIAN
>> };
>>
>> +void spapr_pci_msi_init(sPAPREnvironment *spapr, hwaddr addr)
>> +{
>> + /*
>> + * As MSI/MSIX interrupts trigger by writing at MSI/MSIX vectors,
>> + * we need to allocate some memory to catch those writes coming
>> + * from msi_notify()/msix_notify().
>> + * As MSIMessage:addr is going to be the same and MSIMessage:data
>> + * is going to be a VIRQ number, 4 bytes of the MSI MR will only
>> + * be used.
>> + */
>> + spapr->msi_win_addr = addr;
>> + memory_region_init_io(&spapr->msiwindow, NULL, &spapr_msi_ops, spapr,
>> + "msi", getpagesize());
>> + memory_region_add_subregion(get_system_memory(), spapr->msi_win_addr,
>> + &spapr->msiwindow);
>> +}
>> +
>> /*
>> * PHB PCI device
>> */
>> @@ -528,8 +542,7 @@ static int spapr_phb_init(SysBusDevice *s)
>>
>> if ((sphb->buid != -1) || (sphb->dma_liobn != -1)
>> || (sphb->mem_win_addr != -1)
>> - || (sphb->io_win_addr != -1)
>> - || (sphb->msi_win_addr != -1)) {
>> + || (sphb->io_win_addr != -1)) {
>> fprintf(stderr, "Either \"index\" or other parameters must"
>> " be specified for PAPR PHB, not both\n");
>> return -1;
>> @@ -542,7 +555,6 @@ static int spapr_phb_init(SysBusDevice *s)
>> + sphb->index * SPAPR_PCI_WINDOW_SPACING;
>> sphb->mem_win_addr = windows_base + SPAPR_PCI_MMIO_WIN_OFF;
>> sphb->io_win_addr = windows_base + SPAPR_PCI_IO_WIN_OFF;
>> - sphb->msi_win_addr = windows_base + SPAPR_PCI_MSI_WIN_OFF;
>> }
>>
>> if (sphb->buid == -1) {
>> @@ -565,11 +577,6 @@ static int spapr_phb_init(SysBusDevice *s)
>> return -1;
>> }
>>
>> - if (sphb->msi_win_addr == -1) {
>> - fprintf(stderr, "MSI window address not specified for PHB\n");
>> - return -1;
>> - }
>> -
>> if (find_phb(spapr, sphb->buid)) {
>> fprintf(stderr, "PCI host bridges must have unique BUIDs\n");
>> return -1;
>> @@ -609,18 +616,6 @@ static int spapr_phb_init(SysBusDevice *s)
>> namebuf, SPAPR_PCI_IO_WIN_SIZE);
>> memory_region_add_subregion(get_system_memory(), sphb->io_win_addr,
>> &sphb->iowindow);
>> -
>> - /* As MSI/MSIX interrupts trigger by writing at MSI/MSIX vectors,
>> - * we need to allocate some memory to catch those writes coming
>> - * from msi_notify()/msix_notify() */
>> - if (msi_supported) {
>> - sprintf(namebuf, "%s.msi", sphb->dtbusname);
>> - memory_region_init_io(&sphb->msiwindow, OBJECT(sphb),
>> &spapr_msi_ops, sphb,
>> - namebuf, SPAPR_MSIX_MAX_DEVS * 0x10000);
>> - memory_region_add_subregion(get_system_memory(), sphb->msi_win_addr,
>> - &sphb->msiwindow);
>> - }
>> -
>> /*
>> * Selecting a busname is more complex than you'd think, due to
>> * interacting constraints. If the user has specified an id
>> @@ -695,7 +690,6 @@ static Property spapr_phb_properties[] = {
>> DEFINE_PROP_HEX64("io_win_addr", sPAPRPHBState, io_win_addr, -1),
>> DEFINE_PROP_HEX64("io_win_size", sPAPRPHBState, io_win_size,
>> SPAPR_PCI_IO_WIN_SIZE),
>> - DEFINE_PROP_HEX64("msi_win_addr", sPAPRPHBState, msi_win_addr, -1),
>> DEFINE_PROP_END_OF_LIST(),
>> };
>>
>> @@ -737,7 +731,6 @@ static const VMStateDescription vmstate_spapr_pci = {
>> VMSTATE_UINT64_EQUAL(mem_win_size, sPAPRPHBState),
>> VMSTATE_UINT64_EQUAL(io_win_addr, sPAPRPHBState),
>> VMSTATE_UINT64_EQUAL(io_win_size, sPAPRPHBState),
>> - VMSTATE_UINT64_EQUAL(msi_win_addr, sPAPRPHBState),
>> VMSTATE_STRUCT_ARRAY(lsi_table, sPAPRPHBState, PCI_NUM_PINS, 0,
>> vmstate_spapr_pci_lsi, struct spapr_pci_lsi),
>> VMSTATE_STRUCT_ARRAY(msi_table, sPAPRPHBState, SPAPR_MSIX_MAX_DEVS,
>> 0,
>> diff --git a/include/hw/pci-host/spapr.h b/include/hw/pci-host/spapr.h
>> index 93f9511..970b4a9 100644
>> --- a/include/hw/pci-host/spapr.h
>> +++ b/include/hw/pci-host/spapr.h
>> @@ -43,8 +43,7 @@ typedef struct sPAPRPHBState {
>>
>> MemoryRegion memspace, iospace;
>> hwaddr mem_win_addr, mem_win_size, io_win_addr, io_win_size;
>> - hwaddr msi_win_addr;
>> - MemoryRegion memwindow, iowindow, msiwindow;
>> + MemoryRegion memwindow, iowindow;
>>
>> uint32_t dma_liobn;
>> uint64_t dma_window_start;
>> @@ -73,7 +72,8 @@ typedef struct sPAPRPHBState {
>> #define SPAPR_PCI_MMIO_WIN_SIZE 0x20000000
>> #define SPAPR_PCI_IO_WIN_OFF 0x80000000
>> #define SPAPR_PCI_IO_WIN_SIZE 0x10000
>> -#define SPAPR_PCI_MSI_WIN_OFF 0x90000000
>> +
>> +#define SPAPR_PCI_MSI_WINDOW 0x40000000000ULL
>>
>> #define SPAPR_PCI_MEM_WIN_BUS_OFFSET 0x80000000ULL
>>
>> @@ -88,6 +88,8 @@ int spapr_populate_pci_dt(sPAPRPHBState *phb,
>> uint32_t xics_phandle,
>> void *fdt);
>>
>> +void spapr_pci_msi_init(sPAPREnvironment *spapr, hwaddr addr);
>> +
>> void spapr_pci_rtas_init(void);
>>
>> #endif /* __HW_SPAPR_PCI_H__ */
>> diff --git a/include/hw/ppc/spapr.h b/include/hw/ppc/spapr.h
>> index 7aa9a3d..aed02e1 100644
>> --- a/include/hw/ppc/spapr.h
>> +++ b/include/hw/ppc/spapr.h
>> @@ -14,6 +14,8 @@ struct icp_state;
>> typedef struct sPAPREnvironment {
>> struct VIOsPAPRBus *vio_bus;
>> QLIST_HEAD(, sPAPRPHBState) phbs;
>> + hwaddr msi_win_addr;
>> + MemoryRegion msiwindow;
>> struct sPAPRNVRAM *nvram;
>> struct icp_state *icp;
>>
>> @@ -303,7 +305,7 @@ target_ulong spapr_hypercall(PowerPCCPU *cpu,
>> target_ulong opcode,
>> target_ulong *args);
>>
>> int spapr_allocate_irq(int hint, bool lsi);
>> -int spapr_allocate_irq_block(int num, bool lsi);
>> +int spapr_allocate_irq_block(int num, bool lsi, bool msi);
>>
>> static inline int spapr_allocate_msi(int hint)
>> {
>>
>
>
--
Alexey