[Date Prev][Date Next][Thread Prev][Thread Next][Date Index][Thread Index]
Re: [Qemu-devel] [PULL 3/8] vfio: Generalize region support
From: |
Alex Williamson |
Subject: |
Re: [Qemu-devel] [PULL 3/8] vfio: Generalize region support |
Date: |
Thu, 10 Mar 2016 09:34:08 -0700 |
On Thu, 10 Mar 2016 01:54:45 +0100
Eric Auger <address@hidden> wrote:
> Hi Alex,
> On 03/09/2016 08:53 PM, Alex Williamson wrote:
> > Both platform and PCI vfio drivers create a "slow", I/O memory region
> > with one or more mmap memory regions overlayed when supported by the
> > device. Generalize this to a set of common helpers in the core that
> > pulls the region info from vfio, fills the region data, configures
> > slow mapping, and adds helpers for comleting the mmap, enable/disable,
> > and teardown. This can be immediately used by the PCI MSI-X code,
> > which needs to mmap around the MSI-X vector table.
> >
> > This also changes VFIORegion.mem to be dynamically allocated because
> > otherwise we don't know how the caller has allocated VFIORegion and
> > therefore don't know whether to unreference it to destroy the
> > MemoryRegion or not.
> >
> > Signed-off-by: Alex Williamson <address@hidden>
> > ---
> > hw/arm/sysbus-fdt.c | 4 -
> > hw/vfio/common.c | 172
> > ++++++++++++++++++++++++++++++++++-------
> > hw/vfio/pci-quirks.c | 24 +++---
> > hw/vfio/pci.c | 168
> > +++++++++++++++++++++-------------------
> > hw/vfio/platform.c | 72 +++--------------
> > include/hw/vfio/vfio-common.h | 23 ++++-
> > trace-events | 10 ++
> > 7 files changed, 283 insertions(+), 190 deletions(-)
> >
> > diff --git a/hw/arm/sysbus-fdt.c b/hw/arm/sysbus-fdt.c
> > index 04afeae..49bd212 100644
> > --- a/hw/arm/sysbus-fdt.c
> > +++ b/hw/arm/sysbus-fdt.c
> > @@ -240,7 +240,7 @@ static int
> > add_calxeda_midway_xgmac_fdt_node(SysBusDevice *sbdev, void *opaque)
> > mmio_base = platform_bus_get_mmio_addr(pbus, sbdev, i);
> > reg_attr[2 * i] = cpu_to_be32(mmio_base);
> > reg_attr[2 * i + 1] = cpu_to_be32(
> > -
> > memory_region_size(&vdev->regions[i]->mem));
> > + memory_region_size(vdev->regions[i]->mem));
> > }
> > qemu_fdt_setprop(fdt, nodename, "reg", reg_attr,
> > vbasedev->num_regions * 2 * sizeof(uint32_t));
> > @@ -374,7 +374,7 @@ static int add_amd_xgbe_fdt_node(SysBusDevice *sbdev,
> > void *opaque)
> > mmio_base = platform_bus_get_mmio_addr(pbus, sbdev, i);
> > reg_attr[2 * i] = cpu_to_be32(mmio_base);
> > reg_attr[2 * i + 1] = cpu_to_be32(
> > -
> > memory_region_size(&vdev->regions[i]->mem));
> > + memory_region_size(vdev->regions[i]->mem));
> > }
> > qemu_fdt_setprop(guest_fdt, nodename, "reg", reg_attr,
> > vbasedev->num_regions * 2 * sizeof(uint32_t));
> > diff --git a/hw/vfio/common.c b/hw/vfio/common.c
> > index e20fc4f..96ccb79 100644
> > --- a/hw/vfio/common.c
> > +++ b/hw/vfio/common.c
> > @@ -493,46 +493,162 @@ static void vfio_listener_release(VFIOContainer
> > *container)
> > memory_listener_unregister(&container->listener);
> > }
> >
> > -int vfio_mmap_region(Object *obj, VFIORegion *region,
> > - MemoryRegion *mem, MemoryRegion *submem,
> > - void **map, size_t size, off_t offset,
> > - const char *name)
> > +int vfio_region_setup(Object *obj, VFIODevice *vbasedev, VFIORegion
> > *region,
> > + int index, const char *name)
> > {
> > - int ret = 0;
> > - VFIODevice *vbasedev = region->vbasedev;
> > + struct vfio_region_info *info;
> > + int ret;
> > +
> > + ret = vfio_get_region_info(vbasedev, index, &info);
> > + if (ret) {
> > + return ret;
> > + }
> > +
> > + region->vbasedev = vbasedev;
> > + region->flags = info->flags;
> > + region->size = info->size;
> > + region->fd_offset = info->offset;
> > + region->nr = index;
> >
> > - if (!vbasedev->no_mmap && size && region->flags &
> > - VFIO_REGION_INFO_FLAG_MMAP) {
> > - int prot = 0;
> > + if (region->size) {
> > + region->mem = g_new0(MemoryRegion, 1);
> > + memory_region_init_io(region->mem, obj, &vfio_region_ops,
> > + region, name, region->size);
> >
> > - if (region->flags & VFIO_REGION_INFO_FLAG_READ) {
> > - prot |= PROT_READ;
> > + if (!vbasedev->no_mmap &&
> > + region->flags & VFIO_REGION_INFO_FLAG_MMAP &&
> > + !(region->size & ~qemu_real_host_page_mask)) {
> > +
> > + region->nr_mmaps = 1;
> > + region->mmaps = g_new0(VFIOMmap, region->nr_mmaps);
> > +
> > + region->mmaps[0].offset = 0;
> > + region->mmaps[0].size = region->size;
> > }
> > + }
> > +
> > + g_free(info);
> > +
> > + trace_vfio_region_setup(vbasedev->name, index, name,
> > + region->flags, region->fd_offset,
> > region->size);
> > + return 0;
> > +}
> >
> > - if (region->flags & VFIO_REGION_INFO_FLAG_WRITE) {
> > - prot |= PROT_WRITE;
> > +int vfio_region_mmap(VFIORegion *region)
> > +{
> > + int i, prot = 0;
> > + char *name;
> > +
> > + if (!region->mem) {
> > + return 0;
> > + }
> > +
> > + prot |= region->flags & VFIO_REGION_INFO_FLAG_READ ? PROT_READ : 0;
> > + prot |= region->flags & VFIO_REGION_INFO_FLAG_WRITE ? PROT_WRITE : 0;
> > +
> > + for (i = 0; i < region->nr_mmaps; i++) {
> > + region->mmaps[i].mmap = mmap(NULL, region->mmaps[i].size, prot,
> > + MAP_SHARED, region->vbasedev->fd,
> > + region->fd_offset +
> > + region->mmaps[i].offset);
> > + if (region->mmaps[i].mmap == MAP_FAILED) {
> > + int ret = -errno;
> > +
> > + trace_vfio_region_mmap_fault(memory_region_name(region->mem),
> > i,
> > + region->fd_offset +
> > + region->mmaps[i].offset,
> > + region->fd_offset +
> > + region->mmaps[i].offset +
> > + region->mmaps[i].size - 1, ret);
> > +
> > + region->mmaps[i].mmap = NULL;
> > +
> > + for (i--; i >= 0; i--) {
> > + memory_region_del_subregion(region->mem,
> > ®ion->mmaps[i].mem);
> > + munmap(region->mmaps[i].mmap, region->mmaps[i].size);
> > + object_unparent(OBJECT(®ion->mmaps[i].mem));
> > + region->mmaps[i].mmap = NULL;
> > + }
> > +
> > + return ret;
> > }
> >
> > - *map = mmap(NULL, size, prot, MAP_SHARED,
> > - vbasedev->fd,
> > - region->fd_offset + offset);
> > - if (*map == MAP_FAILED) {
> > - *map = NULL;
> > - ret = -errno;
> > - goto empty_region;
> > + name = g_strdup_printf("%s mmaps[%d]",
> > + memory_region_name(region->mem), i);
> > + memory_region_init_ram_ptr(®ion->mmaps[i].mem,
> > + memory_region_owner(region->mem),
> > + name, region->mmaps[i].size,
> > + region->mmaps[i].mmap);
> > + g_free(name);
> > + memory_region_set_skip_dump(®ion->mmaps[i].mem);
> > + memory_region_add_subregion(region->mem, region->mmaps[i].offset,
> > + ®ion->mmaps[i].mem);
> > +
> > + trace_vfio_region_mmap(memory_region_name(®ion->mmaps[i].mem),
> > + region->mmaps[i].offset,
> > + region->mmaps[i].offset +
> > + region->mmaps[i].size - 1);
> > + }
> > +
> > + return 0;
> > +}
> > +
> > +void vfio_region_exit(VFIORegion *region)
> > +{
> > + int i;
> > +
> > + if (!region->mem) {
> > + return;
> > + }
> > +
> > + for (i = 0; i < region->nr_mmaps; i++) {
> > + if (region->mmaps[i].mmap) {
> > + memory_region_del_subregion(region->mem,
> > ®ion->mmaps[i].mem);
> > }
> > + }
> >
> > - memory_region_init_ram_ptr(submem, obj, name, size, *map);
> > - memory_region_set_skip_dump(submem);
> > - } else {
> > -empty_region:
> > - /* Create a zero sized sub-region to make cleanup easy. */
> > - memory_region_init(submem, obj, name, 0);
> > + trace_vfio_region_exit(region->vbasedev->name, region->nr);
> > +}
> > +
> > +void vfio_region_finalize(VFIORegion *region)
> > +{
> > + int i;
> > +
> > + if (!region->mem) {
> > + return;
> > }
> >
> > - memory_region_add_subregion(mem, offset, submem);
> > + for (i = 0; i < region->nr_mmaps; i++) {
> > + if (region->mmaps[i].mmap) {
> > + munmap(region->mmaps[i].mmap, region->mmaps[i].size);
> > + object_unparent(OBJECT(®ion->mmaps[i].mem));
> > + }
> > + }
> >
> > - return ret;
> > + object_unparent(OBJECT(region->mem));
> > +
> > + g_free(region->mem);
> > + g_free(region->mmaps);
> > +
> > + trace_vfio_region_finalize(region->vbasedev->name, region->nr);
> > +}
> > +
> > +void vfio_region_mmaps_set_enabled(VFIORegion *region, bool enabled)
> > +{
> > + int i;
> > +
> > + if (!region->mem) {
> > + return;
> > + }
> > +
> > + for (i = 0; i < region->nr_mmaps; i++) {
> > + if (region->mmaps[i].mmap) {
> > + memory_region_set_enabled(®ion->mmaps[i].mem, enabled);
> > + }
> > + }
> > +
> > + trace_vfio_region_mmaps_set_enabled(memory_region_name(region->mem),
> > + enabled);
> > }
> >
> > void vfio_reset_handler(void *opaque)
> > diff --git a/hw/vfio/pci-quirks.c b/hw/vfio/pci-quirks.c
> > index 4815527..d626ec9 100644
> > --- a/hw/vfio/pci-quirks.c
> > +++ b/hw/vfio/pci-quirks.c
> > @@ -337,14 +337,14 @@ static void vfio_probe_ati_bar4_quirk(VFIOPCIDevice
> > *vdev, int nr)
> > memory_region_init_io(window->addr_mem, OBJECT(vdev),
> > &vfio_generic_window_address_quirk, window,
> > "vfio-ati-bar4-window-address-quirk", 4);
> > - memory_region_add_subregion_overlap(&vdev->bars[nr].region.mem,
> > + memory_region_add_subregion_overlap(vdev->bars[nr].region.mem,
> > window->address_offset,
> > window->addr_mem, 1);
> >
> > memory_region_init_io(window->data_mem, OBJECT(vdev),
> > &vfio_generic_window_data_quirk, window,
> > "vfio-ati-bar4-window-data-quirk", 4);
> > - memory_region_add_subregion_overlap(&vdev->bars[nr].region.mem,
> > + memory_region_add_subregion_overlap(vdev->bars[nr].region.mem,
> > window->data_offset,
> > window->data_mem, 1);
> >
> > @@ -378,7 +378,7 @@ static void vfio_probe_ati_bar2_quirk(VFIOPCIDevice
> > *vdev, int nr)
> > memory_region_init_io(mirror->mem, OBJECT(vdev),
> > &vfio_generic_mirror_quirk, mirror,
> > "vfio-ati-bar2-4000-quirk",
> > PCI_CONFIG_SPACE_SIZE);
> > - memory_region_add_subregion_overlap(&vdev->bars[nr].region.mem,
> > + memory_region_add_subregion_overlap(vdev->bars[nr].region.mem,
> > mirror->offset, mirror->mem, 1);
> >
> > QLIST_INSERT_HEAD(&vdev->bars[nr].quirks, quirk, next);
> > @@ -683,7 +683,7 @@ static void vfio_probe_nvidia_bar5_quirk(VFIOPCIDevice
> > *vdev, int nr)
> > memory_region_init_io(window->addr_mem, OBJECT(vdev),
> > &vfio_generic_window_address_quirk, window,
> > "vfio-nvidia-bar5-window-address-quirk", 4);
> > - memory_region_add_subregion_overlap(&vdev->bars[nr].region.mem,
> > + memory_region_add_subregion_overlap(vdev->bars[nr].region.mem,
> > window->address_offset,
> > window->addr_mem, 1);
> > memory_region_set_enabled(window->addr_mem, false);
> > @@ -691,7 +691,7 @@ static void vfio_probe_nvidia_bar5_quirk(VFIOPCIDevice
> > *vdev, int nr)
> > memory_region_init_io(window->data_mem, OBJECT(vdev),
> > &vfio_generic_window_data_quirk, window,
> > "vfio-nvidia-bar5-window-data-quirk", 4);
> > - memory_region_add_subregion_overlap(&vdev->bars[nr].region.mem,
> > + memory_region_add_subregion_overlap(vdev->bars[nr].region.mem,
> > window->data_offset,
> > window->data_mem, 1);
> > memory_region_set_enabled(window->data_mem, false);
> > @@ -699,13 +699,13 @@ static void
> > vfio_probe_nvidia_bar5_quirk(VFIOPCIDevice *vdev, int nr)
> > memory_region_init_io(&quirk->mem[2], OBJECT(vdev),
> > &vfio_nvidia_bar5_quirk_master, bar5,
> > "vfio-nvidia-bar5-master-quirk", 4);
> > - memory_region_add_subregion_overlap(&vdev->bars[nr].region.mem,
> > + memory_region_add_subregion_overlap(vdev->bars[nr].region.mem,
> > 0, &quirk->mem[2], 1);
> >
> > memory_region_init_io(&quirk->mem[3], OBJECT(vdev),
> > &vfio_nvidia_bar5_quirk_enable, bar5,
> > "vfio-nvidia-bar5-enable-quirk", 4);
> > - memory_region_add_subregion_overlap(&vdev->bars[nr].region.mem,
> > + memory_region_add_subregion_overlap(vdev->bars[nr].region.mem,
> > 4, &quirk->mem[3], 1);
> >
> > QLIST_INSERT_HEAD(&vdev->bars[nr].quirks, quirk, next);
> > @@ -767,7 +767,7 @@ static void vfio_probe_nvidia_bar0_quirk(VFIOPCIDevice
> > *vdev, int nr)
> > &vfio_nvidia_mirror_quirk, mirror,
> > "vfio-nvidia-bar0-88000-mirror-quirk",
> > vdev->config_size);
> > - memory_region_add_subregion_overlap(&vdev->bars[nr].region.mem,
> > + memory_region_add_subregion_overlap(vdev->bars[nr].region.mem,
> > mirror->offset, mirror->mem, 1);
> >
> > QLIST_INSERT_HEAD(&vdev->bars[nr].quirks, quirk, next);
> > @@ -786,7 +786,7 @@ static void vfio_probe_nvidia_bar0_quirk(VFIOPCIDevice
> > *vdev, int nr)
> > &vfio_nvidia_mirror_quirk, mirror,
> > "vfio-nvidia-bar0-1800-mirror-quirk",
> > PCI_CONFIG_SPACE_SIZE);
> > - memory_region_add_subregion_overlap(&vdev->bars[nr].region.mem,
> > + memory_region_add_subregion_overlap(vdev->bars[nr].region.mem,
> > mirror->offset, mirror->mem,
> > 1);
> >
> > QLIST_INSERT_HEAD(&vdev->bars[nr].quirks, quirk, next);
> > @@ -947,13 +947,13 @@ static void
> > vfio_probe_rtl8168_bar2_quirk(VFIOPCIDevice *vdev, int nr)
> > memory_region_init_io(&quirk->mem[0], OBJECT(vdev),
> > &vfio_rtl_address_quirk, rtl,
> > "vfio-rtl8168-window-address-quirk", 4);
> > - memory_region_add_subregion_overlap(&vdev->bars[nr].region.mem,
> > + memory_region_add_subregion_overlap(vdev->bars[nr].region.mem,
> > 0x74, &quirk->mem[0], 1);
> >
> > memory_region_init_io(&quirk->mem[1], OBJECT(vdev),
> > &vfio_rtl_data_quirk, rtl,
> > "vfio-rtl8168-window-data-quirk", 4);
> > - memory_region_add_subregion_overlap(&vdev->bars[nr].region.mem,
> > + memory_region_add_subregion_overlap(vdev->bars[nr].region.mem,
> > 0x70, &quirk->mem[1], 1);
> >
> > QLIST_INSERT_HEAD(&vdev->bars[nr].quirks, quirk, next);
> > @@ -1020,7 +1020,7 @@ void vfio_bar_quirk_teardown(VFIOPCIDevice *vdev, int
> > nr)
> >
> > QLIST_FOREACH(quirk, &bar->quirks, next) {
> > for (i = 0; i < quirk->nr_mem; i++) {
> > - memory_region_del_subregion(&bar->region.mem, &quirk->mem[i]);
> > + memory_region_del_subregion(bar->region.mem, &quirk->mem[i]);
> > }
> > }
> > }
> > diff --git a/hw/vfio/pci.c b/hw/vfio/pci.c
> > index db7a950..f18a678 100644
> > --- a/hw/vfio/pci.c
> > +++ b/hw/vfio/pci.c
> > @@ -1166,6 +1166,74 @@ static int vfio_msi_setup(VFIOPCIDevice *vdev, int
> > pos)
> > return 0;
> > }
> >
> > +static void vfio_pci_fixup_msix_region(VFIOPCIDevice *vdev)
> > +{
> > + off_t start, end;
> > + VFIORegion *region = &vdev->bars[vdev->msix->table_bar].region;
> > +
> > + /*
> > + * We expect to find a single mmap covering the whole BAR, anything
> > else
> > + * means it's either unsupported or already setup.
> > + */
> > + if (region->nr_mmaps != 1 || region->mmaps[0].offset ||
> > + region->size != region->mmaps[0].size) {
> > + return;
> > + }
> > +
> > + /* MSI-X table start and end aligned to host page size */
> > + start = vdev->msix->table_offset & qemu_real_host_page_mask;
> > + end = REAL_HOST_PAGE_ALIGN((uint64_t)vdev->msix->table_offset +
> > + (vdev->msix->entries *
> > PCI_MSIX_ENTRY_SIZE));
> > +
> > + /*
> > + * Does the MSI-X table cover the beginning of the BAR? The whole BAR?
> > + * NB - Host page size is necessarily a power of two and so is the PCI
> > + * BAR (not counting EA yet), therefore if we have host page aligned
> > + * @start and @end, then any remainder of the BAR before or after those
> > + * must be at least host page sized and therefore mmap'able.
> > + */
> > + if (!start) {
> > + if (end >= region->size) {
> > + region->nr_mmaps = 0;
> > + g_free(region->mmaps);
> > + region->mmaps = NULL;
> > + trace_vfio_msix_fixup(vdev->vbasedev.name,
> > + vdev->msix->table_bar, 0, 0);
> > + } else {
> > + region->mmaps[0].offset = end;
> > + region->mmaps[0].size = region->size - end;
> > + trace_vfio_msix_fixup(vdev->vbasedev.name,
> > + vdev->msix->table_bar,
> > region->mmaps[0].offset,
> > + region->mmaps[0].offset +
> > region->mmaps[0].size);
> Sorry this does not compile for me on arm 32b:
>
> ./trace/generated-tracers.h:16113:23: error: format ‘%lx’ expects
> argument of type ‘long unsigned int’, but argument 8 has type ‘off_t’
> [-Werror=format=] , name, bar, offset, size);
>
> -> vfio_msix_fixup(const char *name, int bar, off_t start, off_t end) "
> (%s) MSI-X region %d mmap fixup [0x%"PRIx64" - 0x%"PRIx64"]" ?
Thanks Eric, that's exactly the right fix:
diff --git a/trace-events b/trace-events
index e36bc07..fd07512 100644
--- a/trace-events
+++ b/trace-events
@@ -1652,7 +1652,7 @@ vfio_msix_enable(const char *name) " (%s)"
vfio_msix_pba_disable(const char *name) " (%s)"
vfio_msix_pba_enable(const char *name) " (%s)"
vfio_msix_disable(const char *name) " (%s)"
-vfio_msix_fixup(const char *name, int bar, off_t offset, size_t size) " (%s)
MSI-X region %d mmap fixup [0x%lx - 0x%lx]"
+vfio_msix_fixup(const char *name, int bar, off_t start, off_t end) " (%s)
MSI-X region %d mmap fixup [0x%"PRIx64" - 0x%"PRIx64"]"
vfio_msi_enable(const char *name, int nr_vectors) " (%s) Enabled %d MSI
vectors"
vfio_msi_disable(const char *name) " (%s)"
vfio_pci_load_rom(const char *name, unsigned long size, unsigned long offset,
unsigned long flags) "Device %s ROM:\n size: 0x%lx, offset: 0x%lx, flags:
0x%lx"
[Qemu-devel] [PULL 4/8] vfio/pci: Convert all MemoryRegion to dynamic alloc and consistent functions, Alex Williamson, 2016/03/09
[Qemu-devel] [PULL 5/8] vfio/pci: Fixup PCI option ROMs, Alex Williamson, 2016/03/09
[Qemu-devel] [PULL 6/8] vfio/pci: Split out VGA setup, Alex Williamson, 2016/03/09
[Qemu-devel] [PULL 7/8] vfio/pci: replace fixed string limit by g_strdup_printf, Alex Williamson, 2016/03/09
[Qemu-devel] [PULL 8/8] MAINTAINERS: Add entry for the include/hw/vfio/ folder, Alex Williamson, 2016/03/09
Re: [Qemu-devel] [PULL 0/8] VFIO updates 2016-03-09, Peter Maydell, 2016/03/10