Re: [PATCH v3 2/5] softmmu: Support concurrent bounce buffers
From: Peter Xu
Subject: Re: [PATCH v3 2/5] softmmu: Support concurrent bounce buffers
Date: Wed, 13 Sep 2023 15:11:53 -0400
On Thu, Sep 07, 2023 at 06:04:07AM -0700, Mattias Nissler wrote:
> When DMA memory can't be directly accessed, as is the case when
> running the device model in a separate process without shareable DMA
> file descriptors, bounce buffering is used.
>
> It is not uncommon for device models to request mapping of several DMA
> regions at the same time. Examples include:
> * net devices, e.g. when transmitting a packet that is split across
> several TX descriptors (observed with igb)
> * USB host controllers, when handling a packet with multiple data TRBs
> (observed with xhci)
>
> Previously, QEMU only provided a single bounce buffer per AddressSpace
> and would fail DMA map requests while the buffer was in use. In turn,
> this caused DMA failures that ultimately manifested as hardware errors
> from the guest's perspective.
>
> This change allocates DMA bounce buffers dynamically instead of
> supporting only a single buffer. Thus, multiple DMA mappings also work
> correctly when RAM can't be mmap()-ed.
>
> The total bounce buffer allocation size is limited individually for each
> AddressSpace. The default limit is 4096 bytes, matching the previous
> maximum buffer size. A new x-max-bounce-buffer-size parameter is
> provided to configure the limit for PCI devices.
>
> Signed-off-by: Mattias Nissler <mnissler@rivosinc.com>
> ---
> hw/pci/pci.c | 8 ++++
> include/exec/memory.h | 14 ++----
> include/hw/pci/pci_device.h | 3 ++
> softmmu/memory.c | 3 +-
> softmmu/physmem.c | 94 +++++++++++++++++++++++++------------
> 5 files changed, 80 insertions(+), 42 deletions(-)
>
> diff --git a/hw/pci/pci.c b/hw/pci/pci.c
> index 881d774fb6..8c4541b394 100644
> --- a/hw/pci/pci.c
> +++ b/hw/pci/pci.c
> @@ -85,6 +85,8 @@ static Property pci_props[] = {
> QEMU_PCIE_ERR_UNC_MASK_BITNR, true),
> DEFINE_PROP_BIT("x-pcie-ari-nextfn-1", PCIDevice, cap_present,
> QEMU_PCIE_ARI_NEXTFN_1_BITNR, false),
> + DEFINE_PROP_SIZE("x-max-bounce-buffer-size", PCIDevice,
> + max_bounce_buffer_size, 4096),
> DEFINE_PROP_END_OF_LIST()
> };
>
> @@ -1208,6 +1210,8 @@ static PCIDevice *do_pci_register_device(PCIDevice *pci_dev,
> "bus master container", UINT64_MAX);
> address_space_init(&pci_dev->bus_master_as,
> &pci_dev->bus_master_container_region, pci_dev->name);
> + pci_dev->bus_master_as.max_bounce_buffer_size =
> + pci_dev->max_bounce_buffer_size;
>
> if (phase_check(PHASE_MACHINE_READY)) {
> pci_init_bus_master(pci_dev);
> @@ -2664,6 +2668,10 @@ static void pci_device_class_init(ObjectClass *klass, void *data)
> k->unrealize = pci_qdev_unrealize;
> k->bus_type = TYPE_PCI_BUS;
> device_class_set_props(k, pci_props);
> + object_class_property_set_description(
> + klass, "x-max-bounce-buffer-size",
> + "Maximum buffer size allocated for bounce buffers used for mapped "
> + "access to indirect DMA memory");
> }
>
> static void pci_device_class_base_init(ObjectClass *klass, void *data)
> diff --git a/include/exec/memory.h b/include/exec/memory.h
> index 7d68936157..5577542b5e 100644
> --- a/include/exec/memory.h
> +++ b/include/exec/memory.h
> @@ -1081,14 +1081,6 @@ typedef struct AddressSpaceMapClient {
> QLIST_ENTRY(AddressSpaceMapClient) link;
> } AddressSpaceMapClient;
>
> -typedef struct {
> - MemoryRegion *mr;
> - void *buffer;
> - hwaddr addr;
> - hwaddr len;
> - bool in_use;
> -} BounceBuffer;
> -
> /**
> * struct AddressSpace: describes a mapping of addresses to #MemoryRegion objects
> */
> @@ -1106,8 +1098,10 @@ struct AddressSpace {
> QTAILQ_HEAD(, MemoryListener) listeners;
> QTAILQ_ENTRY(AddressSpace) address_spaces_link;
>
> - /* Bounce buffer to use for this address space. */
> - BounceBuffer bounce;
> + /* Maximum DMA bounce buffer size used for indirect memory map requests */
> + uint64_t max_bounce_buffer_size;
> + /* Total size of bounce buffers currently allocated, atomically accessed */
> + uint64_t bounce_buffer_size;
> /* List of callbacks to invoke when buffers free up */
> QemuMutex map_client_list_lock;
> QLIST_HEAD(, AddressSpaceMapClient) map_client_list;
> diff --git a/include/hw/pci/pci_device.h b/include/hw/pci/pci_device.h
> index d3dd0f64b2..f4027c5379 100644
> --- a/include/hw/pci/pci_device.h
> +++ b/include/hw/pci/pci_device.h
> @@ -160,6 +160,9 @@ struct PCIDevice {
> /* ID of standby device in net_failover pair */
> char *failover_pair_id;
> uint32_t acpi_index;
> +
> + /* Maximum DMA bounce buffer size used for indirect memory map requests */
> + uint64_t max_bounce_buffer_size;
> };
>
> static inline int pci_intx(PCIDevice *pci_dev)
> diff --git a/softmmu/memory.c b/softmmu/memory.c
> index 5c9622c3d6..e02799359c 100644
> --- a/softmmu/memory.c
> +++ b/softmmu/memory.c
> @@ -3105,7 +3105,8 @@ void address_space_init(AddressSpace *as, MemoryRegion *root, const char *name)
> as->ioeventfds = NULL;
> QTAILQ_INIT(&as->listeners);
> QTAILQ_INSERT_TAIL(&address_spaces, as, address_spaces_link);
> - as->bounce.in_use = false;
> + as->max_bounce_buffer_size = 4096;
Instead of hard-coding this 4k again (besides the pci property), maybe we
can have address_space_init_with_bouncebuffer() and always pass it in from
pci do_realize? Then by default no bounce buffer would be supported for
the AS, only when explicitly requested.
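
Rough sketch of what I mean, untested (the helper name is just a
suggestion):

  void address_space_init_with_bouncebuffer(AddressSpace *as,
                                            MemoryRegion *root,
                                            const char *name,
                                            uint64_t max_bounce_buffer_size)
  {
      address_space_init(as, root, name);
      as->max_bounce_buffer_size = max_bounce_buffer_size;
  }

Then plain address_space_init() could leave max_bounce_buffer_size at 0,
and do_pci_register_device() would call the new helper with
pci_dev->max_bounce_buffer_size.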
> + as->bounce_buffer_size = 0;
> qemu_mutex_init(&as->map_client_list_lock);
> QLIST_INIT(&as->map_client_list);
> as->name = g_strdup(name ? name : "anonymous");
> diff --git a/softmmu/physmem.c b/softmmu/physmem.c
> index f40cc564b8..e3d1cf5fba 100644
> --- a/softmmu/physmem.c
> +++ b/softmmu/physmem.c
> @@ -2926,6 +2926,20 @@ void cpu_flush_icache_range(hwaddr start, hwaddr len)
> NULL, len, FLUSH_CACHE);
> }
>
> +/*
> + * A magic value stored in the first 8 bytes of the bounce buffer struct. Used
> + * to detect illegal pointers passed to address_space_unmap.
> + */
> +#define BOUNCE_BUFFER_MAGIC 0xb4017ceb4ffe12ed
> +
> +typedef struct {
> + uint64_t magic;
> + MemoryRegion *mr;
> + hwaddr addr;
> + size_t len;
> + uint8_t buffer[];
> +} BounceBuffer;
> +
> static void
> address_space_unregister_map_client_do(AddressSpaceMapClient *client)
> {
> @@ -2953,7 +2967,7 @@ void address_space_register_map_client(AddressSpace *as, QEMUBH *bh)
> QLIST_INSERT_HEAD(&as->map_client_list, client, link);
> /* Write map_client_list before reading bounce_buffer_size. */
> smp_mb();
> - if (!qatomic_read(&as->bounce.in_use)) {
> + if (qatomic_read(&as->bounce_buffer_size) < as->max_bounce_buffer_size) {
> address_space_notify_map_clients_locked(as);
> }
> qemu_mutex_unlock(&as->map_client_list_lock);
> @@ -3081,31 +3095,36 @@ void *address_space_map(AddressSpace *as,
> RCU_READ_LOCK_GUARD();
> fv = address_space_to_flatview(as);
> mr = flatview_translate(fv, addr, &xlat, &l, is_write, attrs);
> + memory_region_ref(mr);
>
> if (!memory_access_is_direct(mr, is_write)) {
> - if (qatomic_xchg(&as->bounce.in_use, true)) {
> + size_t size = qatomic_add_fetch(&as->bounce_buffer_size, l);
> + if (size > as->max_bounce_buffer_size) {
> + size_t excess = size - as->max_bounce_buffer_size;
> + l -= excess;
> + qatomic_sub(&as->bounce_buffer_size, excess);
> + }
> +
> + if (l == 0) {
> *plen = 0;
> return NULL;
MR refcount leak? memory_region_ref(mr) was moved above this branch, so
the reference is never dropped on this early-return path.
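Something like this should plug it, I think (untested):

  if (l == 0) {
      memory_region_unref(mr);
      *plen = 0;
      return NULL;
  }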
> }
> - /* Avoid unbounded allocations */
> - l = MIN(l, TARGET_PAGE_SIZE);
> - as->bounce.buffer = qemu_memalign(TARGET_PAGE_SIZE, l);
> - as->bounce.addr = addr;
> - as->bounce.len = l;
>
> - memory_region_ref(mr);
> - as->bounce.mr = mr;
> + BounceBuffer *bounce = g_malloc0(l + sizeof(BounceBuffer));
> + bounce->magic = BOUNCE_BUFFER_MAGIC;
> + bounce->mr = mr;
> + bounce->addr = addr;
> + bounce->len = l;
> +
> if (!is_write) {
> flatview_read(fv, addr, MEMTXATTRS_UNSPECIFIED,
> - as->bounce.buffer, l);
> + bounce->buffer, l);
> }
>
> *plen = l;
> - return as->bounce.buffer;
> + return bounce->buffer;
> }
>
> -
> - memory_region_ref(mr);
> *plen = flatview_extend_translation(fv, addr, len, mr, xlat,
> l, is_write, attrs);
> fuzz_dma_read_cb(addr, *plen, mr);
> @@ -3119,31 +3138,44 @@ void *address_space_map(AddressSpace *as,
> void address_space_unmap(AddressSpace *as, void *buffer, hwaddr len,
> bool is_write, hwaddr access_len)
> {
> - if (buffer != as->bounce.buffer) {
> - MemoryRegion *mr;
> - ram_addr_t addr1;
> + MemoryRegion *mr;
> + ram_addr_t addr1;
> +
> + mr = memory_region_from_host(buffer, &addr1);
> + if (mr == NULL) {
> + BounceBuffer *bounce = container_of(buffer, BounceBuffer, buffer);
> + if (bounce->magic != BOUNCE_BUFFER_MAGIC) {
> + error_report(
> + "Unmap request for %p, which neither corresponds to a memory
> "
> + "region, nor looks like a bounce buffer, ignoring!",
> + buffer);
> + return;
Would assert() be better here? So that when it triggers we'd already have
the user's stack trace.
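I.e. simply, keeping the same check:

  assert(bounce->magic == BOUNCE_BUFFER_MAGIC);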
> + }
>
> - mr = memory_region_from_host(buffer, &addr1);
> - assert(mr != NULL);
> if (is_write) {
> - invalidate_and_set_dirty(mr, addr1, access_len);
> + address_space_write(as, bounce->addr, MEMTXATTRS_UNSPECIFIED,
> + bounce->buffer, access_len);
> }
> - if (xen_enabled()) {
> - xen_invalidate_map_cache_entry(buffer);
> +
> + memory_region_unref(bounce->mr);
> + uint64_t previous_buffer_size =
> + qatomic_fetch_sub(&as->bounce_buffer_size, bounce->len);
> + if (previous_buffer_size == as->max_bounce_buffer_size) {
Can there be a race condition where someone just temporarily boosted
bounce_buffer_size, so that the previous value won't exactly equal the max
but will actually be larger (while we should still notify)?
If the current context already holds a bounce buffer and is going to unmap
it (len>0), IIUC it means we should always notify, because there will
definitely be some space left afterwards, and the requesters can retry to
see whether the size fits.
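E.g. (untested; just to illustrate notifying unconditionally):

  qatomic_fetch_sub(&as->bounce_buffer_size, bounce->len);
  /* We just freed bounce->len bytes, so a waiter can always retry. */
  address_space_notify_map_clients(as);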
> + /* Write bounce_buffer_size before reading map_client_list. */
> + smp_mb();
I know it comes from the old code.. but I don't know why this is needed;
taking the mutex should already imply an mb() before the list iteration
later. Just raising it up; maybe Paolo would like to comment.
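For reference, address_space_notify_map_clients() already starts with:

  qemu_mutex_lock(&as->map_client_list_lock);

which, IIUC, should order the bounce_buffer_size update before the list
walk without the explicit smp_mb().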
> + address_space_notify_map_clients(as);
> }
> - memory_region_unref(mr);
> + bounce->magic = ~BOUNCE_BUFFER_MAGIC;
> + g_free(bounce);
> return;
> }
> +
> + if (xen_enabled()) {
> + xen_invalidate_map_cache_entry(buffer);
> + }
> if (is_write) {
> - address_space_write(as, as->bounce.addr, MEMTXATTRS_UNSPECIFIED,
> - as->bounce.buffer, access_len);
> - }
> - qemu_vfree(as->bounce.buffer);
> - as->bounce.buffer = NULL;
> - memory_region_unref(as->bounce.mr);
> - /* Clear in_use before reading map_client_list. */
> - qatomic_set_mb(&as->bounce.in_use, false);
> - address_space_notify_map_clients(as);
> + invalidate_and_set_dirty(mr, addr1, access_len);
> + }
> }
>
> void *cpu_physical_memory_map(hwaddr addr,
> --
> 2.34.1
>
--
Peter Xu