qemu-devel
[Top][All Lists]
Advanced

[Date Prev][Date Next][Thread Prev][Thread Next][Date Index][Thread Index]

Re: [PATCH v3 4/5] vfio-user: Message-based DMA support


From: Stefan Hajnoczi
Subject: Re: [PATCH v3 4/5] vfio-user: Message-based DMA support
Date: Thu, 14 Sep 2023 15:04:48 -0400

On Thu, Sep 07, 2023 at 06:04:09AM -0700, Mattias Nissler wrote:
> Wire up support for DMA for the case where the vfio-user client does not
> provide mmap()-able file descriptors, but DMA requests must be performed
> via the VFIO-user protocol. This installs an indirect memory region,
> which already works for pci_dma_{read,write}, and pci_dma_map works
> thanks to the existing DMA bounce buffering support.
> 
> Note that while simple scenarios work with this patch, there's a known
> race condition in libvfio-user that will mess up the communication
> channel. See https://github.com/nutanix/libvfio-user/issues/279 for
> details as well as a proposed fix.
> 
> Signed-off-by: Mattias Nissler <mnissler@rivosinc.com>
> ---
>  hw/remote/trace-events    |  2 +
>  hw/remote/vfio-user-obj.c | 84 +++++++++++++++++++++++++++++++++++----
>  2 files changed, 79 insertions(+), 7 deletions(-)
> 
> diff --git a/hw/remote/trace-events b/hw/remote/trace-events
> index 0d1b7d56a5..358a68fb34 100644
> --- a/hw/remote/trace-events
> +++ b/hw/remote/trace-events
> @@ -9,6 +9,8 @@ vfu_cfg_read(uint32_t offset, uint32_t val) "vfu: cfg: 0x%x 
> -> 0x%x"
>  vfu_cfg_write(uint32_t offset, uint32_t val) "vfu: cfg: 0x%x <- 0x%x"
>  vfu_dma_register(uint64_t gpa, size_t len) "vfu: registering GPA 
> 0x%"PRIx64", %zu bytes"
>  vfu_dma_unregister(uint64_t gpa) "vfu: unregistering GPA 0x%"PRIx64""
> +vfu_dma_read(uint64_t gpa, size_t len) "vfu: DMA read 0x%"PRIx64", %zu bytes"
> +vfu_dma_write(uint64_t gpa, size_t len) "vfu: DMA write 0x%"PRIx64", %zu 
> bytes"
>  vfu_bar_register(int i, uint64_t addr, uint64_t size) "vfu: BAR %d: addr 
> 0x%"PRIx64" size 0x%"PRIx64""
>  vfu_bar_rw_enter(const char *op, uint64_t addr) "vfu: %s request for BAR 
> address 0x%"PRIx64""
>  vfu_bar_rw_exit(const char *op, uint64_t addr) "vfu: Finished %s of BAR 
> address 0x%"PRIx64""
> diff --git a/hw/remote/vfio-user-obj.c b/hw/remote/vfio-user-obj.c
> index 8b10c32a3c..cee5e615a9 100644
> --- a/hw/remote/vfio-user-obj.c
> +++ b/hw/remote/vfio-user-obj.c
> @@ -300,6 +300,63 @@ static ssize_t vfu_object_cfg_access(vfu_ctx_t *vfu_ctx, 
> char * const buf,
>      return count;
>  }
>  
> +static MemTxResult vfu_dma_read(void *opaque, hwaddr addr, uint64_t *val,
> +                                unsigned size, MemTxAttrs attrs)
> +{
> +    MemoryRegion *region = opaque;
> +    VfuObject *o = VFU_OBJECT(region->owner);
> +    uint8_t buf[sizeof(uint64_t)];
> +
> +    trace_vfu_dma_read(region->addr + addr, size);
> +
> +    dma_sg_t *sg = alloca(dma_sg_size());

Variable-length arrays have recently been removed from QEMU and
alloca(3) is a similar case. An example is commit
b3c8246750b7077add335559341268f2956f6470 ("hw/nvme: Avoid dynamic stack
allocation").

libvfio-user returns a sane sizeof(struct dma_sg) value so we don't need
to worry about bogus values, so the risk is low here.

However, its hard to scan for and forbid the dangerous alloca(3) calls
when exceptions are made for some alloca(3) uses.

I would avoid alloca(3) and instead use:

  g_autofree dma_sg_t *sg = g_new(dma_sg_size(), 1);

> +    vfu_dma_addr_t vfu_addr = (vfu_dma_addr_t)(region->addr + addr);
> +    if (vfu_addr_to_sgl(o->vfu_ctx, vfu_addr, size, sg, 1, PROT_READ) < 0 ||
> +        vfu_sgl_read(o->vfu_ctx, sg, 1, buf) != 0) {
> +        return MEMTX_ERROR;
> +    }
> +
> +    *val = ldn_he_p(buf, size);
> +
> +    return MEMTX_OK;
> +}
> +
> +static MemTxResult vfu_dma_write(void *opaque, hwaddr addr, uint64_t val,
> +                                 unsigned size, MemTxAttrs attrs)
> +{
> +    MemoryRegion *region = opaque;
> +    VfuObject *o = VFU_OBJECT(region->owner);
> +    uint8_t buf[sizeof(uint64_t)];
> +
> +    trace_vfu_dma_write(region->addr + addr, size);
> +
> +    stn_he_p(buf, size, val);
> +
> +    dma_sg_t *sg = alloca(dma_sg_size());

Same here.

> +    vfu_dma_addr_t vfu_addr = (vfu_dma_addr_t)(region->addr + addr);
> +    if (vfu_addr_to_sgl(o->vfu_ctx, vfu_addr, size, sg, 1, PROT_WRITE) < 0 ||
> +        vfu_sgl_write(o->vfu_ctx, sg, 1, buf) != 0)  {
> +        return MEMTX_ERROR;
> +    }
> +
> +    return MEMTX_OK;
> +}
> +
> +static const MemoryRegionOps vfu_dma_ops = {
> +    .read_with_attrs = vfu_dma_read,
> +    .write_with_attrs = vfu_dma_write,
> +    .endianness = DEVICE_HOST_ENDIAN,
> +    .valid = {
> +        .min_access_size = 1,
> +        .max_access_size = 8,
> +        .unaligned = true,
> +    },
> +    .impl = {
> +        .min_access_size = 1,
> +        .max_access_size = 8,
> +    },
> +};
> +
>  static void dma_register(vfu_ctx_t *vfu_ctx, vfu_dma_info_t *info)
>  {
>      VfuObject *o = vfu_get_private(vfu_ctx);
> @@ -308,17 +365,30 @@ static void dma_register(vfu_ctx_t *vfu_ctx, 
> vfu_dma_info_t *info)
>      g_autofree char *name = NULL;
>      struct iovec *iov = &info->iova;
>  
> -    if (!info->vaddr) {
> -        return;
> -    }
> -
>      name = g_strdup_printf("mem-%s-%"PRIx64"", o->device,
> -                           (uint64_t)info->vaddr);
> +                           (uint64_t)iov->iov_base);
>  
>      subregion = g_new0(MemoryRegion, 1);
>  
> -    memory_region_init_ram_ptr(subregion, NULL, name,
> -                               iov->iov_len, info->vaddr);
> +    if (info->vaddr) {
> +        memory_region_init_ram_ptr(subregion, OBJECT(o), name,
> +                                   iov->iov_len, info->vaddr);
> +    } else {
> +        /*
> +         * Note that I/O regions' MemoryRegionOps handle accesses of at most 
> 8
> +         * bytes at a time, and larger accesses are broken down. However,
> +         * many/most DMA accesses are larger than 8 bytes and VFIO-user can
> +         * handle large DMA accesses just fine, thus this size restriction
> +         * unnecessarily hurts performance, in particular given that each
> +         * access causes a round trip on the VFIO-user socket.
> +         *
> +         * TODO: Investigate how to plumb larger accesses through memory
> +         * regions, possibly by amending MemoryRegionOps or by creating a new
> +         * memory region type.
> +         */
> +        memory_region_init_io(subregion, OBJECT(o), &vfu_dma_ops, subregion,
> +                              name, iov->iov_len);
> +    }
>  
>      dma_as = pci_device_iommu_address_space(o->pci_dev);
>  
> -- 
> 2.34.1
> 

Attachment: signature.asc
Description: PGP signature


reply via email to

[Prev in Thread] Current Thread [Next in Thread]