qemu-block
[Top][All Lists]
Advanced

[Date Prev][Date Next][Thread Prev][Thread Next][Date Index][Thread Index]

Re: [RFC v3 7/8] blkio: implement BDRV_REQ_REGISTERED_BUF optimization


From: Stefano Garzarella
Subject: Re: [RFC v3 7/8] blkio: implement BDRV_REQ_REGISTERED_BUF optimization
Date: Tue, 12 Jul 2022 16:28:02 +0200

On Fri, Jul 08, 2022 at 05:17:36AM +0100, Stefan Hajnoczi wrote:
Avoid bounce buffers when QEMUIOVector elements are within previously
registered bdrv_register_buf() buffers.

The idea is that emulated storage controllers will register guest RAM
using bdrv_register_buf() and set the BDRV_REQ_REGISTERED_BUF on I/O
requests. Therefore no blkio_map_mem_region() calls are necessary in the
performance-critical I/O code path.

This optimization doesn't apply if the I/O buffer is internally
allocated by QEMU (e.g. qcow2 metadata). There we still take the slow
path because BDRV_REQ_REGISTERED_BUF is not set.

Signed-off-by: Stefan Hajnoczi <stefanha@redhat.com>
---
block/blkio.c | 104 ++++++++++++++++++++++++++++++++++++++++++++++++--
1 file changed, 101 insertions(+), 3 deletions(-)

diff --git a/block/blkio.c b/block/blkio.c
index 7fbdbd7fae..37d593a20c 100644
--- a/block/blkio.c
+++ b/block/blkio.c
@@ -1,7 +1,9 @@
#include "qemu/osdep.h"
#include <blkio.h>
#include "block/block_int.h"
+#include "exec/memory.h"
#include "qapi/error.h"
+#include "qemu/error-report.h"
#include "qapi/qmp/qdict.h"
#include "qemu/module.h"

@@ -28,6 +30,9 @@ typedef struct {

    /* Can we skip adding/deleting blkio_mem_regions? */
    bool needs_mem_regions;
+
+    /* Are file descriptors necessary for blkio_mem_regions? */
+    bool needs_mem_region_fd;
} BDRVBlkioState;

static void blkio_aiocb_complete(BlkioAIOCB *acb, int ret)
@@ -198,6 +203,8 @@ static BlockAIOCB *blkio_aio_preadv(BlockDriverState *bs, 
int64_t offset,
        BlockCompletionFunc *cb, void *opaque)
{
    BDRVBlkioState *s = bs->opaque;
+    bool needs_mem_regions =
+        s->needs_mem_regions && !(flags & BDRV_REQ_REGISTERED_BUF);
    struct iovec *iov = qiov->iov;
    int iovcnt = qiov->niov;
    BlkioAIOCB *acb;
@@ -206,7 +213,7 @@ static BlockAIOCB *blkio_aio_preadv(BlockDriverState *bs, 
int64_t offset,

    acb = blkio_aiocb_get(bs, cb, opaque);

-    if (s->needs_mem_regions) {
+    if (needs_mem_regions) {
        if (blkio_aiocb_init_mem_region_locked(acb, bytes) < 0) {
            qemu_aio_unref(&acb->common);
            return NULL;
@@ -230,6 +237,8 @@ static BlockAIOCB *blkio_aio_pwritev(BlockDriverState *bs, 
int64_t offset,
{
    uint32_t blkio_flags = (flags & BDRV_REQ_FUA) ? BLKIO_REQ_FUA : 0;
    BDRVBlkioState *s = bs->opaque;
+    bool needs_mem_regions =
+        s->needs_mem_regions && !(flags & BDRV_REQ_REGISTERED_BUF);
    struct iovec *iov = qiov->iov;
    int iovcnt = qiov->niov;
    BlkioAIOCB *acb;
@@ -238,7 +247,7 @@ static BlockAIOCB *blkio_aio_pwritev(BlockDriverState *bs, 
int64_t offset,

    acb = blkio_aiocb_get(bs, cb, opaque);

-    if (s->needs_mem_regions) {
+    if (needs_mem_regions) {
        if (blkio_aiocb_init_mem_region_locked(acb, bytes) < 0) {
            qemu_aio_unref(&acb->common);
            return NULL;
@@ -324,6 +333,80 @@ static void blkio_io_unplug(BlockDriverState *bs)
    }
}

+static void blkio_register_buf(BlockDriverState *bs, void *host, size_t size)
+{
+    BDRVBlkioState *s = bs->opaque;
+    int ret;
+    struct blkio_mem_region region = (struct blkio_mem_region){
+        .addr = host,
+        .len = size,
+        .fd = -1,
+    };
+
+    if (((uintptr_t)host | size) % s->mem_region_alignment) {
+        error_report_once("%s: skipping unaligned buf %p with size %zu",
+                          __func__, host, size);
+        return; /* skip unaligned */
+    }
+
+    /* Attempt to find the fd for a MemoryRegion */
+    if (s->needs_mem_region_fd) {
+        int fd = -1;
+        ram_addr_t offset;
+        MemoryRegion *mr;
+
+        /*
+         * bdrv_register_buf() is called with the BQL held so mr lives at least
+         * until this function returns.
+         */
+        mr = memory_region_from_host(host, &offset);
+        if (mr) {
+            fd = memory_region_get_fd(mr);

If s->needs_mem_region_fd is true, memory_region_get_fd() crashes I think because mr->ram_block is not yet set, indeed from the stack trace blkio_register_buf() is called inside qemu_ram_alloc_resizeable(), and its result is used to set mr->ram_block in memory_region_init_resizeable_ram():

Program terminated with signal SIGSEGV, Segmentation fault.
#0  0x000056235bf1f7a3 in memory_region_get_fd (mr=<optimized out>) at 
../softmmu/memory.c:2309
#1  0x000056235c07e54d in blkio_register_buf (bs=<optimized out>, 
host=0x7f824e200000, size=2097152)
    at ../block/blkio.c:364
#2  0x000056235c0246c6 in bdrv_register_buf (bs=0x56235d606b40, 
host=0x7f824e200000, size=2097152)
    at ../block/io.c:3362
#3  0x000056235bea44e6 in ram_block_notify_add (host=0x7f824e200000, 
size=131072, max_size=2097152)
    at ../hw/core/numa.c:863
#4  0x000056235bf22c00 in ram_block_add (new_block=<optimized out>, 
errp=<optimized out>)
    at ../softmmu/physmem.c:2057
#5 0x000056235bf232e4 in qemu_ram_alloc_internal (size=size@entry=131072, max_size=max_size@entry=2097152, resized=resized@entry=0x56235bc0f920 <fw_cfg_resized>, host=host@entry=0x0, ram_flags=ram_flags@entry=4, mr=mr@entry=0x56235dc3fe00, errp=0x7ffcb21f1be0) at ../softmmu/physmem.c:2180 #6 0x000056235bf26426 in qemu_ram_alloc_resizeable (size=size@entry=131072, maxsz=maxsz@entry=2097152, resized=resized@entry=0x56235bc0f920 <fw_cfg_resized>, mr=mr@entry=0x56235dc3fe00, errp=errp@entry=0x7ffcb21f1be0) at ../softmmu/physmem.c:2209 #7 0x000056235bf1cc99 in memory_region_init_resizeable_ram (mr=0x56235dc3fe00, owner=owner@entry=0x56235d93ffc0, name=name@entry=0x7ffcb21f1ca0 "/rom@etc/acpi/tables", size=131072, max_size=2097152, resized=resized@entry=0x56235bc0f920 <fw_cfg_resized>, errp=0x56235c996490 <error_fatal>) at ../softmmu/memory.c:1586 #8 0x000056235bc0f99c in rom_set_mr (rom=rom@entry=0x56235ddd0200, owner=0x56235d93ffc0, name=name@entry=0x7ffcb21f1ca0 "/rom@etc/acpi/tables", ro=ro@entry=true)
    at ../hw/core/loader.c:961
#9 0x000056235bc12a65 in rom_add_blob (name=name@entry=0x56235c1a2a09 "etc/acpi/tables", blob=0x56235df4f4b0, len=<optimized out>, max_len=max_len@entry=2097152, addr=addr@entry=18446744073709551615, fw_file_name=fw_file_name@entry=0x56235c1a2a09 "etc/acpi/tables", fw_callback=0x56235be47f90 <acpi_build_update>, callback_opaque=0x56235d817830, as=0x0, read_only=true) at ../hw/core/loader.c:1102
#10 0x000056235bbe0990 in acpi_add_rom_blob (
update=update@entry=0x56235be47f90 <acpi_build_update>, opaque=opaque@entry=0x56235d817830, blob=0x56235d3ab750, name=name@entry=0x56235c1a2a09 "etc/acpi/tables") at ../hw/acpi/utils.c:46
#11 0x000056235be481e6 in acpi_setup () at ../hw/i386/acpi-build.c:2805
#12 0x000056235be3e209 in pc_machine_done (notifier=0x56235d5efce8, 
data=<optimized out>)
    at ../hw/i386/pc.c:758
#13 0x000056235c12e4a7 in notifier_list_notify (
    list=list@entry=0x56235c963790 <machine_init_done_notifiers>, 
data=data@entry=0x0)
    at ../util/notify.c:39

Thanks,
Stefano




reply via email to

[Prev in Thread] Current Thread [Next in Thread]