Re: [PATCH v3 16/16] block/nvme: Use per-queuepair IRQ notifier and AIO
From: Philippe Mathieu-Daudé
Subject: Re: [PATCH v3 16/16] block/nvme: Use per-queuepair IRQ notifier and AIO context
Date: Mon, 6 Jul 2020 11:45:04 +0200
User-agent: Mozilla/5.0 (X11; Linux x86_64; rv:68.0) Gecko/20100101 Thunderbird/68.5.0
On 7/4/20 11:30 PM, Philippe Mathieu-Daudé wrote:
> To be able to use multiple queues on the same hardware,
> each queuepair must be able to receive IRQ notifications
> in the correct AIO context.
>
> The AIO context and the notification handler have to be specific
> to each queue, not to the block driver. Move aio_context and
> irq_notifier from BDRVNVMeState to NVMeQueuePair.
>
> Before this patch, only the admin queuepair had an EventNotifier
> and was checking all queues when notified by IRQ.
> After this patch, each queuepair (admin or io) has its own IRQ
> notifier, hooked up to VFIO.
Hmm, I should also add a note that we currently use only a single IO
queuepair: nvme_add_io_queue() is called once in nvme_init().
After this patch, we should be able to call it more than once...
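Something like this hypothetical loop is what I have in mind (a sketch
only; 'num_io_queues' is an invented knob, not in the tree):

    /* Hypothetical follow-up, NOT part of this patch: with the IRQ
     * notifier now per-queuepair, nvme_init() could create several
     * IO queuepairs by looping over nvme_add_io_queue(). */
    for (unsigned i = 0; i < num_io_queues; i++) {
        if (!nvme_add_io_queue(bs, aio_context, errp)) {
            ret = -EIO;
            goto out;
        }
    }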
>
> AioContexts must be identical across all queuepairs and
> BlockDriverStates. Although each queuepair now has its own
> AioContext pointer, there is no real support for different
> AioContexts yet. (For example, nvme_cmd_sync() is called with
> a bs argument, but AIO_WAIT_WHILE(q->aio_context, ...) uses the
> queuepair aio_context, so the assumption is that they match.)
>
> Signed-off-by: Philippe Mathieu-Daudé <philmd@redhat.com>
> ---
> v3:
> - Add notifier to IO queuepairs
> - Reworded with Stefan's help
>
> I'd like to split this into smaller changes, but I'm not sure
> if it is possible...
> Maybe move EventNotifier first (keeping aio_context shared),
> then move AioContext per queuepair?
> ---
> block/nvme.c | 102 +++++++++++++++++++++++++--------------------------
> 1 file changed, 51 insertions(+), 51 deletions(-)
>
> diff --git a/block/nvme.c b/block/nvme.c
> index 42c0d5284f..fcf8d93fb2 100644
> --- a/block/nvme.c
> +++ b/block/nvme.c
> @@ -60,6 +60,8 @@ typedef struct {
>
> typedef struct {
> QemuMutex lock;
> + AioContext *aio_context;
> + EventNotifier irq_notifier;
>
> /* Read from I/O code path, initialized under BQL */
> BDRVNVMeState *s;
> @@ -107,7 +109,6 @@ QEMU_BUILD_BUG_ON(offsetof(NVMeRegs, doorbells) != 0x1000);
> #define QUEUE_INDEX_IO(n) (1 + n)
>
> struct BDRVNVMeState {
> - AioContext *aio_context;
> QEMUVFIOState *vfio;
> NVMeRegs *regs;
> /* The submission/completion queue pairs.
> @@ -120,7 +121,6 @@ struct BDRVNVMeState {
> /* How many uint32_t elements does each doorbell entry take. */
> size_t doorbell_scale;
> bool write_cache_supported;
> - EventNotifier irq_notifier;
>
> uint64_t nsze; /* Namespace size reported by identify command */
> int nsid; /* The namespace id to read/write data. */
> @@ -227,11 +227,17 @@ static NVMeQueuePair *nvme_create_queue_pair(BDRVNVMeState *s,
> if (!q->prp_list_pages) {
> goto fail;
> }
> + r = event_notifier_init(&q->irq_notifier, 0);
> + if (r) {
> + error_setg(errp, "Failed to init event notifier");
> + goto fail;
> + }
> memset(q->prp_list_pages, 0, s->page_size * NVME_QUEUE_SIZE);
> qemu_mutex_init(&q->lock);
> q->s = s;
> q->index = idx;
> qemu_co_queue_init(&q->free_req_queue);
> + q->aio_context = aio_context;
> q->completion_bh = aio_bh_new(aio_context, nvme_process_completion_bh, q);
> r = qemu_vfio_dma_map(s->vfio, q->prp_list_pages,
> s->page_size * NVME_NUM_REQS,
> @@ -325,7 +331,7 @@ static void nvme_put_free_req_locked(NVMeQueuePair *q, NVMeRequest *req)
> static void nvme_wake_free_req_locked(NVMeQueuePair *q)
> {
> if (!qemu_co_queue_empty(&q->free_req_queue)) {
> - replay_bh_schedule_oneshot_event(q->s->aio_context,
> + replay_bh_schedule_oneshot_event(q->aio_context,
> nvme_free_req_queue_cb, q);
> }
> }
> @@ -492,7 +498,6 @@ static void nvme_cmd_sync_cb(void *opaque, int ret)
> static int nvme_cmd_sync(BlockDriverState *bs, NVMeQueuePair *q,
> NvmeCmd *cmd)
> {
> - AioContext *aio_context = bdrv_get_aio_context(bs);
> NVMeRequest *req;
> int ret = -EINPROGRESS;
> req = nvme_get_free_req(q);
> @@ -501,7 +506,7 @@ static int nvme_cmd_sync(BlockDriverState *bs, NVMeQueuePair *q,
> }
> nvme_submit_command(q, req, cmd, nvme_cmd_sync_cb, &ret);
>
> - AIO_WAIT_WHILE(aio_context, ret == -EINPROGRESS);
> + AIO_WAIT_WHILE(q->aio_context, ret == -EINPROGRESS);
> return ret;
> }
>
> @@ -616,47 +621,35 @@ static bool nvme_poll_queue(NVMeQueuePair *q)
> return progress;
> }
>
> -static bool nvme_poll_queues(BDRVNVMeState *s)
> -{
> - bool progress = false;
> - int i;
> -
> - for (i = 0; i < s->nr_queues; i++) {
> - if (nvme_poll_queue(s->queues[i])) {
> - progress = true;
> - }
> - }
> - return progress;
> -}
> -
> static void nvme_handle_event(EventNotifier *n)
> {
> - BDRVNVMeState *s = container_of(n, BDRVNVMeState, irq_notifier);
> + NVMeQueuePair *q = container_of(n, NVMeQueuePair, irq_notifier);
>
> - trace_nvme_handle_event(s);
> + trace_nvme_handle_event(q);
> event_notifier_test_and_clear(n);
> - nvme_poll_queues(s);
> + nvme_poll_queue(q);
> }
>
> static bool nvme_poll_cb(void *opaque)
> {
> EventNotifier *e = opaque;
> - BDRVNVMeState *s = container_of(e, BDRVNVMeState, irq_notifier);
> + NVMeQueuePair *q = container_of(e, NVMeQueuePair, irq_notifier);
>
> - trace_nvme_poll_cb(s);
> - return nvme_poll_queues(s);
> + trace_nvme_poll_cb(q);
> + return nvme_poll_queue(q);
> }
>
> -static bool nvme_add_io_queue(BlockDriverState *bs, Error **errp)
> +static bool nvme_add_io_queue(BlockDriverState *bs,
> + AioContext *aio_context, Error **errp)
> {
> BDRVNVMeState *s = bs->opaque;
> int n = s->nr_queues;
> NVMeQueuePair *q;
> NvmeCmd cmd;
> int queue_size = NVME_QUEUE_SIZE;
> + int ret;
>
> - q = nvme_create_queue_pair(s, bdrv_get_aio_context(bs),
> - n, queue_size, errp);
> + q = nvme_create_queue_pair(s, aio_context, n, queue_size, errp);
> if (!q) {
> return false;
> }
> @@ -683,6 +676,17 @@ static bool nvme_add_io_queue(BlockDriverState *bs, Error **errp)
> s->queues = g_renew(NVMeQueuePair *, s->queues, n + 1);
> s->queues[n] = q;
> s->nr_queues++;
> +
> + ret = qemu_vfio_pci_init_irq(s->vfio,
> + &s->queues[n]->irq_notifier,
> + VFIO_PCI_MSIX_IRQ_INDEX, errp);
> + if (ret) {
> + goto out_error;
> + }
> + aio_set_event_notifier(aio_context,
> + &s->queues[n]->irq_notifier,
> + false, nvme_handle_event, nvme_poll_cb);
> +
> return true;
> out_error:
> nvme_free_queue_pair(q);
> @@ -704,12 +708,6 @@ static int nvme_init(BlockDriverState *bs, const char *device, int namespace,
> qemu_co_queue_init(&s->dma_flush_queue);
> s->device = g_strdup(device);
> s->nsid = namespace;
> - s->aio_context = bdrv_get_aio_context(bs);
> - ret = event_notifier_init(&s->irq_notifier, 0);
> - if (ret) {
> - error_setg(errp, "Failed to init event notifier");
> - return ret;
> - }
>
> s->vfio = qemu_vfio_open_pci(device, errp);
> if (!s->vfio) {
> @@ -784,12 +782,14 @@ static int nvme_init(BlockDriverState *bs, const char *device, int namespace,
> }
> }
>
> - ret = qemu_vfio_pci_init_irq(s->vfio, &s->irq_notifier,
> + ret = qemu_vfio_pci_init_irq(s->vfio,
> + &s->queues[QUEUE_INDEX_ADMIN]->irq_notifier,
> VFIO_PCI_MSIX_IRQ_INDEX, errp);
> if (ret) {
> goto out;
> }
> - aio_set_event_notifier(bdrv_get_aio_context(bs), &s->irq_notifier,
> + aio_set_event_notifier(aio_context,
> + &s->queues[QUEUE_INDEX_ADMIN]->irq_notifier,
> false, nvme_handle_event, nvme_poll_cb);
>
> nvme_identify(bs, namespace, &local_err);
> @@ -800,7 +800,7 @@ static int nvme_init(BlockDriverState *bs, const char *device, int namespace,
> }
>
> /* Set up command queues. */
> - if (!nvme_add_io_queue(bs, errp)) {
> + if (!nvme_add_io_queue(bs, aio_context, errp)) {
> ret = -EIO;
> }
> out:
> @@ -869,12 +869,14 @@ static void nvme_close(BlockDriverState *bs)
> BDRVNVMeState *s = bs->opaque;
>
> for (i = 0; i < s->nr_queues; ++i) {
> - nvme_free_queue_pair(s->queues[i]);
> + NVMeQueuePair *q = s->queues[i];
> +
> + aio_set_event_notifier(q->aio_context,
> + &q->irq_notifier, false, NULL, NULL);
> + event_notifier_cleanup(&q->irq_notifier);
> + nvme_free_queue_pair(q);
> }
> g_free(s->queues);
> - aio_set_event_notifier(bdrv_get_aio_context(bs), &s->irq_notifier,
> - false, NULL, NULL);
> - event_notifier_cleanup(&s->irq_notifier);
> qemu_vfio_pci_unmap_bar(s->vfio, 0, (void *)s->regs, 0, NVME_BAR_SIZE);
> qemu_vfio_close(s->vfio);
>
> @@ -1086,7 +1088,7 @@ static coroutine_fn int nvme_co_prw_aligned(BlockDriverState *bs,
> .cdw12 = cpu_to_le32(cdw12),
> };
> NVMeCoData data = {
> - .ctx = bdrv_get_aio_context(bs),
> + .ctx = ioq->aio_context,
> .ret = -EINPROGRESS,
> };
>
> @@ -1195,7 +1197,7 @@ static coroutine_fn int nvme_co_flush(BlockDriverState *bs)
> .nsid = cpu_to_le32(s->nsid),
> };
> NVMeCoData data = {
> - .ctx = bdrv_get_aio_context(bs),
> + .ctx = ioq->aio_context,
> .ret = -EINPROGRESS,
> };
>
> @@ -1236,7 +1238,7 @@ static coroutine_fn int nvme_co_pwrite_zeroes(BlockDriverState *bs,
> };
>
> NVMeCoData data = {
> - .ctx = bdrv_get_aio_context(bs),
> + .ctx = ioq->aio_context,
> .ret = -EINPROGRESS,
> };
>
> @@ -1286,7 +1288,7 @@ static int coroutine_fn nvme_co_pdiscard(BlockDriverState *bs,
> };
>
> NVMeCoData data = {
> - .ctx = bdrv_get_aio_context(bs),
> + .ctx = ioq->aio_context,
> .ret = -EINPROGRESS,
> };
>
> @@ -1379,10 +1381,10 @@ static void nvme_detach_aio_context(BlockDriverState *bs)
>
> qemu_bh_delete(q->completion_bh);
> q->completion_bh = NULL;
> - }
>
> - aio_set_event_notifier(bdrv_get_aio_context(bs), &s->irq_notifier,
> - false, NULL, NULL);
> + aio_set_event_notifier(bdrv_get_aio_context(bs), &q->irq_notifier,
> + false, NULL, NULL);
> + }
> }
>
> static void nvme_attach_aio_context(BlockDriverState *bs,
> @@ -1390,13 +1392,11 @@ static void nvme_attach_aio_context(BlockDriverState *bs,
> {
> BDRVNVMeState *s = bs->opaque;
>
> - s->aio_context = new_context;
> - aio_set_event_notifier(new_context, &s->irq_notifier,
> - false, nvme_handle_event, nvme_poll_cb);
> -
> for (int i = 0; i < s->nr_queues; i++) {
> NVMeQueuePair *q = s->queues[i];
>
> + aio_set_event_notifier(new_context, &q->irq_notifier,
> + false, nvme_handle_event, nvme_poll_cb);
> q->completion_bh =
> aio_bh_new(new_context, nvme_process_completion_bh, q);
> }
>