From: Eugenio Pérez
Subject: [RFC PATCH v9 20/23] vdpa: Buffer CVQ support on shadow virtqueue
Date: Wed, 6 Jul 2022 20:40:05 +0200

Introduce control virtqueue support for the vDPA shadow virtqueue. This
is needed for advanced networking features like multiqueue.

The virtio-net control VQ copies the descriptors to qemu's VA, so we
avoid TOCTOU races with the guest's or device's memory every time there
is a device model change.  When address space isolation is implemented,
this will also allow CVQ to have access only to control messages.

To demonstrate command handling, VIRTIO_NET_F_CTRL_MACADDR is
implemented.  If the virtio-net driver changes the MAC address, the
virtio-net device model is updated with the new one.

Other CVQ commands could be added here straightforwardly, but they have
not been tested.

Signed-off-by: Eugenio Pérez <eperezma@redhat.com>
---
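Note, not part of the patch: a standalone sketch of the copy-then-validate
pattern described above, in plain C with illustrative names (ctrl_hdr and
handle_ctrl are not QEMU APIs).  The point is that the control header is
copied once into qemu-owned memory and every later check and use reads that
private copy, so a concurrent guest write cannot change the data between
check and use:

    #include <stdint.h>
    #include <stdio.h>
    #include <string.h>

    /* Illustrative stand-in for struct virtio_net_ctrl_hdr */
    struct ctrl_hdr {
        uint8_t class;
        uint8_t cmd;
    };

    /* Validate and act on a private copy, never on the shared buffer */
    static int handle_ctrl(const void *guest_buf, size_t len)
    {
        struct ctrl_hdr hdr;

        if (len < sizeof(hdr)) {
            return -1;
        }
        /* One copy into qemu-owned memory closes the TOCTOU window */
        memcpy(&hdr, guest_buf, sizeof(hdr));

        /* Check and use read the same copy, so they cannot diverge */
        if (hdr.class != 1) { /* 1 = VIRTIO_NET_CTRL_MAC, illustrative */
            return -1;
        }
        printf("accepted class %u cmd %u\n", hdr.class, hdr.cmd);
        return 0;
    }

    int main(void)
    {
        uint8_t shared[] = { 1, 1 }; /* stands in for guest memory */
        return handle_ctrl(shared, sizeof(shared));
    }
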
 include/hw/virtio/vhost-vdpa.h |   3 +
 hw/virtio/vhost-vdpa.c         |   5 +-
 net/vhost-vdpa.c               | 373 +++++++++++++++++++++++++++++++++
 3 files changed, 379 insertions(+), 2 deletions(-)
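
A similar standalone sketch of the capped, overflow-safe iovec length
computation that vhost_vdpa_net_iov_len() below implements;
__builtin_add_overflow() (gcc/clang) stands in for QEMU's
uadd64_overflow(), everything else is illustrative:

    #include <assert.h>
    #include <stdint.h>
    #include <sys/uio.h>

    /*
     * Sum iov_len entries, stopping once the sum reaches max; unsigned
     * overflow is reported as UINT64_MAX, treated as an invalid length.
     */
    static uint64_t capped_iov_len(const struct iovec *iov,
                                   unsigned iov_cnt, uint64_t max)
    {
        uint64_t len = 0;

        for (unsigned i = 0; len < max && i < iov_cnt; i++) {
            if (__builtin_add_overflow(len, (uint64_t)iov[i].iov_len,
                                       &len)) {
                return UINT64_MAX;
            }
        }
        return len;
    }

    int main(void)
    {
        char a[4], b[8];
        const struct iovec iov[] = {
            { .iov_base = a, .iov_len = sizeof(a) },
            { .iov_base = b, .iov_len = sizeof(b) },
        };

        /* Enough bytes for a 6-byte MAC: capped sum reaches at least 6 */
        assert(capped_iov_len(iov, 2, 6) >= 6);
        return 0;
    }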

diff --git a/include/hw/virtio/vhost-vdpa.h b/include/hw/virtio/vhost-vdpa.h
index 7214eb47dc..1111d85643 100644
--- a/include/hw/virtio/vhost-vdpa.h
+++ b/include/hw/virtio/vhost-vdpa.h
@@ -15,6 +15,7 @@
 #include <gmodule.h>
 
 #include "hw/virtio/vhost-iova-tree.h"
+#include "hw/virtio/vhost-shadow-virtqueue.h"
 #include "hw/virtio/virtio.h"
 #include "standard-headers/linux/vhost_types.h"
 
@@ -35,6 +36,8 @@ typedef struct vhost_vdpa {
     /* IOVA mapping used by the Shadow Virtqueue */
     VhostIOVATree *iova_tree;
     GPtrArray *shadow_vqs;
+    const VhostShadowVirtqueueOps *shadow_vq_ops;
+    void *shadow_vq_ops_opaque;
     struct vhost_dev *dev;
     VhostVDPAHostNotifier notifier[VIRTIO_QUEUE_MAX];
 } VhostVDPA;
diff --git a/hw/virtio/vhost-vdpa.c b/hw/virtio/vhost-vdpa.c
index 613c3483b0..94bda07b4d 100644
--- a/hw/virtio/vhost-vdpa.c
+++ b/hw/virtio/vhost-vdpa.c
@@ -417,9 +417,10 @@ static int vhost_vdpa_init_svq(struct vhost_dev *hdev, struct vhost_vdpa *v,
 
     shadow_vqs = g_ptr_array_new_full(hdev->nvqs, vhost_svq_free);
     for (unsigned n = 0; n < hdev->nvqs; ++n) {
-        g_autoptr(VhostShadowVirtqueue) svq = vhost_svq_new(v->iova_tree, NULL,
-                                                            NULL);
+        g_autoptr(VhostShadowVirtqueue) svq = NULL;
 
+        svq = vhost_svq_new(v->iova_tree, v->shadow_vq_ops,
+                            v->shadow_vq_ops_opaque);
         if (unlikely(!svq)) {
             error_setg(errp, "Cannot create svq %u", n);
             return -1;
diff --git a/net/vhost-vdpa.c b/net/vhost-vdpa.c
index b0158f625e..e415cc8de5 100644
--- a/net/vhost-vdpa.c
+++ b/net/vhost-vdpa.c
@@ -11,11 +11,15 @@
 
 #include "qemu/osdep.h"
 #include "clients.h"
+#include "hw/virtio/virtio-net.h"
 #include "net/vhost_net.h"
 #include "net/vhost-vdpa.h"
 #include "hw/virtio/vhost-vdpa.h"
+#include "qemu/buffer.h"
 #include "qemu/config-file.h"
 #include "qemu/error-report.h"
+#include "qemu/log.h"
+#include "qemu/memalign.h"
 #include "qemu/option.h"
 #include "qapi/error.h"
 #include <linux/vhost.h>
@@ -25,6 +29,26 @@
 #include "monitor/monitor.h"
 #include "hw/virtio/vhost.h"
 
+typedef struct CVQElement {
+    /* Device's in and out buffer */
+    void *in_buf, *out_buf;
+
+    /* Optional guest element from which this CVQElement was created */
+    VirtQueueElement *guest_elem;
+
+    /* Control header sent by the guest. */
+    struct virtio_net_ctrl_hdr ctrl;
+
+    /* vhost-vdpa device, for cleanup reasons */
+    struct vhost_vdpa *vdpa;
+
+    /* Length of out data */
+    size_t out_len;
+
+    /* Copy of the out data sent by the guest excluding ctrl. */
+    uint8_t out_data[];
+} CVQElement;
+
 /* Todo:need to add the multiqueue support here */
 typedef struct VhostVDPAState {
     NetClientState nc;
@@ -187,6 +211,351 @@ static NetClientInfo net_vhost_vdpa_info = {
         .check_peer_type = vhost_vdpa_check_peer_type,
 };
 
+/**
+ * Unmap a buffer that was mapped into the device's IOVA space for a CVQ
+ * element, and free it
+ *
+ * @elem: CVQ element that owns the buffer
+ * @addr: qemu's VA of the buffer, as returned by vhost_vdpa_cvq_alloc_buf
+ *
+ * The buffer is always freed; the IOVA mapping is removed only if it can
+ * be located in the IOVA tree.
+ *
+ * Prints an error message in case of failure.
+ */
+static void vhost_vdpa_cvq_unmap_buf(CVQElement *elem, void *addr)
+{
+    struct vhost_vdpa *v = elem->vdpa;
+    VhostIOVATree *tree = v->iova_tree;
+    DMAMap needle = {
+        /*
+         * No need to specify size or to look for more translations since
+         * this contiguous chunk was allocated by us.
+         */
+        .translated_addr = (hwaddr)(uintptr_t)addr,
+    };
+    const DMAMap *map = vhost_iova_tree_find_iova(tree, &needle);
+    int r;
+
+    if (unlikely(!map)) {
+        error_report("Cannot locate expected map");
+        goto err;
+    }
+
+    r = vhost_vdpa_dma_unmap(v, map->iova, map->size + 1);
+    if (unlikely(r != 0)) {
+        error_report("Device cannot unmap: %s(%d)", g_strerror(r), r);
+    }
+
+    vhost_iova_tree_remove(tree, map);
+
+err:
+    qemu_vfree(addr);
+}
+
+static void vhost_vdpa_cvq_delete_elem(CVQElement *elem)
+{
+    if (elem->out_buf) {
+        vhost_vdpa_cvq_unmap_buf(elem, g_steal_pointer(&elem->out_buf));
+    }
+
+    if (elem->in_buf) {
+        vhost_vdpa_cvq_unmap_buf(elem, g_steal_pointer(&elem->in_buf));
+    }
+
+    /* Guest element must have been returned to the guest or freed by now */
+    assert(!elem->guest_elem);
+
+    g_free(elem);
+}
+G_DEFINE_AUTOPTR_CLEANUP_FUNC(CVQElement, vhost_vdpa_cvq_delete_elem);
+
+static int vhost_vdpa_net_cvq_svq_inject(VhostShadowVirtqueue *svq,
+                                         CVQElement *cvq_elem,
+                                         size_t out_len)
+{
+    const struct iovec iov[] = {
+        {
+            .iov_base = cvq_elem->out_buf,
+            .iov_len = out_len,
+        },{
+            .iov_base = cvq_elem->in_buf,
+            .iov_len = sizeof(virtio_net_ctrl_ack),
+        }
+    };
+
+    return vhost_svq_inject(svq, iov, 1, 1, cvq_elem);
+}
+
+static void *vhost_vdpa_cvq_alloc_buf(struct vhost_vdpa *v,
+                                      const uint8_t *out_data, size_t data_len,
+                                      bool write)
+{
+    DMAMap map = {};
+    size_t buf_len = ROUND_UP(data_len, qemu_real_host_page_size());
+    void *buf = qemu_memalign(qemu_real_host_page_size(), buf_len);
+    int r;
+
+    if (!write) {
+        memcpy(buf, out_data, data_len);
+        memset(buf + data_len, 0, buf_len - data_len);
+    } else {
+        memset(buf, 0, buf_len);
+    }
+
+    map.translated_addr = (hwaddr)(uintptr_t)buf;
+    map.size = buf_len - 1;
+    map.perm = write ? IOMMU_RW : IOMMU_RO;
+    r = vhost_iova_tree_map_alloc(v->iova_tree, &map);
+    if (unlikely(r != IOVA_OK)) {
+        error_report("Cannot map injected element");
+        goto err;
+    }
+
+    r = vhost_vdpa_dma_map(v, map.iova, buf_len, buf, !write);
+    /* TODO: Handle error */
+    assert(r == 0);
+
+    return buf;
+
+err:
+    qemu_vfree(buf);
+    return NULL;
+}
+
+/**
+ * Allocate an element suitable to be injected
+ *
+ * @ctrl: Control header sent by the guest
+ * @out_sg: Guest's out descriptors, excluding the ctrl header
+ * @out_num: Number of entries in out_sg
+ * @out_size: Total size of the out data, including the ctrl header
+ * @elem: Optional guest element from which this one was created
+ * TODO: Do we need a sg for out_num? I think not
+ */
+static CVQElement *vhost_vdpa_cvq_alloc_elem(VhostVDPAState *s,
+                                             struct virtio_net_ctrl_hdr ctrl,
+                                             const struct iovec *out_sg,
+                                             size_t out_num, size_t out_size,
+                                             VirtQueueElement *elem)
+{
+    g_autoptr(CVQElement) cvq_elem = g_malloc(sizeof(CVQElement) + out_size);
+    uint8_t *out_cursor = cvq_elem->out_data;
+    struct vhost_vdpa *v = &s->vhost_vdpa;
+
+    /* Start with a clean base */
+    memset(cvq_elem, 0, sizeof(*cvq_elem));
+    cvq_elem->vdpa = &s->vhost_vdpa;
+
+    /*
+     * Linearize the element. If the guest sent a descriptor chain, we
+     * expose a single contiguous buffer to the device.
+     */
+    cvq_elem->out_len = out_size;
+    memcpy(out_cursor, &ctrl, sizeof(ctrl));
+    out_size -= sizeof(ctrl);
+    out_cursor += sizeof(ctrl);
+    iov_to_buf(out_sg, out_num, 0, out_cursor, out_size);
+
+    cvq_elem->out_buf = vhost_vdpa_cvq_alloc_buf(v, cvq_elem->out_data,
+                                                 cvq_elem->out_len, false);
+    assert(cvq_elem->out_buf);
+    cvq_elem->in_buf = vhost_vdpa_cvq_alloc_buf(v, NULL,
+                                                sizeof(virtio_net_ctrl_ack),
+                                                true);
+    assert(cvq_elem->in_buf);
+
+    cvq_elem->guest_elem = elem;
+    cvq_elem->ctrl = ctrl;
+    return g_steal_pointer(&cvq_elem);
+}
+
+/**
+ * iov_size with an upper limit. It's assumed UINT64_MAX is an invalid
+ * iov_size.
+ */
+static uint64_t vhost_vdpa_net_iov_len(const struct iovec *iov,
+                                       unsigned int iov_cnt, size_t max)
+{
+    uint64_t len = 0;
+
+    for (unsigned int i = 0; len < max && i < iov_cnt; i++) {
+        bool overflow = uadd64_overflow(iov[i].iov_len, len, &len);
+        if (unlikely(overflow)) {
+            return UINT64_MAX;
+        }
+    }
+
+    return len;
+}
+
+static CVQElement *vhost_vdpa_net_cvq_copy_elem(VhostVDPAState *s,
+                                                VirtQueueElement *elem)
+{
+    struct virtio_net_ctrl_hdr ctrl;
+    g_autofree struct iovec *iov = NULL;
+    struct iovec *iov2;
+    unsigned int out_num = elem->out_num;
+    size_t n, out_size = 0;
+
+    /* TODO: must the in buffer be a single entry of virtio_net_ctrl_ack size? */
+    if (unlikely(vhost_vdpa_net_iov_len(elem->in_sg, elem->in_num,
+                                        sizeof(virtio_net_ctrl_ack))
+                                              < sizeof(virtio_net_ctrl_ack))) {
+        return NULL;
+    }
+
+    n = iov_to_buf(elem->out_sg, out_num, 0, &ctrl, sizeof(ctrl));
+    if (unlikely(n != sizeof(ctrl))) {
+        qemu_log_mask(LOG_GUEST_ERROR, "%s: invalid out size\n", __func__);
+        return NULL;
+    }
+
+    iov = iov2 = g_memdup2(elem->out_sg, sizeof(struct iovec) * elem->out_num);
+    iov_discard_front(&iov2, &out_num, sizeof(ctrl));
+    switch (ctrl.class) {
+    case VIRTIO_NET_CTRL_MAC:
+        switch (ctrl.cmd) {
+        case VIRTIO_NET_CTRL_MAC_ADDR_SET:
+            if (likely(vhost_vdpa_net_iov_len(iov2, out_num, 6) >= 6)) {
+                out_size += 6;
+                break;
+            }
+
+            qemu_log_mask(LOG_GUEST_ERROR, "%s: invalid mac size\n", __func__);
+            return NULL;
+        default:
+            qemu_log_mask(LOG_GUEST_ERROR, "%s: invalid mac cmd %u\n",
+                          __func__, ctrl.cmd);
+            return NULL;
+        }
+        break;
+    default:
+        qemu_log_mask(LOG_GUEST_ERROR, "%s: invalid control class %u\n",
+                      __func__, ctrl.class);
+        return NULL;
+    }
+
+    return vhost_vdpa_cvq_alloc_elem(s, ctrl, iov2, out_num,
+                                     sizeof(ctrl) + out_size, elem);
+}
+
+/**
+ * Validate and copy control virtqueue commands.
+ *
+ * Following QEMU guidelines, we offer a copy of the buffers to the device to
+ * prevent TOCTOU bugs.  This function also checks that the buffer lengths
+ * are as expected.
+ */
+static bool vhost_vdpa_net_handle_ctrl_avail(VhostShadowVirtqueue *svq,
+                                             VirtQueueElement *guest_elem,
+                                             void *opaque)
+{
+    VhostVDPAState *s = opaque;
+    g_autoptr(CVQElement) cvq_elem = NULL;
+    g_autofree VirtQueueElement *elem = guest_elem;
+    size_t out_size, in_len;
+    virtio_net_ctrl_ack status = VIRTIO_NET_ERR;
+    int r;
+
+    cvq_elem = vhost_vdpa_net_cvq_copy_elem(s, elem);
+    if (unlikely(!cvq_elem)) {
+        goto err;
+    }
+
+    /* out size validated at vhost_vdpa_net_cvq_copy_elem */
+    out_size = iov_size(elem->out_sg, elem->out_num);
+    r = vhost_vdpa_net_cvq_svq_inject(svq, cvq_elem, out_size);
+    if (unlikely(r != 0)) {
+        goto err;
+    }
+
+    cvq_elem->guest_elem = g_steal_pointer(&elem);
+    /* Now CVQ elem belongs to SVQ */
+    g_steal_pointer(&cvq_elem);
+    return true;
+
+err:
+    in_len = iov_from_buf(elem->in_sg, elem->in_num, 0, &status,
+                          sizeof(status));
+    vhost_svq_push_elem(svq, elem, in_len);
+    return true;
+}
+
+static VirtQueueElement *vhost_vdpa_net_handle_ctrl_detach(void *elem_opaque)
+{
+    g_autoptr(CVQElement) cvq_elem = elem_opaque;
+    return g_steal_pointer(&cvq_elem->guest_elem);
+}
+
+static void vhost_vdpa_net_handle_ctrl_used(VhostShadowVirtqueue *svq,
+                                            void *vq_elem_opaque,
+                                            uint32_t dev_written)
+{
+    g_autoptr(CVQElement) cvq_elem = vq_elem_opaque;
+    virtio_net_ctrl_ack status = VIRTIO_NET_ERR;
+    const struct iovec out = {
+        .iov_base = cvq_elem->out_data,
+        .iov_len = cvq_elem->out_len,
+    };
+    const DMAMap status_map_needle = {
+        .translated_addr = (hwaddr)(uintptr_t)cvq_elem->in_buf,
+        .size = sizeof(status),
+    };
+    const DMAMap *in_map;
+    const struct iovec in = {
+        .iov_base = &status,
+        .iov_len = sizeof(status),
+    };
+    g_autofree VirtQueueElement *guest_elem = NULL;
+
+    if (unlikely(dev_written < sizeof(status))) {
+        error_report("Insufficient written data (%llu)",
+                     (long long unsigned)dev_written);
+        goto out;
+    }
+
+    in_map = vhost_iova_tree_find_iova(svq->iova_tree, &status_map_needle);
+    if (unlikely(!in_map)) {
+        error_report("Cannot locate out mapping");
+        goto out;
+    }
+
+    switch (cvq_elem->ctrl.class) {
+    case VIRTIO_NET_CTRL_MAC:
+        break;
+    default:
+        error_report("Unexpected ctrl class %u", cvq_elem->ctrl.class);
+        goto out;
+    }
+
+    memcpy(&status, cvq_elem->in_buf, sizeof(status));
+    if (status != VIRTIO_NET_OK) {
+        goto out;
+    }
+
+    status = VIRTIO_NET_ERR;
+    virtio_net_handle_ctrl_iov(svq->vdev, &in, 1, &out, 1);
+    if (status != VIRTIO_NET_OK) {
+        error_report("Bad CVQ processing in model");
+        goto out;
+    }
+
+out:
+    guest_elem = g_steal_pointer(&cvq_elem->guest_elem);
+    if (guest_elem) {
+        iov_from_buf(guest_elem->in_sg, guest_elem->in_num, 0, &status,
+                     sizeof(status));
+        vhost_svq_push_elem(svq, guest_elem, sizeof(status));
+    }
+}
+
+static const VhostShadowVirtqueueOps vhost_vdpa_net_svq_ops = {
+    .avail_handler = vhost_vdpa_net_handle_ctrl_avail,
+    .used_handler = vhost_vdpa_net_handle_ctrl_used,
+    .detach_handler = vhost_vdpa_net_handle_ctrl_detach,
+};
+
 static NetClientState *net_vhost_vdpa_init(NetClientState *peer,
                                            const char *device,
                                            const char *name,
@@ -211,6 +580,10 @@ static NetClientState *net_vhost_vdpa_init(NetClientState *peer,
 
     s->vhost_vdpa.device_fd = vdpa_device_fd;
     s->vhost_vdpa.index = queue_pair_index;
+    if (!is_datapath) {
+        s->vhost_vdpa.shadow_vq_ops = &vhost_vdpa_net_svq_ops;
+        s->vhost_vdpa.shadow_vq_ops_opaque = s;
+    }
     ret = vhost_vdpa_add(nc, (void *)&s->vhost_vdpa, queue_pair_index, nvqs);
     if (ret) {
         qemu_del_net_client(nc);
-- 
2.31.1