[Date Prev][Date Next][Thread Prev][Thread Next][Date Index][Thread Index]
[RFC PATCH v9 20/23] vdpa: Buffer CVQ support on shadow virtqueue
From: |
Eugenio Pérez |
Subject: |
[RFC PATCH v9 20/23] vdpa: Buffer CVQ support on shadow virtqueue |
Date: |
Wed, 6 Jul 2022 20:40:05 +0200 |
Introduce the control virtqueue support for vDPA shadow virtqueue. This
is needed for advanced networking features like multiqueue.
Virtio-net control VQ will copy the descriptors to qemu's VA, so we
avoid TOCTOU with the guest's or device's memory every time there is a
device model change. When address space isolation is implemented, this
will allow CVQ to only have access to control messages too.
To demonstrate command handling, VIRTIO_NET_F_CTRL_MAC_ADDR is
implemented. If the virtio-net driver changes the MAC address, the
virtio-net device model will be updated with the new one.
Other CVQ commands could be added here straightforwardly, but they have
not been tested.
Signed-off-by: Eugenio Pérez <eperezma@redhat.com>
---
include/hw/virtio/vhost-vdpa.h | 3 +
hw/virtio/vhost-vdpa.c | 5 +-
net/vhost-vdpa.c | 373 +++++++++++++++++++++++++++++++++
3 files changed, 379 insertions(+), 2 deletions(-)
diff --git a/include/hw/virtio/vhost-vdpa.h b/include/hw/virtio/vhost-vdpa.h
index 7214eb47dc..1111d85643 100644
--- a/include/hw/virtio/vhost-vdpa.h
+++ b/include/hw/virtio/vhost-vdpa.h
@@ -15,6 +15,7 @@
#include <gmodule.h>
#include "hw/virtio/vhost-iova-tree.h"
+#include "hw/virtio/vhost-shadow-virtqueue.h"
#include "hw/virtio/virtio.h"
#include "standard-headers/linux/vhost_types.h"
@@ -35,6 +36,8 @@ typedef struct vhost_vdpa {
/* IOVA mapping used by the Shadow Virtqueue */
VhostIOVATree *iova_tree;
GPtrArray *shadow_vqs;
+ const VhostShadowVirtqueueOps *shadow_vq_ops;
+ void *shadow_vq_ops_opaque;
struct vhost_dev *dev;
VhostVDPAHostNotifier notifier[VIRTIO_QUEUE_MAX];
} VhostVDPA;
diff --git a/hw/virtio/vhost-vdpa.c b/hw/virtio/vhost-vdpa.c
index 613c3483b0..94bda07b4d 100644
--- a/hw/virtio/vhost-vdpa.c
+++ b/hw/virtio/vhost-vdpa.c
@@ -417,9 +417,10 @@ static int vhost_vdpa_init_svq(struct vhost_dev *hdev,
struct vhost_vdpa *v,
shadow_vqs = g_ptr_array_new_full(hdev->nvqs, vhost_svq_free);
for (unsigned n = 0; n < hdev->nvqs; ++n) {
- g_autoptr(VhostShadowVirtqueue) svq = vhost_svq_new(v->iova_tree, NULL,
- NULL);
+ g_autoptr(VhostShadowVirtqueue) svq = NULL;
+ svq = vhost_svq_new(v->iova_tree, v->shadow_vq_ops,
+ v->shadow_vq_ops_opaque);
if (unlikely(!svq)) {
error_setg(errp, "Cannot create svq %u", n);
return -1;
diff --git a/net/vhost-vdpa.c b/net/vhost-vdpa.c
index b0158f625e..e415cc8de5 100644
--- a/net/vhost-vdpa.c
+++ b/net/vhost-vdpa.c
@@ -11,11 +11,15 @@
#include "qemu/osdep.h"
#include "clients.h"
+#include "hw/virtio/virtio-net.h"
#include "net/vhost_net.h"
#include "net/vhost-vdpa.h"
#include "hw/virtio/vhost-vdpa.h"
+#include "qemu/buffer.h"
#include "qemu/config-file.h"
#include "qemu/error-report.h"
+#include "qemu/log.h"
+#include "qemu/memalign.h"
#include "qemu/option.h"
#include "qapi/error.h"
#include <linux/vhost.h>
@@ -25,6 +29,26 @@
#include "monitor/monitor.h"
#include "hw/virtio/vhost.h"
/*
 * Shadow copy of a guest control virtqueue element. The in/out buffers are
 * qemu-owned and are the ones actually exposed to the vdpa device, so the
 * guest cannot modify a command after it has been validated (TOCTOU).
 */
typedef struct CVQElement {
    /* Device's in and out buffer */
    void *in_buf, *out_buf;

    /* Optional guest element from where this cvqelement was created */
    VirtQueueElement *guest_elem;

    /* Control header sent by the guest. */
    struct virtio_net_ctrl_hdr ctrl;

    /* vhost-vdpa device, for cleanup reasons */
    struct vhost_vdpa *vdpa;

    /* Length of out data, including the control header */
    size_t out_data[];

    /* Copy of the out data sent by the guest excluding ctrl. */
    uint8_t out_data[];
} CVQElement;
+
/* Todo:need to add the multiqueue support here */
typedef struct VhostVDPAState {
NetClientState nc;
@@ -187,6 +211,351 @@ static NetClientInfo net_vhost_vdpa_info = {
.check_peer_type = vhost_vdpa_check_peer_type,
};
/**
 * Unmap one device-visible buffer of a shadow CVQ element and free it
 *
 * @elem: CVQ element the buffer belongs to
 * @addr: qemu's VA of the buffer to unmap (elem->in_buf or elem->out_buf)
 *
 * Prints an error message in case of error. The buffer is always freed,
 * even when its IOVA mapping could not be located or removed.
 */
static void vhost_vdpa_cvq_unmap_buf(CVQElement *elem, void *addr)
{
    struct vhost_vdpa *v = elem->vdpa;
    VhostIOVATree *tree = v->iova_tree;
    DMAMap needle = {
        /*
         * No need to specify size or to look for more translations since
         * this contiguous chunk was allocated by us.
         */
        .translated_addr = (hwaddr)(uintptr_t)addr,
    };
    const DMAMap *map = vhost_iova_tree_find_iova(tree, &needle);
    int r;

    if (unlikely(!map)) {
        error_report("Cannot locate expected map");
        goto err;
    }

    /* DMAMap stores size as (length - 1), hence the +1 */
    r = vhost_vdpa_dma_unmap(v, map->iova, map->size + 1);
    if (unlikely(r != 0)) {
        /* NOTE(review): confirm the sign convention of r for g_strerror */
        error_report("Device cannot unmap: %s(%d)", g_strerror(r), r);
    }

    vhost_iova_tree_remove(tree, map);

err:
    qemu_vfree(addr);
}
+
+static void vhost_vdpa_cvq_delete_elem(CVQElement *elem)
+{
+ if (elem->out_buf) {
+ vhost_vdpa_cvq_unmap_buf(elem, g_steal_pointer(&elem->out_buf));
+ }
+
+ if (elem->in_buf) {
+ vhost_vdpa_cvq_unmap_buf(elem, g_steal_pointer(&elem->in_buf));
+ }
+
+ /* Guest element must have been returned to the guest or free otherway */
+ assert(!elem->guest_elem);
+
+ g_free(elem);
+}
+G_DEFINE_AUTOPTR_CLEANUP_FUNC(CVQElement, vhost_vdpa_cvq_delete_elem);
+
+static int vhost_vdpa_net_cvq_svq_inject(VhostShadowVirtqueue *svq,
+ CVQElement *cvq_elem,
+ size_t out_len)
+{
+ const struct iovec iov[] = {
+ {
+ .iov_base = cvq_elem->out_buf,
+ .iov_len = out_len,
+ },{
+ .iov_base = cvq_elem->in_buf,
+ .iov_len = sizeof(virtio_net_ctrl_ack),
+ }
+ };
+
+ return vhost_svq_inject(svq, iov, 1, 1, cvq_elem);
+}
+
+static void *vhost_vdpa_cvq_alloc_buf(struct vhost_vdpa *v,
+ const uint8_t *out_data, size_t data_len,
+ bool write)
+{
+ DMAMap map = {};
+ size_t buf_len = ROUND_UP(data_len, qemu_real_host_page_size());
+ void *buf = qemu_memalign(qemu_real_host_page_size(), buf_len);
+ int r;
+
+ if (!write) {
+ memcpy(buf, out_data, data_len);
+ memset(buf + data_len, 0, buf_len - data_len);
+ } else {
+ memset(buf, 0, data_len);
+ }
+
+ map.translated_addr = (hwaddr)(uintptr_t)buf;
+ map.size = buf_len - 1;
+ map.perm = write ? IOMMU_RW : IOMMU_RO,
+ r = vhost_iova_tree_map_alloc(v->iova_tree, &map);
+ if (unlikely(r != IOVA_OK)) {
+ error_report("Cannot map injected element");
+ goto err;
+ }
+
+ r = vhost_vdpa_dma_map(v, map.iova, buf_len, buf, !write);
+ /* TODO: Handle error */
+ assert(r == 0);
+
+ return buf;
+
+err:
+ qemu_vfree(buf);
+ return NULL;
+}
+
+/**
+ * Allocate an element suitable to be injected
+ *
+ * @iov: The iovec
+ * @out_num: Number of out elements, placed first in iov
+ * @in_num: Number of in elements, placed after out ones
+ * @elem: Optional guest element from where this one was created
+ *
+ * TODO: Do we need a sg for out_num? I think not
+ */
+static CVQElement *vhost_vdpa_cvq_alloc_elem(VhostVDPAState *s,
+ struct virtio_net_ctrl_hdr ctrl,
+ const struct iovec *out_sg,
+ size_t out_num, size_t out_size,
+ VirtQueueElement *elem)
+{
+ g_autoptr(CVQElement) cvq_elem = g_malloc(sizeof(CVQElement) + out_size);
+ uint8_t *out_cursor = cvq_elem->out_data;
+ struct vhost_vdpa *v = &s->vhost_vdpa;
+
+ /* Start with a clean base */
+ memset(cvq_elem, 0, sizeof(*cvq_elem));
+ cvq_elem->vdpa = &s->vhost_vdpa;
+
+ /*
+ * Linearize element. If guest had a descriptor chain, we expose the device
+ * a single buffer.
+ */
+ cvq_elem->out_len = out_size;
+ memcpy(out_cursor, &ctrl, sizeof(ctrl));
+ out_size -= sizeof(ctrl);
+ out_cursor += sizeof(ctrl);
+ iov_to_buf(out_sg, out_num, 0, out_cursor, out_size);
+
+ cvq_elem->out_buf = vhost_vdpa_cvq_alloc_buf(v, cvq_elem->out_data,
+ out_size, false);
+ assert(cvq_elem->out_buf);
+ cvq_elem->in_buf = vhost_vdpa_cvq_alloc_buf(v, NULL,
+ sizeof(virtio_net_ctrl_ack),
+ true);
+ assert(cvq_elem->in_buf);
+
+ cvq_elem->guest_elem = elem;
+ cvq_elem->ctrl = ctrl;
+ return g_steal_pointer(&cvq_elem);
+}
+
/**
 * iov_size with an upper limit. UINT64_MAX is assumed to be an invalid
 * iov_size and is returned if the running sum would overflow.
 */
static uint64_t vhost_vdpa_net_iov_len(const struct iovec *iov,
                                       unsigned int iov_cnt, size_t max)
{
    uint64_t total = 0;

    for (unsigned int i = 0; i < iov_cnt && total < max; i++) {
        uint64_t chunk = iov[i].iov_len;

        /* Detect the wrap before adding instead of after */
        if (chunk > UINT64_MAX - total) {
            return UINT64_MAX;
        }
        total += chunk;
    }

    return total;
}
+
+static CVQElement *vhost_vdpa_net_cvq_copy_elem(VhostVDPAState *s,
+ VirtQueueElement *elem)
+{
+ struct virtio_net_ctrl_hdr ctrl;
+ g_autofree struct iovec *iov = NULL;
+ struct iovec *iov2;
+ unsigned int out_num = elem->out_num;
+ size_t n, out_size = 0;
+
+ /* TODO: in buffer MUST have only a single entry with a char? size */
+ if (unlikely(vhost_vdpa_net_iov_len(elem->in_sg, elem->in_num,
+ sizeof(virtio_net_ctrl_ack))
+ < sizeof(virtio_net_ctrl_ack))) {
+ return NULL;
+ }
+
+ n = iov_to_buf(elem->out_sg, out_num, 0, &ctrl, sizeof(ctrl));
+ if (unlikely(n != sizeof(ctrl))) {
+ qemu_log_mask(LOG_GUEST_ERROR, "%s: invalid out size\n", __func__);
+ return NULL;
+ }
+
+ iov = iov2 = g_memdup2(elem->out_sg, sizeof(struct iovec) * elem->out_num);
+ iov_discard_front(&iov2, &out_num, sizeof(ctrl));
+ switch (ctrl.class) {
+ case VIRTIO_NET_CTRL_MAC:
+ switch (ctrl.cmd) {
+ case VIRTIO_NET_CTRL_MAC_ADDR_SET:
+ if (likely(vhost_vdpa_net_iov_len(iov2, out_num, 6))) {
+ out_size += 6;
+ break;
+ }
+
+ qemu_log_mask(LOG_GUEST_ERROR, "%s: invalid mac size\n", __func__);
+ return NULL;
+ default:
+ qemu_log_mask(LOG_GUEST_ERROR, "%s: invalid mac cmd %u\n",
+ __func__, ctrl.cmd);
+ return NULL;
+ };
+ break;
+ default:
+ qemu_log_mask(LOG_GUEST_ERROR, "%s: invalid control class %u\n",
+ __func__, ctrl.class);
+ return NULL;
+ };
+
+ return vhost_vdpa_cvq_alloc_elem(s, ctrl, iov2, out_num,
+ sizeof(ctrl) + out_size, elem);
+}
+
/**
 * Validate and copy control virtqueue commands.
 *
 * Following QEMU guidelines, we offer a copy of the buffers to the device to
 * prevent TOCTOU bugs. This function also checks that the buffer lengths are
 * the expected ones.
 *
 * Ownership: on success both the CVQ copy and the guest element are handed to
 * the SVQ (released later by the used/detach handlers); on failure the guest
 * element is completed here with VIRTIO_NET_ERR.
 *
 * Always returns true: the descriptor is consumed either way.
 */
static bool vhost_vdpa_net_handle_ctrl_avail(VhostShadowVirtqueue *svq,
                                             VirtQueueElement *guest_elem,
                                             void *opaque)
{
    VhostVDPAState *s = opaque;
    g_autoptr(CVQElement) cvq_elem = NULL;
    g_autofree VirtQueueElement *elem = guest_elem;
    size_t out_size, in_len;
    virtio_net_ctrl_ack status = VIRTIO_NET_ERR;
    int r;

    cvq_elem = vhost_vdpa_net_cvq_copy_elem(s, elem);
    if (unlikely(!cvq_elem)) {
        goto err;
    }

    /* out size validated at vhost_vdpa_net_cvq_copy_elem */
    out_size = iov_size(elem->out_sg, elem->out_num);
    r = vhost_vdpa_net_cvq_svq_inject(svq, cvq_elem, out_size);
    if (unlikely(r != 0)) {
        goto err;
    }

    /* Transfer ownership: clear the autoptrs so cleanup does not fire */
    cvq_elem->guest_elem = g_steal_pointer(&elem);
    /* Now CVQ elem belongs to SVQ */
    g_steal_pointer(&cvq_elem);
    return true;

err:
    /* Complete the guest element immediately with an error status */
    in_len = iov_from_buf(elem->in_sg, elem->in_num, 0, &status,
                          sizeof(status));
    vhost_svq_push_elem(svq, elem, in_len);
    return true;
}
+
+static VirtQueueElement *vhost_vdpa_net_handle_ctrl_detach(void *elem_opaque)
+{
+ g_autoptr(CVQElement) cvq_elem = elem_opaque;
+ return g_steal_pointer(&cvq_elem->guest_elem);
+}
+
+static void vhost_vdpa_net_handle_ctrl_used(VhostShadowVirtqueue *svq,
+ void *vq_elem_opaque,
+ uint32_t dev_written)
+{
+ g_autoptr(CVQElement) cvq_elem = vq_elem_opaque;
+ virtio_net_ctrl_ack status = VIRTIO_NET_ERR;
+ const struct iovec out = {
+ .iov_base = cvq_elem->out_data,
+ .iov_len = cvq_elem->out_len,
+ };
+ const DMAMap status_map_needle = {
+ .translated_addr = (hwaddr)(uintptr_t)cvq_elem->in_buf,
+ .size = sizeof(status),
+ };
+ const DMAMap *in_map;
+ const struct iovec in = {
+ .iov_base = &status,
+ .iov_len = sizeof(status),
+ };
+ g_autofree VirtQueueElement *guest_elem = NULL;
+
+ if (unlikely(dev_written < sizeof(status))) {
+ error_report("Insufficient written data (%llu)",
+ (long long unsigned)dev_written);
+ goto out;
+ }
+
+ in_map = vhost_iova_tree_find_iova(svq->iova_tree, &status_map_needle);
+ if (unlikely(!in_map)) {
+ error_report("Cannot locate out mapping");
+ goto out;
+ }
+
+ switch (cvq_elem->ctrl.class) {
+ case VIRTIO_NET_CTRL_MAC_ADDR_SET:
+ break;
+ default:
+ error_report("Unexpected ctrl class %u", cvq_elem->ctrl.class);
+ goto out;
+ };
+
+ memcpy(&status, cvq_elem->in_buf, sizeof(status));
+ if (status != VIRTIO_NET_OK) {
+ goto out;
+ }
+
+ status = VIRTIO_NET_ERR;
+ virtio_net_handle_ctrl_iov(svq->vdev, &in, 1, &out, 1);
+ if (status != VIRTIO_NET_OK) {
+ error_report("Bad CVQ processing in model");
+ goto out;
+ }
+
+out:
+ guest_elem = g_steal_pointer(&cvq_elem->guest_elem);
+ if (guest_elem) {
+ iov_from_buf(guest_elem->in_sg, guest_elem->in_num, 0, &status,
+ sizeof(status));
+ vhost_svq_push_elem(svq, guest_elem, sizeof(status));
+ }
+}
+
/* SVQ callbacks used for the virtio-net control virtqueue */
static const VhostShadowVirtqueueOps vhost_vdpa_net_svq_ops = {
    .avail_handler = vhost_vdpa_net_handle_ctrl_avail,
    .used_handler = vhost_vdpa_net_handle_ctrl_used,
    .detach_handler = vhost_vdpa_net_handle_ctrl_detach,
};
+
static NetClientState *net_vhost_vdpa_init(NetClientState *peer,
const char *device,
const char *name,
@@ -211,6 +580,10 @@ static NetClientState *net_vhost_vdpa_init(NetClientState
*peer,
s->vhost_vdpa.device_fd = vdpa_device_fd;
s->vhost_vdpa.index = queue_pair_index;
+ if (!is_datapath) {
+ s->vhost_vdpa.shadow_vq_ops = &vhost_vdpa_net_svq_ops;
+ s->vhost_vdpa.shadow_vq_ops_opaque = s;
+ }
ret = vhost_vdpa_add(nc, (void *)&s->vhost_vdpa, queue_pair_index, nvqs);
if (ret) {
qemu_del_net_client(nc);
--
2.31.1
[RFC PATCH v9 23/23] vdpa: Add x-svq to NetdevVhostVDPAOptions, Eugenio Pérez, 2022/07/06