[Date Prev][Date Next][Thread Prev][Thread Next][Date Index][Thread Index]
Re: [PATCH v4 16/19] vdpa: Buffer CVQ support on shadow virtqueue
From: |
Eugenio Perez Martin |
Subject: |
Re: [PATCH v4 16/19] vdpa: Buffer CVQ support on shadow virtqueue |
Date: |
Tue, 19 Jul 2022 07:42:34 +0200 |
On Tue, Jul 19, 2022 at 4:39 AM Jason Wang <jasowang@redhat.com> wrote:
>
>
> 在 2022/7/18 18:29, Eugenio Pérez 写道:
> > Introduce the control virtqueue support for vDPA shadow virtqueue. This
> > is needed for advanced networking features like rx filtering.
> >
> > Virtio-net control VQ copies the descriptors to qemu's VA, so we avoid
> > TOCTOU with the guest's or device's memory every time there is a device
> > model change. Otherwise, the guest could change the memory content in
> > the time between qemu reading it and the device reading it.
> >
> > To demonstrate command handling, VIRTIO_NET_F_CTRL_MACADDR is
> > implemented. If the virtio-net driver changes the MAC, the virtio-net
> > device model will be updated with the new one, and an rx filtering
> > change event will be raised.
> >
> > More cvq commands could be added here straightforwardly but they have
> > not been tested.
> >
> > Signed-off-by: Eugenio Pérez <eperezma@redhat.com>
> > ---
> > net/vhost-vdpa.c | 211 +++++++++++++++++++++++++++++++++++++++++++++--
> > 1 file changed, 204 insertions(+), 7 deletions(-)
> >
> > diff --git a/net/vhost-vdpa.c b/net/vhost-vdpa.c
> > index 2e3b6b10d8..3915b148c4 100644
> > --- a/net/vhost-vdpa.c
> > +++ b/net/vhost-vdpa.c
> > @@ -33,6 +33,9 @@ typedef struct VhostVDPAState {
> > NetClientState nc;
> > struct vhost_vdpa vhost_vdpa;
> > VHostNetState *vhost_net;
> > +
> > + /* Control commands shadow buffers */
> > + void *cvq_cmd_out_buffer, *cvq_cmd_in_buffer;
> > bool started;
> > } VhostVDPAState;
> >
> > @@ -131,6 +134,8 @@ static void vhost_vdpa_cleanup(NetClientState *nc)
> > {
> > VhostVDPAState *s = DO_UPCAST(VhostVDPAState, nc, nc);
> >
> > + qemu_vfree(s->cvq_cmd_out_buffer);
> > + qemu_vfree(s->cvq_cmd_in_buffer);
> > if (s->vhost_net) {
> > vhost_net_cleanup(s->vhost_net);
> > g_free(s->vhost_net);
> > @@ -190,24 +195,191 @@ static NetClientInfo net_vhost_vdpa_info = {
> > .check_peer_type = vhost_vdpa_check_peer_type,
> > };
> >
> > +static void vhost_vdpa_cvq_unmap_buf(struct vhost_vdpa *v, void *addr)
> > +{
> > + VhostIOVATree *tree = v->iova_tree;
> > + DMAMap needle = {
> > + /*
> > + * No need to specify size or to look for more translations since
> > + * this contiguous chunk was allocated by us.
> > + */
> > + .translated_addr = (hwaddr)(uintptr_t)addr,
> > + };
> > + const DMAMap *map = vhost_iova_tree_find_iova(tree, &needle);
> > + int r;
> > +
> > + if (unlikely(!map)) {
> > + error_report("Cannot locate expected map");
> > + return;
> > + }
> > +
> > + r = vhost_vdpa_dma_unmap(v, map->iova, map->size + 1);
> > + if (unlikely(r != 0)) {
> > + error_report("Device cannot unmap: %s(%d)", g_strerror(r), r);
> > + }
> > +
> > + vhost_iova_tree_remove(tree, map);
> > +}
> > +
> > +static size_t vhost_vdpa_net_cvq_cmd_len(void)
> > +{
> > + /*
> > + * MAC_TABLE_SET is the ctrl command that produces the longer out
> > buffer.
> > + * In buffer is always 1 byte, so it should fit here
> > + */
> > + return sizeof(struct virtio_net_ctrl_hdr) +
> > + 2 * sizeof(struct virtio_net_ctrl_mac) +
> > + MAC_TABLE_ENTRIES * ETH_ALEN;
> > +}
> > +
> > +static size_t vhost_vdpa_net_cvq_cmd_page_len(void)
> > +{
> > + return ROUND_UP(vhost_vdpa_net_cvq_cmd_len(),
> > qemu_real_host_page_size());
> > +}
> > +
> > +/** Copy and map a guest buffer. */
> > +static bool vhost_vdpa_cvq_map_buf(struct vhost_vdpa *v,
> > + const struct iovec *out_data,
> > + size_t out_num, size_t data_len, void
> > *buf,
> > + size_t *written, bool write)
> > +{
> > + DMAMap map = {};
> > + int r;
> > +
> > + if (unlikely(!data_len)) {
> > + qemu_log_mask(LOG_GUEST_ERROR, "%s: invalid legnth of %s buffer\n",
> > + __func__, write ? "in" : "out");
> > + return false;
> > + }
> > +
> > + *written = iov_to_buf(out_data, out_num, 0, buf, data_len);
> > + map.translated_addr = (hwaddr)(uintptr_t)buf;
> > + map.size = vhost_vdpa_net_cvq_cmd_page_len() - 1;
> > + map.perm = write ? IOMMU_RW : IOMMU_RO,
> > + r = vhost_iova_tree_map_alloc(v->iova_tree, &map);
> > + if (unlikely(r != IOVA_OK)) {
> > + error_report("Cannot map injected element");
> > + return false;
> > + }
> > +
> > + r = vhost_vdpa_dma_map(v, map.iova, vhost_vdpa_net_cvq_cmd_page_len(),
> > buf,
> > + !write);
> > + if (unlikely(r < 0)) {
> > + goto dma_map_err;
> > + }
> > +
> > + return true;
> > +
> > +dma_map_err:
> > + vhost_iova_tree_remove(v->iova_tree, &map);
> > + return false;
> > +}
> > +
> > /**
> > - * Forward buffer for the moment.
> > + * Copy the guest element into a dedicated buffer suitable to be sent to
> > NIC
> > + *
> > + * @iov: [0] is the out buffer, [1] is the in one
> > + */
> > +static bool vhost_vdpa_net_cvq_map_elem(VhostVDPAState *s,
> > + VirtQueueElement *elem,
> > + struct iovec *iov)
> > +{
> > + size_t in_copied;
> > + bool ok;
> > +
> > + iov[0].iov_base = s->cvq_cmd_out_buffer;
> > + ok = vhost_vdpa_cvq_map_buf(&s->vhost_vdpa, elem->out_sg,
> > elem->out_num,
> > + vhost_vdpa_net_cvq_cmd_len(),
> > iov[0].iov_base,
> > + &iov[0].iov_len, false);
> > + if (unlikely(!ok)) {
> > + return false;
> > + }
> > +
> > + iov[1].iov_base = s->cvq_cmd_in_buffer;
> > + ok = vhost_vdpa_cvq_map_buf(&s->vhost_vdpa, NULL, 0,
> > + sizeof(virtio_net_ctrl_ack),
> > iov[1].iov_base,
> > + &in_copied, true);
> > + if (unlikely(!ok)) {
> > + vhost_vdpa_cvq_unmap_buf(&s->vhost_vdpa, s->cvq_cmd_out_buffer);
> > + return false;
> > + }
> > +
> > + iov[1].iov_len = sizeof(virtio_net_ctrl_ack);
> > + return true;
> > +}
> > +
> > +/**
> > + * Do not forward commands not supported by SVQ. Otherwise, the device
> > could
> > + * accept it and qemu would not know how to update the device model.
> > + */
> > +static bool vhost_vdpa_net_cvq_validate_cmd(const struct iovec *out,
> > + size_t out_num)
> > +{
> > + struct virtio_net_ctrl_hdr ctrl;
> > + size_t n;
> > +
> > + n = iov_to_buf(out, out_num, 0, &ctrl, sizeof(ctrl));
> > + if (unlikely(n < sizeof(ctrl))) {
> > + qemu_log_mask(LOG_GUEST_ERROR,
> > + "%s: invalid legnth of out buffer %zu\n", __func__,
> > n);
> > + return false;
> > + }
> > +
> > + switch (ctrl.class) {
> > + case VIRTIO_NET_CTRL_MAC:
> > + switch (ctrl.cmd) {
> > + case VIRTIO_NET_CTRL_MAC_ADDR_SET:
> > + return true;
> > + default:
> > + qemu_log_mask(LOG_GUEST_ERROR, "%s: invalid mac cmd %u\n",
> > + __func__, ctrl.cmd);
> > + };
> > + break;
> > + default:
> > + qemu_log_mask(LOG_GUEST_ERROR, "%s: invalid control class %u\n",
> > + __func__, ctrl.class);
> > + };
> > +
> > + return false;
> > +}
> > +
> > +/**
> > + * Validate and copy control virtqueue commands.
> > + *
> > + * Following QEMU guidelines, we offer a copy of the buffers to the device
> > to
> > + * prevent TOCTOU bugs.
> > */
> > static int vhost_vdpa_net_handle_ctrl_avail(VhostShadowVirtqueue *svq,
> > VirtQueueElement *elem,
> > void *opaque)
> > {
> > - unsigned int n = elem->out_num + elem->in_num;
> > - g_autofree struct iovec *dev_buffers = g_new(struct iovec, n);
> > + VhostVDPAState *s = opaque;
> > size_t in_len, dev_written;
> > virtio_net_ctrl_ack status = VIRTIO_NET_ERR;
> > + /* out and in buffers sent to the device */
> > + struct iovec dev_buffers[2] = {
> > + { .iov_base = s->cvq_cmd_out_buffer },
> > + { .iov_base = s->cvq_cmd_in_buffer },
> > + };
> > + /* in buffer used for device model */
> > + const struct iovec in = {
> > + .iov_base = &status,
> > + .iov_len = sizeof(status),
> > + };
> > int r;
>
>
> I got this:
>
> FAILED: libcommon.fa.p/net_vhost-vdpa.c.o
> cc -m64 -mcx16 -Ilibcommon.fa.p -I../dtc/libfdt -I../slirp
> -I../slirp/src -I/usr/include/capstone -I/usr/include/pixman-1
> -I/usr/include/libpng16 -I/usr/include/libmount -I/usr/include/blkid
> -I/usr/include/glib-2.0 -I/usr/lib/x86_64-linux-gnu/glib-2.0/include
> -I/usr/include/gio-unix-2.0 -fdiagnostics-color=auto -Wall -Winvalid-pch
> -Werror -std=gnu11 -O2 -g -isystem /home/devel/git/qemu/linux-headers
> -isystem linux-headers -iquote . -iquote /home/devel/git/qemu -iquote
> /home/devel/git/qemu/include -iquote /home/devel/git/qemu/tcg/i386
> -pthread -U_FORTIFY_SOURCE -D_FORTIFY_SOURCE=2 -D_GNU_SOURCE
> -D_FILE_OFFSET_BITS=64 -D_LARGEFILE_SOURCE -Wstrict-prototypes
> -Wredundant-decls -Wundef -Wwrite-strings -Wmissing-prototypes
> -fno-strict-aliasing -fno-common -fwrapv -Wold-style-declaration
> -Wold-style-definition -Wtype-limits -Wformat-security -Wformat-y2k
> -Winit-self -Wignored-qualifiers -Wempty-body -Wnested-externs
> -Wendif-labels -Wexpansion-to-defined -Wimplicit-fallthrough=2
> -Wno-missing-include-dirs -Wno-shift-negative-value -Wno-psabi
> -fstack-protector-strong -fPIE -D_DEFAULT_SOURCE -D_XOPEN_SOURCE=600
> -DNCURSES_WIDECHAR=1 -MD -MQ libcommon.fa.p/net_vhost-vdpa.c.o -MF
> libcommon.fa.p/net_vhost-vdpa.c.o.d -o libcommon.fa.p/net_vhost-vdpa.c.o
> -c ../net/vhost-vdpa.c
> ../net/vhost-vdpa.c: In function ‘vhost_vdpa_net_handle_ctrl_avail’:
> ../net/vhost-vdpa.c:427:12: error: ‘r’ may be used uninitialized in this
> function [-Werror=maybe-uninitialized]
> 427 | return r;
> | ^
> cc1: all warnings being treated as errors
>
Sorry, I rebased with these applied (or half applied) [1].
I'll send a new version based on the current master in a moment.
Thanks!
[1] https://lists.nongnu.org/archive/html/qemu-devel/2022-07/msg01986.html
>
> > + bool ok;
> >
> > - memcpy(dev_buffers, elem->out_sg, elem->out_num);
> > - memcpy(dev_buffers + elem->out_num, elem->in_sg, elem->in_num);
> > + ok = vhost_vdpa_net_cvq_map_elem(s, elem, dev_buffers);
>
>
> I think it's a good idea to let this function return int instead of a boolean.
>
> Thanks
>
>
> > + if (unlikely(!ok)) {
> > + goto out;
> > + }
> >
> > - r = vhost_svq_add(svq, &dev_buffers[0], elem->out_num, &dev_buffers[1],
> > - elem->in_num, elem);
> > + ok = vhost_vdpa_net_cvq_validate_cmd(&dev_buffers[0], 1);
> > + if (unlikely(!ok)) {
> > + goto out;
> > + }
> > +
> > + r = vhost_svq_add(svq, &dev_buffers[0], 1, &dev_buffers[1], 1, elem);
> > if (unlikely(r != 0)) {
> > if (unlikely(r == -ENOSPC)) {
> > qemu_log_mask(LOG_GUEST_ERROR, "%s: No space on device
> > queue\n",
> > @@ -224,6 +396,18 @@ static int
> > vhost_vdpa_net_handle_ctrl_avail(VhostShadowVirtqueue *svq,
> > dev_written = vhost_svq_poll(svq);
> > if (unlikely(dev_written < sizeof(status))) {
> > error_report("Insufficient written data (%zu)", dev_written);
> > + goto out;
> > + }
> > +
> > + memcpy(&status, dev_buffers[1].iov_base, sizeof(status));
> > + if (status != VIRTIO_NET_OK) {
> > + goto out;
> > + }
> > +
> > + status = VIRTIO_NET_ERR;
> > + virtio_net_handle_ctrl_iov(svq->vdev, &in, 1, dev_buffers, 1);
> > + if (status != VIRTIO_NET_OK) {
> > + error_report("Bad CVQ processing in model");
> > }
> >
> > out:
> > @@ -234,6 +418,12 @@ out:
> > }
> > vhost_svq_push_elem(svq, elem, MIN(in_len, sizeof(status)));
> > g_free(elem);
> > + if (dev_buffers[0].iov_base) {
> > + vhost_vdpa_cvq_unmap_buf(&s->vhost_vdpa, dev_buffers[0].iov_base);
> > + }
> > + if (dev_buffers[1].iov_base) {
> > + vhost_vdpa_cvq_unmap_buf(&s->vhost_vdpa, dev_buffers[1].iov_base);
> > + }
> > return r;
> > }
> >
> > @@ -266,6 +456,13 @@ static NetClientState
> > *net_vhost_vdpa_init(NetClientState *peer,
> > s->vhost_vdpa.device_fd = vdpa_device_fd;
> > s->vhost_vdpa.index = queue_pair_index;
> > if (!is_datapath) {
> > + s->cvq_cmd_out_buffer = qemu_memalign(qemu_real_host_page_size(),
> > +
> > vhost_vdpa_net_cvq_cmd_page_len());
> > + memset(s->cvq_cmd_out_buffer, 0,
> > vhost_vdpa_net_cvq_cmd_page_len());
> > + s->cvq_cmd_in_buffer = qemu_memalign(qemu_real_host_page_size(),
> > +
> > vhost_vdpa_net_cvq_cmd_page_len());
> > + memset(s->cvq_cmd_in_buffer, 0, vhost_vdpa_net_cvq_cmd_page_len());
> > +
> > s->vhost_vdpa.shadow_vq_ops = &vhost_vdpa_net_svq_ops;
> > s->vhost_vdpa.shadow_vq_ops_opaque = s;
> > }
>
- [PATCH v4 11/19] vhost: Expose vhost_svq_add, (continued)
- [PATCH v4 11/19] vhost: Expose vhost_svq_add, Eugenio Pérez, 2022/07/18
- [PATCH v4 10/19] vhost: add vhost_svq_push_elem, Eugenio Pérez, 2022/07/18
- [PATCH v4 07/19] vhost: Decouple vhost_svq_add from VirtQueueElement, Eugenio Pérez, 2022/07/18
- [PATCH v4 14/19] vdpa: Export vhost_vdpa_dma_map and unmap calls, Eugenio Pérez, 2022/07/18
- [PATCH v4 12/19] vhost: add vhost_svq_poll, Eugenio Pérez, 2022/07/18
- [PATCH v4 13/19] vhost: Add svq avail_handler callback, Eugenio Pérez, 2022/07/18
- [PATCH v4 09/19] vhost: Track number of descs in SVQDescState, Eugenio Pérez, 2022/07/18
- [PATCH v4 15/19] vdpa: manual forward CVQ buffers, Eugenio Pérez, 2022/07/18
- [PATCH v4 16/19] vdpa: Buffer CVQ support on shadow virtqueue, Eugenio Pérez, 2022/07/18
- [PATCH v4 17/19] vdpa: Extract get features part from vhost_vdpa_get_max_queue_pairs, Eugenio Pérez, 2022/07/18
- [PATCH v4 18/19] vdpa: Add device migration blocker, Eugenio Pérez, 2022/07/18
- [PATCH v4 19/19] vdpa: Add x-svq to NetdevVhostVDPAOptions, Eugenio Pérez, 2022/07/18