From: Eugenio Pérez
Subject: [RFC PATCH v5 26/26] vdpa: Add custom IOTLB translations to SVQ
Date: Fri, 29 Oct 2021 20:35:25 +0200
Use the translations added in VhostIOVATree in SVQ.

Every element now also needs to remember its IOVA / GPA addresses, so
the VirtQueue can consume the elements properly. This adds a little
overhead per VQ element, since more memory must be allocated to stash
them. As a possible optimization, this allocation could be avoided if
the descriptor is a single one rather than a chain, but that is left
undone.
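In sketch form, the extra state each in-flight element carries (this
mirrors the SVQElement struct added below; the comments are explanatory
only):

    typedef struct SVQElement {
        VirtQueueElement elem;  /* guest's element; iovecs hold qemu vaddrs */
        void **vaddr_in_sg;     /* device-visible addresses for in_sg */
        void **vaddr_out_sg;    /* device-visible addresses for out_sg */
    } SVQElement;

The two pointer arrays are the per-element overhead mentioned above.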
Signed-off-by: Eugenio Pérez <eperezma@redhat.com>
---
hw/virtio/vhost-shadow-virtqueue.h |   4 +-
hw/virtio/vhost-shadow-virtqueue.c | 156 ++++++++++++++++++++++-------
hw/virtio/vhost-vdpa.c             |  35 ++++++-
3 files changed, 159 insertions(+), 36 deletions(-)
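Note for reviewers: the per-descriptor translation added below boils
down to the following sketch. DMAMap and vhost_iova_tree_find_iova come
from earlier patches in this series; the standalone helper and its name
are illustrative only, not code from this patch.

    static bool svq_translate_one(VhostIOVATree *tree, void *qemu_vaddr,
                                  size_t len, hwaddr *iova)
    {
        /* The tree is searched by translated (qemu virtual) address */
        DMAMap needle = {
            .translated_addr = (hwaddr)qemu_vaddr,
            .size = len,
        };
        const DMAMap *map = vhost_iova_tree_find_iova(tree, &needle);

        if (!map) {
            /* The tree should already contain all guest memory */
            return false;
        }

        /* Preserve the offset of the address within its mapped chunk */
        *iova = map->iova + (needle.translated_addr - map->translated_addr);
        return true;
    }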
diff --git a/hw/virtio/vhost-shadow-virtqueue.h b/hw/virtio/vhost-shadow-virtqueue.h
index ac55588009..903b9f7a14 100644
--- a/hw/virtio/vhost-shadow-virtqueue.h
+++ b/hw/virtio/vhost-shadow-virtqueue.h
@@ -12,6 +12,7 @@
#include "hw/virtio/vhost.h"
#include "qemu/event_notifier.h"
+#include "hw/virtio/vhost-iova-tree.h"
typedef struct VhostShadowVirtqueue VhostShadowVirtqueue;
@@ -35,7 +36,8 @@ void vhost_svq_start(struct vhost_dev *dev, unsigned idx,
void vhost_svq_stop(struct vhost_dev *dev, unsigned idx,
VhostShadowVirtqueue *svq);
-VhostShadowVirtqueue *vhost_svq_new(struct vhost_dev *dev, int idx);
+VhostShadowVirtqueue *vhost_svq_new(struct vhost_dev *dev, int idx,
+ VhostIOVATree *iova_map);
void vhost_svq_free(VhostShadowVirtqueue *vq);
diff --git a/hw/virtio/vhost-shadow-virtqueue.c b/hw/virtio/vhost-shadow-virtqueue.c
index ad1b2342be..7ab506f9e7 100644
--- a/hw/virtio/vhost-shadow-virtqueue.c
+++ b/hw/virtio/vhost-shadow-virtqueue.c
@@ -11,12 +11,19 @@
#include "hw/virtio/vhost-shadow-virtqueue.h"
#include "hw/virtio/vhost.h"
#include "hw/virtio/virtio-access.h"
+#include "hw/virtio/vhost-iova-tree.h"
#include "standard-headers/linux/vhost_types.h"
#include "qemu/error-report.h"
#include "qemu/main-loop.h"
+typedef struct SVQElement {
+ VirtQueueElement elem;
+ void **vaddr_in_sg;
+ void **vaddr_out_sg;
+} SVQElement;
+
/* Shadow virtqueue to relay notifications */
typedef struct VhostShadowVirtqueue {
/* Shadow vring */
@@ -49,11 +56,14 @@ typedef struct VhostShadowVirtqueue {
/* Virtio device */
VirtIODevice *vdev;
+ /* IOVA mapping if used */
+ VhostIOVATree *iova_map;
+
/* Map for returning guest's descriptors */
- VirtQueueElement **ring_id_maps;
+ SVQElement **ring_id_maps;
- /* Next VirtQueue element that guest made available */
- VirtQueueElement *next_guest_avail_elem;
+ /* Next SVQ element that guest made available */
+ SVQElement *next_guest_avail_elem;
/* Next head to expose to device */
uint16_t avail_idx_shadow;
@@ -80,6 +90,14 @@ const EventNotifier *vhost_svq_get_dev_kick_notifier(
return &svq->hdev_kick;
}
+static void vhost_svq_elem_free(SVQElement *elem)
+{
+ g_free(elem->vaddr_in_sg);
+ g_free(elem->vaddr_out_sg);
+ g_free(elem);
+}
+G_DEFINE_AUTOPTR_CLEANUP_FUNC(SVQElement, vhost_svq_elem_free)
+
/**
* VirtIO transport device feature acknowledge
*
@@ -99,13 +117,7 @@ bool vhost_svq_valid_device_features(uint64_t *dev_features)
continue;
case VIRTIO_F_ACCESS_PLATFORM:
- /* SVQ does not know how to translate addresses */
- if (*dev_features & BIT_ULL(b)) {
- clear_bit(b, dev_features);
- r = false;
- }
- break;
-
+ /* SVQ trusts the host's IOMMU to translate addresses */
case VIRTIO_F_VERSION_1:
/* SVQ trusts that the guest vring is little endian */
if (!(*dev_features & BIT_ULL(b))) {
@@ -175,7 +187,49 @@ static void vhost_svq_set_notification(VhostShadowVirtqueue *svq, bool enable)
}
}
+static bool vhost_svq_translate_addr(const VhostShadowVirtqueue *svq,
+ void ***vaddr, const struct iovec *iovec,
+ size_t num)
+{
+ size_t i;
+
+ if (num == 0) {
+ return true;
+ }
+
+ g_autofree void **addrs = g_new(void *, num);
+ for (i = 0; i < num; ++i) {
+ DMAMap needle = {
+ .translated_addr = (hwaddr)iovec[i].iov_base,
+ .size = iovec[i].iov_len,
+ };
+ size_t off;
+
+ const DMAMap *map = vhost_iova_tree_find_iova(svq->iova_map, &needle);
+ /*
+ * Map cannot be NULL, since the IOVA map contains all guest memory and
+ * qemu already has a physical address mapped
+ */
+ if (unlikely(!map)) {
+ error_report("Invalid address 0x%"HWADDR_PRIx" given by guest",
+ needle.translated_addr);
+ return false;
+ }
+
+ /*
+ * Map->iova chunk size is ignored. What to do if the descriptor
+ * (addr, size) does not fit is delegated to the device.
+ */
+ off = needle.translated_addr - map->translated_addr;
+ addrs[i] = (void *)(map->iova + off);
+ }
+
+ *vaddr = g_steal_pointer(&addrs);
+ return true;
+}
+
static void vhost_vring_write_descs(VhostShadowVirtqueue *svq,
+ void * const *vaddr_sg,
const struct iovec *iovec,
size_t num, bool more_descs, bool write)
{
@@ -194,7 +248,7 @@ static void vhost_vring_write_descs(VhostShadowVirtqueue *svq,
} else {
descs[i].flags = flags;
}
- descs[i].addr = cpu_to_le64((hwaddr)iovec[n].iov_base);
+ descs[i].addr = cpu_to_le64((hwaddr)vaddr_sg[n]);
descs[i].len = cpu_to_le32(iovec[n].iov_len);
last = i;
@@ -204,43 +258,62 @@ static void vhost_vring_write_descs(VhostShadowVirtqueue *svq,
svq->free_head = le16_to_cpu(descs[last].next);
}
-static unsigned vhost_svq_add_split(VhostShadowVirtqueue *svq,
- VirtQueueElement *elem)
+static bool vhost_svq_add_split(VhostShadowVirtqueue *svq,
+ SVQElement *svq_elem,
+ unsigned *head)
{
- int head;
+ VirtQueueElement *elem = &svq_elem->elem;
unsigned avail_idx;
vring_avail_t *avail = svq->vring.avail;
+ bool ok;
- head = svq->free_head;
+ *head = svq->free_head;
/* We need some descriptors here */
assert(elem->out_num || elem->in_num);
- vhost_vring_write_descs(svq, elem->out_sg, elem->out_num,
- elem->in_num > 0, false);
- vhost_vring_write_descs(svq, elem->in_sg, elem->in_num, false, true);
+ ok = vhost_svq_translate_addr(svq, &svq_elem->vaddr_in_sg, elem->in_sg,
+ elem->in_num);
+ if (unlikely(!ok)) {
+ return false;
+ }
+ ok = vhost_svq_translate_addr(svq, &svq_elem->vaddr_out_sg, elem->out_sg,
+ elem->out_num);
+ if (unlikely(!ok)) {
+ return false;
+ }
+
+ vhost_vring_write_descs(svq, svq_elem->vaddr_out_sg, elem->out_sg,
+ elem->out_num, elem->in_num > 0, false);
+ vhost_vring_write_descs(svq, svq_elem->vaddr_in_sg, elem->in_sg,
+ elem->in_num, false, true);
/*
* Put entry in available array (but don't update avail->idx until they
* do sync).
*/
avail_idx = svq->avail_idx_shadow & (svq->vring.num - 1);
- avail->ring[avail_idx] = cpu_to_le16(head);
+ avail->ring[avail_idx] = cpu_to_le16(*head);
svq->avail_idx_shadow++;
/* Update avail index after the descriptor is written */
smp_wmb();
avail->idx = cpu_to_le16(svq->avail_idx_shadow);
- return head;
+ return true;
}
-static void vhost_svq_add(VhostShadowVirtqueue *svq, VirtQueueElement *elem)
+static bool vhost_svq_add(VhostShadowVirtqueue *svq, SVQElement *elem)
{
- unsigned qemu_head = vhost_svq_add_split(svq, elem);
+ unsigned qemu_head;
+ bool ok = vhost_svq_add_split(svq, elem, &qemu_head);
+ if (unlikely(!ok)) {
+ return false;
+ }
svq->ring_id_maps[qemu_head] = elem;
+ return true;
}
static void vhost_svq_kick(VhostShadowVirtqueue *svq)
@@ -284,7 +357,8 @@ static void vhost_handle_guest_kick(VhostShadowVirtqueue *svq)
}
while (true) {
- VirtQueueElement *elem;
+ SVQElement *elem;
+ bool ok;
if (svq->next_guest_avail_elem) {
elem = g_steal_pointer(&svq->next_guest_avail_elem);
@@ -296,7 +370,7 @@ static void vhost_handle_guest_kick(VhostShadowVirtqueue *svq)
break;
}
- if (elem->out_num + elem->in_num >
+ if (elem->elem.out_num + elem->elem.in_num >
vhost_svq_available_slots(svq)) {
/*
* This condition is possible since a contiguous buffer in GPA
@@ -313,7 +387,11 @@ static void vhost_handle_guest_kick(VhostShadowVirtqueue *svq)
return;
}
- vhost_svq_add(svq, elem);
+ ok = vhost_svq_add(svq, elem);
+ if (unlikely(!ok)) {
+ /* VQ is broken, just return and ignore any other kicks */
+ return;
+ }
vhost_svq_kick(svq);
}
@@ -344,7 +422,7 @@ static bool vhost_svq_more_used(VhostShadowVirtqueue *svq)
return svq->last_used_idx != svq->shadow_used_idx;
}
-static VirtQueueElement *vhost_svq_get_buf(VhostShadowVirtqueue *svq)
+static SVQElement *vhost_svq_get_buf(VhostShadowVirtqueue *svq)
{
vring_desc_t *descs = svq->vring.desc;
const vring_used_t *used = svq->vring.used;
@@ -378,7 +456,7 @@ static VirtQueueElement *vhost_svq_get_buf(VhostShadowVirtqueue *svq)
descs[used_elem.id].next = svq->free_head;
svq->free_head = used_elem.id;
- svq->ring_id_maps[used_elem.id]->len = used_elem.len;
+ svq->ring_id_maps[used_elem.id]->elem.len = used_elem.len;
return g_steal_pointer(&svq->ring_id_maps[used_elem.id]);
}
@@ -393,8 +471,9 @@ static void vhost_svq_flush(VhostShadowVirtqueue *svq,
vhost_svq_set_notification(svq, false);
while (true) {
- g_autofree VirtQueueElement *elem = vhost_svq_get_buf(svq);
- if (!elem) {
+ g_autofree SVQElement *svq_elem = vhost_svq_get_buf(svq);
+ VirtQueueElement *elem;
+ if (!svq_elem) {
break;
}
@@ -406,6 +485,7 @@ static void vhost_svq_flush(VhostShadowVirtqueue *svq,
virtqueue_flush(vq, i);
i = 0;
}
+ elem = &svq_elem->elem;
virtqueue_fill(vq, elem, elem->len, i++);
}
@@ -583,10 +663,15 @@ void vhost_svq_stop(struct vhost_dev *dev, unsigned idx,
vhost_svq_flush(svq, false);
for (i = 0; i < svq->vring.num; ++i) {
- g_autofree VirtQueueElement *elem = svq->ring_id_maps[i];
- if (elem) {
- virtqueue_detach_element(svq->vq, elem, elem->len);
+ g_autofree SVQElement *svq_elem = svq->ring_id_maps[i];
+ VirtQueueElement *elem;
+
+ if (!svq_elem) {
+ continue;
}
+
+ elem = &svq_elem->elem;
+ virtqueue_detach_element(svq->vq, elem, elem->len);
}
}
@@ -594,7 +679,8 @@ void vhost_svq_stop(struct vhost_dev *dev, unsigned idx,
* Creates the vhost shadow virtqueue, and instructs the vhost device to use the shadow
* methods and file descriptors.
*/
-VhostShadowVirtqueue *vhost_svq_new(struct vhost_dev *dev, int idx)
+VhostShadowVirtqueue *vhost_svq_new(struct vhost_dev *dev, int idx,
+ VhostIOVATree *iova_map)
{
int vq_idx = dev->vq_index + idx;
unsigned num = virtio_queue_get_num(dev->vdev, vq_idx);
@@ -628,11 +714,13 @@ VhostShadowVirtqueue *vhost_svq_new(struct vhost_dev *dev, int idx)
memset(svq->vring.desc, 0, driver_size);
svq->vring.used = qemu_memalign(qemu_real_host_page_size, device_size);
memset(svq->vring.used, 0, device_size);
+ svq->iova_map = iova_map;
+
for (i = 0; i < num - 1; i++) {
svq->vring.desc[i].next = cpu_to_le16(i + 1);
}
- svq->ring_id_maps = g_new0(VirtQueueElement *, num);
+ svq->ring_id_maps = g_new0(SVQElement *, num);
event_notifier_set_handler(&svq->hdev_call, vhost_svq_handle_call);
return g_steal_pointer(&svq);
diff --git a/hw/virtio/vhost-vdpa.c b/hw/virtio/vhost-vdpa.c
index a827ecbe4f..8466580ae7 100644
--- a/hw/virtio/vhost-vdpa.c
+++ b/hw/virtio/vhost-vdpa.c
@@ -210,6 +210,18 @@ static void vhost_vdpa_listener_region_add(MemoryListener *listener,
vaddr, section->readonly);
llsize = int128_sub(llend, int128_make64(iova));
+ if (v->shadow_vqs_enabled) {
+ DMAMap mem_region = {
+ .translated_addr = (hwaddr)vaddr,
+ .size = int128_get64(llsize) - 1,
+ .perm = IOMMU_ACCESS_FLAG(true, section->readonly),
+ };
+
+ int r = vhost_iova_tree_map_alloc(v->iova_map, &mem_region);
+ assert(r == IOVA_OK);
+
+ iova = mem_region.iova;
+ }
vhost_vdpa_iotlb_batch_begin_once(v);
ret = vhost_vdpa_dma_map(v, iova, int128_get64(llsize),
@@ -262,6 +274,20 @@ static void vhost_vdpa_listener_region_del(MemoryListener *listener,
llsize = int128_sub(llend, int128_make64(iova));
+ if (v->shadow_vqs_enabled) {
+ const DMAMap *result;
+ const void *vaddr = memory_region_get_ram_ptr(section->mr) +
+ section->offset_within_region +
+ (iova - section->offset_within_address_space);
+ DMAMap mem_region = {
+ .translated_addr = (hwaddr)vaddr,
+ .size = int128_get64(llsize) - 1,
+ };
+
+ result = vhost_iova_tree_find_iova(v->iova_map, &mem_region);
+ iova = result->iova;
+ vhost_iova_tree_remove(v->iova_map, &mem_region);
+ }
vhost_vdpa_iotlb_batch_begin_once(v);
ret = vhost_vdpa_dma_unmap(v, iova, int128_get64(llsize));
if (ret) {
@@ -1067,7 +1093,7 @@ void vhost_vdpa_enable_svq(struct vhost_vdpa *v, bool enable, Error **errp)
/* Allocate resources */
assert(v->shadow_vqs->len == 0);
for (n = 0; n < hdev->nvqs; ++n) {
- VhostShadowVirtqueue *svq = vhost_svq_new(hdev, n);
+ VhostShadowVirtqueue *svq = vhost_svq_new(hdev, n, v->iova_map);
if (unlikely(!svq)) {
error_setg(errp, "Cannot create svq");
enable = false;
@@ -1088,6 +1114,13 @@ void vhost_vdpa_enable_svq(struct vhost_vdpa *v, bool enable, Error **errp)
vhost_vdpa_get_vq_state(hdev, hdev->vq_index + n);
}
+ memory_listener_unregister(&v->listener);
+ r = vhost_vdpa_dma_unmap(v, iova_first,
+ (iova_last - iova_first) & TARGET_PAGE_MASK);
+ if (unlikely(r)) {
+ error_setg_errno(errp, -r, "Failed to invalidate IOTLB");
+ }
+
/* Reset device so it can be configured */
vhost_vdpa_dev_start(hdev, false);
--
2.27.0