[Date Prev][Date Next][Thread Prev][Thread Next][Date Index][Thread Index]
Re: [Qemu-devel] [PATCH][RFC] IOMMU: Add Support to VFIO devices with vI
From: |
Michael S. Tsirkin |
Subject: |
Re: [Qemu-devel] [PATCH][RFC] IOMMU: Add Support to VFIO devices with vIOMMU present |
Date: |
Wed, 23 Mar 2016 16:34:42 +0200 |
On Thu, Mar 17, 2016 at 01:58:13PM +0200, Aviv B.D. wrote:
>
>
> On Tue, Mar 15, 2016 at 12:53 PM, Michael S. Tsirkin <address@hidden> wrote:
>
> On Sat, Mar 12, 2016 at 06:13:17PM +0200, Aviv B.D. wrote:
> > From: "Aviv B.D." <address@hidden>
> >
> > * Fix bug that prevents qemu from starting up when vIOMMU and VFIO
> > device are present.
> > * Advertise Cache Mode capability in iommu cap register.
> > * Register every VFIO device with IOMMU state.
> > * On page cache invalidation in vIOMMU, check if the domain belongs to
> > a VFIO device and mirror the guest requests to the host.
> >
> > Not working (Yet!):
> > * Tested only with network interface card (ixgbevf) and
> > intel_iommu=strict in guest's kernel command line.
> > * Lock up under high load.
> > * Errors on guest poweroff.
> > * High relative latency compared to VFIO without IOMMU.
> >
> > Signed-off-by: Aviv B.D. <address@hidden>
>
> Thanks, this is very interesting.
> So this needs some cleanup, and there are some issues that will have to
> be addressed.
> See below.
> Thanks!
>
>
> Thanks!
>
>
> > ---
> > hw/i386/intel_iommu.c | 76
> ++++++++++++++++++++++++++++++++++++++----
> > hw/i386/intel_iommu_internal.h | 1 +
> > hw/vfio/common.c | 12 +++++--
> > include/hw/i386/intel_iommu.h | 4 +++
> > include/hw/vfio/vfio-common.h | 1 +
> > 5 files changed, 85 insertions(+), 9 deletions(-)
> >
> > diff --git a/hw/i386/intel_iommu.c b/hw/i386/intel_iommu.c
> > index 347718f..046688f 100644
> > --- a/hw/i386/intel_iommu.c
> > +++ b/hw/i386/intel_iommu.c
> > @@ -43,6 +44,9 @@ static int vtd_dbgflags = VTD_DBGBIT(GENERAL) |
> VTD_DBGBIT
> > (CSR);
> > #define VTD_DPRINTF(what, fmt, ...) do {} while (0)
> > #endif
> >
> > +static int vtd_dev_to_context_entry(IntelIOMMUState *s, uint8_t
> bus_num,
> > + uint8_t devfn, VTDContextEntry
> *ce);
> > +
> > static void vtd_define_quad(IntelIOMMUState *s, hwaddr addr, uint64_t
> val,
> > uint64_t wmask, uint64_t w1cmask)
> > {
> > @@ -126,6 +130,19 @@ static uint32_t vtd_set_clear_mask_long
> (IntelIOMMUState
> > *s, hwaddr addr,
> > return new_val;
> > }
> >
> > +static uint16_t vtd_get_did_dev(IntelIOMMUState *s, uint8_t bus_num,
> uint8_t
> > devfn)
> > +{
> > + VTDContextEntry ce;
> > + int ret_fr;
> > +
> > + ret_fr = vtd_dev_to_context_entry(s, bus_num, devfn, &ce);
> > + if (ret_fr){
> > + return -1;
> > + }
> > +
> > + return VTD_CONTEXT_ENTRY_DID(ce.hi);
> > +}
> > +
> > static uint64_t vtd_set_clear_mask_quad(IntelIOMMUState *s, hwaddr
> addr,
> > uint64_t clear, uint64_t mask)
> > {
> > @@ -711,9 +728,9 @@ static int vtd_dev_to_context_entry(IntelIOMMUState
> *s,
> > uint8_t bus_num,
> > }
> >
> > if (!vtd_context_entry_present(ce)) {
> > - VTD_DPRINTF(GENERAL,
> > + /*VTD_DPRINTF(GENERAL,
> > "error: context-entry #%"PRIu8 "(bus #%"PRIu8 ") "
> > - "is not present", devfn, bus_num);
> > + "is not present", devfn, bus_num);*/
> > return -VTD_FR_CONTEXT_ENTRY_P;
> > } else if ((ce->hi & VTD_CONTEXT_ENTRY_RSVD_HI) ||
> > (ce->lo & VTD_CONTEXT_ENTRY_RSVD_LO)) {
> > @@ -1020,14 +1037,53 @@ static void vtd_iotlb_page_invalidate
> (IntelIOMMUState
> > *s, uint16_t domain_id,
> > hwaddr addr, uint8_t am)
> > {
> > VTDIOTLBPageInvInfo info;
> > + VFIOGuestIOMMU * giommu;
> > + bool flag = false;
> >
> > assert(am <= VTD_MAMV);
> > info.domain_id = domain_id;
> > info.addr = addr;
> > info.mask = ~((1 << am) - 1);
> > +
> > + QLIST_FOREACH(giommu, &(s->giommu_list), iommu_next){
> > + VTDAddressSpace *vtd_as = container_of(giommu->iommu,
> VTDAddressSpace,
> > iommu);
> > + uint16_t vfio_source_id = vtd_make_source_id(pci_bus_num
> (vtd_as->bus),
> > vtd_as->devfn);
> > + uint16_t vfio_domain_id = vtd_get_did_dev(s, pci_bus_num
> (vtd_as->bus),
> > vtd_as->devfn);
> > + if (vfio_domain_id != (uint16_t)-1 &&
> > + domain_id == vfio_domain_id){
> > + VTDIOTLBEntry *iotlb_entry = vtd_lookup_iotlb(s,
> vfio_source_id,
> > addr);
> > + if (iotlb_entry != NULL){
> > + IOMMUTLBEntry entry;
> > + VTD_DPRINTF(GENERAL, "Remove addr 0x%"PRIx64 " mask
> %d",
> addr,
> > am);
> > + entry.iova = addr & VTD_PAGE_MASK_4K;
> > + entry.translated_addr =
> vtd_get_slpte_addr(iotlb_entry->
> slpte)
> > & VTD_PAGE_MASK_4K;
> > + entry.addr_mask = ~VTD_PAGE_MASK_4K;
> > + entry.perm = IOMMU_NONE;
> > + memory_region_notify_iommu(giommu->iommu, entry);
> > + flag = true;
> > +
> > + }
> > + }
> > +
> > + }
> > +
> > g_hash_table_foreach_remove(s->iotlb, vtd_hash_remove_by_page, &
> info);
> > -}
> >
> > + QLIST_FOREACH(giommu, &(s->giommu_list), iommu_next){
> > + VTDAddressSpace *vtd_as = container_of(giommu->iommu,
> VTDAddressSpace,
> > iommu);
> > + uint16_t vfio_domain_id = vtd_get_did_dev(s, pci_bus_num
> (vtd_as->bus),
> > vtd_as->devfn);
> > + if (vfio_domain_id != (uint16_t)-1 &&
> > + domain_id == vfio_domain_id && !flag){
> > + /* do vfio map */
> > + VTD_DPRINTF(GENERAL, "add addr 0x%"PRIx64 " mask %d", addr,
> am);
> > + /* call to vtd_iommu_translate */
> > + IOMMUTLBEntry entry = s->iommu_ops.translate(giommu->iommu,
> addr,
> > 0);
> > + entry.perm = IOMMU_RW;
> > + memory_region_notify_iommu(giommu->iommu, entry);
>
> So this just triggers the iommu notifiers in the regular way.
> But what if two devices have conflicting entries?
> We need to setup host IOMMU differently for them.
> But see below
>
> > + //g_vfio_iommu->n.notify(&g_vfio_iommu->n, &entry);
> > + }
> > + }
> > +}
> > /* Flush IOTLB
> > * Returns the IOTLB Actual Invalidation Granularity.
> > * @val: the content of the IOTLB_REG
> > @@ -1895,6 +1951,13 @@ static Property vtd_properties[] = {
> > DEFINE_PROP_END_OF_LIST(),
> > };
> >
> > +void vtd_register_giommu(VFIOGuestIOMMU * giommu)
> > +{
> > + VTDAddressSpace *vtd_as = container_of(giommu->iommu,
> VTDAddressSpace,
> > iommu);
> > + IntelIOMMUState *s = vtd_as->iommu_state;
> > +
> > + QLIST_INSERT_HEAD(&s->giommu_list, giommu, iommu_next);
> > +}
> >
> > VTDAddressSpace *vtd_find_add_as(IntelIOMMUState *s, PCIBus *bus, int
> devfn)
> > {
> > @@ -1949,7 +2012,8 @@ static void vtd_init(IntelIOMMUState *s)
> > s->iq_last_desc_type = VTD_INV_DESC_NONE;
> > s->next_frcd_reg = 0;
> > s->cap = VTD_CAP_FRO | VTD_CAP_NFR | VTD_CAP_ND | VTD_CAP_MGAW |
> > - VTD_CAP_SAGAW | VTD_CAP_MAMV | VTD_CAP_PSI |
> VTD_CAP_SLLPS;
> > + VTD_CAP_SAGAW | VTD_CAP_MAMV | VTD_CAP_PSI | VTD_CAP_SLLPS
> |
> > + VTD_CAP_CM;
> > s->ecap = VTD_ECAP_QI | VTD_ECAP_IRO;
> >
> > vtd_reset_context_cache(s);
> > diff --git a/hw/i386/intel_iommu_internal.h b/hw/i386/
> intel_iommu_internal.h
> > index e5f514c..ae40f73 100644
> > --- a/hw/i386/intel_iommu_internal.h
> > +++ b/hw/i386/intel_iommu_internal.h
> > @@ -190,6 +190,7 @@
> > #define VTD_CAP_MAMV (VTD_MAMV << 48)
> > #define VTD_CAP_PSI (1ULL << 39)
> > #define VTD_CAP_SLLPS ((1ULL << 34) | (1ULL << 35))
> > +#define VTD_CAP_CM (1ULL << 7)
> >
> > /* Supported Adjusted Guest Address Widths */
> > #define VTD_CAP_SAGAW_SHIFT 8
> > diff --git a/hw/vfio/common.c b/hw/vfio/common.c
> > index 607ec70..98c8d67 100644
> > --- a/hw/vfio/common.c
> > +++ b/hw/vfio/common.c
> > @@ -32,6 +32,9 @@
> > #include "sysemu/kvm.h"
> > #include "trace.h"
> >
> > +#include "hw/sysbus.h"
> > +#include "hw/i386/intel_iommu.h"
> > +
> > struct vfio_group_head vfio_group_list =
> > QLIST_HEAD_INITIALIZER(vfio_group_list);
> > struct vfio_as_head vfio_address_spaces =
> > @@ -312,12 +315,12 @@ static void vfio_iommu_map_notify(Notifier *n,
> void
> > *data)
> > out:
> > rcu_read_unlock();
> > }
> > -
> > +#if 0
> > static hwaddr vfio_container_granularity(VFIOContainer *container)
> > {
> > return (hwaddr)1 << ctz64(container->iova_pgsizes);
> > }
> > -
> > +#endif
> > static void vfio_listener_region_add(MemoryListener *listener,
> > MemoryRegionSection *section)
> > {
> > @@ -344,6 +347,7 @@ static void vfio_listener_region_add(MemoryListener
> > *listener,
> > iova = TARGET_PAGE_ALIGN(section->offset_within_address_space);
> > llend = int128_make64(section->offset_within_address_space);
> > llend = int128_add(llend, section->size);
> > + llend = int128_add(llend, int128_exts64(-1));
> > llend = int128_and(llend, int128_exts64(TARGET_PAGE_MASK));
> >
> > if (int128_ge(int128_make64(iova), llend)) {
> > @@ -381,11 +385,13 @@ static void
> vfio_listener_region_add(MemoryListener
> > *listener,
> > giommu->n.notify = vfio_iommu_map_notify;
> > QLIST_INSERT_HEAD(&container->giommu_list, giommu,
> giommu_next);
> >
> > + vtd_register_giommu(giommu);
> > memory_region_register_iommu_notifier(giommu->iommu, &giommu->
> n);
> > +#if 0
> > memory_region_iommu_replay(giommu->iommu, &giommu->n,
> > vfio_container_granularity
> (container),
> > false);
> > -
> > +#endif
> > return;
> > }
> >
>
> Looks like there is still a single global domain on the host.
> Works up to a point as long as we have a single VFIO device.
>
>
> You are correct, I should call (from page_invalidation) only the correct
> vfio notifier that belongs to this domain. This way (as long as no conflicting
> mapping is present) several VFIO devices can work at the same time. I'll add
> this feature to the next version of the patch.
> Currently I am not sure how the code should handle conflicting mappings between
> two different domains.
If you have different vfio domains, then what is the
issue with fixing it?
>
>
> > diff --git a/include/hw/i386/intel_iommu.h b/include/hw/i386/
> intel_iommu.h
> > index b024ffa..22f3f83 100644
> > --- a/include/hw/i386/intel_iommu.h
> > +++ b/include/hw/i386/intel_iommu.h
> > @@ -23,6 +23,7 @@
> > #define INTEL_IOMMU_H
> > #include "hw/qdev.h"
> > #include "sysemu/dma.h"
> > +#include "hw/vfio/vfio-common.h"
> >
> > #define TYPE_INTEL_IOMMU_DEVICE "intel-iommu"
> > #define INTEL_IOMMU_DEVICE(obj) \
> > @@ -123,6 +124,8 @@ struct IntelIOMMUState {
> > MemoryRegionIOMMUOps iommu_ops;
> > GHashTable *vtd_as_by_busptr; /* VTDBus objects indexed by
> PCIBus*
> > reference */
> > VTDBus *vtd_as_by_bus_num[VTD_PCI_BUS_MAX]; /* VTDBus objects
> indexed by
> > bus number */
> > +
> > + QLIST_HEAD(, VFIOGuestIOMMU) giommu_list;
> > };
> >
> > /* Find the VTD Address space associated with the given bus pointer,
> > @@ -130,4 +133,5 @@ struct IntelIOMMUState {
> > */
> > VTDAddressSpace *vtd_find_add_as(IntelIOMMUState *s, PCIBus *bus, int
> devfn);
> >
> > +void vtd_register_giommu(VFIOGuestIOMMU * giommu);
> > #endif
> > diff --git a/include/hw/vfio/vfio-common.h b/include/hw/vfio/
> vfio-common.h
> > index f037f3c..9225ba3 100644
> > --- a/include/hw/vfio/vfio-common.h
> > +++ b/include/hw/vfio/vfio-common.h
> > @@ -82,6 +82,7 @@ typedef struct VFIOGuestIOMMU {
> > MemoryRegion *iommu;
> > Notifier n;
> > QLIST_ENTRY(VFIOGuestIOMMU) giommu_next;
> > + QLIST_ENTRY(VFIOGuestIOMMU) iommu_next;
> > } VFIOGuestIOMMU;
> >
> > typedef struct VFIODeviceOps VFIODeviceOps;
> > --
> > 1.9.1
> >
>
>
- Re: [Qemu-devel] [PATCH][RFC] IOMMU: Add Support to VFIO devices with vIOMMU present, (continued)
- Re: [Qemu-devel] [PATCH][RFC] IOMMU: Add Support to VFIO devices with vIOMMU present, Jan Kiszka, 2016/03/14
- Re: [Qemu-devel] [PATCH][RFC] IOMMU: Add Support to VFIO devices with vIOMMU present, Peter Xu, 2016/03/15
- Re: [Qemu-devel] [PATCH][RFC] IOMMU: Add Support to VFIO devices with vIOMMU present, Aviv B.D., 2016/03/17
- Re: [Qemu-devel] [PATCH][RFC] IOMMU: Add Support to VFIO devices with vIOMMU present, Peter Xu, 2016/03/17
- Re: [Qemu-devel] [PATCH][RFC] IOMMU: Add Support to VFIO devices with vIOMMU present, Aviv B.D., 2016/03/19
- Re: [Qemu-devel] [PATCH][RFC] IOMMU: Add Support to VFIO devices with vIOMMU present, Peter Xu, 2016/03/20
- Re: [Qemu-devel] [PATCH][RFC] IOMMU: Add Support to VFIO devices with vIOMMU present, Aviv B.D., 2016/03/22
Re: [Qemu-devel] [PATCH][RFC] IOMMU: Add Support to VFIO devices with vIOMMU present, Michael S. Tsirkin, 2016/03/15
Re: [Qemu-devel] [PATCH][RFC] IOMMU: Add Support to VFIO devices with vIOMMU present, Michael S. Tsirkin, 2016/03/23