Caution: External email. Do not open attachments or click links, unless
this email comes from a known sender and you know the content is safe.
On 2024/9/30 17:26, Zhenzhong Duan wrote:
Per spec 6.5.2.4, PASID-selective PASID-based iotlb invalidation will
flush stage-2 iotlb entries with matching domain id and pasid.
Also, call out that this behavior is specified per Table 21, "PASID-based-IOTLB
Invalidation", of VT-d spec 4.1.
With scalable modern mode introduced, guest could send PASID-selective
PASID-based iotlb invalidation to flush both stage-1 and stage-2 entries.
By this chance, remove old IOTLB related definitions which were unused.
Signed-off-by: Zhenzhong Duan <zhenzhong.duan@intel.com>
Reviewed-by: Clément Mathieu--Drif<clement.mathieu--drif@eviden.com>
Acked-by: Jason Wang <jasowang@redhat.com>
---
hw/i386/intel_iommu_internal.h | 14 ++++--
hw/i386/intel_iommu.c | 88 +++++++++++++++++++++++++++++++++-
2 files changed, 96 insertions(+), 6 deletions(-)
diff --git a/hw/i386/intel_iommu_internal.h b/hw/i386/intel_iommu_internal.h
index d0f9d4589d..eec8090190 100644
--- a/hw/i386/intel_iommu_internal.h
+++ b/hw/i386/intel_iommu_internal.h
@@ -403,11 +403,6 @@ typedef union VTDInvDesc VTDInvDesc;
#define VTD_INV_DESC_IOTLB_AM(val) ((val) & 0x3fULL)
#define VTD_INV_DESC_IOTLB_RSVD_LO 0xffffffff0000f100ULL
#define VTD_INV_DESC_IOTLB_RSVD_HI 0xf80ULL
-#define VTD_INV_DESC_IOTLB_PASID_PASID (2ULL << 4)
-#define VTD_INV_DESC_IOTLB_PASID_PAGE (3ULL << 4)
-#define VTD_INV_DESC_IOTLB_PASID(val)   (((val) >> 32) & VTD_PASID_ID_MASK)
-#define VTD_INV_DESC_IOTLB_PASID_RSVD_LO 0xfff00000000001c0ULL
-#define VTD_INV_DESC_IOTLB_PASID_RSVD_HI 0xf80ULL
/* Mask for Device IOTLB Invalidate Descriptor */
#define VTD_INV_DESC_DEVICE_IOTLB_ADDR(val) ((val) & 0xfffffffffffff000ULL)
@@ -433,6 +428,15 @@ typedef union VTDInvDesc VTDInvDesc;
#define VTD_SPTE_LPAGE_L3_RSVD_MASK(aw) \
(0x3ffff800ULL | ~(VTD_HAW_MASK(aw) | VTD_SL_IGN_COM))
+/* Masks for PIOTLB Invalidate Descriptor */
+#define VTD_INV_DESC_PIOTLB_G (3ULL << 4)
+#define VTD_INV_DESC_PIOTLB_ALL_IN_PASID (2ULL << 4)
+#define VTD_INV_DESC_PIOTLB_PSI_IN_PASID (3ULL << 4)
+#define VTD_INV_DESC_PIOTLB_DID(val)     (((val) >> 16) & VTD_DOMAIN_ID_MASK)
+#define VTD_INV_DESC_PIOTLB_PASID(val) (((val) >> 32) & 0xfffffULL)
+#define VTD_INV_DESC_PIOTLB_RSVD_VAL0 0xfff000000000f1c0ULL
+#define VTD_INV_DESC_PIOTLB_RSVD_VAL1 0xf80ULL
+
/* Information about page-selective IOTLB invalidate */
struct VTDIOTLBPageInvInfo {
uint16_t domain_id;
diff --git a/hw/i386/intel_iommu.c b/hw/i386/intel_iommu.c
index 9e6ef0cb99..72c9c91d4f 100644
--- a/hw/i386/intel_iommu.c
+++ b/hw/i386/intel_iommu.c
@@ -2656,6 +2656,86 @@ static bool vtd_process_iotlb_desc(IntelIOMMUState *s, VTDInvDesc *inv_desc)
return true;
}
+/*
+ * GHRFunc callback for g_hash_table_foreach_remove(): return TRUE (i.e.
+ * remove the cached IOTLB entry) when both its domain id and pasid match
+ * the invalidation request carried in user_data (a VTDIOTLBPageInvInfo).
+ */
+static gboolean vtd_hash_remove_by_pasid(gpointer key, gpointer value,
+                                         gpointer user_data)
+{
+    VTDIOTLBEntry *entry = (VTDIOTLBEntry *)value;
+    VTDIOTLBPageInvInfo *info = (VTDIOTLBPageInvInfo *)user_data;
+
+    return ((entry->domain_id == info->domain_id) &&
+            (entry->pasid == info->pasid));
+}
+
+/*
+ * Handle a PASID-selective PASID-based IOTLB invalidation: flush cached
+ * IOTLB entries matching (domain_id, pasid), then walk the address spaces
+ * that have registered notifiers and re-sync those whose context entry
+ * matches the requested domain and pasid.
+ */
+static void vtd_piotlb_pasid_invalidate(IntelIOMMUState *s,
+                                        uint16_t domain_id, uint32_t pasid)
+{
+    VTDIOTLBPageInvInfo info;
+    VTDAddressSpace *vtd_as;
+    VTDContextEntry ce;
+
+    info.domain_id = domain_id;
+    info.pasid = pasid;
+
+    /* Drop all matching entries from the IOTLB cache under the IOMMU lock. */
+    vtd_iommu_lock(s);
+    g_hash_table_foreach_remove(s->iotlb, vtd_hash_remove_by_pasid,
+                                &info);
+    vtd_iommu_unlock(s);
+
+    QLIST_FOREACH(vtd_as, &s->vtd_as_with_notifiers, next) {
+        if (!vtd_dev_to_context_entry(s, pci_bus_num(vtd_as->bus),
+                                      vtd_as->devfn, &ce) &&
+            domain_id == vtd_get_domain_id(s, &ce, vtd_as->pasid)) {
+            uint32_t rid2pasid = VTD_CE_GET_RID2PASID(&ce);
+
+            /*
+             * Skip address spaces not targeted by this request: the AS
+             * matches when it carries the pasid explicitly, or when it
+             * has no pasid (PCI_NO_PASID) and the request targets the
+             * context entry's RID2PASID value.
+             */
+            if ((vtd_as->pasid != PCI_NO_PASID || pasid != rid2pasid) &&
+                vtd_as->pasid != pasid) {
+                continue;
+            }
+
+            /*
+             * NOTE(review): the shadow-mapping sync is only done on the
+             * non-scalable-modern path — presumably scalable modern mode
+             * propagates the invalidation elsewhere; confirm against the
+             * rest of the series.
+             */
+            if (!s->scalable_modern) {
+                vtd_address_space_sync(vtd_as);
+            }
+        }
+    }
+}
+
+static bool vtd_process_piotlb_desc(IntelIOMMUState *s,
+ VTDInvDesc *inv_desc)
+{
+ uint16_t domain_id;
+ uint32_t pasid;
+
+ if ((inv_desc->val[0] & VTD_INV_DESC_PIOTLB_RSVD_VAL0) ||
+ (inv_desc->val[1] & VTD_INV_DESC_PIOTLB_RSVD_VAL1) ||
+ inv_desc->val[2] || inv_desc->val[3]) {
+ error_report_once("%s: invalid piotlb inv desc val[3]=0x%"PRIx64
+ " val[2]=0x%"PRIx64" val[1]=0x%"PRIx64
+ " val[0]=0x%"PRIx64" (reserved bits unzero)",
+ __func__, inv_desc->val[3], inv_desc->val[2],
+ inv_desc->val[1], inv_desc->val[0]);
+ return false;
+ }
Need to consider the below behaviour as well.
"
This
descriptor is a 256-bit descriptor and will result in an invalid descriptor
error if submitted in an IQ that
is setup to provide hardware with 128-bit descriptors (IQA_REG.DW=0)
"
Also there are descriptions about the old inv desc types (e.g.
iotlb_inv_desc) that can be either 128bits or 256bits.
"If a 128-bit
version of this descriptor is submitted into an IQ that is setup to provide
hardware with 256-bit
descriptors or vice-versa it will result in an invalid descriptor error.
"
If DW==1, vIOMMU fetches 32 bytes per desc. In such case, if the guest
submits 128bits desc, then the high 128bits would be non-zero if there is
more than one desc. But if there is only one desc in the queue, then the
high 128 bits would be zero as well. Even so, that case may be caught via the
tail register update: bit 4 is reserved when DW==1, and the guest would use
bit 4 when it submits only one desc.
If DW==0, vIOMMU fetches 16 bytes per desc. If the guest submits a 256-bit
desc, it would appear to be two descs from the vIOMMU's p.o.v. The first
128 bits can be identified as valid except for the types that do not require
256 bits. The higher 128 bits would be subjected to the desc sanity check
as well.
Based on the above, I think you may need to add two more checks. If DW==0,
vIOMMU should fail the inv types that requires 256bits; If DW==1, you
should check the inv_desc->val[2] and inv_desc->val[3]. You've already
done it in this patch.
Thoughts are welcomed here.