[Date Prev][Date Next][Thread Prev][Thread Next][Date Index][Thread Index]
Re: [PATCH v4 04/17] intel_iommu: Flush stage-2 cache in PASID-selective
From: |
Michael S. Tsirkin |
Subject: |
Re: [PATCH v4 04/17] intel_iommu: Flush stage-2 cache in PASID-selective PASID-based iotlb invalidation |
Date: |
Mon, 4 Nov 2024 06:50:44 -0500 |
On Mon, Nov 04, 2024 at 11:46:00AM +0000, Duan, Zhenzhong wrote:
>
>
> >-----Original Message-----
> >From: Liu, Yi L <yi.l.liu@intel.com>
> >Sent: Monday, November 4, 2024 4:45 PM
> >Subject: Re: [PATCH v4 04/17] intel_iommu: Flush stage-2 cache in PASID-
> >selective PASID-based iotlb invalidation
> >
> >On 2024/11/4 15:37, CLEMENT MATHIEU--DRIF wrote:
> >>
> >>
> >> On 04/11/2024 03:49, Yi Liu wrote:
> >>> Caution: External email. Do not open attachments or click links, unless
> >>> this email comes from a known sender and you know the content is safe.
> >>>
> >>>
> >>> On 2024/9/30 17:26, Zhenzhong Duan wrote:
> >>>> Per spec 6.5.2.4, PADID-selective PASID-based iotlb invalidation will
> >>>> flush stage-2 iotlb entries with matching domain id and pasid.
> >>>
> >>> Also, call out it's per table Table 21. PASID-based-IOTLB Invalidation of
> >>> VT-d spec 4.1.
> >>>
> >>>> With scalable modern mode introduced, guest could send PASID-selective
> >>>> PASID-based iotlb invalidation to flush both stage-1 and stage-2 entries.
> >>>>
> >>>> By this chance, remove old IOTLB related definitions which were unused.
> >>>
> >>>
> >>>> Signed-off-by: Zhenzhong Duan <zhenzhong.duan@intel.com>
> >>>> Reviewed-by: Clément Mathieu--Drif<clement.mathieu--drif@eviden.com>
> >>>> Acked-by: Jason Wang <jasowang@redhat.com>
> >>>> ---
> >>>> hw/i386/intel_iommu_internal.h | 14 ++++--
> >>>> hw/i386/intel_iommu.c | 88 +++++++++++++++++++++++++++++++++-
> >>>> 2 files changed, 96 insertions(+), 6 deletions(-)
> >>>>
> >>>> diff --git a/hw/i386/intel_iommu_internal.h b/hw/i386/
> >>>> intel_iommu_internal.h
> >>>> index d0f9d4589d..eec8090190 100644
> >>>> --- a/hw/i386/intel_iommu_internal.h
> >>>> +++ b/hw/i386/intel_iommu_internal.h
> >>>> @@ -403,11 +403,6 @@ typedef union VTDInvDesc VTDInvDesc;
> >>>> #define VTD_INV_DESC_IOTLB_AM(val) ((val) & 0x3fULL)
> >>>> #define VTD_INV_DESC_IOTLB_RSVD_LO 0xffffffff0000f100ULL
> >>>> #define VTD_INV_DESC_IOTLB_RSVD_HI 0xf80ULL
> >>>> -#define VTD_INV_DESC_IOTLB_PASID_PASID (2ULL << 4)
> >>>> -#define VTD_INV_DESC_IOTLB_PASID_PAGE (3ULL << 4)
> >>>> -#define VTD_INV_DESC_IOTLB_PASID(val) (((val) >> 32) &
> >>>> VTD_PASID_ID_MASK)
> >>>> -#define VTD_INV_DESC_IOTLB_PASID_RSVD_LO 0xfff00000000001c0ULL
> >>>> -#define VTD_INV_DESC_IOTLB_PASID_RSVD_HI 0xf80ULL
> >>>>
> >>>> /* Mask for Device IOTLB Invalidate Descriptor */
> >>>> #define VTD_INV_DESC_DEVICE_IOTLB_ADDR(val) ((val) &
> >>>> 0xfffffffffffff000ULL)
> >>>> @@ -433,6 +428,15 @@ typedef union VTDInvDesc VTDInvDesc;
> >>>> #define VTD_SPTE_LPAGE_L3_RSVD_MASK(aw) \
> >>>> (0x3ffff800ULL | ~(VTD_HAW_MASK(aw) | VTD_SL_IGN_COM))
> >>>>
> >>>> +/* Masks for PIOTLB Invalidate Descriptor */
> >>>> +#define VTD_INV_DESC_PIOTLB_G (3ULL << 4)
> >>>> +#define VTD_INV_DESC_PIOTLB_ALL_IN_PASID (2ULL << 4)
> >>>> +#define VTD_INV_DESC_PIOTLB_PSI_IN_PASID (3ULL << 4)
> >>>> +#define VTD_INV_DESC_PIOTLB_DID(val) (((val) >> 16) &
> >>>> VTD_DOMAIN_ID_MASK)
> >>>> +#define VTD_INV_DESC_PIOTLB_PASID(val) (((val) >> 32) & 0xfffffULL)
> >>>> +#define VTD_INV_DESC_PIOTLB_RSVD_VAL0 0xfff000000000f1c0ULL
> >>>> +#define VTD_INV_DESC_PIOTLB_RSVD_VAL1 0xf80ULL
> >>>> +
> >>>> /* Information about page-selective IOTLB invalidate */
> >>>> struct VTDIOTLBPageInvInfo {
> >>>> uint16_t domain_id;
> >>>> diff --git a/hw/i386/intel_iommu.c b/hw/i386/intel_iommu.c
> >>>> index 9e6ef0cb99..72c9c91d4f 100644
> >>>> --- a/hw/i386/intel_iommu.c
> >>>> +++ b/hw/i386/intel_iommu.c
> >>>> @@ -2656,6 +2656,86 @@ static bool
> >>>> vtd_process_iotlb_desc(IntelIOMMUState *s, VTDInvDesc *inv_desc)
> >>>> return true;
> >>>> }
> >>>>
> >>>> +static gboolean vtd_hash_remove_by_pasid(gpointer key, gpointer value,
> >>>> + gpointer user_data)
> >>>> +{
> >>>> + VTDIOTLBEntry *entry = (VTDIOTLBEntry *)value;
> >>>> + VTDIOTLBPageInvInfo *info = (VTDIOTLBPageInvInfo *)user_data;
> >>>> +
> >>>> + return ((entry->domain_id == info->domain_id) &&
> >>>> + (entry->pasid == info->pasid));
> >>>> +}
> >>>> +
> >>>> +static void vtd_piotlb_pasid_invalidate(IntelIOMMUState *s,
> >>>> + uint16_t domain_id, uint32_t
> >>>> pasid)
> >>>> +{
> >>>> + VTDIOTLBPageInvInfo info;
> >>>> + VTDAddressSpace *vtd_as;
> >>>> + VTDContextEntry ce;
> >>>> +
> >>>> + info.domain_id = domain_id;
> >>>> + info.pasid = pasid;
> >>>> +
> >>>> + vtd_iommu_lock(s);
> >>>> + g_hash_table_foreach_remove(s->iotlb, vtd_hash_remove_by_pasid,
> >>>> + &info);
> >>>> + vtd_iommu_unlock(s);
> >>>> +
> >>>> + QLIST_FOREACH(vtd_as, &s->vtd_as_with_notifiers, next) {
> >>>> + if (!vtd_dev_to_context_entry(s, pci_bus_num(vtd_as->bus),
> >>>> + vtd_as->devfn, &ce) &&
> >>>> + domain_id == vtd_get_domain_id(s, &ce, vtd_as->pasid)) {
> >>>> + uint32_t rid2pasid = VTD_CE_GET_RID2PASID(&ce);
> >>>> +
> >>>> + if ((vtd_as->pasid != PCI_NO_PASID || pasid != rid2pasid) &&
> >>>> + vtd_as->pasid != pasid) {
> >>>> + continue;
> >>>> + }
> >>>> +
> >>>> + if (!s->scalable_modern) {
> >>>> + vtd_address_space_sync(vtd_as);
> >>>> + }
> >>>> + }
> >>>> + }
> >>>> +}
> >>>> +
> >>>> +static bool vtd_process_piotlb_desc(IntelIOMMUState *s,
> >>>> + VTDInvDesc *inv_desc)
> >>>> +{
> >>>> + uint16_t domain_id;
> >>>> + uint32_t pasid;
> >>>> +
> >>>> + if ((inv_desc->val[0] & VTD_INV_DESC_PIOTLB_RSVD_VAL0) ||
> >>>> + (inv_desc->val[1] & VTD_INV_DESC_PIOTLB_RSVD_VAL1) ||
> >>>> + inv_desc->val[2] || inv_desc->val[3]) {
> >>>> + error_report_once("%s: invalid piotlb inv desc val[3]=0x%"PRIx64
> >>>> + " val[2]=0x%"PRIx64" val[1]=0x%"PRIx64
> >>>> + " val[0]=0x%"PRIx64" (reserved bits unzero)",
> >>>> + __func__, inv_desc->val[3], inv_desc->val[2],
> >>>> + inv_desc->val[1], inv_desc->val[0]);
> >>>> + return false;
> >>>> + }
> >>>
> >>> Need to consider the below behaviour as well.
> >>>
> >>> "
> >>> This
> >>> descriptor is a 256-bit descriptor and will result in an invalid
> >>> descriptor
> >>> error if submitted in an IQ that
> >>> is setup to provide hardware with 128-bit descriptors (IQA_REG.DW=0)
> >>> "
> >>>
> >>> Also there are descriptions about the old inv desc types (e.g.
> >>> iotlb_inv_desc) that can be either 128bits or 256bits.
> >>>
> >>> "If a 128-bit
> >>> version of this descriptor is submitted into an IQ that is setup to
> >>> provide
> >>> hardware with 256-bit
> >>> descriptors or vice-versa it will result in an invalid descriptor error.
> >>> "
> >>>
> >>> If DW==1, vIOMMU fetches 32 bytes per desc. In such case, if the guest
> >>> submits 128bits desc, then the high 128bits would be non-zero if there is
> >>> more than one desc. But if there is only one desc in the queue, then the
> >>> high 128bits would be zero as well. While, it may be captured by the
> >>> tail register update. Bit4 is reserved when DW==1, and guest would use
> >>> bit4 when it only submits one desc.
> >>>
> >>> If DW==0, vIOMMU fetchs 16bytes per desc. If guest submits 256bits desc,
> >>> it would appear to be two descs from vIOMMU p.o.v. The first 128bits
> >>> can be identified as valid except for the types that does not requires
> >>> 256bits. The higher 128bits would be subjected to the desc sanity check
> >>> as well.
> >>>
> >>> Based on the above, I think you may need to add two more checks. If DW==0,
> >>> vIOMMU should fail the inv types that requires 256bits; If DW==1, you
> >>> should check the inv_desc->val[2] and inv_desc->val[3]. You've already
> >>> done it in this patch.
> >>>
> >>> Thoughts are welcomed here.
> >>
> >> Good catch,
> >> I think we should write the check in vtd_process_inv_desc
> >> rather than updating the handlers.
> >>
> >> What are your thoughts?
> >
> >the first check can be done in vtd_process_inv_desc(). The second may
> >be better in the handlers as the handlers have the reserved bits check.
> >But given that none of the inv types use the high 128bits, so it is also
> >acceptable to do it in vtd_process_inv_desc(). Do add proper comment.
>
> Thanks Yi and Clement's suggestion, I'll send a small series to fix that
> for upstream.
>
> BRs.
> Zhenzhong
Ok so you will send v5?
- Re: [PATCH v4 04/17] intel_iommu: Flush stage-2 cache in PASID-selective PASID-based iotlb invalidation, Yi Liu, 2024/11/03
- Re: [PATCH v4 04/17] intel_iommu: Flush stage-2 cache in PASID-selective PASID-based iotlb invalidation, CLEMENT MATHIEU--DRIF, 2024/11/04
- Re: [PATCH v4 04/17] intel_iommu: Flush stage-2 cache in PASID-selective PASID-based iotlb invalidation, Yi Liu, 2024/11/04
- RE: [PATCH v4 04/17] intel_iommu: Flush stage-2 cache in PASID-selective PASID-based iotlb invalidation, Duan, Zhenzhong, 2024/11/04
- Re: [PATCH v4 04/17] intel_iommu: Flush stage-2 cache in PASID-selective PASID-based iotlb invalidation,
Michael S. Tsirkin <=
- RE: [PATCH v4 04/17] intel_iommu: Flush stage-2 cache in PASID-selective PASID-based iotlb invalidation, Duan, Zhenzhong, 2024/11/04
- Re: [PATCH v4 04/17] intel_iommu: Flush stage-2 cache in PASID-selective PASID-based iotlb invalidation, Michael S. Tsirkin, 2024/11/04
- RE: [PATCH v4 04/17] intel_iommu: Flush stage-2 cache in PASID-selective PASID-based iotlb invalidation, Duan, Zhenzhong, 2024/11/04