[Date Prev][Date Next][Thread Prev][Thread Next][Date Index][Thread Index]
Re: [RFC v2 10/22] intel_iommu: add virtual command capability support
From: |
Peter Xu |
Subject: |
Re: [RFC v2 10/22] intel_iommu: add virtual command capability support |
Date: |
Fri, 1 Nov 2019 19:05:45 +0100 |
User-agent: |
Mutt/1.11.4 (2019-03-13) |
On Thu, Oct 24, 2019 at 08:34:31AM -0400, Liu Yi L wrote:
> This patch adds virtual command support to Intel vIOMMU per
> Intel VT-d 3.1 spec. And adds two virtual commands: alloc_pasid
> and free_pasid.
>
> Cc: Kevin Tian <address@hidden>
> Cc: Jacob Pan <address@hidden>
> Cc: Peter Xu <address@hidden>
> Cc: Yi Sun <address@hidden>
> Signed-off-by: Liu Yi L <address@hidden>
> Signed-off-by: Yi Sun <address@hidden>
> ---
> hw/i386/intel_iommu.c | 162
> ++++++++++++++++++++++++++++++++++++++++-
> hw/i386/intel_iommu_internal.h | 38 ++++++++++
> hw/i386/trace-events | 1 +
> include/hw/i386/intel_iommu.h | 6 +-
> 4 files changed, 205 insertions(+), 2 deletions(-)
>
> diff --git a/hw/i386/intel_iommu.c b/hw/i386/intel_iommu.c
> index e9f8692..88b843f 100644
> --- a/hw/i386/intel_iommu.c
> +++ b/hw/i386/intel_iommu.c
> @@ -944,6 +944,7 @@ static VTDBus *vtd_find_as_from_bus_num(IntelIOMMUState
> *s, uint8_t bus_num)
> return vtd_bus;
> }
> }
> + vtd_bus = NULL;
I feel like I've commented on this..
Should this be a standalone patch?
> }
> return vtd_bus;
> }
> @@ -2590,6 +2591,140 @@ static void vtd_handle_iectl_write(IntelIOMMUState *s)
> }
> }
>
> +static int vtd_request_pasid_alloc(IntelIOMMUState *s)
> +{
> + VTDBus *vtd_bus;
> + int bus_n, devfn;
> + IOMMUCTXEventData event_data;
> + IOMMUCTXPASIDReqDesc req;
> + VTDIOMMUContext *vtd_ic;
> +
> + event_data.event = IOMMU_CTX_EVENT_PASID_ALLOC;
> + event_data.data = &req;
> + req.min_pasid = VTD_MIN_HPASID;
> + req.max_pasid = VTD_MAX_HPASID;
> + req.alloc_result = 0;
> + event_data.length = sizeof(req);
As mentioned in the other thread, do you think we can drop this length
field?
> + for (bus_n = 0; bus_n < PCI_BUS_MAX; bus_n++) {
> + vtd_bus = vtd_find_as_from_bus_num(s, bus_n);
> + if (!vtd_bus) {
> + continue;
> + }
> + for (devfn = 0; devfn < PCI_DEVFN_MAX; devfn++) {
> + vtd_ic = vtd_bus->dev_ic[devfn];
> + if (!vtd_ic) {
> + continue;
> + }
> + iommu_ctx_event_notify(&vtd_ic->iommu_context, &event_data);
Considering that we'll fill in the result into event_data, it could be
a bit misleading to still call it "notify" here because normally it
should only get data from the notifier caller rather than returning a
meaningful value.. Things like SUCCESS/FAIL would be fine, but here
we're returning a pasid from the notifier which seems a bit odd.
Maybe rename it to iommu_ctx_event_deliver()? Then we just rename all
the references of "notify" thingys into "hook" or something clearer?
> + if (req.alloc_result > 0) {
I'd suggest we comment on this:
We'll return the first valid result we got. It's a bit hackish in
that we don't have a good global interface yet to talk to modules
like vfio to deliver this allocation request, so we're leveraging
this per-device context to do the same thing just to make sure the
allocation happens only once.
Same to the pasid_free() below, though you can reference the comment
here from there to be simple.
> + return req.alloc_result;
> + }
> + }
> + }
> + return -1;
> +}
> +
> +static int vtd_request_pasid_free(IntelIOMMUState *s, uint32_t pasid)
> +{
> + VTDBus *vtd_bus;
> + int bus_n, devfn;
> + IOMMUCTXEventData event_data;
> + IOMMUCTXPASIDReqDesc req;
> + VTDIOMMUContext *vtd_ic;
> +
> + event_data.event = IOMMU_CTX_EVENT_PASID_FREE;
> + event_data.data = &req;
> + req.pasid = pasid;
> + req.free_result = 0;
> + event_data.length = sizeof(req);
> + for (bus_n = 0; bus_n < PCI_BUS_MAX; bus_n++) {
> + vtd_bus = vtd_find_as_from_bus_num(s, bus_n);
> + if (!vtd_bus) {
> + continue;
> + }
> + for (devfn = 0; devfn < PCI_DEVFN_MAX; devfn++) {
> + vtd_ic = vtd_bus->dev_ic[devfn];
> + if (!vtd_ic) {
> + continue;
> + }
> + iommu_ctx_event_notify(&vtd_ic->iommu_context, &event_data);
> + if (req.free_result == 0) {
> + return 0;
> + }
> + }
> + }
> + return -1;
> +}
> +
> +/*
> + * If IP is not set, set it and return 0
> + * If IP is already set, return -1
> + */
> +static int vtd_vcmd_rsp_ip_check(IntelIOMMUState *s)
> +{
> + if (!(s->vccap & VTD_VCCAP_PAS) ||
> + (s->vcrsp & 1)) {
> + return -1;
> + }
VTD_VCCAP_PAS is not a IP check, so maybe simply move these chunk out
to vtd_handle_vcmd_write? Then we can rename this function to
"void vtd_vcmd_ip_set(...)".
> + s->vcrsp = 1;
> + vtd_set_quad_raw(s, DMAR_VCRSP_REG,
> + ((uint64_t) s->vcrsp));
> + return 0;
> +}
> +
> +static void vtd_vcmd_clear_ip(IntelIOMMUState *s)
> +{
> + s->vcrsp &= (~((uint64_t)(0x1)));
> + vtd_set_quad_raw(s, DMAR_VCRSP_REG,
> + ((uint64_t) s->vcrsp));
> +}
> +
> +/* Handle write to Virtual Command Register */
> +static int vtd_handle_vcmd_write(IntelIOMMUState *s, uint64_t val)
> +{
> + uint32_t pasid;
> + int ret = -1;
> +
> + trace_vtd_reg_write_vcmd(s->vcrsp, val);
> +
> + /*
> + * Since vCPU should be blocked when the guest VMCD
> + * write was trapped to here. Should be no other vCPUs
> + * try to access VCMD if guest software is well written.
> + * However, we still emulate the IP bit here in case of
> + * bad guest software. Also align with the spec.
> + */
> + ret = vtd_vcmd_rsp_ip_check(s);
> + if (ret) {
> + return ret;
> + }
> + switch (val & VTD_VCMD_CMD_MASK) {
> + case VTD_VCMD_ALLOC_PASID:
> + ret = vtd_request_pasid_alloc(s);
> + if (ret < 0) {
> + s->vcrsp |= VTD_VCRSP_SC(VTD_VCMD_NO_AVAILABLE_PASID);
> + } else {
> + s->vcrsp |= VTD_VCRSP_RSLT(ret);
> + }
> + break;
> +
> + case VTD_VCMD_FREE_PASID:
> + pasid = VTD_VCMD_PASID_VALUE(val);
> + ret = vtd_request_pasid_free(s, pasid);
> + if (ret < 0) {
> + s->vcrsp |= VTD_VCRSP_SC(VTD_VCMD_FREE_INVALID_PASID);
> + }
> + break;
> +
> + default:
> + s->vcrsp |= VTD_VCRSP_SC(VTD_VCMD_UNDEFINED_CMD);
> + printf("Virtual Command: unsupported command!!!\n");
Perhaps error_report_once()?
> + break;
> + }
> + vtd_vcmd_clear_ip(s);
> + return 0;
> +}
> +
> static uint64_t vtd_mem_read(void *opaque, hwaddr addr, unsigned size)
> {
> IntelIOMMUState *s = opaque;
> @@ -2879,6 +3014,23 @@ static void vtd_mem_write(void *opaque, hwaddr addr,
> vtd_set_long(s, addr, val);
> break;
>
> + case DMAR_VCMD_REG:
> + if (!vtd_handle_vcmd_write(s, val)) {
> + if (size == 4) {
> + vtd_set_long(s, addr, val);
> + } else {
> + vtd_set_quad(s, addr, val);
> + }
> + }
> + break;
> +
> + case DMAR_VCMD_REG_HI:
> + assert(size == 4);
This assert() seems scary, but of course not a problem of this patch
because plenty of that are there in vtd_mem_write.. So we can fix
that later.
Do you know what should happen on bare-metal from spec-wise that when
the guest e.g. writes 2 bytes to these mmio regions?
> + if (!vtd_handle_vcmd_write(s, val)) {
> + vtd_set_long(s, addr, val);
> + }
> + break;
> +
> default:
> if (size == 4) {
> vtd_set_long(s, addr, val);
> @@ -3617,7 +3769,8 @@ static void vtd_init(IntelIOMMUState *s)
> s->ecap |= VTD_ECAP_SMTS | VTD_ECAP_SRS | VTD_ECAP_SLTS;
> } else if (!strcmp(s->scalable_mode, "modern")) {
> s->ecap |= VTD_ECAP_SMTS | VTD_ECAP_SRS | VTD_ECAP_PASID
> - | VTD_ECAP_FLTS | VTD_ECAP_PSS;
> + | VTD_ECAP_FLTS | VTD_ECAP_PSS | VTD_ECAP_VCS;
> + s->vccap |= VTD_VCCAP_PAS;
> }
> }
>
[...]
> +#define VTD_VCMD_CMD_MASK 0xffUL
> +#define VTD_VCMD_PASID_VALUE(val) (((val) >> 8) & 0xfffff)
> +
> +#define VTD_VCRSP_RSLT(val) ((val) << 8)
> +#define VTD_VCRSP_SC(val) (((val) & 0x3) << 1)
> +
> +#define VTD_VCMD_UNDEFINED_CMD 1ULL
> +#define VTD_VCMD_NO_AVAILABLE_PASID 2ULL
According to 10.4.44 - should this be 1?
--
Peter Xu
- Re: [RFC v2 10/22] intel_iommu: add virtual command capability support,
Peter Xu <=