From: Eric Auger
Subject: Re: [PATCH v1 02/22] Update linux-header to support iommufd cdev and hwpt alloc
Date: Thu, 14 Sep 2023 16:46:16 +0200
User-agent: Mozilla/5.0 (X11; Linux x86_64; rv:102.0) Gecko/20100101 Thunderbird/102.13.0

Hi Zhenzhong,

On 8/30/23 12:37, Zhenzhong Duan wrote:
> From https://git.kernel.org/pub/scm/linux/kernel/git/jgg/iommufd.git
> branch: for_next
> commit id: eb501c2d96cfce6b42528e8321ea085ec605e790
I see that in your branch you have now updated against v6.6-rc1. However,
you should run a full ./scripts/update-linux-headers.sh, i.e. not only
import the changes in linux-headers/linux/iommufd.h, as this patch seems
to do, but also pull in all the changes that come with that Linux version.
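
For instance, with a kernel tree checked out at v6.6-rc1, something along
these lines (paths illustrative) refreshes everything the script maintains
in one go:

    cd <qemu-src>
    ./scripts/update-linux-headers.sh /path/to/linux
    git add -A include/standard-headers linux-headers

That also keeps the other headers touched here (fuse.h, kvm.h, vfio.h)
consistent with the imported kernel version.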

Thanks

Eric
>
> Signed-off-by: Zhenzhong Duan <zhenzhong.duan@intel.com>
> ---
> Note this is a placeholder patch.
>
>  include/standard-headers/linux/fuse.h |   3 +
>  linux-headers/linux/iommufd.h         | 444 ++++++++++++++++++++++++++
>  linux-headers/linux/kvm.h             |  13 +-
>  linux-headers/linux/vfio.h            | 148 ++++++++-
>  4 files changed, 604 insertions(+), 4 deletions(-)
>  create mode 100644 linux-headers/linux/iommufd.h
>
> diff --git a/include/standard-headers/linux/fuse.h b/include/standard-headers/linux/fuse.h
> index 35c131a107..2c8b8de9c2 100644
> --- a/include/standard-headers/linux/fuse.h
> +++ b/include/standard-headers/linux/fuse.h
> @@ -206,6 +206,7 @@
>   *  - add extension header
>   *  - add FUSE_EXT_GROUPS
>   *  - add FUSE_CREATE_SUPP_GROUP
> + *  - add FUSE_HAS_EXPIRE_ONLY
>   */
>  
>  #ifndef _LINUX_FUSE_H
> @@ -365,6 +366,7 @@ struct fuse_file_lock {
>   * FUSE_HAS_INODE_DAX:  use per inode DAX
>   * FUSE_CREATE_SUPP_GROUP: add supplementary group info to create, mkdir,
>   *                   symlink and mknod (single group that matches parent)
> + * FUSE_HAS_EXPIRE_ONLY: kernel supports expiry-only entry invalidation
>   */
>  #define FUSE_ASYNC_READ              (1 << 0)
>  #define FUSE_POSIX_LOCKS     (1 << 1)
> @@ -402,6 +404,7 @@ struct fuse_file_lock {
>  #define FUSE_SECURITY_CTX    (1ULL << 32)
>  #define FUSE_HAS_INODE_DAX   (1ULL << 33)
>  #define FUSE_CREATE_SUPP_GROUP       (1ULL << 34)
> +#define FUSE_HAS_EXPIRE_ONLY (1ULL << 35)
>  
>  /**
>   * CUSE INIT request/reply flags
> diff --git a/linux-headers/linux/iommufd.h b/linux-headers/linux/iommufd.h
> new file mode 100644
> index 0000000000..218bf7ac98
> --- /dev/null
> +++ b/linux-headers/linux/iommufd.h
> @@ -0,0 +1,444 @@
> +/* SPDX-License-Identifier: GPL-2.0 WITH Linux-syscall-note */
> +/* Copyright (c) 2021-2022, NVIDIA CORPORATION & AFFILIATES.
> + */
> +#ifndef _IOMMUFD_H
> +#define _IOMMUFD_H
> +
> +#include <linux/types.h>
> +#include <linux/ioctl.h>
> +
> +#define IOMMUFD_TYPE (';')
> +
> +/**
> + * DOC: General ioctl format
> + *
> + * The ioctl interface follows a general format to allow for extensibility. Each
> + * ioctl is passed in a structure pointer as the argument providing the size of
> + * the structure in the first u32. The kernel checks that any structure space
> + * beyond what it understands is 0. This allows userspace to use the backward
> + * compatible portion while consistently using the newer, larger, structures.
> + *
> + * ioctls use a standard meaning for common errnos:
> + *
> + *  - ENOTTY: The IOCTL number itself is not supported at all
> + *  - E2BIG: The IOCTL number is supported, but the provided structure has
> + *    non-zero in a part the kernel does not understand.
> + *  - EOPNOTSUPP: The IOCTL number is supported, and the structure is
> + *    understood, however a known field has a value the kernel does not
> + *    understand or support.
> + *  - EINVAL: Everything about the IOCTL was understood, but a field is not
> + *    correct.
> + *  - ENOENT: An ID or IOVA provided does not exist.
> + *  - ENOMEM: Out of memory.
> + *  - EOVERFLOW: Mathematics overflowed.
> + *
> + * As well as additional errnos, within specific ioctls.
> + */
> +enum {
> +     IOMMUFD_CMD_BASE = 0x80,
> +     IOMMUFD_CMD_DESTROY = IOMMUFD_CMD_BASE,
> +     IOMMUFD_CMD_IOAS_ALLOC,
> +     IOMMUFD_CMD_IOAS_ALLOW_IOVAS,
> +     IOMMUFD_CMD_IOAS_COPY,
> +     IOMMUFD_CMD_IOAS_IOVA_RANGES,
> +     IOMMUFD_CMD_IOAS_MAP,
> +     IOMMUFD_CMD_IOAS_UNMAP,
> +     IOMMUFD_CMD_OPTION,
> +     IOMMUFD_CMD_VFIO_IOAS,
> +     IOMMUFD_CMD_HWPT_ALLOC,
> +     IOMMUFD_CMD_GET_HW_INFO,
> +};
> +
> +/**
> + * struct iommu_destroy - ioctl(IOMMU_DESTROY)
> + * @size: sizeof(struct iommu_destroy)
> + * @id: iommufd object ID to destroy. Can be any destroyable object type.
> + *
> + * Destroy any object held within iommufd.
> + */
> +struct iommu_destroy {
> +     __u32 size;
> +     __u32 id;
> +};
> +#define IOMMU_DESTROY _IO(IOMMUFD_TYPE, IOMMUFD_CMD_DESTROY)
> +
> +/**
> + * struct iommu_ioas_alloc - ioctl(IOMMU_IOAS_ALLOC)
> + * @size: sizeof(struct iommu_ioas_alloc)
> + * @flags: Must be 0
> + * @out_ioas_id: Output IOAS ID for the allocated object
> + *
> + * Allocate an IO Address Space (IOAS) which holds an IO Virtual Address (IOVA)
> + * to memory mapping.
> + */
> +struct iommu_ioas_alloc {
> +     __u32 size;
> +     __u32 flags;
> +     __u32 out_ioas_id;
> +};
> +#define IOMMU_IOAS_ALLOC _IO(IOMMUFD_TYPE, IOMMUFD_CMD_IOAS_ALLOC)
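
(Aside for readers following along, not part of the patch: with the
extensible size-first convention documented above, a minimal userspace
allocation looks roughly like the sketch below; fd setup and error paths
are trimmed.)

    #include <errno.h>
    #include <sys/ioctl.h>
    #include <linux/iommufd.h>

    /* Sketch: allocate an IOAS on an already-opened /dev/iommu fd. */
    static int ioas_alloc(int iommufd, __u32 *out_ioas_id)
    {
            struct iommu_ioas_alloc cmd = {
                    .size = sizeof(cmd),   /* first u32 carries the struct size */
                    .flags = 0,
            };

            if (ioctl(iommufd, IOMMU_IOAS_ALLOC, &cmd))
                    return -errno;
            *out_ioas_id = cmd.out_ioas_id;
            return 0;
    }
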
> +
> +/**
> + * struct iommu_iova_range - ioctl(IOMMU_IOVA_RANGE)
> + * @start: First IOVA
> + * @last: Inclusive last IOVA
> + *
> + * An interval in IOVA space.
> + */
> +struct iommu_iova_range {
> +     __aligned_u64 start;
> +     __aligned_u64 last;
> +};
> +
> +/**
> + * struct iommu_ioas_iova_ranges - ioctl(IOMMU_IOAS_IOVA_RANGES)
> + * @size: sizeof(struct iommu_ioas_iova_ranges)
> + * @ioas_id: IOAS ID to read ranges from
> + * @num_iovas: Input/Output total number of ranges in the IOAS
> + * @__reserved: Must be 0
> + * @allowed_iovas: Pointer to the output array of struct iommu_iova_range
> + * @out_iova_alignment: Minimum alignment required for mapping IOVA
> + *
> + * Query an IOAS for ranges of allowed IOVAs. Mapping IOVA outside these ranges
> + * is not allowed. num_iovas will be set to the total number of iovas and
> + * the allowed_iovas[] will be filled in as space permits.
> + *
> + * The allowed ranges are dependent on the HW path the DMA operation takes, and
> + * can change during the lifetime of the IOAS. A fresh empty IOAS will have a
> + * full range, and each attached device will narrow the ranges based on that
> + * device's HW restrictions. Detaching a device can widen the ranges. Userspace
> + * should query ranges after every attach/detach to know what IOVAs are valid
> + * for mapping.
> + *
> + * On input num_iovas is the length of the allowed_iovas array. On output it is
> + * the total number of iovas filled in. The ioctl will return -EMSGSIZE and set
> + * num_iovas to the required value if num_iovas is too small. In this case the
> + * caller should allocate a larger output array and re-issue the ioctl.
> + *
> + * out_iova_alignment returns the minimum IOVA alignment that can be given
> + * to IOMMU_IOAS_MAP/COPY. IOVA's must satisfy::
> + *
> + *   starting_iova % out_iova_alignment == 0
> + *   (starting_iova + length) % out_iova_alignment == 0
> + *
> + * out_iova_alignment can be 1 indicating any IOVA is allowed. It cannot
> + * be higher than the system PAGE_SIZE.
> + */
> +struct iommu_ioas_iova_ranges {
> +     __u32 size;
> +     __u32 ioas_id;
> +     __u32 num_iovas;
> +     __u32 __reserved;
> +     __aligned_u64 allowed_iovas;
> +     __aligned_u64 out_iova_alignment;
> +};
> +#define IOMMU_IOAS_IOVA_RANGES _IO(IOMMUFD_TYPE, IOMMUFD_CMD_IOAS_IOVA_RANGES)
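
(Aside, not part of the patch: the num_iovas in/out contract above is
typically consumed with a probe-then-retry loop, roughly as sketched
below; assumes the includes from the earlier sketch plus <stdlib.h> and
<stdint.h>.)

    /* Sketch: query allowed IOVA ranges, retrying once on EMSGSIZE. */
    static int query_iova_ranges(int iommufd, __u32 ioas_id)
    {
            struct iommu_iova_range *ranges;
            struct iommu_ioas_iova_ranges cmd = {
                    .size = sizeof(cmd),
                    .ioas_id = ioas_id,
                    .num_iovas = 0,
            };

            if (!ioctl(iommufd, IOMMU_IOAS_IOVA_RANGES, &cmd))
                    return 0;              /* nothing to report */
            if (errno != EMSGSIZE)
                    return -errno;

            ranges = calloc(cmd.num_iovas, sizeof(*ranges));
            if (!ranges)
                    return -ENOMEM;
            cmd.allowed_iovas = (uintptr_t)ranges;

            if (ioctl(iommufd, IOMMU_IOAS_IOVA_RANGES, &cmd)) {
                    free(ranges);
                    return -errno;
            }
            /* cmd.num_iovas entries of ranges[] are now valid */
            free(ranges);
            return 0;
    }
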
> +
> +/**
> + * struct iommu_ioas_allow_iovas - ioctl(IOMMU_IOAS_ALLOW_IOVAS)
> + * @size: sizeof(struct iommu_ioas_allow_iovas)
> + * @ioas_id: IOAS ID to allow IOVAs from
> + * @num_iovas: Input/Output total number of ranges in the IOAS
> + * @__reserved: Must be 0
> + * @allowed_iovas: Pointer to array of struct iommu_iova_range
> + *
> + * Ensure a range of IOVAs are always available for allocation. If this call
> + * succeeds then IOMMU_IOAS_IOVA_RANGES will never return a list of IOVA ranges
> + * that are narrower than the ranges provided here. This call will fail if
> + * IOMMU_IOAS_IOVA_RANGES is currently narrower than the given ranges.
> + *
> + * When an IOAS is first created the IOVA_RANGES will be maximally sized, and as
> + * devices are attached the IOVA will narrow based on the device restrictions.
> + * When an allowed range is specified any narrowing will be refused, ie device
> + * attachment can fail if the device requires limiting within the allowed range.
> + *
> + * Automatic IOVA allocation is also impacted by this call. MAP will only
> + * allocate within the allowed IOVAs if they are present.
> + *
> + * This call replaces the entire allowed list with the given list.
> + */
> +struct iommu_ioas_allow_iovas {
> +     __u32 size;
> +     __u32 ioas_id;
> +     __u32 num_iovas;
> +     __u32 __reserved;
> +     __aligned_u64 allowed_iovas;
> +};
> +#define IOMMU_IOAS_ALLOW_IOVAS _IO(IOMMUFD_TYPE, IOMMUFD_CMD_IOAS_ALLOW_IOVAS)
> +
> +/**
> + * enum iommufd_ioas_map_flags - Flags for map and copy
> + * @IOMMU_IOAS_MAP_FIXED_IOVA: If clear the kernel will compute an appropriate
> + *                             IOVA to place the mapping at
> + * @IOMMU_IOAS_MAP_WRITEABLE: DMA is allowed to write to this mapping
> + * @IOMMU_IOAS_MAP_READABLE: DMA is allowed to read from this mapping
> + */
> +enum iommufd_ioas_map_flags {
> +     IOMMU_IOAS_MAP_FIXED_IOVA = 1 << 0,
> +     IOMMU_IOAS_MAP_WRITEABLE = 1 << 1,
> +     IOMMU_IOAS_MAP_READABLE = 1 << 2,
> +};
> +
> +/**
> + * struct iommu_ioas_map - ioctl(IOMMU_IOAS_MAP)
> + * @size: sizeof(struct iommu_ioas_map)
> + * @flags: Combination of enum iommufd_ioas_map_flags
> + * @ioas_id: IOAS ID to change the mapping of
> + * @__reserved: Must be 0
> + * @user_va: Userspace pointer to start mapping from
> + * @length: Number of bytes to map
> + * @iova: IOVA the mapping was placed at. If IOMMU_IOAS_MAP_FIXED_IOVA is set
> + *        then this must be provided as input.
> + *
> + * Set an IOVA mapping from a user pointer. If FIXED_IOVA is specified then the
> + * mapping will be established at iova, otherwise a suitable location based on
> + * the reserved and allowed lists will be automatically selected and returned in
> + * iova.
> + *
> + * If IOMMU_IOAS_MAP_FIXED_IOVA is specified then the iova range must currently
> + * be unused, existing IOVA cannot be replaced.
> + */
> +struct iommu_ioas_map {
> +     __u32 size;
> +     __u32 flags;
> +     __u32 ioas_id;
> +     __u32 __reserved;
> +     __aligned_u64 user_va;
> +     __aligned_u64 length;
> +     __aligned_u64 iova;
> +};
> +#define IOMMU_IOAS_MAP _IO(IOMMUFD_TYPE, IOMMUFD_CMD_IOAS_MAP)
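
(Aside, not part of the patch: a kernel-chosen-IOVA mapping of a user
buffer then looks roughly as below; <stddef.h> and <stdint.h> are assumed
on top of the earlier includes.)

    /* Sketch: map len bytes at va into the IOAS, letting the kernel pick the IOVA. */
    static int ioas_map(int iommufd, __u32 ioas_id, void *va, size_t len,
                        __u64 *out_iova)
    {
            struct iommu_ioas_map cmd = {
                    .size = sizeof(cmd),
                    .flags = IOMMU_IOAS_MAP_READABLE | IOMMU_IOAS_MAP_WRITEABLE,
                    .ioas_id = ioas_id,
                    .user_va = (uintptr_t)va,
                    .length = len,
            };

            if (ioctl(iommufd, IOMMU_IOAS_MAP, &cmd))
                    return -errno;
            *out_iova = cmd.iova;  /* kernel-selected, since FIXED_IOVA is clear */
            return 0;
    }
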
> +
> +/**
> + * struct iommu_ioas_copy - ioctl(IOMMU_IOAS_COPY)
> + * @size: sizeof(struct iommu_ioas_copy)
> + * @flags: Combination of enum iommufd_ioas_map_flags
> + * @dst_ioas_id: IOAS ID to change the mapping of
> + * @src_ioas_id: IOAS ID to copy from
> + * @length: Number of bytes to copy and map
> + * @dst_iova: IOVA the mapping was placed at. If IOMMU_IOAS_MAP_FIXED_IOVA is
> + *            set then this must be provided as input.
> + * @src_iova: IOVA to start the copy
> + *
> + * Copy an already existing mapping from src_ioas_id and establish it in
> + * dst_ioas_id. The src iova/length must exactly match a range used with
> + * IOMMU_IOAS_MAP.
> + *
> + * This may be used to efficiently clone a subset of an IOAS to another, or as a
> + * kind of 'cache' to speed up mapping. Copy has an efficiency advantage over
> + * establishing equivalent new mappings, as internal resources are shared, and
> + * the kernel will pin the user memory only once.
> + */
> +struct iommu_ioas_copy {
> +     __u32 size;
> +     __u32 flags;
> +     __u32 dst_ioas_id;
> +     __u32 src_ioas_id;
> +     __aligned_u64 length;
> +     __aligned_u64 dst_iova;
> +     __aligned_u64 src_iova;
> +};
> +#define IOMMU_IOAS_COPY _IO(IOMMUFD_TYPE, IOMMUFD_CMD_IOAS_COPY)
> +
> +/**
> + * struct iommu_ioas_unmap - ioctl(IOMMU_IOAS_UNMAP)
> + * @size: sizeof(struct iommu_ioas_unmap)
> + * @ioas_id: IOAS ID to change the mapping of
> + * @iova: IOVA to start the unmapping at
> + * @length: Number of bytes to unmap, and return back the bytes unmapped
> + *
> + * Unmap an IOVA range. The iova/length must be a superset of a previously
> + * mapped range used with IOMMU_IOAS_MAP or IOMMU_IOAS_COPY. Splitting or
> + * truncating ranges is not allowed. The values 0 to U64_MAX will unmap
> + * everything.
> + */
> +struct iommu_ioas_unmap {
> +     __u32 size;
> +     __u32 ioas_id;
> +     __aligned_u64 iova;
> +     __aligned_u64 length;
> +};
> +#define IOMMU_IOAS_UNMAP _IO(IOMMUFD_TYPE, IOMMUFD_CMD_IOAS_UNMAP)
> +
> +/**
> + * enum iommufd_option - ioctl(IOMMU_OPTION_RLIMIT_MODE) and
> + *                       ioctl(IOMMU_OPTION_HUGE_PAGES)
> + * @IOMMU_OPTION_RLIMIT_MODE:
> + *    Change how RLIMIT_MEMLOCK accounting works. The caller must have privilege
> + *    to invoke this. Value 0 (default) is user based accouting, 1 uses process
> + *    based accounting. Global option, object_id must be 0
> + * @IOMMU_OPTION_HUGE_PAGES:
> + *    Value 1 (default) allows contiguous pages to be combined when generating
> + *    iommu mappings. Value 0 disables combining, everything is mapped to
> + *    PAGE_SIZE. This can be useful for benchmarking.  This is a per-IOAS
> + *    option, the object_id must be the IOAS ID.
> + */
> +enum iommufd_option {
> +     IOMMU_OPTION_RLIMIT_MODE = 0,
> +     IOMMU_OPTION_HUGE_PAGES = 1,
> +};
> +
> +/**
> + * enum iommufd_option_ops - ioctl(IOMMU_OPTION_OP_SET) and
> + *                           ioctl(IOMMU_OPTION_OP_GET)
> + * @IOMMU_OPTION_OP_SET: Set the option's value
> + * @IOMMU_OPTION_OP_GET: Get the option's value
> + */
> +enum iommufd_option_ops {
> +     IOMMU_OPTION_OP_SET = 0,
> +     IOMMU_OPTION_OP_GET = 1,
> +};
> +
> +/**
> + * struct iommu_option - iommu option multiplexer
> + * @size: sizeof(struct iommu_option)
> + * @option_id: One of enum iommufd_option
> + * @op: One of enum iommufd_option_ops
> + * @__reserved: Must be 0
> + * @object_id: ID of the object if required
> + * @val64: Option value to set or value returned on get
> + *
> + * Change a simple option value. This multiplexor allows controlling options
> + * on objects. IOMMU_OPTION_OP_SET will load an option and IOMMU_OPTION_OP_GET
> + * will return the current value.
> + */
> +struct iommu_option {
> +     __u32 size;
> +     __u32 option_id;
> +     __u16 op;
> +     __u16 __reserved;
> +     __u32 object_id;
> +     __aligned_u64 val64;
> +};
> +#define IOMMU_OPTION _IO(IOMMUFD_TYPE, IOMMUFD_CMD_OPTION)
> +
> +/**
> + * enum iommufd_vfio_ioas_op - IOMMU_VFIO_IOAS_* ioctls
> + * @IOMMU_VFIO_IOAS_GET: Get the current compatibility IOAS
> + * @IOMMU_VFIO_IOAS_SET: Change the current compatibility IOAS
> + * @IOMMU_VFIO_IOAS_CLEAR: Disable VFIO compatibility
> + */
> +enum iommufd_vfio_ioas_op {
> +     IOMMU_VFIO_IOAS_GET = 0,
> +     IOMMU_VFIO_IOAS_SET = 1,
> +     IOMMU_VFIO_IOAS_CLEAR = 2,
> +};
> +
> +/**
> + * struct iommu_vfio_ioas - ioctl(IOMMU_VFIO_IOAS)
> + * @size: sizeof(struct iommu_vfio_ioas)
> + * @ioas_id: For IOMMU_VFIO_IOAS_SET the input IOAS ID to set
> + *           For IOMMU_VFIO_IOAS_GET will output the IOAS ID
> + * @op: One of enum iommufd_vfio_ioas_op
> + * @__reserved: Must be 0
> + *
> + * The VFIO compatibility support uses a single ioas because VFIO APIs do not
> + * support the ID field. Set or Get the IOAS that VFIO compatibility will use.
> + * When VFIO_GROUP_SET_CONTAINER is used on an iommufd it will get the
> + * compatibility ioas, either by taking what is already set, or auto creating
> + * one. From then on VFIO will continue to use that ioas and is not effected by
> + * this ioctl. SET or CLEAR does not destroy any auto-created IOAS.
> + */
> +struct iommu_vfio_ioas {
> +     __u32 size;
> +     __u32 ioas_id;
> +     __u16 op;
> +     __u16 __reserved;
> +};
> +#define IOMMU_VFIO_IOAS _IO(IOMMUFD_TYPE, IOMMUFD_CMD_VFIO_IOAS)
> +
> +/**
> + * struct iommu_hwpt_alloc - ioctl(IOMMU_HWPT_ALLOC)
> + * @size: sizeof(struct iommu_hwpt_alloc)
> + * @flags: Must be 0
> + * @dev_id: The device to allocate this HWPT for
> + * @pt_id: The IOAS to connect this HWPT to
> + * @out_hwpt_id: The ID of the new HWPT
> + * @__reserved: Must be 0
> + *
> + * Explicitly allocate a hardware page table object. This is the same object
> + * type that is returned by iommufd_device_attach() and represents the
> + * underlying iommu driver's iommu_domain kernel object.
> + *
> + * A HWPT will be created with the IOVA mappings from the given IOAS.
> + */
> +struct iommu_hwpt_alloc {
> +     __u32 size;
> +     __u32 flags;
> +     __u32 dev_id;
> +     __u32 pt_id;
> +     __u32 out_hwpt_id;
> +     __u32 __reserved;
> +};
> +#define IOMMU_HWPT_ALLOC _IO(IOMMUFD_TYPE, IOMMUFD_CMD_HWPT_ALLOC)
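
(Aside, not part of the patch: since explicit hwpt allocation is what this
series is after, a minimal call would look roughly as below; dev_id is the
handle returned by VFIO_DEVICE_BIND_IOMMUFD further down in this patch.)

    /* Sketch: allocate a HWPT for a bound device on top of an existing IOAS. */
    static int hwpt_alloc(int iommufd, __u32 dev_id, __u32 ioas_id,
                          __u32 *out_hwpt_id)
    {
            struct iommu_hwpt_alloc cmd = {
                    .size = sizeof(cmd),
                    .dev_id = dev_id,
                    .pt_id = ioas_id,
            };

            if (ioctl(iommufd, IOMMU_HWPT_ALLOC, &cmd))
                    return -errno;
            *out_hwpt_id = cmd.out_hwpt_id;
            return 0;
    }
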
> +
> +/**
> + * struct iommu_hw_info_vtd - Intel VT-d hardware information
> + *
> + * @flags: Must be 0
> + * @__reserved: Must be 0
> + *
> + * @cap_reg: Value of Intel VT-d capability register defined in VT-d spec
> + *           section 11.4.2 Capability Register.
> + * @ecap_reg: Value of Intel VT-d capability register defined in VT-d spec
> + *            section 11.4.3 Extended Capability Register.
> + *
> + * User needs to understand the Intel VT-d specification to decode the
> + * register value.
> + */
> +struct iommu_hw_info_vtd {
> +     __u32 flags;
> +     __u32 __reserved;
> +     __aligned_u64 cap_reg;
> +     __aligned_u64 ecap_reg;
> +};
> +
> +/**
> + * enum iommu_hw_info_type - IOMMU Hardware Info Types
> + * @IOMMU_HW_INFO_TYPE_NONE: Used by the drivers that do not report hardware
> + *                           info
> + * @IOMMU_HW_INFO_TYPE_INTEL_VTD: Intel VT-d iommu info type
> + */
> +enum iommu_hw_info_type {
> +     IOMMU_HW_INFO_TYPE_NONE,
> +     IOMMU_HW_INFO_TYPE_INTEL_VTD,
> +};
> +
> +/**
> + * struct iommu_hw_info - ioctl(IOMMU_GET_HW_INFO)
> + * @size: sizeof(struct iommu_hw_info)
> + * @flags: Must be 0
> + * @dev_id: The device bound to the iommufd
> + * @data_len: Input the length of a user buffer in bytes. Output the length of
> + *            data that kernel supports
> + * @data_uptr: User pointer to a user-space buffer used by the kernel to fill
> + *             the iommu type specific hardware information data
> + * @out_data_type: Output the iommu hardware info type as defined in the enum
> + *                 iommu_hw_info_type.
> + * @__reserved: Must be 0
> + *
> + * Query an iommu type specific hardware information data from an iommu behind
> + * a given device that has been bound to iommufd. This hardware info data will
> + * be used to sync capabilities between the virtual iommu and the physical
> + * iommu, e.g. a nested translation setup needs to check the hardware info, so
> + * a guest stage-1 page table can be compatible with the physical iommu.
> + *
> + * To capture an iommu type specific hardware information data, @data_uptr and
> + * its length @data_len must be provided. Trailing bytes will be zeroed if the
> + * user buffer is larger than the data that kernel has. Otherwise, kernel only
> + * fills the buffer using the given length in @data_len. If the ioctl succeeds,
> + * @data_len will be updated to the length that kernel actually supports,
> + * @out_data_type will be filled to decode the data filled in the buffer
> + * pointed by @data_uptr. Input @data_len == zero is allowed.
> + */
> +struct iommu_hw_info {
> +     __u32 size;
> +     __u32 flags;
> +     __u32 dev_id;
> +     __u32 data_len;
> +     __aligned_u64 data_uptr;
> +     __u32 out_data_type;
> +     __u32 __reserved;
> +};
> +#define IOMMU_GET_HW_INFO _IO(IOMMUFD_TYPE, IOMMUFD_CMD_GET_HW_INFO)
> +#endif
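
(Aside, not part of the patch: the probe-then-fill contract of
IOMMU_GET_HW_INFO could be exercised like the sketch below; it assumes an
Intel VT-d platform and the usual <stdio.h>/<stdint.h>/<errno.h> includes.)

    /* Sketch: query IOMMU hardware info for a device bound to iommufd. */
    static int get_hw_info(int iommufd, __u32 devid)
    {
            struct iommu_hw_info_vtd vtd = {};
            struct iommu_hw_info cmd = {
                    .size = sizeof(cmd),
                    .dev_id = devid,
                    .data_len = sizeof(vtd),
                    .data_uptr = (uintptr_t)&vtd,
            };

            if (ioctl(iommufd, IOMMU_GET_HW_INFO, &cmd))
                    return -errno;
            if (cmd.out_data_type == IOMMU_HW_INFO_TYPE_INTEL_VTD)
                    printf("cap %llx ecap %llx\n",
                           (unsigned long long)vtd.cap_reg,
                           (unsigned long long)vtd.ecap_reg);
            return 0;
    }
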
> diff --git a/linux-headers/linux/kvm.h b/linux-headers/linux/kvm.h
> index 1f3f3333a4..0d74ee999a 100644
> --- a/linux-headers/linux/kvm.h
> +++ b/linux-headers/linux/kvm.h
> @@ -1414,9 +1414,16 @@ struct kvm_device_attr {
>       __u64   addr;           /* userspace address of attr data */
>  };
>  
> -#define  KVM_DEV_VFIO_GROUP                  1
> -#define   KVM_DEV_VFIO_GROUP_ADD                     1
> -#define   KVM_DEV_VFIO_GROUP_DEL                     2
> +#define  KVM_DEV_VFIO_FILE                   1
> +
> +#define   KVM_DEV_VFIO_FILE_ADD                      1
> +#define   KVM_DEV_VFIO_FILE_DEL                      2
> +
> +/* KVM_DEV_VFIO_GROUP aliases are for compile time uapi compatibility */
> +#define  KVM_DEV_VFIO_GROUP  KVM_DEV_VFIO_FILE
> +
> +#define   KVM_DEV_VFIO_GROUP_ADD     KVM_DEV_VFIO_FILE_ADD
> +#define   KVM_DEV_VFIO_GROUP_DEL     KVM_DEV_VFIO_FILE_DEL
>  #define   KVM_DEV_VFIO_GROUP_SET_SPAPR_TCE           3
>  
>  enum kvm_device_type {
> diff --git a/linux-headers/linux/vfio.h b/linux-headers/linux/vfio.h
> index 16db89071e..7326ace436 100644
> --- a/linux-headers/linux/vfio.h
> +++ b/linux-headers/linux/vfio.h
> @@ -677,11 +677,60 @@ enum {
>   * VFIO_DEVICE_GET_PCI_HOT_RESET_INFO - _IOWR(VFIO_TYPE, VFIO_BASE + 12,
>   *                                         struct vfio_pci_hot_reset_info)
>   *
> + * This command is used to query the affected devices in the hot reset for
> + * a given device.
> + *
> + * This command always reports the segment, bus, and devfn information for
> + * each affected device, and selectively reports the group_id or devid per
> + * the way how the calling device is opened.
> + *
> + *   - If the calling device is opened via the traditional group/container
> + *     API, group_id is reported.  User should check if it has owned all
> + *     the affected devices and provides a set of group fds to prove the
> + *     ownership in VFIO_DEVICE_PCI_HOT_RESET ioctl.
> + *
> + *   - If the calling device is opened as a cdev, devid is reported.
> + *     Flag VFIO_PCI_HOT_RESET_FLAG_DEV_ID is set to indicate this
> + *     data type.  All the affected devices should be represented in
> + *     the dev_set, ex. bound to a vfio driver, and also be owned by
> + *     this interface which is determined by the following conditions:
> + *     1) Has a valid devid within the iommufd_ctx of the calling device.
> + *        Ownership cannot be determined across separate iommufd_ctx and
> + *        the cdev calling conventions do not support a proof-of-ownership
> + *        model as provided in the legacy group interface.  In this case
> + *        valid devid with value greater than zero is provided in the return
> + *        structure.
> + *     2) Does not have a valid devid within the iommufd_ctx of the calling
> + *        device, but belongs to the same IOMMU group as the calling device
> + *        or another opened device that has a valid devid within the
> + *        iommufd_ctx of the calling device.  This provides implicit ownership
> + *        for devices within the same DMA isolation context.  In this case
> + *        the devid value of VFIO_PCI_DEVID_OWNED is provided in the return
> + *        structure.
> + *
> + *     A devid value of VFIO_PCI_DEVID_NOT_OWNED is provided in the return
> + *     structure for affected devices where device is NOT represented in the
> + *     dev_set or ownership is not available.  Such devices prevent the use
> + *     of VFIO_DEVICE_PCI_HOT_RESET ioctl outside of the proof-of-ownership
> + *     calling conventions (ie. via legacy group accessed devices).  Flag
> + *     VFIO_PCI_HOT_RESET_FLAG_DEV_ID_OWNED would be set when all the
> + *     affected devices are represented in the dev_set and also owned by
> + *     the user.  This flag is available only when
> + *     flag VFIO_PCI_HOT_RESET_FLAG_DEV_ID is set, otherwise reserved.
> + *     When set, user could invoke VFIO_DEVICE_PCI_HOT_RESET with a zero
> + *     length fd array on the calling device as the ownership is validated
> + *     by iommufd_ctx.
> + *
>   * Return: 0 on success, -errno on failure:
>   *   -enospc = insufficient buffer, -enodev = unsupported for device.
>   */
>  struct vfio_pci_dependent_device {
> -     __u32   group_id;
> +     union {
> +             __u32   group_id;
> +             __u32   devid;
> +#define VFIO_PCI_DEVID_OWNED         0
> +#define VFIO_PCI_DEVID_NOT_OWNED     -1
> +     };
>       __u16   segment;
>       __u8    bus;
>       __u8    devfn; /* Use PCI_SLOT/PCI_FUNC */
> @@ -690,6 +739,8 @@ struct vfio_pci_dependent_device {
>  struct vfio_pci_hot_reset_info {
>       __u32   argsz;
>       __u32   flags;
> +#define VFIO_PCI_HOT_RESET_FLAG_DEV_ID               (1 << 0)
> +#define VFIO_PCI_HOT_RESET_FLAG_DEV_ID_OWNED (1 << 1)
>       __u32   count;
>       struct vfio_pci_dependent_device        devices[];
>  };
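
(Aside, not part of the patch: regardless of which calling convention
applies, the affected-device list itself is read with the usual vfio argsz
probe, roughly as below; assumes <stdlib.h>, <errno.h>, <sys/ioctl.h> and
<linux/vfio.h>.)

    /* Sketch: fetch the hot reset dependent-device list from a device fd. */
    static int get_hot_reset_info(int device_fd)
    {
            struct vfio_pci_hot_reset_info hdr = { .argsz = sizeof(hdr) };
            struct vfio_pci_hot_reset_info *info;
            size_t sz;

            /* first call typically fails with ENOSPC but fills in hdr.count */
            if (ioctl(device_fd, VFIO_DEVICE_GET_PCI_HOT_RESET_INFO, &hdr) &&
                errno != ENOSPC)
                    return -errno;

            sz = sizeof(*info) + hdr.count * sizeof(info->devices[0]);
            info = calloc(1, sz);
            if (!info)
                    return -ENOMEM;
            info->argsz = sz;
            if (ioctl(device_fd, VFIO_DEVICE_GET_PCI_HOT_RESET_INFO, info)) {
                    free(info);
                    return -errno;
            }
            /* info->flags says whether devid or group_id semantics apply */
            free(info);
            return 0;
    }
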
> @@ -700,6 +751,24 @@ struct vfio_pci_hot_reset_info {
>   * VFIO_DEVICE_PCI_HOT_RESET - _IOW(VFIO_TYPE, VFIO_BASE + 13,
>   *                               struct vfio_pci_hot_reset)
>   *
> + * A PCI hot reset results in either a bus or slot reset which may affect
> + * other devices sharing the bus/slot.  The calling user must have
> + * ownership of the full set of affected devices as determined by the
> + * VFIO_DEVICE_GET_PCI_HOT_RESET_INFO ioctl.
> + *
> + * When called on a device file descriptor acquired through the vfio
> + * group interface, the user is required to provide proof of ownership
> + * of those affected devices via the group_fds array in struct
> + * vfio_pci_hot_reset.
> + *
> + * When called on a direct cdev opened vfio device, the flags field of
> + * struct vfio_pci_hot_reset_info reports the ownership status of the
> + * affected devices and this ioctl must be called with an empty group_fds
> + * array.  See above INFO ioctl definition for ownership requirements.
> + *
> + * Mixed usage of legacy groups and cdevs across the set of affected
> + * devices is not supported.
> + *
>   * Return: 0 on success, -errno on failure.
>   */
>  struct vfio_pci_hot_reset {
> @@ -828,6 +897,83 @@ struct vfio_device_feature {
>  
>  #define VFIO_DEVICE_FEATURE          _IO(VFIO_TYPE, VFIO_BASE + 17)
>  
> +/*
> + * VFIO_DEVICE_BIND_IOMMUFD - _IOR(VFIO_TYPE, VFIO_BASE + 18,
> + *                              struct vfio_device_bind_iommufd)
> + * @argsz:    User filled size of this data.
> + * @flags:    Must be 0.
> + * @iommufd:  iommufd to bind.
> + * @out_devid:        The device id generated by this bind. devid is a handle for
> + *            this device/iommufd bond and can be used in IOMMUFD commands.
> + *
> + * Bind a vfio_device to the specified iommufd.
> + *
> + * User is restricted from accessing the device before the binding operation
> + * is completed.  Only allowed on cdev fds.
> + *
> + * Unbind is automatically conducted when device fd is closed.
> + *
> + * Return: 0 on success, -errno on failure.
> + */
> +struct vfio_device_bind_iommufd {
> +     __u32           argsz;
> +     __u32           flags;
> +     __s32           iommufd;
> +     __u32           out_devid;
> +};
> +
> +#define VFIO_DEVICE_BIND_IOMMUFD     _IO(VFIO_TYPE, VFIO_BASE + 18)
> +
> +/*
> + * VFIO_DEVICE_ATTACH_IOMMUFD_PT - _IOW(VFIO_TYPE, VFIO_BASE + 19,
> + *                                   struct vfio_device_attach_iommufd_pt)
> + * @argsz:   User filled size of this data.
> + * @flags:   Must be 0.
> + * @pt_id:   Input the target id which can represent an ioas or a hwpt
> + *           allocated via iommufd subsystem.
> + *           Output the input ioas id or the attached hwpt id which could
> + *           be the specified hwpt itself or a hwpt automatically created
> + *           for the specified ioas by kernel during the attachment.
> + *
> + * Associate the device with an address space within the bound iommufd.
> + * Undo by VFIO_DEVICE_DETACH_IOMMUFD_PT or device fd close.  This is only
> + * allowed on cdev fds.
> + *
> + * If a vfio device is currently attached to a valid hw_pagetable, without doing
> + * a VFIO_DEVICE_DETACH_IOMMUFD_PT, a second VFIO_DEVICE_ATTACH_IOMMUFD_PT ioctl
> + * passing in another hw_pagetable (hwpt) id is allowed. This action, also known
> + * as a hw_pagetable replacement, will replace the device's currently attached
> + * hw_pagetable with a new hw_pagetable corresponding to the given pt_id.
> + *
> + * Return: 0 on success, -errno on failure.
> + */
> +struct vfio_device_attach_iommufd_pt {
> +     __u32   argsz;
> +     __u32   flags;
> +     __u32   pt_id;
> +};
> +
> +#define VFIO_DEVICE_ATTACH_IOMMUFD_PT                _IO(VFIO_TYPE, VFIO_BASE + 19)
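
(Aside, not part of the patch: stitching the new cdev ioctls together, an
open/bind/attach sequence looks roughly like the fragment below. The
/dev/vfio/devices/vfio0 path and the pre-allocated ioas_id are assumptions
for illustration; error checking is omitted. Assumes <fcntl.h>,
<sys/ioctl.h>, <linux/vfio.h> and <linux/iommufd.h>.)

    /* Sketch: bind a vfio cdev to iommufd and attach it to an existing IOAS. */
    int vfio_fd = open("/dev/vfio/devices/vfio0", O_RDWR);
    int iommufd = open("/dev/iommu", O_RDWR);

    struct vfio_device_bind_iommufd bind = {
            .argsz = sizeof(bind),
            .iommufd = iommufd,
    };
    ioctl(vfio_fd, VFIO_DEVICE_BIND_IOMMUFD, &bind);
    /* bind.out_devid is now the devid usable in IOMMUFD commands */

    struct vfio_device_attach_iommufd_pt attach = {
            .argsz = sizeof(attach),
            .pt_id = ioas_id,      /* IOAS (or HWPT) allocated beforehand */
    };
    ioctl(vfio_fd, VFIO_DEVICE_ATTACH_IOMMUFD_PT, &attach);
    /* attach.pt_id now reports the hwpt that actually got attached */
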
> +
> +/*
> + * VFIO_DEVICE_DETACH_IOMMUFD_PT - _IOW(VFIO_TYPE, VFIO_BASE + 20,
> + *                                   struct vfio_device_detach_iommufd_pt)
> + * @argsz:   User filled size of this data.
> + * @flags:   Must be 0.
> + *
> + * Remove the association of the device and its current associated address
> + * space.  After it, the device should be in a blocking DMA state.  This is only
> + * allowed on cdev fds.
> + *
> + * Return: 0 on success, -errno on failure.
> + */
> +struct vfio_device_detach_iommufd_pt {
> +     __u32   argsz;
> +     __u32   flags;
> +};
> +
> +#define VFIO_DEVICE_DETACH_IOMMUFD_PT                _IO(VFIO_TYPE, VFIO_BASE + 20)
> +
>  /*
>   * Provide support for setting a PCI VF Token, which is used as a shared
>   * secret between PF and VF drivers.  This feature may only be set on a



