Re: [Qemu-devel] [RFC PATCH v2 3/3] VFIO: Type1 IOMMU mapping support fo

qemu-devel

[Date Prev][Date Next][Thread Prev][Thread Next][Date Index][Thread Index]

Re: [Qemu-devel] [RFC PATCH v2 3/3] VFIO: Type1 IOMMU mapping support fo

From:	Jike Song
Subject:	Re: [Qemu-devel] [RFC PATCH v2 3/3] VFIO: Type1 IOMMU mapping support for vGPU
Date:	Wed, 02 Mar 2016 16:38:34 +0800
User-agent:	Mozilla/5.0 (X11; Linux i686 on x86_64; rv:17.0) Gecko/20130801 Thunderbird/17.0.8

On 02/24/2016 12:24 AM, Kirti Wankhede wrote:
> Aim of this module is to pin and unpin guest memory.
> This module provides interface to GPU driver that can be used to map guest
> physical memory into its kernel space driver.
> Currently this module has duplicate code from vfio_iommu_type1.c
> Working on refining functions to reuse existing code in vfio_iommu_type1.c and
> with that will add API to unpin pages.
> This is for the reference to review the overall design of vGPU.
> 
> Thanks,
> Kirti.
> 
> Signed-off-by: Kirti Wankhede <address@hidden>
> Signed-off-by: Neo Jia <address@hidden>
> ---
>  drivers/vgpu/Makefile                |    1 +
>  drivers/vgpu/vfio_iommu_type1_vgpu.c |  423 
> ++++++++++++++++++++++++++++++++++
>  2 files changed, 424 insertions(+), 0 deletions(-)
>  create mode 100644 drivers/vgpu/vfio_iommu_type1_vgpu.c
> 
> diff --git a/drivers/vgpu/Makefile b/drivers/vgpu/Makefile
> index a0a2655..8ace18d 100644
> --- a/drivers/vgpu/Makefile
> +++ b/drivers/vgpu/Makefile
> @@ -3,3 +3,4 @@ vgpu-y := vgpu-core.o vgpu-sysfs.o vgpu-driver.o
>  
>  obj-$(CONFIG_VGPU)                   += vgpu.o
>  obj-$(CONFIG_VGPU_VFIO)                 += vgpu_vfio.o
> +obj-$(CONFIG_VFIO_IOMMU_TYPE1_VGPU)     += vfio_iommu_type1_vgpu.o
> diff --git a/drivers/vgpu/vfio_iommu_type1_vgpu.c 
> b/drivers/vgpu/vfio_iommu_type1_vgpu.c
> new file mode 100644
> index 0000000..0b36ae5
> --- /dev/null
> +++ b/drivers/vgpu/vfio_iommu_type1_vgpu.c
> @@ -0,0 +1,423 @@
> +/*
> + * VGPU : IOMMU DMA mapping support for VGPU
> + *
> + * Copyright (c) 2016, NVIDIA CORPORATION. All rights reserved.
> + *     Author: Neo Jia <address@hidden>
> + *          Kirti Wankhede <address@hidden>
> + *
> + * This program is free software; you can redistribute it and/or modify
> + * it under the terms of the GNU General Public License version 2 as
> + * published by the Free Software Foundation.
> + */
> +
> +#include <linux/init.h>
> +#include <linux/module.h>
> +#include <linux/compat.h>
> +#include <linux/device.h>
> +#include <linux/kernel.h>
> +#include <linux/fs.h>
> +#include <linux/miscdevice.h>
> +#include <linux/sched.h>
> +#include <linux/wait.h>
> +#include <linux/uuid.h>
> +#include <linux/vfio.h>
> +#include <linux/iommu.h>
> +#include <linux/vgpu.h>
> +
> +#include "vgpu_private.h"
> +
> +#define DRIVER_VERSION       "0.1"
> +#define DRIVER_AUTHOR        "NVIDIA Corporation"
> +#define DRIVER_DESC     "VGPU Type1 IOMMU driver for VFIO"
> +
> +// VFIO structures
> +
> +struct vfio_iommu_vgpu {
> +     struct mutex lock;
> +     struct iommu_group *group;
> +     struct vgpu_device *vgpu_dev;
> +     struct rb_root dma_list;
> +     struct mm_struct * vm_mm;
> +};
> +
> +struct vgpu_vfio_dma {
> +     struct rb_node node;
> +     dma_addr_t iova;
> +     unsigned long vaddr;
> +     size_t size;
> +     int prot;
> +};
> +
> +/*
> + * VGPU VFIO FOPs definition
> + *
> + */
> +
> +/*
> + * Duplicated from vfio_link_dma, just quick hack ... should
> + * reuse code later
> + */
> +
> +static void vgpu_link_dma(struct vfio_iommu_vgpu *iommu,
> +                       struct vgpu_vfio_dma *new)
> +{
> +     struct rb_node **link = &iommu->dma_list.rb_node, *parent = NULL;
> +     struct vgpu_vfio_dma *dma;
> +
> +     while (*link) {
> +             parent = *link;
> +             dma = rb_entry(parent, struct vgpu_vfio_dma, node);
> +
> +             if (new->iova + new->size <= dma->iova)
> +                     link = &(*link)->rb_left;
> +             else
> +                     link = &(*link)->rb_right;
> +     }
> +
> +     rb_link_node(&new->node, parent, link);
> +     rb_insert_color(&new->node, &iommu->dma_list);
> +}
> +
> +static struct vgpu_vfio_dma *vgpu_find_dma(struct vfio_iommu_vgpu *iommu,
> +                                        dma_addr_t start, size_t size)
> +{
> +     struct rb_node *node = iommu->dma_list.rb_node;
> +
> +     while (node) {
> +             struct vgpu_vfio_dma *dma = rb_entry(node, struct 
> vgpu_vfio_dma, node);
> +
> +             if (start + size <= dma->iova)
> +                     node = node->rb_left;
> +             else if (start >= dma->iova + dma->size)
> +                     node = node->rb_right;
> +             else
> +                     return dma;
> +     }
> +
> +     return NULL;
> +}
> +
> +static void vgpu_unlink_dma(struct vfio_iommu_vgpu *iommu, struct 
> vgpu_vfio_dma *old)
> +{
> +     rb_erase(&old->node, &iommu->dma_list);
> +}
> +
> +static void vgpu_dump_dma(struct vfio_iommu_vgpu *iommu)
> +{
> +     struct vgpu_vfio_dma *c, *n;
> +     uint32_t i = 0;
> +
> +     rbtree_postorder_for_each_entry_safe(c, n, &iommu->dma_list, node)
> +             printk(KERN_INFO "%s: dma[%d] iova:0x%llx, vaddr:0x%lx, 
> size:0x%lx\n",
> +                    __FUNCTION__, i++, c->iova, c->vaddr, c->size);
> +}
> +
> +static int vgpu_dma_do_track(struct vfio_iommu_vgpu * vgpu_iommu,
> +     struct vfio_iommu_type1_dma_map *map)
> +{
> +     dma_addr_t iova = map->iova;
> +     unsigned long vaddr = map->vaddr;
> +     int ret = 0, prot = 0;
> +     struct vgpu_vfio_dma *vgpu_dma;
> +
> +     mutex_lock(&vgpu_iommu->lock);
> +
> +     if (vgpu_find_dma(vgpu_iommu, map->iova, map->size)) {
> +             mutex_unlock(&vgpu_iommu->lock);
> +             return -EEXIST;
> +     }
> +
> +     vgpu_dma = kzalloc(sizeof(*vgpu_dma), GFP_KERNEL);
> +
> +     if (!vgpu_dma) {
> +             mutex_unlock(&vgpu_iommu->lock);
> +             return -ENOMEM;
> +     }
> +
> +     vgpu_dma->iova = iova;
> +     vgpu_dma->vaddr = vaddr;
> +     vgpu_dma->prot = prot;
> +     vgpu_dma->size = map->size;
> +
> +     vgpu_link_dma(vgpu_iommu, vgpu_dma);

Hi Kirti & Neo,

seems that no one actually setup mappings for IOMMU here?

> +
> +     mutex_unlock(&vgpu_iommu->lock);
> +     return ret;
> +}
> +
> +static int vgpu_dma_do_untrack(struct vfio_iommu_vgpu * vgpu_iommu,
> +     struct vfio_iommu_type1_dma_unmap *unmap)
> +{
> +     struct vgpu_vfio_dma *vgpu_dma;
> +     size_t unmapped = 0;
> +     int ret = 0;
> +
> +     mutex_lock(&vgpu_iommu->lock);
> +
> +     vgpu_dma = vgpu_find_dma(vgpu_iommu, unmap->iova, 0);
> +     if (vgpu_dma && vgpu_dma->iova != unmap->iova) {
> +             ret = -EINVAL;
> +             goto unlock;
> +     }
> +
> +     vgpu_dma = vgpu_find_dma(vgpu_iommu, unmap->iova + unmap->size - 1, 0);
> +     if (vgpu_dma && vgpu_dma->iova + vgpu_dma->size != unmap->iova + 
> unmap->size) {
> +             ret = -EINVAL;
> +             goto unlock;
> +     }
> +
> +     while (( vgpu_dma = vgpu_find_dma(vgpu_iommu, unmap->iova, 
> unmap->size))) {
> +             unmapped += vgpu_dma->size;
> +             vgpu_unlink_dma(vgpu_iommu, vgpu_dma);
> +     }
> +
> +unlock:
> +     mutex_unlock(&vgpu_iommu->lock);
> +     unmap->size = unmapped;
> +
> +     return ret;
> +}
> +
> +/* Ugly hack to quickly test single deivce ... */
> +
> +static struct vfio_iommu_vgpu *_local_iommu = NULL;
> +
> +int vgpu_dma_do_translate(dma_addr_t *gfn_buffer, uint32_t count)
> +{
> +     int i = 0, ret = 0, prot = 0;
> +     unsigned long remote_vaddr = 0, pfn = 0;
> +     struct vfio_iommu_vgpu *vgpu_iommu = _local_iommu;
> +     struct vgpu_vfio_dma *vgpu_dma;
> +     struct page *page[1];
> +     // unsigned long * addr = NULL;
> +     struct mm_struct *mm = vgpu_iommu->vm_mm;
> +
> +     prot = IOMMU_READ | IOMMU_WRITE;
> +
> +     printk(KERN_INFO "%s: >>>>\n", __FUNCTION__);
> +
> +     mutex_lock(&vgpu_iommu->lock);
> +
> +     vgpu_dump_dma(vgpu_iommu);
> +
> +     for (i = 0; i < count; i++) {
> +             dma_addr_t iova = gfn_buffer[i] << PAGE_SHIFT;
> +             vgpu_dma = vgpu_find_dma(vgpu_iommu, iova, 0 /*  size */);
> +
> +             if (!vgpu_dma) {
> +                     printk(KERN_INFO "%s: fail locate iova[%d]:0x%llx\n", 
> __FUNCTION__, i, iova);
> +                     ret = -EINVAL;
> +                     goto unlock;
> +             }
> +
> +             remote_vaddr = vgpu_dma->vaddr + iova - vgpu_dma->iova;
> +             printk(KERN_INFO "%s: find dma iova[%d]:0x%llx, vaddr:0x%lx, 
> size:0x%lx, remote_vaddr:0x%lx\n",
> +                     __FUNCTION__, i, vgpu_dma->iova,
> +                     vgpu_dma->vaddr, vgpu_dma->size, remote_vaddr);
> +
> +             if (get_user_pages_unlocked(NULL, mm, remote_vaddr, 1, 1, 0, 
> page) == 1) {
> +                     pfn = page_to_pfn(page[0]);
> +                     printk(KERN_INFO "%s: pfn[%d]:0x%lx\n", __FUNCTION__, 
> i, pfn);
> +                     // addr = vmap(page, 1, VM_MAP, PAGE_KERNEL);
> +             }
> +             else {
> +                     printk(KERN_INFO "%s: fail to pin pfn[%d]\n", 
> __FUNCTION__, i);
> +                     ret = -ENOMEM;
> +                     goto unlock;
> +             }
> +
> +             gfn_buffer[i] = pfn;
> +             // vunmap(addr);
> +
> +     }
> +
> +unlock:
> +     mutex_unlock(&vgpu_iommu->lock);
> +     printk(KERN_INFO "%s: <<<<\n", __FUNCTION__);
> +     return ret;
> +}
> +EXPORT_SYMBOL(vgpu_dma_do_translate);
> +
> +static void *vfio_iommu_vgpu_open(unsigned long arg)
> +{
> +     struct vfio_iommu_vgpu *iommu;
> +
> +     iommu = kzalloc(sizeof(*iommu), GFP_KERNEL);
> +
> +     if (!iommu)
> +             return ERR_PTR(-ENOMEM);
> +
> +     mutex_init(&iommu->lock);
> +
> +     printk(KERN_INFO "%s", __FUNCTION__);
> +
> +     /* TODO: Keep track the v2 vs. v1, for now only assume
> +      * we are v2 due to QEMU code */
> +     _local_iommu = iommu;
> +     return iommu;
> +}
> +
> +static void vfio_iommu_vgpu_release(void *iommu_data)
> +{
> +     struct vfio_iommu_vgpu *iommu = iommu_data;
> +     kfree(iommu);
> +     printk(KERN_INFO "%s", __FUNCTION__);
> +}
> +
> +static long vfio_iommu_vgpu_ioctl(void *iommu_data,
> +             unsigned int cmd, unsigned long arg)
> +{
> +     int ret = 0;
> +     unsigned long minsz;
> +     struct vfio_iommu_vgpu *vgpu_iommu = iommu_data;
> +
> +     switch (cmd) {
> +     case VFIO_CHECK_EXTENSION:
> +     {
> +             if ((arg == VFIO_TYPE1_IOMMU) || (arg == VFIO_TYPE1v2_IOMMU))
> +                     return 1;
> +             else
> +                     return 0;
> +     }
> +
> +     case VFIO_IOMMU_GET_INFO:
> +     {
> +             struct vfio_iommu_type1_info info;
> +             minsz = offsetofend(struct vfio_iommu_type1_info, iova_pgsizes);
> +
> +             if (copy_from_user(&info, (void __user *)arg, minsz))
> +                     return -EFAULT;
> +
> +             if (info.argsz < minsz)
> +                     return -EINVAL;
> +
> +             info.flags = 0;
> +
> +             return copy_to_user((void __user *)arg, &info, minsz);
> +     }
> +     case VFIO_IOMMU_MAP_DMA:
> +     {
> +             // TODO
> +             struct vfio_iommu_type1_dma_map map;
> +             minsz = offsetofend(struct vfio_iommu_type1_dma_map, size);
> +
> +             if (copy_from_user(&map, (void __user *)arg, minsz))
> +                     return -EFAULT;
> +
> +             if (map.argsz < minsz)
> +                     return -EINVAL;
> +
> +             printk(KERN_INFO "VGPU-IOMMU:MAP_DMA flags:%d, vaddr:0x%llx, 
> iova:0x%llx, size:0x%llx\n",
> +                     map.flags, map.vaddr, map.iova, map.size);
> +
> +             /*
> +              * TODO: Tracking code is mostly duplicated from TYPE1 IOMMU, 
> ideally,
> +              * this should be merged into one single file and reuse data
> +              * structure
> +              *
> +              */
> +             ret = vgpu_dma_do_track(vgpu_iommu, &map);
> +             break;
> +     }
> +     case VFIO_IOMMU_UNMAP_DMA:
> +     {
> +             // TODO
> +             struct vfio_iommu_type1_dma_unmap unmap;
> +
> +             minsz = offsetofend(struct vfio_iommu_type1_dma_unmap, size);
> +
> +             if (copy_from_user(&unmap, (void __user *)arg, minsz))
> +                     return -EFAULT;
> +
> +             if (unmap.argsz < minsz)
> +                     return -EINVAL;
> +
> +             ret = vgpu_dma_do_untrack(vgpu_iommu, &unmap);
> +             break;
> +     }
> +     default:
> +     {
> +             printk(KERN_INFO "%s cmd default ", __FUNCTION__);
> +             ret = -ENOTTY;
> +             break;
> +     }
> +     }
> +
> +     return ret;
> +}
> +
> +
> +static int vfio_iommu_vgpu_attach_group(void *iommu_data,
> +                                     struct iommu_group *iommu_group)
> +{
> +     struct vfio_iommu_vgpu *iommu = iommu_data;
> +     struct vgpu_device *vgpu_dev = NULL;
> +
> +     printk(KERN_INFO "%s", __FUNCTION__);
> +
> +     vgpu_dev = get_vgpu_device_from_group(iommu_group);
> +     if (vgpu_dev) {
> +             iommu->vgpu_dev = vgpu_dev;
> +             iommu->group = iommu_group;
> +
> +             /* IOMMU shares the same life cylce as VM MM */
> +             iommu->vm_mm = current->mm;
> +
> +             return 0;
> +     }
> +     iommu->group = iommu_group;
> +     return 1;
> +}
> +
> +static void vfio_iommu_vgpu_detach_group(void *iommu_data,
> +             struct iommu_group *iommu_group)
> +{
> +     struct vfio_iommu_vgpu *iommu = iommu_data;
> +
> +     printk(KERN_INFO "%s", __FUNCTION__);
> +     iommu->vm_mm = NULL;
> +     iommu->group = NULL;
> +
> +     return;
> +}
> +
> +
> +static const struct vfio_iommu_driver_ops vfio_iommu_vgpu_driver_ops = {
> +     .name           = "vgpu_vfio",
> +     .owner          = THIS_MODULE,
> +     .open           = vfio_iommu_vgpu_open,
> +     .release        = vfio_iommu_vgpu_release,
> +     .ioctl          = vfio_iommu_vgpu_ioctl,
> +     .attach_group   = vfio_iommu_vgpu_attach_group,
> +     .detach_group   = vfio_iommu_vgpu_detach_group,
> +};
> +
> +
> +int vgpu_vfio_iommu_init(void)
> +{
> +     int rc = vfio_register_iommu_driver(&vfio_iommu_vgpu_driver_ops);
> +
> +     printk(KERN_INFO "%s\n", __FUNCTION__);
> +     if (rc < 0) {
> +             printk(KERN_ERR "Error: failed to register vfio iommu, 
> err:%d\n", rc);
> +     }
> +
> +     return rc;
> +}
> +
> +void vgpu_vfio_iommu_exit(void)
> +{
> +     // unregister vgpu_vfio driver
> +     vfio_unregister_iommu_driver(&vfio_iommu_vgpu_driver_ops);
> +     printk(KERN_INFO "%s\n", __FUNCTION__);
> +}
> +
> +
> +module_init(vgpu_vfio_iommu_init);
> +module_exit(vgpu_vfio_iommu_exit);
> +
> +MODULE_VERSION(DRIVER_VERSION);
> +MODULE_LICENSE("GPL");
> +MODULE_AUTHOR(DRIVER_AUTHOR);
> +MODULE_DESCRIPTION(DRIVER_DESC);
> +
> 

--
Thanks,
Jike

[Prev in Thread]

Current Thread

[Next in Thread]

Re: [Qemu-devel] [RFC PATCH v2 3/3] VFIO: Type1 IOMMU mapping support for vGPU, Jike Song <=
- Re: [Qemu-devel] [RFC PATCH v2 3/3] VFIO: Type1 IOMMU mapping support for vGPU, Neo Jia, 2016/03/04
  - Re: [Qemu-devel] [RFC PATCH v2 3/3] VFIO: Type1 IOMMU mapping support for vGPU, Jike Song, 2016/03/07
    - Re: [Qemu-devel] [RFC PATCH v2 3/3] VFIO: Type1 IOMMU mapping support for vGPU, Neo Jia, 2016/03/07
    - Re: [Qemu-devel] [RFC PATCH v2 3/3] VFIO: Type1 IOMMU mapping support for vGPU, Jike Song, 2016/03/09
    - Re: [Qemu-devel] [RFC PATCH v2 3/3] VFIO: Type1 IOMMU mapping support for vGPU, Neo Jia, 2016/03/10
    - Re: [Qemu-devel] [RFC PATCH v2 3/3] VFIO: Type1 IOMMU mapping support for vGPU, Tian, Kevin, 2016/03/10
    - Re: [Qemu-devel] [RFC PATCH v2 3/3] VFIO: Type1 IOMMU mapping support for vGPU, Neo Jia, 2016/03/11
    - Re: [Qemu-devel] [RFC PATCH v2 3/3] VFIO: Type1 IOMMU mapping support for vGPU, Tian, Kevin, 2016/03/11
    - Re: [Qemu-devel] [RFC PATCH v2 3/3] VFIO: Type1 IOMMU mapping support for vGPU, Alex Williamson, 2016/03/11
    - Re: [Qemu-devel] [RFC PATCH v2 3/3] VFIO: Type1 IOMMU mapping support for vGPU, Neo Jia, 2016/03/11

Prev by Date: Re: [Qemu-devel] [PATCH v2 3/4] rng: move request queue cleanup from RngEgd to RngBackend
Next by Date: Re: [Qemu-devel] [PATCH 8/9] nvdimm acpi: emulate dsm method
Previous by thread: Re: [Qemu-devel] [PATCH v2 3/4] rng: move request queue cleanup from RngEgd to RngBackend
Next by thread: Re: [Qemu-devel] [RFC PATCH v2 3/3] VFIO: Type1 IOMMU mapping support for vGPU
Index(es):
- Date
- Thread