vfio-pci驱动依赖vfio模块,因此在加载vfio-pci模块之前需要先加载vfio模块。vfio-pci驱动的probe(设备探测)函数为vfio_pci_probe,当被直通设备绑定到vfio-pci驱动时会调用该函数:
/*
 * vfio_pci_probe - probe callback invoked when a passthrough PCI device is
 * bound to the vfio-pci driver.
 *
 * @pdev: PCI device being bound.
 * @id:   matching device-table entry (not used in the visible part).
 *
 * Visible steps: reject devices whose header type is not
 * PCI_HEADER_TYPE_NORMAL, obtain the device's IOMMU group through
 * vfio_iommu_group_get(), allocate and initialise a zeroed
 * struct vfio_pci_device, then register it with vfio_add_group_dev().
 *
 * Returns -EINVAL for an unsupported header type or a missing group,
 * -ENOMEM when the allocation fails, or the error reported by
 * vfio_add_group_dev(). Every visible failure path after the group lookup
 * releases the group with vfio_iommu_group_put().
 *
 * NOTE: part of the body is elided ("...") in this excerpt, so the final
 * return value may also come from code that is not shown here.
 */
static int vfio_pci_probe(struct pci_dev *pdev, const struct pci_device_id *id)
{
struct vfio_pci_device *vdev;
struct iommu_group *group;
int ret;
// Only devices with a normal (type 0) header layout are accepted.
if (pdev->hdr_type != PCI_HEADER_TYPE_NORMAL)
return -EINVAL;
group = vfio_iommu_group_get(&pdev->dev); // get the IOMMU-layer group
if (!group)
return -EINVAL;
vdev = kzalloc(sizeof(*vdev), GFP_KERNEL); // allocate the struct vfio_pci_device
if (!vdev) {
// Drop the group obtained above before bailing out.
vfio_iommu_group_put(group, &pdev->dev);
return -ENOMEM;
}
vdev->pdev = pdev;
vdev->irq_type = VFIO_PCI_NUM_IRQS;
mutex_init(&vdev->igate);
spin_lock_init(&vdev->irqlock);
// Create the vfio_device and attach it to a vfio group; vdev is passed
// as the device's private data.
ret = vfio_add_group_dev(&pdev->dev, &vfio_pci_ops, vdev);
if (ret) {
// Registration failed: undo the group lookup and the allocation.
vfio_iommu_group_put(group, &pdev->dev);
kfree(vdev);
return ret;
}
...
return ret;
}
在vfio_pci_probe函数中分配vfio_pci_device结构并进行相关初始化,然后调用了vfio_add_group_dev函数:
/*
 * vfio_add_group_dev - register @dev as a vfio_device inside its vfio group.
 *
 * @dev:         physical device to register.
 * @ops:         device callbacks stored in the new vfio_device.
 * @device_data: driver-private pointer stored in the new vfio_device.
 *
 * Looks up the iommu_group of @dev, finds the matching vfio_group or
 * creates it, and then creates a vfio_device for @dev in that group.
 *
 * Returns 0 on success, -EINVAL when @dev has no iommu_group, -EBUSY when
 * a vfio_device for @dev already exists in the group, or the error encoded
 * in the pointer returned by vfio_create_group() /
 * vfio_group_create_device().
 */
int vfio_add_group_dev(struct device *dev,
		       const struct vfio_device_ops *ops, void *device_data)
{
	struct iommu_group *iommu_grp;
	struct vfio_group *vgroup;
	struct vfio_device *vdev;
	int ret = 0;

	/* IOMMU-layer group the device belongs to. */
	iommu_grp = iommu_group_get(dev);
	if (!iommu_grp)
		return -EINVAL;

	/* Map the IOMMU-layer group to its vfio-layer group. */
	vgroup = vfio_group_get_from_iommu(iommu_grp);
	if (vgroup) {
		/*
		 * A found vfio_group already holds a reference to the
		 * iommu_group, so ours is surplus.
		 */
		iommu_group_put(iommu_grp);
	} else {
		/*
		 * No vfio_group yet: create one. A created vfio_group keeps
		 * the iommu_group reference; it is only dropped here when
		 * creation fails.
		 */
		vgroup = vfio_create_group(iommu_grp);
		if (IS_ERR(vgroup)) {
			iommu_group_put(iommu_grp);
			return PTR_ERR(vgroup);
		}
	}

	/* A physical device may be registered in its group only once. */
	vdev = vfio_group_get_device(vgroup, dev);
	if (vdev) {
		WARN(1, "Device %s already exists on group %d\n",
		     dev_name(dev), iommu_group_id(iommu_grp));
		vfio_device_put(vdev);
		ret = -EBUSY;
		goto out_put_group;
	}

	/* Create the vfio-layer device object. */
	vdev = vfio_group_create_device(vgroup, dev, ops, device_data);
	if (IS_ERR(vdev))
		ret = PTR_ERR(vdev);

out_put_group:
	/*
	 * Drop all but the vfio_device reference. The vfio_device holds
	 * a reference to the vfio_group, which holds a reference to the
	 * iommu_group. On the error paths this simply releases the group
	 * reference taken by the lookup/creation above.
	 */
	vfio_group_put(vgroup);
	return ret;
}
vfio_add_group_dev函数首先会根据设备的iommu层group寻找对应vfio group,如果vfio group不存在则调用vfio_create_group函数创建对应的vfio group:
/*
 * vfio_create_group - allocate a vfio_group for @iommu_group and publish it.
 *
 * @iommu_group: IOMMU-layer group the new vfio_group is created for.
 *
 * Allocates and initialises a vfio_group, then, under vfio.group_lock,
 * checks whether another caller already added a group for the same
 * iommu_group. If so, the fresh allocation is discarded and the existing
 * group is returned with an extra reference. Otherwise a minor number is
 * allocated, the group's character device is created, and the group is
 * added to vfio.group_list.
 *
 * Returns the vfio_group on success, or an ERR_PTR: -ENOMEM on allocation
 * failure, the negative value from vfio_alloc_group_minor(), or the error
 * pointer from device_create().
 *
 * NOTE: part of the initialisation is elided ("...") in this excerpt;
 * `ret` is presumably used only by that omitted code.
 */
static struct vfio_group *vfio_create_group(struct iommu_group *iommu_group)
{
struct vfio_group *group, *tmp;
struct device *dev;
int ret, minor;
group = kzalloc(sizeof(*group), GFP_KERNEL);
if (!group)
return ERR_PTR(-ENOMEM);
// Initialise the group's fields.
kref_init(&group->kref);
INIT_LIST_HEAD(&group->device_list);
mutex_init(&group->device_lock);
INIT_LIST_HEAD(&group->unbound_list);
...
mutex_lock(&vfio.group_lock);
/* Did we race creating this group? */
list_for_each_entry(tmp, &vfio.group_list, vfio_next) {
if (tmp->iommu_group == iommu_group) {
// Someone else registered this group first: take a reference on
// theirs and discard ours. vfio_group_unlock_and_free() presumably
// also releases vfio.group_lock, as no explicit unlock follows
// -- TODO confirm against its definition.
vfio_group_get(tmp);
vfio_group_unlock_and_free(group);
return tmp;
}
}
minor = vfio_alloc_group_minor(group); // allocate a minor device number
if (minor < 0) {
vfio_group_unlock_and_free(group);
return ERR_PTR(minor);
}
// Create the /dev/vfio/$group_id device.
// vfio.class is set up when the vfio module is loaded.
// The resulting dev is stored in group->dev below; it is the device that
// user-space processes actually operate on when issuing ioctl commands.
// The name is the iommu group id, prefixed with "noiommu-" when
// group->noiommu is set.
dev = device_create(vfio.class, NULL,
MKDEV(MAJOR(vfio.group_devt), minor),
group, "%s%d", group->noiommu ? "noiommu-" : "",
iommu_group_id(iommu_group));
if (IS_ERR(dev)) {
// Undo the minor allocation before discarding the group.
vfio_free_group_minor(minor);
vfio_group_unlock_and_free(group);
return (struct vfio_group *)dev; /* ERR_PTR */
}
group->minor = minor;
group->dev = dev;
// Publish the group on the global list while still holding group_lock.
list_add(&group->vfio_next, &vfio.group_list);
mutex_unlock(&vfio.group_lock);
return group;
}
接着vfio_add_group_dev函数会调用vfio_group_get_device函数判断该物理设备dev对应的vfio_device是否已经创建:若已存在则告警并返回-EBUSY;若尚未创建则调用vfio_group_create_device函数创建一个vfio层面的设备vfio_device:
/*
 * vfio_group_create_device - allocate a vfio_device for @dev and link it
 * into @group.
 *
 * @group:       vfio group the new device will belong to.
 * @dev:         physical device being wrapped.
 * @ops:         device callbacks (vfio_pci_ops when reached from
 *               vfio_pci_probe).
 * @device_data: driver-private data (struct vfio_pci_device when reached
 *               from vfio_pci_probe).
 *
 * The new vfio_device takes its own reference on @group and is stored as
 * the driver data of @dev.
 *
 * Returns the new vfio_device, or ERR_PTR(-ENOMEM) if allocation fails.
 */
static struct vfio_device *
vfio_group_create_device(struct vfio_group *group, struct device *dev,
			 const struct vfio_device_ops *ops, void *device_data)
{
	struct vfio_device *vdev = kzalloc(sizeof(*vdev), GFP_KERNEL);

	if (!vdev)
		return ERR_PTR(-ENOMEM);

	/* Fill in the object completely before it is published anywhere. */
	vdev->group = group;
	vdev->dev = dev;
	vdev->ops = ops;
	vdev->device_data = device_data;
	kref_init(&vdev->kref);

	/* Make the vfio_device reachable from the physical device. */
	dev_set_drvdata(dev, vdev);

	/* No need to get group_lock, caller has group reference */
	vfio_group_get(group);

	/* Link into the list of devices that share this vfio group. */
	mutex_lock(&group->device_lock);
	list_add(&vdev->group_next, &group->device_list);
	mutex_unlock(&group->device_lock);

	return vdev;
}