Phase 1: QEMU startup initializes vhost-user
At startup QEMU initializes the netdev device: it creates the vhost-user socket file, waits for a connection, and registers the callback net_vhost_user_event on the socket.
When the slave opens the socket, the CHR_EVENT_OPENED handling in net_vhost_user_event runs and initializes vhost-user; it then sets s->started to true, and this phase is done.
In day-to-day debugging you will sometimes find QEMU stuck at startup waiting for the socket connection, with the other QEMU threads (vCPU threads and so on) not yet created; the cause is simply that no slave has opened the socket.
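For context, a typical vhost-user invocation looks like the following (ids and paths here are examples). With server=on QEMU creates the socket and waits for the slave to connect, which is exactly the block described above; share=on is required because the slave maps guest memory directly:

    qemu-system-x86_64 ... \
        -object memory-backend-file,id=mem0,size=4G,mem-path=/dev/hugepages,share=on \
        -numa node,memdev=mem0 \
        -chardev socket,id=char0,path=/tmp/vhost-user.sock,server=on \
        -netdev type=vhost-user,id=net0,chardev=char0 \
        -device virtio-net-pci,netdev=net0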
qemu_init
  net_init_clients
    qemu_opts_foreach(qemu_find_opts("netdev"), net_init_netdev, ...)
      net_init_netdev
        net_client_init
          net_client_init1
            net_client_init_fun[netdev->type](netdev, name, peer, errp)
static int (* const net_client_init_fun[NET_CLIENT_DRIVER__MAX])(
    const Netdev *netdev,
    const char *name,
    NetClientState *peer, Error **errp) = {
        [NET_CLIENT_DRIVER_NIC]       = net_init_nic,
        [NET_CLIENT_DRIVER_TAP]       = net_init_tap,
        [NET_CLIENT_DRIVER_SOCKET]    = net_init_socket,
        ... ...
#ifdef CONFIG_NET_BRIDGE
        [NET_CLIENT_DRIVER_BRIDGE]    = net_init_bridge,
#endif
        [NET_CLIENT_DRIVER_HUBPORT]   = net_init_hubport,
#ifdef CONFIG_VHOST_NET_USER
        [NET_CLIENT_DRIVER_VHOST_USER] = net_init_vhost_user,
#endif
};
net_init_vhost_user
  net_vhost_user_init

net_vhost_user_init
    do {
        if (qemu_chr_fe_wait_connected(&s->chr, &err) < 0) {
            error_report_err(err);
            goto err;
        }
        qemu_chr_fe_set_handlers(&s->chr, NULL, NULL,
                                 net_vhost_user_event, NULL, nc0->name, NULL,
                                 true);
    } while (!s->started);
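The loop above blocks in qemu_chr_fe_wait_connected until a peer connects. To confirm that a hang at this point is really just a missing slave, you can connect to the socket yourself; a minimal sketch (the path /tmp/vhost-user.sock is an example) — QEMU proceeds past the wait, though negotiation will of course fail without a real backend:

    #include <stdio.h>
    #include <string.h>
    #include <sys/socket.h>
    #include <sys/un.h>
    #include <unistd.h>

    int main(void)
    {
        struct sockaddr_un addr = { .sun_family = AF_UNIX };
        int fd = socket(AF_UNIX, SOCK_STREAM, 0);

        if (fd < 0) {
            perror("socket");
            return 1;
        }
        strncpy(addr.sun_path, "/tmp/vhost-user.sock", sizeof(addr.sun_path) - 1);
        if (connect(fd, (struct sockaddr *)&addr, sizeof(addr)) < 0) {
            perror("connect");
            return 1;
        }
        pause(); /* hold the connection open; QEMU's wait_connected returns */
        return 0;
    }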
net_vhost_user_event receives the open event and initializes vhost-user (the corresponding vhost-user requests are sketched right after this list):
1. negotiate the protocol features
2. fetch the slave device's features
3. initialize the vq interrupt fds and hand them to the slave
4. register the memory listener
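On the wire, these steps map onto vhost-user requests sent master-to-slave over the unix socket. A sketch of the relevant subset (names and numeric values as defined in the vhost-user protocol spec at the time of writing):

    /* Subset of vhost-user master->slave requests involved here. */
    typedef enum VhostUserRequest {
        VHOST_USER_GET_FEATURES          = 1,   /* step 2: read the slave's features */
        VHOST_USER_SET_FEATURES          = 2,   /* pushed later, after guest negotiation */
        VHOST_USER_SET_OWNER             = 3,
        VHOST_USER_SET_MEM_TABLE         = 5,   /* fed by the memory listener (step 4) */
        VHOST_USER_SET_VRING_CALL        = 13,  /* step 3: interrupt fd per vq */
        VHOST_USER_GET_PROTOCOL_FEATURES = 15,  /* step 1 */
        VHOST_USER_SET_PROTOCOL_FEATURES = 16,  /* step 1 */
    } VhostUserRequest;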
static void net_vhost_user_event(void *opaque, QEMUChrEvent event)
    const char *name = opaque;
    queues = qemu_find_net_clients_except(name, ncs, ...)
    switch (event) {
    case CHR_EVENT_OPENED:
        vhost_user_start(queues, ncs, s->vhost_user)  # expanded separately below
        s->started = true;
    case CHR_EVENT_CLOSED:
        aio_bh_schedule_oneshot(ctx, chr_closed_bh, opaque);  # chr_closed_bh saves the acked features into the NetVhostUserState structure
vhost_user_start
    vhost_net_init
        vhost_dev_init
            vhost_set_backend_type
                dev->vhost_ops = &user_ops
            hdev->vhost_ops->vhost_backend_init  # negotiate the protocol features
            hdev->vhost_ops->vhost_set_owner
            hdev->vhost_ops->vhost_get_features  # fetch the slave device's features
            for (i = 0; i < hdev->nvqs; ++i, ...)
                vhost_virtqueue_init(hdev, hdev->vqs + i, hdev->vq_index + i)
                    vhost_vq_index = dev->vhost_ops->vhost_get_vq_index
                    # initialize the vq interrupt fd and pass it to the slave
                    event_notifier_init(&vq->masked_notifier, 0)
                        ret = eventfd(0, EFD_NONBLOCK | EFD_CLOEXEC)
                        e->rfd = e->wfd = ret;
                    file.fd = event_notifier_get_fd(&vq->masked_notifier)
                        return e->rfd;
                    dev->vhost_ops->vhost_set_vring_call(dev, &file)
            # step 4: register the memory listener
            hdev->memory_listener = (MemoryListener) {
                .begin = vhost_begin,
                .commit = vhost_commit,
                .region_add = vhost_region_addnop,
                .region_nop = vhost_region_addnop,
                .log_start = vhost_log_start,
                .log_stop = vhost_log_stop,
                .log_sync = vhost_log_sync,
                .log_global_start = vhost_log_global_start,
                .log_global_stop = vhost_log_global_stop,
                .eventfd_add = vhost_eventfd_add,
                .eventfd_del = vhost_eventfd_del,
                .priority = 10
            };
            memory_listener_register(&hdev->memory_listener, &address_space_memory)
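Step 3 above (vhost_set_vring_call) does not send the eventfd's value; it passes the fd itself to the slave as SCM_RIGHTS ancillary data on the unix socket. A plain-POSIX sketch of that mechanism (sock is assumed to be the connected vhost-user socket; this is illustrative, not QEMU's actual vhost_user_write):

    #include <string.h>
    #include <sys/eventfd.h>
    #include <sys/socket.h>
    #include <sys/uio.h>

    /* Pass an eventfd over a connected unix socket the way
     * VHOST_USER_SET_VRING_CALL does: as SCM_RIGHTS ancillary data. */
    static int send_call_fd(int sock, int efd)
    {
        char payload = 0;  /* stand-in for the vhost-user message header */
        struct iovec iov = { .iov_base = &payload, .iov_len = sizeof(payload) };
        char ctrl[CMSG_SPACE(sizeof(int))];
        struct msghdr msg = {
            .msg_iov = &iov, .msg_iovlen = 1,
            .msg_control = ctrl, .msg_controllen = sizeof(ctrl),
        };
        struct cmsghdr *cmsg = CMSG_FIRSTHDR(&msg);

        memset(ctrl, 0, sizeof(ctrl));
        cmsg->cmsg_level = SOL_SOCKET;
        cmsg->cmsg_type  = SCM_RIGHTS;
        cmsg->cmsg_len   = CMSG_LEN(sizeof(int));
        memcpy(CMSG_DATA(cmsg), &efd, sizeof(int));

        return sendmsg(sock, &msg, 0) < 0 ? -1 : 0;
    }

The slave then raises a guest interrupt simply by calling eventfd_write() on the received fd; on the QEMU side this is initially the masked_notifier created by event_notifier_init in the trace above.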
Phase 2: the guest driver triggers vhost-user start
virtio device memory region ops initialization (taking virtio_pci_common_write as the example)
# fill in each virtio capability's information in the config space
virtio_pci_realize
    /*
     * virtio pci bar layout used by default.
     * subclasses can re-arrange things if needed.
     *
     *   region 0   --  virtio legacy io bar
     *   region 1   --  msi-x bar
     *   region 2   --  virtio modern io bar (off by default)
     *   region 4+5 --  virtio modern memory (64bit) bar
     *
     */
    proxy->legacy_io_bar_idx  = 0;
    proxy->msix_bar_idx       = 1;
    proxy->modern_io_bar_idx  = 2;
    proxy->modern_mem_bar_idx = 4;
    proxy->common.offset = 0x0;
    proxy->common.size   = 0x1000;
    proxy->common.type   = VIRTIO_PCI_CAP_COMMON_CFG;
    ... ...
# initialize in the config space: the virtio revision info, each capability's
# location, and each capability's memory region ops
static void virtio_pci_device_plugged(DeviceState *d, Error **errp)
    VirtIOPCIProxy *proxy = VIRTIO_PCI(d);
    config = proxy->pci_dev.config
    # mark the device config space as virtio 1.0 (modern)
    pci_set_word(config + PCI_VENDOR_ID,
                 PCI_VENDOR_ID_REDHAT_QUMRANET);
    pci_set_word(config + PCI_DEVICE_ID,
                 0x1040 + virtio_bus_get_vdev_id(bus));
    pci_config_set_revision(config, 1);
    # install each capability's memory region ops, expanded below
    virtio_pci_modern_regions_init(proxy);
    # record each capability's location in the config space
    virtio_pci_modern_mem_region_map(proxy, &proxy->common, &cap);
        virtio_pci_modern_region_map
            memory_region_add_subregion(mr, region->offset, &region->mr);
            cap->cfg_type = region->type;
            cap->bar = bar;
            cap->offset = cpu_to_le32(region->offset);
            cap->length = cpu_to_le32(region->size);
            virtio_pci_add_mem_cap(proxy, cap);
    virtio_pci_modern_mem_region_map(proxy, &proxy->isr, &cap);
    virtio_pci_modern_mem_region_map(proxy, &proxy->device, &cap);
    virtio_pci_modern_mem_region_map(proxy, &proxy->notify, &notify.cap);
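What virtio_pci_add_mem_cap appends to the PCI capability list is the standard vendor-specific capability from the virtio 1.0 spec (struct virtio_pci_cap; later spec revisions carve an id byte out of the padding). The guest driver walks these entries to learn which BAR and offset each region (common/isr/device/notify cfg) lives at:

    #include <linux/types.h>

    struct virtio_pci_cap {
        __u8   cap_vndr;    /* generic PCI field: PCI_CAP_ID_VNDR */
        __u8   cap_next;    /* generic PCI field: next capability pointer */
        __u8   cap_len;     /* length of this capability structure */
        __u8   cfg_type;    /* VIRTIO_PCI_CAP_COMMON_CFG / _ISR_CFG / ... */
        __u8   bar;         /* which BAR the region lives in */
        __u8   padding[3];
        __le32 offset;      /* offset of the region within the BAR */
        __le32 length;      /* length of the region */
    };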
virtio_pci_modern_regions_init
    static const MemoryRegionOps common_ops = {
        .read = virtio_pci_common_read,
        .write = virtio_pci_common_write,
        .impl = {
            .min_access_size = 1,
            .max_access_size = 4,
        },
        .endianness = DEVICE_LITTLE_ENDIAN,
    };
    ... ...
    memory_region_init_io(&proxy->common.mr, OBJECT(proxy),
                          &common_ops,
                          proxy,
                          "virtio-pci-common",
                          proxy->common.size);
        mr->ops = &common_ops  # memory_region_init_io stores the ops pointer
    ... ...
The guest writes the guest features; the QEMU layer negotiates the features against the backend and saves the result.
virtio_pci_common_write
    case VIRTIO_PCI_COMMON_GF:
        virtio_set_features
            virtio_set_features_nocheck
                k->set_features(vdev, val)

virtio_net_class_init
    vdc->set_features = virtio_net_set_features;

k->set_features(vdev, val)  # virtio_net_set_features(VirtIODevice *vdev, uint64_t features)
    virtio_net_set_multiqueue
        virtio_net_set_queues
            peer_attach
                vhost_set_vring_enable
                    vhost_ops->vhost_set_vring_enable(&net->dev, enable)
    vhost_net_ack_features  # negotiate the features
        vhost_net_save_acked_features
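What "negotiate" means here is essentially an intersection: the bits the guest acked are filtered against what the backend advertised earlier via vhost_get_features (restricted to the feature bits vhost-net cares about). A simplified sketch of the idea, not the exact QEMU code:

    #include <stdint.h>

    /* Simplified model of vhost_net_ack_features: keep only guest-acked
     * bits that the backend also advertised. */
    static uint64_t ack_features(uint64_t backend_features, uint64_t guest_acked)
    {
        return backend_features & guest_acked;
    }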
The guest writes DRIVER_OK to the device status; this triggers QEMU to issue the vhost-user device start, notifying the backend (slave) device to begin running.
virtio_pci_common_write
    case VIRTIO_PCI_COMMON_STATUS:
        virtio_set_status
            k->set_status

virtio_net_class_init
    vdc->set_status = virtio_net_set_status;

k->set_status  # virtio_net_set_status
    vhost_net_start
        for (i = 0; i < total_queues; i++) {
            vhost_set_vring_enable
                vhost_ops->vhost_set_vring_enable
            vhost_net_start_one
                vhost_dev_start
                    vhost_dev_set_features(hdev, hdev->log_enabled)  # in the live-migration flow this enables dirty logging
                        dev->vhost_ops->vhost_set_features
                    hdev->vhost_ops->vhost_set_mem_table(hdev, hdev->mem)
                    for (i = 0; i < hdev->nvqs; ++i) {
                        vhost_virtqueue_start
                            dev->vhost_ops->vhost_get_vq_index
                            dev->vhost_ops->vhost_set_vring_num
                            dev->vhost_ops->vhost_set_vring_base
                            vhost_virtqueue_set_addr
                                dev->vhost_ops->vhost_set_vring_addr
                            dev->vhost_ops->vhost_set_vring_kick(dev, &file)
                            dev->vhost_ops->vhost_set_vring_call
                    if (hdev->log_enabled) {  # live-migration flow: set the log base
                        hdev->vhost_ops->vhost_set_log_base
                    hdev->vhost_ops->vhost_dev_start(hdev, true)
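The two migration hooks above exist because during live migration the backend must report which guest pages it wrote. The log installed via vhost_set_log_base is a shared bitmap in which one bit covers one VHOST_LOG_PAGE (4 KiB) chunk of guest memory; conceptually the slave marks a dirty page like this (a sketch of the mechanism, not actual backend code):

    #include <stdint.h>

    #define VHOST_LOG_PAGE 0x1000   /* one bit per 4 KiB of guest memory */

    /* Mark 'gpa' dirty in the shared log bitmap; QEMU's vhost_log_sync
     * later reads these bits and feeds migration's dirty-page tracking. */
    static void log_dirty_page(uint64_t *log, uint64_t gpa)
    {
        uint64_t page = gpa / VHOST_LOG_PAGE;
        __atomic_fetch_or(&log[page / 64], 1ULL << (page % 64), __ATOMIC_RELAXED);
    }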