searchusermenu
  • 发布文章
  • 消息中心
点赞
收藏
评论
分享
原创

带有vhost-user设备的qemu热迁移流程

2024-10-11 10:17:25
63
0

memory_listener初始化

vhost_dev_init(struct vhost_dev *hdev, ...)
    hdev->vhost_ops->vhost_backend_init
    hdev->vhost_ops->vhost_get_features
    vhost_virtqueue_init
    ... ...
    hdev->memory_listener = (MemoryListener) {
        .begin = vhost_begin,
        .commit = vhost_commit,
        .region_add = vhost_region_addnop,
        .region_nop = vhost_region_addnop,
        .log_start = vhost_log_start,
        .log_stop = vhost_log_stop,
        .log_sync = vhost_log_sync,
        .log_global_start = vhost_log_global_start,    # 下面展开;vfio的memory listener没有实现这个启动标脏的回调,也缺少其他很多回调
        .log_global_stop = vhost_log_global_stop,
        .eventfd_add = vhost_eventfd_add,
        .eventfd_del = vhost_eventfd_del,
        .priority = 10
    };
    memory_listener_register(&hdev->memory_listener, &address_space_memory);
        listener_add_address_space(listener, as)          

热迁移准备

开启脏页记录(dirty log),并把所有内存初始标记为脏。
vhost设备的脏页记录是基于hdev->memory_listener实现的。
qmp_migrate
    ->tcp_start_outgoing_migration
        ->socket_start_outgoing_migration
            ->socket_outgoing_migration
                ->migration_channel_connect
                    ->migrate_fd_connect
                        ->migration_thread(用qemu_thread_create创建迁移线程)
                        
migration_thread
    qemu_savevm_state_setup    # 热迁移初始化
        QTAILQ_FOREACH(se, &savevm_state.handlers, entry) {
        se->ops->save_setup(f, se->opaque);    
        // .save_setup = ram_save_setup,
            ram_init_all
                // 初始化bitmap
                ram_init_bitmaps
                    ram_list_init_bitmaps    # 对所有内存初始标脏
                        RAMBLOCK_FOREACH_NOT_IGNORED(block) {
                            bitmap_set
                    memory_global_dirty_log_start    # 下面展开,对所有memory listener调用log_global_start,enable标脏标志
                                
# 热迁移enable标脏(下发dirty log base和size)  
memory_global_dirty_log_start(unsigned int flags)    
    global_dirty_tracking |= flags;               
    MEMORY_LISTENER_CALL_GLOBAL(log_global_start, Forward)
        QTAILQ_FOREACH(_listener, &memory_listeners, link)
            _listener->_callback(_listener, ##_args)    
            # _callback == log_global_start    
            # .log_global_start = vhost_log_global_start,
            static void vhost_log_global_start(MemoryListener *listener)
                vhost_migration_log(listener, true)    
                    vhost_dev_log_resize(dev, vhost_get_log_size(dev));
                    vhost_dev_set_log(dev, true)
                        vhost_dev_set_features(dev, enable_log);
                            features |= 0x1ULL << VHOST_F_LOG_ALL
                            dev->vhost_ops->vhost_set_features(dev, features)
                                .vhost_set_features = vhost_user_set_features,
                                    vhost_user_set_u64(dev, VHOST_USER_SET_FEATURES, features)

迁移主体处理

migration_thread
    migration_iteration_run
        qemu_savevm_state_pending(s->to_dst_file, s->threshold_size, &pend_pre, &pend_compat, &pend_post);
        pending_size = pend_pre + pend_compat + pend_post;    // 剩余待迁移的数据量
        if (pending_size && pending_size >= s->threshold_size) {
            qemu_savevm_state_iterate(s->to_dst_file, in_postcopy);    // 迭代迁移
        } else {
            migration_completion(s);    // 停机迁移
 

停机迁移

设备停止:通过注册到vm_change_state_head的回调virtio_vmstate_change,在停机迁移时停止设备。
设备状态的保存和加载:通过设备类初始化时设置的VMStateDescription(dc->vmsd)来实现。
migration_completion
    vm_stop_force_state(RUN_STATE_FINISH_MIGRATE);    // 停止虚拟机
        vm_stop()
            do_vm_stop()
                pause_all_vcpus()        // 停止vcpu线程的运行
                vm_state_notify        # 通知外设RUN_STATE_FINISH_MIGRATE状态
                    QTAILQ_FOREACH_SAFE(e, &vm_change_state_head, entries, next)
                        e->cb(e->opaque, running, state)
                        # 这里的cb函数是在下面注册的,实际cb是virtio_vmstate_change    # 停止设备,获取idx 
                        
static const TypeInfo virtio_net_info = {

    .class_init = virtio_net_class_init,
virtio_net_class_init
    vdc->realize = virtio_net_device_realize;
    vdc->set_status = virtio_net_set_status;
virtio_net_device_realize
    virtio_init
        # vfio设备没有继承virtio设备类,因此不会注册这里的回调(热迁移停机时用来停止设备)
        vdev->vmstate = qdev_add_vm_change_state_handler(DEVICE(vdev),virtio_vmstate_change, vdev);
            qemu_add_vm_change_state_handler_prio
                e->cb = cb;
                QTAILQ_INSERT_TAIL(&vm_change_state_head, e, entries);
                
# 停止设备,获取idx                
virtio_vmstate_change 
    virtio_set_status
        k->set_status    # virtio_net_set_status
            virtio_net_vhost_status
                vhost_net_stop
                    for (i = 0; i < total_queues; i++) {  
                        vhost_net_stop_one
                            vhost_dev_stop
                                for (i = 0; i < hdev->nvqs; ++i)
                                    vhost_virtqueue_stop
                                        dev->vhost_ops->vhost_get_vring_base
                                        virtio_queue_set_last_avail_idx(vdev, idx, state.num)
                                            virtio_queue_split_set_last_avail_idx
                                                # 如果get vring base失败,则改为调用virtio_queue_restore_last_avail_idx,用guest内存中的used idx恢复:
                                                # vdev->vq[n].last_avail_idx = vring_used_idx(&vdev->vq[n])
                                                vdev->vq[n].last_avail_idx = idx;
                                                vdev->vq[n].shadow_avail_idx = idx;
                                        virtio_queue_update_used_idx
                                            virtio_split_packed_update_used_idx
                                                vdev->vq[n].used_idx = vring_used_idx(&vdev->vq[n])    
                                                            
# 停止设备后,对设备状态进行qemu层的保存、传输、对端加载
migration_completion
    qemu_savevm_state_complete_precopy
        qemu_savevm_state_complete_precopy_non_iterable
            vmstate_save
                vmstate_save_state
                    vmstate_save_state_v
                        VMStateField *field = vmsd->fields
                        field->info->put()    # 下面展开

static const VMStateDescription vmstate_virtio_net = {
    .name = "virtio-net",
    .minimum_version_id = VIRTIO_NET_VM_VERSION,
    .version_id = VIRTIO_NET_VM_VERSION,
    .fields = (VMStateField[]) {
        VMSTATE_VIRTIO_DEVICE,
        VMSTATE_END_OF_LIST()
    },
    .pre_save = virtio_net_pre_save,
    .dev_unplug_pending = dev_unplug_pending,
};

static const TypeInfo virtio_net_info = {
    .name = TYPE_VIRTIO_NET,
    .class_init = virtio_net_class_init,
virtio_net_class_init
    dc->vmsd = &vmstate_virtio_net;        # vfio没有设置这个vmsd,所以不支持热迁移设备状态保存
    
#define VMSTATE_VIRTIO_DEVICE \
    {                                         \
        .name = "virtio",                     \
        .info = &virtio_vmstate_info,         \
        .flags = VMS_SINGLE,                  \
    }
    
const VMStateInfo  virtio_vmstate_info = {    
    .name = "virtio",
    .get = virtio_device_get,
    .put = virtio_device_put,
};      
virtio_device_put
    virtio_save    # 保存virtio设备状态:status、isr、queue_sel、features、config以及各vq的last_avail_idx等
        k->save_config
        qemu_put_8s(f, &vdev->status);
        qemu_put_8s(f, &vdev->isr);
        qemu_put_be16s(f, &vdev->queue_sel);
        qemu_put_be32s(f, &guest_features_lo);
        qemu_put_be32(f, vdev->config_len);
        qemu_put_buffer(f, vdev->config, vdev->config_len)    
        ... ...        
        qemu_put_be16s(f, &vdev->vq[i].last_avail_idx)    
        ... ... 
virtio_device_get
    virtio_load
        k->load_config
        qemu_get_8s(f, &vdev->status);
        qemu_get_8s(f, &vdev->isr);
        qemu_get_be16s(f, &vdev->queue_sel);
        qemu_get_be32s(f, &features);       
        ... ...
        vdev->vq[i].used_idx = vring_used_idx(&vdev->vq[i])
        vdev->vq[i].shadow_avail_idx = vring_avail_idx(&vdev->vq[i])       
        ... ...                                  
0条评论
0 / 1000
h****n
6文章数
1粉丝数
h****n
6 文章 | 1 粉丝
原创

带有vhost-user设备的qemu热迁移流程

2024-10-11 10:17:25
63
0

memory_listener初始化

vhost_dev_init(struct vhost_dev *hdev, ...)
    hdev->vhost_ops->vhost_backend_init
    hdev->vhost_ops->vhost_get_features
    vhost_virtqueue_init
    ... ...
    hdev->memory_listener = (MemoryListener) {
        .begin = vhost_begin,
        .commit = vhost_commit,
        .region_add = vhost_region_addnop,
        .region_nop = vhost_region_addnop,
        .log_start = vhost_log_start,
        .log_stop = vhost_log_stop,
        .log_sync = vhost_log_sync,
        .log_global_start = vhost_log_global_start,    # 下面展开;vfio的memory listener没有实现这个启动标脏的回调,也缺少其他很多回调
        .log_global_stop = vhost_log_global_stop,
        .eventfd_add = vhost_eventfd_add,
        .eventfd_del = vhost_eventfd_del,
        .priority = 10
    };
    memory_listener_register(&hdev->memory_listener, &address_space_memory);
        listener_add_address_space(listener, as)          

热迁移准备

开启脏页记录(dirty log),并把所有内存初始标记为脏。
vhost设备的脏页记录是基于hdev->memory_listener实现的。
qmp_migrate
    ->tcp_start_outgoing_migration
        ->socket_start_outgoing_migration
            ->socket_outgoing_migration
                ->migration_channel_connect
                    ->migrate_fd_connect
                        ->migration_thread(用qemu_thread_create创建迁移线程)
                        
migration_thread
    qemu_savevm_state_setup    # 热迁移初始化
        QTAILQ_FOREACH(se, &savevm_state.handlers, entry) {
        se->ops->save_setup(f, se->opaque);    
        // .save_setup = ram_save_setup,
            ram_init_all
                // 初始化bitmap
                ram_init_bitmaps
                    ram_list_init_bitmaps    # 对所有内存初始标脏
                        RAMBLOCK_FOREACH_NOT_IGNORED(block) {
                            bitmap_set
                    memory_global_dirty_log_start    # 下面展开,对所有memory listener调用log_global_start,enable标脏标志
                                
# 热迁移enable标脏(下发dirty log base和size)  
memory_global_dirty_log_start(unsigned int flags)    
    global_dirty_tracking |= flags;               
    MEMORY_LISTENER_CALL_GLOBAL(log_global_start, Forward)
        QTAILQ_FOREACH(_listener, &memory_listeners, link)
            _listener->_callback(_listener, ##_args)    
            # _callback == log_global_start    
            # .log_global_start = vhost_log_global_start,
            static void vhost_log_global_start(MemoryListener *listener)
                vhost_migration_log(listener, true)    
                    vhost_dev_log_resize(dev, vhost_get_log_size(dev));
                    vhost_dev_set_log(dev, true)
                        vhost_dev_set_features(dev, enable_log);
                            features |= 0x1ULL << VHOST_F_LOG_ALL
                            dev->vhost_ops->vhost_set_features(dev, features)
                                .vhost_set_features = vhost_user_set_features,
                                    vhost_user_set_u64(dev, VHOST_USER_SET_FEATURES, features)

迁移主体处理

migration_thread
    migration_iteration_run
        qemu_savevm_state_pending(s->to_dst_file, s->threshold_size, &pend_pre, &pend_compat, &pend_post);
        pending_size = pend_pre + pend_compat + pend_post;    // 剩余待迁移的数据量
        if (pending_size && pending_size >= s->threshold_size) {
            qemu_savevm_state_iterate(s->to_dst_file, in_postcopy);    // 迭代迁移
        } else {
            migration_completion(s);    // 停机迁移
 

停机迁移

设备停止:通过注册到vm_change_state_head的回调virtio_vmstate_change,在停机迁移时停止设备。
设备状态的保存和加载:通过设备类初始化时设置的VMStateDescription(dc->vmsd)来实现。
migration_completion
    vm_stop_force_state(RUN_STATE_FINISH_MIGRATE);    // 停止虚拟机
        vm_stop()
            do_vm_stop()
                pause_all_vcpus()        // 停止vcpu线程的运行
                vm_state_notify        # 通知外设RUN_STATE_FINISH_MIGRATE状态
                    QTAILQ_FOREACH_SAFE(e, &vm_change_state_head, entries, next)
                        e->cb(e->opaque, running, state)
                        # 这里的cb函数是在下面注册的,实际cb是virtio_vmstate_change    # 停止设备,获取idx 
                        
static const TypeInfo virtio_net_info = {

    .class_init = virtio_net_class_init,
virtio_net_class_init
    vdc->realize = virtio_net_device_realize;
    vdc->set_status = virtio_net_set_status;
virtio_net_device_realize
    virtio_init
        # vfio设备没有继承virtio设备类,因此不会注册这里的回调(热迁移停机时用来停止设备)
        vdev->vmstate = qdev_add_vm_change_state_handler(DEVICE(vdev),virtio_vmstate_change, vdev);
            qemu_add_vm_change_state_handler_prio
                e->cb = cb;
                QTAILQ_INSERT_TAIL(&vm_change_state_head, e, entries);
                
# 停止设备,获取idx                
virtio_vmstate_change 
    virtio_set_status
        k->set_status    # virtio_net_set_status
            virtio_net_vhost_status
                vhost_net_stop
                    for (i = 0; i < total_queues; i++) {  
                        vhost_net_stop_one
                            vhost_dev_stop
                                for (i = 0; i < hdev->nvqs; ++i)
                                    vhost_virtqueue_stop
                                        dev->vhost_ops->vhost_get_vring_base
                                        virtio_queue_set_last_avail_idx(vdev, idx, state.num)
                                            virtio_queue_split_set_last_avail_idx
                                                # 如果get vring base失败,则改为调用virtio_queue_restore_last_avail_idx,用guest内存中的used idx恢复:
                                                # vdev->vq[n].last_avail_idx = vring_used_idx(&vdev->vq[n])
                                                vdev->vq[n].last_avail_idx = idx;
                                                vdev->vq[n].shadow_avail_idx = idx;
                                        virtio_queue_update_used_idx
                                            virtio_split_packed_update_used_idx
                                                vdev->vq[n].used_idx = vring_used_idx(&vdev->vq[n])    
                                                            
# 停止设备后,对设备状态进行qemu层的保存、传输、对端加载
migration_completion
    qemu_savevm_state_complete_precopy
        qemu_savevm_state_complete_precopy_non_iterable
            vmstate_save
                vmstate_save_state
                    vmstate_save_state_v
                        VMStateField *field = vmsd->fields
                        field->info->put()    # 下面展开

static const VMStateDescription vmstate_virtio_net = {
    .name = "virtio-net",
    .minimum_version_id = VIRTIO_NET_VM_VERSION,
    .version_id = VIRTIO_NET_VM_VERSION,
    .fields = (VMStateField[]) {
        VMSTATE_VIRTIO_DEVICE,
        VMSTATE_END_OF_LIST()
    },
    .pre_save = virtio_net_pre_save,
    .dev_unplug_pending = dev_unplug_pending,
};

static const TypeInfo virtio_net_info = {
    .name = TYPE_VIRTIO_NET,
    .class_init = virtio_net_class_init,
virtio_net_class_init
    dc->vmsd = &vmstate_virtio_net;        # vfio没有设置这个vmsd,所以不支持热迁移设备状态保存
    
#define VMSTATE_VIRTIO_DEVICE \
    {                                         \
        .name = "virtio",                     \
        .info = &virtio_vmstate_info,         \
        .flags = VMS_SINGLE,                  \
    }
    
const VMStateInfo  virtio_vmstate_info = {    
    .name = "virtio",
    .get = virtio_device_get,
    .put = virtio_device_put,
};      
virtio_device_put
    virtio_save    # 保存virtio设备状态:status、isr、queue_sel、features、config以及各vq的last_avail_idx等
        k->save_config
        qemu_put_8s(f, &vdev->status);
        qemu_put_8s(f, &vdev->isr);
        qemu_put_be16s(f, &vdev->queue_sel);
        qemu_put_be32s(f, &guest_features_lo);
        qemu_put_be32(f, vdev->config_len);
        qemu_put_buffer(f, vdev->config, vdev->config_len)    
        ... ...        
        qemu_put_be16s(f, &vdev->vq[i].last_avail_idx)    
        ... ... 
virtio_device_get
    virtio_load
        k->load_config
        qemu_get_8s(f, &vdev->status);
        qemu_get_8s(f, &vdev->isr);
        qemu_get_be16s(f, &vdev->queue_sel);
        qemu_get_be32s(f, &features);       
        ... ...
        vdev->vq[i].used_idx = vring_used_idx(&vdev->vq[i])
        vdev->vq[i].shadow_avail_idx = vring_avail_idx(&vdev->vq[i])       
        ... ...                                  
文章来自个人专栏
系统问题调试
5 文章 | 1 订阅
0条评论
0 / 1000
请输入你的评论
0
0