searchusermenu
  • 发布文章
  • 消息中心
点赞
收藏
评论
分享
原创

基于vfio-user的全用户态存储设备直通——环境搭建

2023-09-01 03:38:36
98
0

环境信息

[root@localhost spdk]# uname -r
5.10.0-60.18.0.50.oe2203.x86_64
[root@localhost spdk]# cat /etc/*release
openEuler release 22.03 LTS
NAME="openEuler"
VERSION="22.03 LTS"
ID="openEuler"
VERSION_ID="22.03"
PRETTY_NAME="openEuler 22.03 LTS"
ANSI_COLOR="0;31"
openEuler release 22.03 LTS
[root@localhost spdk]# 
[root@localhost spdk]# cat /etc/default/grub 
GRUB_TIMEOUT=5
GRUB_DISTRIBUTOR="$(sed 's, release .*$,,g' /etc/system-release)"
GRUB_DEFAULT=saved
GRUB_DISABLE_SUBMENU=true
GRUB_TERMINAL_OUTPUT="console"
GRUB_CMDLINE_LINUX="resume=/dev/mapper/openeuler-swap rd.lvm.lv=openeuler/root rd.lvm.lv=openeuler/swap crashkernel=512M intel_iommu=on iommu=pt hugepagesz=2MB hugepages=4096 default_hugepagesz=2MB pci=realloc"
GRUB_DISABLE_RECOVERY="true"
[root@localhost spdk]# 
[root@localhost spdk]# cat /proc/meminfo | grep -i huge
AnonHugePages: 77824 kB
ShmemHugePages: 0 kB
FileHugePages: 0 kB
HugePages_Total: 4096
HugePages_Free: 2744
HugePages_Rsvd: 0
HugePages_Surp: 0
Hugepagesize: 2048 kB
Hugetlb: 8388608 kB
[root@localhost spdk]# 

编译SPDK

git clone https://github.com/spdk/spdk
cd spdk
git checkout 72a5fa139
git submodule update --init
./scripts/pkgdep.sh
./configure --with-vfio-user
make -j24

启动SPDK server

1. 删除残留的文件

rm -f /var/tmp/spdk.sock
rm -f /var/tmp/spdk.sock.lock
rm -f /var/run/{cntrl,bar0}

2. 启动nvmf_tgt应用程序

[root@localhost spdk]# LD_LIBRARY_PATH=build/lib:dpdk/build/lib build/bin/nvmf_tgt -m 0x2
[2022-12-02 00:19:48.606751] Starting SPDK v22.01-pre git sha1 72a5fa139 / DPDK 21.08.0 initialization...
[2022-12-02 00:19:48.606842] [ DPDK EAL parameters: [2022-12-02 00:19:48.606850] nvmf [2022-12-02 00:19:
48.606857] --no-shconf [2022-12-02 00:19:48.606863] -c 0x2 [2022-12-02 00:19:48.606870] --log-level=lib.eal:6 
[2022-12-02 00:19:48.606877] --log-level=lib.cryptodev:5 [2022-12-02 00:19:48.606884] --log-level=user1:6 [2022-
12-02 00:19:48.606890] --iova-mode=pa [2022-12-02 00:19:48.606897] --base-virtaddr=0x200000000000 [2022-12-02 
00:19:48.606903] --match-allocations [2022-12-02 00:19:48.606910] --file-prefix=spdk_pid2414 [2022-12-02 00:19:
48.606920] ]
EAL: No available 1048576 kB hugepages reported
TELEMETRY: No legacy callbacks, legacy socket not created
[2022-12-02 00:19:48.737441] app.c: 543:spdk_app_start: *NOTICE*: Total cores available: 1
[2022-12-02 00:19:49.270944] reactor.c: 943:reactor_run: *NOTICE*: Reactor started on core 1
[2022-12-02 00:19:49.271046] accel_engine.c:1012:spdk_accel_engine_initialize: *NOTICE*: Accel engine 
initialized to use software engine. 

-m 0x2是CPU核掩码(core mask),用于绑核:0x2的二进制为010,即只置位bit 1,表示绑定到第2个CPU核(核编号从0开始,即core 1,与上面日志中的"Reactor started on core 1"一致)。

[root@localhost spdk]# ./scripts/rpc.py thread_get_stats
{
 "tick_rate": 2800000000,
 "threads": [
 {
 "name": "app_thread",
 "id": 1,
 "cpumask": "2",
 "busy": 56001085330,
 "idle": 1804557630407,
 "active_pollers_count": 0,
 "timed_pollers_count": 2,
 "paused_pollers_count": 0
 },
 {
 "name": "nvmf_tgt_poll_group_1",
 "id": 2,
 "cpumask": "2",
 "busy": 7175205807,
 "idle": 15491017885977,
 "active_pollers_count": 3,
 "timed_pollers_count": 0,
 "paused_pollers_count": 0
 }
 ]
}
[root@localhost spdk]#

3、创建 VFIOUSER nvmf transport

./scripts/rpc.py nvmf_create_transport -t VFIOUSER

4、创建NVMf subsystem

./scripts/rpc.py nvmf_create_subsystem  nqn.2022-05.io.spdk:cnode0 -a -s SPDK0

5、创建 Malloc 类型的 bdev

创建Malloc类型的bdev用作NVMf Target背后的块设备资源

[root@localhost spdk]# ./scripts/rpc.py bdev_malloc_create 512 512 -b Malloc0
Malloc0

6、给NVMf Target添加bdev

./scripts/rpc.py nvmf_subsystem_add_ns nqn.2022-05.io.spdk:cnode0 Malloc0

7、给NVMf Target添加vfio-user类型的listener

./scripts/rpc.py nvmf_subsystem_add_listener nqn.2022-05.io.spdk:cnode0 -t VFIOUSER -a /var/run -s 0

生成socket文件/var/run/cntrl


经过以上操作,通过CLI命令行可以看到当前server的transport 为VFIOUSER,有一个NVMe类型、序列号为SPDK0的子系统 nqn.2022-05.io.spdk:cnode0,管理了512M 的 Malloc0内存盘,
使用监听地址/var/run(服务ID为0,对应socket文件/var/run/cntrl)来监听对子系统 nqn.2022-05.io.spdk:cnode0 的请求。
也可以通过rpc查看server的子系统信息

[root@localhost spdk]# ./scripts/rpc.py nvmf_get_subsystems
[
 {
 "nqn": "nqn.2014-08.org.nvmexpress.discovery",
 "subtype": "Discovery",
 "listen_addresses": [],
 "allow_any_host": true,
 "hosts": []
 },
 {
 "nqn": "nqn.2022-05.io.spdk:cnode0",
 "subtype": "NVMe",
 "listen_addresses": [
 {
 "transport": "VFIOUSER",
 "trtype": "VFIOUSER",
 "adrfam": "IPv4",
 "traddr": "/var/run",
 "trsvcid": "0"
 }
 ],
 "allow_any_host": true,
 "hosts": [],
 "serial_number": "SPDK0",
 "model_number": "SPDK bdev Controller",
 "max_namespaces": 32,
 "min_cntlid": 1,
 "max_cntlid": 65519,
 "namespaces": [
 {
 "nsid": 1,
 "bdev_name": "Malloc0",
 "name": "Malloc0",
 "nguid": "647705BA8B9C414893C92731F230EA36",
 "uuid": "647705ba-8b9c-4148-93c9-2731f230ea36"
 }
 ]
 }
]
[root@localhost spdk]# 

spdk是polling模式,cpu占用是100%, 会严重影响虚机启动,因此spdk和qemu进程需要绑定不同的核。

编译qemu

git clone https://github.com/oracle/qemu
cd qemu
git checkout d377d483f9
git submodule update --init --recursive
./configure --enable-multiprocess --disable-werror
make -j24

启动虚拟机

./qemu-system-x86_64 -cpu host -smp 8 -m 2G \
  -object memory-backend-file,id=mem,size=2G,mem-path=/dev/hugepages,share=on \
  -numa node,memdev=mem \
  -drive file=/root/CentOS-7-x86_64-GenericCloud-1811.qcow2,if=none,id=disk \
  -device ide-hd,drive=disk,bootindex=0 \
  -net user,hostfwd=tcp::10020-:22 -net nic \
  -vnc :1 --enable-kvm \
  -device vfio-user-pci,socket=/var/run/cntrl -nographic


虚拟机启动后,可以看到NVMf Target通过vfio-user通道直通的NVMe设备。

0条评论
0 / 1000
周朋肖
3文章数
0粉丝数
周朋肖
3 文章 | 0 粉丝
周朋肖
3文章数
0粉丝数
周朋肖
3 文章 | 0 粉丝
原创

基于vfio-user的全用户态存储设备直通——环境搭建

2023-09-01 03:38:36
98
0

环境信息

[root@localhost spdk]# uname -r
5.10.0-60.18.0.50.oe2203.x86_64
[root@localhost spdk]# cat /etc/*release
openEuler release 22.03 LTS
NAME="openEuler"
VERSION="22.03 LTS"
ID="openEuler"
VERSION_ID="22.03"
PRETTY_NAME="openEuler 22.03 LTS"
ANSI_COLOR="0;31"
openEuler release 22.03 LTS
[root@localhost spdk]# 
[root@localhost spdk]# cat /etc/default/grub 
GRUB_TIMEOUT=5
GRUB_DISTRIBUTOR="$(sed 's, release .*$,,g' /etc/system-release)"
GRUB_DEFAULT=saved
GRUB_DISABLE_SUBMENU=true
GRUB_TERMINAL_OUTPUT="console"
GRUB_CMDLINE_LINUX="resume=/dev/mapper/openeuler-swap rd.lvm.lv=openeuler/root rd.lvm.lv=openeuler/swap crashkernel=512M intel_iommu=on iommu=pt hugepagesz=2MB hugepages=4096 default_hugepagesz=2MB pci=realloc"
GRUB_DISABLE_RECOVERY="true"
[root@localhost spdk]# 
[root@localhost spdk]# cat /proc/meminfo | grep -i huge
AnonHugePages: 77824 kB
ShmemHugePages: 0 kB
FileHugePages: 0 kB
HugePages_Total: 4096
HugePages_Free: 2744
HugePages_Rsvd: 0
HugePages_Surp: 0
Hugepagesize: 2048 kB
Hugetlb: 8388608 kB
[root@localhost spdk]# 

编译SPDK

git clone https://github.com/spdk/spdk
cd spdk
git checkout 72a5fa139
git submodule update --init
./scripts/pkgdep.sh
./configure --with-vfio-user
make -j24

启动SPDK server

1. 删除残留的文件

rm -f /var/tmp/spdk.sock
rm -f /var/tmp/spdk.sock.lock
rm -f /var/run/{cntrl,bar0}

2. 启动nvmf_tgt应用程序

[root@localhost spdk]# LD_LIBRARY_PATH=build/lib:dpdk/build/lib build/bin/nvmf_tgt -m 0x2
[2022-12-02 00:19:48.606751] Starting SPDK v22.01-pre git sha1 72a5fa139 / DPDK 21.08.0 initialization...
[2022-12-02 00:19:48.606842] [ DPDK EAL parameters: [2022-12-02 00:19:48.606850] nvmf [2022-12-02 00:19:
48.606857] --no-shconf [2022-12-02 00:19:48.606863] -c 0x2 [2022-12-02 00:19:48.606870] --log-level=lib.eal:6 
[2022-12-02 00:19:48.606877] --log-level=lib.cryptodev:5 [2022-12-02 00:19:48.606884] --log-level=user1:6 [2022-
12-02 00:19:48.606890] --iova-mode=pa [2022-12-02 00:19:48.606897] --base-virtaddr=0x200000000000 [2022-12-02 
00:19:48.606903] --match-allocations [2022-12-02 00:19:48.606910] --file-prefix=spdk_pid2414 [2022-12-02 00:19:
48.606920] ]
EAL: No available 1048576 kB hugepages reported
TELEMETRY: No legacy callbacks, legacy socket not created
[2022-12-02 00:19:48.737441] app.c: 543:spdk_app_start: *NOTICE*: Total cores available: 1
[2022-12-02 00:19:49.270944] reactor.c: 943:reactor_run: *NOTICE*: Reactor started on core 1
[2022-12-02 00:19:49.271046] accel_engine.c:1012:spdk_accel_engine_initialize: *NOTICE*: Accel engine 
initialized to use software engine. 

-m 0x2是CPU核掩码(core mask),用于绑核:0x2的二进制为010,即只置位bit 1,表示绑定到第2个CPU核(核编号从0开始,即core 1,与上面日志中的"Reactor started on core 1"一致)。

[root@localhost spdk]# ./scripts/rpc.py thread_get_stats
{
 "tick_rate": 2800000000,
 "threads": [
 {
 "name": "app_thread",
 "id": 1,
 "cpumask": "2",
 "busy": 56001085330,
 "idle": 1804557630407,
 "active_pollers_count": 0,
 "timed_pollers_count": 2,
 "paused_pollers_count": 0
 },
 {
 "name": "nvmf_tgt_poll_group_1",
 "id": 2,
 "cpumask": "2",
 "busy": 7175205807,
 "idle": 15491017885977,
 "active_pollers_count": 3,
 "timed_pollers_count": 0,
 "paused_pollers_count": 0
 }
 ]
}
[root@localhost spdk]#

3、创建 VFIOUSER nvmf transport

./scripts/rpc.py nvmf_create_transport -t VFIOUSER

4、创建NVMf subsystem

./scripts/rpc.py nvmf_create_subsystem  nqn.2022-05.io.spdk:cnode0 -a -s SPDK0

5、创建 Malloc 类型的 bdev

创建Malloc类型的bdev用作NVMf Target背后的块设备资源

[root@localhost spdk]# ./scripts/rpc.py bdev_malloc_create 512 512 -b Malloc0
Malloc0

6、给NVMf Target添加bdev

./scripts/rpc.py nvmf_subsystem_add_ns nqn.2022-05.io.spdk:cnode0 Malloc0

7、给NVMf Target添加vfio-user类型的listener

./scripts/rpc.py nvmf_subsystem_add_listener nqn.2022-05.io.spdk:cnode0 -t VFIOUSER -a /var/run -s 0

生成socket文件/var/run/cntrl


经过以上操作,通过CLI命令行可以看到当前server的transport 为VFIOUSER,有一个NVMe类型、序列号为SPDK0的子系统 nqn.2022-05.io.spdk:cnode0,管理了512M 的 Malloc0内存盘,
使用监听地址/var/run(服务ID为0,对应socket文件/var/run/cntrl)来监听对子系统 nqn.2022-05.io.spdk:cnode0 的请求。
也可以通过rpc查看server的子系统信息

[root@localhost spdk]# ./scripts/rpc.py nvmf_get_subsystems
[
 {
 "nqn": "nqn.2014-08.org.nvmexpress.discovery",
 "subtype": "Discovery",
 "listen_addresses": [],
 "allow_any_host": true,
 "hosts": []
 },
 {
 "nqn": "nqn.2022-05.io.spdk:cnode0",
 "subtype": "NVMe",
 "listen_addresses": [
 {
 "transport": "VFIOUSER",
 "trtype": "VFIOUSER",
 "adrfam": "IPv4",
 "traddr": "/var/run",
 "trsvcid": "0"
 }
 ],
 "allow_any_host": true,
 "hosts": [],
 "serial_number": "SPDK0",
 "model_number": "SPDK bdev Controller",
 "max_namespaces": 32,
 "min_cntlid": 1,
 "max_cntlid": 65519,
 "namespaces": [
 {
 "nsid": 1,
 "bdev_name": "Malloc0",
 "name": "Malloc0",
 "nguid": "647705BA8B9C414893C92731F230EA36",
 "uuid": "647705ba-8b9c-4148-93c9-2731f230ea36"
 }
 ]
 }
]
[root@localhost spdk]# 

spdk是polling模式,cpu占用是100%, 会严重影响虚机启动,因此spdk和qemu进程需要绑定不同的核。

编译qemu

git clone https://github.com/oracle/qemu
cd qemu
git checkout d377d483f9
git submodule update --init --recursive
./configure --enable-multiprocess --disable-werror
make -j24

启动虚拟机

./qemu-system-x86_64 -cpu host -smp 8 -m 2G \
  -object memory-backend-file,id=mem,size=2G,mem-path=/dev/hugepages,share=on \
  -numa node,memdev=mem \
  -drive file=/root/CentOS-7-x86_64-GenericCloud-1811.qcow2,if=none,id=disk \
  -device ide-hd,drive=disk,bootindex=0 \
  -net user,hostfwd=tcp::10020-:22 -net nic \
  -vnc :1 --enable-kvm \
  -device vfio-user-pci,socket=/var/run/cntrl -nographic


虚拟机启动后,可以看到NVMf Target通过vfio-user通道直通的NVMe设备。

文章来自个人专栏
虚拟化杂谈
3 文章 | 1 订阅
0条评论
0 / 1000
请输入你的评论
0
0