当我们开发内核功能,或者验证、定位问题时,经常需要模拟各种内核异常场景,以验证程序的健壮性或加速问题的复现,比如内存分配失败、磁盘IO错误、磁盘IO超时等等。Linux内核集成了一个比较实用的功能“Fault-injection”(故障注入框架)来帮助我们进行故障注入,从而可以构建一些通用的内核异常场景。它能够模拟内存slab分配失败、内存页分配失败、磁盘IO错误、磁盘IO超时、futex锁错误以及专门针对mmc的IO错误,用户也可以利用该机制设计并增加自己需要的故障注入点。
一、fail_make_request磁盘错误注入验证
# ls /sys/kernel/debug/fail_make_request/
interval probability space task-filter times verbose verbose_ratelimit_burst verbose_ratelimit_interval_ms
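interval:两次故障注入之间的间隔,以通过其他过滤条件的should_fail()调用次数计(本例中为1,即每次调用都可能注入)
probability:故障注入的概率,以百分比表示(0~100,100表示每次都注入)
space:初始“余量”(资源预算),默认值为0;每次调用should_fail()时会减去本次请求的size,在余量减到0之前不会注入故障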
案例:
[root@localhost fail_make_request]# echo 100 > probability    # 设置出现错误的概率(百分比)
[root@localhost fail_make_request]# echo 10 > times    # 设置最多出现错误的次数(-1表示不限制)
# echo 1 > /sys/devices/pci0000:00/0000:00:05.0/virtio0/block/vda/vda1/make-it-fail    # 对vda1分区开启错误注入,制造错误
[root@localhost boot]# dmesg
[ 277.015948] FAULT_INJECTION: forcing a failure.
name fail_make_request, interval 1, probability 100, space 0, times 1
[ 277.015966] CPU: 87 PID: 2645 Comm: ls Kdump: loaded Not tainted 6.1.0-rc6 #4
[ 277.015975] Hardware name: Red Hat KVM, BIOS 0.5.1 01/01/2011
[ 277.015978] Call Trace:
[ 277.016007] <TASK>
[ 277.016013] dump_stack_lvl+0x33/0x46
[ 277.016031] should_fail.cold.7+0x32/0x37
[ 277.016040] submit_bio_noacct+0x35d/0x430
[ 277.016055] _xfs_buf_ioapply+0x256/0x3e0 [xfs]
[ 277.016328] __xfs_buf_submit+0x6f/0x1e0 [xfs]
[ 277.016515] xfs_buf_read_map+0x1b1/0x320 [xfs]
[ 277.016678] ? xfs_imap_to_bp+0x4b/0x70 [xfs]
[ 277.016880] xfs_trans_read_buf_map+0x29d/0x320 [xfs]
[ 277.017132] ? xfs_imap_to_bp+0x4b/0x70 [xfs]
[ 277.017335] xfs_imap_to_bp+0x4b/0x70 [xfs]
[ 277.017509] xfs_trans_log_inode+0x194/0x270 [xfs]
[ 277.017697] xfs_vn_update_time+0x109/0x1b0 [xfs]
[ 277.017906] touch_atime+0x11a/0x170
[ 277.017919] iterate_dir+0xfd/0x1c0
[ 277.017926] __x64_sys_getdents+0x81/0x120
[ 277.017932] ? filldir64+0x170/0x170
[ 277.017938] do_syscall_64+0x3a/0x90
[ 277.017944] entry_SYSCALL_64_after_hwframe+0x63/0xcd
[ 277.017956] RIP: 0033:0x7fdee80c1285
[ 277.017963] Code: 83 c7 13 e9 4d b7 fc ff 66 2e 0f 1f 84 00 00 00 00 00 0f 1f 00 41 56 48 63 ff b8 4e 00 00 00 41 55 41 54 55 53 48 89 f3 0f 05 <48> 3d 00 f0 ff ff 77 55 4c 8d 2c 03 49 89 c6 4c 39 eb 73 3d 0f 1f
[ 277.017969] RSP: 002b:00007fff19414b10 EFLAGS: 00000246 ORIG_RAX: 000000000000004e
[ 277.017978] RAX: ffffffffffffffda RBX: 0000000000e3fc60 RCX: 00007fdee80c1285
[ 277.017983] RDX: 0000000000008000 RSI: 0000000000e3fc60 RDI: 0000000000000003
[ 277.017986] RBP: 0000000000e3fc60 R08: 0000000000000000 R09: 0000000000008030
[ 277.017990] R10: 00007fff194145e0 R11: 0000000000000246 R12: fffffffffffffe80
[ 277.017993] R13: 0000000000000000 R14: 0000000000e3fc10 R15: 0000000000000000
[ 277.018000] </TASK>
[ 277.018047] XFS (vda1): metadata I/O error in "xfs_imap_to_bp+0x4b/0x70 [xfs]" at daddr 0x40 len 32 error 5
[ 277.022150] XFS (vda1): Metadata I/O Error (0x1) detected at xfs_trans_read_buf_map+0x306/0x320 [xfs] (fs/xfs/xfs_trans_buf.c:296). Shutting down filesystem.
[ 277.027746] XFS (vda1): Please unmount the filesystem and rectify the problem(s)
[root@localhost boot]# ls
ls: cannot open directory .: Input/output error
二、fail_io_timeout磁盘IO超时错误注入验证。注意:该异常只能对整个磁盘块设备(struct gendisk)注入,而无法对单个分区注入。
# ls /sys/kernel/debug/fail_io_timeout/
interval probability space task-filter times verbose verbose_ratelimit_burst verbose_ratelimit_interval_ms
[root@localhost fail_io_timeout]# echo 100 > probability
[root@localhost fail_io_timeout]# cd /sys/devices/pci0000:00/0000:00:05.0/virtio0/block/vda/
[root@localhost vda]# echo 1 > io-timeout-fail
[root@localhost vda]# ls /boot/    # 命令卡住无输出;由于进程长时间处于D状态(不可中断睡眠),内核会打印如下hung_task_timeout信息
[ 738.357289] INFO: task ls:2691 blocked for more than 122 seconds.
[ 738.359951] Not tainted 6.1.0-rc6 #4
[ 738.361569] "echo 0 > /proc/sys/kernel/hung_task_timeout_secs" disables this message.
[ 861.237268] INFO: task ls:2691 blocked for more than 245 seconds.
[ 861.239702] Not tainted 6.1.0-rc6 #4
[ 861.241312] "echo 0 > /proc/sys/kernel/hung_task_timeout_secs" disables this message.
[root@localhost ~]# dmesg
[ 505.066079] FAULT_INJECTION: forcing a failure.
name fail_io_timeout, interval 1, probability 100, space 0, times 1
[ 505.066106] CPU: 99 PID: 0 Comm: swapper/99 Kdump: loaded Not tainted 6.1.0-rc6 #4
[ 505.066117] Hardware name: Red Hat KVM, BIOS 0.5.1 01/01/2011
[ 505.066120] Call Trace:
[ 505.066146] <IRQ>
[ 505.066152] dump_stack_lvl+0x33/0x46
[ 505.066171] should_fail.cold.7+0x32/0x37
[ 505.066183] virtblk_done+0xa1/0x120 [virtio_blk]
[ 505.066201] vring_interrupt+0x6d/0xe0
[ 505.066213] __handle_irq_event_percpu+0x4f/0x190
[ 505.066226] handle_irq_event_percpu+0xf/0x40
[ 505.066234] handle_irq_event+0x34/0x60
[ 505.066243] handle_edge_irq+0x9a/0x1c0
[ 505.066250] __common_interrupt+0x65/0x100
[ 505.066259] common_interrupt+0xb4/0xd0
[ 505.066266] </IRQ>
[ 505.066268] <TASK>
[ 505.066271] asm_common_interrupt+0x22/0x40
[ 505.066284] RIP: 0010:default_idle+0x10/0x20
[ 505.066294] Code: c0 22 48 89 44 24 08 eb a3 48 89 df e8 f9 cc bb ff eb a2 e8 c2 f4 fe ff cc cc 0f 1f 44 00 00 eb 07 0f 00 2d f2 a6 56 00 fb f4 <c3> cc cc cc cc 90 66 2e 0f 1f 84 00 00 00 00 00 0f 1f 44 00 00 65
[ 505.066299] RSP: 0018:ffffc28e403abed8 EFLAGS: 00000202
[ 505.066307] RAX: ffffffffa00991f0 RBX: 0000000000000000 RCX: 0000000000000001
[ 505.066311] RDX: 0000000000000000 RSI: ffffffffa093e0fa RDI: ffffffffa090be40
[ 505.066315] RBP: 0000000000000063 R08: 000000b1c67efb06 R09: 0000000000000318
[ 505.066319] R10: 00000000000f4240 R11: 000000000000001c R12: ffffffffffffffff
[ 505.066322] R13: 00000000fffffff0 R14: 0000000000000000 R15: ffff9eab40b4b280
[ 505.066328] ? __sched_text_end+0x2/0x2
[ 505.066338] ? __sched_text_end+0x2/0x2
[ 505.066344] default_idle_call+0x31/0xe0
[ 505.066352] do_idle+0x1fa/0x2a0
[ 505.066360] cpu_startup_entry+0x19/0x20
[ 505.066365] start_secondary+0x10d/0x130
[ 505.066376] secondary_startup_64_no_verify+0xe5/0xeb
[ 505.066389] </TASK>
三、fail_page_alloc内存页分配失败错误注入验证
# ls /sys/kernel/debug/fail_page_alloc/
ignore-gfp-highmem interval probability task-filter verbose verbose_ratelimit_interval_ms
ignore-gfp-wait min-order space times verbose_ratelimit_burst
[root@localhost fail_page_alloc]# pwd
/sys/kernel/debug/fail_page_alloc
[root@localhost fail_page_alloc]# echo 2 > times
[root@localhost fail_page_alloc]# echo 100 > probability
[root@localhost fail_page_alloc]# dmesg
[ 78.225947] FAULT_INJECTION: forcing a failure.
name fail_page_alloc, interval 1, probability 100, space 0, times 2
[ 78.225959] CPU: 64 PID: 6453 Comm: sh Not tainted 6.1.0-rc6 #8
[ 78.225968] Hardware name: Red Hat KVM, BIOS 0.5.1 01/01/2011
[ 78.225971] Call Trace:
[ 78.225976] <TASK>
[ 78.225979] dump_stack_lvl+0x33/0x46
[ 78.226001] should_fail.cold.6+0x32/0x4c
[ 78.226011] __alloc_pages+0x100/0x340
[ 78.226027] new_slab+0x391/0x450
[ 78.226036] ___slab_alloc+0x3ba/0x900
[ 78.226042] ? vm_area_dup+0x21/0x90
[ 78.226054] ? vm_area_dup+0x21/0x90
[ 78.226058] __slab_alloc.isra.79+0x52/0x90
[ 78.226065] kmem_cache_alloc+0x3de/0x420
[ 78.226071] ? vm_area_dup+0x21/0x90
[ 78.226075] vm_area_dup+0x21/0x90
[ 78.226083] ? avc_has_perm_noaudit+0xd4/0x150
[ 78.226095] ? cred_has_capability+0x7c/0x140
[ 78.226101] ? mas_update_gap.part.43+0xc6/0x1e0
[ 78.226110] ? mas_wr_store_entry+0x103/0x2b0
[ 78.226116] ? percpu_counter_add_batch+0x59/0xb0
[ 78.226129] ? __vm_enough_memory+0x24/0xf0
[ 78.226141] dup_mmap+0x211/0x5c0
[ 78.226149] dup_mm+0x68/0x110
[ 78.226154] copy_process+0x13ff/0x1ba0
[ 78.226162] kernel_clone+0x99/0x3a0
[ 78.226168] __do_sys_clone+0x78/0xa0
[ 78.226175] do_syscall_64+0x3a/0x90
[ 78.226181] entry_SYSCALL_64_after_hwframe+0x63/0xcd
[ 78.226194] RIP: 0033:0x7f8145cc5972
[ 78.226202] Code: f7 d8 64 89 04 25 d4 02 00 00 64 4c 8b 04 25 10 00 00 00 31 d2 4d 8d 90 d0 02 00 00 31 f6 bf 11 00 20 01 b8 38 00 00 00 0f 05 <48> 3d 00 f0 ff ff 0f 87 5d 01 00 00 85 c0 41 89 c5 0f 85 67 01 00
[ 78.226208] RSP: 002b:00007fff8f8668a0 EFLAGS: 00000246 ORIG_RAX: 0000000000000038
[ 78.226217] RAX: ffffffffffffffda RBX: 0000000000000000 RCX: 00007f8145cc5972
[ 78.226222] RDX: 0000000000000000 RSI: 0000000000000000 RDI: 0000000001200011
[ 78.226225] RBP: 00007fff8f8668c0 R08: 00007f8146a32740 R09: 0000000000000000
[ 78.226229] R10: 00007f8146a32a10 R11: 0000000000000246 R12: 0000000000000000
[ 78.226232] R13: 0000000001879090 R14: 0000000000000001 R15: 0000000000000000
[ 78.226238] </TASK>
四、failslab内存slab分配失败错误注入验证
# ls /sys/kernel/debug/failslab/
cache-filter interval space times verbose_ratelimit_burst
ignore-gfp-wait probability task-filter verbose verbose_ratelimit_interval_ms
[root@localhost failslab]# cat probability
0
[root@localhost failslab]# cat times
1
[root@localhost failslab]# echo 100 > probability
[root@localhost failslab]# dmesg
[ 214.853586] fail_stacktrace:78 depth=32
[ 214.853599] fail_stacktrace:82
[ 214.853629] FAULT_INJECTION: forcing a failure.
name failslab, interval 1, probability 100, space 0, times 1
[ 214.853639] CPU: 39 PID: 3536 Comm: bash Kdump: loaded Not tainted 6.1.0-rc6 #8
[ 214.853648] Hardware name: Red Hat KVM, BIOS 0.5.1 01/01/2011
[ 214.853651] Call Trace:
[ 214.853656] <TASK>
[ 214.853659] dump_stack_lvl+0x33/0x46
[ 214.853701] should_fail.cold.6+0x32/0x4c
[ 214.853709] should_failslab+0xa/0x20
[ 214.853721] kmem_cache_alloc+0x41/0x420
[ 214.853728] ? ebitmap_cpy+0x4d/0xd0
[ 214.853738] ebitmap_cpy+0x4d/0xd0
[ 214.853745] mls_compute_sid+0x115/0x400
[ 214.853755] security_compute_sid.part.16+0x2f3/0x8f0
[ 214.853762] ? set_termios+0x16a/0x1b0
[ 214.853777] security_transition_sid+0x2b/0x50
[ 214.853782] selinux_socket_create+0x163/0x3d0
[ 214.853788] ? preempt_count_add+0x70/0xa0
[ 214.853799] security_socket_create+0x38/0x60
[ 214.853812] __sock_create+0x63/0x1e0
[ 214.853823] __sys_socket_create.part.26+0x43/0x70
[ 214.853829] __sys_socket+0x28/0xa0
[ 214.853836] __x64_sys_socket+0x16/0x20
[ 214.853842] do_syscall_64+0x3a/0x90
[ 214.853849] entry_SYSCALL_64_after_hwframe+0x63/0xcd
[ 214.853861] RIP: 0033:0x7f2c0feffae7
[ 214.853870] Code: 73 01 c3 48 8b 0d 89 73 2c 00 f7 d8 64 89 01 48 83 c8 ff c3 66 2e 0f 1f 84 00 00 00 00 00 0f 1f 44 00 00 b8 29 00 00 00 0f 05 <48> 3d 01 f0 ff ff 73 01 c3 48 8b 0d 59 73 2c 00 f7 d8 64 89 01 48
[ 214.853875] RSP: 002b:00007ffe1d2ec2e8 EFLAGS: 00000206 ORIG_RAX: 0000000000000029
[ 214.853884] RAX: ffffffffffffffda RBX: 00000000015dab50 RCX: 00007f2c0feffae7
[ 214.853888] RDX: 0000000000000009 RSI: 0000000000000003 RDI: 0000000000000010
[ 214.853892] RBP: 00000000004c394f R08: 00007ffe1d2ec240 R09: 00007ffe1d2ec190
[ 214.853896] R10: 0000000000000008 R11: 0000000000000206 R12: 0000000000000006
[ 214.853899] R13: 0000000000000001 R14: 0000000000000002 R15: 00007ffe1d2ec4d0
[ 214.853906] </TASK>