diff mbox series

[v3,9/9] scsi:scsi_debug: Add debugfs interface to fail target reset

Message ID 20230723234105.1628982-10-haowenchao2@huawei.com
State Superseded
Headers show
Series scsi:scsi_debug: Add error injection for single device | expand

Commit Message

Wenchao Hao July 23, 2023, 11:41 p.m. UTC
The interface is found at
/sys/kernel/debug/scsi_debug/target<h:c:t>/fail_reset, where <h:c:t>
(host:channel:target id) identifies the target to inject errors on. It is
a simple boolean interface: writing 'Y' makes resets of this target fail,
and writing 'N' restores normal behaviour.

Signed-off-by: Wenchao Hao <haowenchao2@huawei.com>
---
 drivers/scsi/scsi_debug.c | 97 ++++++++++++++++++++++++++++++++++++++-
 1 file changed, 96 insertions(+), 1 deletion(-)

Comments

kernel test robot Aug. 3, 2023, 6:26 a.m. UTC | #1
Hello,

kernel test robot noticed "BUG:sleeping_function_called_from_invalid_context_at_kernel/locking/rwsem.c" on:

commit: b75b216522fef4d99e145fd9a1535db987ef2836 ("[PATCH v3 9/9] scsi:scsi_debug: Add debugfs interface to fail target reset")
url: https://github.com/intel-lab-lkp/linux/commits/Wenchao-Hao/scsi-scsi_debug-create-scsi_debug-directory-in-the-debugfs-filesystem/20230723-182123
base: https://git.kernel.org/cgit/linux/kernel/git/mkp/scsi.git for-next
patch link: https://lore.kernel.org/all/20230723234105.1628982-10-haowenchao2@huawei.com/
patch subject: [PATCH v3 9/9] scsi:scsi_debug: Add debugfs interface to fail target reset

in testcase: xfstests
version: xfstests-x86_64-bb8af9c-1_20230801
with following parameters:

	disk: 4HDD
	fs: ext4
	test: generic-350



compiler: gcc-12
test machine: 4 threads Intel(R) Core(TM) i5-6500 CPU @ 3.20GHz (Skylake) with 32G memory

(please refer to attached dmesg/kmsg for entire log/backtrace)



If you fix the issue in a separate patch/commit (i.e. not just a new version of
the same patch/commit), kindly add the following tags
| Reported-by: kernel test robot <oliver.sang@intel.com>
| Closes: https://lore.kernel.org/oe-lkp/202308031027.5941ce5f-oliver.sang@intel.com


[  224.728219][ T2216] BUG: sleeping function called from invalid context at kernel/locking/rwsem.c:1572
[  224.737418][ T2216] in_atomic(): 1, irqs_disabled(): 1, non_block: 0, pid: 2216, name: modprobe
[  224.746095][ T2216] preempt_count: 1, expected: 0
[  224.750787][ T2216] RCU nest depth: 0, expected: 0
[  224.755564][ T2216] CPU: 3 PID: 2216 Comm: modprobe Tainted: G          I        6.5.0-rc1-00013-gb75b216522fe #1
[  224.765799][ T2216] Hardware name: Dell Inc. OptiPlex 7040/0Y7WYT, BIOS 1.1.1 10/07/2015
[  224.773867][ T2216] Call Trace:
[  224.777011][ T2216]  <TASK>
[ 224.779796][ T2216] dump_stack_lvl (lib/dump_stack.c:107 (discriminator 1)) 
[ 224.784146][ T2216] __might_resched (kernel/sched/core.c:10190) 
[ 224.788753][ T2216] ? preempt_notifier_dec (kernel/sched/core.c:10144) 
[ 224.793792][ T2216] ? preempt_notifier_dec (kernel/sched/core.c:10144) 
[ 224.798851][ T2216] down_write (include/linux/kernel.h:111 kernel/locking/rwsem.c:1572) 
[ 224.802936][ T2216] ? rwsem_down_write_slowpath (kernel/locking/rwsem.c:1571) 
[ 224.808582][ T2216] ? kobject_put (arch/x86/include/asm/atomic.h:103 include/linux/atomic/atomic-arch-fallback.h:940 include/linux/atomic/atomic-instrumented.h:401 include/linux/refcount.h:272 include/linux/refcount.h:315 include/linux/refcount.h:333 include/linux/kref.h:64 lib/kobject.c:730) 
[ 224.812858][ T2216] simple_recursive_removal (include/linux/dcache.h:385 include/linux/dcache.h:400 include/linux/dcache.h:410 fs/libfs.c:273) 
[ 224.818256][ T2216] ? start_creating (fs/tracefs/inode.c:135) 
[ 224.823555][ T2216] ? _raw_spin_lock_irqsave (arch/x86/include/asm/atomic.h:115 include/linux/atomic/atomic-arch-fallback.h:2155 include/linux/atomic/atomic-instrumented.h:1296 include/asm-generic/qspinlock.h:111 include/linux/spinlock.h:187 include/linux/spinlock_api_smp.h:111 kernel/locking/spinlock.c:162) 
[ 224.828766][ T2216] ? _raw_read_unlock_irqrestore (kernel/locking/spinlock.c:161) 
[ 224.834412][ T2216] debugfs_remove (fs/debugfs/inode.c:766) 
[ 224.838758][ T2216] sdebug_target_destroy (drivers/scsi/scsi_debug.c:1147) scsi_debug
[ 224.844874][ T2216] scsi_target_destroy (drivers/scsi/scsi_scan.c:401) 
[ 224.849830][ T2216] __scsi_remove_device (drivers/scsi/scsi_sysfs.c:1517) 
[ 224.854889][ T2216] scsi_forget_host (drivers/scsi/scsi_scan.c:1988) 
[ 224.859508][ T2216] scsi_remove_host (drivers/scsi/hosts.c:182) 
[ 224.864113][ T2216] sdebug_driver_remove (drivers/scsi/scsi_debug.c:8342) scsi_debug
[ 224.870201][ T2216] ? kernfs_remove_by_name_ns (fs/kernfs/dir.c:1679) 
[ 224.875758][ T2216] device_release_driver_internal (drivers/base/dd.c:1272 drivers/base/dd.c:1293) 
[ 224.881662][ T2216] bus_remove_device (include/linux/kobject.h:191 drivers/base/base.h:73 drivers/base/bus.c:581) 
[ 224.886442][ T2216] device_del (drivers/base/core.c:3815) 
[ 224.890616][ T2216] ? __device_link_del (drivers/base/core.c:3769) 
[ 224.895567][ T2216] ? mutex_unlock (arch/x86/include/asm/atomic64_64.h:109 include/linux/atomic/atomic-arch-fallback.h:4303 include/linux/atomic/atomic-long.h:1499 include/linux/atomic/atomic-instrumented.h:4446 kernel/locking/mutex.c:181 kernel/locking/mutex.c:540) 
[ 224.899915][ T2216] ? __mutex_unlock_slowpath+0x2b0/0x2b0 
[ 224.906427][ T2216] device_unregister (drivers/base/core.c:3732 drivers/base/core.c:3845) 
[ 224.911038][ T2216] sdebug_do_remove_host (drivers/scsi/scsi_debug.c:7691) scsi_debug
[ 224.917309][ T2216] scsi_debug_exit (drivers/scsi/scsi_debug.c:8170) scsi_debug
[ 224.922889][ T2216] __do_sys_delete_module+0x316/0x540 
[ 224.929162][ T2216] ? module_flags (kernel/module/main.c:698) 
[ 224.933689][ T2216] ? task_work_cancel (kernel/task_work.c:147) 
[ 224.938388][ T2216] ? _raw_spin_lock (arch/x86/include/asm/atomic.h:115 include/linux/atomic/atomic-arch-fallback.h:2155 include/linux/atomic/atomic-instrumented.h:1296 include/asm-generic/qspinlock.h:111 include/linux/spinlock.h:187 include/linux/spinlock_api_smp.h:134 kernel/locking/spinlock.c:154) 
[ 224.942921][ T2216] ? exit_to_user_mode_loop (include/linux/sched.h:2337 include/linux/resume_user_mode.h:61 kernel/entry/common.c:171) 
[ 224.948222][ T2216] do_syscall_64 (arch/x86/entry/common.c:50 arch/x86/entry/common.c:80) 
[ 224.952483][ T2216] entry_SYSCALL_64_after_hwframe (arch/x86/entry/entry_64.S:120) 
[  224.958217][ T2216] RIP: 0033:0x7fe4243fc417
[ 224.962479][ T2216] Code: 73 01 c3 48 8b 0d 79 1a 0d 00 f7 d8 64 89 01 48 83 c8 ff c3 66 2e 0f 1f 84 00 00 00 00 00 0f 1f 44 00 00 b8 b0 00 00 00 0f 05 <48> 3d 01 f0 ff ff 73 01 c3 48 8b 0d 49 1a 0d 00 f7 d8 64 89 01 48
All code
========
   0:	73 01                	jae    0x3
   2:	c3                   	retq   
   3:	48 8b 0d 79 1a 0d 00 	mov    0xd1a79(%rip),%rcx        # 0xd1a83
   a:	f7 d8                	neg    %eax
   c:	64 89 01             	mov    %eax,%fs:(%rcx)
   f:	48 83 c8 ff          	or     $0xffffffffffffffff,%rax
  13:	c3                   	retq   
  14:	66 2e 0f 1f 84 00 00 	nopw   %cs:0x0(%rax,%rax,1)
  1b:	00 00 00 
  1e:	0f 1f 44 00 00       	nopl   0x0(%rax,%rax,1)
  23:	b8 b0 00 00 00       	mov    $0xb0,%eax
  28:	0f 05                	syscall 
  2a:*	48 3d 01 f0 ff ff    	cmp    $0xfffffffffffff001,%rax		<-- trapping instruction
  30:	73 01                	jae    0x33
  32:	c3                   	retq   
  33:	48 8b 0d 49 1a 0d 00 	mov    0xd1a49(%rip),%rcx        # 0xd1a83
  3a:	f7 d8                	neg    %eax
  3c:	64 89 01             	mov    %eax,%fs:(%rcx)
  3f:	48                   	rex.W

Code starting with the faulting instruction
===========================================
   0:	48 3d 01 f0 ff ff    	cmp    $0xfffffffffffff001,%rax
   6:	73 01                	jae    0x9
   8:	c3                   	retq   
   9:	48 8b 0d 49 1a 0d 00 	mov    0xd1a49(%rip),%rcx        # 0xd1a59
  10:	f7 d8                	neg    %eax
  12:	64 89 01             	mov    %eax,%fs:(%rcx)
  15:	48                   	rex.W
[  224.981910][ T2216] RSP: 002b:00007ffc194cd048 EFLAGS: 00000206 ORIG_RAX: 00000000000000b0
[  224.990153][ T2216] RAX: ffffffffffffffda RBX: 00005641e66c7c70 RCX: 00007fe4243fc417
[  224.997962][ T2216] RDX: 0000000000000000 RSI: 0000000000000800 RDI: 00005641e66c7cd8
[  225.005769][ T2216] RBP: 00005641e66c7c70 R08: 0000000000000000 R09: 0000000000000000
[  225.013579][ T2216] R10: 00007fe42447cac0 R11: 0000000000000206 R12: 00005641e66c7cd8
[  225.021386][ T2216] R13: 0000000000000000 R14: 0000000000000000 R15: 00005641e66c7df0
[  225.029197][ T2216]  </TASK>
[  225.355582][ T2223] EXT4-fs (sda1): unmounting filesystem fc80755b-20bd-48b4-996a-4bc58040c94e.
[  225.676238][  T277] generic/350       _check_dmesg: something found in dmesg (see /lkp/benchmarks/xfstests/results//generic/350.dmesg)
[  225.676268][  T277]
[  304.321275][  T277]
[  304.321304][  T277]
[  304.343153][  T277] Ran: generic/350
[  304.343170][  T277]
[  304.349423][  T277] Failures: generic/350
[  304.349436][  T277]
[  304.356135][  T277] Failed 1 of 1 tests


To reproduce:

        git clone https://github.com/intel/lkp-tests.git
        cd lkp-tests
        sudo bin/lkp install job.yaml           # job file is attached in this email
        bin/lkp split-job --compatible job.yaml # generate the yaml file for lkp run
        sudo bin/lkp run generated-yaml-file

        # if you come across any failure that blocks the test,
        # please remove the ~/.lkp and /lkp dirs to run from a clean state.
diff mbox series

Patch

diff --git a/drivers/scsi/scsi_debug.c b/drivers/scsi/scsi_debug.c
index 2c16e5f4ab28..2c5ed618f228 100644
--- a/drivers/scsi/scsi_debug.c
+++ b/drivers/scsi/scsi_debug.c
@@ -355,6 +355,11 @@  struct sdebug_dev_info {
 	struct list_head inject_err_list;
 };
 
+struct sdebug_target_info {	/* per-target state kept in scsi_target->hostdata */
+	bool reset_fail;	/* if set, scsi_debug_target_reset() returns FAILED */
+	struct dentry *debugfs_entry;	/* this target's debugfs directory */
+};
+
 struct sdebug_host_info {
 	struct list_head host_list;
 	int si_idx;	/* sdeb_store_info (per host) xarray index */
@@ -1069,6 +1074,75 @@  static const struct file_operations sdebug_error_fops = {
 	.release = single_release,
 };
 
+/*
+ * seq_file show handler for the per-target "fail_reset" debugfs file.
+ * Prints "Y\n" or "N\n" according to the target's reset_fail flag; prints
+ * nothing when the target has no sdebug_target_info attached. Always
+ * returns 0.
+ */
+static int sdebug_target_reset_fail_show(struct seq_file *m, void *p)
+{
+	struct scsi_target *tgt = m->private;
+	struct sdebug_target_info *info = tgt->hostdata;
+	char flag;
+
+	if (!info)
+		return 0;
+
+	flag = info->reset_fail ? 'Y' : 'N';
+	seq_printf(m, "%c\n", flag);
+	return 0;
+}
+
+/*
+ * open handler for "fail_reset": binds the file to
+ * sdebug_target_reset_fail_show() via single_open(), passing along the
+ * inode's private pointer, which the show routine treats as the
+ * struct scsi_target.
+ */
+static int sdebug_target_reset_fail_open(struct inode *inode, struct file *file)
+{
+	void *priv = inode->i_private;
+
+	return single_open(file, sdebug_target_reset_fail_show, priv);
+}
+
+/*
+ * write handler for "fail_reset": parses the user buffer as a boolean and
+ * stores the result in the target's reset_fail flag.
+ *
+ * Returns @count on success, the negative error from
+ * kstrtobool_from_user() if parsing fails, or -ENODEV when the target has
+ * no sdebug_target_info attached.
+ */
+static ssize_t sdebug_target_reset_fail_write(struct file *file,
+		const char __user *ubuf, size_t count, loff_t *ppos)
+{
+	struct scsi_target *tgt = file->f_inode->i_private;
+	struct sdebug_target_info *info = tgt->hostdata;
+	int err;
+
+	if (!info)
+		return -ENODEV;
+
+	err = kstrtobool_from_user(ubuf, count, &info->reset_fail);
+	if (err < 0)
+		return err;
+
+	return count;
+}
+
+/* File operations backing the per-target "fail_reset" debugfs file. */
+static const struct file_operations sdebug_target_reset_fail_fops = {
+	.open		= sdebug_target_reset_fail_open,
+	.release	= single_release,
+	.read		= seq_read,
+	.write		= sdebug_target_reset_fail_write,
+};
+
+/*
+ * ->target_alloc hook: allocates the zeroed per-target sdebug_target_info,
+ * creates a debugfs directory named after the target device under
+ * sdebug_debugfs_root with a "fail_reset" file (mode 0600) inside it, and
+ * stores the info in starget->hostdata.
+ *
+ * Returns 0 on success or -ENOMEM if the allocation fails. The results of
+ * the debugfs calls are not checked.
+ */
+static int sdebug_target_alloc(struct scsi_target *starget)
+{
+	struct sdebug_target_info *info;
+	struct dentry *dir;
+
+	info = kzalloc(sizeof(*info), GFP_KERNEL);
+	if (info == NULL)
+		return -ENOMEM;
+
+	dir = debugfs_create_dir(dev_name(&starget->dev), sdebug_debugfs_root);
+	info->debugfs_entry = dir;
+	debugfs_create_file("fail_reset", 0600, dir, starget,
+			    &sdebug_target_reset_fail_fops);
+
+	starget->hostdata = info;
+	return 0;
+}
+
+#include <linux/async.h>	/* async_schedule(); belongs with the includes at the top of the file */
+
+/*
+ * Deferred half of sdebug_target_destroy(): removes the target's debugfs
+ * directory and frees its sdebug_target_info. Runs as an async callback so
+ * that debugfs_remove(), which may sleep, is not called from the atomic
+ * context of the ->target_destroy hook.
+ *
+ * @data:   the struct sdebug_target_info detached by sdebug_target_destroy()
+ * @cookie: async cookie, unused
+ */
+static void sdebug_target_cleanup_async(void *data, async_cookie_t cookie)
+{
+	struct sdebug_target_info *targetip = data;
+
+	debugfs_remove(targetip->debugfs_entry);
+	kfree(targetip);
+}
+
+/*
+ * ->target_destroy hook: detaches the per-target info from @starget and
+ * schedules its teardown.
+ *
+ * This hook is reached from scsi_target_destroy() in atomic context with
+ * interrupts disabled, so calling debugfs_remove() directly here triggers
+ * "BUG: sleeping function called from invalid context" (it takes an rwsem
+ * via down_write()). The debugfs removal and the kfree() are therefore
+ * pushed to sdebug_target_cleanup_async().
+ *
+ * NOTE(review): async_schedule() is documented to run the callback
+ * synchronously if it cannot allocate its bookkeeping entry - confirm that
+ * this rare fallback is acceptable here.
+ */
+static void sdebug_target_destroy(struct scsi_target *starget)
+{
+	struct sdebug_target_info *targetip = starget->hostdata;
+
+	if (!targetip)
+		return;
+
+	/* Clear the pointer first so nothing sees it after it is freed. */
+	starget->hostdata = NULL;
+	async_schedule(sdebug_target_cleanup_async, targetip);
+}
+
 /* Only do the extra work involved in logical block provisioning if one or
  * more of the lbpu, lbpws or lbpws10 parameters are given and we are doing
  * real reads and writes (i.e. not skipping them for speed).
@@ -5610,11 +5684,25 @@  static int scsi_debug_device_reset(struct scsi_cmnd *SCpnt)
 	return SUCCESS;
 }
 
+/*
+ * Reports whether a target reset issued for @cmnd's target should be
+ * failed: returns the target's reset_fail flag, or 0 if the target has no
+ * sdebug_target_info attached.
+ */
+static int sdebug_fail_target_reset(struct scsi_cmnd *cmnd)
+{
+	struct scsi_target *tgt = scsi_target(cmnd->device);
+	struct sdebug_target_info *info = tgt->hostdata;
+
+	return info ? info->reset_fail : 0;
+}
+
 static int scsi_debug_target_reset(struct scsi_cmnd *SCpnt)
 {
 	struct scsi_device *sdp = SCpnt->device;
 	struct sdebug_host_info *sdbg_host = shost_to_sdebug_host(sdp->host);
 	struct sdebug_dev_info *devip;
+	u8 *cmd = SCpnt->cmnd;
+	u8 opcode = cmd[0];
 	int k = 0;
 
 	++num_target_resets;
@@ -5632,6 +5720,12 @@  static int scsi_debug_target_reset(struct scsi_cmnd *SCpnt)
 		sdev_printk(KERN_INFO, sdp,
 			    "%s: %d device(s) found in target\n", __func__, k);
 
+	if (sdebug_fail_target_reset(SCpnt)) {
+		scmd_printk(KERN_INFO, SCpnt, "fail target reset 0x%x\n",
+			    opcode);
+		return FAILED;
+	}
+
 	return SUCCESS;
 }
 
@@ -8070,7 +8164,6 @@  static int sdebug_init_cmd_priv(struct Scsi_Host *shost, struct scsi_cmnd *cmd)
 	return 0;
 }
 
-
 static struct scsi_host_template sdebug_driver_template = {
 	.show_info =		scsi_debug_show_info,
 	.write_info =		scsi_debug_write_info,
@@ -8100,6 +8193,8 @@  static struct scsi_host_template sdebug_driver_template = {
 	.track_queue_depth =	1,
 	.cmd_size = sizeof(struct sdebug_scsi_cmd),
 	.init_cmd_priv = sdebug_init_cmd_priv,
+	.target_alloc =		sdebug_target_alloc,
+	.target_destroy =	sdebug_target_destroy,
 };
 
 static int sdebug_driver_probe(struct device *dev)