diff mbox series

V2 scsi_mod: Add a new parameter to scsi_mod to control

Message ID 20240418181038.198242-1-loberman@redhat.com
State New
Headers show
Series V2 scsi_mod: Add a new parameter to scsi_mod to control | expand

Commit Message

Laurence Oberman April 18, 2024, 6:10 p.m. UTC
Resend of this patch as V2 against Martin's tree.
Changes: Removed initialization of global variable storage_quiet_discovery

This new parameter storage_quiet_discovery defaults to 0 and behavior is
unchanged. If its set to 1 on the kernel line then sd_printk and
sdev_printk are disabled for printing. The default logging can be
re-enabled any time after boot using /etc/sysctl.conf by setting
dev.scsi.storage_quiet_discovery = 0.
systctl -w dev.scsi.storage_quiet_discovery=0 will also change it
immediately back to logging. i
Users can leave it set to 1 on the kernel line and 0 in the conf file
so it changes back to default after rc.sysinit.
This solves the tough problem of systems with 1000's of
storage LUNS consuming a system and preventing it from booting due to
NMI's and timeouts due to udev triggers.

Signed-off-by: Laurence Oberman <loberman@redhat.com>
---
 Documentation/scsi/scsi-parameters.rst | 16 ++++++++++++++++
 Makefile                               |  2 +-
 drivers/scsi/scsi.c                    |  6 ++++++
 drivers/scsi/scsi_sysctl.c             |  5 +++++
 drivers/scsi/sd.h                      | 11 ++++++++---
 include/scsi/scsi_device.h             |  8 ++++++--
 6 files changed, 42 insertions(+), 6 deletions(-)

Comments

Laurence Oberman April 19, 2024, 12:54 a.m. UTC | #1
On Thu, 2024-04-18 at 14:10 -0400, Laurence Oberman wrote:
> Resend of this patch as V2 against Martin's tree.
> Changes: Removed initialization of global variable
> storage_quiet_discovery
> 
> This new parameter storage_quiet_discovery defaults to 0 and behavior
> is
> unchanged. If its set to 1 on the kernel line then sd_printk and
> sdev_printk are disabled for printing. The default logging can be
> re-enabled any time after boot using /etc/sysctl.conf by setting
> dev.scsi.storage_quiet_discovery = 0.
> systctl -w dev.scsi.storage_quiet_discovery=0 will also change it
> immediately back to logging. i
> Users can leave it set to 1 on the kernel line and 0 in the conf file
> so it changes back to default after rc.sysinit.
> This solves the tough problem of systems with 1000's of
> storage LUNS consuming a system and preventing it from booting due to
> NMI's and timeouts due to udev triggers.
> 
> Signed-off-by: Laurence Oberman <loberman@redhat.com>
> ---
>  Documentation/scsi/scsi-parameters.rst | 16 ++++++++++++++++
>  Makefile                               |  2 +-
>  drivers/scsi/scsi.c                    |  6 ++++++
>  drivers/scsi/scsi_sysctl.c             |  5 +++++
>  drivers/scsi/sd.h                      | 11 ++++++++---
>  include/scsi/scsi_device.h             |  8 ++++++--
>  6 files changed, 42 insertions(+), 6 deletions(-)
> 
> diff --git a/Documentation/scsi/scsi-parameters.rst
> b/Documentation/scsi/scsi-parameters.rst
> index c42c55e1e25e..ec2372db82bd 100644
> --- a/Documentation/scsi/scsi-parameters.rst
> +++ b/Documentation/scsi/scsi-parameters.rst
> @@ -93,6 +93,22 @@ parameters may be changed at runtime by the
> command
>                         S390-tools package, available for download at
>                         
> https://github.com/ibm-s390-linux/s390-tools/blob/master/scripts/scsi
> _logging_level
>  
> +       scsi_mod.storage_quiet_discovery=
> +                       [SCSI] a parameter to control the printing
> from
> +                        sdev_printk and sd_printk for systems with
> large
> +                        amounts of LUNS.
> +                        Defaults to 0 so unchanged behavior.
> +                        If scsi_mod.storage_quiet_discovery=1 is
> added boot line
> +                        then the messages are not printed and can be
> enabled
> +                        after boot via
> +                        echo 0 >
> +                       
> /sys/module/scsi_mod/parameters/storage_quiet_discovery
> +                        Another option is using
> +                        sysctl -w
> dev.scsi.storage_quiet_discovery=0.
> +                        Leaving this set to 0 in /etc/sysctl.conf
> and setting
> +                        it to 1 on the kernel line will help for
> these large
> +                        LUN count configurations and ensure its back
> on after boot.
> +
>         scsi_mod.scan=  [SCSI] sync (default) scans SCSI busses as
> they are
>                         discovered.  async scans them in kernel
> threads,
>                         allowing boot to proceed.  none ignores them,
> expecting
> diff --git a/Makefile b/Makefile
> index 763b6792d3d5..5effa83a32ab 100644
> --- a/Makefile
> +++ b/Makefile
> @@ -2,7 +2,7 @@
>  VERSION = 6
>  PATCHLEVEL = 9
>  SUBLEVEL = 0
> -EXTRAVERSION = -rc1
> +EXTRAVERSION = -rc1.lobe
>  NAME = Hurr durr I'ma ninja sloth
>  
>  # *DOCUMENTATION*
> diff --git a/drivers/scsi/scsi.c b/drivers/scsi/scsi.c
> index 3e0c0381277a..38db68861bbe 100644
> --- a/drivers/scsi/scsi.c
> +++ b/drivers/scsi/scsi.c
> @@ -76,6 +76,9 @@
>   * Definitions and constants.
>   */
>  
> +int storage_quiet_discovery;
> +EXPORT_SYMBOL(storage_quiet_discovery);
> +
>  /*
>   * Note - the initial logging level can be set here to log events at
> boot time.
>   * After the system is up, you may enable logging via the /proc
> interface.
> @@ -979,6 +982,9 @@ MODULE_LICENSE("GPL");
>  
>  module_param(scsi_logging_level, int, S_IRUGO|S_IWUSR);
>  MODULE_PARM_DESC(scsi_logging_level, "a bit mask of logging
> levels");
> +module_param(storage_quiet_discovery, int, S_IRUGO|S_IWUSR);
> +MODULE_PARM_DESC(storage_quiet_discovery, "If set to 1 will silence
> SCSI and \
> +               ALUA discovery logging on boot");
>  
>  static int __init init_scsi(void)
>  {
> diff --git a/drivers/scsi/scsi_sysctl.c b/drivers/scsi/scsi_sysctl.c
> index 093774d77534..13c7ee4a6212 100644
> --- a/drivers/scsi/scsi_sysctl.c
> +++ b/drivers/scsi/scsi_sysctl.c
> @@ -18,6 +18,11 @@ static struct ctl_table scsi_table[] = {
>           .maxlen       = sizeof(scsi_logging_level),
>           .mode         = 0644,
>           .proc_handler = proc_dointvec },
> +       { .procname     = "storage_quiet_discovery",
> +         .data         = &storage_quiet_discovery,
> +         .maxlen       = sizeof(storage_quiet_discovery),
> +         .mode         = 0644,
> +         .proc_handler = proc_dointvec },
>  };
>  
>  static struct ctl_table_header *scsi_table_header;
> diff --git a/drivers/scsi/sd.h b/drivers/scsi/sd.h
> index 5c4285a582b2..e1cff37ca69e 100644
> --- a/drivers/scsi/sd.h
> +++ b/drivers/scsi/sd.h
> @@ -162,11 +162,16 @@ static inline struct scsi_disk
> *scsi_disk(struct gendisk *disk)
>         return disk->private_data;
>  }
>  
> +extern int storage_quiet_discovery;
> +
>  #define sd_printk(prefix, sdsk, fmt,
> a...)                             \
> -        (sdsk)->disk
> ?                                                 \
> -             sdev_prefix_printk(prefix, (sdsk)-
> >device,                \
> +       do
> {                                                            \
> +               if
> (!storage_quiet_discovery)                           \
> +                (sdsk)->disk
> ?                                         \
> +                       sdev_prefix_printk(prefix, (sdsk)-
> >device,      \
>                                  (sdsk)->disk->disk_name, fmt, ##a)
> :   \
> -             sdev_printk(prefix, (sdsk)->device, fmt, ##a)
> +                       sdev_printk(prefix, (sdsk)->device, fmt,
> ##a);  \
> +       } while (0)
>  
>  #define sd_first_printk(prefix, sdsk, fmt,
> a...)                       \
>         do
> {                                                            \
> diff --git a/include/scsi/scsi_device.h b/include/scsi/scsi_device.h
> index 9c540f5468eb..0ad0c387883e 100644
> --- a/include/scsi/scsi_device.h
> +++ b/include/scsi/scsi_device.h
> @@ -302,8 +302,12 @@ __printf(4, 5) void
>  sdev_prefix_printk(const char *, const struct scsi_device *, const
> char *,
>                 const char *, ...);
>  
> -#define sdev_printk(l, sdev, fmt,
> a...)                                \
> -       sdev_prefix_printk(l, sdev, NULL, fmt, ##a)
> +extern int storage_quiet_discovery;
> +
> +#define sdev_printk(l, sdev, fmt, a...) ({                     \
> +       if (!storage_quiet_discovery)                           \
> +               sdev_prefix_printk(l, sdev, NULL, fmt, ##a);    \
> +       })
>  
>  __printf(3, 4) void
>  scmd_printk(const char *, const struct scsi_cmnd *, const char *,
> ...);

Replying to my own message:

After review will send a V3

I just noticed the patch included the Makefile and that should not be
included.
Also Subject was truncated so will fix that too.

Thanks
Laurence
Bart Van Assche April 19, 2024, 6:16 p.m. UTC | #2
On 4/18/24 11:10, Laurence Oberman wrote:
> This new parameter storage_quiet_discovery defaults to 0 and behavior is
> unchanged. If its set to 1 on the kernel line then sd_printk and
> sdev_printk are disabled for printing. The default logging can be
> re-enabled any time after boot using /etc/sysctl.conf by setting
> dev.scsi.storage_quiet_discovery = 0.
> systctl -w dev.scsi.storage_quiet_discovery=0 will also change it
> immediately back to logging. i
> Users can leave it set to 1 on the kernel line and 0 in the conf file
> so it changes back to default after rc.sysinit.
> This solves the tough problem of systems with 1000's of
> storage LUNS consuming a system and preventing it from booting due to
> NMI's and timeouts due to udev triggers.

Are there any alternatives to introducing a new kernel module parameter?
Has it e.g. been considered to rate-limit the messages related to newly
discovered logical units? How about reporting information about new
logical units only if less than ten logical units are discovered per
second?

Thanks,

Bart.
Mike Christie April 19, 2024, 7:28 p.m. UTC | #3
On 4/18/24 1:10 PM, Laurence Oberman wrote:
> Resend of this patch as V2 against Martin's tree.
> Changes: Removed initialization of global variable storage_quiet_discovery
> 
> This new parameter storage_quiet_discovery defaults to 0 and behavior is
> unchanged. If its set to 1 on the kernel line then sd_printk and
> sdev_printk are disabled for printing. The default logging can be
> re-enabled any time after boot using /etc/sysctl.conf by setting
> dev.scsi.storage_quiet_discovery = 0.
> systctl -w dev.scsi.storage_quiet_discovery=0 will also change it
> immediately back to logging. i
> Users can leave it set to 1 on the kernel line and 0 in the conf file
> so it changes back to default after rc.sysinit.
> This solves the tough problem of systems with 1000's of
> storage LUNS consuming a system and preventing it from booting due to
> NMI's and timeouts due to udev triggers.
> 

I didn't see v1 so maybe this was already asked. Why can you use the
existing SCSI_LOG infrastructure for this?

For example, are the printks that are causing you problems specific
calls that are not already covered by SCSI_LOG, like the sdev_printk in
scsi_probe_lun? Do we just want to have those covered by a new SCSI_LOG
value like SCSI_LOG_DISCOVERY?
Laurence Oberman April 19, 2024, 7:36 p.m. UTC | #4
On Fri, 2024-04-19 at 14:28 -0500, michael.christie@oracle.com wrote:
> On 4/18/24 1:10 PM, Laurence Oberman wrote:
> > Resend of this patch as V2 against Martin's tree.
> > Changes: Removed initialization of global variable
> > storage_quiet_discovery
> > 
> > This new parameter storage_quiet_discovery defaults to 0 and
> > behavior is
> > unchanged. If its set to 1 on the kernel line then sd_printk and
> > sdev_printk are disabled for printing. The default logging can be
> > re-enabled any time after boot using /etc/sysctl.conf by setting
> > dev.scsi.storage_quiet_discovery = 0.
> > systctl -w dev.scsi.storage_quiet_discovery=0 will also change it
> > immediately back to logging. i
> > Users can leave it set to 1 on the kernel line and 0 in the conf
> > file
> > so it changes back to default after rc.sysinit.
> > This solves the tough problem of systems with 1000's of
> > storage LUNS consuming a system and preventing it from booting due
> > to
> > NMI's and timeouts due to udev triggers.
> > 
> 
> I didn't see v1 so maybe this was already asked. Why can you use the
> existing SCSI_LOG infrastructure for this?
> 
> For example, are the printks that are causing you problems specific
> calls that are not already covered by SCSI_LOG, like the sdev_printk
> in
> scsi_probe_lun? Do we just want to have those covered by a new
> SCSI_LOG
> value like SCSI_LOG_DISCOVERY?
> 
> 
> 

Mike and Bart, Thank you

I think I looked at this some years back and customers wanted an off
during boot but then on workflow.
I sent the patch a few years back but the problem has not gone away so
I resent.

Back Monday after review of your suggestions.

Regards
Laurence
Laurence Oberman April 21, 2024, 10:13 p.m. UTC | #5
On Fri, 2024-04-19 at 15:36 -0400, Laurence Oberman wrote:
> On Fri, 2024-04-19 at 14:28 -0500, michael.christie@oracle.com wrote:
> > On 4/18/24 1:10 PM, Laurence Oberman wrote:
> > > Resend of this patch as V2 against Martin's tree.
> > > Changes: Removed initialization of global variable
> > > storage_quiet_discovery
> > > 
> > > This new parameter storage_quiet_discovery defaults to 0 and
> > > behavior is
> > > unchanged. If its set to 1 on the kernel line then sd_printk and
> > > sdev_printk are disabled for printing. The default logging can be
> > > re-enabled any time after boot using /etc/sysctl.conf by setting
> > > dev.scsi.storage_quiet_discovery = 0.
> > > systctl -w dev.scsi.storage_quiet_discovery=0 will also change it
> > > immediately back to logging. i
> > > Users can leave it set to 1 on the kernel line and 0 in the conf
> > > file
> > > so it changes back to default after rc.sysinit.
> > > This solves the tough problem of systems with 1000's of
> > > storage LUNS consuming a system and preventing it from booting
> > > due
> > > to
> > > NMI's and timeouts due to udev triggers.
> > > 
> > 
> > I didn't see v1 so maybe this was already asked. Why can you use
> > the
> > existing SCSI_LOG infrastructure for this?
> > 
> > For example, are the printks that are causing you problems specific
> > calls that are not already covered by SCSI_LOG, like the
> > sdev_printk
> > in
> > scsi_probe_lun? Do we just want to have those covered by a new
> > SCSI_LOG
> > value like SCSI_LOG_DISCOVERY?
> > 
> > 
> > 
> 
> Mike and Bart, Thank you
> 
> I think I looked at this some years back and customers wanted an off
> during boot but then on workflow.
> I sent the patch a few years back but the problem has not gone away
> so
> I resent.
> 
> Back Monday after review of your suggestions.
> 
> Regards
> Laurence

Hello folks

OK I remember why I did it this way. (Back in 2021)
There are too many places sdev_printk is called, not as bad for
sd_printk but still a lot of changes so doing it in the macro was the
most sensible. It also masks all the ALUA messages.

Adding another KERN_xxx and LOGLEVEL_xxx won't fly and Enterprise
customers want the normal log behavior after boot.
So I think other than adding a single extra module parameter this
solution is the cleanest and makes sense. 
Using rate_limiting is not what customers want after boot to ensure
device logging is back as normal. It's only the boot challenges that
cause problems.
This change other than adding the single extra option defaults to ZERO
changed behavior so I am asking for this please to be considered.
It is way too common now for Enterprise customers to have these multi-
thousand lun paths and devices. 

Sincerely
Laurence
diff mbox series

Patch

diff --git a/Documentation/scsi/scsi-parameters.rst b/Documentation/scsi/scsi-parameters.rst
index c42c55e1e25e..ec2372db82bd 100644
--- a/Documentation/scsi/scsi-parameters.rst
+++ b/Documentation/scsi/scsi-parameters.rst
@@ -93,6 +93,22 @@  parameters may be changed at runtime by the command
 			S390-tools package, available for download at
 			https://github.com/ibm-s390-linux/s390-tools/blob/master/scripts/scsi_logging_level
 
+	scsi_mod.storage_quiet_discovery=
+                       [SCSI] a parameter to control the printing from
+                        sdev_printk and sd_printk for systems with large
+                        amounts of LUNS.
+                        Defaults to 0 so unchanged behavior.
+                        If scsi_mod.storage_quiet_discovery=1 is added boot line
+                        then the messages are not printed and can be enabled
+                        after boot via
+                        echo 0 >
+                        /sys/module/scsi_mod/parameters/storage_quiet_discovery
+                        Another option is using
+                        sysctl -w dev.scsi.storage_quiet_discovery=0.
+                        Leaving this set to 0 in /etc/sysctl.conf and setting
+                        it to 1 on the kernel line will help for these large
+                        LUN count configurations and ensure its back on after boot.
+
 	scsi_mod.scan=	[SCSI] sync (default) scans SCSI busses as they are
 			discovered.  async scans them in kernel threads,
 			allowing boot to proceed.  none ignores them, expecting
diff --git a/Makefile b/Makefile
index 763b6792d3d5..5effa83a32ab 100644
--- a/Makefile
+++ b/Makefile
@@ -2,7 +2,7 @@ 
 VERSION = 6
 PATCHLEVEL = 9
 SUBLEVEL = 0
-EXTRAVERSION = -rc1
+EXTRAVERSION = -rc1.lobe
 NAME = Hurr durr I'ma ninja sloth
 
 # *DOCUMENTATION*
diff --git a/drivers/scsi/scsi.c b/drivers/scsi/scsi.c
index 3e0c0381277a..38db68861bbe 100644
--- a/drivers/scsi/scsi.c
+++ b/drivers/scsi/scsi.c
@@ -76,6 +76,9 @@ 
  * Definitions and constants.
  */
 
+int storage_quiet_discovery;
+EXPORT_SYMBOL(storage_quiet_discovery);
+
 /*
  * Note - the initial logging level can be set here to log events at boot time.
  * After the system is up, you may enable logging via the /proc interface.
@@ -979,6 +982,9 @@  MODULE_LICENSE("GPL");
 
 module_param(scsi_logging_level, int, S_IRUGO|S_IWUSR);
 MODULE_PARM_DESC(scsi_logging_level, "a bit mask of logging levels");
+module_param(storage_quiet_discovery, int, S_IRUGO|S_IWUSR);
+MODULE_PARM_DESC(storage_quiet_discovery, "If set to 1 will silence SCSI and \
+		ALUA discovery logging on boot");
 
 static int __init init_scsi(void)
 {
diff --git a/drivers/scsi/scsi_sysctl.c b/drivers/scsi/scsi_sysctl.c
index 093774d77534..13c7ee4a6212 100644
--- a/drivers/scsi/scsi_sysctl.c
+++ b/drivers/scsi/scsi_sysctl.c
@@ -18,6 +18,11 @@  static struct ctl_table scsi_table[] = {
 	  .maxlen	= sizeof(scsi_logging_level),
 	  .mode		= 0644,
 	  .proc_handler	= proc_dointvec },
+	{ .procname	= "storage_quiet_discovery",
+	  .data		= &storage_quiet_discovery,
+	  .maxlen	= sizeof(storage_quiet_discovery),
+	  .mode		= 0644,
+	  .proc_handler	= proc_dointvec },
 };
 
 static struct ctl_table_header *scsi_table_header;
diff --git a/drivers/scsi/sd.h b/drivers/scsi/sd.h
index 5c4285a582b2..e1cff37ca69e 100644
--- a/drivers/scsi/sd.h
+++ b/drivers/scsi/sd.h
@@ -162,11 +162,16 @@  static inline struct scsi_disk *scsi_disk(struct gendisk *disk)
 	return disk->private_data;
 }
 
+extern int storage_quiet_discovery;
+
 #define sd_printk(prefix, sdsk, fmt, a...)				\
-        (sdsk)->disk ?							\
-	      sdev_prefix_printk(prefix, (sdsk)->device,		\
+	do {								\
+		if (!storage_quiet_discovery)				\
+		 (sdsk)->disk ?						\
+			sdev_prefix_printk(prefix, (sdsk)->device,	\
 				 (sdsk)->disk->disk_name, fmt, ##a) :	\
-	      sdev_printk(prefix, (sdsk)->device, fmt, ##a)
+			sdev_printk(prefix, (sdsk)->device, fmt, ##a);	\
+	} while (0)
 
 #define sd_first_printk(prefix, sdsk, fmt, a...)			\
 	do {								\
diff --git a/include/scsi/scsi_device.h b/include/scsi/scsi_device.h
index 9c540f5468eb..0ad0c387883e 100644
--- a/include/scsi/scsi_device.h
+++ b/include/scsi/scsi_device.h
@@ -302,8 +302,12 @@  __printf(4, 5) void
 sdev_prefix_printk(const char *, const struct scsi_device *, const char *,
 		const char *, ...);
 
-#define sdev_printk(l, sdev, fmt, a...)				\
-	sdev_prefix_printk(l, sdev, NULL, fmt, ##a)
+extern int storage_quiet_discovery;
+
+#define sdev_printk(l, sdev, fmt, a...) ({			\
+	if (!storage_quiet_discovery)				\
+		sdev_prefix_printk(l, sdev, NULL, fmt, ##a);	\
+	})
 
 __printf(3, 4) void
 scmd_printk(const char *, const struct scsi_cmnd *, const char *, ...);