diff mbox series

[RFC] fixes for hangs and error reporting in CDC_WDM

Message ID 20200922101329.14801-1-oneukum@suse.com
State New
Headers show
Series [RFC] fixes for hangs and error reporting in CDC_WDM | expand

Commit Message

Oliver Neukum Sept. 22, 2020, 10:13 a.m. UTC
Stress testing has shown that CDC-WDM has some issues with hangs
and error reporting

1. wakeups are not correctly handled in multhreaded environments
2. unresponsive hardware is not handled
3. errors are not correctly reported. This needs flush() to be
implemented.

For easier review all squashed together:

Comments

Tetsuo Handa Sept. 22, 2020, 10:42 a.m. UTC | #1
On 2020/09/22 19:13, Oliver Neukum wrote:
> +/* flush() is uninterruptible, but we cannot wait forever */
> +#define WDM_FLUSH_TIMEOUT      (30 * HZ)

>  static int wdm_flush(struct file *file, fl_owner_t id)
>  {
>         struct wdm_device *desc = file->private_data;
> +       int rv;
>  
> -       wait_event(desc->wait,
> +       rv = wait_event_timeout(desc->wait,
>                         /*
>                          * needs both flags. We cannot do with one
>                          * because resetting it would cause a race
> @@ -595,16 +630,25 @@ static int wdm_flush(struct file *file, fl_owner_t id)
>                          * a disconnect
>                          */
>                         !test_bit(WDM_IN_USE, &desc->flags) ||
> -                       test_bit(WDM_DISCONNECTING, &desc->flags));
> +                       test_bit(WDM_DISCONNECTING, &desc->flags),
> +                       WDM_FLUSH_TIMEOUT);
>  

Generally looks OK.

Any chance we can use wait_event_killable_timeout() or wait_event_killable() here?
syzkaller sends SIGKILL after 5 seconds from process creation. Blocking for 30
seconds in TASK_UNINTERRUPTIBLE state is not happy when killed by e.g. OOM killer.
diff mbox series

Patch

--- a/drivers/usb/class/cdc-wdm.c
+++ b/drivers/usb/class/cdc-wdm.c
@@ -58,6 +58,9 @@  MODULE_DEVICE_TABLE (usb, wdm_ids);
 
 #define WDM_MAX                        16
 
+/* flush() is uninterruptible, but we cannot wait forever */
+#define WDM_FLUSH_TIMEOUT      (30 * HZ)
+
 /* CDC-WMC r1.1 requires wMaxCommand to be "at least 256 decimal (0x100)" */
 #define WDM_DEFAULT_BUFSIZE    256
 
@@ -151,7 +154,7 @@  static void wdm_out_callback(struct urb *urb)
        kfree(desc->outbuf);
        desc->outbuf = NULL;
        clear_bit(WDM_IN_USE, &desc->flags);
-       wake_up(&desc->wait);
+       wake_up_all(&desc->wait);
 }
 
 static void wdm_in_callback(struct urb *urb)
@@ -393,6 +396,9 @@  static ssize_t wdm_write
        if (test_bit(WDM_RESETTING, &desc->flags))
                r = -EIO;
 
+       if (test_bit(WDM_DISCONNECTING, &desc->flags))
+               r = -ENODEV;
+
        if (r < 0) {
                rv = r;
                goto out_free_mem_pm;
@@ -424,6 +430,7 @@  static ssize_t wdm_write
        if (rv < 0) {
                desc->outbuf = NULL;
                clear_bit(WDM_IN_USE, &desc->flags);
+               wake_up_all(&desc->wait); /* for flush() */
                dev_err(&desc->intf->dev, "Tx URB error: %d\n", rv);
                rv = usb_translate_errors(rv);
                goto out_free_mem_pm;
@@ -583,11 +590,39 @@  static ssize_t wdm_read
        return rv;
 }
 
+/*
+ * The difference to flush is that we wait forever. If you don't like
+ * that behavior, you need to send a signal.
+ */
+
+static int wdm_fsync(struct file *file, loff_t start, loff_t end, int datasync)
+{
+       struct wdm_device *desc = file->private_data;
+       int rv;
+
+       rv = wait_event_interruptible(desc->wait,
+                       !test_bit(WDM_IN_USE, &desc->flags) ||
+                       test_bit(WDM_DISCONNECTING, &desc->flags));
+
+       if (test_bit(WDM_DISCONNECTING, &desc->flags))
+               return -ENODEV;
+       if (rv < 0)
+               return -EINTR;
+
+       spin_lock_irq(&desc->iuspin);
+       rv = desc->werr;
+       desc->werr = 0;
+       spin_unlock_irq(&desc->iuspin);
+
+       return usb_translate_errors(rv);
+}
+
 static int wdm_flush(struct file *file, fl_owner_t id)
 {
        struct wdm_device *desc = file->private_data;
+       int rv;
 
-       wait_event(desc->wait,
+       rv = wait_event_timeout(desc->wait,
                        /*
                         * needs both flags. We cannot do with one
                         * because resetting it would cause a race
@@ -595,16 +630,25 @@  static int wdm_flush(struct file *file, fl_owner_t id)
                         * a disconnect
                         */
                        !test_bit(WDM_IN_USE, &desc->flags) ||
-                       test_bit(WDM_DISCONNECTING, &desc->flags));
+                       test_bit(WDM_DISCONNECTING, &desc->flags),
+                       WDM_FLUSH_TIMEOUT);
 
-       /* cannot dereference desc->intf if WDM_DISCONNECTING */
+       /*
+        * to report the correct error.
+        * This is best effort
+        * We are inevitably racing with the hardware.
+        */
        if (test_bit(WDM_DISCONNECTING, &desc->flags))
                return -ENODEV;
-       if (desc->werr < 0)
-               dev_err(&desc->intf->dev, "Error in flush path: %d\n",
-                       desc->werr);
+       if (!rv)
+               return -EIO;
+
+       spin_lock_irq(&desc->iuspin);
+       rv = desc->werr;
+       desc->werr = 0;
+       spin_unlock_irq(&desc->iuspin);
 
-       return usb_translate_errors(desc->werr);
+       return usb_translate_errors(rv);
 }
 
 static __poll_t wdm_poll(struct file *file, struct poll_table_struct *wait)
@@ -729,6 +773,7 @@  static const struct file_operations wdm_fops = {
        .owner =        THIS_MODULE,
        .read =         wdm_read,
        .write =        wdm_write,
+       .fsync =        wdm_fsync,
        .open =         wdm_open,
        .flush =        wdm_flush,