diff mbox series

[v2,2/7] staging: qlge: Initialize devlink health dump framework

Message ID 20201014104306.63756-3-coiby.xu@gmail.com
State New
Headers show
Series [v2,1/7] staging: qlge: replace ql_* with qlge_* to avoid namespace clashes with other qlogic drivers | expand

Commit Message

Coiby Xu Oct. 14, 2020, 10:43 a.m. UTC
Initialize devlink health dump framework for the qlge driver so the
coredump could be done via devlink.

struct qlge_adapter is now used as the private data struct of
struct devlink so it could exist independently of struct net_device
and devlink reload could be supported in the future.

Signed-off-by: Coiby Xu <coiby.xu@gmail.com>
---
 drivers/staging/qlge/Kconfig        |  1 +
 drivers/staging/qlge/Makefile       |  2 +-
 drivers/staging/qlge/qlge.h         |  5 +++
 drivers/staging/qlge/qlge_devlink.c | 31 +++++++++++++++++++
 drivers/staging/qlge/qlge_devlink.h |  9 ++++++
 drivers/staging/qlge/qlge_main.c    | 48 +++++++++++++++++++++--------
 6 files changed, 82 insertions(+), 14 deletions(-)
 create mode 100644 drivers/staging/qlge/qlge_devlink.c
 create mode 100644 drivers/staging/qlge/qlge_devlink.h

Comments

Dan Carpenter Oct. 14, 2020, 1:08 p.m. UTC | #1
On Wed, Oct 14, 2020 at 06:43:01PM +0800, Coiby Xu wrote:
>  static int qlge_probe(struct pci_dev *pdev,

>  		      const struct pci_device_id *pci_entry)

>  {

>  	struct net_device *ndev = NULL;

>  	struct qlge_adapter *qdev = NULL;

> +	struct devlink *devlink;

>  	static int cards_found;

>  	int err = 0;

>  

> -	ndev = alloc_etherdev_mq(sizeof(struct qlge_adapter),

> +	devlink = devlink_alloc(&qlge_devlink_ops, sizeof(struct qlge_adapter));

> +	if (!devlink)

> +		return -ENOMEM;

> +

> +	qdev = devlink_priv(devlink);

> +

> +	ndev = alloc_etherdev_mq(sizeof(struct qlge_netdev_priv),

>  				 min(MAX_CPUS,

>  				     netif_get_num_default_rss_queues()));

>  	if (!ndev)

> -		return -ENOMEM;

> +		goto devlink_free;

>  

> -	err = qlge_init_device(pdev, ndev, cards_found);

> -	if (err < 0) {

> -		free_netdev(ndev);

> -		return err;


In the old code, if qlge_init_device() fails then it frees "ndev".

> -	}

> +	qdev->ndev = ndev;

> +	err = qlge_init_device(pdev, qdev, cards_found);

> +	if (err < 0)

> +		goto devlink_free;


But the patch introduces a new resource leak.

>  

> -	qdev = netdev_priv(ndev);

>  	SET_NETDEV_DEV(ndev, &pdev->dev);

>  	ndev->hw_features = NETIF_F_SG |

>  		NETIF_F_IP_CSUM |

> @@ -4611,8 +4619,14 @@ static int qlge_probe(struct pci_dev *pdev,

>  		qlge_release_all(pdev);

>  		pci_disable_device(pdev);

>  		free_netdev(ndev);

> -		return err;

> +		goto devlink_free;

>  	}

> +

> +	err = devlink_register(devlink, &pdev->dev);

> +	if (err)

> +		goto devlink_free;

> +

> +	qlge_health_create_reporters(qdev);

>  	/* Start up the timer to trigger EEH if

>  	 * the bus goes dead

>  	 */

> @@ -4623,6 +4637,10 @@ static int qlge_probe(struct pci_dev *pdev,

>  	atomic_set(&qdev->lb_count, 0);

>  	cards_found++;

>  	return 0;

> +

> +devlink_free:

> +	devlink_free(devlink);

> +	return err;

>  }


The best way to write error handling code is keep tracke of the most
recent allocation which was allocated successfully.

	one = alloc();
	if (!one)
		return -ENOMEM;  //  <-- nothing allocated successfully

	two = alloc();
	if (!two) {
		ret = -ENOMEM;
		goto free_one; // <-- one was allocated successfully
                               // Notice that the label name says what
			       // the goto does.
	}

	three = alloc();
	if (!three) {
		ret = -ENOMEM;
		goto free_two; // <-- two allocated, two freed.
	}

	...

	return 0;

free_two:
	free(two);
free_one:
	free(one);

	return ret;

In the old code qlge_probe() freed things before returning, and that's
fine if there is only two allocations in the function but when there are
three or more allocations, then use gotos to unwind.

Ideally there would be a ql_deinit_device() function to mirror the
ql_init_device() function.  The ql_init_device() is staging quality
code with leaks and bad label names.  It should be re-written to free
things one step at a time instead of calling ql_release_all().

Anyway, let's not introduce new leaks at least.

regards,
dan carpenter
Coiby Xu Oct. 15, 2020, 4:22 a.m. UTC | #2
On Wed, Oct 14, 2020 at 04:08:46PM +0300, Dan Carpenter wrote:
>On Wed, Oct 14, 2020 at 06:43:01PM +0800, Coiby Xu wrote:
>>  static int qlge_probe(struct pci_dev *pdev,
>>  		      const struct pci_device_id *pci_entry)
>>  {
>>  	struct net_device *ndev = NULL;
>>  	struct qlge_adapter *qdev = NULL;
>> +	struct devlink *devlink;
>>  	static int cards_found;
>>  	int err = 0;
>>
>> -	ndev = alloc_etherdev_mq(sizeof(struct qlge_adapter),
>> +	devlink = devlink_alloc(&qlge_devlink_ops, sizeof(struct qlge_adapter));
>> +	if (!devlink)
>> +		return -ENOMEM;
>> +
>> +	qdev = devlink_priv(devlink);
>> +
>> +	ndev = alloc_etherdev_mq(sizeof(struct qlge_netdev_priv),
>>  				 min(MAX_CPUS,
>>  				     netif_get_num_default_rss_queues()));
>>  	if (!ndev)
>> -		return -ENOMEM;
>> +		goto devlink_free;
>>
>> -	err = qlge_init_device(pdev, ndev, cards_found);
>> -	if (err < 0) {
>> -		free_netdev(ndev);
>> -		return err;
>
>In the old code, if qlge_init_device() fails then it frees "ndev".
>
>> -	}
>> +	qdev->ndev = ndev;
>> +	err = qlge_init_device(pdev, qdev, cards_found);
>> +	if (err < 0)
>> +		goto devlink_free;
>
>But the patch introduces a new resource leak.

Thank you for spotting this issue!

>
>>
>> -	qdev = netdev_priv(ndev);
>>  	SET_NETDEV_DEV(ndev, &pdev->dev);
>>  	ndev->hw_features = NETIF_F_SG |
>>  		NETIF_F_IP_CSUM |
>> @@ -4611,8 +4619,14 @@ static int qlge_probe(struct pci_dev *pdev,
>>  		qlge_release_all(pdev);
>>  		pci_disable_device(pdev);
>>  		free_netdev(ndev);
>> -		return err;
>> +		goto devlink_free;
>>  	}
>> +
>> +	err = devlink_register(devlink, &pdev->dev);
>> +	if (err)
>> +		goto devlink_free;
>> +
>> +	qlge_health_create_reporters(qdev);
>>  	/* Start up the timer to trigger EEH if
>>  	 * the bus goes dead
>>  	 */
>> @@ -4623,6 +4637,10 @@ static int qlge_probe(struct pci_dev *pdev,
>>  	atomic_set(&qdev->lb_count, 0);
>>  	cards_found++;
>>  	return 0;
>> +
>> +devlink_free:
>> +	devlink_free(devlink);
>> +	return err;
>>  }
>
>The best way to write error handling code is keep tracke of the most
>recent allocation which was allocated successfully.
>
>	one = alloc();
>	if (!one)
>		return -ENOMEM;  //  <-- nothing allocated successfully
>
>	two = alloc();
>	if (!two) {
>		ret = -ENOMEM;
>		goto free_one; // <-- one was allocated successfully
>                               // Notice that the label name says what
>			       // the goto does.
>	}
>
>	three = alloc();
>	if (!three) {
>		ret = -ENOMEM;
>		goto free_two; // <-- two allocated, two freed.
>	}
>
>	...
>
>	return 0;
>
>free_two:
>	free(two);
>free_one:
>	free(one);
>
>	return ret;
>
Thank you for teaching me this pattern!

>In the old code qlge_probe() freed things before returning, and that's
>fine if there is only two allocations in the function but when there are
>three or more allocations, then use gotos to unwind.
>
>Ideally there would be a ql_deinit_device() function to mirror the
>ql_init_device() function.  The ql_init_device() is staging quality
>code with leaks and bad label names.  It should be re-written to free
>things one step at a time instead of calling ql_release_all().
>
I'll see how I can improve ql_init_device. Thank you for the suggestion!

>Anyway, let's not introduce new leaks at least.
>
>regards,
>dan carpenter

--
Best regards,
Coiby
Shung-Hsi Yu Oct. 20, 2020, 8:36 a.m. UTC | #3
Hi,

This patch trigger the following KASAN error inside qlge_init_device().

[...] general protection fault, probably for non-canonical address 0xdffffc000000004b: 0000 [#1] SMP DEBUG_PAGEALLOC KASAN PTI
[...] KASAN: null-ptr-deref in range [0x0000000000000258-0x000000000000025f]
[...] CPU: 0 PID: 438 Comm: systemd-udevd Tainted: G         C  E     5.9.0-kvmsmall+ #7
[...] Hardware name: QEMU Standard PC (i440FX + PIIX, 1996), BIOS rel-1.13.0-48-
g...ilt.opensuse.org 04/01/2014
[...] RIP: 0010:qlge_get_8000_flash_params+0x377/0x6e0 [qlge]
[...] Code: 03 80 3c 02 00 0f 85 57 03 00 00 49 8b af 68 08 00 00 48 b8 00 00 00 00 00 fc ff df 48 8d bd 5f 02 00 00 48 89 fa 48 c1 ea 03 <0f> b6 04 02 48 89 fa 83 e2 07 38 d0 7f 08 84 c0 0f 85 c6 02 00 00
[...] RSP: 0018:ffffc90000f97788 EFLAGS: 00010207
[...] RAX: dffffc0000000000 RBX: 0000000000000000 RCX: 0000000000000000
[...] RDX: 000000000000004b RSI: ffffffffc08cb843 RDI: 000000000000025f
[...] R10: fffffbfff5f718a0 R11: 0000000000000001 R12: dffffc0000000000
[...] R13: ffff888111085d40 R14: ffff888111085d40 R15: ffff888111080280
[...] FS:  00007f315f5db280(0000) GS:ffff8881f5200000(0000) knlGS:0000000000000000
[...] CS:  0010 DS: 0000 ES: 0000 CR0: 0000000080050033
[...] CR2: 000055bb25297170 CR3: 0000000110674000 CR4: 00000000000006f0
[...] DR0: 0000000000000000 DR1: 0000000000000000 DR2: 0000000000000000
[...] DR3: 0000000000000000 DR6: 00000000fffe0ff0 DR7: 0000000000000400
[...] Call Trace:
[...]  ? qlge_get_8012_flash_params+0x600/0x600 [qlge]
[...]  ? static_obj+0x8a/0xc0
[...]  ? lockdep_init_map_waits+0x26a/0x700
[...]  qlge_init_device+0x425/0x1000 [qlge]
[...]  ? debug_mutex_init+0x31/0x60
[...]  qlge_probe+0xfe/0x6c0 [qlge]
[...]  ? qlge_set_mac_addr+0x330/0x330 [qlge]
[...]  local_pci_probe+0xd8/0x170
[...]  pci_call_probe+0x156/0x3d0
[...]  ? pci_match_device+0x30c/0x620
[...]  ? pci_pm_suspend_noirq+0x980/0x980
[...]  ? pci_match_device+0x33c/0x620
[...]  ? kernfs_put+0x18/0x30
[...]  pci_device_probe+0x1e0/0x270
[...]  ? pci_dma_configure+0x57/0xd0
[...]  really_probe+0x218/0xd20
[...]  driver_probe_device+0x1e6/0x2c0
[...]  device_driver_attach+0x209/0x270
[...]  __driver_attach+0xf6/0x260
[...]  ? device_driver_attach+0x270/0x270
[...]  bus_for_each_dev+0x114/0x1a0
[...]  ? subsys_find_device_by_id+0x2d0/0x2d0
[...]  ? bus_add_driver+0x2d2/0x620
[...]  bus_add_driver+0x352/0x620
[...]  driver_register+0x1ee/0x4b0
[...]  ? 0xffffffffc08e9000
[...]  do_one_initcall+0xbb/0x400
[...]  ? trace_event_raw_event_initcall_finish+0x1c0/0x1c0
[...]  ? rcu_read_lock_sched_held+0x3f/0x70
[...]  ? trace_kmalloc+0xa2/0xd0
[...]  ? kasan_unpoison_shadow+0x33/0x40
[...]  ? kasan_unpoison_shadow+0x33/0x40
[...]  do_init_module+0x1ce/0x780
[...]  load_module+0x14b1/0x16d0
[...]  ? post_relocation+0x3a0/0x3a0
[...]  ? device_driver_attach+0x270/0x270
[...]  bus_for_each_dev+0x114/0x1a0
[...]  ? subsys_find_device_by_id+0x2d0/0x2d0
[...]  ? bus_add_driver+0x2d2/0x620
[...]  bus_add_driver+0x352/0x620
[...]  driver_register+0x1ee/0x4b0
[...]  ? 0xffffffffc08e9000
[...]  do_one_initcall+0xbb/0x400
[...]  ? trace_event_raw_event_initcall_finish+0x1c0/0x1c0
[...]  ? rcu_read_lock_sched_held+0x3f/0x70
[...]  ? trace_kmalloc+0xa2/0xd0
[...]  ? kasan_unpoison_shadow+0x33/0x40
[...]  ? kasan_unpoison_shadow+0x33/0x40
[...]  do_init_module+0x1ce/0x780
[...]  load_module+0x14b1/0x16d0
[...]  ? post_relocation+0x3a0/0x3a0
[...]  ? kernel_read_file_from_fd+0x4b/0x90
[...]  __do_sys_finit_module+0x110/0x1a0
[...]  ? __ia32_sys_init_module+0xa0/0xa0
[...]  do_syscall_64+0x33/0x80
[...]  entry_SYSCALL_64_after_hwframe+0x44/0xa9

With qlge_get_8000_flash_params+0x377/0x6e0 corresponding to the following:

	if (qdev->flash.flash_params_8000.data_type1 == 2)
		memcpy(mac_addr,
		       qdev->flash.flash_params_8000.mac_addr1,
		       qdev->ndev->addr_len); // <---- Here

IIRC I didn't see this with v1. However I didn't test v2, so I'm not sure if
this issue is introduced during v2 or v3.

Best,
Shung-Hsi
Shung-Hsi Yu Oct. 20, 2020, 8:55 a.m. UTC | #4
On Tue, Oct 20, 2020 at 04:36:09PM +0800, Shung-Hsi Yu wrote:
> This patch trigger the following KASAN error inside qlge_init_device().


Sorry, I meant to reply to the v3 series, please ignore this email.
diff mbox series

Patch

diff --git a/drivers/staging/qlge/Kconfig b/drivers/staging/qlge/Kconfig
index a3cb25a3ab80..6d831ed67965 100644
--- a/drivers/staging/qlge/Kconfig
+++ b/drivers/staging/qlge/Kconfig
@@ -3,6 +3,7 @@ 
 config QLGE
 	tristate "QLogic QLGE 10Gb Ethernet Driver Support"
 	depends on ETHERNET && PCI
+	select NET_DEVLINK
 	help
 	This driver supports QLogic ISP8XXX 10Gb Ethernet cards.
 
diff --git a/drivers/staging/qlge/Makefile b/drivers/staging/qlge/Makefile
index 1dc2568e820c..07c1898a512e 100644
--- a/drivers/staging/qlge/Makefile
+++ b/drivers/staging/qlge/Makefile
@@ -5,4 +5,4 @@ 
 
 obj-$(CONFIG_QLGE) += qlge.o
 
-qlge-objs := qlge_main.o qlge_dbg.o qlge_mpi.o qlge_ethtool.o
+qlge-objs := qlge_main.o qlge_dbg.o qlge_mpi.o qlge_ethtool.o qlge_devlink.o
diff --git a/drivers/staging/qlge/qlge.h b/drivers/staging/qlge/qlge.h
index 6ee83e7efd7c..4a48bcc88fbd 100644
--- a/drivers/staging/qlge/qlge.h
+++ b/drivers/staging/qlge/qlge.h
@@ -2060,6 +2060,10 @@  struct nic_operations {
 	int (*port_initialize)(struct qlge_adapter *qdev);
 };
 
+struct qlge_netdev_priv {
+	struct ql_adapter *qdev;
+};
+
 /*
  * The main Adapter structure definition.
  * This structure has all fields relevant to the hardware.
@@ -2077,6 +2081,7 @@  struct qlge_adapter {
 	struct pci_dev *pdev;
 	struct net_device *ndev;	/* Parent NET device */
 
+	struct devlink_health_reporter *reporter;
 	/* Hardware information */
 	u32 chip_rev_id;
 	u32 fw_rev_id;
diff --git a/drivers/staging/qlge/qlge_devlink.c b/drivers/staging/qlge/qlge_devlink.c
new file mode 100644
index 000000000000..d9c71f45211f
--- /dev/null
+++ b/drivers/staging/qlge/qlge_devlink.c
@@ -0,0 +1,31 @@ 
+// SPDX-License-Identifier: GPL-2.0-or-later
+#include "qlge.h"
+#include "qlge_devlink.h"
+
+static int
+qlge_reporter_coredump(struct devlink_health_reporter *reporter,
+		       struct devlink_fmsg *fmsg, void *priv_ctx,
+		       struct netlink_ext_ack *extack)
+{
+	return 0;
+}
+
+static const struct devlink_health_reporter_ops qlge_reporter_ops = {
+	.name = "dummy",
+	.dump = qlge_reporter_coredump,
+};
+
+void qlge_health_create_reporters(struct qlge_adapter *priv)
+{
+	struct devlink_health_reporter *reporter;
+	struct devlink *devlink;
+
+	devlink = priv_to_devlink(priv);
+	priv->reporter =
+		devlink_health_reporter_create(devlink, &qlge_reporter_ops,
+					       0, priv);
+	if (IS_ERR(priv->reporter))
+		netdev_warn(priv->ndev,
+			    "Failed to create reporter, err = %ld\n",
+			    PTR_ERR(reporter));
+}
diff --git a/drivers/staging/qlge/qlge_devlink.h b/drivers/staging/qlge/qlge_devlink.h
new file mode 100644
index 000000000000..19078e1ac694
--- /dev/null
+++ b/drivers/staging/qlge/qlge_devlink.h
@@ -0,0 +1,9 @@ 
+/* SPDX-License-Identifier: GPL-2.0-or-later */
+#ifndef QLGE_DEVLINK_H
+#define QLGE_DEVLINK_H
+
+#include <net/devlink.h>
+
+void qlge_health_create_reporters(struct qlge_adapter *priv);
+
+#endif /* QLGE_DEVLINK_H */
diff --git a/drivers/staging/qlge/qlge_main.c b/drivers/staging/qlge/qlge_main.c
index 19e72279b0ce..7a4bae3c12d0 100644
--- a/drivers/staging/qlge/qlge_main.c
+++ b/drivers/staging/qlge/qlge_main.c
@@ -42,6 +42,7 @@ 
 #include <net/ip6_checksum.h>
 
 #include "qlge.h"
+#include "qlge_devlink.h"
 
 char qlge_driver_name[] = DRV_NAME;
 const char qlge_driver_version[] = DRV_VERSION;
@@ -4382,10 +4383,10 @@  static void qlge_release_all(struct pci_dev *pdev)
 	pci_release_regions(pdev);
 }
 
-static int qlge_init_device(struct pci_dev *pdev, struct net_device *ndev,
+static int qlge_init_device(struct pci_dev *pdev, struct qlge_adapter *qdev,
 			    int cards_found)
 {
-	struct qlge_adapter *qdev = netdev_priv(ndev);
+	struct net_device *ndev = qdev->ndev;
 	int err = 0;
 
 	memset((void *)qdev, 0, sizeof(*qdev));
@@ -4548,27 +4549,34 @@  static void qlge_timer(struct timer_list *t)
 	mod_timer(&qdev->timer, jiffies + (5 * HZ));
 }
 
+static const struct devlink_ops qlge_devlink_ops;
+
 static int qlge_probe(struct pci_dev *pdev,
 		      const struct pci_device_id *pci_entry)
 {
 	struct net_device *ndev = NULL;
 	struct qlge_adapter *qdev = NULL;
+	struct devlink *devlink;
 	static int cards_found;
 	int err = 0;
 
-	ndev = alloc_etherdev_mq(sizeof(struct qlge_adapter),
+	devlink = devlink_alloc(&qlge_devlink_ops, sizeof(struct qlge_adapter));
+	if (!devlink)
+		return -ENOMEM;
+
+	qdev = devlink_priv(devlink);
+
+	ndev = alloc_etherdev_mq(sizeof(struct qlge_netdev_priv),
 				 min(MAX_CPUS,
 				     netif_get_num_default_rss_queues()));
 	if (!ndev)
-		return -ENOMEM;
+		goto devlink_free;
 
-	err = qlge_init_device(pdev, ndev, cards_found);
-	if (err < 0) {
-		free_netdev(ndev);
-		return err;
-	}
+	qdev->ndev = ndev;
+	err = qlge_init_device(pdev, qdev, cards_found);
+	if (err < 0)
+		goto devlink_free;
 
-	qdev = netdev_priv(ndev);
 	SET_NETDEV_DEV(ndev, &pdev->dev);
 	ndev->hw_features = NETIF_F_SG |
 		NETIF_F_IP_CSUM |
@@ -4611,8 +4619,14 @@  static int qlge_probe(struct pci_dev *pdev,
 		qlge_release_all(pdev);
 		pci_disable_device(pdev);
 		free_netdev(ndev);
-		return err;
+		goto devlink_free;
 	}
+
+	err = devlink_register(devlink, &pdev->dev);
+	if (err)
+		goto devlink_free;
+
+	qlge_health_create_reporters(qdev);
 	/* Start up the timer to trigger EEH if
 	 * the bus goes dead
 	 */
@@ -4623,6 +4637,10 @@  static int qlge_probe(struct pci_dev *pdev,
 	atomic_set(&qdev->lb_count, 0);
 	cards_found++;
 	return 0;
+
+devlink_free:
+	devlink_free(devlink);
+	return err;
 }
 
 netdev_tx_t qlge_lb_send(struct sk_buff *skb, struct net_device *ndev)
@@ -4637,14 +4655,18 @@  int qlge_clean_lb_rx_ring(struct rx_ring *rx_ring, int budget)
 
 static void qlge_remove(struct pci_dev *pdev)
 {
-	struct net_device *ndev = pci_get_drvdata(pdev);
-	struct qlge_adapter *qdev = netdev_priv(ndev);
+	struct qlge_adapter *qdev = pci_get_drvdata(pdev);
+	struct net_device *ndev = qdev->ndev;
+	struct devlink *devlink = priv_to_devlink(qdev);
 
 	del_timer_sync(&qdev->timer);
 	qlge_cancel_all_work_sync(qdev);
 	unregister_netdev(ndev);
 	qlge_release_all(pdev);
 	pci_disable_device(pdev);
+	devlink_health_reporter_destroy(qdev->reporter);
+	devlink_unregister(devlink);
+	devlink_free(devlink);
 	free_netdev(ndev);
 }