diff mbox series

[net-next,RFC,v4,03/15] devlink: Add reload action stats

Message ID 1600063682-17313-4-git-send-email-moshe@mellanox.com
State New
Headers show
Series Add devlink reload action and | expand

Commit Message

Moshe Shemesh Sept. 14, 2020, 6:07 a.m. UTC
Add reload action stats to hold the history per reload action type and
limit level.
For example, the number of times fw_activate has been performed on this
device since the driver module was added or if the firmware activation
was performed with or without reset.
Add devlink notification on stats update.

The function devlink_reload_actions_implicit_actions_performed() is
exported to enable also drivers update on reload actions performed,
for example in case firmware activation with reset finished
successfully but was initiated by remote host.

Signed-off-by: Moshe Shemesh <moshe@mellanox.com>
---
v3 -> v4:
- Renamed reload_actions_cnts to reload_action_stats
- Add devlink notifications on stats update
- Renamed devlink_reload_actions_implicit_actions_performed() and add
  function comment in code
v2 -> v3:
- New patch
---
 include/net/devlink.h |  7 ++++++
 net/core/devlink.c    | 58 ++++++++++++++++++++++++++++++++++++++++---
 2 files changed, 62 insertions(+), 3 deletions(-)

Comments

Jiri Pirko Sept. 14, 2020, 1:39 p.m. UTC | #1
Mon, Sep 14, 2020 at 08:07:50AM CEST, moshe@mellanox.com wrote:
>Add reload action stats to hold the history per reload action type and
>limit level.

Empty line missing.


>For example, the number of times fw_activate has been performed on this
>device since the driver module was added or if the firmware activation
>was performed with or without reset.
>Add devlink notification on stats update.
>
>The function devlink_reload_actions_implicit_actions_performed() is
>exported to enable also drivers update on reload actions performed,
>for example in case firmware activation with reset finished
>successfully but was initiated by remote host.
>
>Signed-off-by: Moshe Shemesh <moshe@mellanox.com>
>---
>v3 -> v4:
>- Renamed reload_actions_cnts to reload_action_stats
>- Add devlink notifications on stats update
>- Renamed devlink_reload_actions_implicit_actions_performed() and add
>  function comment in code
>v2 -> v3:
>- New patch
>---
> include/net/devlink.h |  7 ++++++
> net/core/devlink.c    | 58 ++++++++++++++++++++++++++++++++++++++++---
> 2 files changed, 62 insertions(+), 3 deletions(-)
>
>diff --git a/include/net/devlink.h b/include/net/devlink.h
>index dddd9ee5b8a9..b4feb92e0269 100644
>--- a/include/net/devlink.h
>+++ b/include/net/devlink.h
>@@ -20,6 +20,9 @@
> #include <uapi/linux/devlink.h>
> #include <linux/xarray.h>
> 
>+#define DEVLINK_RELOAD_ACTION_STATS_ARRAY_SIZE \
>+	(__DEVLINK_RELOAD_ACTION_LIMIT_LEVEL_MAX * __DEVLINK_RELOAD_ACTION_MAX)
>+
> struct devlink_ops;
> 
> struct devlink {
>@@ -38,6 +41,7 @@ struct devlink {
> 	struct list_head trap_policer_list;
> 	const struct devlink_ops *ops;
> 	struct xarray snapshot_ids;
>+	u32 reload_action_stats[DEVLINK_RELOAD_ACTION_STATS_ARRAY_SIZE];
> 	struct device *dev;
> 	possible_net_t _net;
> 	struct mutex lock; /* Serializes access to devlink instance specific objects such as
>@@ -1397,6 +1401,9 @@ void
> devlink_health_reporter_recovery_done(struct devlink_health_reporter *reporter);
> 
> bool devlink_is_reload_failed(const struct devlink *devlink);
>+void devlink_reload_implicit_actions_performed(struct devlink *devlink,
>+					       enum devlink_reload_action_limit_level limit_level,
>+					       unsigned long actions_performed);
> 
> void devlink_flash_update_begin_notify(struct devlink *devlink);
> void devlink_flash_update_end_notify(struct devlink *devlink);
>diff --git a/net/core/devlink.c b/net/core/devlink.c
>index 60aa0c4a3726..cbf746966913 100644
>--- a/net/core/devlink.c
>+++ b/net/core/devlink.c
>@@ -2981,11 +2981,58 @@ bool devlink_is_reload_failed(const struct devlink *devlink)
> }
> EXPORT_SYMBOL_GPL(devlink_is_reload_failed);
> 
>+static void
>+devlink_reload_action_stats_update(struct devlink *devlink,
>+				   enum devlink_reload_action_limit_level limit_level,
>+				   unsigned long actions_performed)
>+{
>+	int stat_idx;
>+	int action;
>+
>+	if (!actions_performed)
>+		return;
>+
>+	if (WARN_ON(limit_level > DEVLINK_RELOAD_ACTION_LIMIT_LEVEL_MAX))
>+		return;
>+	for (action = 0; action <= DEVLINK_RELOAD_ACTION_MAX; action++) {
>+		if (!test_bit(action, &actions_performed))
>+			continue;
>+		stat_idx = limit_level * __DEVLINK_RELOAD_ACTION_MAX + action;
>+		devlink->reload_action_stats[stat_idx]++;
>+	}
>+	devlink_notify(devlink, DEVLINK_CMD_NEW);
>+}
>+
>+/**
>+ *	devlink_reload_implicit_actions_performed - Update devlink on reload actions
>+ *	  performed which are not a direct result of devlink reload call.
>+ *
>+ *	This should be called by a driver after performing reload actions in case it was not
>+ *	a result of devlink reload call. For example fw_activate was performed as a result
>+ *	of devlink reload triggered fw_activate on another host.
>+ *	The motivation for this function is to keep data on reload actions performed on this
>+ *	function whether it was done due to direct devlink reload call or not.
>+ *
>+ *	@devlink: devlink
>+ *	@limit_level: reload action limit level
>+ *	@actions_performed: bitmask of actions performed
>+ */
>+void devlink_reload_implicit_actions_performed(struct devlink *devlink,
>+					       enum devlink_reload_action_limit_level limit_level,
>+					       unsigned long actions_performed)

What I'm a bit scarred of that the driver would call this from withing
reload_down()/up() ops. Perheps this could be WARN_ON'ed here (or in
devlink_reload())?


>+{
>+	if (!devlink_reload_supported(devlink))

Hmm. I think that the driver does not have to support the reload and can
still be reloaded by another instance and update the stats here. Why
not?


>+		return;
>+	devlink_reload_action_stats_update(devlink, limit_level, actions_performed);
>+}
>+EXPORT_SYMBOL_GPL(devlink_reload_implicit_actions_performed);
>+
> static int devlink_reload(struct devlink *devlink, struct net *dest_net,
> 			  enum devlink_reload_action action,
> 			  enum devlink_reload_action_limit_level limit_level,
>-			  struct netlink_ext_ack *extack, unsigned long *actions_performed)
>+			  struct netlink_ext_ack *extack, unsigned long *actions_performed_out)
> {
>+	unsigned long actions_performed;
> 	int err;
> 
> 	if (!devlink->reload_enabled)
>@@ -2998,9 +3045,14 @@ static int devlink_reload(struct devlink *devlink, struct net *dest_net,
> 	if (dest_net && !net_eq(dest_net, devlink_net(devlink)))
> 		devlink_reload_netns_change(devlink, dest_net);
> 
>-	err = devlink->ops->reload_up(devlink, action, limit_level, extack, actions_performed);
>+	err = devlink->ops->reload_up(devlink, action, limit_level, extack, &actions_performed);
> 	devlink_reload_failed_set(devlink, !!err);
>-	return err;
>+	if (err)
>+		return err;
>+	devlink_reload_action_stats_update(devlink, limit_level, actions_performed);
>+	if (actions_performed_out)

Just make the caller to provide valid pointer, as I suggested in the
other patch review.


>+		*actions_performed_out = actions_performed;
>+	return 0;
> }
> 
> static int
>-- 
>2.17.1
>
Moshe Shemesh Sept. 15, 2020, 12:30 p.m. UTC | #2
On 9/14/2020 4:39 PM, Jiri Pirko wrote:
> Mon, Sep 14, 2020 at 08:07:50AM CEST, moshe@mellanox.com wrote:

>> Add reload action stats to hold the history per reload action type and

>> limit level.

> Empty line missing.

>


Ack.

>> For example, the number of times fw_activate has been performed on this

>> device since the driver module was added or if the firmware activation

>> was performed with or without reset.

>> Add devlink notification on stats update.

>>

>> The function devlink_reload_actions_implicit_actions_performed() is

>> exported to enable also drivers update on reload actions performed,

>> for example in case firmware activation with reset finished

>> successfully but was initiated by remote host.

>>

>> Signed-off-by: Moshe Shemesh <moshe@mellanox.com>

>> ---

>> v3 -> v4:

>> - Renamed reload_actions_cnts to reload_action_stats

>> - Add devlink notifications on stats update

>> - Renamed devlink_reload_actions_implicit_actions_performed() and add

>>   function comment in code

>> v2 -> v3:

>> - New patch

>> ---

>> include/net/devlink.h |  7 ++++++

>> net/core/devlink.c    | 58 ++++++++++++++++++++++++++++++++++++++++---

>> 2 files changed, 62 insertions(+), 3 deletions(-)

>>

>> diff --git a/include/net/devlink.h b/include/net/devlink.h

>> index dddd9ee5b8a9..b4feb92e0269 100644

>> --- a/include/net/devlink.h

>> +++ b/include/net/devlink.h

>> @@ -20,6 +20,9 @@

>> #include <uapi/linux/devlink.h>

>> #include <linux/xarray.h>

>>

>> +#define DEVLINK_RELOAD_ACTION_STATS_ARRAY_SIZE \

>> +	(__DEVLINK_RELOAD_ACTION_LIMIT_LEVEL_MAX * __DEVLINK_RELOAD_ACTION_MAX)

>> +

>> struct devlink_ops;

>>

>> struct devlink {

>> @@ -38,6 +41,7 @@ struct devlink {

>> 	struct list_head trap_policer_list;

>> 	const struct devlink_ops *ops;

>> 	struct xarray snapshot_ids;

>> +	u32 reload_action_stats[DEVLINK_RELOAD_ACTION_STATS_ARRAY_SIZE];

>> 	struct device *dev;

>> 	possible_net_t _net;

>> 	struct mutex lock; /* Serializes access to devlink instance specific objects such as

>> @@ -1397,6 +1401,9 @@ void

>> devlink_health_reporter_recovery_done(struct devlink_health_reporter *reporter);

>>

>> bool devlink_is_reload_failed(const struct devlink *devlink);

>> +void devlink_reload_implicit_actions_performed(struct devlink *devlink,

>> +					       enum devlink_reload_action_limit_level limit_level,

>> +					       unsigned long actions_performed);

>>

>> void devlink_flash_update_begin_notify(struct devlink *devlink);

>> void devlink_flash_update_end_notify(struct devlink *devlink);

>> diff --git a/net/core/devlink.c b/net/core/devlink.c

>> index 60aa0c4a3726..cbf746966913 100644

>> --- a/net/core/devlink.c

>> +++ b/net/core/devlink.c

>> @@ -2981,11 +2981,58 @@ bool devlink_is_reload_failed(const struct devlink *devlink)

>> }

>> EXPORT_SYMBOL_GPL(devlink_is_reload_failed);

>>

>> +static void

>> +devlink_reload_action_stats_update(struct devlink *devlink,

>> +				   enum devlink_reload_action_limit_level limit_level,

>> +				   unsigned long actions_performed)

>> +{

>> +	int stat_idx;

>> +	int action;

>> +

>> +	if (!actions_performed)

>> +		return;

>> +

>> +	if (WARN_ON(limit_level > DEVLINK_RELOAD_ACTION_LIMIT_LEVEL_MAX))

>> +		return;

>> +	for (action = 0; action <= DEVLINK_RELOAD_ACTION_MAX; action++) {

>> +		if (!test_bit(action, &actions_performed))

>> +			continue;

>> +		stat_idx = limit_level * __DEVLINK_RELOAD_ACTION_MAX + action;

>> +		devlink->reload_action_stats[stat_idx]++;

>> +	}

>> +	devlink_notify(devlink, DEVLINK_CMD_NEW);

>> +}

>> +

>> +/**

>> + *	devlink_reload_implicit_actions_performed - Update devlink on reload actions

>> + *	  performed which are not a direct result of devlink reload call.

>> + *

>> + *	This should be called by a driver after performing reload actions in case it was not

>> + *	a result of devlink reload call. For example fw_activate was performed as a result

>> + *	of devlink reload triggered fw_activate on another host.

>> + *	The motivation for this function is to keep data on reload actions performed on this

>> + *	function whether it was done due to direct devlink reload call or not.

>> + *

>> + *	@devlink: devlink

>> + *	@limit_level: reload action limit level

>> + *	@actions_performed: bitmask of actions performed

>> + */

>> +void devlink_reload_implicit_actions_performed(struct devlink *devlink,

>> +					       enum devlink_reload_action_limit_level limit_level,

>> +					       unsigned long actions_performed)

> What I'm a bit scarred of that the driver would call this from withing

> reload_down()/up() ops. Perheps this could be WARN_ON'ed here (or in

> devlink_reload())?

>


Not sure how I know if it was called from devlink_reload_down()/up() ? 
Maybe mutex ? So the warn will be actually mutex deadlock ?

>> +{

>> +	if (!devlink_reload_supported(devlink))

> Hmm. I think that the driver does not have to support the reload and can

> still be reloaded by another instance and update the stats here. Why

> not?

>


But I show counters only for supported reload actions and levels, 
otherwise we will have these counters on devlink dev show output for 
other drivers that don't have support for devlink reload and didn't 
implement any of these including this function and these drivers may do 
some actions like fw_activate in another way and don't update the stats 
and so that will make these stats misleading. They will show history 
"stats" but they don't update them as they didn't apply anything related 
to devlink reload.

>> +		return;

>> +	devlink_reload_action_stats_update(devlink, limit_level, actions_performed);

>> +}

>> +EXPORT_SYMBOL_GPL(devlink_reload_implicit_actions_performed);

>> +

>> static int devlink_reload(struct devlink *devlink, struct net *dest_net,

>> 			  enum devlink_reload_action action,

>> 			  enum devlink_reload_action_limit_level limit_level,

>> -			  struct netlink_ext_ack *extack, unsigned long *actions_performed)

>> +			  struct netlink_ext_ack *extack, unsigned long *actions_performed_out)

>> {

>> +	unsigned long actions_performed;

>> 	int err;

>>

>> 	if (!devlink->reload_enabled)

>> @@ -2998,9 +3045,14 @@ static int devlink_reload(struct devlink *devlink, struct net *dest_net,

>> 	if (dest_net && !net_eq(dest_net, devlink_net(devlink)))

>> 		devlink_reload_netns_change(devlink, dest_net);

>>

>> -	err = devlink->ops->reload_up(devlink, action, limit_level, extack, actions_performed);

>> +	err = devlink->ops->reload_up(devlink, action, limit_level, extack, &actions_performed);

>> 	devlink_reload_failed_set(devlink, !!err);

>> -	return err;

>> +	if (err)

>> +		return err;

>> +	devlink_reload_action_stats_update(devlink, limit_level, actions_performed);

>> +	if (actions_performed_out)

> Just make the caller to provide valid pointer, as I suggested in the

> other patch review.



Ack.

>

>> +		*actions_performed_out = actions_performed;

>> +	return 0;

>> }

>>

>> static int

>> -- 

>> 2.17.1

>>
Jiri Pirko Sept. 15, 2020, 1:33 p.m. UTC | #3
Tue, Sep 15, 2020 at 02:30:19PM CEST, moshe@nvidia.com wrote:
>

>On 9/14/2020 4:39 PM, Jiri Pirko wrote:

>> Mon, Sep 14, 2020 at 08:07:50AM CEST, moshe@mellanox.com wrote:


[..]


>> > +/**

>> > + *	devlink_reload_implicit_actions_performed - Update devlink on reload actions

>> > + *	  performed which are not a direct result of devlink reload call.

>> > + *

>> > + *	This should be called by a driver after performing reload actions in case it was not

>> > + *	a result of devlink reload call. For example fw_activate was performed as a result

>> > + *	of devlink reload triggered fw_activate on another host.

>> > + *	The motivation for this function is to keep data on reload actions performed on this

>> > + *	function whether it was done due to direct devlink reload call or not.

>> > + *

>> > + *	@devlink: devlink

>> > + *	@limit_level: reload action limit level

>> > + *	@actions_performed: bitmask of actions performed

>> > + */

>> > +void devlink_reload_implicit_actions_performed(struct devlink *devlink,

>> > +					       enum devlink_reload_action_limit_level limit_level,

>> > +					       unsigned long actions_performed)

>> What I'm a bit scarred of that the driver would call this from withing

>> reload_down()/up() ops. Perheps this could be WARN_ON'ed here (or in

>> devlink_reload())?

>> 

>

>Not sure how I know if it was called from devlink_reload_down()/up() ? Maybe

>mutex ? So the warn will be actually mutex deadlock ?


No. Don't abuse mutex for this.
Just make sure that the counters do not move when you call
reload_down/up().


>

>> > +{

>> > +	if (!devlink_reload_supported(devlink))

>> Hmm. I think that the driver does not have to support the reload and can

>> still be reloaded by another instance and update the stats here. Why

>> not?

>> 

>

>But I show counters only for supported reload actions and levels, otherwise

>we will have these counters on devlink dev show output for other drivers that

>don't have support for devlink reload and didn't implement any of these

>including this function and these drivers may do some actions like

>fw_activate in another way and don't update the stats and so that will make

>these stats misleading. They will show history "stats" but they don't update

>them as they didn't apply anything related to devlink reload.


The case I tried to point at is the driver instance, that does not
implement reload ops itself, but still it can be reloaded by someone else -
the other driver instance outside.

The counters should work no matter if the driver implements reload ops
or not. Why wouldn't they? The user still likes to know that the devices
was reloaded.



>

>> > +		return;

>> > +	devlink_reload_action_stats_update(devlink, limit_level, actions_performed);

>> > +}

>> > +EXPORT_SYMBOL_GPL(devlink_reload_implicit_actions_performed);

>> > +

>> > static int devlink_reload(struct devlink *devlink, struct net *dest_net,

>> > 			  enum devlink_reload_action action,

>> > 			  enum devlink_reload_action_limit_level limit_level,

>> > -			  struct netlink_ext_ack *extack, unsigned long *actions_performed)

>> > +			  struct netlink_ext_ack *extack, unsigned long *actions_performed_out)

>> > {

>> > +	unsigned long actions_performed;

>> > 	int err;

>> > 

>> > 	if (!devlink->reload_enabled)

>> > @@ -2998,9 +3045,14 @@ static int devlink_reload(struct devlink *devlink, struct net *dest_net,

>> > 	if (dest_net && !net_eq(dest_net, devlink_net(devlink)))

>> > 		devlink_reload_netns_change(devlink, dest_net);

>> > 

>> > -	err = devlink->ops->reload_up(devlink, action, limit_level, extack, actions_performed);

>> > +	err = devlink->ops->reload_up(devlink, action, limit_level, extack, &actions_performed);

>> > 	devlink_reload_failed_set(devlink, !!err);

>> > -	return err;

>> > +	if (err)

>> > +		return err;

>> > +	devlink_reload_action_stats_update(devlink, limit_level, actions_performed);

>> > +	if (actions_performed_out)

>> Just make the caller to provide valid pointer, as I suggested in the

>> other patch review.

>

>

>Ack.

>

>> 

>> > +		*actions_performed_out = actions_performed;

>> > +	return 0;

>> > }

>> > 

>> > static int

>> > -- 

>> > 2.17.1

>> >
Moshe Shemesh Sept. 15, 2020, 8:20 p.m. UTC | #4
On 9/15/2020 4:33 PM, Jiri Pirko wrote:
> Tue, Sep 15, 2020 at 02:30:19PM CEST, moshe@nvidia.com wrote:

>> On 9/14/2020 4:39 PM, Jiri Pirko wrote:

>>> Mon, Sep 14, 2020 at 08:07:50AM CEST, moshe@mellanox.com wrote:

> [..]

>

>

>>>> +/**

>>>> + *	devlink_reload_implicit_actions_performed - Update devlink on reload actions

>>>> + *	  performed which are not a direct result of devlink reload call.

>>>> + *

>>>> + *	This should be called by a driver after performing reload actions in case it was not

>>>> + *	a result of devlink reload call. For example fw_activate was performed as a result

>>>> + *	of devlink reload triggered fw_activate on another host.

>>>> + *	The motivation for this function is to keep data on reload actions performed on this

>>>> + *	function whether it was done due to direct devlink reload call or not.

>>>> + *

>>>> + *	@devlink: devlink

>>>> + *	@limit_level: reload action limit level

>>>> + *	@actions_performed: bitmask of actions performed

>>>> + */

>>>> +void devlink_reload_implicit_actions_performed(struct devlink *devlink,

>>>> +					       enum devlink_reload_action_limit_level limit_level,

>>>> +					       unsigned long actions_performed)

>>> What I'm a bit scarred of that the driver would call this from withing

>>> reload_down()/up() ops. Perheps this could be WARN_ON'ed here (or in

>>> devlink_reload())?

>>>

>> Not sure how I know if it was called from devlink_reload_down()/up() ? Maybe

>> mutex ? So the warn will be actually mutex deadlock ?

> No. Don't abuse mutex for this.

> Just make sure that the counters do not move when you call

> reload_down/up().

>


Can make that, but actually I better take devlink->lock anyway in this 
function to avoid races, WDYT ?

>>>> +{

>>>> +	if (!devlink_reload_supported(devlink))

>>> Hmm. I think that the driver does not have to support the reload and can

>>> still be reloaded by another instance and update the stats here. Why

>>> not?

>>>

>> But I show counters only for supported reload actions and levels, otherwise

>> we will have these counters on devlink dev show output for other drivers that

>> don't have support for devlink reload and didn't implement any of these

>> including this function and these drivers may do some actions like

>> fw_activate in another way and don't update the stats and so that will make

>> these stats misleading. They will show history "stats" but they don't update

>> them as they didn't apply anything related to devlink reload.

> The case I tried to point at is the driver instance, that does not

> implement reload ops itself, but still it can be reloaded by someone else -

> the other driver instance outside.

>

> The counters should work no matter if the driver implements reload ops

> or not. Why wouldn't they? The user still likes to know that the devices

> was reloaded.

>


OK, so you say that every driver should show all counters no matter what 
actions it supports and if it supports devlink reload at all, right ?

>

>>>> +		return;

>>>> +	devlink_reload_action_stats_update(devlink, limit_level, actions_performed);

>>>> +}

>>>> +EXPORT_SYMBOL_GPL(devlink_reload_implicit_actions_performed);

>>>> +

>>>> static int devlink_reload(struct devlink *devlink, struct net *dest_net,

>>>> 			  enum devlink_reload_action action,

>>>> 			  enum devlink_reload_action_limit_level limit_level,

>>>> -			  struct netlink_ext_ack *extack, unsigned long *actions_performed)

>>>> +			  struct netlink_ext_ack *extack, unsigned long *actions_performed_out)

>>>> {

>>>> +	unsigned long actions_performed;

>>>> 	int err;

>>>>

>>>> 	if (!devlink->reload_enabled)

>>>> @@ -2998,9 +3045,14 @@ static int devlink_reload(struct devlink *devlink, struct net *dest_net,

>>>> 	if (dest_net && !net_eq(dest_net, devlink_net(devlink)))

>>>> 		devlink_reload_netns_change(devlink, dest_net);

>>>>

>>>> -	err = devlink->ops->reload_up(devlink, action, limit_level, extack, actions_performed);

>>>> +	err = devlink->ops->reload_up(devlink, action, limit_level, extack, &actions_performed);

>>>> 	devlink_reload_failed_set(devlink, !!err);

>>>> -	return err;

>>>> +	if (err)

>>>> +		return err;

>>>> +	devlink_reload_action_stats_update(devlink, limit_level, actions_performed);

>>>> +	if (actions_performed_out)

>>> Just make the caller to provide valid pointer, as I suggested in the

>>> other patch review.

>>

>> Ack.

>>

>>>> +		*actions_performed_out = actions_performed;

>>>> +	return 0;

>>>> }

>>>>

>>>> static int

>>>> -- 

>>>> 2.17.1

>>>>
Jiri Pirko Sept. 16, 2020, 6:07 a.m. UTC | #5
Tue, Sep 15, 2020 at 10:20:39PM CEST, moshe@nvidia.com wrote:
>
>On 9/15/2020 4:33 PM, Jiri Pirko wrote:
>> Tue, Sep 15, 2020 at 02:30:19PM CEST, moshe@nvidia.com wrote:
>> > On 9/14/2020 4:39 PM, Jiri Pirko wrote:
>> > > Mon, Sep 14, 2020 at 08:07:50AM CEST, moshe@mellanox.com wrote:
>> [..]
>> 
>> 
>> > > > +/**
>> > > > + *	devlink_reload_implicit_actions_performed - Update devlink on reload actions
>> > > > + *	  performed which are not a direct result of devlink reload call.
>> > > > + *
>> > > > + *	This should be called by a driver after performing reload actions in case it was not
>> > > > + *	a result of devlink reload call. For example fw_activate was performed as a result
>> > > > + *	of devlink reload triggered fw_activate on another host.
>> > > > + *	The motivation for this function is to keep data on reload actions performed on this
>> > > > + *	function whether it was done due to direct devlink reload call or not.
>> > > > + *
>> > > > + *	@devlink: devlink
>> > > > + *	@limit_level: reload action limit level
>> > > > + *	@actions_performed: bitmask of actions performed
>> > > > + */
>> > > > +void devlink_reload_implicit_actions_performed(struct devlink *devlink,
>> > > > +					       enum devlink_reload_action_limit_level limit_level,
>> > > > +					       unsigned long actions_performed)
>> > > What I'm a bit scarred of that the driver would call this from withing
>> > > reload_down()/up() ops. Perheps this could be WARN_ON'ed here (or in
>> > > devlink_reload())?
>> > > 
>> > Not sure how I know if it was called from devlink_reload_down()/up() ? Maybe
>> > mutex ? So the warn will be actually mutex deadlock ?
>> No. Don't abuse mutex for this.
>> Just make sure that the counters do not move when you call
>> reload_down/up().
>> 
>
>Can make that, but actually I better take devlink->lock anyway in this
>function to avoid races, WDYT ?

Either you need to protect some data or not. So if you do, do it.


>
>> > > > +{
>> > > > +	if (!devlink_reload_supported(devlink))
>> > > Hmm. I think that the driver does not have to support the reload and can
>> > > still be reloaded by another instance and update the stats here. Why
>> > > not?
>> > > 
>> > But I show counters only for supported reload actions and levels, otherwise
>> > we will have these counters on devlink dev show output for other drivers that
>> > don't have support for devlink reload and didn't implement any of these
>> > including this function and these drivers may do some actions like
>> > fw_activate in another way and don't update the stats and so that will make
>> > these stats misleading. They will show history "stats" but they don't update
>> > them as they didn't apply anything related to devlink reload.
>> The case I tried to point at is the driver instance, that does not
>> implement reload ops itself, but still it can be reloaded by someone else -
>> the other driver instance outside.
>> 
>> The counters should work no matter if the driver implements reload ops
>> or not. Why wouldn't they? The user still likes to know that the devices
>> was reloaded.
>> 
>
>OK, so you say that every driver should show all counters no matter what
>actions it supports and if it supports devlink reload at all, right ?

Well, as I wrote in the other email, I think that there should be 2 sets
of stats for this.


>
>> 
>> > > > +		return;
>> > > > +	devlink_reload_action_stats_update(devlink, limit_level, actions_performed);
>> > > > +}
>> > > > +EXPORT_SYMBOL_GPL(devlink_reload_implicit_actions_performed);
>> > > > +
>> > > > static int devlink_reload(struct devlink *devlink, struct net *dest_net,
>> > > > 			  enum devlink_reload_action action,
>> > > > 			  enum devlink_reload_action_limit_level limit_level,
>> > > > -			  struct netlink_ext_ack *extack, unsigned long *actions_performed)
>> > > > +			  struct netlink_ext_ack *extack, unsigned long *actions_performed_out)
>> > > > {
>> > > > +	unsigned long actions_performed;
>> > > > 	int err;
>> > > > 
>> > > > 	if (!devlink->reload_enabled)
>> > > > @@ -2998,9 +3045,14 @@ static int devlink_reload(struct devlink *devlink, struct net *dest_net,
>> > > > 	if (dest_net && !net_eq(dest_net, devlink_net(devlink)))
>> > > > 		devlink_reload_netns_change(devlink, dest_net);
>> > > > 
>> > > > -	err = devlink->ops->reload_up(devlink, action, limit_level, extack, actions_performed);
>> > > > +	err = devlink->ops->reload_up(devlink, action, limit_level, extack, &actions_performed);
>> > > > 	devlink_reload_failed_set(devlink, !!err);
>> > > > -	return err;
>> > > > +	if (err)
>> > > > +		return err;
>> > > > +	devlink_reload_action_stats_update(devlink, limit_level, actions_performed);
>> > > > +	if (actions_performed_out)
>> > > Just make the caller to provide valid pointer, as I suggested in the
>> > > other patch review.
>> > 
>> > Ack.
>> > 
>> > > > +		*actions_performed_out = actions_performed;
>> > > > +	return 0;
>> > > > }
>> > > > 
>> > > > static int
>> > > > -- 
>> > > > 2.17.1
>> > > >
diff mbox series

Patch

diff --git a/include/net/devlink.h b/include/net/devlink.h
index dddd9ee5b8a9..b4feb92e0269 100644
--- a/include/net/devlink.h
+++ b/include/net/devlink.h
@@ -20,6 +20,9 @@ 
 #include <uapi/linux/devlink.h>
 #include <linux/xarray.h>
 
+#define DEVLINK_RELOAD_ACTION_STATS_ARRAY_SIZE \
+	(__DEVLINK_RELOAD_ACTION_LIMIT_LEVEL_MAX * __DEVLINK_RELOAD_ACTION_MAX)
+
 struct devlink_ops;
 
 struct devlink {
@@ -38,6 +41,7 @@  struct devlink {
 	struct list_head trap_policer_list;
 	const struct devlink_ops *ops;
 	struct xarray snapshot_ids;
+	u32 reload_action_stats[DEVLINK_RELOAD_ACTION_STATS_ARRAY_SIZE];
 	struct device *dev;
 	possible_net_t _net;
 	struct mutex lock; /* Serializes access to devlink instance specific objects such as
@@ -1397,6 +1401,9 @@  void
 devlink_health_reporter_recovery_done(struct devlink_health_reporter *reporter);
 
 bool devlink_is_reload_failed(const struct devlink *devlink);
+void devlink_reload_implicit_actions_performed(struct devlink *devlink,
+					       enum devlink_reload_action_limit_level limit_level,
+					       unsigned long actions_performed);
 
 void devlink_flash_update_begin_notify(struct devlink *devlink);
 void devlink_flash_update_end_notify(struct devlink *devlink);
diff --git a/net/core/devlink.c b/net/core/devlink.c
index 60aa0c4a3726..cbf746966913 100644
--- a/net/core/devlink.c
+++ b/net/core/devlink.c
@@ -2981,11 +2981,58 @@  bool devlink_is_reload_failed(const struct devlink *devlink)
 }
 EXPORT_SYMBOL_GPL(devlink_is_reload_failed);
 
+static void
+devlink_reload_action_stats_update(struct devlink *devlink,
+				   enum devlink_reload_action_limit_level limit_level,
+				   unsigned long actions_performed)
+{
+	int stat_idx;
+	int action;
+
+	if (!actions_performed)
+		return;
+
+	if (WARN_ON(limit_level > DEVLINK_RELOAD_ACTION_LIMIT_LEVEL_MAX))
+		return;
+	for (action = 0; action <= DEVLINK_RELOAD_ACTION_MAX; action++) {
+		if (!test_bit(action, &actions_performed))
+			continue;
+		stat_idx = limit_level * __DEVLINK_RELOAD_ACTION_MAX + action;
+		devlink->reload_action_stats[stat_idx]++;
+	}
+	devlink_notify(devlink, DEVLINK_CMD_NEW);
+}
+
+/**
+ *	devlink_reload_implicit_actions_performed - Update devlink on reload actions
+ *	  performed which are not a direct result of devlink reload call.
+ *
+ *	This should be called by a driver after performing reload actions in case it was not
+ *	a result of devlink reload call. For example fw_activate was performed as a result
+ *	of devlink reload triggered fw_activate on another host.
+ *	The motivation for this function is to keep data on reload actions performed on this
+ *	function whether it was done due to direct devlink reload call or not.
+ *
+ *	@devlink: devlink
+ *	@limit_level: reload action limit level
+ *	@actions_performed: bitmask of actions performed
+ */
+void devlink_reload_implicit_actions_performed(struct devlink *devlink,
+					       enum devlink_reload_action_limit_level limit_level,
+					       unsigned long actions_performed)
+{
+	if (!devlink_reload_supported(devlink))
+		return;
+	devlink_reload_action_stats_update(devlink, limit_level, actions_performed);
+}
+EXPORT_SYMBOL_GPL(devlink_reload_implicit_actions_performed);
+
 static int devlink_reload(struct devlink *devlink, struct net *dest_net,
 			  enum devlink_reload_action action,
 			  enum devlink_reload_action_limit_level limit_level,
-			  struct netlink_ext_ack *extack, unsigned long *actions_performed)
+			  struct netlink_ext_ack *extack, unsigned long *actions_performed_out)
 {
+	unsigned long actions_performed;
 	int err;
 
 	if (!devlink->reload_enabled)
@@ -2998,9 +3045,14 @@  static int devlink_reload(struct devlink *devlink, struct net *dest_net,
 	if (dest_net && !net_eq(dest_net, devlink_net(devlink)))
 		devlink_reload_netns_change(devlink, dest_net);
 
-	err = devlink->ops->reload_up(devlink, action, limit_level, extack, actions_performed);
+	err = devlink->ops->reload_up(devlink, action, limit_level, extack, &actions_performed);
 	devlink_reload_failed_set(devlink, !!err);
-	return err;
+	if (err)
+		return err;
+	devlink_reload_action_stats_update(devlink, limit_level, actions_performed);
+	if (actions_performed_out)
+		*actions_performed_out = actions_performed;
+	return 0;
 }
 
 static int