diff mbox series

[v2] ethtool: reduce stack usage with clang

Message ID 20190307160017.3120362-1-arnd@arndb.de
State New
Headers show
Series [v2] ethtool: reduce stack usage with clang | expand

Commit Message

Arnd Bergmann March 7, 2019, 3:58 p.m. UTC
clang inlines the dev_ethtool() more aggressively than gcc does, leading
to a larger amount of used stack space:

net/core/ethtool.c:2536:24: error: stack frame size of 1216 bytes in function 'dev_ethtool' [-Werror,-Wframe-larger-than=]

Marking the sub-functions that require the most stack space as
noinline_for_stack gives us reasonable behavior on all compilers.

Signed-off-by: Arnd Bergmann <arnd@arndb.de>

---
v2: don't annotate dev_ethtool itself, as pointed out by Michal Kubecek
---
 net/core/ethtool.c | 16 +++++++++-------
 1 file changed, 9 insertions(+), 7 deletions(-)

-- 
2.20.0

Comments

Michal Kubecek March 7, 2019, 4:41 p.m. UTC | #1
On Thu, Mar 07, 2019 at 04:58:35PM +0100, Arnd Bergmann wrote:
> clang inlines the dev_ethtool() more aggressively than gcc does, leading

> to a larger amount of used stack space:

> 

> net/core/ethtool.c:2536:24: error: stack frame size of 1216 bytes in function 'dev_ethtool' [-Werror,-Wframe-larger-than=]

> 

> Marking the sub-functions that require the most stack space as

> noinline_for_stack gives us reasonable behavior on all compilers.

> 

> Signed-off-by: Arnd Bergmann <arnd@arndb.de>

> ---

> v2: don't annotate dev_ethtool itself, as pointed out by Michal Kubecek

> ---

>  net/core/ethtool.c | 16 +++++++++-------

>  1 file changed, 9 insertions(+), 7 deletions(-)

> 


Reviewed-by: Michal Kubecek <mkubecek@suse.cz>


In the long term, these three functions would IMHO deserve some rework.
While marking them as noinline_for_stack prevents one big stack frame in
dev_ethtool(), we still allocate rather big structures on the stack,
only it won't be in one stack frame. When we take the

  dev_ethtool
    ethtool_set_perqueue
      ethtool_[gs]et_per_queue_coalesce

path, there will still be two 512-byte arrays on the stack, one inside
struct ethtool_per_queue_op in ethtool_set_per_queue() and one as the
queue_mask bitmap in ethtool_[gs]et_per_queue_coalesce() (MAX_NUM_QUEUE
is 4096 now). In other words, actual stack consumption might be
approximately the same after this change as it was before, only divided
into three stack frames.

Michal Kubecek

> diff --git a/net/core/ethtool.c b/net/core/ethtool.c

> index d4918ffddda8..b1eb32419732 100644

> --- a/net/core/ethtool.c

> +++ b/net/core/ethtool.c

> @@ -2319,9 +2319,10 @@ static int ethtool_set_tunable(struct net_device *dev, void __user *useraddr)

>  	return ret;

>  }

>  

> -static int ethtool_get_per_queue_coalesce(struct net_device *dev,

> -					  void __user *useraddr,

> -					  struct ethtool_per_queue_op *per_queue_opt)

> +static noinline_for_stack int

> +ethtool_get_per_queue_coalesce(struct net_device *dev,

> +			       void __user *useraddr,

> +			       struct ethtool_per_queue_op *per_queue_opt)

>  {

>  	u32 bit;

>  	int ret;

> @@ -2349,9 +2350,10 @@ static int ethtool_get_per_queue_coalesce(struct net_device *dev,

>  	return 0;

>  }

>  

> -static int ethtool_set_per_queue_coalesce(struct net_device *dev,

> -					  void __user *useraddr,

> -					  struct ethtool_per_queue_op *per_queue_opt)

> +static noinline_for_stack int

> +ethtool_set_per_queue_coalesce(struct net_device *dev,

> +			       void __user *useraddr,

> +			       struct ethtool_per_queue_op *per_queue_opt)

>  {

>  	u32 bit;

>  	int i, ret = 0;

> @@ -2405,7 +2407,7 @@ static int ethtool_set_per_queue_coalesce(struct net_device *dev,

>  	return ret;

>  }

>  

> -static int ethtool_set_per_queue(struct net_device *dev,

> +static int noinline_for_stack ethtool_set_per_queue(struct net_device *dev,

>  				 void __user *useraddr, u32 sub_cmd)

>  {

>  	struct ethtool_per_queue_op per_queue_opt;
Arnd Bergmann March 7, 2019, 10:03 p.m. UTC | #2
On Thu, Mar 7, 2019 at 6:46 PM David Miller <davem@davemloft.net> wrote:
>

> From: Arnd Bergmann <arnd@arndb.de>

> Date: Thu,  7 Mar 2019 16:58:35 +0100

>

> > clang inlines the dev_ethtool() more aggressively than gcc does, leading

> > to a larger amount of used stack space:

> >

> > net/core/ethtool.c:2536:24: error: stack frame size of 1216 bytes in function 'dev_ethtool' [-Werror,-Wframe-larger-than=]

> >

> > Marking the sub-functions that require the most stack space as

> > noinline_for_stack gives us reasonable behavior on all compilers.

> >

> > Signed-off-by: Arnd Bergmann <arnd@arndb.de>

> > ---

> > v2: don't annotate dev_ethtool itself, as pointed out by Michal Kubecek

>

> I'll apply this, but as Michal said this is just papering over the problem,

> the aggregate stack allocation is still the same and very large.


Thanks,

I looked at it again and found that ethtool_get_per_queue_coalesce()
and ethtool_set_per_queue_coalesce() in fact use the same stack slots
(as one would hope) when they are both inlined, so you are both right
that this doesn't change as much for the ethtool_set_per_queue()
function as I had hoped.

On the other hand, at least it helps reduce the stack usage for all
the other sub-functions of dev_ethtool(), which now don't pile
on top of the 1216 bytes for the combined function but only
on top of the 272 bytes for the rest of dev_ethtool.

     Arnd
diff mbox series

Patch

diff --git a/net/core/ethtool.c b/net/core/ethtool.c
index d4918ffddda8..b1eb32419732 100644
--- a/net/core/ethtool.c
+++ b/net/core/ethtool.c
@@ -2319,9 +2319,10 @@  static int ethtool_set_tunable(struct net_device *dev, void __user *useraddr)
 	return ret;
 }
 
-static int ethtool_get_per_queue_coalesce(struct net_device *dev,
-					  void __user *useraddr,
-					  struct ethtool_per_queue_op *per_queue_opt)
+static noinline_for_stack int
+ethtool_get_per_queue_coalesce(struct net_device *dev,
+			       void __user *useraddr,
+			       struct ethtool_per_queue_op *per_queue_opt)
 {
 	u32 bit;
 	int ret;
@@ -2349,9 +2350,10 @@  static int ethtool_get_per_queue_coalesce(struct net_device *dev,
 	return 0;
 }
 
-static int ethtool_set_per_queue_coalesce(struct net_device *dev,
-					  void __user *useraddr,
-					  struct ethtool_per_queue_op *per_queue_opt)
+static noinline_for_stack int
+ethtool_set_per_queue_coalesce(struct net_device *dev,
+			       void __user *useraddr,
+			       struct ethtool_per_queue_op *per_queue_opt)
 {
 	u32 bit;
 	int i, ret = 0;
@@ -2405,7 +2407,7 @@  static int ethtool_set_per_queue_coalesce(struct net_device *dev,
 	return ret;
 }
 
-static int ethtool_set_per_queue(struct net_device *dev,
+static int noinline_for_stack ethtool_set_per_queue(struct net_device *dev,
 				 void __user *useraddr, u32 sub_cmd)
 {
 	struct ethtool_per_queue_op per_queue_opt;