diff mbox series

interconnect: Don't access req_list while it's being manipulated

Message ID 20240305225652.22872-1-quic_mdtipton@quicinc.com
State Accepted
Commit de1bf25b6d771abdb52d43546cf57ad775fb68a1
Headers show
Series interconnect: Don't access req_list while it's being manipulated | expand

Commit Message

Mike Tipton March 5, 2024, 10:56 p.m. UTC
The icc_lock mutex was split into separate icc_lock and icc_bw_lock
mutexes in [1] to avoid lockdep splats. However, this didn't adequately
protect access to icc_node::req_list.

The icc_set_bw() function iterates over req_list (via
aggregate_requests()) while holding only icc_bw_lock, but req_list can
be modified while holding only icc_lock. This causes races between
icc_set_bw(), of_icc_get(), and icc_put().

Example A:

  CPU0                               CPU1
  ----                               ----
  icc_set_bw(path_a)
    mutex_lock(&icc_bw_lock);
                                     icc_put(path_b)
                                       mutex_lock(&icc_lock);
    aggregate_requests()
      hlist_for_each_entry(r, ...
                                       hlist_del(...
        <r = invalid pointer>

Example B:

  CPU0                               CPU1
  ----                               ----
  icc_set_bw(path_a)
    mutex_lock(&icc_bw_lock);
                                     path_b = of_icc_get()
                                       of_icc_get_by_index()
                                         mutex_lock(&icc_lock);
                                         path_find()
                                           path_init()
    aggregate_requests()
      hlist_for_each_entry(r, ...
                                             hlist_add_head(...
        <r = invalid pointer>

Fix this by ensuring icc_bw_lock is always held while manipulating
icc_node::req_list. The additional critical sections that take
icc_bw_lock don't perform any memory allocations, so we should still be
safe from the original lockdep splats that motivated the separate locks.

[1] commit af42269c3523 ("interconnect: Fix locking for runpm vs reclaim")

Signed-off-by: Mike Tipton <quic_mdtipton@quicinc.com>
Fixes: af42269c3523 ("interconnect: Fix locking for runpm vs reclaim")
---
 drivers/interconnect/core.c | 8 ++++++++
 1 file changed, 8 insertions(+)

Comments

Rob Clark March 5, 2024, 11:13 p.m. UTC | #1
On Tue, Mar 5, 2024 at 2:57 PM Mike Tipton <quic_mdtipton@quicinc.com> wrote:
>
> The icc_lock mutex was split into separate icc_lock and icc_bw_lock
> mutexes in [1] to avoid lockdep splats. However, this didn't adequately
> protect access to icc_node::req_list.
>
> The icc_set_bw() function will eventually iterate over req_list while
> only holding icc_bw_lock, but req_list can be modified while only
> holding icc_lock. This causes races between icc_set_bw(), of_icc_get(),
> and icc_put().
>
> Example A:
>
>   CPU0                               CPU1
>   ----                               ----
>   icc_set_bw(path_a)
>     mutex_lock(&icc_bw_lock);
>                                      icc_put(path_b)
>                                        mutex_lock(&icc_lock);
>     aggregate_requests()
>       hlist_for_each_entry(r, ...
>                                        hlist_del(...
>         <r = invalid pointer>
>
> Example B:
>
>   CPU0                               CPU1
>   ----                               ----
>   icc_set_bw(path_a)
>     mutex_lock(&icc_bw_lock);
>                                      path_b = of_icc_get()
>                                        of_icc_get_by_index()
>                                          mutex_lock(&icc_lock);
>                                          path_find()
>                                            path_init()
>     aggregate_requests()
>       hlist_for_each_entry(r, ...
>                                              hlist_add_head(...
>         <r = invalid pointer>
>
> Fix this by ensuring icc_bw_lock is always held before manipulating
> icc_node::req_list. The additional places icc_bw_lock is held don't
> perform any memory allocations, so we should still be safe from the
> original lockdep splats that motivated the separate locks.
>
> [1] commit af42269c3523 ("interconnect: Fix locking for runpm vs reclaim")
>
> Signed-off-by: Mike Tipton <quic_mdtipton@quicinc.com>
> Fixes: af42269c3523 ("interconnect: Fix locking for runpm vs reclaim")

Looks good from a memory/lockdep standpoint,

Reviewed-by: Rob Clark <robdclark@chromium.org>

> ---
>  drivers/interconnect/core.c | 8 ++++++++
>  1 file changed, 8 insertions(+)
>
> diff --git a/drivers/interconnect/core.c b/drivers/interconnect/core.c
> index 5d1010cafed8..7e9b996b47c8 100644
> --- a/drivers/interconnect/core.c
> +++ b/drivers/interconnect/core.c
> @@ -176,6 +176,8 @@ static struct icc_path *path_init(struct device *dev, struct icc_node *dst,
>
>         path->num_nodes = num_nodes;
>
> +       mutex_lock(&icc_bw_lock);
> +
>         for (i = num_nodes - 1; i >= 0; i--) {
>                 node->provider->users++;
>                 hlist_add_head(&path->reqs[i].req_node, &node->req_list);
> @@ -186,6 +188,8 @@ static struct icc_path *path_init(struct device *dev, struct icc_node *dst,
>                 node = node->reverse;
>         }
>
> +       mutex_unlock(&icc_bw_lock);
> +
>         return path;
>  }
>
> @@ -792,12 +796,16 @@ void icc_put(struct icc_path *path)
>                 pr_err("%s: error (%d)\n", __func__, ret);
>
>         mutex_lock(&icc_lock);
> +       mutex_lock(&icc_bw_lock);
> +
>         for (i = 0; i < path->num_nodes; i++) {
>                 node = path->reqs[i].node;
>                 hlist_del(&path->reqs[i].req_node);
>                 if (!WARN_ON(!node->provider->users))
>                         node->provider->users--;
>         }
> +
> +       mutex_unlock(&icc_bw_lock);
>         mutex_unlock(&icc_lock);
>
>         kfree_const(path->name);
> --
> 2.17.1
>
Greg Kroah-Hartman March 6, 2024, 6:25 a.m. UTC | #2
On Tue, Mar 05, 2024 at 02:56:52PM -0800, Mike Tipton wrote:
> The icc_lock mutex was split into separate icc_lock and icc_bw_lock
> mutexes in [1] to avoid lockdep splats. However, this didn't adequately
> protect access to icc_node::req_list.
> 
> The icc_set_bw() function will eventually iterate over req_list while
> only holding icc_bw_lock, but req_list can be modified while only
> holding icc_lock. This causes races between icc_set_bw(), of_icc_get(),
> and icc_put().
> 
> Example A:
> 
>   CPU0                               CPU1
>   ----                               ----
>   icc_set_bw(path_a)
>     mutex_lock(&icc_bw_lock);
>                                      icc_put(path_b)
>                                        mutex_lock(&icc_lock);
>     aggregate_requests()
>       hlist_for_each_entry(r, ...
>                                        hlist_del(...
>         <r = invalid pointer>
> 
> Example B:
> 
>   CPU0                               CPU1
>   ----                               ----
>   icc_set_bw(path_a)
>     mutex_lock(&icc_bw_lock);
>                                      path_b = of_icc_get()
>                                        of_icc_get_by_index()
>                                          mutex_lock(&icc_lock);
>                                          path_find()
>                                            path_init()
>     aggregate_requests()
>       hlist_for_each_entry(r, ...
>                                              hlist_add_head(...
>         <r = invalid pointer>
> 
> Fix this by ensuring icc_bw_lock is always held before manipulating
> icc_node::req_list. The additional places icc_bw_lock is held don't
> perform any memory allocations, so we should still be safe from the
> original lockdep splats that motivated the separate locks.
> 
> [1] commit af42269c3523 ("interconnect: Fix locking for runpm vs reclaim")
> 
> Signed-off-by: Mike Tipton <quic_mdtipton@quicinc.com>
> Fixes: af42269c3523 ("interconnect: Fix locking for runpm vs reclaim")
> ---
>  drivers/interconnect/core.c | 8 ++++++++
>  1 file changed, 8 insertions(+)
> 

Hi,

This is the friendly patch-bot of Greg Kroah-Hartman.  You have sent him
a patch that has triggered this response.  He used to manually respond
to these common problems, but in order to save his sanity (he kept
writing the same thing over and over, yet to different people), I was
created.  Hopefully you will not take offence and will fix the problem
in your patch and resubmit it so that it can be accepted into the Linux
kernel tree.

You are receiving this message because of the following common error(s)
as indicated below:

- You have marked a patch with a "Fixes:" tag for a commit that is in an
  older released kernel, yet you do not have a cc: stable line in the
  signed-off-by area at all, which means that the patch will not be
  applied to any older kernel releases.  To properly fix this, please
  follow the documented rules in the
  Documentation/process/stable-kernel-rules.rst file for how to resolve
  this.

If you wish to discuss this problem further, or you have questions about
how to resolve this issue, please feel free to respond to this email and
Greg will reply once he has dug out from the pending patches received
from other developers.

thanks,

greg k-h's patch email bot
diff mbox series

Patch

diff --git a/drivers/interconnect/core.c b/drivers/interconnect/core.c
index 5d1010cafed8..7e9b996b47c8 100644
--- a/drivers/interconnect/core.c
+++ b/drivers/interconnect/core.c
@@ -176,6 +176,8 @@  static struct icc_path *path_init(struct device *dev, struct icc_node *dst,
 
 	path->num_nodes = num_nodes;
 
+	mutex_lock(&icc_bw_lock);
+
 	for (i = num_nodes - 1; i >= 0; i--) {
 		node->provider->users++;
 		hlist_add_head(&path->reqs[i].req_node, &node->req_list);
@@ -186,6 +188,8 @@  static struct icc_path *path_init(struct device *dev, struct icc_node *dst,
 		node = node->reverse;
 	}
 
+	mutex_unlock(&icc_bw_lock);
+
 	return path;
 }
 
@@ -792,12 +796,16 @@  void icc_put(struct icc_path *path)
 		pr_err("%s: error (%d)\n", __func__, ret);
 
 	mutex_lock(&icc_lock);
+	mutex_lock(&icc_bw_lock);
+
 	for (i = 0; i < path->num_nodes; i++) {
 		node = path->reqs[i].node;
 		hlist_del(&path->reqs[i].req_node);
 		if (!WARN_ON(!node->provider->users))
 			node->provider->users--;
 	}
+
+	mutex_unlock(&icc_bw_lock);
 	mutex_unlock(&icc_lock);
 
 	kfree_const(path->name);