Message ID | 20250304131120.31135-1-liuhangbin@gmail.com |
---|---|
Headers | show |
Series | bond: fix xfrm offload issues | expand |
On 3/4/25 15:11, Hangbin Liu wrote: > The fixed commit placed mutex_lock() inside spin_lock_bh(), which triggers > a warning: > > BUG: sleeping function called from invalid context at... > > Fix this by moving the IPsec deletion operation to bond_ipsec_free_sa, > which is not called under spin_lock_bh(). > > Additionally, delete the IPsec list entry in bond_ipsec_del_sa_all() when the > XFRM state is DEAD to prevent xdo_dev_state_free() from being triggered > again in bond_ipsec_free_sa(). > > For bond_ipsec_free_sa(), there are now three conditions: > > 1. if (!slave): When no active device exists. > 2. if (!xs->xso.real_dev): When xdo_dev_state_add() fails. > 3. if (xs->xso.real_dev != real_dev): When an xs has already been freed > by bond_ipsec_del_sa_all() due to migration, and the active slave has > changed to a new device. At the same time, the xs is marked as DEAD > because the XFRM entry is removed, triggering xfrm_state_gc_task() and > bond_ipsec_free_sa(). > > In all three cases, xdo_dev_state_free() should not be called; only xs > should be removed from the bond->ipsec_list. > > At the same time, protect bond_ipsec_del_sa_all and bond_ipsec_add_sa_all > with x->lock for each xs being processed. This prevents XFRM from > concurrently initiating add/delete operations on the managed states. 
> > Fixes: 2aeeef906d5a ("bonding: change ipsec_lock from spin lock to mutex") > Reported-by: Jakub Kicinski <kuba@kernel.org> > Closes: https://lore.kernel.org/netdev/20241212062734.182a0164@kernel.org > Suggested-by: Cosmin Ratiu <cratiu@nvidia.com> > Signed-off-by: Hangbin Liu <liuhangbin@gmail.com> > --- > drivers/net/bonding/bond_main.c | 53 +++++++++++++++++++++++---------- > 1 file changed, 37 insertions(+), 16 deletions(-) > > diff --git a/drivers/net/bonding/bond_main.c b/drivers/net/bonding/bond_main.c > index e45bba240cbc..06b060d9b031 100644 > --- a/drivers/net/bonding/bond_main.c > +++ b/drivers/net/bonding/bond_main.c > @@ -537,15 +537,22 @@ static void bond_ipsec_add_sa_all(struct bonding *bond) > } > > list_for_each_entry(ipsec, &bond->ipsec_list, list) { > + spin_lock_bh(&ipsec->xs->lock); > + /* Skip dead xfrm states, they'll be freed later. */ > + if (ipsec->xs->km.state == XFRM_STATE_DEAD) > + goto next; > + > /* If new state is added before ipsec_lock acquired */ > if (ipsec->xs->xso.real_dev == real_dev) > - continue; > + goto next; > > ipsec->xs->xso.real_dev = real_dev; > if (real_dev->xfrmdev_ops->xdo_dev_state_add(ipsec->xs, NULL)) { > slave_warn(bond_dev, real_dev, "%s: failed to add SA\n", __func__); > ipsec->xs->xso.real_dev = NULL; > } > +next: > + spin_unlock_bh(&ipsec->xs->lock); > } > out: > mutex_unlock(&bond->ipsec_lock); > @@ -560,7 +567,6 @@ static void bond_ipsec_del_sa(struct xfrm_state *xs) > struct net_device *bond_dev = xs->xso.dev; > struct net_device *real_dev; > netdevice_tracker tracker; > - struct bond_ipsec *ipsec; > struct bonding *bond; > struct slave *slave; > > @@ -592,15 +598,6 @@ static void bond_ipsec_del_sa(struct xfrm_state *xs) > real_dev->xfrmdev_ops->xdo_dev_state_delete(xs); > out: > netdev_put(real_dev, &tracker); > - mutex_lock(&bond->ipsec_lock); > - list_for_each_entry(ipsec, &bond->ipsec_list, list) { > - if (ipsec->xs == xs) { > - list_del(&ipsec->list); > - kfree(ipsec); > - break; > - } > - } > - 
mutex_unlock(&bond->ipsec_lock); > } > > static void bond_ipsec_del_sa_all(struct bonding *bond) > @@ -617,8 +614,18 @@ static void bond_ipsec_del_sa_all(struct bonding *bond) > > mutex_lock(&bond->ipsec_lock); > list_for_each_entry(ipsec, &bond->ipsec_list, list) { Second time - you should use list_for_each_entry_safe if you're walking and deleting elements from the list. > + spin_lock_bh(&ipsec->xs->lock); > if (!ipsec->xs->xso.real_dev) > - continue; > + goto next; > + > + if (ipsec->xs->km.state == XFRM_STATE_DEAD) { > + /* already dead no need to delete again */ > + if (real_dev->xfrmdev_ops->xdo_dev_state_free) > + real_dev->xfrmdev_ops->xdo_dev_state_free(ipsec->xs); Have you checked if .xdo_dev_state_free can sleep? I see at least one that can: mlx5e_xfrm_free_state(). > + list_del(&ipsec->list); > + kfree(ipsec); > + goto next; > + } > > if (!real_dev->xfrmdev_ops || > !real_dev->xfrmdev_ops->xdo_dev_state_delete || > @@ -631,6 +638,8 @@ static void bond_ipsec_del_sa_all(struct bonding *bond) > if (real_dev->xfrmdev_ops->xdo_dev_state_free) > real_dev->xfrmdev_ops->xdo_dev_state_free(ipsec->xs); > } > +next: > + spin_unlock_bh(&ipsec->xs->lock); > } > mutex_unlock(&bond->ipsec_lock); > } > @@ -640,6 +649,7 @@ static void bond_ipsec_free_sa(struct xfrm_state *xs) > struct net_device *bond_dev = xs->xso.dev; > struct net_device *real_dev; > netdevice_tracker tracker; > + struct bond_ipsec *ipsec; > struct bonding *bond; > struct slave *slave; > > @@ -659,11 +669,22 @@ static void bond_ipsec_free_sa(struct xfrm_state *xs) > if (!xs->xso.real_dev) > goto out; > > - WARN_ON(xs->xso.real_dev != real_dev); > + mutex_lock(&bond->ipsec_lock); > + list_for_each_entry(ipsec, &bond->ipsec_list, list) { > + if (ipsec->xs == xs) { > + /* do xdo_dev_state_free if real_dev matches, > + * otherwise only remove the list > + */ > + if (real_dev && real_dev->xfrmdev_ops && > + real_dev->xfrmdev_ops->xdo_dev_state_free) > + real_dev->xfrmdev_ops->xdo_dev_state_free(xs); > + 
list_del(&ipsec->list); > + kfree(ipsec); > + break; > + } > + } > + mutex_unlock(&bond->ipsec_lock); > > - if (real_dev && real_dev->xfrmdev_ops && > - real_dev->xfrmdev_ops->xdo_dev_state_free) > - real_dev->xfrmdev_ops->xdo_dev_state_free(xs); > out: > netdev_put(real_dev, &tracker); > }