diff mbox series

[v5,01/11] mt76: add hash lookup for skb on TXS status_list

Message ID 20210804134505.3208-1-greearb@candelatech.com
State New
Headers show
Series [v5,01/11] mt76: add hash lookup for skb on TXS status_list | expand

Commit Message

Ben Greear Aug. 4, 2021, 1:44 p.m. UTC
From: Ben Greear <greearb@candelatech.com>

This improves performance when sending lots of frames that
are requesting being mapped to a TXS callback.

Add comments to help next person understood the tx path
better.

Signed-off-by: Ben Greear <greearb@candelatech.com>
---

v5:  Rebased on top of previous series.

 drivers/net/wireless/mediatek/mt76/mt76.h     | 48 +++++++---
 .../net/wireless/mediatek/mt76/mt7603/mac.c   |  2 +-
 .../net/wireless/mediatek/mt76/mt7615/mac.c   |  2 +-
 .../net/wireless/mediatek/mt76/mt76x02_mac.c  |  2 +-
 .../net/wireless/mediatek/mt76/mt7915/mac.c   |  8 +-
 .../net/wireless/mediatek/mt76/mt7921/mac.c   |  9 +-
 drivers/net/wireless/mediatek/mt76/tx.c       | 90 ++++++++++++++++---
 7 files changed, 132 insertions(+), 29 deletions(-)

Comments

Felix Fietkau Aug. 13, 2021, 4:50 p.m. UTC | #1
On 2021-08-04 15:44, greearb@candelatech.com wrote:
> From: Ben Greear <greearb@candelatech.com>

> 

> This improves performance when sending lots of frames that

> are requesting being mapped to a TXS callback.

> 

> Add comments to help next person understood the tx path

> better.

> 

> Signed-off-by: Ben Greear <greearb@candelatech.com>

> ---

> 

> v5:  Rebased on top of previous series.

> 

>  drivers/net/wireless/mediatek/mt76/mt76.h     | 48 +++++++---

>  .../net/wireless/mediatek/mt76/mt7603/mac.c   |  2 +-

>  .../net/wireless/mediatek/mt76/mt7615/mac.c   |  2 +-

>  .../net/wireless/mediatek/mt76/mt76x02_mac.c  |  2 +-

>  .../net/wireless/mediatek/mt76/mt7915/mac.c   |  8 +-

>  .../net/wireless/mediatek/mt76/mt7921/mac.c   |  9 +-

>  drivers/net/wireless/mediatek/mt76/tx.c       | 90 ++++++++++++++++---

>  7 files changed, 132 insertions(+), 29 deletions(-)

> 

> diff --git a/drivers/net/wireless/mediatek/mt76/mt76.h b/drivers/net/wireless/mediatek/mt76/mt76.h

> index 436bf2b8e2cd..016f563fec39 100644

> --- a/drivers/net/wireless/mediatek/mt76/mt76.h

> +++ b/drivers/net/wireless/mediatek/mt76/mt76.h

> @@ -235,6 +235,14 @@ DECLARE_EWMA(signal, 10, 8);

>  #define MT_WCID_TX_INFO_TXPWR_ADJ	GENMASK(25, 18)

>  #define MT_WCID_TX_INFO_SET		BIT(31)

>  

> +#define MT_PACKET_ID_MASK		GENMASK(6, 0)

> +#define MT_PACKET_ID_NO_ACK		0

> +/* Request TXS, but don't try to match with skb. */

> +#define MT_PACKET_ID_NO_SKB		1

> +#define MT_PACKET_ID_FIRST		2

> +#define MT_PACKET_ID_HAS_RATE		BIT(7)

> +#define MT_PACKET_ID_MAX		(GENMASK(7, 0) - 1)

> +

>  struct mt76_wcid {

>  	struct mt76_rx_tid __rcu *aggr[IEEE80211_NUM_TIDS];

>  

> @@ -246,6 +254,8 @@ struct mt76_wcid {

>  

>  	struct rate_info rate;

>  

> +	struct sk_buff *skb_status_array[MT_PACKET_ID_MAX + 1];

You could add this to reduce the struct size:
#define MT_NUM_STATUS_PACKETS \
	(MT_PACKET_ID_MAX + 1 - MT_PACKET_ID_FIRST)

And then subtract MT_PACKET_ID_FIRST from cache entries.


>  	u16 idx;

>  	u8 hw_key_idx;

>  	u8 hw_key_idx2;

> @@ -302,13 +312,8 @@ struct mt76_rx_tid {

>  #define MT_TX_CB_TXS_DONE		BIT(1)

>  #define MT_TX_CB_TXS_FAILED		BIT(2)

>  

> -#define MT_PACKET_ID_MASK		GENMASK(6, 0)

> -#define MT_PACKET_ID_NO_ACK		0

> -#define MT_PACKET_ID_NO_SKB		1

> -#define MT_PACKET_ID_FIRST		2

> -#define MT_PACKET_ID_HAS_RATE		BIT(7)

> -

> -#define MT_TX_STATUS_SKB_TIMEOUT	HZ

> +/* This is timer for when to give up when waiting for TXS callback. */

> +#define MT_TX_STATUS_SKB_TIMEOUT	(HZ / 8)

I think the way timeouts are checked now, HZ/8 is way too short.
I would recommend checking timeout only for packets where
MT_TX_CB_DMA_DONE is already set, and setting cb->jiffies from within
__mt76_tx_status_skb_done on DMA completion. That should make it
possible to keep the timeout short without running into it in cases
where significant congestion adds huge completion latency.

> @@ -1297,13 +1303,33 @@ mt76_token_put(struct mt76_dev *dev, int token)

>  }

>  

>  static inline int

> -mt76_get_next_pkt_id(struct mt76_wcid *wcid)

> +mt76_get_next_pkt_id(struct mt76_dev *dev, struct mt76_wcid *wcid,

> +		     struct sk_buff *skb)

>  {

> +	struct sk_buff *qskb;

> +

> +	lockdep_assert_held(&dev->status_list.lock);

> +

>  	wcid->packet_id = (wcid->packet_id + 1) & MT_PACKET_ID_MASK;

> -	if (wcid->packet_id == MT_PACKET_ID_NO_ACK ||

> -	    wcid->packet_id == MT_PACKET_ID_NO_SKB)

> +	if (wcid->packet_id < MT_PACKET_ID_FIRST)

>  		wcid->packet_id = MT_PACKET_ID_FIRST;

>  

> +	qskb = wcid->skb_status_array[wcid->packet_id];

> +	if (qskb) {

> +		/* bummer, already waiting on this pid.  See if it is stale. */

> +		struct mt76_tx_cb *cb = mt76_tx_skb_cb(qskb);

> +

> +		if (!time_after(jiffies, cb->jiffies + MT_TX_STATUS_SKB_TIMEOUT)) {

> +			/* ok, not stale.  Increment pid anyway, will try next

> +			 * slot next time

> +			 */

> +			return MT_PACKET_ID_NO_SKB;

> +		}

> +	}

> +

> +	/* cache this skb for fast lookup by packet-id */

> +	wcid->skb_status_array[wcid->packet_id] = skb;

> +

I think mt76_get_next_pkt_id is not a good place for caching the skb.
Better cache it in the same place that also puts the skb in the status
list: mt76_tx_status_skb_add

That way you can drop your (possibly broken) changes to mt7921, which
calls mt76_get_next_pkt_id directly, but does not support tx status
tracking for skbs.

> diff --git a/drivers/net/wireless/mediatek/mt76/mt7915/mac.c b/drivers/net/wireless/mediatek/mt76/mt7915/mac.c

> index d9f52e2611a7..8f5702981900 100644

> --- a/drivers/net/wireless/mediatek/mt76/mt7915/mac.c

> +++ b/drivers/net/wireless/mediatek/mt76/mt7915/mac.c

> @@ -1318,6 +1318,8 @@ mt7915_mac_add_txs_skb(struct mt7915_dev *dev, struct mt76_wcid *wcid, int pid,

>  

>  	mt76_tx_status_lock(mdev, &list);

>  	skb = mt76_tx_status_skb_get(mdev, wcid, pid, &list);

> +

> +	/* TODO:  Gather stats anyway, even if we are not matching on an skb. */

Please drop this comment, since you're deleting in another patch in this
series anyway.

> @@ -1417,10 +1419,14 @@ mt7915_mac_add_txs_skb(struct mt7915_dev *dev, struct mt76_wcid *wcid, int pid,

>  		stats->tx_bw[0]++;

>  		break;

>  	}

> +

> +	/* Cache rate for packets that don't get a TXS callback for some

> +	 * reason.

> +	 */

>  	wcid->rate = rate;

That comment is wrong, wcid->rate is cached because HE rates can't be
reported via skb->cb due to lack of space.


> diff --git a/drivers/net/wireless/mediatek/mt76/tx.c b/drivers/net/wireless/mediatek/mt76/tx.c

> index 6f302acb6e69..4c8504d3c904 100644

> --- a/drivers/net/wireless/mediatek/mt76/tx.c

> +++ b/drivers/net/wireless/mediatek/mt76/tx.c

> @@ -130,15 +154,30 @@ mt76_tx_status_skb_add(struct mt76_dev *dev, struct mt76_wcid *wcid,

>  			     IEEE80211_TX_CTL_RATE_CTRL_PROBE)))

>  		return MT_PACKET_ID_NO_SKB;

>  

> +	/* due to limited range of the pktid (7 bits), we can only

> +	 * have a limited number of outstanding frames.  I think it is OK to

> +	 * check the length outside of a lock since it doesn't matter too much

> +	 * if we read wrong data here.

> +	 * The TX-status callbacks don't always return a callback for an SKB,

> +	 * so the status_list may contain some stale skbs.  Those will be cleaned

> +	 * out periodically, see MT_TX_STATUS_SKB_TIMEOUT.

> +	 */

> +

> +	qlen = skb_queue_len(&dev->status_list);

> +	if (qlen > 120)

> +		return MT_PACKET_ID_NO_SKB;

Checking the length of the per-device status list doesn't make sense,
since pktid allocation is per-wcid.

- Felix
Ben Greear Aug. 13, 2021, 5:28 p.m. UTC | #2
On 8/13/21 9:50 AM, Felix Fietkau wrote:
> 

> On 2021-08-04 15:44, greearb@candelatech.com wrote:

>> From: Ben Greear <greearb@candelatech.com>

>>

>> This improves performance when sending lots of frames that

>> are requesting being mapped to a TXS callback.

>>

>> Add comments to help next person understood the tx path

>> better.

>>

>> Signed-off-by: Ben Greear <greearb@candelatech.com>

>> ---

>>

>> v5:  Rebased on top of previous series.

>>

>>   drivers/net/wireless/mediatek/mt76/mt76.h     | 48 +++++++---

>>   .../net/wireless/mediatek/mt76/mt7603/mac.c   |  2 +-

>>   .../net/wireless/mediatek/mt76/mt7615/mac.c   |  2 +-

>>   .../net/wireless/mediatek/mt76/mt76x02_mac.c  |  2 +-

>>   .../net/wireless/mediatek/mt76/mt7915/mac.c   |  8 +-

>>   .../net/wireless/mediatek/mt76/mt7921/mac.c   |  9 +-

>>   drivers/net/wireless/mediatek/mt76/tx.c       | 90 ++++++++++++++++---

>>   7 files changed, 132 insertions(+), 29 deletions(-)

>>

>> diff --git a/drivers/net/wireless/mediatek/mt76/mt76.h b/drivers/net/wireless/mediatek/mt76/mt76.h

>> index 436bf2b8e2cd..016f563fec39 100644

>> --- a/drivers/net/wireless/mediatek/mt76/mt76.h

>> +++ b/drivers/net/wireless/mediatek/mt76/mt76.h

>> @@ -235,6 +235,14 @@ DECLARE_EWMA(signal, 10, 8);

>>   #define MT_WCID_TX_INFO_TXPWR_ADJ	GENMASK(25, 18)

>>   #define MT_WCID_TX_INFO_SET		BIT(31)

>>   

>> +#define MT_PACKET_ID_MASK		GENMASK(6, 0)

>> +#define MT_PACKET_ID_NO_ACK		0

>> +/* Request TXS, but don't try to match with skb. */

>> +#define MT_PACKET_ID_NO_SKB		1

>> +#define MT_PACKET_ID_FIRST		2

>> +#define MT_PACKET_ID_HAS_RATE		BIT(7)

>> +#define MT_PACKET_ID_MAX		(GENMASK(7, 0) - 1)

>> +

>>   struct mt76_wcid {

>>   	struct mt76_rx_tid __rcu *aggr[IEEE80211_NUM_TIDS];

>>   

>> @@ -246,6 +254,8 @@ struct mt76_wcid {

>>   

>>   	struct rate_info rate;

>>   

>> +	struct sk_buff *skb_status_array[MT_PACKET_ID_MAX + 1];

> You could add this to reduce the struct size:

> #define MT_NUM_STATUS_PACKETS \

> 	(MT_PACKET_ID_MAX + 1 - MT_PACKET_ID_FIRST)

> 

> And then subtract MT_PACKET_ID_FIRST from cache entries.


That saves two void* bytes of memory, and complicates the code a bit?
I can do the change, just doesn't seem worthwhile to me.

>>   	u16 idx;

>>   	u8 hw_key_idx;

>>   	u8 hw_key_idx2;

>> @@ -302,13 +312,8 @@ struct mt76_rx_tid {

>>   #define MT_TX_CB_TXS_DONE		BIT(1)

>>   #define MT_TX_CB_TXS_FAILED		BIT(2)

>>   

>> -#define MT_PACKET_ID_MASK		GENMASK(6, 0)

>> -#define MT_PACKET_ID_NO_ACK		0

>> -#define MT_PACKET_ID_NO_SKB		1

>> -#define MT_PACKET_ID_FIRST		2

>> -#define MT_PACKET_ID_HAS_RATE		BIT(7)

>> -

>> -#define MT_TX_STATUS_SKB_TIMEOUT	HZ

>> +/* This is timer for when to give up when waiting for TXS callback. */

>> +#define MT_TX_STATUS_SKB_TIMEOUT	(HZ / 8)

> I think the way timeouts are checked now, HZ/8 is way too short.

> I would recommend checking timeout only for packets where

> MT_TX_CB_DMA_DONE is already set, and setting cb->jiffies from within

> __mt76_tx_status_skb_done on DMA completion. That should make it

> possible to keep the timeout short without running into it in cases

> where significant congestion adds huge completion latency.


Ok, I like that idea.  What is reasonable timeout from time of DMA done
before we give up on TXS callback?

> 

>> @@ -1297,13 +1303,33 @@ mt76_token_put(struct mt76_dev *dev, int token)

>>   }

>>   

>>   static inline int

>> -mt76_get_next_pkt_id(struct mt76_wcid *wcid)

>> +mt76_get_next_pkt_id(struct mt76_dev *dev, struct mt76_wcid *wcid,

>> +		     struct sk_buff *skb)

>>   {

>> +	struct sk_buff *qskb;

>> +

>> +	lockdep_assert_held(&dev->status_list.lock);

>> +

>>   	wcid->packet_id = (wcid->packet_id + 1) & MT_PACKET_ID_MASK;

>> -	if (wcid->packet_id == MT_PACKET_ID_NO_ACK ||

>> -	    wcid->packet_id == MT_PACKET_ID_NO_SKB)

>> +	if (wcid->packet_id < MT_PACKET_ID_FIRST)

>>   		wcid->packet_id = MT_PACKET_ID_FIRST;

>>   

>> +	qskb = wcid->skb_status_array[wcid->packet_id];

>> +	if (qskb) {

>> +		/* bummer, already waiting on this pid.  See if it is stale. */

>> +		struct mt76_tx_cb *cb = mt76_tx_skb_cb(qskb);

>> +

>> +		if (!time_after(jiffies, cb->jiffies + MT_TX_STATUS_SKB_TIMEOUT)) {

>> +			/* ok, not stale.  Increment pid anyway, will try next

>> +			 * slot next time

>> +			 */

>> +			return MT_PACKET_ID_NO_SKB;

>> +		}

>> +	}

>> +

>> +	/* cache this skb for fast lookup by packet-id */

>> +	wcid->skb_status_array[wcid->packet_id] = skb;

>> +

> I think mt76_get_next_pkt_id is not a good place for caching the skb.

> Better cache it in the same place that also puts the skb in the status

> list: mt76_tx_status_skb_add

> 

> That way you can drop your (possibly broken) changes to mt7921, which

> calls mt76_get_next_pkt_id directly, but does not support tx status

> tracking for skbs.


Ok, I will try that.

> 

>> diff --git a/drivers/net/wireless/mediatek/mt76/mt7915/mac.c b/drivers/net/wireless/mediatek/mt76/mt7915/mac.c

>> index d9f52e2611a7..8f5702981900 100644

>> --- a/drivers/net/wireless/mediatek/mt76/mt7915/mac.c

>> +++ b/drivers/net/wireless/mediatek/mt76/mt7915/mac.c

>> @@ -1318,6 +1318,8 @@ mt7915_mac_add_txs_skb(struct mt7915_dev *dev, struct mt76_wcid *wcid, int pid,

>>   

>>   	mt76_tx_status_lock(mdev, &list);

>>   	skb = mt76_tx_status_skb_get(mdev, wcid, pid, &list);

>> +

>> +	/* TODO:  Gather stats anyway, even if we are not matching on an skb. */

> Please drop this comment, since you're deleting in another patch in this

> series anyway.

> 

>> @@ -1417,10 +1419,14 @@ mt7915_mac_add_txs_skb(struct mt7915_dev *dev, struct mt76_wcid *wcid, int pid,

>>   		stats->tx_bw[0]++;

>>   		break;

>>   	}

>> +

>> +	/* Cache rate for packets that don't get a TXS callback for some

>> +	 * reason.

>> +	 */

>>   	wcid->rate = rate;

> That comment is wrong, wcid->rate is cached because HE rates can't be

> reported via skb->cb due to lack of space.


We can update the rate from txs callback, and and from txfree path,
and also from querying the firmware rate-ctrl registers (I think?).
TXS is disabled for most frames by default.  txfree gives only some
info, not enough.  And polling rate-ctrl registers is slow.

So I think the comment is OK, but I end up modifying the code later anyway,
so I can remove this comment if you prefer.

> 

> 

>> diff --git a/drivers/net/wireless/mediatek/mt76/tx.c b/drivers/net/wireless/mediatek/mt76/tx.c

>> index 6f302acb6e69..4c8504d3c904 100644

>> --- a/drivers/net/wireless/mediatek/mt76/tx.c

>> +++ b/drivers/net/wireless/mediatek/mt76/tx.c

>> @@ -130,15 +154,30 @@ mt76_tx_status_skb_add(struct mt76_dev *dev, struct mt76_wcid *wcid,

>>   			     IEEE80211_TX_CTL_RATE_CTRL_PROBE)))

>>   		return MT_PACKET_ID_NO_SKB;

>>   

>> +	/* due to limited range of the pktid (7 bits), we can only

>> +	 * have a limited number of outstanding frames.  I think it is OK to

>> +	 * check the length outside of a lock since it doesn't matter too much

>> +	 * if we read wrong data here.

>> +	 * The TX-status callbacks don't always return a callback for an SKB,

>> +	 * so the status_list may contain some stale skbs.  Those will be cleaned

>> +	 * out periodically, see MT_TX_STATUS_SKB_TIMEOUT.

>> +	 */

>> +

>> +	qlen = skb_queue_len(&dev->status_list);

>> +	if (qlen > 120)

>> +		return MT_PACKET_ID_NO_SKB;

> Checking the length of the per-device status list doesn't make sense,

> since pktid allocation is per-wcid.


Ok, so just remove this code, or should I set some other higher
limit to bound the list?

Thanks,
Ben

> 

> - Felix

> 



-- 
Ben Greear <greearb@candelatech.com>
Candela Technologies Inc  http://www.candelatech.com
Felix Fietkau Aug. 13, 2021, 5:46 p.m. UTC | #3
On 2021-08-13 19:28, Ben Greear wrote:
> On 8/13/21 9:50 AM, Felix Fietkau wrote:

>> 

>> On 2021-08-04 15:44, greearb@candelatech.com wrote:

>>> From: Ben Greear <greearb@candelatech.com>

>>>

>>> This improves performance when sending lots of frames that

>>> are requesting being mapped to a TXS callback.

>>>

>>> Add comments to help next person understood the tx path

>>> better.

>>>

>>> Signed-off-by: Ben Greear <greearb@candelatech.com>

>>> ---

>>>

>>> v5:  Rebased on top of previous series.

>>>

>>>   drivers/net/wireless/mediatek/mt76/mt76.h     | 48 +++++++---

>>>   .../net/wireless/mediatek/mt76/mt7603/mac.c   |  2 +-

>>>   .../net/wireless/mediatek/mt76/mt7615/mac.c   |  2 +-

>>>   .../net/wireless/mediatek/mt76/mt76x02_mac.c  |  2 +-

>>>   .../net/wireless/mediatek/mt76/mt7915/mac.c   |  8 +-

>>>   .../net/wireless/mediatek/mt76/mt7921/mac.c   |  9 +-

>>>   drivers/net/wireless/mediatek/mt76/tx.c       | 90 ++++++++++++++++---

>>>   7 files changed, 132 insertions(+), 29 deletions(-)

>>>

>>> diff --git a/drivers/net/wireless/mediatek/mt76/mt76.h b/drivers/net/wireless/mediatek/mt76/mt76.h

>>> index 436bf2b8e2cd..016f563fec39 100644

>>> --- a/drivers/net/wireless/mediatek/mt76/mt76.h

>>> +++ b/drivers/net/wireless/mediatek/mt76/mt76.h

>>> @@ -235,6 +235,14 @@ DECLARE_EWMA(signal, 10, 8);

>>>   #define MT_WCID_TX_INFO_TXPWR_ADJ	GENMASK(25, 18)

>>>   #define MT_WCID_TX_INFO_SET		BIT(31)

>>>   

>>> +#define MT_PACKET_ID_MASK		GENMASK(6, 0)

>>> +#define MT_PACKET_ID_NO_ACK		0

>>> +/* Request TXS, but don't try to match with skb. */

>>> +#define MT_PACKET_ID_NO_SKB		1

>>> +#define MT_PACKET_ID_FIRST		2

>>> +#define MT_PACKET_ID_HAS_RATE		BIT(7)

>>> +#define MT_PACKET_ID_MAX		(GENMASK(7, 0) - 1)

>>> +

>>>   struct mt76_wcid {

>>>   	struct mt76_rx_tid __rcu *aggr[IEEE80211_NUM_TIDS];

>>>   

>>> @@ -246,6 +254,8 @@ struct mt76_wcid {

>>>   

>>>   	struct rate_info rate;

>>>   

>>> +	struct sk_buff *skb_status_array[MT_PACKET_ID_MAX + 1];

>> You could add this to reduce the struct size:

>> #define MT_NUM_STATUS_PACKETS \

>> 	(MT_PACKET_ID_MAX + 1 - MT_PACKET_ID_FIRST)

>> 

>> And then subtract MT_PACKET_ID_FIRST from cache entries.

> 

> That saves two void* bytes of memory, and complicates the code a bit?

> I can do the change, just doesn't seem worthwhile to me.

It's not much more complicated (simple subtraction in very few places),
and the memory saved is per station.

>>>   	u16 idx;

>>>   	u8 hw_key_idx;

>>>   	u8 hw_key_idx2;

>>> @@ -302,13 +312,8 @@ struct mt76_rx_tid {

>>>   #define MT_TX_CB_TXS_DONE		BIT(1)

>>>   #define MT_TX_CB_TXS_FAILED		BIT(2)

>>>   

>>> -#define MT_PACKET_ID_MASK		GENMASK(6, 0)

>>> -#define MT_PACKET_ID_NO_ACK		0

>>> -#define MT_PACKET_ID_NO_SKB		1

>>> -#define MT_PACKET_ID_FIRST		2

>>> -#define MT_PACKET_ID_HAS_RATE		BIT(7)

>>> -

>>> -#define MT_TX_STATUS_SKB_TIMEOUT	HZ

>>> +/* This is timer for when to give up when waiting for TXS callback. */

>>> +#define MT_TX_STATUS_SKB_TIMEOUT	(HZ / 8)

>> I think the way timeouts are checked now, HZ/8 is way too short.

>> I would recommend checking timeout only for packets where

>> MT_TX_CB_DMA_DONE is already set, and setting cb->jiffies from within

>> __mt76_tx_status_skb_done on DMA completion. That should make it

>> possible to keep the timeout short without running into it in cases

>> where significant congestion adds huge completion latency.

> 

> Ok, I like that idea.  What is reasonable timeout from time of DMA done

> before we give up on TXS callback?

Your value of HZ / 8 seems reasonable to me for that case.

>>> diff --git a/drivers/net/wireless/mediatek/mt76/mt7915/mac.c b/drivers/net/wireless/mediatek/mt76/mt7915/mac.c

>>> index d9f52e2611a7..8f5702981900 100644

>>> --- a/drivers/net/wireless/mediatek/mt76/mt7915/mac.c

>>> +++ b/drivers/net/wireless/mediatek/mt76/mt7915/mac.c

>>> @@ -1318,6 +1318,8 @@ mt7915_mac_add_txs_skb(struct mt7915_dev *dev, struct mt76_wcid *wcid, int pid,

>>>   

>>>   	mt76_tx_status_lock(mdev, &list);

>>>   	skb = mt76_tx_status_skb_get(mdev, wcid, pid, &list);

>>> +

>>> +	/* TODO:  Gather stats anyway, even if we are not matching on an skb. */

>> Please drop this comment, since you're deleting in another patch in this

>> series anyway.

>> 

>>> @@ -1417,10 +1419,14 @@ mt7915_mac_add_txs_skb(struct mt7915_dev *dev, struct mt76_wcid *wcid, int pid,

>>>   		stats->tx_bw[0]++;

>>>   		break;

>>>   	}

>>> +

>>> +	/* Cache rate for packets that don't get a TXS callback for some

>>> +	 * reason.

>>> +	 */

>>>   	wcid->rate = rate;

>> That comment is wrong, wcid->rate is cached because HE rates can't be

>> reported via skb->cb due to lack of space.

> 

> We can update the rate from txs callback, and and from txfree path,

> and also from querying the firmware rate-ctrl registers (I think?).

> TXS is disabled for most frames by default.  txfree gives only some

> info, not enough.  And polling rate-ctrl registers is slow.

> 

> So I think the comment is OK, but I end up modifying the code later anyway,

> so I can remove this comment if you prefer.

Yes, please do that.

>>> diff --git a/drivers/net/wireless/mediatek/mt76/tx.c b/drivers/net/wireless/mediatek/mt76/tx.c

>>> index 6f302acb6e69..4c8504d3c904 100644

>>> --- a/drivers/net/wireless/mediatek/mt76/tx.c

>>> +++ b/drivers/net/wireless/mediatek/mt76/tx.c

>>> @@ -130,15 +154,30 @@ mt76_tx_status_skb_add(struct mt76_dev *dev, struct mt76_wcid *wcid,

>>>   			     IEEE80211_TX_CTL_RATE_CTRL_PROBE)))

>>>   		return MT_PACKET_ID_NO_SKB;

>>>   

>>> +	/* due to limited range of the pktid (7 bits), we can only

>>> +	 * have a limited number of outstanding frames.  I think it is OK to

>>> +	 * check the length outside of a lock since it doesn't matter too much

>>> +	 * if we read wrong data here.

>>> +	 * The TX-status callbacks don't always return a callback for an SKB,

>>> +	 * so the status_list may contain some stale skbs.  Those will be cleaned

>>> +	 * out periodically, see MT_TX_STATUS_SKB_TIMEOUT.

>>> +	 */

>>> +

>>> +	qlen = skb_queue_len(&dev->status_list);

>>> +	if (qlen > 120)

>>> +		return MT_PACKET_ID_NO_SKB;

>> Checking the length of the per-device status list doesn't make sense,

>> since pktid allocation is per-wcid.

> 

> Ok, so just remove this code, or should I set some other higher

> limit to bound the list?

You could just check for a duplicate skb_status_array entry.

- Felix
Ben Greear Aug. 13, 2021, 6:01 p.m. UTC | #4
On 8/13/21 10:46 AM, Felix Fietkau wrote:

>>>> diff --git a/drivers/net/wireless/mediatek/mt76/tx.c b/drivers/net/wireless/mediatek/mt76/tx.c

>>>> index 6f302acb6e69..4c8504d3c904 100644

>>>> --- a/drivers/net/wireless/mediatek/mt76/tx.c

>>>> +++ b/drivers/net/wireless/mediatek/mt76/tx.c

>>>> @@ -130,15 +154,30 @@ mt76_tx_status_skb_add(struct mt76_dev *dev, struct mt76_wcid *wcid,

>>>>    			     IEEE80211_TX_CTL_RATE_CTRL_PROBE)))

>>>>    		return MT_PACKET_ID_NO_SKB;

>>>>    

>>>> +	/* due to limited range of the pktid (7 bits), we can only

>>>> +	 * have a limited number of outstanding frames.  I think it is OK to

>>>> +	 * check the length outside of a lock since it doesn't matter too much

>>>> +	 * if we read wrong data here.

>>>> +	 * The TX-status callbacks don't always return a callback for an SKB,

>>>> +	 * so the status_list may contain some stale skbs.  Those will be cleaned

>>>> +	 * out periodically, see MT_TX_STATUS_SKB_TIMEOUT.

>>>> +	 */

>>>> +

>>>> +	qlen = skb_queue_len(&dev->status_list);

>>>> +	if (qlen > 120)

>>>> +		return MT_PACKET_ID_NO_SKB;

>>> Checking the length of the per-device status list doesn't make sense,

>>> since pktid allocation is per-wcid.

>>

>> Ok, so just remove this code, or should I set some other higher

>> limit to bound the list?

> You could just check for a duplicate skb_status_array entry.


Ok, that will happen anyway when searching for next wcid pkt-id.

The check above was a quick bail-out before locks were acquired.

I'll just remove that qlen check...

Thanks,
Ben

> 

> - Felix

> 



-- 
Ben Greear <greearb@candelatech.com>
Candela Technologies Inc  http://www.candelatech.com
diff mbox series

Patch

diff --git a/drivers/net/wireless/mediatek/mt76/mt76.h b/drivers/net/wireless/mediatek/mt76/mt76.h
index 436bf2b8e2cd..016f563fec39 100644
--- a/drivers/net/wireless/mediatek/mt76/mt76.h
+++ b/drivers/net/wireless/mediatek/mt76/mt76.h
@@ -235,6 +235,14 @@  DECLARE_EWMA(signal, 10, 8);
 #define MT_WCID_TX_INFO_TXPWR_ADJ	GENMASK(25, 18)
 #define MT_WCID_TX_INFO_SET		BIT(31)
 
+#define MT_PACKET_ID_MASK		GENMASK(6, 0)
+#define MT_PACKET_ID_NO_ACK		0
+/* Request TXS, but don't try to match with skb. */
+#define MT_PACKET_ID_NO_SKB		1
+#define MT_PACKET_ID_FIRST		2
+#define MT_PACKET_ID_HAS_RATE		BIT(7)
+#define MT_PACKET_ID_MAX		(GENMASK(7, 0) - 1)
+
 struct mt76_wcid {
 	struct mt76_rx_tid __rcu *aggr[IEEE80211_NUM_TIDS];
 
@@ -246,6 +254,8 @@  struct mt76_wcid {
 
 	struct rate_info rate;
 
+	struct sk_buff *skb_status_array[MT_PACKET_ID_MAX + 1];
+
 	u16 idx;
 	u8 hw_key_idx;
 	u8 hw_key_idx2;
@@ -302,13 +312,8 @@  struct mt76_rx_tid {
 #define MT_TX_CB_TXS_DONE		BIT(1)
 #define MT_TX_CB_TXS_FAILED		BIT(2)
 
-#define MT_PACKET_ID_MASK		GENMASK(6, 0)
-#define MT_PACKET_ID_NO_ACK		0
-#define MT_PACKET_ID_NO_SKB		1
-#define MT_PACKET_ID_FIRST		2
-#define MT_PACKET_ID_HAS_RATE		BIT(7)
-
-#define MT_TX_STATUS_SKB_TIMEOUT	HZ
+/* This is timer for when to give up when waiting for TXS callback. */
+#define MT_TX_STATUS_SKB_TIMEOUT	(HZ / 8)
 
 struct mt76_tx_cb {
 	unsigned long jiffies;
@@ -651,6 +656,7 @@  struct mt76_dev {
 	spinlock_t cc_lock;
 
 	u32 cur_cc_bss_rx;
+	unsigned long next_status_jiffies;
 
 	struct mt76_rx_status rx_ampdu_status;
 	u32 rx_ampdu_len;
@@ -1090,7 +1096,7 @@  struct sk_buff *mt76_tx_status_skb_get(struct mt76_dev *dev,
 				       struct mt76_wcid *wcid, int pktid,
 				       struct sk_buff_head *list);
 void mt76_tx_status_skb_done(struct mt76_dev *dev, struct sk_buff *skb,
-			     struct sk_buff_head *list);
+			     struct sk_buff_head *list, struct mt76_wcid *wcid);
 void __mt76_tx_complete_skb(struct mt76_dev *dev, u16 wcid, struct sk_buff *skb,
 			    struct list_head *free_list);
 static inline void
@@ -1297,13 +1303,33 @@  mt76_token_put(struct mt76_dev *dev, int token)
 }
 
 static inline int
-mt76_get_next_pkt_id(struct mt76_wcid *wcid)
+mt76_get_next_pkt_id(struct mt76_dev *dev, struct mt76_wcid *wcid,
+		     struct sk_buff *skb)
 {
+	struct sk_buff *qskb;
+
+	lockdep_assert_held(&dev->status_list.lock);
+
 	wcid->packet_id = (wcid->packet_id + 1) & MT_PACKET_ID_MASK;
-	if (wcid->packet_id == MT_PACKET_ID_NO_ACK ||
-	    wcid->packet_id == MT_PACKET_ID_NO_SKB)
+	if (wcid->packet_id < MT_PACKET_ID_FIRST)
 		wcid->packet_id = MT_PACKET_ID_FIRST;
 
+	qskb = wcid->skb_status_array[wcid->packet_id];
+	if (qskb) {
+		/* bummer, already waiting on this pid.  See if it is stale. */
+		struct mt76_tx_cb *cb = mt76_tx_skb_cb(qskb);
+
+		if (!time_after(jiffies, cb->jiffies + MT_TX_STATUS_SKB_TIMEOUT)) {
+			/* ok, not stale.  Increment pid anyway, will try next
+			 * slot next time
+			 */
+			return MT_PACKET_ID_NO_SKB;
+		}
+	}
+
+	/* cache this skb for fast lookup by packet-id */
+	wcid->skb_status_array[wcid->packet_id] = skb;
+
 	return wcid->packet_id;
 }
 #endif
diff --git a/drivers/net/wireless/mediatek/mt76/mt7603/mac.c b/drivers/net/wireless/mediatek/mt76/mt7603/mac.c
index 3972c56136a2..2f268eb7c1e6 100644
--- a/drivers/net/wireless/mediatek/mt76/mt7603/mac.c
+++ b/drivers/net/wireless/mediatek/mt76/mt7603/mac.c
@@ -1230,7 +1230,7 @@  mt7603_mac_add_txs_skb(struct mt7603_dev *dev, struct mt7603_sta *sta, int pid,
 			info->status.rates[0].idx = -1;
 		}
 
-		mt76_tx_status_skb_done(mdev, skb, &list);
+		mt76_tx_status_skb_done(mdev, skb, &list, &sta->wcid);
 	}
 	mt76_tx_status_unlock(mdev, &list);
 
diff --git a/drivers/net/wireless/mediatek/mt76/mt7615/mac.c b/drivers/net/wireless/mediatek/mt76/mt7615/mac.c
index ff3f85e4087c..381a998817d4 100644
--- a/drivers/net/wireless/mediatek/mt76/mt7615/mac.c
+++ b/drivers/net/wireless/mediatek/mt76/mt7615/mac.c
@@ -1433,7 +1433,7 @@  static bool mt7615_mac_add_txs_skb(struct mt7615_dev *dev,
 			info->status.rates[0].idx = -1;
 		}
 
-		mt76_tx_status_skb_done(mdev, skb, &list);
+		mt76_tx_status_skb_done(mdev, skb, &list, &sta->wcid);
 	}
 	mt76_tx_status_unlock(mdev, &list);
 
diff --git a/drivers/net/wireless/mediatek/mt76/mt76x02_mac.c b/drivers/net/wireless/mediatek/mt76/mt76x02_mac.c
index c32e6dc68773..fce020e64678 100644
--- a/drivers/net/wireless/mediatek/mt76/mt76x02_mac.c
+++ b/drivers/net/wireless/mediatek/mt76/mt76x02_mac.c
@@ -622,7 +622,7 @@  void mt76x02_send_tx_status(struct mt76x02_dev *dev,
 		info = *status.info;
 		len = status.skb->len;
 		ac = skb_get_queue_mapping(status.skb);
-		mt76_tx_status_skb_done(mdev, status.skb, &list);
+		mt76_tx_status_skb_done(mdev, status.skb, &list, wcid);
 	} else if (msta) {
 		len = status.info->status.ampdu_len * ewma_pktlen_read(&msta->pktlen);
 		ac = FIELD_GET(MT_PKTID_AC, cur_pktid);
diff --git a/drivers/net/wireless/mediatek/mt76/mt7915/mac.c b/drivers/net/wireless/mediatek/mt76/mt7915/mac.c
index d9f52e2611a7..8f5702981900 100644
--- a/drivers/net/wireless/mediatek/mt76/mt7915/mac.c
+++ b/drivers/net/wireless/mediatek/mt76/mt7915/mac.c
@@ -1318,6 +1318,8 @@  mt7915_mac_add_txs_skb(struct mt7915_dev *dev, struct mt76_wcid *wcid, int pid,
 
 	mt76_tx_status_lock(mdev, &list);
 	skb = mt76_tx_status_skb_get(mdev, wcid, pid, &list);
+
+	/* TODO:  Gather stats anyway, even if we are not matching on an skb. */
 	if (!skb)
 		goto out;
 
@@ -1417,10 +1419,14 @@  mt7915_mac_add_txs_skb(struct mt7915_dev *dev, struct mt76_wcid *wcid, int pid,
 		stats->tx_bw[0]++;
 		break;
 	}
+
+	/* Cache rate for packets that don't get a TXS callback for some
+	 * reason.
+	 */
 	wcid->rate = rate;
 
 out:
-	mt76_tx_status_skb_done(mdev, skb, &list);
+	mt76_tx_status_skb_done(mdev, skb, &list, wcid);
 	mt76_tx_status_unlock(mdev, &list);
 
 	return !!skb;
diff --git a/drivers/net/wireless/mediatek/mt76/mt7921/mac.c b/drivers/net/wireless/mediatek/mt76/mt7921/mac.c
index 76985a6b3be5..219c17d77e46 100644
--- a/drivers/net/wireless/mediatek/mt76/mt7921/mac.c
+++ b/drivers/net/wireless/mediatek/mt76/mt7921/mac.c
@@ -732,7 +732,9 @@  mt7921_mac_write_txwi_80211(struct mt7921_dev *dev, __le32 *txwi,
 	txwi[7] |= cpu_to_le32(val);
 }
 
-static void mt7921_update_txs(struct mt76_wcid *wcid, __le32 *txwi)
+static void mt7921_update_txs(struct mt7921_dev *dev,
+			      struct mt76_wcid *wcid, __le32 *txwi,
+			      struct sk_buff *skb)
 {
 	struct mt7921_sta *msta = container_of(wcid, struct mt7921_sta, wcid);
 	u32 pid, frame_type = FIELD_GET(MT_TXD2_FRAME_TYPE, txwi[2]);
@@ -744,7 +746,7 @@  static void mt7921_update_txs(struct mt76_wcid *wcid, __le32 *txwi)
 		return;
 
 	msta->next_txs_ts = jiffies + msecs_to_jiffies(250);
-	pid = mt76_get_next_pkt_id(wcid);
+	pid = mt76_get_next_pkt_id(&dev->mt76, wcid, skb);
 	txwi[5] |= cpu_to_le32(MT_TXD5_TX_STATUS_MCU |
 			       FIELD_PREP(MT_TXD5_PID, pid));
 }
@@ -771,7 +773,6 @@  void mt7921_mac_write_txwi(struct mt7921_dev *dev, __le32 *txwi,
 {
 	struct ieee80211_tx_info *info = IEEE80211_SKB_CB(skb);
 	struct ieee80211_vif *vif = info->control.vif;
-	struct mt76_phy *mphy = &dev->mphy;
 	u8 p_fmt, q_idx, omac_idx = 0, wmm_idx = 0;
 	bool is_8023 = info->flags & IEEE80211_TX_CTL_HW_80211_ENCAP;
 	u16 tx_count = 15;
@@ -839,7 +840,7 @@  void mt7921_mac_write_txwi(struct mt7921_dev *dev, __le32 *txwi,
 		txwi[3] |= cpu_to_le32(MT_TXD3_BA_DISABLE);
 	}
 
-	mt7921_update_txs(wcid, txwi);
+	mt7921_update_txs(dev, wcid, txwi, skb);
 }
 
 static void
diff --git a/drivers/net/wireless/mediatek/mt76/tx.c b/drivers/net/wireless/mediatek/mt76/tx.c
index 6f302acb6e69..4c8504d3c904 100644
--- a/drivers/net/wireless/mediatek/mt76/tx.c
+++ b/drivers/net/wireless/mediatek/mt76/tx.c
@@ -36,6 +36,7 @@  mt76_tx_check_agg_ssn(struct ieee80211_sta *sta, struct sk_buff *skb)
 }
 EXPORT_SYMBOL_GPL(mt76_tx_check_agg_ssn);
 
+/* Lock list, and initialize the timed-out-skb list object. */
 void
 mt76_tx_status_lock(struct mt76_dev *dev, struct sk_buff_head *list)
 		   __acquires(&dev->status_list.lock)
@@ -45,6 +46,9 @@  mt76_tx_status_lock(struct mt76_dev *dev, struct sk_buff_head *list)
 }
 EXPORT_SYMBOL_GPL(mt76_tx_status_lock);
 
+/* Unlock list, and use last-received status for any skbs that
+ * timed out getting TXS callback (they are on the list passed in
+ */
 void
 mt76_tx_status_unlock(struct mt76_dev *dev, struct sk_buff_head *list)
 		      __releases(&dev->status_list.lock)
@@ -80,20 +84,39 @@  EXPORT_SYMBOL_GPL(mt76_tx_status_unlock);
 
 static void
 __mt76_tx_status_skb_done(struct mt76_dev *dev, struct sk_buff *skb, u8 flags,
-			  struct sk_buff_head *list)
+			  struct sk_buff_head *list, struct mt76_wcid *wcid)
 {
 	struct ieee80211_tx_info *info = IEEE80211_SKB_CB(skb);
 	struct mt76_tx_cb *cb = mt76_tx_skb_cb(skb);
 	u8 done = MT_TX_CB_DMA_DONE | MT_TX_CB_TXS_DONE;
 
+	lockdep_assert_held(&dev->status_list.lock);
+
 	flags |= cb->flags;
 	cb->flags = flags;
 
+	/* Only process skb with TXS status has been received and also
+	 * the txfree (DMA_DONE) callback has happened.
+	 */
 	if ((flags & done) != done)
 		return;
 
 	__skb_unlink(skb, &dev->status_list);
 
+	rcu_read_lock();
+	/* calling code may not know wcid, for instance in the tx_status_check
+	 * path, look it up in that case.
+	 */
+	if (!wcid)
+		wcid = rcu_dereference(dev->wcid[cb->wcid]);
+
+	/* Make sure we clear any cached skb. */
+	if (wcid) {
+		if (!(WARN_ON_ONCE(cb->pktid >= ARRAY_SIZE(wcid->skb_status_array))))
+			wcid->skb_status_array[cb->pktid] = NULL;
+	}
+	rcu_read_unlock();
+
 	/* Tx status can be unreliable. if it fails, mark the frame as ACKed */
 	if (flags & MT_TX_CB_TXS_FAILED) {
 		info->status.rates[0].count = 0;
@@ -106,9 +129,9 @@  __mt76_tx_status_skb_done(struct mt76_dev *dev, struct sk_buff *skb, u8 flags,
 
 void
 mt76_tx_status_skb_done(struct mt76_dev *dev, struct sk_buff *skb,
-			struct sk_buff_head *list)
+			struct sk_buff_head *list, struct mt76_wcid *wcid)
 {
-	__mt76_tx_status_skb_done(dev, skb, MT_TX_CB_TXS_DONE, list);
+	__mt76_tx_status_skb_done(dev, skb, MT_TX_CB_TXS_DONE, list, wcid);
 }
 EXPORT_SYMBOL_GPL(mt76_tx_status_skb_done);
 
@@ -119,6 +142,7 @@  mt76_tx_status_skb_add(struct mt76_dev *dev, struct mt76_wcid *wcid,
 	struct ieee80211_tx_info *info = IEEE80211_SKB_CB(skb);
 	struct mt76_tx_cb *cb = mt76_tx_skb_cb(skb);
 	int pid;
+	int qlen;
 
 	if (!wcid)
 		return MT_PACKET_ID_NO_ACK;
@@ -130,15 +154,30 @@  mt76_tx_status_skb_add(struct mt76_dev *dev, struct mt76_wcid *wcid,
 			     IEEE80211_TX_CTL_RATE_CTRL_PROBE)))
 		return MT_PACKET_ID_NO_SKB;
 
+	/* due to limited range of the pktid (7 bits), we can only
+	 * have a limited number of outstanding frames.  I think it is OK to
+	 * check the length outside of a lock since it doesn't matter too much
+	 * if we read wrong data here.
+	 * The TX-status callbacks don't always return a callback for an SKB,
+	 * so the status_list may contain some stale skbs.  Those will be cleaned
+	 * out periodically, see MT_TX_STATUS_SKB_TIMEOUT.
+	 */
+
+	qlen = skb_queue_len(&dev->status_list);
+	if (qlen > 120)
+		return MT_PACKET_ID_NO_SKB;
+
 	spin_lock_bh(&dev->status_list.lock);
 
 	memset(cb, 0, sizeof(*cb));
-	pid = mt76_get_next_pkt_id(wcid);
+	pid = mt76_get_next_pkt_id(dev, wcid, skb);
 	cb->wcid = wcid->idx;
 	cb->pktid = pid;
 	cb->jiffies = jiffies;
 
-	__skb_queue_tail(&dev->status_list, skb);
+	if (cb->pktid != MT_PACKET_ID_NO_SKB)
+		__skb_queue_tail(&dev->status_list, skb);
+
 	spin_unlock_bh(&dev->status_list.lock);
 
 	return pid;
@@ -150,25 +189,56 @@  mt76_tx_status_skb_get(struct mt76_dev *dev, struct mt76_wcid *wcid, int pktid,
 		       struct sk_buff_head *list)
 {
 	struct sk_buff *skb, *tmp;
+	struct sk_buff *rvskb = NULL;
+
+	/* If pktid is < first-valid-id, then it is not something we requested
+	 * TXS for, so we will not find SKB.  Bail out early in that case,
+	 * unless we need to walk due to stale-skb-reaper timeout.
+	 */
+	if (pktid < MT_PACKET_ID_FIRST) {
+		if (!time_after(jiffies, dev->next_status_jiffies))
+			return NULL;
+		goto check_list;
+	}
+
+	if (wcid) {
+		lockdep_assert_held(&dev->status_list.lock);
+		if (WARN_ON_ONCE(pktid >= ARRAY_SIZE(wcid->skb_status_array))) {
+			dev_err(dev->dev, "invalid pktid: %d  status-array-size: %d\n",
+				pktid, (int)(ARRAY_SIZE(wcid->skb_status_array)));
+			WARN_ON_ONCE(true);
+			goto check_list;
+		}
 
+		skb = wcid->skb_status_array[pktid];
+
+		if (skb && !time_after(jiffies, dev->next_status_jiffies))
+			return skb;
+	}
+
+check_list:
 	skb_queue_walk_safe(&dev->status_list, skb, tmp) {
 		struct mt76_tx_cb *cb = mt76_tx_skb_cb(skb);
 
 		if (wcid && cb->wcid != wcid->idx)
 			continue;
 
-		if (cb->pktid == pktid)
-			return skb;
+		if (cb->pktid == pktid) {
+			/* Found our skb, but check for timeouts too */
+			rvskb = skb;
+			continue;
+		}
 
 		if (pktid >= 0 && !time_after(jiffies, cb->jiffies +
 					      MT_TX_STATUS_SKB_TIMEOUT))
 			continue;
 
 		__mt76_tx_status_skb_done(dev, skb, MT_TX_CB_TXS_FAILED |
-						    MT_TX_CB_TXS_DONE, list);
+					  MT_TX_CB_TXS_DONE, list, wcid);
 	}
+	dev->next_status_jiffies = jiffies + MT_TX_STATUS_SKB_TIMEOUT + 1;
 
-	return NULL;
+	return rvskb;
 }
 EXPORT_SYMBOL_GPL(mt76_tx_status_skb_get);
 
@@ -238,7 +308,7 @@  void __mt76_tx_complete_skb(struct mt76_dev *dev, u16 wcid_idx, struct sk_buff *
 	}
 
 	mt76_tx_status_lock(dev, &list);
-	__mt76_tx_status_skb_done(dev, skb, MT_TX_CB_DMA_DONE, &list);
+	__mt76_tx_status_skb_done(dev, skb, MT_TX_CB_DMA_DONE, &list, wcid);
 	mt76_tx_status_unlock(dev, &list);
 
 out: