diff mbox series

[2/2] wifi: rtw88: usb: Preallocate and reuse the RX skbs

Message ID 4e83ae71-60e2-4f24-a251-18cd59543d6d@gmail.com
State New
Headers show
Series [1/2] wifi: rtw88: usb: Copy instead of cloning the RX skb | expand

Commit Message

Bitterblue Smith Nov. 14, 2024, 11:07 p.m. UTC
The USB driver uses four USB Request Blocks for RX. Before submitting
one, it allocates a 32768 byte skb for the RX data. This allocation can
fail, maybe due to temporary memory fragmentation. When the allocation
fails, the corresponding URB is never submitted again. After four such
allocation failures, all RX stops because the driver is not requesting
data from the device anymore.

Don't allocate a 32768 byte skb when submitting a USB Request Block
(which happens very often). Instead preallocate 8 such skbs, and reuse
them over and over. If all 8 are busy, allocate a new one. This is
pretty rare. If the allocation fails, use a work to try again later.
When there are enough free skbs again, free the excess skbs.

Also, use WQ_BH for the RX workqueue. With a normal or high priority
workqueue the skbs are processed too slowly when the system is even a
little busy, like when opening a new page in a browser, and the driver
runs out of free skbs and allocates a lot of new ones.

Move C2H_ADAPTIVITY to the c2h workqueue instead of handling it directly
from rtw_fw_c2h_cmd_rx_irqsafe(), which runs in the RX workqueue. It
reads hardware registers, which is not "irqsafe" with USB.

This is more or less what the out-of-tree Realtek drivers do, except
they use a tasklet instead of a BH workqueue.

Tested with RTL8723DU, RTL8821AU, RTL8812AU, RTL8812BU, RTL8822CU,
RTL8811CU.

Closes: https://lore.kernel.org/linux-wireless/6e7ecb47-7ea0-433a-a19f-05f88a2edf6b@gmail.com/
Signed-off-by: Bitterblue Smith <rtl8821cerfe2@gmail.com>
---
 drivers/net/wireless/realtek/rtw88/fw.c  |  7 +--
 drivers/net/wireless/realtek/rtw88/usb.c | 73 ++++++++++++++++++++----
 drivers/net/wireless/realtek/rtw88/usb.h |  3 +
 3 files changed, 67 insertions(+), 16 deletions(-)

Comments

Bitterblue Smith Nov. 25, 2024, 11:49 p.m. UTC | #1
On 19/11/2024 02:50, Ping-Ke Shih wrote:
> Bitterblue Smith <rtl8821cerfe2@gmail.com> wrote:
>> The USB driver uses four USB Request Blocks for RX. Before submitting
>> one, it allocates a 32768 byte skb for the RX data. This allocation can
>> fail, maybe due to temporary memory fragmentation. When the allocation
>> fails, the corresponding URB is never submitted again. After four such
>> allocation failures, all RX stops because the driver is not requesting
>> data from the device anymore.
>>
>> Don't allocate a 32768 byte skb when submitting a USB Request Block
>> (which happens very often). Instead preallocate 8 such skbs, and reuse
>> them over and over. If all 8 are busy, allocate a new one. This is
>> pretty rare. If the allocation fails, use a work to try again later.
>> When there are enough free skbs again, free the excess skbs.
>>
>> Also, use WQ_BH for the RX workqueue. With a normal or high priority
>> workqueue the skbs are processed too slowly when the system is even a
>> little busy, like when opening a new page in a browser, and the driver
>> runs out of free skbs and allocates a lot of new ones.
>>
>> Move C2H_ADAPTIVITY to the c2h workqueue instead of handling it directly
>> from rtw_fw_c2h_cmd_rx_irqsafe(), which runs in the RX workqueue. It
>> reads hardware registers, which is not "irqsafe" with USB.
> 
> This part should be in a separated patch. 
> 

Do you mean just C2H_ADAPTIVITY or WQ_BH as well?

>>
>> This is more or less what the out-of-tree Realtek drivers do, except
>> they use a tasklet instead of a BH workqueue.
>>
>> Tested with RTL8723DU, RTL8821AU, RTL8812AU, RTL8812BU, RTL8822CU,
>> RTL8811CU.
>>
>> Closes: https://lore.kernel.org/linux-wireless/6e7ecb47-7ea0-433a-a19f-05f88a2edf6b@gmail.com/
>> Signed-off-by: Bitterblue Smith <rtl8821cerfe2@gmail.com>
>> ---
>>  drivers/net/wireless/realtek/rtw88/fw.c  |  7 +--
>>  drivers/net/wireless/realtek/rtw88/usb.c | 73 ++++++++++++++++++++----
>>  drivers/net/wireless/realtek/rtw88/usb.h |  3 +
>>  3 files changed, 67 insertions(+), 16 deletions(-)
>>
>> diff --git a/drivers/net/wireless/realtek/rtw88/fw.c b/drivers/net/wireless/realtek/rtw88/fw.c
>> index e6e9946fbf44..02389b7c6876 100644
>> --- a/drivers/net/wireless/realtek/rtw88/fw.c
>> +++ b/drivers/net/wireless/realtek/rtw88/fw.c
>> @@ -332,6 +332,9 @@ void rtw_fw_c2h_cmd_handle(struct rtw_dev *rtwdev, struct sk_buff *skb)
>>         case C2H_RA_RPT:
>>                 rtw_fw_ra_report_handle(rtwdev, c2h->payload, len);
>>                 break;
>> +       case C2H_ADAPTIVITY:
>> +               rtw_fw_adaptivity_result(rtwdev, c2h->payload, len);
>> +               break;
>>         default:
>>                 rtw_dbg(rtwdev, RTW_DBG_FW, "C2H 0x%x isn't handled\n", c2h->id);
>>                 break;
>> @@ -367,10 +370,6 @@ void rtw_fw_c2h_cmd_rx_irqsafe(struct rtw_dev *rtwdev, u32 pkt_offset,
>>                 rtw_fw_scan_result(rtwdev, c2h->payload, len);
>>                 dev_kfree_skb_any(skb);
>>                 break;
>> -       case C2H_ADAPTIVITY:
>> -               rtw_fw_adaptivity_result(rtwdev, c2h->payload, len);
>> -               dev_kfree_skb_any(skb);
>> -               break;
>>         default:
>>                 /* pass offset for further operation */
>>                 *((u32 *)skb->cb) = pkt_offset;
>> diff --git a/drivers/net/wireless/realtek/rtw88/usb.c b/drivers/net/wireless/realtek/rtw88/usb.c
>> index 727569d4ef4b..5c2dfa2ced92 100644
>> --- a/drivers/net/wireless/realtek/rtw88/usb.c
>> +++ b/drivers/net/wireless/realtek/rtw88/usb.c
>> @@ -585,7 +585,7 @@ static void rtw_usb_rx_handler(struct work_struct *work)
>>                                 goto skip_packet;
>>                         }
>>
>> -                       skb = alloc_skb(skb_len, GFP_KERNEL);
>> +                       skb = alloc_skb(skb_len, GFP_ATOMIC);
>>                         if (!skb) {
>>                                 rtw_dbg(rtwdev, RTW_DBG_USB,
>>                                         "failed to allocate RX skb of size %u\n",
>> @@ -612,7 +612,11 @@ static void rtw_usb_rx_handler(struct work_struct *work)
>>                         rx_desc += next_pkt;
>>                 } while (rx_desc + pkt_desc_sz < rx_skb->data + rx_skb->len);
>>
>> -               dev_kfree_skb_any(rx_skb);
>> +               if (skb_queue_len(&rtwusb->rx_free_queue) >=
>> +                   RTW_USB_RX_SKB_NUM - RTW_USB_RXCB_NUM)
>> +                       dev_kfree_skb_any(rx_skb);
>> +               else
>> +                       skb_queue_tail(&rtwusb->rx_free_queue, rx_skb);
> 
> Why not just queue and reuse rx_skb for all? That would be simpler. 
> 

I didn't want to let it allocate too many skbs. I didn't find any kind
of limit in the official drivers, so maybe it would be fine.

>>         }
>>  }
>>
>> @@ -621,23 +625,57 @@ static void rtw_usb_read_port_complete(struct urb *urb);
>>  static void rtw_usb_rx_resubmit(struct rtw_usb *rtwusb, struct rx_usb_ctrl_block *rxcb)
>>  {
>>         struct rtw_dev *rtwdev = rtwusb->rtwdev;
>> +       struct sk_buff *rx_skb;
>> +       gfp_t priority = in_interrupt() ? GFP_ATOMIC : GFP_KERNEL;
> 
> I remember in_interrupt() is not recommended. Can't we pass necessary gfp_t
> via function argument by callers?
> 

Yes, I can do that.

>>         int error;
>>
>> -       rxcb->rx_skb = alloc_skb(RTW_USB_MAX_RECVBUF_SZ, GFP_ATOMIC);
>> -       if (!rxcb->rx_skb)
>> -               return;
>> +       rx_skb = skb_dequeue(&rtwusb->rx_free_queue);
>> +       if (!rx_skb)
>> +               rx_skb = alloc_skb(RTW_USB_MAX_RECVBUF_SZ, priority);
>> +
>> +       if (!rx_skb)
>> +               goto try_later;
>> +
>> +       rxcb->rx_skb = rx_skb;
>> +
>> +       skb_reset_tail_pointer(rxcb->rx_skb);
>> +       rxcb->rx_skb->len = 0;
> 
> How about moving these initialization upward before ' rxcb->rx_skb = rx_skb;'?
> Statements can be shorter, and it is reasonable to initialize data before
> assignment. 
> 
>>
>>         usb_fill_bulk_urb(rxcb->rx_urb, rtwusb->udev,
>>                           usb_rcvbulkpipe(rtwusb->udev, rtwusb->pipe_in),
>>                           rxcb->rx_skb->data, RTW_USB_MAX_RECVBUF_SZ,
>>                           rtw_usb_read_port_complete, rxcb);
>>
>> -       error = usb_submit_urb(rxcb->rx_urb, GFP_ATOMIC);
>> +       error = usb_submit_urb(rxcb->rx_urb, priority);
> 
> Not sure if 'priority' fits the meaning. Maybe many kernel code just
> uses 'gfp'.
> 
>>         if (error) {
>> -               kfree_skb(rxcb->rx_skb);
>> +               skb_queue_tail(&rtwusb->rx_free_queue, rxcb->rx_skb);
>> +
>>                 if (error != -ENODEV)
>>                         rtw_err(rtwdev, "Err sending rx data urb %d\n",
>>                                 error);
> 
> Since here rxcb->rx_skb != NULL, will it be a problem? The rxcb will not be
> submitted again? Should all error cases go to try_later label?
> 

Right, the rxcb will be submitted again only if the error was ENOMEM
I don't know what other errors can be considered temporary. I never
ran into the case where the error is not ENODEV.

> 
>> +
>> +               if (error == -ENOMEM)
>> +                       goto try_later;
>> +       }
>> +
>> +       return;
>> +
>> +try_later:
>> +       rxcb->rx_skb = NULL;
>> +       queue_work(rtwusb->rxwq, &rtwusb->rx_urb_work);
>> +}
>> +
>> +static void rtw_usb_rx_resubmit_work(struct work_struct *work)
>> +{
>> +       struct rtw_usb *rtwusb = container_of(work, struct rtw_usb, rx_urb_work);
>> +       struct rx_usb_ctrl_block *rxcb;
>> +       int i;
>> +
>> +       for (i = 0; i < RTW_USB_RXCB_NUM; i++) {
>> +               rxcb = &rtwusb->rx_cb[i];
>> +
>> +               if (!rxcb->rx_skb)
>> +                       rtw_usb_rx_resubmit(rtwusb, rxcb);
>>         }
>>  }
>>
Ping-Ke Shih Nov. 26, 2024, 1:26 a.m. UTC | #2
Bitterblue Smith <rtl8821cerfe2@gmail.com> wrote:
> On 19/11/2024 02:50, Ping-Ke Shih wrote:
> > Bitterblue Smith <rtl8821cerfe2@gmail.com> wrote:
> >> The USB driver uses four USB Request Blocks for RX. Before submitting
> >> one, it allocates a 32768 byte skb for the RX data. This allocation can
> >> fail, maybe due to temporary memory fragmentation. When the allocation
> >> fails, the corresponding URB is never submitted again. After four such
> >> allocation failures, all RX stops because the driver is not requesting
> >> data from the device anymore.
> >>
> >> Don't allocate a 32768 byte skb when submitting a USB Request Block
> >> (which happens very often). Instead preallocate 8 such skbs, and reuse
> >> them over and over. If all 8 are busy, allocate a new one. This is
> >> pretty rare. If the allocation fails, use a work to try again later.
> >> When there are enough free skbs again, free the excess skbs.
> >>
> >> Also, use WQ_BH for the RX workqueue. With a normal or high priority
> >> workqueue the skbs are processed too slowly when the system is even a
> >> little busy, like when opening a new page in a browser, and the driver
> >> runs out of free skbs and allocates a lot of new ones.
> >>
> >> Move C2H_ADAPTIVITY to the c2h workqueue instead of handling it directly
> >> from rtw_fw_c2h_cmd_rx_irqsafe(), which runs in the RX workqueue. It
> >> reads hardware registers, which is not "irqsafe" with USB.
> >
> > This part should be in a separated patch.
> >
> 
> Do you mean just C2H_ADAPTIVITY or WQ_BH as well?

Just C2H_ADAPTIVITY.

With patch subject, people can't understand this change. 

> 
> >>
> >> This is more or less what the out-of-tree Realtek drivers do, except
> >> they use a tasklet instead of a BH workqueue.
> >>
> >> Tested with RTL8723DU, RTL8821AU, RTL8812AU, RTL8812BU, RTL8822CU,
> >> RTL8811CU.
> >>
> >> Closes: https://lore.kernel.org/linux-wireless/6e7ecb47-7ea0-433a-a19f-05f88a2edf6b@gmail.com/
> >> Signed-off-by: Bitterblue Smith <rtl8821cerfe2@gmail.com>
> >> ---
> >>  drivers/net/wireless/realtek/rtw88/fw.c  |  7 +--
> >>  drivers/net/wireless/realtek/rtw88/usb.c | 73 ++++++++++++++++++++----
> >>  drivers/net/wireless/realtek/rtw88/usb.h |  3 +
> >>  3 files changed, 67 insertions(+), 16 deletions(-)
> >>
> >> diff --git a/drivers/net/wireless/realtek/rtw88/fw.c b/drivers/net/wireless/realtek/rtw88/fw.c
> >> index e6e9946fbf44..02389b7c6876 100644
> >> --- a/drivers/net/wireless/realtek/rtw88/fw.c
> >> +++ b/drivers/net/wireless/realtek/rtw88/fw.c
> >> @@ -332,6 +332,9 @@ void rtw_fw_c2h_cmd_handle(struct rtw_dev *rtwdev, struct sk_buff *skb)
> >>         case C2H_RA_RPT:
> >>                 rtw_fw_ra_report_handle(rtwdev, c2h->payload, len);
> >>                 break;
> >> +       case C2H_ADAPTIVITY:
> >> +               rtw_fw_adaptivity_result(rtwdev, c2h->payload, len);
> >> +               break;
> >>         default:
> >>                 rtw_dbg(rtwdev, RTW_DBG_FW, "C2H 0x%x isn't handled\n", c2h->id);
> >>                 break;
> >> @@ -367,10 +370,6 @@ void rtw_fw_c2h_cmd_rx_irqsafe(struct rtw_dev *rtwdev, u32 pkt_offset,
> >>                 rtw_fw_scan_result(rtwdev, c2h->payload, len);
> >>                 dev_kfree_skb_any(skb);
> >>                 break;
> >> -       case C2H_ADAPTIVITY:
> >> -               rtw_fw_adaptivity_result(rtwdev, c2h->payload, len);
> >> -               dev_kfree_skb_any(skb);
> >> -               break;
> >>         default:
> >>                 /* pass offset for further operation */
> >>                 *((u32 *)skb->cb) = pkt_offset;
> >> diff --git a/drivers/net/wireless/realtek/rtw88/usb.c b/drivers/net/wireless/realtek/rtw88/usb.c
> >> index 727569d4ef4b..5c2dfa2ced92 100644
> >> --- a/drivers/net/wireless/realtek/rtw88/usb.c
> >> +++ b/drivers/net/wireless/realtek/rtw88/usb.c
> >> @@ -585,7 +585,7 @@ static void rtw_usb_rx_handler(struct work_struct *work)
> >>                                 goto skip_packet;
> >>                         }
> >>
> >> -                       skb = alloc_skb(skb_len, GFP_KERNEL);
> >> +                       skb = alloc_skb(skb_len, GFP_ATOMIC);
> >>                         if (!skb) {
> >>                                 rtw_dbg(rtwdev, RTW_DBG_USB,
> >>                                         "failed to allocate RX skb of size %u\n",
> >> @@ -612,7 +612,11 @@ static void rtw_usb_rx_handler(struct work_struct *work)
> >>                         rx_desc += next_pkt;
> >>                 } while (rx_desc + pkt_desc_sz < rx_skb->data + rx_skb->len);
> >>
> >> -               dev_kfree_skb_any(rx_skb);
> >> +               if (skb_queue_len(&rtwusb->rx_free_queue) >=
> >> +                   RTW_USB_RX_SKB_NUM - RTW_USB_RXCB_NUM)
> >> +                       dev_kfree_skb_any(rx_skb);
> >> +               else
> >> +                       skb_queue_tail(&rtwusb->rx_free_queue, rx_skb);
> >
> > Why not just queue and reuse rx_skb for all? That would be simpler.
> >
> 
> I didn't want to let it allocate too many skbs. I didn't find any kind
> of limit in the official drivers, so maybe it would be fine.

The case ' skb_queue_len(&rtwusb->rx_free_queue) >= 8 - 4' is rare? 
If so, I think this is fine. If not, to repeatedly allocate and free 
would cause memory fragment, and higher probability to failed to allocate
memory with GFP_ATOMIC. Also seemingly additional 4 persistent rx_skb
is not a big cost.
Bitterblue Smith Dec. 11, 2024, 6:07 p.m. UTC | #3
On 26/11/2024 03:26, Ping-Ke Shih wrote:
> Bitterblue Smith <rtl8821cerfe2@gmail.com> wrote:
>> On 19/11/2024 02:50, Ping-Ke Shih wrote:
>>> Bitterblue Smith <rtl8821cerfe2@gmail.com> wrote:
>>>> The USB driver uses four USB Request Blocks for RX. Before submitting
>>>> one, it allocates a 32768 byte skb for the RX data. This allocation can
>>>> fail, maybe due to temporary memory fragmentation. When the allocation
>>>> fails, the corresponding URB is never submitted again. After four such
>>>> allocation failures, all RX stops because the driver is not requesting
>>>> data from the device anymore.
>>>>
>>>> Don't allocate a 32768 byte skb when submitting a USB Request Block
>>>> (which happens very often). Instead preallocate 8 such skbs, and reuse
>>>> them over and over. If all 8 are busy, allocate a new one. This is
>>>> pretty rare. If the allocation fails, use a work to try again later.
>>>> When there are enough free skbs again, free the excess skbs.
>>>>
>>>> Also, use WQ_BH for the RX workqueue. With a normal or high priority
>>>> workqueue the skbs are processed too slowly when the system is even a
>>>> little busy, like when opening a new page in a browser, and the driver
>>>> runs out of free skbs and allocates a lot of new ones.
>>>>
>>>> Move C2H_ADAPTIVITY to the c2h workqueue instead of handling it directly
>>>> from rtw_fw_c2h_cmd_rx_irqsafe(), which runs in the RX workqueue. It
>>>> reads hardware registers, which is not "irqsafe" with USB.
>>>
>>> This part should be in a separated patch.
>>>
>>
>> Do you mean just C2H_ADAPTIVITY or WQ_BH as well?
> 
> Just C2H_ADAPTIVITY.
> 
> With patch subject, people can't understand this change. 
> 
>>
>>>>
>>>> This is more or less what the out-of-tree Realtek drivers do, except
>>>> they use a tasklet instead of a BH workqueue.
>>>>
>>>> Tested with RTL8723DU, RTL8821AU, RTL8812AU, RTL8812BU, RTL8822CU,
>>>> RTL8811CU.
>>>>
>>>> Closes: https://lore.kernel.org/linux-wireless/6e7ecb47-7ea0-433a-a19f-05f88a2edf6b@gmail.com/
>>>> Signed-off-by: Bitterblue Smith <rtl8821cerfe2@gmail.com>
>>>> ---
>>>>  drivers/net/wireless/realtek/rtw88/fw.c  |  7 +--
>>>>  drivers/net/wireless/realtek/rtw88/usb.c | 73 ++++++++++++++++++++----
>>>>  drivers/net/wireless/realtek/rtw88/usb.h |  3 +
>>>>  3 files changed, 67 insertions(+), 16 deletions(-)
>>>>
>>>> diff --git a/drivers/net/wireless/realtek/rtw88/fw.c b/drivers/net/wireless/realtek/rtw88/fw.c
>>>> index e6e9946fbf44..02389b7c6876 100644
>>>> --- a/drivers/net/wireless/realtek/rtw88/fw.c
>>>> +++ b/drivers/net/wireless/realtek/rtw88/fw.c
>>>> @@ -332,6 +332,9 @@ void rtw_fw_c2h_cmd_handle(struct rtw_dev *rtwdev, struct sk_buff *skb)
>>>>         case C2H_RA_RPT:
>>>>                 rtw_fw_ra_report_handle(rtwdev, c2h->payload, len);
>>>>                 break;
>>>> +       case C2H_ADAPTIVITY:
>>>> +               rtw_fw_adaptivity_result(rtwdev, c2h->payload, len);
>>>> +               break;
>>>>         default:
>>>>                 rtw_dbg(rtwdev, RTW_DBG_FW, "C2H 0x%x isn't handled\n", c2h->id);
>>>>                 break;
>>>> @@ -367,10 +370,6 @@ void rtw_fw_c2h_cmd_rx_irqsafe(struct rtw_dev *rtwdev, u32 pkt_offset,
>>>>                 rtw_fw_scan_result(rtwdev, c2h->payload, len);
>>>>                 dev_kfree_skb_any(skb);
>>>>                 break;
>>>> -       case C2H_ADAPTIVITY:
>>>> -               rtw_fw_adaptivity_result(rtwdev, c2h->payload, len);
>>>> -               dev_kfree_skb_any(skb);
>>>> -               break;
>>>>         default:
>>>>                 /* pass offset for further operation */
>>>>                 *((u32 *)skb->cb) = pkt_offset;
>>>> diff --git a/drivers/net/wireless/realtek/rtw88/usb.c b/drivers/net/wireless/realtek/rtw88/usb.c
>>>> index 727569d4ef4b..5c2dfa2ced92 100644
>>>> --- a/drivers/net/wireless/realtek/rtw88/usb.c
>>>> +++ b/drivers/net/wireless/realtek/rtw88/usb.c
>>>> @@ -585,7 +585,7 @@ static void rtw_usb_rx_handler(struct work_struct *work)
>>>>                                 goto skip_packet;
>>>>                         }
>>>>
>>>> -                       skb = alloc_skb(skb_len, GFP_KERNEL);
>>>> +                       skb = alloc_skb(skb_len, GFP_ATOMIC);
>>>>                         if (!skb) {
>>>>                                 rtw_dbg(rtwdev, RTW_DBG_USB,
>>>>                                         "failed to allocate RX skb of size %u\n",
>>>> @@ -612,7 +612,11 @@ static void rtw_usb_rx_handler(struct work_struct *work)
>>>>                         rx_desc += next_pkt;
>>>>                 } while (rx_desc + pkt_desc_sz < rx_skb->data + rx_skb->len);
>>>>
>>>> -               dev_kfree_skb_any(rx_skb);
>>>> +               if (skb_queue_len(&rtwusb->rx_free_queue) >=
>>>> +                   RTW_USB_RX_SKB_NUM - RTW_USB_RXCB_NUM)
>>>> +                       dev_kfree_skb_any(rx_skb);
>>>> +               else
>>>> +                       skb_queue_tail(&rtwusb->rx_free_queue, rx_skb);
>>>
>>> Why not just queue and reuse rx_skb for all? That would be simpler.
>>>
>>
>> I didn't want to let it allocate too many skbs. I didn't find any kind
>> of limit in the official drivers, so maybe it would be fine.
> 
> The case ' skb_queue_len(&rtwusb->rx_free_queue) >= 8 - 4' is rare? 
> If so, I think this is fine. If not, to repeatedly allocate and free 
> would cause memory fragment, and higher probability to failed to allocate
> memory with GFP_ATOMIC. Also seemingly additional 4 persistent rx_skb
> is not a big cost. 
> 
> 

I feel like it's rare for me, but I never counted how often it happens.

Someone else reported that in a period of 14.5 hours additional skbs
were allocated and freed 295 times. This is their log:
https://github.com/user-attachments/files/17792914/dmesg_grep_rtw_8821au.txt
This computer has i7-6700 CPU with 32 GB of RAM.

Another user reported that additional skbs were allocated and freed
3824 times in a period of 1.25 hours. This is their log:
https://github.com/user-attachments/files/18008735/log.txt
This computer is a Raspberry Pi 3B with 1 GB RAM.

I agree that another 4 skbs would be fine. I don't know what the upper
limit should be. The user with the Raspberry Pi 3B also tested with no
limit and reported that the driver allocated 17 extra skbs. That seems
like too many to me.
diff mbox series

Patch

diff --git a/drivers/net/wireless/realtek/rtw88/fw.c b/drivers/net/wireless/realtek/rtw88/fw.c
index e6e9946fbf44..02389b7c6876 100644
--- a/drivers/net/wireless/realtek/rtw88/fw.c
+++ b/drivers/net/wireless/realtek/rtw88/fw.c
@@ -332,6 +332,9 @@  void rtw_fw_c2h_cmd_handle(struct rtw_dev *rtwdev, struct sk_buff *skb)
 	case C2H_RA_RPT:
 		rtw_fw_ra_report_handle(rtwdev, c2h->payload, len);
 		break;
+	case C2H_ADAPTIVITY:
+		rtw_fw_adaptivity_result(rtwdev, c2h->payload, len);
+		break;
 	default:
 		rtw_dbg(rtwdev, RTW_DBG_FW, "C2H 0x%x isn't handled\n", c2h->id);
 		break;
@@ -367,10 +370,6 @@  void rtw_fw_c2h_cmd_rx_irqsafe(struct rtw_dev *rtwdev, u32 pkt_offset,
 		rtw_fw_scan_result(rtwdev, c2h->payload, len);
 		dev_kfree_skb_any(skb);
 		break;
-	case C2H_ADAPTIVITY:
-		rtw_fw_adaptivity_result(rtwdev, c2h->payload, len);
-		dev_kfree_skb_any(skb);
-		break;
 	default:
 		/* pass offset for further operation */
 		*((u32 *)skb->cb) = pkt_offset;
diff --git a/drivers/net/wireless/realtek/rtw88/usb.c b/drivers/net/wireless/realtek/rtw88/usb.c
index 727569d4ef4b..5c2dfa2ced92 100644
--- a/drivers/net/wireless/realtek/rtw88/usb.c
+++ b/drivers/net/wireless/realtek/rtw88/usb.c
@@ -585,7 +585,7 @@  static void rtw_usb_rx_handler(struct work_struct *work)
 				goto skip_packet;
 			}
 
-			skb = alloc_skb(skb_len, GFP_KERNEL);
+			skb = alloc_skb(skb_len, GFP_ATOMIC);
 			if (!skb) {
 				rtw_dbg(rtwdev, RTW_DBG_USB,
 					"failed to allocate RX skb of size %u\n",
@@ -612,7 +612,11 @@  static void rtw_usb_rx_handler(struct work_struct *work)
 			rx_desc += next_pkt;
 		} while (rx_desc + pkt_desc_sz < rx_skb->data + rx_skb->len);
 
-		dev_kfree_skb_any(rx_skb);
+		if (skb_queue_len(&rtwusb->rx_free_queue) >=
+		    RTW_USB_RX_SKB_NUM - RTW_USB_RXCB_NUM)
+			dev_kfree_skb_any(rx_skb);
+		else
+			skb_queue_tail(&rtwusb->rx_free_queue, rx_skb);
 	}
 }
 
@@ -621,23 +625,57 @@  static void rtw_usb_read_port_complete(struct urb *urb);
 static void rtw_usb_rx_resubmit(struct rtw_usb *rtwusb, struct rx_usb_ctrl_block *rxcb)
 {
 	struct rtw_dev *rtwdev = rtwusb->rtwdev;
+	struct sk_buff *rx_skb;
+	gfp_t priority = in_interrupt() ? GFP_ATOMIC : GFP_KERNEL;
 	int error;
 
-	rxcb->rx_skb = alloc_skb(RTW_USB_MAX_RECVBUF_SZ, GFP_ATOMIC);
-	if (!rxcb->rx_skb)
-		return;
+	rx_skb = skb_dequeue(&rtwusb->rx_free_queue);
+	if (!rx_skb)
+		rx_skb = alloc_skb(RTW_USB_MAX_RECVBUF_SZ, priority);
+
+	if (!rx_skb)
+		goto try_later;
+
+	rxcb->rx_skb = rx_skb;
+
+	skb_reset_tail_pointer(rxcb->rx_skb);
+	rxcb->rx_skb->len = 0;
 
 	usb_fill_bulk_urb(rxcb->rx_urb, rtwusb->udev,
 			  usb_rcvbulkpipe(rtwusb->udev, rtwusb->pipe_in),
 			  rxcb->rx_skb->data, RTW_USB_MAX_RECVBUF_SZ,
 			  rtw_usb_read_port_complete, rxcb);
 
-	error = usb_submit_urb(rxcb->rx_urb, GFP_ATOMIC);
+	error = usb_submit_urb(rxcb->rx_urb, priority);
 	if (error) {
-		kfree_skb(rxcb->rx_skb);
+		skb_queue_tail(&rtwusb->rx_free_queue, rxcb->rx_skb);
+
 		if (error != -ENODEV)
 			rtw_err(rtwdev, "Err sending rx data urb %d\n",
 				error);
+
+		if (error == -ENOMEM)
+			goto try_later;
+	}
+
+	return;
+
+try_later:
+	rxcb->rx_skb = NULL;
+	queue_work(rtwusb->rxwq, &rtwusb->rx_urb_work);
+}
+
+static void rtw_usb_rx_resubmit_work(struct work_struct *work)
+{
+	struct rtw_usb *rtwusb = container_of(work, struct rtw_usb, rx_urb_work);
+	struct rx_usb_ctrl_block *rxcb;
+	int i;
+
+	for (i = 0; i < RTW_USB_RXCB_NUM; i++) {
+		rxcb = &rtwusb->rx_cb[i];
+
+		if (!rxcb->rx_skb)
+			rtw_usb_rx_resubmit(rtwusb, rxcb);
 	}
 }
 
@@ -653,8 +691,7 @@  static void rtw_usb_read_port_complete(struct urb *urb)
 		    urb->actual_length < 24) {
 			rtw_err(rtwdev, "failed to get urb length:%d\n",
 				urb->actual_length);
-			if (skb)
-				dev_kfree_skb_any(skb);
+			skb_queue_tail(&rtwusb->rx_free_queue, skb);
 		} else {
 			skb_put(skb, urb->actual_length);
 			skb_queue_tail(&rtwusb->rx_queue, skb);
@@ -662,6 +699,8 @@  static void rtw_usb_read_port_complete(struct urb *urb)
 		}
 		rtw_usb_rx_resubmit(rtwusb, rxcb);
 	} else {
+		skb_queue_tail(&rtwusb->rx_free_queue, skb);
+
 		switch (urb->status) {
 		case -EINVAL:
 		case -EPIPE:
@@ -679,8 +718,6 @@  static void rtw_usb_read_port_complete(struct urb *urb)
 			rtw_err(rtwdev, "status %d\n", urb->status);
 			break;
 		}
-		if (skb)
-			dev_kfree_skb_any(skb);
 	}
 }
 
@@ -868,16 +905,26 @@  static struct rtw_hci_ops rtw_usb_ops = {
 static int rtw_usb_init_rx(struct rtw_dev *rtwdev)
 {
 	struct rtw_usb *rtwusb = rtw_get_usb_priv(rtwdev);
+	struct sk_buff *rx_skb;
+	int i;
 
-	rtwusb->rxwq = create_singlethread_workqueue("rtw88_usb: rx wq");
+	rtwusb->rxwq = alloc_workqueue("rtw88_usb: rx wq", WQ_BH, 0);
 	if (!rtwusb->rxwq) {
 		rtw_err(rtwdev, "failed to create RX work queue\n");
 		return -ENOMEM;
 	}
 
 	skb_queue_head_init(&rtwusb->rx_queue);
+	skb_queue_head_init(&rtwusb->rx_free_queue);
 
 	INIT_WORK(&rtwusb->rx_work, rtw_usb_rx_handler);
+	INIT_WORK(&rtwusb->rx_urb_work, rtw_usb_rx_resubmit_work);
+
+	for (i = 0; i < RTW_USB_RX_SKB_NUM; i++) {
+		rx_skb = alloc_skb(RTW_USB_MAX_RECVBUF_SZ, GFP_KERNEL);
+		if (rx_skb)
+			skb_queue_tail(&rtwusb->rx_free_queue, rx_skb);
+	}
 
 	return 0;
 }
@@ -902,6 +949,8 @@  static void rtw_usb_deinit_rx(struct rtw_dev *rtwdev)
 
 	flush_workqueue(rtwusb->rxwq);
 	destroy_workqueue(rtwusb->rxwq);
+
+	skb_queue_purge(&rtwusb->rx_free_queue);
 }
 
 static int rtw_usb_init_tx(struct rtw_dev *rtwdev)
diff --git a/drivers/net/wireless/realtek/rtw88/usb.h b/drivers/net/wireless/realtek/rtw88/usb.h
index 86697a5c0103..9b695b688b24 100644
--- a/drivers/net/wireless/realtek/rtw88/usb.h
+++ b/drivers/net/wireless/realtek/rtw88/usb.h
@@ -38,6 +38,7 @@ 
 #define RTW_USB_RXAGG_TIMEOUT		10
 
 #define RTW_USB_RXCB_NUM		4
+#define RTW_USB_RX_SKB_NUM		8
 
 #define RTW_USB_EP_MAX			4
 
@@ -81,7 +82,9 @@  struct rtw_usb {
 
 	struct rx_usb_ctrl_block rx_cb[RTW_USB_RXCB_NUM];
 	struct sk_buff_head rx_queue;
+	struct sk_buff_head rx_free_queue;
 	struct work_struct rx_work;
+	struct work_struct rx_urb_work;
 };
 
 static inline struct rtw_usb_tx_data *rtw_usb_get_tx_data(struct sk_buff *skb)