Message ID | 20240703125541.7ced468fe431.Ibb109867dc680c37fe8d891e9ab9ef64ed5c5d2d@changeid |
---|---|
State | New |
Headers | show |
Series | wifi: iwlwifi: updates - 03-07-2024 | expand |
On 7/3/24 02:58, Miri Korenblit wrote: > From: Benjamin Berg <benjamin.berg@intel.com> > > Map the pages when allocating them so that we will not need to map each > of the used fragments at a later point. > > For now the mapping is not used, this will be changed in a later commit. > > Signed-off-by: Benjamin Berg <benjamin.berg@intel.com> > Signed-off-by: Miri Korenblit <miriam.rachel.korenblit@intel.com> > Reviewed-by: Johannes Berg <johannes.berg@intel.com> Hello, I patched every iwlwifi and mac80211 patch I found on the mailing list up to this one into my 6.10-ish tree. I see immediate and reproducible crash when I send TCP traffic on be200 radio. I bisected it to this particular patch. Previously I was only testing download direction and it ran fine, and UDP traffic upload seems to run OK, so it must be triggered by large TCP frames, and based on what this patch is messing with, that makes sense. Always possible I messed up when applying patches of course. Oops: general protection fault, probably for non-canonical address 0x5088000000ff0: 0000 [#1] PREEMPT SMP CPU: 3 PID: 800 Comm: irq/181-iwlwifi Tainted: G W 6.10.0-rc7+ #17 Hardware name: Default string Default string/SKYBAY, BIOS 5.12 02/15/2023 RIP: 0010:iwl_pcie_prep_tso+0x213/0x2b0 [iwlwifi] Code: 01 e0 49 c1 e4 06 41 b8 01 00 00 00 48 c1 f8 06 48 c1 e0 0c 48 01 d0 31 d2 48 89 45 08 48 b8 e8 0f 00 00 80 88 ff ff0 RSP: 0018:ffffc900001e0678 EFLAGS: 00010207 RAX: ffff888000000fe8 RBX: ffff8881263a3500 RCX: 0000000000001000 RDX: 0000000000000000 RSI: ffffffff82b6ee60 RDI: ffff8881128d3748 RBP: ffffe8ffffb882d0 R08: 0000000000000001 R09: 0000000000000020 R10: 0000000000000002 R11: ffff8881128d35b8 R12: 0005088000000fe8 R13: ffff888111b90028 R14: ffff88810f664438 R15: ffff8881263a3540 FS: 0000000000000000(0000) GS:ffff88845db80000(0000) knlGS:0000000000000000 CS: 0010 DS: 0000 ES: 0000 CR0: 0000000080050033 CR2: 0000000040f70fe0 CR3: 0000000002a50002 CR4: 00000000003706f0 DR0: 0000000000000000 DR1: 0000000000000000 DR2: 0000000000000000 DR3: 0000000000000000 DR6: 00000000fffe0ff0 DR7: 0000000000000400 Call Trace: <IRQ> ? die_addr+0x2d/0x80 ? exc_general_protection+0x1aa/0x3f0 ? asm_exc_general_protection+0x22/0x30 ? iwl_pcie_prep_tso+0x213/0x2b0 [iwlwifi] ? iwl_pcie_prep_tso+0x1b2/0x2b0 [iwlwifi] iwl_txq_gen2_tx+0x7ec/0xf10 [iwlwifi] iwl_mvm_tx_mpdu+0x1da/0x560 [iwlmvm] iwl_mvm_tx_skb_sta+0x34c/0x540 [iwlmvm] iwl_mvm_tx_skb+0x12/0x30 [iwlmvm] iwl_mvm_mac_itxq_xmit+0xbe/0x1e0 [iwlmvm] ieee80211_queue_skb+0x557/0x6d0 [mac80211] __ieee80211_xmit_fast+0x7b2/0xad0 [mac80211] ? lock_acquire+0xc7/0x2d0 ? lock_is_held_type+0xa5/0x110 ? find_held_lock+0x2b/0x80 ? skb_mac_gso_segment+0x130/0x1e0 ? lock_release+0xc6/0x290 ? skb_mac_gso_segment+0x13a/0x1e0 __ieee80211_subif_start_xmit+0x22d/0x5a0 [mac80211] ieee80211_subif_start_xmit+0x57/0x570 [mac80211] ? dev_queue_xmit_nit+0x2cd/0x3a0 ? lock_release+0xc6/0x290 ? dev_hard_start_xmit+0x76/0x250 dev_hard_start_xmit+0x76/0x250 __dev_queue_xmit+0x246/0x1270 ? lockdep_hardirqs_on_prepare+0xa7/0x170 ? eth_header+0x21/0xb0 ip_finish_output2+0x230/0xaa0 __ip_queue_xmit+0x1e6/0x760 __tcp_transmit_skb+0x521/0xdc0 ? find_held_lock+0x2b/0x80 tcp_write_xmit+0x4d7/0x17a0 ? lock_is_held_type+0xa5/0x110 tcp_tsq_handler+0x8a/0xd0 tcp_tasklet_func+0xbc/0x140 tasklet_action_common.constprop.0+0xfd/0x2f0 handle_softirqs+0xd0/0x440 ? iwl_pcie_irq_rx_msix_handler+0xe5/0x140 [iwlwifi] ? iwl_pcie_irq_rx_msix_handler+0x5d/0x140 [iwlwifi] do_softirq.part.0+0x3a/0x90 </IRQ> <TASK> __local_bh_enable_ip+0xbd/0xd0 ? iwl_pcie_irq_rx_msix_handler+0xe5/0x140 [iwlwifi] iwl_pcie_irq_rx_msix_handler+0xed/0x140 [iwlwifi] ? irq_thread+0x8c/0x200 irq_thread_fn+0x14/0x50 ? irq_thread+0x8c/0x200 irq_thread+0x12c/0x200 ? disable_irq_nosync+0x10/0x10 ? irq_set_affinity_notifier+0x140/0x140 ? irq_check_status_bit+0xf0/0xf0 kthread+0xd7/0x110 ? kthread_insert_work_sanity_check+0x50/0x50 ret_from_fork+0x28/0x40 ? kthread_insert_work_sanity_check+0x50/0x50 ret_from_fork_asm+0x11/0x20 </TASK> > --- > .../wireless/intel/iwlwifi/pcie/internal.h | 30 +++++++++- > .../net/wireless/intel/iwlwifi/pcie/tx-gen2.c | 22 ++++++- > drivers/net/wireless/intel/iwlwifi/pcie/tx.c | 60 +++++++++++++++---- > 3 files changed, 95 insertions(+), 17 deletions(-) > > diff --git a/drivers/net/wireless/intel/iwlwifi/pcie/internal.h b/drivers/net/wireless/intel/iwlwifi/pcie/internal.h > index d63c1c284f70..b59de4f80b4b 100644 > --- a/drivers/net/wireless/intel/iwlwifi/pcie/internal.h > +++ b/drivers/net/wireless/intel/iwlwifi/pcie/internal.h > @@ -603,6 +603,22 @@ struct iwl_tso_hdr_page { > u8 *pos; > }; > > +/* > + * Note that we put this struct *last* in the page. By doing that, we ensure > + * that no TB referencing this page can trigger the 32-bit boundary hardware > + * bug. > + */ > +struct iwl_tso_page_info { > + dma_addr_t dma_addr; > + struct page *next; > + refcount_t use_count; > +}; > + > +#define IWL_TSO_PAGE_DATA_SIZE (PAGE_SIZE - sizeof(struct iwl_tso_page_info)) > +#define IWL_TSO_PAGE_INFO(addr) \ > + ((struct iwl_tso_page_info *)(((unsigned long)addr & PAGE_MASK) + \ > + IWL_TSO_PAGE_DATA_SIZE)) > + > int iwl_pcie_tx_init(struct iwl_trans *trans); > void iwl_pcie_tx_start(struct iwl_trans *trans, u32 scd_base_addr); > int iwl_pcie_tx_stop(struct iwl_trans *trans); > @@ -628,8 +644,18 @@ struct sg_table *iwl_pcie_prep_tso(struct iwl_trans *trans, struct sk_buff *skb, > struct iwl_cmd_meta *cmd_meta, > u8 **hdr, unsigned int hdr_room); > > -void iwl_pcie_free_tso_page(struct iwl_trans *trans, struct sk_buff *skb, > - struct iwl_cmd_meta *cmd_meta); > +void iwl_pcie_free_tso_pages(struct iwl_trans *trans, struct sk_buff *skb, > + struct iwl_cmd_meta *cmd_meta); > + > +static inline dma_addr_t iwl_pcie_get_tso_page_phys(void *addr) > +{ > + dma_addr_t res; > + > + res = IWL_TSO_PAGE_INFO(addr)->dma_addr; > + res += (unsigned long)addr & ~PAGE_MASK; > + > + return res; > +} > > static inline dma_addr_t > iwl_txq_get_first_tb_dma(struct iwl_txq *txq, int idx) > diff --git a/drivers/net/wireless/intel/iwlwifi/pcie/tx-gen2.c b/drivers/net/wireless/intel/iwlwifi/pcie/tx-gen2.c > index 3dcce6a8da50..10ee2c328458 100644 > --- a/drivers/net/wireless/intel/iwlwifi/pcie/tx-gen2.c > +++ b/drivers/net/wireless/intel/iwlwifi/pcie/tx-gen2.c > @@ -19,8 +19,10 @@ static struct page *get_workaround_page(struct iwl_trans *trans, > struct sk_buff *skb) > { > struct iwl_trans_pcie *trans_pcie = IWL_TRANS_GET_PCIE_TRANS(trans); > + struct iwl_tso_page_info *info; > struct page **page_ptr; > struct page *ret; > + dma_addr_t phys; > > page_ptr = (void *)((u8 *)skb->cb + trans_pcie->txqs.page_offs); > > @@ -28,8 +30,22 @@ static struct page *get_workaround_page(struct iwl_trans *trans, > if (!ret) > return NULL; > > + info = IWL_TSO_PAGE_INFO(page_address(ret)); > + > + /* Create a DMA mapping for the page */ > + phys = dma_map_page_attrs(trans->dev, ret, 0, PAGE_SIZE, > + DMA_TO_DEVICE, DMA_ATTR_SKIP_CPU_SYNC); > + if (unlikely(dma_mapping_error(trans->dev, phys))) { > + __free_page(ret); > + return NULL; > + } > + > + /* Store physical address and set use count */ > + info->dma_addr = phys; > + refcount_set(&info->use_count, 1); > + > /* set the chaining pointer to the previous page if there */ > - *(void **)((u8 *)page_address(ret) + PAGE_SIZE - sizeof(void *)) = *page_ptr; > + info->next = *page_ptr; > *page_ptr = ret; > > return ret; > @@ -76,7 +92,7 @@ static int iwl_txq_gen2_set_tb_with_wa(struct iwl_trans *trans, > * a new mapping for it so the device will not fail. > */ > > - if (WARN_ON(len > PAGE_SIZE - sizeof(void *))) { > + if (WARN_ON(len > IWL_TSO_PAGE_DATA_SIZE)) { > ret = -ENOBUFS; > goto unmap; > } > @@ -782,7 +798,7 @@ static void iwl_txq_gen2_unmap(struct iwl_trans *trans, int txq_id) > struct sk_buff *skb = txq->entries[idx].skb; > > if (!WARN_ON_ONCE(!skb)) > - iwl_pcie_free_tso_page(trans, skb, cmd_meta); > + iwl_pcie_free_tso_pages(trans, skb, cmd_meta); > } > iwl_txq_gen2_free_tfd(trans, txq); > txq->read_ptr = iwl_txq_inc_wrap(trans, txq->read_ptr); > diff --git a/drivers/net/wireless/intel/iwlwifi/pcie/tx.c b/drivers/net/wireless/intel/iwlwifi/pcie/tx.c > index ac545a39ad2a..e00d85866de9 100644 > --- a/drivers/net/wireless/intel/iwlwifi/pcie/tx.c > +++ b/drivers/net/wireless/intel/iwlwifi/pcie/tx.c > @@ -209,8 +209,22 @@ static void iwl_pcie_clear_cmd_in_flight(struct iwl_trans *trans) > spin_unlock(&trans_pcie->reg_lock); > } > > -void iwl_pcie_free_tso_page(struct iwl_trans *trans, struct sk_buff *skb, > - struct iwl_cmd_meta *cmd_meta) > +static void iwl_pcie_free_and_unmap_tso_page(struct iwl_trans *trans, > + struct page *page) > +{ > + struct iwl_tso_page_info *info = IWL_TSO_PAGE_INFO(page_address(page)); > + > + /* Decrease internal use count and unmap/free page if needed */ > + if (refcount_dec_and_test(&info->use_count)) { > + dma_unmap_page(trans->dev, info->dma_addr, PAGE_SIZE, > + DMA_TO_DEVICE); > + > + __free_page(page); > + } > +} > + > +void iwl_pcie_free_tso_pages(struct iwl_trans *trans, struct sk_buff *skb, > + struct iwl_cmd_meta *cmd_meta) > { > struct iwl_trans_pcie *trans_pcie = IWL_TRANS_GET_PCIE_TRANS(trans); > struct page **page_ptr; > @@ -221,10 +235,11 @@ void iwl_pcie_free_tso_page(struct iwl_trans *trans, struct sk_buff *skb, > *page_ptr = NULL; > > while (next) { > + struct iwl_tso_page_info *info; > struct page *tmp = next; > > - next = *(void **)((u8 *)page_address(next) + PAGE_SIZE - > - sizeof(void *)); > + info = IWL_TSO_PAGE_INFO(page_address(next)); > + next = info->next; > > /* Unmap the scatter gather list that is on the last page */ > if (!next && cmd_meta->sg_offset) { > @@ -236,7 +251,7 @@ void iwl_pcie_free_tso_page(struct iwl_trans *trans, struct sk_buff *skb, > dma_unmap_sgtable(trans->dev, sgt, DMA_TO_DEVICE, 0); > } > > - __free_page(tmp); > + iwl_pcie_free_and_unmap_tso_page(trans, tmp); > } > } > > @@ -381,7 +396,7 @@ static void iwl_pcie_txq_unmap(struct iwl_trans *trans, int txq_id) > if (WARN_ON_ONCE(!skb)) > continue; > > - iwl_pcie_free_tso_page(trans, skb, cmd_meta); > + iwl_pcie_free_tso_pages(trans, skb, cmd_meta); > } > iwl_txq_free_tfd(trans, txq); > txq->read_ptr = iwl_txq_inc_wrap(trans, txq->read_ptr); > @@ -1722,7 +1737,9 @@ static void *iwl_pcie_get_page_hdr(struct iwl_trans *trans, > { > struct iwl_trans_pcie *trans_pcie = IWL_TRANS_GET_PCIE_TRANS(trans); > struct iwl_tso_hdr_page *p = this_cpu_ptr(trans_pcie->txqs.tso_hdr_page); > + struct iwl_tso_page_info *info; > struct page **page_ptr; > + dma_addr_t phys; > void *ret; > > page_ptr = (void *)((u8 *)skb->cb + trans_pcie->txqs.page_offs); > @@ -1743,23 +1760,42 @@ static void *iwl_pcie_get_page_hdr(struct iwl_trans *trans, > * > * (see also get_workaround_page() in tx-gen2.c) > */ > - if (p->pos + len < (u8 *)page_address(p->page) + PAGE_SIZE - > - sizeof(void *)) > + if (((unsigned long)p->pos & ~PAGE_MASK) + len < IWL_TSO_PAGE_DATA_SIZE) { > + info = IWL_TSO_PAGE_INFO(page_address(ret)); > goto out; > + } > > /* We don't have enough room on this page, get a new one. */ > - __free_page(p->page); > + iwl_pcie_free_and_unmap_tso_page(trans, p->page); > > alloc: > p->page = alloc_page(GFP_ATOMIC); > if (!p->page) > return NULL; > p->pos = page_address(p->page); > + > + info = IWL_TSO_PAGE_INFO(page_address(ret)); > + > /* set the chaining pointer to NULL */ > - *(void **)((u8 *)page_address(p->page) + PAGE_SIZE - sizeof(void *)) = NULL; > + info->next = NULL; > + > + /* Create a DMA mapping for the page */ > + phys = dma_map_page_attrs(trans->dev, p->page, 0, PAGE_SIZE, > + DMA_TO_DEVICE, DMA_ATTR_SKIP_CPU_SYNC); > + if (unlikely(dma_mapping_error(trans->dev, phys))) { > + __free_page(p->page); > + p->page = NULL; > + > + return NULL; > + } > + > + /* Store physical address and set use count */ > + info->dma_addr = phys; > + refcount_set(&info->use_count, 1); > out: > *page_ptr = p->page; > - get_page(p->page); > + /* Return an internal reference for the caller */ > + refcount_inc(&info->use_count); > ret = p->pos; > p->pos += len; > > @@ -2330,7 +2366,7 @@ void iwl_pcie_reclaim(struct iwl_trans *trans, int txq_id, int ssn, > read_ptr, txq->read_ptr, txq_id)) > continue; > > - iwl_pcie_free_tso_page(trans, skb, cmd_meta); > + iwl_pcie_free_tso_pages(trans, skb, cmd_meta); > > __skb_queue_tail(skbs, skb); >
Hi Ben, yes, you need to apply: commit 003eae5a28c6c9d50290a4ac9b955be912f24c9f Author: Benjamin Berg <benjamin.berg@intel.com> Date: Tue Jul 9 14:31:49 2024 +0200 wifi: iwlwifi: correctly reference TSO page information I had not fully tested the last revision and the error slipped unfortunately. Benjamin On Wed, 2024-07-10 at 14:41 -0700, Ben Greear wrote: > On 7/3/24 02:58, Miri Korenblit wrote: > > From: Benjamin Berg <benjamin.berg@intel.com> > > > > Map the pages when allocating them so that we will not need to map > > each > > of the used fragments at a later point. > > > > For now the mapping is not used, this will be changed in a later > > commit. > > > > Signed-off-by: Benjamin Berg <benjamin.berg@intel.com> > > Signed-off-by: Miri Korenblit <miriam.rachel.korenblit@intel.com> > > Reviewed-by: Johannes Berg <johannes.berg@intel.com> > > Hello, > > I patched every iwlwifi and mac80211 patch I found on the mailing > list > up to this one into my 6.10-ish tree. > > I see immediate and reproducible crash when I send TCP traffic on > be200 > radio. I bisected it to this particular patch. > > Previously I was only testing download direction and it ran fine, and > UDP > traffic upload seems to run OK, so it must be triggered by large TCP > frames, and based on what this patch is messing with, that makes > sense. > Always possible I messed up when applying patches of course. > > > Oops: general protection fault, probably for non-canonical address > 0x5088000000ff0: 0000 [#1] PREEMPT SMP > CPU: 3 PID: 800 Comm: irq/181-iwlwifi Tainted: G W > 6.10.0-rc7+ #17 > Hardware name: Default string Default string/SKYBAY, BIOS 5.12 > 02/15/2023 > RIP: 0010:iwl_pcie_prep_tso+0x213/0x2b0 [iwlwifi] > Code: 01 e0 49 c1 e4 06 41 b8 01 00 00 00 48 c1 f8 06 48 c1 e0 0c 48 > 01 d0 31 d2 48 89 45 08 48 b8 e8 0f 00 00 80 88 ff ff0 > RSP: 0018:ffffc900001e0678 EFLAGS: 00010207 > RAX: ffff888000000fe8 RBX: ffff8881263a3500 RCX: 0000000000001000 > RDX: 0000000000000000 RSI: ffffffff82b6ee60 RDI: ffff8881128d3748 > RBP: ffffe8ffffb882d0 R08: 0000000000000001 R09: 0000000000000020 > R10: 0000000000000002 R11: ffff8881128d35b8 R12: 0005088000000fe8 > R13: ffff888111b90028 R14: ffff88810f664438 R15: ffff8881263a3540 > FS: 0000000000000000(0000) GS:ffff88845db80000(0000) > knlGS:0000000000000000 > CS: 0010 DS: 0000 ES: 0000 CR0: 0000000080050033 > CR2: 0000000040f70fe0 CR3: 0000000002a50002 CR4: 00000000003706f0 > DR0: 0000000000000000 DR1: 0000000000000000 DR2: 0000000000000000 > DR3: 0000000000000000 DR6: 00000000fffe0ff0 DR7: 0000000000000400 > Call Trace: > <IRQ> > ? die_addr+0x2d/0x80 > ? exc_general_protection+0x1aa/0x3f0 > ? asm_exc_general_protection+0x22/0x30 > ? iwl_pcie_prep_tso+0x213/0x2b0 [iwlwifi] > ? iwl_pcie_prep_tso+0x1b2/0x2b0 [iwlwifi] > iwl_txq_gen2_tx+0x7ec/0xf10 [iwlwifi] > iwl_mvm_tx_mpdu+0x1da/0x560 [iwlmvm] > iwl_mvm_tx_skb_sta+0x34c/0x540 [iwlmvm] > iwl_mvm_tx_skb+0x12/0x30 [iwlmvm] > iwl_mvm_mac_itxq_xmit+0xbe/0x1e0 [iwlmvm] > ieee80211_queue_skb+0x557/0x6d0 [mac80211] > __ieee80211_xmit_fast+0x7b2/0xad0 [mac80211] > ? lock_acquire+0xc7/0x2d0 > ? lock_is_held_type+0xa5/0x110 > ? find_held_lock+0x2b/0x80 > ? skb_mac_gso_segment+0x130/0x1e0 > ? lock_release+0xc6/0x290 > ? skb_mac_gso_segment+0x13a/0x1e0 > __ieee80211_subif_start_xmit+0x22d/0x5a0 [mac80211] > ieee80211_subif_start_xmit+0x57/0x570 [mac80211] > ? dev_queue_xmit_nit+0x2cd/0x3a0 > ? lock_release+0xc6/0x290 > ? dev_hard_start_xmit+0x76/0x250 > dev_hard_start_xmit+0x76/0x250 > __dev_queue_xmit+0x246/0x1270 > ? lockdep_hardirqs_on_prepare+0xa7/0x170 > ? eth_header+0x21/0xb0 > ip_finish_output2+0x230/0xaa0 > __ip_queue_xmit+0x1e6/0x760 > __tcp_transmit_skb+0x521/0xdc0 > ? find_held_lock+0x2b/0x80 > tcp_write_xmit+0x4d7/0x17a0 > ? lock_is_held_type+0xa5/0x110 > tcp_tsq_handler+0x8a/0xd0 > tcp_tasklet_func+0xbc/0x140 > tasklet_action_common.constprop.0+0xfd/0x2f0 > handle_softirqs+0xd0/0x440 > ? iwl_pcie_irq_rx_msix_handler+0xe5/0x140 [iwlwifi] > ? iwl_pcie_irq_rx_msix_handler+0x5d/0x140 [iwlwifi] > do_softirq.part.0+0x3a/0x90 > </IRQ> > <TASK> > __local_bh_enable_ip+0xbd/0xd0 > ? iwl_pcie_irq_rx_msix_handler+0xe5/0x140 [iwlwifi] > iwl_pcie_irq_rx_msix_handler+0xed/0x140 [iwlwifi] > ? irq_thread+0x8c/0x200 > irq_thread_fn+0x14/0x50 > ? irq_thread+0x8c/0x200 > irq_thread+0x12c/0x200 > ? disable_irq_nosync+0x10/0x10 > ? irq_set_affinity_notifier+0x140/0x140 > ? irq_check_status_bit+0xf0/0xf0 > kthread+0xd7/0x110 > ? kthread_insert_work_sanity_check+0x50/0x50 > ret_from_fork+0x28/0x40 > ? kthread_insert_work_sanity_check+0x50/0x50 > ret_from_fork_asm+0x11/0x20 > </TASK> > > > > --- > > .../wireless/intel/iwlwifi/pcie/internal.h | 30 +++++++++- > > .../net/wireless/intel/iwlwifi/pcie/tx-gen2.c | 22 ++++++- > > drivers/net/wireless/intel/iwlwifi/pcie/tx.c | 60 > > +++++++++++++++---- > > 3 files changed, 95 insertions(+), 17 deletions(-) > > > > diff --git a/drivers/net/wireless/intel/iwlwifi/pcie/internal.h > > b/drivers/net/wireless/intel/iwlwifi/pcie/internal.h > > index d63c1c284f70..b59de4f80b4b 100644 > > --- a/drivers/net/wireless/intel/iwlwifi/pcie/internal.h > > +++ b/drivers/net/wireless/intel/iwlwifi/pcie/internal.h > > @@ -603,6 +603,22 @@ struct iwl_tso_hdr_page { > > u8 *pos; > > }; > > > > +/* > > + * Note that we put this struct *last* in the page. By doing that, > > we ensure > > + * that no TB referencing this page can trigger the 32-bit > > boundary hardware > > + * bug. > > + */ > > +struct iwl_tso_page_info { > > + dma_addr_t dma_addr; > > + struct page *next; > > + refcount_t use_count; > > +}; > > + > > +#define IWL_TSO_PAGE_DATA_SIZE (PAGE_SIZE - sizeof(struct > > iwl_tso_page_info)) > > +#define IWL_TSO_PAGE_INFO(addr) \ > > + ((struct iwl_tso_page_info *)(((unsigned long)addr & > > PAGE_MASK) + \ > > + IWL_TSO_PAGE_DATA_SIZE)) > > + > > int iwl_pcie_tx_init(struct iwl_trans *trans); > > void iwl_pcie_tx_start(struct iwl_trans *trans, u32 > > scd_base_addr); > > int iwl_pcie_tx_stop(struct iwl_trans *trans); > > @@ -628,8 +644,18 @@ struct sg_table *iwl_pcie_prep_tso(struct > > iwl_trans *trans, struct sk_buff *skb, > > struct iwl_cmd_meta *cmd_meta, > > u8 **hdr, unsigned int > > hdr_room); > > > > -void iwl_pcie_free_tso_page(struct iwl_trans *trans, struct > > sk_buff *skb, > > - struct iwl_cmd_meta *cmd_meta); > > +void iwl_pcie_free_tso_pages(struct iwl_trans *trans, struct > > sk_buff *skb, > > + struct iwl_cmd_meta *cmd_meta); > > + > > +static inline dma_addr_t iwl_pcie_get_tso_page_phys(void *addr) > > +{ > > + dma_addr_t res; > > + > > + res = IWL_TSO_PAGE_INFO(addr)->dma_addr; > > + res += (unsigned long)addr & ~PAGE_MASK; > > + > > + return res; > > +} > > > > static inline dma_addr_t > > iwl_txq_get_first_tb_dma(struct iwl_txq *txq, int idx) > > diff --git a/drivers/net/wireless/intel/iwlwifi/pcie/tx-gen2.c > > b/drivers/net/wireless/intel/iwlwifi/pcie/tx-gen2.c > > index 3dcce6a8da50..10ee2c328458 100644 > > --- a/drivers/net/wireless/intel/iwlwifi/pcie/tx-gen2.c > > +++ b/drivers/net/wireless/intel/iwlwifi/pcie/tx-gen2.c > > @@ -19,8 +19,10 @@ static struct page *get_workaround_page(struct > > iwl_trans *trans, > > struct sk_buff *skb) > > { > > struct iwl_trans_pcie *trans_pcie = > > IWL_TRANS_GET_PCIE_TRANS(trans); > > + struct iwl_tso_page_info *info; > > struct page **page_ptr; > > struct page *ret; > > + dma_addr_t phys; > > > > page_ptr = (void *)((u8 *)skb->cb + trans_pcie- > > >txqs.page_offs); > > > > @@ -28,8 +30,22 @@ static struct page *get_workaround_page(struct > > iwl_trans *trans, > > if (!ret) > > return NULL; > > > > + info = IWL_TSO_PAGE_INFO(page_address(ret)); > > + > > + /* Create a DMA mapping for the page */ > > + phys = dma_map_page_attrs(trans->dev, ret, 0, PAGE_SIZE, > > + DMA_TO_DEVICE, > > DMA_ATTR_SKIP_CPU_SYNC); > > + if (unlikely(dma_mapping_error(trans->dev, phys))) { > > + __free_page(ret); > > + return NULL; > > + } > > + > > + /* Store physical address and set use count */ > > + info->dma_addr = phys; > > + refcount_set(&info->use_count, 1); > > + > > /* set the chaining pointer to the previous page if there > > */ > > - *(void **)((u8 *)page_address(ret) + PAGE_SIZE - > > sizeof(void *)) = *page_ptr; > > + info->next = *page_ptr; > > *page_ptr = ret; > > > > return ret; > > @@ -76,7 +92,7 @@ static int iwl_txq_gen2_set_tb_with_wa(struct > > iwl_trans *trans, > > * a new mapping for it so the device will not fail. > > */ > > > > - if (WARN_ON(len > PAGE_SIZE - sizeof(void *))) { > > + if (WARN_ON(len > IWL_TSO_PAGE_DATA_SIZE)) { > > ret = -ENOBUFS; > > goto unmap; > > } > > @@ -782,7 +798,7 @@ static void iwl_txq_gen2_unmap(struct iwl_trans > > *trans, int txq_id) > > struct sk_buff *skb = txq- > > >entries[idx].skb; > > > > if (!WARN_ON_ONCE(!skb)) > > - iwl_pcie_free_tso_page(trans, skb, > > cmd_meta); > > + iwl_pcie_free_tso_pages(trans, > > skb, cmd_meta); > > } > > iwl_txq_gen2_free_tfd(trans, txq); > > txq->read_ptr = iwl_txq_inc_wrap(trans, txq- > > >read_ptr); > > diff --git a/drivers/net/wireless/intel/iwlwifi/pcie/tx.c > > b/drivers/net/wireless/intel/iwlwifi/pcie/tx.c > > index ac545a39ad2a..e00d85866de9 100644 > > --- a/drivers/net/wireless/intel/iwlwifi/pcie/tx.c > > +++ b/drivers/net/wireless/intel/iwlwifi/pcie/tx.c > > @@ -209,8 +209,22 @@ static void > > iwl_pcie_clear_cmd_in_flight(struct iwl_trans *trans) > > spin_unlock(&trans_pcie->reg_lock); > > } > > > > -void iwl_pcie_free_tso_page(struct iwl_trans *trans, struct > > sk_buff *skb, > > - struct iwl_cmd_meta *cmd_meta) > > +static void iwl_pcie_free_and_unmap_tso_page(struct iwl_trans > > *trans, > > + struct page *page) > > +{ > > + struct iwl_tso_page_info *info = > > IWL_TSO_PAGE_INFO(page_address(page)); > > + > > + /* Decrease internal use count and unmap/free page if > > needed */ > > + if (refcount_dec_and_test(&info->use_count)) { > > + dma_unmap_page(trans->dev, info->dma_addr, > > PAGE_SIZE, > > + DMA_TO_DEVICE); > > + > > + __free_page(page); > > + } > > +} > > + > > +void iwl_pcie_free_tso_pages(struct iwl_trans *trans, struct > > sk_buff *skb, > > + struct iwl_cmd_meta *cmd_meta) > > { > > struct iwl_trans_pcie *trans_pcie = > > IWL_TRANS_GET_PCIE_TRANS(trans); > > struct page **page_ptr; > > @@ -221,10 +235,11 @@ void iwl_pcie_free_tso_page(struct iwl_trans > > *trans, struct sk_buff *skb, > > *page_ptr = NULL; > > > > while (next) { > > + struct iwl_tso_page_info *info; > > struct page *tmp = next; > > > > - next = *(void **)((u8 *)page_address(next) + > > PAGE_SIZE - > > - sizeof(void *)); > > + info = IWL_TSO_PAGE_INFO(page_address(next)); > > + next = info->next; > > > > /* Unmap the scatter gather list that is on the > > last page */ > > if (!next && cmd_meta->sg_offset) { > > @@ -236,7 +251,7 @@ void iwl_pcie_free_tso_page(struct iwl_trans > > *trans, struct sk_buff *skb, > > dma_unmap_sgtable(trans->dev, sgt, > > DMA_TO_DEVICE, 0); > > } > > > > - __free_page(tmp); > > + iwl_pcie_free_and_unmap_tso_page(trans, tmp); > > } > > } > > > > @@ -381,7 +396,7 @@ static void iwl_pcie_txq_unmap(struct iwl_trans > > *trans, int txq_id) > > if (WARN_ON_ONCE(!skb)) > > continue; > > > > - iwl_pcie_free_tso_page(trans, skb, > > cmd_meta); > > + iwl_pcie_free_tso_pages(trans, skb, > > cmd_meta); > > } > > iwl_txq_free_tfd(trans, txq); > > txq->read_ptr = iwl_txq_inc_wrap(trans, txq- > > >read_ptr); > > @@ -1722,7 +1737,9 @@ static void *iwl_pcie_get_page_hdr(struct > > iwl_trans *trans, > > { > > struct iwl_trans_pcie *trans_pcie = > > IWL_TRANS_GET_PCIE_TRANS(trans); > > struct iwl_tso_hdr_page *p = this_cpu_ptr(trans_pcie- > > >txqs.tso_hdr_page); > > + struct iwl_tso_page_info *info; > > struct page **page_ptr; > > + dma_addr_t phys; > > void *ret; > > > > page_ptr = (void *)((u8 *)skb->cb + trans_pcie- > > >txqs.page_offs); > > @@ -1743,23 +1760,42 @@ static void *iwl_pcie_get_page_hdr(struct > > iwl_trans *trans, > > * > > * (see also get_workaround_page() in tx-gen2.c) > > */ > > - if (p->pos + len < (u8 *)page_address(p->page) + PAGE_SIZE > > - > > - sizeof(void *)) > > + if (((unsigned long)p->pos & ~PAGE_MASK) + len < > > IWL_TSO_PAGE_DATA_SIZE) { > > + info = IWL_TSO_PAGE_INFO(page_address(ret)); > > goto out; > > + } > > > > /* We don't have enough room on this page, get a new one. > > */ > > - __free_page(p->page); > > + iwl_pcie_free_and_unmap_tso_page(trans, p->page); > > > > alloc: > > p->page = alloc_page(GFP_ATOMIC); > > if (!p->page) > > return NULL; > > p->pos = page_address(p->page); > > + > > + info = IWL_TSO_PAGE_INFO(page_address(ret)); > > + > > /* set the chaining pointer to NULL */ > > - *(void **)((u8 *)page_address(p->page) + PAGE_SIZE - > > sizeof(void *)) = NULL; > > + info->next = NULL; > > + > > + /* Create a DMA mapping for the page */ > > + phys = dma_map_page_attrs(trans->dev, p->page, 0, > > PAGE_SIZE, > > + DMA_TO_DEVICE, > > DMA_ATTR_SKIP_CPU_SYNC); > > + if (unlikely(dma_mapping_error(trans->dev, phys))) { > > + __free_page(p->page); > > + p->page = NULL; > > + > > + return NULL; > > + } > > + > > + /* Store physical address and set use count */ > > + info->dma_addr = phys; > > + refcount_set(&info->use_count, 1); > > out: > > *page_ptr = p->page; > > - get_page(p->page); > > + /* Return an internal reference for the caller */ > > + refcount_inc(&info->use_count); > > ret = p->pos; > > p->pos += len; > > > > @@ -2330,7 +2366,7 @@ void iwl_pcie_reclaim(struct iwl_trans > > *trans, int txq_id, int ssn, > > read_ptr, txq->read_ptr, txq_id)) > > continue; > > > > - iwl_pcie_free_tso_page(trans, skb, cmd_meta); > > + iwl_pcie_free_tso_pages(trans, skb, cmd_meta); > > > > __skb_queue_tail(skbs, skb); > > > Intel Deutschland GmbH Registered Address: Am Campeon 10, 85579 Neubiberg, Germany Tel: +49 89 99 8853-0, www.intel.de Managing Directors: Sean Fennelly, Jeffrey Schneiderman, Tiffany Doon Silva Chairperson of the Supervisory Board: Nicole Lau Registered Office: Munich Commercial Register: Amtsgericht Muenchen HRB 186928
On 7/10/24 23:15, Berg, Benjamin wrote: > Hi Ben, > > yes, you need to apply: > > commit 003eae5a28c6c9d50290a4ac9b955be912f24c9f > Author: Benjamin Berg <benjamin.berg@intel.com> > Date: Tue Jul 9 14:31:49 2024 +0200 > > wifi: iwlwifi: correctly reference TSO page information > > > I had not fully tested the last revision and the error slipped > unfortunately. Hello Benjamin, Sorry I did not notice that patch on the mailing list on my own. I re-applied the 6/18 and 7/18 patches, and the fix you mention above, and system appears stable. Thanks, Ben
On 7/11/24 09:09, Ben Greear wrote: > On 7/10/24 23:15, Berg, Benjamin wrote: >> Hi Ben, >> >> yes, you need to apply: >> >> commit 003eae5a28c6c9d50290a4ac9b955be912f24c9f >> Author: Benjamin Berg <benjamin.berg@intel.com> >> Date: Tue Jul 9 14:31:49 2024 +0200 >> >> wifi: iwlwifi: correctly reference TSO page information >> >> >> I had not fully tested the last revision and the error slipped >> unfortunately. > > Hello Benjamin, > > Sorry I did not notice that patch on the mailing list on my own. I re-applied > the 6/18 and 7/18 patches, and the fix you mention above, and system appears stable. Hello, We found another regression in our patched 6.10-ish kernel. Before I apply these 3 patches: wifi: iwlwifi: keep the TSO and workaround pages mapped wifi: iwlwifi: use already mapped data when TXing an AMSDU wifi: iwlwifi: correctly reference TSO page information Then I see around 4Gbps TCP upload on my test rig. After this, it runs very poorly, perhaps bouncing up to high speed for a second or two, but mostly averaging 80Mbps or so after it runs for a bit. What are these patches trying to solve, and are you able to see good TCP upload performance with these patches applied? Thanks, Ben
Hi, have you applied all patches? It'll be slightly slower until you apply the last patch in the series (and break completely unless you have the bugfix obviously). A slowdown to 80MBit/s seems a bit like we are getting packet loss and the AMSDU is simply not send out. I suppose, that could happen if some of the DMA mapping is failing, but I really do not have a good idea right now. I do think that these patches have gone through quite some verification and were working well for us. As for the value of the patchset, the thing to keep in mind is that we send out large SKBs that are spanning multiple pages. Before the patch, we would do mapping as below, where h1, … and m1, … are each mappings that only span a small part of a page (h_n is the header for the nth packet while m_n is the corresponding message data): TSO page: |- h1 -|- h2 -|- h3 -|- h4 -| … SKB page 1: |- m1 -|- m2 -| SKB page 2: |- m2 -|- m3 -|- m4 -| SKB page 3: |- m4 -|- … With the patches applied, each of the pages will be mapped in a single chunk. This brings down the number of DMA mappings and also avoids partial mappings which can very be particularly expensive depending on the IOMMU configuration. Only the last patch in the series removes the old mappings (obviously you also need the later bugfix commit). So you might see a slight slowdown if the patch "wifi: iwlwifi: use already mapped data when TXing an AMSDU" is not applied. But not a slowdown to 80MBit/s … Benjamin On Thu, 2024-07-18 at 12:20 -0700, Ben Greear wrote: > On 7/11/24 09:09, Ben Greear wrote: > > On 7/10/24 23:15, Berg, Benjamin wrote: > > > Hi Ben, > > > > > > yes, you need to apply: > > > > > > commit 003eae5a28c6c9d50290a4ac9b955be912f24c9f > > > Author: Benjamin Berg <benjamin.berg@intel.com> > > > Date: Tue Jul 9 14:31:49 2024 +0200 > > > > > > wifi: iwlwifi: correctly reference TSO page information > > > > > > > > > I had not fully tested the last revision and the error slipped > > > unfortunately. > > > > Hello Benjamin, > > > > Sorry I did not notice that patch on the mailing list on my own. I > > re-applied > > the 6/18 and 7/18 patches, and the fix you mention above, and > > system appears stable. > > Hello, > > We found another regression in our patched 6.10-ish kernel. Before I > apply these 3 > patches: > > wifi: iwlwifi: keep the TSO and workaround pages mapped > wifi: iwlwifi: use already mapped data when TXing an AMSDU > wifi: iwlwifi: correctly reference TSO page information > > Then I see around 4Gbps TCP upload on my test rig. After this, it > runs very poorly, > perhaps bouncing up to high speed for a second or two, but mostly > averaging 80Mbps > or so after it runs for a bit. > > What are these patches trying to solve, and are you able to see good > TCP upload performance > with these patches applied? > > Thanks, > Ben > Intel Deutschland GmbH Registered Address: Am Campeon 10, 85579 Neubiberg, Germany Tel: +49 89 99 8853-0, www.intel.de Managing Directors: Sean Fennelly, Jeffrey Schneiderman, Tiffany Doon Silva Chairperson of the Supervisory Board: Nicole Lau Registered Office: Munich Commercial Register: Amtsgericht Muenchen HRB 186928
On 7/19/24 00:22, Berg, Benjamin wrote: > Hi, > > have you applied all patches? It'll be slightly slower until you apply > the last patch in the series (and break completely unless you have the > bugfix obviously). I applied those three in my email below, on top of everything else that seemed pertintent to iwlwifi/mac80211, but always possible I mis-applied something or missed another patch. I think I'll just leave those out and test again when i rebase on 6.11. If you do start getting other reports of slowdowns, however...I think these will be the suspect patches. Thanks, Ben > > A slowdown to 80MBit/s seems a bit like we are getting packet loss and > the AMSDU is simply not send out. I suppose, that could happen if some > of the DMA mapping is failing, but I really do not have a good idea > right now. > > I do think that these patches have gone through quite some verification > and were working well for us. > > > As for the value of the patchset, the thing to keep in mind is that we > send out large SKBs that are spanning multiple pages. Before the patch, > we would do mapping as below, where h1, … and m1, … are each mappings > that only span a small part of a page (h_n is the header for the nth > packet while m_n is the corresponding message data): > > TSO page: |- h1 -|- h2 -|- h3 -|- h4 -| … > SKB page 1: |- m1 -|- m2 -| > SKB page 2: |- m2 -|- m3 -|- m4 -| > SKB page 3: |- m4 -|- … > > With the patches applied, each of the pages will be mapped in a single > chunk. This brings down the number of DMA mappings and also avoids > partial mappings which can very be particularly expensive depending on > the IOMMU configuration. > > Only the last patch in the series removes the old mappings (obviously > you also need the later bugfix commit). So you might see a slight > slowdown if the patch "wifi: iwlwifi: use already mapped data when > TXing an AMSDU" is not applied. > > But not a slowdown to 80MBit/s … > > Benjamin > > On Thu, 2024-07-18 at 12:20 -0700, Ben Greear wrote: >> On 7/11/24 09:09, Ben Greear wrote: >>> On 7/10/24 23:15, Berg, Benjamin wrote: >>>> Hi Ben, >>>> >>>> yes, you need to apply: >>>> >>>> commit 003eae5a28c6c9d50290a4ac9b955be912f24c9f >>>> Author: Benjamin Berg <benjamin.berg@intel.com> >>>> Date: Tue Jul 9 14:31:49 2024 +0200 >>>> >>>> wifi: iwlwifi: correctly reference TSO page information >>>> >>>> >>>> I had not fully tested the last revision and the error slipped >>>> unfortunately. >>> >>> Hello Benjamin, >>> >>> Sorry I did not notice that patch on the mailing list on my own. I >>> re-applied >>> the 6/18 and 7/18 patches, and the fix you mention above, and >>> system appears stable. >> >> Hello, >> >> We found another regression in our patched 6.10-ish kernel. Before I >> apply these 3 >> patches: >> >> wifi: iwlwifi: keep the TSO and workaround pages mapped >> wifi: iwlwifi: use already mapped data when TXing an AMSDU >> wifi: iwlwifi: correctly reference TSO page information >> >> Then I see around 4Gbps TCP upload on my test rig. After this, it >> runs very poorly, >> perhaps bouncing up to high speed for a second or two, but mostly >> averaging 80Mbps >> or so after it runs for a bit. >> >> What are these patches trying to solve, and are you able to see good >> TCP upload performance >> with these patches applied? >> >> Thanks, >> Ben >> > > Intel Deutschland GmbH > Registered Address: Am Campeon 10, 85579 Neubiberg, Germany > Tel: +49 89 99 8853-0, www.intel.de > Managing Directors: Sean Fennelly, Jeffrey Schneiderman, Tiffany Doon Silva > Chairperson of the Supervisory Board: Nicole Lau > Registered Office: Munich > Commercial Register: Amtsgericht Muenchen HRB 186928
diff --git a/drivers/net/wireless/intel/iwlwifi/pcie/internal.h b/drivers/net/wireless/intel/iwlwifi/pcie/internal.h index d63c1c284f70..b59de4f80b4b 100644 --- a/drivers/net/wireless/intel/iwlwifi/pcie/internal.h +++ b/drivers/net/wireless/intel/iwlwifi/pcie/internal.h @@ -603,6 +603,22 @@ struct iwl_tso_hdr_page { u8 *pos; }; +/* + * Note that we put this struct *last* in the page. By doing that, we ensure + * that no TB referencing this page can trigger the 32-bit boundary hardware + * bug. + */ +struct iwl_tso_page_info { + dma_addr_t dma_addr; + struct page *next; + refcount_t use_count; +}; + +#define IWL_TSO_PAGE_DATA_SIZE (PAGE_SIZE - sizeof(struct iwl_tso_page_info)) +#define IWL_TSO_PAGE_INFO(addr) \ + ((struct iwl_tso_page_info *)(((unsigned long)addr & PAGE_MASK) + \ + IWL_TSO_PAGE_DATA_SIZE)) + int iwl_pcie_tx_init(struct iwl_trans *trans); void iwl_pcie_tx_start(struct iwl_trans *trans, u32 scd_base_addr); int iwl_pcie_tx_stop(struct iwl_trans *trans); @@ -628,8 +644,18 @@ struct sg_table *iwl_pcie_prep_tso(struct iwl_trans *trans, struct sk_buff *skb, struct iwl_cmd_meta *cmd_meta, u8 **hdr, unsigned int hdr_room); -void iwl_pcie_free_tso_page(struct iwl_trans *trans, struct sk_buff *skb, - struct iwl_cmd_meta *cmd_meta); +void iwl_pcie_free_tso_pages(struct iwl_trans *trans, struct sk_buff *skb, + struct iwl_cmd_meta *cmd_meta); + +static inline dma_addr_t iwl_pcie_get_tso_page_phys(void *addr) +{ + dma_addr_t res; + + res = IWL_TSO_PAGE_INFO(addr)->dma_addr; + res += (unsigned long)addr & ~PAGE_MASK; + + return res; +} static inline dma_addr_t iwl_txq_get_first_tb_dma(struct iwl_txq *txq, int idx) diff --git a/drivers/net/wireless/intel/iwlwifi/pcie/tx-gen2.c b/drivers/net/wireless/intel/iwlwifi/pcie/tx-gen2.c index 3dcce6a8da50..10ee2c328458 100644 --- a/drivers/net/wireless/intel/iwlwifi/pcie/tx-gen2.c +++ b/drivers/net/wireless/intel/iwlwifi/pcie/tx-gen2.c @@ -19,8 +19,10 @@ static struct page *get_workaround_page(struct iwl_trans *trans, struct sk_buff *skb) { struct iwl_trans_pcie *trans_pcie = IWL_TRANS_GET_PCIE_TRANS(trans); + struct iwl_tso_page_info *info; struct page **page_ptr; struct page *ret; + dma_addr_t phys; page_ptr = (void *)((u8 *)skb->cb + trans_pcie->txqs.page_offs); @@ -28,8 +30,22 @@ static struct page *get_workaround_page(struct iwl_trans *trans, if (!ret) return NULL; + info = IWL_TSO_PAGE_INFO(page_address(ret)); + + /* Create a DMA mapping for the page */ + phys = dma_map_page_attrs(trans->dev, ret, 0, PAGE_SIZE, + DMA_TO_DEVICE, DMA_ATTR_SKIP_CPU_SYNC); + if (unlikely(dma_mapping_error(trans->dev, phys))) { + __free_page(ret); + return NULL; + } + + /* Store physical address and set use count */ + info->dma_addr = phys; + refcount_set(&info->use_count, 1); + /* set the chaining pointer to the previous page if there */ - *(void **)((u8 *)page_address(ret) + PAGE_SIZE - sizeof(void *)) = *page_ptr; + info->next = *page_ptr; *page_ptr = ret; return ret; @@ -76,7 +92,7 @@ static int iwl_txq_gen2_set_tb_with_wa(struct iwl_trans *trans, * a new mapping for it so the device will not fail. */ - if (WARN_ON(len > PAGE_SIZE - sizeof(void *))) { + if (WARN_ON(len > IWL_TSO_PAGE_DATA_SIZE)) { ret = -ENOBUFS; goto unmap; } @@ -782,7 +798,7 @@ static void iwl_txq_gen2_unmap(struct iwl_trans *trans, int txq_id) struct sk_buff *skb = txq->entries[idx].skb; if (!WARN_ON_ONCE(!skb)) - iwl_pcie_free_tso_page(trans, skb, cmd_meta); + iwl_pcie_free_tso_pages(trans, skb, cmd_meta); } iwl_txq_gen2_free_tfd(trans, txq); txq->read_ptr = iwl_txq_inc_wrap(trans, txq->read_ptr); diff --git a/drivers/net/wireless/intel/iwlwifi/pcie/tx.c b/drivers/net/wireless/intel/iwlwifi/pcie/tx.c index ac545a39ad2a..e00d85866de9 100644 --- a/drivers/net/wireless/intel/iwlwifi/pcie/tx.c +++ b/drivers/net/wireless/intel/iwlwifi/pcie/tx.c @@ -209,8 +209,22 @@ static void iwl_pcie_clear_cmd_in_flight(struct iwl_trans *trans) spin_unlock(&trans_pcie->reg_lock); } -void iwl_pcie_free_tso_page(struct iwl_trans *trans, struct sk_buff *skb, - struct iwl_cmd_meta *cmd_meta) +static void iwl_pcie_free_and_unmap_tso_page(struct iwl_trans *trans, + struct page *page) +{ + struct iwl_tso_page_info *info = IWL_TSO_PAGE_INFO(page_address(page)); + + /* Decrease internal use count and unmap/free page if needed */ + if (refcount_dec_and_test(&info->use_count)) { + dma_unmap_page(trans->dev, info->dma_addr, PAGE_SIZE, + DMA_TO_DEVICE); + + __free_page(page); + } +} + +void iwl_pcie_free_tso_pages(struct iwl_trans *trans, struct sk_buff *skb, + struct iwl_cmd_meta *cmd_meta) { struct iwl_trans_pcie *trans_pcie = IWL_TRANS_GET_PCIE_TRANS(trans); struct page **page_ptr; @@ -221,10 +235,11 @@ void iwl_pcie_free_tso_page(struct iwl_trans *trans, struct sk_buff *skb, *page_ptr = NULL; while (next) { + struct iwl_tso_page_info *info; struct page *tmp = next; - next = *(void **)((u8 *)page_address(next) + PAGE_SIZE - - sizeof(void *)); + info = IWL_TSO_PAGE_INFO(page_address(next)); + next = info->next; /* Unmap the scatter gather list that is on the last page */ if (!next && cmd_meta->sg_offset) { @@ -236,7 +251,7 @@ void iwl_pcie_free_tso_page(struct iwl_trans *trans, struct sk_buff *skb, dma_unmap_sgtable(trans->dev, sgt, DMA_TO_DEVICE, 0); } - __free_page(tmp); + iwl_pcie_free_and_unmap_tso_page(trans, tmp); } } @@ -381,7 +396,7 @@ static void iwl_pcie_txq_unmap(struct iwl_trans *trans, int txq_id) if (WARN_ON_ONCE(!skb)) continue; - iwl_pcie_free_tso_page(trans, skb, cmd_meta); + iwl_pcie_free_tso_pages(trans, skb, cmd_meta); } iwl_txq_free_tfd(trans, txq); txq->read_ptr = iwl_txq_inc_wrap(trans, txq->read_ptr); @@ -1722,7 +1737,9 @@ static void *iwl_pcie_get_page_hdr(struct iwl_trans *trans, { struct iwl_trans_pcie *trans_pcie = IWL_TRANS_GET_PCIE_TRANS(trans); struct iwl_tso_hdr_page *p = this_cpu_ptr(trans_pcie->txqs.tso_hdr_page); + struct iwl_tso_page_info *info; struct page **page_ptr; + dma_addr_t phys; void *ret; page_ptr = (void *)((u8 *)skb->cb + trans_pcie->txqs.page_offs); @@ -1743,23 +1760,42 @@ static void *iwl_pcie_get_page_hdr(struct iwl_trans *trans, * * (see also get_workaround_page() in tx-gen2.c) */ - if (p->pos + len < (u8 *)page_address(p->page) + PAGE_SIZE - - sizeof(void *)) + if (((unsigned long)p->pos & ~PAGE_MASK) + len < IWL_TSO_PAGE_DATA_SIZE) { + info = IWL_TSO_PAGE_INFO(page_address(ret)); goto out; + } /* We don't have enough room on this page, get a new one. */ - __free_page(p->page); + iwl_pcie_free_and_unmap_tso_page(trans, p->page); alloc: p->page = alloc_page(GFP_ATOMIC); if (!p->page) return NULL; p->pos = page_address(p->page); + + info = IWL_TSO_PAGE_INFO(page_address(ret)); + /* set the chaining pointer to NULL */ - *(void **)((u8 *)page_address(p->page) + PAGE_SIZE - sizeof(void *)) = NULL; + info->next = NULL; + + /* Create a DMA mapping for the page */ + phys = dma_map_page_attrs(trans->dev, p->page, 0, PAGE_SIZE, + DMA_TO_DEVICE, DMA_ATTR_SKIP_CPU_SYNC); + if (unlikely(dma_mapping_error(trans->dev, phys))) { + __free_page(p->page); + p->page = NULL; + + return NULL; + } + + /* Store physical address and set use count */ + info->dma_addr = phys; + refcount_set(&info->use_count, 1); out: *page_ptr = p->page; - get_page(p->page); + /* Return an internal reference for the caller */ + refcount_inc(&info->use_count); ret = p->pos; p->pos += len; @@ -2330,7 +2366,7 @@ void iwl_pcie_reclaim(struct iwl_trans *trans, int txq_id, int ssn, read_ptr, txq->read_ptr, txq_id)) continue; - iwl_pcie_free_tso_page(trans, skb, cmd_meta); + iwl_pcie_free_tso_pages(trans, skb, cmd_meta); __skb_queue_tail(skbs, skb);