diff mbox series

[06/18] wifi: iwlwifi: keep the TSO and workaround pages mapped

Message ID 20240703125541.7ced468fe431.Ibb109867dc680c37fe8d891e9ab9ef64ed5c5d2d@changeid
State New
Headers show
Series wifi: iwlwifi: updates - 03-07-2024 | expand

Commit Message

Korenblit, Miriam Rachel July 3, 2024, 9:58 a.m. UTC
From: Benjamin Berg <benjamin.berg@intel.com>

Map the pages when allocating them so that we will not need to map each
of the used fragments at a later point.

For now the mapping is not used, this will be changed in a later commit.

Signed-off-by: Benjamin Berg <benjamin.berg@intel.com>
Signed-off-by: Miri Korenblit <miriam.rachel.korenblit@intel.com>
Reviewed-by: Johannes Berg <johannes.berg@intel.com>
---
 .../wireless/intel/iwlwifi/pcie/internal.h    | 30 +++++++++-
 .../net/wireless/intel/iwlwifi/pcie/tx-gen2.c | 22 ++++++-
 drivers/net/wireless/intel/iwlwifi/pcie/tx.c  | 60 +++++++++++++++----
 3 files changed, 95 insertions(+), 17 deletions(-)

Comments

Ben Greear July 10, 2024, 9:41 p.m. UTC | #1
On 7/3/24 02:58, Miri Korenblit wrote:
> From: Benjamin Berg <benjamin.berg@intel.com>
> 
> Map the pages when allocating them so that we will not need to map each
> of the used fragments at a later point.
> 
> For now the mapping is not used, this will be changed in a later commit.
> 
> Signed-off-by: Benjamin Berg <benjamin.berg@intel.com>
> Signed-off-by: Miri Korenblit <miriam.rachel.korenblit@intel.com>
> Reviewed-by: Johannes Berg <johannes.berg@intel.com>

Hello,

I patched every iwlwifi and mac80211 patch I found on the mailing list
up to this one into my 6.10-ish tree.

I see immediate and reproducible crash when I send TCP traffic on be200
radio.  I bisected it to this particular patch.

Previously I was only testing download direction and it ran fine, and UDP
traffic upload seems to run OK, so it must be triggered by large TCP
frames, and based on what this patch is messing with, that makes sense.
Always possible I messed up when applying patches of course.


Oops: general protection fault, probably for non-canonical address 0x5088000000ff0: 0000 [#1] PREEMPT SMP
CPU: 3 PID: 800 Comm: irq/181-iwlwifi Tainted: G        W          6.10.0-rc7+ #17
Hardware name: Default string Default string/SKYBAY, BIOS 5.12 02/15/2023
RIP: 0010:iwl_pcie_prep_tso+0x213/0x2b0 [iwlwifi]
Code: 01 e0 49 c1 e4 06 41 b8 01 00 00 00 48 c1 f8 06 48 c1 e0 0c 48 01 d0 31 d2 48 89 45 08 48 b8 e8 0f 00 00 80 88 ff ff0
RSP: 0018:ffffc900001e0678 EFLAGS: 00010207
RAX: ffff888000000fe8 RBX: ffff8881263a3500 RCX: 0000000000001000
RDX: 0000000000000000 RSI: ffffffff82b6ee60 RDI: ffff8881128d3748
RBP: ffffe8ffffb882d0 R08: 0000000000000001 R09: 0000000000000020
R10: 0000000000000002 R11: ffff8881128d35b8 R12: 0005088000000fe8
R13: ffff888111b90028 R14: ffff88810f664438 R15: ffff8881263a3540
FS:  0000000000000000(0000) GS:ffff88845db80000(0000) knlGS:0000000000000000
CS:  0010 DS: 0000 ES: 0000 CR0: 0000000080050033
CR2: 0000000040f70fe0 CR3: 0000000002a50002 CR4: 00000000003706f0
DR0: 0000000000000000 DR1: 0000000000000000 DR2: 0000000000000000
DR3: 0000000000000000 DR6: 00000000fffe0ff0 DR7: 0000000000000400
Call Trace:
  <IRQ>
  ? die_addr+0x2d/0x80
  ? exc_general_protection+0x1aa/0x3f0
  ? asm_exc_general_protection+0x22/0x30
  ? iwl_pcie_prep_tso+0x213/0x2b0 [iwlwifi]
  ? iwl_pcie_prep_tso+0x1b2/0x2b0 [iwlwifi]
  iwl_txq_gen2_tx+0x7ec/0xf10 [iwlwifi]
  iwl_mvm_tx_mpdu+0x1da/0x560 [iwlmvm]
  iwl_mvm_tx_skb_sta+0x34c/0x540 [iwlmvm]
  iwl_mvm_tx_skb+0x12/0x30 [iwlmvm]
  iwl_mvm_mac_itxq_xmit+0xbe/0x1e0 [iwlmvm]
  ieee80211_queue_skb+0x557/0x6d0 [mac80211]
  __ieee80211_xmit_fast+0x7b2/0xad0 [mac80211]
  ? lock_acquire+0xc7/0x2d0
  ? lock_is_held_type+0xa5/0x110
  ? find_held_lock+0x2b/0x80
  ? skb_mac_gso_segment+0x130/0x1e0
  ? lock_release+0xc6/0x290
  ? skb_mac_gso_segment+0x13a/0x1e0
  __ieee80211_subif_start_xmit+0x22d/0x5a0 [mac80211]
  ieee80211_subif_start_xmit+0x57/0x570 [mac80211]
  ? dev_queue_xmit_nit+0x2cd/0x3a0
  ? lock_release+0xc6/0x290
  ? dev_hard_start_xmit+0x76/0x250
  dev_hard_start_xmit+0x76/0x250
  __dev_queue_xmit+0x246/0x1270
  ? lockdep_hardirqs_on_prepare+0xa7/0x170
  ? eth_header+0x21/0xb0
  ip_finish_output2+0x230/0xaa0
  __ip_queue_xmit+0x1e6/0x760
  __tcp_transmit_skb+0x521/0xdc0
  ? find_held_lock+0x2b/0x80
  tcp_write_xmit+0x4d7/0x17a0
  ? lock_is_held_type+0xa5/0x110
  tcp_tsq_handler+0x8a/0xd0
  tcp_tasklet_func+0xbc/0x140
  tasklet_action_common.constprop.0+0xfd/0x2f0
  handle_softirqs+0xd0/0x440
  ? iwl_pcie_irq_rx_msix_handler+0xe5/0x140 [iwlwifi]
  ? iwl_pcie_irq_rx_msix_handler+0x5d/0x140 [iwlwifi]
  do_softirq.part.0+0x3a/0x90
  </IRQ>
  <TASK>
  __local_bh_enable_ip+0xbd/0xd0
  ? iwl_pcie_irq_rx_msix_handler+0xe5/0x140 [iwlwifi]
  iwl_pcie_irq_rx_msix_handler+0xed/0x140 [iwlwifi]
  ? irq_thread+0x8c/0x200
  irq_thread_fn+0x14/0x50
  ? irq_thread+0x8c/0x200
  irq_thread+0x12c/0x200
  ? disable_irq_nosync+0x10/0x10
  ? irq_set_affinity_notifier+0x140/0x140
  ? irq_check_status_bit+0xf0/0xf0
  kthread+0xd7/0x110
  ? kthread_insert_work_sanity_check+0x50/0x50
  ret_from_fork+0x28/0x40
  ? kthread_insert_work_sanity_check+0x50/0x50
  ret_from_fork_asm+0x11/0x20
  </TASK>


> ---
>   .../wireless/intel/iwlwifi/pcie/internal.h    | 30 +++++++++-
>   .../net/wireless/intel/iwlwifi/pcie/tx-gen2.c | 22 ++++++-
>   drivers/net/wireless/intel/iwlwifi/pcie/tx.c  | 60 +++++++++++++++----
>   3 files changed, 95 insertions(+), 17 deletions(-)
> 
> diff --git a/drivers/net/wireless/intel/iwlwifi/pcie/internal.h b/drivers/net/wireless/intel/iwlwifi/pcie/internal.h
> index d63c1c284f70..b59de4f80b4b 100644
> --- a/drivers/net/wireless/intel/iwlwifi/pcie/internal.h
> +++ b/drivers/net/wireless/intel/iwlwifi/pcie/internal.h
> @@ -603,6 +603,22 @@ struct iwl_tso_hdr_page {
>   	u8 *pos;
>   };
>   
> +/*
> + * Note that we put this struct *last* in the page. By doing that, we ensure
> + * that no TB referencing this page can trigger the 32-bit boundary hardware
> + * bug.
> + */
> +struct iwl_tso_page_info {
> +	dma_addr_t dma_addr;
> +	struct page *next;
> +	refcount_t use_count;
> +};
> +
> +#define IWL_TSO_PAGE_DATA_SIZE	(PAGE_SIZE - sizeof(struct iwl_tso_page_info))
> +#define IWL_TSO_PAGE_INFO(addr)	\
> +	((struct iwl_tso_page_info *)(((unsigned long)addr & PAGE_MASK) + \
> +				      IWL_TSO_PAGE_DATA_SIZE))
> +
>   int iwl_pcie_tx_init(struct iwl_trans *trans);
>   void iwl_pcie_tx_start(struct iwl_trans *trans, u32 scd_base_addr);
>   int iwl_pcie_tx_stop(struct iwl_trans *trans);
> @@ -628,8 +644,18 @@ struct sg_table *iwl_pcie_prep_tso(struct iwl_trans *trans, struct sk_buff *skb,
>   				   struct iwl_cmd_meta *cmd_meta,
>   				   u8 **hdr, unsigned int hdr_room);
>   
> -void iwl_pcie_free_tso_page(struct iwl_trans *trans, struct sk_buff *skb,
> -			    struct iwl_cmd_meta *cmd_meta);
> +void iwl_pcie_free_tso_pages(struct iwl_trans *trans, struct sk_buff *skb,
> +			     struct iwl_cmd_meta *cmd_meta);
> +
> +static inline dma_addr_t iwl_pcie_get_tso_page_phys(void *addr)
> +{
> +	dma_addr_t res;
> +
> +	res = IWL_TSO_PAGE_INFO(addr)->dma_addr;
> +	res += (unsigned long)addr & ~PAGE_MASK;
> +
> +	return res;
> +}
>   
>   static inline dma_addr_t
>   iwl_txq_get_first_tb_dma(struct iwl_txq *txq, int idx)
> diff --git a/drivers/net/wireless/intel/iwlwifi/pcie/tx-gen2.c b/drivers/net/wireless/intel/iwlwifi/pcie/tx-gen2.c
> index 3dcce6a8da50..10ee2c328458 100644
> --- a/drivers/net/wireless/intel/iwlwifi/pcie/tx-gen2.c
> +++ b/drivers/net/wireless/intel/iwlwifi/pcie/tx-gen2.c
> @@ -19,8 +19,10 @@ static struct page *get_workaround_page(struct iwl_trans *trans,
>   					struct sk_buff *skb)
>   {
>   	struct iwl_trans_pcie *trans_pcie = IWL_TRANS_GET_PCIE_TRANS(trans);
> +	struct iwl_tso_page_info *info;
>   	struct page **page_ptr;
>   	struct page *ret;
> +	dma_addr_t phys;
>   
>   	page_ptr = (void *)((u8 *)skb->cb + trans_pcie->txqs.page_offs);
>   
> @@ -28,8 +30,22 @@ static struct page *get_workaround_page(struct iwl_trans *trans,
>   	if (!ret)
>   		return NULL;
>   
> +	info = IWL_TSO_PAGE_INFO(page_address(ret));
> +
> +	/* Create a DMA mapping for the page */
> +	phys = dma_map_page_attrs(trans->dev, ret, 0, PAGE_SIZE,
> +				  DMA_TO_DEVICE, DMA_ATTR_SKIP_CPU_SYNC);
> +	if (unlikely(dma_mapping_error(trans->dev, phys))) {
> +		__free_page(ret);
> +		return NULL;
> +	}
> +
> +	/* Store physical address and set use count */
> +	info->dma_addr = phys;
> +	refcount_set(&info->use_count, 1);
> +
>   	/* set the chaining pointer to the previous page if there */
> -	*(void **)((u8 *)page_address(ret) + PAGE_SIZE - sizeof(void *)) = *page_ptr;
> +	info->next = *page_ptr;
>   	*page_ptr = ret;
>   
>   	return ret;
> @@ -76,7 +92,7 @@ static int iwl_txq_gen2_set_tb_with_wa(struct iwl_trans *trans,
>   	 * a new mapping for it so the device will not fail.
>   	 */
>   
> -	if (WARN_ON(len > PAGE_SIZE - sizeof(void *))) {
> +	if (WARN_ON(len > IWL_TSO_PAGE_DATA_SIZE)) {
>   		ret = -ENOBUFS;
>   		goto unmap;
>   	}
> @@ -782,7 +798,7 @@ static void iwl_txq_gen2_unmap(struct iwl_trans *trans, int txq_id)
>   			struct sk_buff *skb = txq->entries[idx].skb;
>   
>   			if (!WARN_ON_ONCE(!skb))
> -				iwl_pcie_free_tso_page(trans, skb, cmd_meta);
> +				iwl_pcie_free_tso_pages(trans, skb, cmd_meta);
>   		}
>   		iwl_txq_gen2_free_tfd(trans, txq);
>   		txq->read_ptr = iwl_txq_inc_wrap(trans, txq->read_ptr);
> diff --git a/drivers/net/wireless/intel/iwlwifi/pcie/tx.c b/drivers/net/wireless/intel/iwlwifi/pcie/tx.c
> index ac545a39ad2a..e00d85866de9 100644
> --- a/drivers/net/wireless/intel/iwlwifi/pcie/tx.c
> +++ b/drivers/net/wireless/intel/iwlwifi/pcie/tx.c
> @@ -209,8 +209,22 @@ static void iwl_pcie_clear_cmd_in_flight(struct iwl_trans *trans)
>   	spin_unlock(&trans_pcie->reg_lock);
>   }
>   
> -void iwl_pcie_free_tso_page(struct iwl_trans *trans, struct sk_buff *skb,
> -			    struct iwl_cmd_meta *cmd_meta)
> +static void iwl_pcie_free_and_unmap_tso_page(struct iwl_trans *trans,
> +					     struct page *page)
> +{
> +	struct iwl_tso_page_info *info = IWL_TSO_PAGE_INFO(page_address(page));
> +
> +	/* Decrease internal use count and unmap/free page if needed */
> +	if (refcount_dec_and_test(&info->use_count)) {
> +		dma_unmap_page(trans->dev, info->dma_addr, PAGE_SIZE,
> +			       DMA_TO_DEVICE);
> +
> +		__free_page(page);
> +	}
> +}
> +
> +void iwl_pcie_free_tso_pages(struct iwl_trans *trans, struct sk_buff *skb,
> +			     struct iwl_cmd_meta *cmd_meta)
>   {
>   	struct iwl_trans_pcie *trans_pcie = IWL_TRANS_GET_PCIE_TRANS(trans);
>   	struct page **page_ptr;
> @@ -221,10 +235,11 @@ void iwl_pcie_free_tso_page(struct iwl_trans *trans, struct sk_buff *skb,
>   	*page_ptr = NULL;
>   
>   	while (next) {
> +		struct iwl_tso_page_info *info;
>   		struct page *tmp = next;
>   
> -		next = *(void **)((u8 *)page_address(next) + PAGE_SIZE -
> -				  sizeof(void *));
> +		info = IWL_TSO_PAGE_INFO(page_address(next));
> +		next = info->next;
>   
>   		/* Unmap the scatter gather list that is on the last page */
>   		if (!next && cmd_meta->sg_offset) {
> @@ -236,7 +251,7 @@ void iwl_pcie_free_tso_page(struct iwl_trans *trans, struct sk_buff *skb,
>   			dma_unmap_sgtable(trans->dev, sgt, DMA_TO_DEVICE, 0);
>   		}
>   
> -		__free_page(tmp);
> +		iwl_pcie_free_and_unmap_tso_page(trans, tmp);
>   	}
>   }
>   
> @@ -381,7 +396,7 @@ static void iwl_pcie_txq_unmap(struct iwl_trans *trans, int txq_id)
>   			if (WARN_ON_ONCE(!skb))
>   				continue;
>   
> -			iwl_pcie_free_tso_page(trans, skb, cmd_meta);
> +			iwl_pcie_free_tso_pages(trans, skb, cmd_meta);
>   		}
>   		iwl_txq_free_tfd(trans, txq);
>   		txq->read_ptr = iwl_txq_inc_wrap(trans, txq->read_ptr);
> @@ -1722,7 +1737,9 @@ static void *iwl_pcie_get_page_hdr(struct iwl_trans *trans,
>   {
>   	struct iwl_trans_pcie *trans_pcie = IWL_TRANS_GET_PCIE_TRANS(trans);
>   	struct iwl_tso_hdr_page *p = this_cpu_ptr(trans_pcie->txqs.tso_hdr_page);
> +	struct iwl_tso_page_info *info;
>   	struct page **page_ptr;
> +	dma_addr_t phys;
>   	void *ret;
>   
>   	page_ptr = (void *)((u8 *)skb->cb + trans_pcie->txqs.page_offs);
> @@ -1743,23 +1760,42 @@ static void *iwl_pcie_get_page_hdr(struct iwl_trans *trans,
>   	 *
>   	 * (see also get_workaround_page() in tx-gen2.c)
>   	 */
> -	if (p->pos + len < (u8 *)page_address(p->page) + PAGE_SIZE -
> -			   sizeof(void *))
> +	if (((unsigned long)p->pos & ~PAGE_MASK) + len < IWL_TSO_PAGE_DATA_SIZE) {
> +		info = IWL_TSO_PAGE_INFO(page_address(ret));
>   		goto out;
> +	}
>   
>   	/* We don't have enough room on this page, get a new one. */
> -	__free_page(p->page);
> +	iwl_pcie_free_and_unmap_tso_page(trans, p->page);
>   
>   alloc:
>   	p->page = alloc_page(GFP_ATOMIC);
>   	if (!p->page)
>   		return NULL;
>   	p->pos = page_address(p->page);
> +
> +	info = IWL_TSO_PAGE_INFO(page_address(ret));
> +
>   	/* set the chaining pointer to NULL */
> -	*(void **)((u8 *)page_address(p->page) + PAGE_SIZE - sizeof(void *)) = NULL;
> +	info->next = NULL;
> +
> +	/* Create a DMA mapping for the page */
> +	phys = dma_map_page_attrs(trans->dev, p->page, 0, PAGE_SIZE,
> +				  DMA_TO_DEVICE, DMA_ATTR_SKIP_CPU_SYNC);
> +	if (unlikely(dma_mapping_error(trans->dev, phys))) {
> +		__free_page(p->page);
> +		p->page = NULL;
> +
> +		return NULL;
> +	}
> +
> +	/* Store physical address and set use count */
> +	info->dma_addr = phys;
> +	refcount_set(&info->use_count, 1);
>   out:
>   	*page_ptr = p->page;
> -	get_page(p->page);
> +	/* Return an internal reference for the caller */
> +	refcount_inc(&info->use_count);
>   	ret = p->pos;
>   	p->pos += len;
>   
> @@ -2330,7 +2366,7 @@ void iwl_pcie_reclaim(struct iwl_trans *trans, int txq_id, int ssn,
>   			      read_ptr, txq->read_ptr, txq_id))
>   			continue;
>   
> -		iwl_pcie_free_tso_page(trans, skb, cmd_meta);
> +		iwl_pcie_free_tso_pages(trans, skb, cmd_meta);
>   
>   		__skb_queue_tail(skbs, skb);
>
Berg, Benjamin July 11, 2024, 6:15 a.m. UTC | #2
Hi Ben,

yes, you need to apply:

commit 003eae5a28c6c9d50290a4ac9b955be912f24c9f
Author: Benjamin Berg <benjamin.berg@intel.com>
Date:   Tue Jul 9 14:31:49 2024 +0200

    wifi: iwlwifi: correctly reference TSO page information


I had not fully tested the last revision and the error slipped
unfortunately.

Benjamin


On Wed, 2024-07-10 at 14:41 -0700, Ben Greear wrote:
> On 7/3/24 02:58, Miri Korenblit wrote:
> > From: Benjamin Berg <benjamin.berg@intel.com>
> > 
> > Map the pages when allocating them so that we will not need to map
> > each
> > of the used fragments at a later point.
> > 
> > For now the mapping is not used, this will be changed in a later
> > commit.
> > 
> > Signed-off-by: Benjamin Berg <benjamin.berg@intel.com>
> > Signed-off-by: Miri Korenblit <miriam.rachel.korenblit@intel.com>
> > Reviewed-by: Johannes Berg <johannes.berg@intel.com>
> 
> Hello,
> 
> I patched every iwlwifi and mac80211 patch I found on the mailing
> list
> up to this one into my 6.10-ish tree.
> 
> I see immediate and reproducible crash when I send TCP traffic on
> be200
> radio.  I bisected it to this particular patch.
> 
> Previously I was only testing download direction and it ran fine, and
> UDP
> traffic upload seems to run OK, so it must be triggered by large TCP
> frames, and based on what this patch is messing with, that makes
> sense.
> Always possible I messed up when applying patches of course.
> 
> 
> Oops: general protection fault, probably for non-canonical address
> 0x5088000000ff0: 0000 [#1] PREEMPT SMP
> CPU: 3 PID: 800 Comm: irq/181-iwlwifi Tainted: G        W         
> 6.10.0-rc7+ #17
> Hardware name: Default string Default string/SKYBAY, BIOS 5.12
> 02/15/2023
> RIP: 0010:iwl_pcie_prep_tso+0x213/0x2b0 [iwlwifi]
> Code: 01 e0 49 c1 e4 06 41 b8 01 00 00 00 48 c1 f8 06 48 c1 e0 0c 48
> 01 d0 31 d2 48 89 45 08 48 b8 e8 0f 00 00 80 88 ff ff0
> RSP: 0018:ffffc900001e0678 EFLAGS: 00010207
> RAX: ffff888000000fe8 RBX: ffff8881263a3500 RCX: 0000000000001000
> RDX: 0000000000000000 RSI: ffffffff82b6ee60 RDI: ffff8881128d3748
> RBP: ffffe8ffffb882d0 R08: 0000000000000001 R09: 0000000000000020
> R10: 0000000000000002 R11: ffff8881128d35b8 R12: 0005088000000fe8
> R13: ffff888111b90028 R14: ffff88810f664438 R15: ffff8881263a3540
> FS:  0000000000000000(0000) GS:ffff88845db80000(0000)
> knlGS:0000000000000000
> CS:  0010 DS: 0000 ES: 0000 CR0: 0000000080050033
> CR2: 0000000040f70fe0 CR3: 0000000002a50002 CR4: 00000000003706f0
> DR0: 0000000000000000 DR1: 0000000000000000 DR2: 0000000000000000
> DR3: 0000000000000000 DR6: 00000000fffe0ff0 DR7: 0000000000000400
> Call Trace:
>   <IRQ>
>   ? die_addr+0x2d/0x80
>   ? exc_general_protection+0x1aa/0x3f0
>   ? asm_exc_general_protection+0x22/0x30
>   ? iwl_pcie_prep_tso+0x213/0x2b0 [iwlwifi]
>   ? iwl_pcie_prep_tso+0x1b2/0x2b0 [iwlwifi]
>   iwl_txq_gen2_tx+0x7ec/0xf10 [iwlwifi]
>   iwl_mvm_tx_mpdu+0x1da/0x560 [iwlmvm]
>   iwl_mvm_tx_skb_sta+0x34c/0x540 [iwlmvm]
>   iwl_mvm_tx_skb+0x12/0x30 [iwlmvm]
>   iwl_mvm_mac_itxq_xmit+0xbe/0x1e0 [iwlmvm]
>   ieee80211_queue_skb+0x557/0x6d0 [mac80211]
>   __ieee80211_xmit_fast+0x7b2/0xad0 [mac80211]
>   ? lock_acquire+0xc7/0x2d0
>   ? lock_is_held_type+0xa5/0x110
>   ? find_held_lock+0x2b/0x80
>   ? skb_mac_gso_segment+0x130/0x1e0
>   ? lock_release+0xc6/0x290
>   ? skb_mac_gso_segment+0x13a/0x1e0
>   __ieee80211_subif_start_xmit+0x22d/0x5a0 [mac80211]
>   ieee80211_subif_start_xmit+0x57/0x570 [mac80211]
>   ? dev_queue_xmit_nit+0x2cd/0x3a0
>   ? lock_release+0xc6/0x290
>   ? dev_hard_start_xmit+0x76/0x250
>   dev_hard_start_xmit+0x76/0x250
>   __dev_queue_xmit+0x246/0x1270
>   ? lockdep_hardirqs_on_prepare+0xa7/0x170
>   ? eth_header+0x21/0xb0
>   ip_finish_output2+0x230/0xaa0
>   __ip_queue_xmit+0x1e6/0x760
>   __tcp_transmit_skb+0x521/0xdc0
>   ? find_held_lock+0x2b/0x80
>   tcp_write_xmit+0x4d7/0x17a0
>   ? lock_is_held_type+0xa5/0x110
>   tcp_tsq_handler+0x8a/0xd0
>   tcp_tasklet_func+0xbc/0x140
>   tasklet_action_common.constprop.0+0xfd/0x2f0
>   handle_softirqs+0xd0/0x440
>   ? iwl_pcie_irq_rx_msix_handler+0xe5/0x140 [iwlwifi]
>   ? iwl_pcie_irq_rx_msix_handler+0x5d/0x140 [iwlwifi]
>   do_softirq.part.0+0x3a/0x90
>   </IRQ>
>   <TASK>
>   __local_bh_enable_ip+0xbd/0xd0
>   ? iwl_pcie_irq_rx_msix_handler+0xe5/0x140 [iwlwifi]
>   iwl_pcie_irq_rx_msix_handler+0xed/0x140 [iwlwifi]
>   ? irq_thread+0x8c/0x200
>   irq_thread_fn+0x14/0x50
>   ? irq_thread+0x8c/0x200
>   irq_thread+0x12c/0x200
>   ? disable_irq_nosync+0x10/0x10
>   ? irq_set_affinity_notifier+0x140/0x140
>   ? irq_check_status_bit+0xf0/0xf0
>   kthread+0xd7/0x110
>   ? kthread_insert_work_sanity_check+0x50/0x50
>   ret_from_fork+0x28/0x40
>   ? kthread_insert_work_sanity_check+0x50/0x50
>   ret_from_fork_asm+0x11/0x20
>   </TASK>
> 
> 
> > ---
> >   .../wireless/intel/iwlwifi/pcie/internal.h    | 30 +++++++++-
> >   .../net/wireless/intel/iwlwifi/pcie/tx-gen2.c | 22 ++++++-
> >   drivers/net/wireless/intel/iwlwifi/pcie/tx.c  | 60
> > +++++++++++++++----
> >   3 files changed, 95 insertions(+), 17 deletions(-)
> > 
> > diff --git a/drivers/net/wireless/intel/iwlwifi/pcie/internal.h
> > b/drivers/net/wireless/intel/iwlwifi/pcie/internal.h
> > index d63c1c284f70..b59de4f80b4b 100644
> > --- a/drivers/net/wireless/intel/iwlwifi/pcie/internal.h
> > +++ b/drivers/net/wireless/intel/iwlwifi/pcie/internal.h
> > @@ -603,6 +603,22 @@ struct iwl_tso_hdr_page {
> >   	u8 *pos;
> >   };
> >   
> > +/*
> > + * Note that we put this struct *last* in the page. By doing that,
> > we ensure
> > + * that no TB referencing this page can trigger the 32-bit
> > boundary hardware
> > + * bug.
> > + */
> > +struct iwl_tso_page_info {
> > +	dma_addr_t dma_addr;
> > +	struct page *next;
> > +	refcount_t use_count;
> > +};
> > +
> > +#define IWL_TSO_PAGE_DATA_SIZE	(PAGE_SIZE - sizeof(struct
> > iwl_tso_page_info))
> > +#define IWL_TSO_PAGE_INFO(addr)	\
> > +	((struct iwl_tso_page_info *)(((unsigned long)addr &
> > PAGE_MASK) + \
> > +				      IWL_TSO_PAGE_DATA_SIZE))
> > +
> >   int iwl_pcie_tx_init(struct iwl_trans *trans);
> >   void iwl_pcie_tx_start(struct iwl_trans *trans, u32
> > scd_base_addr);
> >   int iwl_pcie_tx_stop(struct iwl_trans *trans);
> > @@ -628,8 +644,18 @@ struct sg_table *iwl_pcie_prep_tso(struct
> > iwl_trans *trans, struct sk_buff *skb,
> >   				   struct iwl_cmd_meta *cmd_meta,
> >   				   u8 **hdr, unsigned int
> > hdr_room);
> >   
> > -void iwl_pcie_free_tso_page(struct iwl_trans *trans, struct
> > sk_buff *skb,
> > -			    struct iwl_cmd_meta *cmd_meta);
> > +void iwl_pcie_free_tso_pages(struct iwl_trans *trans, struct
> > sk_buff *skb,
> > +			     struct iwl_cmd_meta *cmd_meta);
> > +
> > +static inline dma_addr_t iwl_pcie_get_tso_page_phys(void *addr)
> > +{
> > +	dma_addr_t res;
> > +
> > +	res = IWL_TSO_PAGE_INFO(addr)->dma_addr;
> > +	res += (unsigned long)addr & ~PAGE_MASK;
> > +
> > +	return res;
> > +}
> >   
> >   static inline dma_addr_t
> >   iwl_txq_get_first_tb_dma(struct iwl_txq *txq, int idx)
> > diff --git a/drivers/net/wireless/intel/iwlwifi/pcie/tx-gen2.c
> > b/drivers/net/wireless/intel/iwlwifi/pcie/tx-gen2.c
> > index 3dcce6a8da50..10ee2c328458 100644
> > --- a/drivers/net/wireless/intel/iwlwifi/pcie/tx-gen2.c
> > +++ b/drivers/net/wireless/intel/iwlwifi/pcie/tx-gen2.c
> > @@ -19,8 +19,10 @@ static struct page *get_workaround_page(struct
> > iwl_trans *trans,
> >   					struct sk_buff *skb)
> >   {
> >   	struct iwl_trans_pcie *trans_pcie =
> > IWL_TRANS_GET_PCIE_TRANS(trans);
> > +	struct iwl_tso_page_info *info;
> >   	struct page **page_ptr;
> >   	struct page *ret;
> > +	dma_addr_t phys;
> >   
> >   	page_ptr = (void *)((u8 *)skb->cb + trans_pcie-
> > >txqs.page_offs);
> >   
> > @@ -28,8 +30,22 @@ static struct page *get_workaround_page(struct
> > iwl_trans *trans,
> >   	if (!ret)
> >   		return NULL;
> >   
> > +	info = IWL_TSO_PAGE_INFO(page_address(ret));
> > +
> > +	/* Create a DMA mapping for the page */
> > +	phys = dma_map_page_attrs(trans->dev, ret, 0, PAGE_SIZE,
> > +				  DMA_TO_DEVICE,
> > DMA_ATTR_SKIP_CPU_SYNC);
> > +	if (unlikely(dma_mapping_error(trans->dev, phys))) {
> > +		__free_page(ret);
> > +		return NULL;
> > +	}
> > +
> > +	/* Store physical address and set use count */
> > +	info->dma_addr = phys;
> > +	refcount_set(&info->use_count, 1);
> > +
> >   	/* set the chaining pointer to the previous page if there
> > */
> > -	*(void **)((u8 *)page_address(ret) + PAGE_SIZE -
> > sizeof(void *)) = *page_ptr;
> > +	info->next = *page_ptr;
> >   	*page_ptr = ret;
> >   
> >   	return ret;
> > @@ -76,7 +92,7 @@ static int iwl_txq_gen2_set_tb_with_wa(struct
> > iwl_trans *trans,
> >   	 * a new mapping for it so the device will not fail.
> >   	 */
> >   
> > -	if (WARN_ON(len > PAGE_SIZE - sizeof(void *))) {
> > +	if (WARN_ON(len > IWL_TSO_PAGE_DATA_SIZE)) {
> >   		ret = -ENOBUFS;
> >   		goto unmap;
> >   	}
> > @@ -782,7 +798,7 @@ static void iwl_txq_gen2_unmap(struct iwl_trans
> > *trans, int txq_id)
> >   			struct sk_buff *skb = txq-
> > >entries[idx].skb;
> >   
> >   			if (!WARN_ON_ONCE(!skb))
> > -				iwl_pcie_free_tso_page(trans, skb,
> > cmd_meta);
> > +				iwl_pcie_free_tso_pages(trans,
> > skb, cmd_meta);
> >   		}
> >   		iwl_txq_gen2_free_tfd(trans, txq);
> >   		txq->read_ptr = iwl_txq_inc_wrap(trans, txq-
> > >read_ptr);
> > diff --git a/drivers/net/wireless/intel/iwlwifi/pcie/tx.c
> > b/drivers/net/wireless/intel/iwlwifi/pcie/tx.c
> > index ac545a39ad2a..e00d85866de9 100644
> > --- a/drivers/net/wireless/intel/iwlwifi/pcie/tx.c
> > +++ b/drivers/net/wireless/intel/iwlwifi/pcie/tx.c
> > @@ -209,8 +209,22 @@ static void
> > iwl_pcie_clear_cmd_in_flight(struct iwl_trans *trans)
> >   	spin_unlock(&trans_pcie->reg_lock);
> >   }
> >   
> > -void iwl_pcie_free_tso_page(struct iwl_trans *trans, struct
> > sk_buff *skb,
> > -			    struct iwl_cmd_meta *cmd_meta)
> > +static void iwl_pcie_free_and_unmap_tso_page(struct iwl_trans
> > *trans,
> > +					     struct page *page)
> > +{
> > +	struct iwl_tso_page_info *info =
> > IWL_TSO_PAGE_INFO(page_address(page));
> > +
> > +	/* Decrease internal use count and unmap/free page if
> > needed */
> > +	if (refcount_dec_and_test(&info->use_count)) {
> > +		dma_unmap_page(trans->dev, info->dma_addr,
> > PAGE_SIZE,
> > +			       DMA_TO_DEVICE);
> > +
> > +		__free_page(page);
> > +	}
> > +}
> > +
> > +void iwl_pcie_free_tso_pages(struct iwl_trans *trans, struct
> > sk_buff *skb,
> > +			     struct iwl_cmd_meta *cmd_meta)
> >   {
> >   	struct iwl_trans_pcie *trans_pcie =
> > IWL_TRANS_GET_PCIE_TRANS(trans);
> >   	struct page **page_ptr;
> > @@ -221,10 +235,11 @@ void iwl_pcie_free_tso_page(struct iwl_trans
> > *trans, struct sk_buff *skb,
> >   	*page_ptr = NULL;
> >   
> >   	while (next) {
> > +		struct iwl_tso_page_info *info;
> >   		struct page *tmp = next;
> >   
> > -		next = *(void **)((u8 *)page_address(next) +
> > PAGE_SIZE -
> > -				  sizeof(void *));
> > +		info = IWL_TSO_PAGE_INFO(page_address(next));
> > +		next = info->next;
> >   
> >   		/* Unmap the scatter gather list that is on the
> > last page */
> >   		if (!next && cmd_meta->sg_offset) {
> > @@ -236,7 +251,7 @@ void iwl_pcie_free_tso_page(struct iwl_trans
> > *trans, struct sk_buff *skb,
> >   			dma_unmap_sgtable(trans->dev, sgt,
> > DMA_TO_DEVICE, 0);
> >   		}
> >   
> > -		__free_page(tmp);
> > +		iwl_pcie_free_and_unmap_tso_page(trans, tmp);
> >   	}
> >   }
> >   
> > @@ -381,7 +396,7 @@ static void iwl_pcie_txq_unmap(struct iwl_trans
> > *trans, int txq_id)
> >   			if (WARN_ON_ONCE(!skb))
> >   				continue;
> >   
> > -			iwl_pcie_free_tso_page(trans, skb,
> > cmd_meta);
> > +			iwl_pcie_free_tso_pages(trans, skb,
> > cmd_meta);
> >   		}
> >   		iwl_txq_free_tfd(trans, txq);
> >   		txq->read_ptr = iwl_txq_inc_wrap(trans, txq-
> > >read_ptr);
> > @@ -1722,7 +1737,9 @@ static void *iwl_pcie_get_page_hdr(struct
> > iwl_trans *trans,
> >   {
> >   	struct iwl_trans_pcie *trans_pcie =
> > IWL_TRANS_GET_PCIE_TRANS(trans);
> >   	struct iwl_tso_hdr_page *p = this_cpu_ptr(trans_pcie-
> > >txqs.tso_hdr_page);
> > +	struct iwl_tso_page_info *info;
> >   	struct page **page_ptr;
> > +	dma_addr_t phys;
> >   	void *ret;
> >   
> >   	page_ptr = (void *)((u8 *)skb->cb + trans_pcie-
> > >txqs.page_offs);
> > @@ -1743,23 +1760,42 @@ static void *iwl_pcie_get_page_hdr(struct
> > iwl_trans *trans,
> >   	 *
> >   	 * (see also get_workaround_page() in tx-gen2.c)
> >   	 */
> > -	if (p->pos + len < (u8 *)page_address(p->page) + PAGE_SIZE
> > -
> > -			   sizeof(void *))
> > +	if (((unsigned long)p->pos & ~PAGE_MASK) + len <
> > IWL_TSO_PAGE_DATA_SIZE) {
> > +		info = IWL_TSO_PAGE_INFO(page_address(ret));
> >   		goto out;
> > +	}
> >   
> >   	/* We don't have enough room on this page, get a new one.
> > */
> > -	__free_page(p->page);
> > +	iwl_pcie_free_and_unmap_tso_page(trans, p->page);
> >   
> >   alloc:
> >   	p->page = alloc_page(GFP_ATOMIC);
> >   	if (!p->page)
> >   		return NULL;
> >   	p->pos = page_address(p->page);
> > +
> > +	info = IWL_TSO_PAGE_INFO(page_address(ret));
> > +
> >   	/* set the chaining pointer to NULL */
> > -	*(void **)((u8 *)page_address(p->page) + PAGE_SIZE -
> > sizeof(void *)) = NULL;
> > +	info->next = NULL;
> > +
> > +	/* Create a DMA mapping for the page */
> > +	phys = dma_map_page_attrs(trans->dev, p->page, 0,
> > PAGE_SIZE,
> > +				  DMA_TO_DEVICE,
> > DMA_ATTR_SKIP_CPU_SYNC);
> > +	if (unlikely(dma_mapping_error(trans->dev, phys))) {
> > +		__free_page(p->page);
> > +		p->page = NULL;
> > +
> > +		return NULL;
> > +	}
> > +
> > +	/* Store physical address and set use count */
> > +	info->dma_addr = phys;
> > +	refcount_set(&info->use_count, 1);
> >   out:
> >   	*page_ptr = p->page;
> > -	get_page(p->page);
> > +	/* Return an internal reference for the caller */
> > +	refcount_inc(&info->use_count);
> >   	ret = p->pos;
> >   	p->pos += len;
> >   
> > @@ -2330,7 +2366,7 @@ void iwl_pcie_reclaim(struct iwl_trans
> > *trans, int txq_id, int ssn,
> >   			      read_ptr, txq->read_ptr, txq_id))
> >   			continue;
> >   
> > -		iwl_pcie_free_tso_page(trans, skb, cmd_meta);
> > +		iwl_pcie_free_tso_pages(trans, skb, cmd_meta);
> >   
> >   		__skb_queue_tail(skbs, skb);
> >   
> 

Intel Deutschland GmbH
Registered Address: Am Campeon 10, 85579 Neubiberg, Germany
Tel: +49 89 99 8853-0, www.intel.de
Managing Directors: Sean Fennelly, Jeffrey Schneiderman, Tiffany Doon Silva
Chairperson of the Supervisory Board: Nicole Lau
Registered Office: Munich
Commercial Register: Amtsgericht Muenchen HRB 186928
Ben Greear July 11, 2024, 4:09 p.m. UTC | #3
On 7/10/24 23:15, Berg, Benjamin wrote:
> Hi Ben,
> 
> yes, you need to apply:
> 
> commit 003eae5a28c6c9d50290a4ac9b955be912f24c9f
> Author: Benjamin Berg <benjamin.berg@intel.com>
> Date:   Tue Jul 9 14:31:49 2024 +0200
> 
>      wifi: iwlwifi: correctly reference TSO page information
> 
> 
> I had not fully tested the last revision and the error slipped
> unfortunately.

Hello Benjamin,

Sorry I did not notice that patch on the mailing list on my own.  I re-applied
the 6/18 and 7/18 patches, and the fix you mention above, and system appears stable.

Thanks,
Ben
Ben Greear July 18, 2024, 7:20 p.m. UTC | #4
On 7/11/24 09:09, Ben Greear wrote:
> On 7/10/24 23:15, Berg, Benjamin wrote:
>> Hi Ben,
>>
>> yes, you need to apply:
>>
>> commit 003eae5a28c6c9d50290a4ac9b955be912f24c9f
>> Author: Benjamin Berg <benjamin.berg@intel.com>
>> Date:   Tue Jul 9 14:31:49 2024 +0200
>>
>>      wifi: iwlwifi: correctly reference TSO page information
>>
>>
>> I had not fully tested the last revision and the error slipped
>> unfortunately.
> 
> Hello Benjamin,
> 
> Sorry I did not notice that patch on the mailing list on my own.  I re-applied
> the 6/18 and 7/18 patches, and the fix you mention above, and system appears stable.

Hello,

We found another regression in our patched 6.10-ish kernel.  Before I apply these 3
patches:

wifi: iwlwifi: keep the TSO and workaround pages mapped
wifi: iwlwifi: use already mapped data when TXing an AMSDU
wifi: iwlwifi: correctly reference TSO page information

Then I see around 4Gbps TCP upload on my test rig.  After this, it runs very poorly,
perhaps bouncing up to high speed for a second or two, but mostly averaging 80Mbps
or so after it runs for a bit.

What are these patches trying to solve, and are you able to see good TCP upload performance
with these patches applied?

Thanks,
Ben
Berg, Benjamin July 19, 2024, 7:22 a.m. UTC | #5
Hi,

have you applied all patches? It'll be slightly slower until you apply
the last patch in the series (and break completely unless you have the
bugfix obviously).

A slowdown to 80MBit/s seems a bit like we are getting packet loss and
the AMSDU is simply not send out. I suppose, that could happen if some
of the DMA mapping is failing, but I really do not have a good idea
right now.

I do think that these patches have gone through quite some verification
and were working well for us.


As for the value of the patchset, the thing to keep in mind is that we
send out large SKBs that are spanning multiple pages. Before the patch,
we would do mapping as below, where h1, … and m1, … are each mappings
that only span a small part of a page (h_n is the header for the nth
packet while m_n is the corresponding message data):

TSO page: |- h1 -|- h2 -|- h3 -|- h4 -| …
SKB page 1: |- m1     -|- m2      -|
SKB page 2: |- m2 -|-  m3  -|- m4 -|
SKB page 3: |- m4 -|- …

With the patches applied, each of the pages will be mapped in a single
chunk. This brings down the number of DMA mappings and also avoids
partial mappings which can very be particularly expensive depending on
the IOMMU configuration.

Only the last patch in the series removes the old mappings (obviously
you also need the later bugfix commit). So you might see a slight
slowdown if the patch "wifi: iwlwifi: use already mapped data when
TXing an AMSDU" is not applied.

But not a slowdown to 80MBit/s …

Benjamin

On Thu, 2024-07-18 at 12:20 -0700, Ben Greear wrote:
> On 7/11/24 09:09, Ben Greear wrote:
> > On 7/10/24 23:15, Berg, Benjamin wrote:
> > > Hi Ben,
> > > 
> > > yes, you need to apply:
> > > 
> > > commit 003eae5a28c6c9d50290a4ac9b955be912f24c9f
> > > Author: Benjamin Berg <benjamin.berg@intel.com>
> > > Date:   Tue Jul 9 14:31:49 2024 +0200
> > > 
> > >      wifi: iwlwifi: correctly reference TSO page information
> > > 
> > > 
> > > I had not fully tested the last revision and the error slipped
> > > unfortunately.
> > 
> > Hello Benjamin,
> > 
> > Sorry I did not notice that patch on the mailing list on my own.  I
> > re-applied
> > the 6/18 and 7/18 patches, and the fix you mention above, and
> > system appears stable.
> 
> Hello,
> 
> We found another regression in our patched 6.10-ish kernel.  Before I
> apply these 3
> patches:
> 
> wifi: iwlwifi: keep the TSO and workaround pages mapped
> wifi: iwlwifi: use already mapped data when TXing an AMSDU
> wifi: iwlwifi: correctly reference TSO page information
> 
> Then I see around 4Gbps TCP upload on my test rig.  After this, it
> runs very poorly,
> perhaps bouncing up to high speed for a second or two, but mostly
> averaging 80Mbps
> or so after it runs for a bit.
> 
> What are these patches trying to solve, and are you able to see good
> TCP upload performance
> with these patches applied?
> 
> Thanks,
> Ben
> 

Intel Deutschland GmbH
Registered Address: Am Campeon 10, 85579 Neubiberg, Germany
Tel: +49 89 99 8853-0, www.intel.de
Managing Directors: Sean Fennelly, Jeffrey Schneiderman, Tiffany Doon Silva
Chairperson of the Supervisory Board: Nicole Lau
Registered Office: Munich
Commercial Register: Amtsgericht Muenchen HRB 186928
Ben Greear July 19, 2024, 12:30 p.m. UTC | #6
On 7/19/24 00:22, Berg, Benjamin wrote:
> Hi,
> 
> have you applied all patches? It'll be slightly slower until you apply
> the last patch in the series (and break completely unless you have the
> bugfix obviously).

I applied those three in my email below, on top of everything else that seemed
pertintent to iwlwifi/mac80211, but always possible I mis-applied something
or missed another patch.

I think I'll just leave those out and test again when i rebase on 6.11.
If you do start getting other reports of slowdowns, however...I think these
will be the suspect patches.

Thanks,
Ben

> 
> A slowdown to 80MBit/s seems a bit like we are getting packet loss and
> the AMSDU is simply not send out. I suppose, that could happen if some
> of the DMA mapping is failing, but I really do not have a good idea
> right now.
> 
> I do think that these patches have gone through quite some verification
> and were working well for us.
> 
> 
> As for the value of the patchset, the thing to keep in mind is that we
> send out large SKBs that are spanning multiple pages. Before the patch,
> we would do mapping as below, where h1, … and m1, … are each mappings
> that only span a small part of a page (h_n is the header for the nth
> packet while m_n is the corresponding message data):
> 
> TSO page: |- h1 -|- h2 -|- h3 -|- h4 -| …
> SKB page 1: |- m1     -|- m2      -|
> SKB page 2: |- m2 -|-  m3  -|- m4 -|
> SKB page 3: |- m4 -|- …
> 
> With the patches applied, each of the pages will be mapped in a single
> chunk. This brings down the number of DMA mappings and also avoids
> partial mappings which can very be particularly expensive depending on
> the IOMMU configuration.
> 
> Only the last patch in the series removes the old mappings (obviously
> you also need the later bugfix commit). So you might see a slight
> slowdown if the patch "wifi: iwlwifi: use already mapped data when
> TXing an AMSDU" is not applied.
> 
> But not a slowdown to 80MBit/s …
> 
> Benjamin
> 
> On Thu, 2024-07-18 at 12:20 -0700, Ben Greear wrote:
>> On 7/11/24 09:09, Ben Greear wrote:
>>> On 7/10/24 23:15, Berg, Benjamin wrote:
>>>> Hi Ben,
>>>>
>>>> yes, you need to apply:
>>>>
>>>> commit 003eae5a28c6c9d50290a4ac9b955be912f24c9f
>>>> Author: Benjamin Berg <benjamin.berg@intel.com>
>>>> Date:   Tue Jul 9 14:31:49 2024 +0200
>>>>
>>>>       wifi: iwlwifi: correctly reference TSO page information
>>>>
>>>>
>>>> I had not fully tested the last revision and the error slipped
>>>> unfortunately.
>>>
>>> Hello Benjamin,
>>>
>>> Sorry I did not notice that patch on the mailing list on my own.  I
>>> re-applied
>>> the 6/18 and 7/18 patches, and the fix you mention above, and
>>> system appears stable.
>>
>> Hello,
>>
>> We found another regression in our patched 6.10-ish kernel.  Before I
>> apply these 3
>> patches:
>>
>> wifi: iwlwifi: keep the TSO and workaround pages mapped
>> wifi: iwlwifi: use already mapped data when TXing an AMSDU
>> wifi: iwlwifi: correctly reference TSO page information
>>
>> Then I see around 4Gbps TCP upload on my test rig.  After this, it
>> runs very poorly,
>> perhaps bouncing up to high speed for a second or two, but mostly
>> averaging 80Mbps
>> or so after it runs for a bit.
>>
>> What are these patches trying to solve, and are you able to see good
>> TCP upload performance
>> with these patches applied?
>>
>> Thanks,
>> Ben
>>
> 
> Intel Deutschland GmbH
> Registered Address: Am Campeon 10, 85579 Neubiberg, Germany
> Tel: +49 89 99 8853-0, www.intel.de
> Managing Directors: Sean Fennelly, Jeffrey Schneiderman, Tiffany Doon Silva
> Chairperson of the Supervisory Board: Nicole Lau
> Registered Office: Munich
> Commercial Register: Amtsgericht Muenchen HRB 186928
diff mbox series

Patch

diff --git a/drivers/net/wireless/intel/iwlwifi/pcie/internal.h b/drivers/net/wireless/intel/iwlwifi/pcie/internal.h
index d63c1c284f70..b59de4f80b4b 100644
--- a/drivers/net/wireless/intel/iwlwifi/pcie/internal.h
+++ b/drivers/net/wireless/intel/iwlwifi/pcie/internal.h
@@ -603,6 +603,22 @@  struct iwl_tso_hdr_page {
 	u8 *pos;
 };
 
+/*
+ * Note that we put this struct *last* in the page. By doing that, we ensure
+ * that no TB referencing this page can trigger the 32-bit boundary hardware
+ * bug.
+ */
+struct iwl_tso_page_info {
+	dma_addr_t dma_addr;
+	struct page *next;
+	refcount_t use_count;
+};
+
+#define IWL_TSO_PAGE_DATA_SIZE	(PAGE_SIZE - sizeof(struct iwl_tso_page_info))
+#define IWL_TSO_PAGE_INFO(addr)	\
+	((struct iwl_tso_page_info *)(((unsigned long)addr & PAGE_MASK) + \
+				      IWL_TSO_PAGE_DATA_SIZE))
+
 int iwl_pcie_tx_init(struct iwl_trans *trans);
 void iwl_pcie_tx_start(struct iwl_trans *trans, u32 scd_base_addr);
 int iwl_pcie_tx_stop(struct iwl_trans *trans);
@@ -628,8 +644,18 @@  struct sg_table *iwl_pcie_prep_tso(struct iwl_trans *trans, struct sk_buff *skb,
 				   struct iwl_cmd_meta *cmd_meta,
 				   u8 **hdr, unsigned int hdr_room);
 
-void iwl_pcie_free_tso_page(struct iwl_trans *trans, struct sk_buff *skb,
-			    struct iwl_cmd_meta *cmd_meta);
+void iwl_pcie_free_tso_pages(struct iwl_trans *trans, struct sk_buff *skb,
+			     struct iwl_cmd_meta *cmd_meta);
+
+static inline dma_addr_t iwl_pcie_get_tso_page_phys(void *addr)
+{
+	dma_addr_t res;
+
+	res = IWL_TSO_PAGE_INFO(addr)->dma_addr;
+	res += (unsigned long)addr & ~PAGE_MASK;
+
+	return res;
+}
 
 static inline dma_addr_t
 iwl_txq_get_first_tb_dma(struct iwl_txq *txq, int idx)
diff --git a/drivers/net/wireless/intel/iwlwifi/pcie/tx-gen2.c b/drivers/net/wireless/intel/iwlwifi/pcie/tx-gen2.c
index 3dcce6a8da50..10ee2c328458 100644
--- a/drivers/net/wireless/intel/iwlwifi/pcie/tx-gen2.c
+++ b/drivers/net/wireless/intel/iwlwifi/pcie/tx-gen2.c
@@ -19,8 +19,10 @@  static struct page *get_workaround_page(struct iwl_trans *trans,
 					struct sk_buff *skb)
 {
 	struct iwl_trans_pcie *trans_pcie = IWL_TRANS_GET_PCIE_TRANS(trans);
+	struct iwl_tso_page_info *info;
 	struct page **page_ptr;
 	struct page *ret;
+	dma_addr_t phys;
 
 	page_ptr = (void *)((u8 *)skb->cb + trans_pcie->txqs.page_offs);
 
@@ -28,8 +30,22 @@  static struct page *get_workaround_page(struct iwl_trans *trans,
 	if (!ret)
 		return NULL;
 
+	info = IWL_TSO_PAGE_INFO(page_address(ret));
+
+	/* Create a DMA mapping for the page */
+	phys = dma_map_page_attrs(trans->dev, ret, 0, PAGE_SIZE,
+				  DMA_TO_DEVICE, DMA_ATTR_SKIP_CPU_SYNC);
+	if (unlikely(dma_mapping_error(trans->dev, phys))) {
+		__free_page(ret);
+		return NULL;
+	}
+
+	/* Store physical address and set use count */
+	info->dma_addr = phys;
+	refcount_set(&info->use_count, 1);
+
 	/* set the chaining pointer to the previous page if there */
-	*(void **)((u8 *)page_address(ret) + PAGE_SIZE - sizeof(void *)) = *page_ptr;
+	info->next = *page_ptr;
 	*page_ptr = ret;
 
 	return ret;
@@ -76,7 +92,7 @@  static int iwl_txq_gen2_set_tb_with_wa(struct iwl_trans *trans,
 	 * a new mapping for it so the device will not fail.
 	 */
 
-	if (WARN_ON(len > PAGE_SIZE - sizeof(void *))) {
+	if (WARN_ON(len > IWL_TSO_PAGE_DATA_SIZE)) {
 		ret = -ENOBUFS;
 		goto unmap;
 	}
@@ -782,7 +798,7 @@  static void iwl_txq_gen2_unmap(struct iwl_trans *trans, int txq_id)
 			struct sk_buff *skb = txq->entries[idx].skb;
 
 			if (!WARN_ON_ONCE(!skb))
-				iwl_pcie_free_tso_page(trans, skb, cmd_meta);
+				iwl_pcie_free_tso_pages(trans, skb, cmd_meta);
 		}
 		iwl_txq_gen2_free_tfd(trans, txq);
 		txq->read_ptr = iwl_txq_inc_wrap(trans, txq->read_ptr);
diff --git a/drivers/net/wireless/intel/iwlwifi/pcie/tx.c b/drivers/net/wireless/intel/iwlwifi/pcie/tx.c
index ac545a39ad2a..e00d85866de9 100644
--- a/drivers/net/wireless/intel/iwlwifi/pcie/tx.c
+++ b/drivers/net/wireless/intel/iwlwifi/pcie/tx.c
@@ -209,8 +209,22 @@  static void iwl_pcie_clear_cmd_in_flight(struct iwl_trans *trans)
 	spin_unlock(&trans_pcie->reg_lock);
 }
 
-void iwl_pcie_free_tso_page(struct iwl_trans *trans, struct sk_buff *skb,
-			    struct iwl_cmd_meta *cmd_meta)
+static void iwl_pcie_free_and_unmap_tso_page(struct iwl_trans *trans,
+					     struct page *page)
+{
+	struct iwl_tso_page_info *info = IWL_TSO_PAGE_INFO(page_address(page));
+
+	/* Decrease internal use count and unmap/free page if needed */
+	if (refcount_dec_and_test(&info->use_count)) {
+		dma_unmap_page(trans->dev, info->dma_addr, PAGE_SIZE,
+			       DMA_TO_DEVICE);
+
+		__free_page(page);
+	}
+}
+
+void iwl_pcie_free_tso_pages(struct iwl_trans *trans, struct sk_buff *skb,
+			     struct iwl_cmd_meta *cmd_meta)
 {
 	struct iwl_trans_pcie *trans_pcie = IWL_TRANS_GET_PCIE_TRANS(trans);
 	struct page **page_ptr;
@@ -221,10 +235,11 @@  void iwl_pcie_free_tso_page(struct iwl_trans *trans, struct sk_buff *skb,
 	*page_ptr = NULL;
 
 	while (next) {
+		struct iwl_tso_page_info *info;
 		struct page *tmp = next;
 
-		next = *(void **)((u8 *)page_address(next) + PAGE_SIZE -
-				  sizeof(void *));
+		info = IWL_TSO_PAGE_INFO(page_address(next));
+		next = info->next;
 
 		/* Unmap the scatter gather list that is on the last page */
 		if (!next && cmd_meta->sg_offset) {
@@ -236,7 +251,7 @@  void iwl_pcie_free_tso_page(struct iwl_trans *trans, struct sk_buff *skb,
 			dma_unmap_sgtable(trans->dev, sgt, DMA_TO_DEVICE, 0);
 		}
 
-		__free_page(tmp);
+		iwl_pcie_free_and_unmap_tso_page(trans, tmp);
 	}
 }
 
@@ -381,7 +396,7 @@  static void iwl_pcie_txq_unmap(struct iwl_trans *trans, int txq_id)
 			if (WARN_ON_ONCE(!skb))
 				continue;
 
-			iwl_pcie_free_tso_page(trans, skb, cmd_meta);
+			iwl_pcie_free_tso_pages(trans, skb, cmd_meta);
 		}
 		iwl_txq_free_tfd(trans, txq);
 		txq->read_ptr = iwl_txq_inc_wrap(trans, txq->read_ptr);
@@ -1722,7 +1737,9 @@  static void *iwl_pcie_get_page_hdr(struct iwl_trans *trans,
 {
 	struct iwl_trans_pcie *trans_pcie = IWL_TRANS_GET_PCIE_TRANS(trans);
 	struct iwl_tso_hdr_page *p = this_cpu_ptr(trans_pcie->txqs.tso_hdr_page);
+	struct iwl_tso_page_info *info;
 	struct page **page_ptr;
+	dma_addr_t phys;
 	void *ret;
 
 	page_ptr = (void *)((u8 *)skb->cb + trans_pcie->txqs.page_offs);
@@ -1743,23 +1760,42 @@  static void *iwl_pcie_get_page_hdr(struct iwl_trans *trans,
 	 *
 	 * (see also get_workaround_page() in tx-gen2.c)
 	 */
-	if (p->pos + len < (u8 *)page_address(p->page) + PAGE_SIZE -
-			   sizeof(void *))
+	if (((unsigned long)p->pos & ~PAGE_MASK) + len < IWL_TSO_PAGE_DATA_SIZE) {
+		info = IWL_TSO_PAGE_INFO(page_address(ret));
 		goto out;
+	}
 
 	/* We don't have enough room on this page, get a new one. */
-	__free_page(p->page);
+	iwl_pcie_free_and_unmap_tso_page(trans, p->page);
 
 alloc:
 	p->page = alloc_page(GFP_ATOMIC);
 	if (!p->page)
 		return NULL;
 	p->pos = page_address(p->page);
+
+	info = IWL_TSO_PAGE_INFO(page_address(ret));
+
 	/* set the chaining pointer to NULL */
-	*(void **)((u8 *)page_address(p->page) + PAGE_SIZE - sizeof(void *)) = NULL;
+	info->next = NULL;
+
+	/* Create a DMA mapping for the page */
+	phys = dma_map_page_attrs(trans->dev, p->page, 0, PAGE_SIZE,
+				  DMA_TO_DEVICE, DMA_ATTR_SKIP_CPU_SYNC);
+	if (unlikely(dma_mapping_error(trans->dev, phys))) {
+		__free_page(p->page);
+		p->page = NULL;
+
+		return NULL;
+	}
+
+	/* Store physical address and set use count */
+	info->dma_addr = phys;
+	refcount_set(&info->use_count, 1);
 out:
 	*page_ptr = p->page;
-	get_page(p->page);
+	/* Return an internal reference for the caller */
+	refcount_inc(&info->use_count);
 	ret = p->pos;
 	p->pos += len;
 
@@ -2330,7 +2366,7 @@  void iwl_pcie_reclaim(struct iwl_trans *trans, int txq_id, int ssn,
 			      read_ptr, txq->read_ptr, txq_id))
 			continue;
 
-		iwl_pcie_free_tso_page(trans, skb, cmd_meta);
+		iwl_pcie_free_tso_pages(trans, skb, cmd_meta);
 
 		__skb_queue_tail(skbs, skb);