
[2/2] wifi: ath11k: Use dma_alloc_noncoherent for rx_tid buffer allocation

Message ID 20250119164219.647059-3-quic_ppranees@quicinc.com
State New
Series wifi: ath11k: Fix SWIOTLB issues with DMA buffer allocation

Commit Message

P Praneesh Jan. 19, 2025, 4:42 p.m. UTC
Currently, the driver allocates cacheable DMA buffers for the rx_tid
structure using kzalloc() and dma_map_single(). These buffers are
long-lived and can persist for the lifetime of the peer, and keeping
such long-lived streaming DMA mappings is not advisable: on systems
that rely on SWIOTLB bounce buffering they can exhaust the SWIOTLB
pool. Instead, allocate long-lived cacheable DMA buffers such as the
peer's rx_tid with the dma_alloc_noncoherent() helper. Since
dma_alloc_noncoherent() does not guarantee the alignment the hardware
descriptor requires, align the returned physical and virtual addresses
in the driver before use. This ensures the non-coherent memory is
properly allocated through the kernel helper.

Tested-on: QCN9074 hw1.0 PCI WLAN.HK.2.7.0.1-01744-QCAHKSWPL_SILICONZ-1
Tested-on: WCN6855 hw2.0 PCI WLAN.HSP.1.1-03125-QCAHSPSWPL_V1_V2_SILICONZ_LITE-3

Signed-off-by: P Praneesh <quic_ppranees@quicinc.com>
---
 drivers/net/wireless/ath/ath11k/dp.h    |   6 +-
 drivers/net/wireless/ath/ath11k/dp_rx.c | 117 +++++++++++-------------
 2 files changed, 58 insertions(+), 65 deletions(-)
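
For reference, a minimal sketch of the over-allocate/align pattern the
patch applies to rx_tid. The function name, DESC_ALIGN value, and
parameter layout below are hypothetical stand-ins for illustration, not
the driver's actual code:

/* Hypothetical sketch: over-allocate a non-coherent buffer, then derive
 * aligned virtual and DMA addresses from the unaligned ones.
 */
#include <linux/dma-mapping.h>
#include <linux/align.h>

#define DESC_ALIGN 128	/* stand-in for HAL_LINK_DESC_ALIGN */

static int example_alloc_aligned_desc(struct device *dev, size_t desc_sz,
				      void **vaddr_unaligned,
				      dma_addr_t *paddr_unaligned,
				      u32 **vaddr, dma_addr_t *paddr,
				      size_t *unaligned_sz)
{
	/* Over-allocate so an aligned descriptor always fits. */
	*unaligned_sz = desc_sz + DESC_ALIGN - 1;

	/* Non-coherent memory: CPU writes must later be pushed to the device
	 * with dma_sync_single_for_device(dev, *paddr, desc_sz, DMA_TO_DEVICE).
	 */
	*vaddr_unaligned = dma_alloc_noncoherent(dev, *unaligned_sz,
						 paddr_unaligned,
						 DMA_BIDIRECTIONAL, GFP_ATOMIC);
	if (!*vaddr_unaligned)
		return -ENOMEM;

	/* Align the CPU address, then shift the DMA address by the same offset. */
	*vaddr = PTR_ALIGN(*vaddr_unaligned, DESC_ALIGN);
	*paddr = *paddr_unaligned +
		 ((unsigned long)*vaddr - (unsigned long)*vaddr_unaligned);

	return 0;
}

Freeing is done with the unaligned size and addresses, e.g.
dma_free_noncoherent(dev, *unaligned_sz, *vaddr_unaligned,
*paddr_unaligned, DMA_BIDIRECTIONAL), which matches the cleanup paths in
the patch below.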

Comments

Tim Harvey Jan. 21, 2025, 5:26 p.m. UTC | #1
On Sun, Jan 19, 2025 at 8:42 AM P Praneesh <quic_ppranees@quicinc.com> wrote:
> [...]

I tested this with qcn9074 on an imx8mm (no iommu) with 4GiB DRAM,
verified swiotlb was not being used, and infrastructure mode worked
fine.

Tested-By: Tim Harvey <tharvey@gateworks.com>

Best regards,

Tim

Patch

diff --git a/drivers/net/wireless/ath/ath11k/dp.h b/drivers/net/wireless/ath/ath11k/dp.h
index f777314db8b3..7a55afd33be8 100644
--- a/drivers/net/wireless/ath/ath11k/dp.h
+++ b/drivers/net/wireless/ath/ath11k/dp.h
@@ -1,7 +1,7 @@ 
 /* SPDX-License-Identifier: BSD-3-Clause-Clear */
 /*
  * Copyright (c) 2018-2019 The Linux Foundation. All rights reserved.
- * Copyright (c) 2021-2023 Qualcomm Innovation Center, Inc. All rights reserved.
+ * Copyright (c) 2021-2023, 2025 Qualcomm Innovation Center, Inc. All rights reserved.
  */
 
 #ifndef ATH11K_DP_H
@@ -20,7 +20,6 @@  struct ath11k_ext_irq_grp;
 
 struct dp_rx_tid {
 	u8 tid;
-	u32 *vaddr;
 	dma_addr_t paddr;
 	u32 size;
 	u32 ba_win_sz;
@@ -37,6 +36,9 @@  struct dp_rx_tid {
 	/* Timer info related to fragments */
 	struct timer_list frag_timer;
 	struct ath11k_base *ab;
+	u32 *vaddr_unaligned;
+	dma_addr_t paddr_unaligned;
+	u32 unaligned_size;
 };
 
 #define DP_REO_DESC_FREE_THRESHOLD  64
diff --git a/drivers/net/wireless/ath/ath11k/dp_rx.c b/drivers/net/wireless/ath/ath11k/dp_rx.c
index 029ecf51c9ef..5e71e5d9ecb7 100644
--- a/drivers/net/wireless/ath/ath11k/dp_rx.c
+++ b/drivers/net/wireless/ath/ath11k/dp_rx.c
@@ -1,7 +1,7 @@ 
 // SPDX-License-Identifier: BSD-3-Clause-Clear
 /*
  * Copyright (c) 2018-2019 The Linux Foundation. All rights reserved.
- * Copyright (c) 2021-2024 Qualcomm Innovation Center, Inc. All rights reserved.
+ * Copyright (c) 2021-2025 Qualcomm Innovation Center, Inc. All rights reserved.
  */
 
 #include <linux/ieee80211.h>
@@ -675,11 +675,11 @@  void ath11k_dp_reo_cmd_list_cleanup(struct ath11k_base *ab)
 	list_for_each_entry_safe(cmd, tmp, &dp->reo_cmd_list, list) {
 		list_del(&cmd->list);
 		rx_tid = &cmd->data;
-		if (rx_tid->vaddr) {
-			dma_unmap_single(ab->dev, rx_tid->paddr,
-					 rx_tid->size, DMA_BIDIRECTIONAL);
-			kfree(rx_tid->vaddr);
-			rx_tid->vaddr = NULL;
+		if (rx_tid->vaddr_unaligned) {
+			dma_free_noncoherent(ab->dev, rx_tid->unaligned_size,
+					     rx_tid->vaddr_unaligned,
+					     rx_tid->paddr_unaligned, DMA_BIDIRECTIONAL);
+			rx_tid->vaddr_unaligned = NULL;
 		}
 		kfree(cmd);
 	}
@@ -689,11 +689,11 @@  void ath11k_dp_reo_cmd_list_cleanup(struct ath11k_base *ab)
 		list_del(&cmd_cache->list);
 		dp->reo_cmd_cache_flush_count--;
 		rx_tid = &cmd_cache->data;
-		if (rx_tid->vaddr) {
-			dma_unmap_single(ab->dev, rx_tid->paddr,
-					 rx_tid->size, DMA_BIDIRECTIONAL);
-			kfree(rx_tid->vaddr);
-			rx_tid->vaddr = NULL;
+		if (rx_tid->vaddr_unaligned) {
+			dma_free_noncoherent(ab->dev, rx_tid->unaligned_size,
+					     rx_tid->vaddr_unaligned,
+					     rx_tid->paddr_unaligned, DMA_BIDIRECTIONAL);
+			rx_tid->vaddr_unaligned = NULL;
 		}
 		kfree(cmd_cache);
 	}
@@ -708,11 +708,11 @@  static void ath11k_dp_reo_cmd_free(struct ath11k_dp *dp, void *ctx,
 	if (status != HAL_REO_CMD_SUCCESS)
 		ath11k_warn(dp->ab, "failed to flush rx tid hw desc, tid %d status %d\n",
 			    rx_tid->tid, status);
-	if (rx_tid->vaddr) {
-		dma_unmap_single(dp->ab->dev, rx_tid->paddr, rx_tid->size,
-				 DMA_BIDIRECTIONAL);
-		kfree(rx_tid->vaddr);
-		rx_tid->vaddr = NULL;
+	if (rx_tid->vaddr_unaligned) {
+		dma_free_noncoherent(dp->ab->dev, rx_tid->unaligned_size,
+				     rx_tid->vaddr_unaligned,
+				     rx_tid->paddr_unaligned, DMA_BIDIRECTIONAL);
+		rx_tid->vaddr_unaligned = NULL;
 	}
 }
 
@@ -749,10 +749,10 @@  static void ath11k_dp_reo_cache_flush(struct ath11k_base *ab,
 	if (ret) {
 		ath11k_err(ab, "failed to send HAL_REO_CMD_FLUSH_CACHE cmd, tid %d (%d)\n",
 			   rx_tid->tid, ret);
-		dma_unmap_single(ab->dev, rx_tid->paddr, rx_tid->size,
-				 DMA_BIDIRECTIONAL);
-		kfree(rx_tid->vaddr);
-		rx_tid->vaddr = NULL;
+		dma_free_noncoherent(ab->dev, rx_tid->unaligned_size,
+				     rx_tid->vaddr_unaligned,
+				     rx_tid->paddr_unaligned, DMA_BIDIRECTIONAL);
+		rx_tid->vaddr_unaligned = NULL;
 	}
 }
 
@@ -802,10 +802,10 @@  static void ath11k_dp_rx_tid_del_func(struct ath11k_dp *dp, void *ctx,
 
 	return;
 free_desc:
-	dma_unmap_single(ab->dev, rx_tid->paddr, rx_tid->size,
-			 DMA_BIDIRECTIONAL);
-	kfree(rx_tid->vaddr);
-	rx_tid->vaddr = NULL;
+	dma_free_noncoherent(ab->dev, rx_tid->unaligned_size,
+			     rx_tid->vaddr_unaligned,
+			     rx_tid->paddr_unaligned, DMA_BIDIRECTIONAL);
+	rx_tid->vaddr_unaligned = NULL;
 }
 
 void ath11k_peer_rx_tid_delete(struct ath11k *ar,
@@ -831,14 +831,16 @@  void ath11k_peer_rx_tid_delete(struct ath11k *ar,
 		if (ret != -ESHUTDOWN)
 			ath11k_err(ar->ab, "failed to send HAL_REO_CMD_UPDATE_RX_QUEUE cmd, tid %d (%d)\n",
 				   tid, ret);
-		dma_unmap_single(ar->ab->dev, rx_tid->paddr, rx_tid->size,
-				 DMA_BIDIRECTIONAL);
-		kfree(rx_tid->vaddr);
-		rx_tid->vaddr = NULL;
+		dma_free_noncoherent(ar->ab->dev, rx_tid->unaligned_size,
+				     rx_tid->vaddr_unaligned,
+				     rx_tid->paddr_unaligned, DMA_BIDIRECTIONAL);
+		rx_tid->vaddr_unaligned = NULL;
 	}
 
 	rx_tid->paddr = 0;
+	rx_tid->paddr_unaligned = 0;
 	rx_tid->size = 0;
+	rx_tid->unaligned_size = 0;
 }
 
 static int ath11k_dp_rx_link_desc_return(struct ath11k_base *ab,
@@ -982,10 +984,9 @@  static void ath11k_dp_rx_tid_mem_free(struct ath11k_base *ab,
 	if (!rx_tid->active)
 		goto unlock_exit;
 
-	dma_unmap_single(ab->dev, rx_tid->paddr, rx_tid->size,
-			 DMA_BIDIRECTIONAL);
-	kfree(rx_tid->vaddr);
-	rx_tid->vaddr = NULL;
+	dma_free_noncoherent(ab->dev, rx_tid->unaligned_size, rx_tid->vaddr_unaligned,
+			     rx_tid->paddr_unaligned, DMA_BIDIRECTIONAL);
+	rx_tid->vaddr_unaligned = NULL;
 
 	rx_tid->active = false;
 
@@ -1000,9 +1001,8 @@  int ath11k_peer_rx_tid_setup(struct ath11k *ar, const u8 *peer_mac, int vdev_id,
 	struct ath11k_base *ab = ar->ab;
 	struct ath11k_peer *peer;
 	struct dp_rx_tid *rx_tid;
-	u32 hw_desc_sz;
-	u32 *addr_aligned;
-	void *vaddr;
+	u32 hw_desc_sz, *vaddr;
+	void *vaddr_unaligned;
 	dma_addr_t paddr;
 	int ret;
 
@@ -1050,49 +1050,40 @@  int ath11k_peer_rx_tid_setup(struct ath11k *ar, const u8 *peer_mac, int vdev_id,
 	else
 		hw_desc_sz = ath11k_hal_reo_qdesc_size(DP_BA_WIN_SZ_MAX, tid);
 
-	vaddr = kzalloc(hw_desc_sz + HAL_LINK_DESC_ALIGN - 1, GFP_ATOMIC);
-	if (!vaddr) {
+	rx_tid->unaligned_size = hw_desc_sz + HAL_LINK_DESC_ALIGN - 1;
+	vaddr_unaligned = dma_alloc_noncoherent(ab->dev, rx_tid->unaligned_size, &paddr,
+						DMA_BIDIRECTIONAL, GFP_ATOMIC);
+	if (!vaddr_unaligned) {
 		spin_unlock_bh(&ab->base_lock);
 		return -ENOMEM;
 	}
 
-	addr_aligned = PTR_ALIGN(vaddr, HAL_LINK_DESC_ALIGN);
-
-	ath11k_hal_reo_qdesc_setup(addr_aligned, tid, ba_win_sz,
-				   ssn, pn_type);
-
-	paddr = dma_map_single(ab->dev, addr_aligned, hw_desc_sz,
-			       DMA_BIDIRECTIONAL);
-
-	ret = dma_mapping_error(ab->dev, paddr);
-	if (ret) {
-		spin_unlock_bh(&ab->base_lock);
-		ath11k_warn(ab, "failed to setup dma map for peer %pM rx tid %d: %d\n",
-			    peer_mac, tid, ret);
-		goto err_mem_free;
-	}
-
-	rx_tid->vaddr = vaddr;
-	rx_tid->paddr = paddr;
+	rx_tid->vaddr_unaligned = vaddr_unaligned;
+	vaddr = PTR_ALIGN(vaddr_unaligned, HAL_LINK_DESC_ALIGN);
+	rx_tid->paddr_unaligned = paddr;
+	rx_tid->paddr = rx_tid->paddr_unaligned + ((unsigned long)vaddr -
+			(unsigned long)rx_tid->vaddr_unaligned);
+	ath11k_hal_reo_qdesc_setup(vaddr, tid, ba_win_sz, ssn, pn_type);
 	rx_tid->size = hw_desc_sz;
 	rx_tid->active = true;
 
+	/* The CPU has just written the REO queue descriptor into memory
+	 * obtained from dma_alloc_noncoherent(); those writes are not
+	 * guaranteed to be visible to the device, so sync them explicitly.
+	 */
+	dma_sync_single_for_device(ab->dev, rx_tid->paddr,
+				   rx_tid->size,
+				   DMA_TO_DEVICE);
 	spin_unlock_bh(&ab->base_lock);
 
-	ret = ath11k_wmi_peer_rx_reorder_queue_setup(ar, vdev_id, peer_mac,
-						     paddr, tid, 1, ba_win_sz);
+	ret = ath11k_wmi_peer_rx_reorder_queue_setup(ar, vdev_id, peer_mac, rx_tid->paddr,
+						     tid, 1, ba_win_sz);
 	if (ret) {
 		ath11k_warn(ar->ab, "failed to setup rx reorder queue for peer %pM tid %d: %d\n",
 			    peer_mac, tid, ret);
 		ath11k_dp_rx_tid_mem_free(ab, peer_mac, vdev_id, tid);
 	}
 
-	return ret;
-
-err_mem_free:
-	kfree(rx_tid->vaddr);
-	rx_tid->vaddr = NULL;
-
 	return ret;
 }