
[PULL,14/21] target/arm: Correct STRD atomicity

Message ID 20250307150708.3222813-15-peter.maydell@linaro.org
State Accepted
Commit ee786ca115045a2b7e86ac3073b0761cb99e0d49
Series [PULL,01/21] hw/arm/smmu-common: Remove the repeated ttb field

Commit Message

Peter Maydell March 7, 2025, 3:07 p.m. UTC
Our STRD implementation doesn't correctly implement the requirement:
 * if the address is 8-aligned the access must be a 64-bit
   single-copy atomic access, not two 32-bit accesses

Rewrite the handling of STRD to use a single tcg_gen_qemu_st_i64()
of a value produced by concatenating the two 32-bit source registers.
This allows us to get the atomicity right.
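For illustration only (not part of the patch), a host-side sketch of the
64-bit value the translated code builds; strd_value(), rt_val and rt2_val
are hypothetical names. In both endianness cases the word from Rt ends up
at the lower address, matching the old pair of 32-bit stores:

    #include <stdbool.h>
    #include <stdint.h>

    /* Illustrative sketch: rt_val is the Rt register value (destined for
     * the lower address), rt2_val is Rt2 (destined for the higher one). */
    static uint64_t strd_value(uint32_t rt_val, uint32_t rt2_val,
                               bool big_endian)
    {
        if (big_endian) {
            /* an MO_BE store puts the high half at the lower address */
            return ((uint64_t)rt_val << 32) | rt2_val;
        } else {
            /* an MO_LE store puts the low half at the lower address */
            return ((uint64_t)rt2_val << 32) | rt_val;
        }
    }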

As with the LDRD change, now that we don't update 'addr' in the
course of performing the store we need to adjust the offset
we pass to op_addr_ri_post() and op_addr_rr_post().
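As a quick sanity check (illustrative only; check_strd_writeback() and
'base' are hypothetical), the writeback address the post helpers compute
is the same before and after the change:

    #include <assert.h>
    #include <stdint.h>

    /* 'base' stands in for the address returned by op_addr_*_pre(). */
    static void check_strd_writeback(uint32_t base)
    {
        /* old code: addr was advanced past the first word, so -4 was
         * passed to the post helper to compensate */
        uint32_t old_writeback = (base + 4) + (uint32_t)-4;
        /* new code: addr is never modified, so offset 0 is passed */
        uint32_t new_writeback = base + 0;

        assert(old_writeback == new_writeback);
    }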

Cc: qemu-stable@nongnu.org
Signed-off-by: Peter Maydell <peter.maydell@linaro.org>
Reviewed-by: Richard Henderson <richard.henderson@linaro.org>
Message-id: 20250227142746.1698904-3-peter.maydell@linaro.org
---
 target/arm/tcg/translate.c | 59 +++++++++++++++++++++++++-------------
 1 file changed, 39 insertions(+), 20 deletions(-)

Patch

diff --git a/target/arm/tcg/translate.c b/target/arm/tcg/translate.c
index 93772da39a4..404a254678a 100644
--- a/target/arm/tcg/translate.c
+++ b/target/arm/tcg/translate.c
@@ -5063,10 +5063,42 @@  static bool trans_LDRD_rr(DisasContext *s, arg_ldst_rr *a)
     return true;
 }
 
+static void do_strd_store(DisasContext *s, TCGv_i32 addr, int rt, int rt2)
+{
+    /*
+     * STRD is required to be an atomic 64-bit access if the
+     * address is 8-aligned, two atomic 32-bit accesses if
+     * it's only 4-aligned, and to give an alignment fault
+     * if it's not 4-aligned.
+     * Rt is always the word from the lower address, and Rt2 the
+     * data from the higher address, regardless of endianness.
+     * So (like gen_store_exclusive) we avoid gen_aa32_ld_i64()
+     * so we don't get its SCTLR_B check, and instead do a 64-bit access
+     * using MO_BE if appropriate, using a value constructed
+     * by putting the two halves together in the right order.
+     *
+     * As with LDRD, the 64-bit atomicity is not required for
+     * M-profile, or for A-profile before LPAE, and we provide
+     * the higher guarantee always for simplicity.
+     */
+    int mem_idx = get_mem_index(s);
+    MemOp opc = MO_64 | MO_ALIGN_4 | MO_ATOM_SUBALIGN | s->be_data;
+    TCGv taddr = gen_aa32_addr(s, addr, opc);
+    TCGv_i32 t1 = load_reg(s, rt);
+    TCGv_i32 t2 = load_reg(s, rt2);
+    TCGv_i64 t64 = tcg_temp_new_i64();
+
+    if (s->be_data == MO_BE) {
+        tcg_gen_concat_i32_i64(t64, t2, t1);
+    } else {
+        tcg_gen_concat_i32_i64(t64, t1, t2);
+    }
+    tcg_gen_qemu_st_i64(t64, taddr, mem_idx, opc);
+}
+
 static bool trans_STRD_rr(DisasContext *s, arg_ldst_rr *a)
 {
-    int mem_idx = get_mem_index(s);
-    TCGv_i32 addr, tmp;
+    TCGv_i32 addr;
 
     if (!ENABLE_ARCH_5TE) {
         return false;
@@ -5077,15 +5109,9 @@  static bool trans_STRD_rr(DisasContext *s, arg_ldst_rr *a)
     }
     addr = op_addr_rr_pre(s, a);
 
-    tmp = load_reg(s, a->rt);
-    gen_aa32_st_i32(s, tmp, addr, mem_idx, MO_UL | MO_ALIGN);
+    do_strd_store(s, addr, a->rt, a->rt + 1);
 
-    tcg_gen_addi_i32(addr, addr, 4);
-
-    tmp = load_reg(s, a->rt + 1);
-    gen_aa32_st_i32(s, tmp, addr, mem_idx, MO_UL | MO_ALIGN);
-
-    op_addr_rr_post(s, a, addr, -4);
+    op_addr_rr_post(s, a, addr, 0);
     return true;
 }
 
@@ -5213,20 +5239,13 @@  static bool trans_LDRD_ri_t32(DisasContext *s, arg_ldst_ri2 *a)
 
 static bool op_strd_ri(DisasContext *s, arg_ldst_ri *a, int rt2)
 {
-    int mem_idx = get_mem_index(s);
-    TCGv_i32 addr, tmp;
+    TCGv_i32 addr;
 
     addr = op_addr_ri_pre(s, a);
 
-    tmp = load_reg(s, a->rt);
-    gen_aa32_st_i32(s, tmp, addr, mem_idx, MO_UL | MO_ALIGN);
+    do_strd_store(s, addr, a->rt, rt2);
 
-    tcg_gen_addi_i32(addr, addr, 4);
-
-    tmp = load_reg(s, rt2);
-    gen_aa32_st_i32(s, tmp, addr, mem_idx, MO_UL | MO_ALIGN);
-
-    op_addr_ri_post(s, a, addr, -4);
+    op_addr_ri_post(s, a, addr, 0);
     return true;
 }
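
For reference, a minimal sketch (not part of the patch; strd_outcome() and
StrdOutcome are hypothetical names) of the guest-visible behaviour that the
comment in do_strd_store() describes for each alignment of the address:

    #include <stdint.h>

    typedef enum {
        STRD_ATOMIC_64,    /* one 64-bit single-copy atomic store  */
        STRD_ATOMIC_2x32,  /* two 32-bit single-copy atomic stores */
        STRD_ALIGN_FAULT,  /* alignment fault                      */
    } StrdOutcome;

    static StrdOutcome strd_outcome(uint32_t addr)
    {
        if (addr % 8 == 0) {
            return STRD_ATOMIC_64;
        } else if (addr % 4 == 0) {
            return STRD_ATOMIC_2x32;
        } else {
            return STRD_ALIGN_FAULT;
        }
    }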