diff mbox series

[PULL,14/68] target/arm: Add FPCR.NEP to TBFLAGS

Message ID 20250211162554.4135349-15-peter.maydell@linaro.org
State New
Headers show
Series [PULL,01/68] target/alpha: Don't corrupt error_code with unknown softfloat flags | expand

Commit Message

Peter Maydell Feb. 11, 2025, 4:25 p.m. UTC
For FEAT_AFP, we want to emit different code when FPCR.NEP is set, so
that instead of zeroing the high elements of a vector register when
we write the output of a scalar operation to it, we instead merge in
those elements from one of the source registers.  Since this affects
the generated code, we need to put FPCR.NEP into the TBFLAGS.

FPCR.NEP is treated as 0 when in streaming SVE mode and FEAT_SME_FA64
is not implemented or not enabled; we can implement this logic in
rebuild_hflags_a64().

Signed-off-by: Peter Maydell <peter.maydell@linaro.org>
Reviewed-by: Richard Henderson <richard.henderson@linaro.org>
---
 target/arm/cpu.h               | 1 +
 target/arm/tcg/translate.h     | 2 ++
 target/arm/tcg/hflags.c        | 9 +++++++++
 target/arm/tcg/translate-a64.c | 1 +
 4 files changed, 13 insertions(+)
diff mbox series

Patch

diff --git a/target/arm/cpu.h b/target/arm/cpu.h
index 51fc50980b3..24b0a5fcb9f 100644
--- a/target/arm/cpu.h
+++ b/target/arm/cpu.h
@@ -3214,6 +3214,7 @@  FIELD(TBFLAG_A64, NV2_MEM_E20, 35, 1)
 /* Set if FEAT_NV2 RAM accesses are big-endian */
 FIELD(TBFLAG_A64, NV2_MEM_BE, 36, 1)
 FIELD(TBFLAG_A64, AH, 37, 1)   /* FPCR.AH */
+FIELD(TBFLAG_A64, NEP, 38, 1)   /* FPCR.NEP */
 
 /*
  * Helpers for using the above. Note that only the A64 accessors use
diff --git a/target/arm/tcg/translate.h b/target/arm/tcg/translate.h
index 3be3fcbe728..0dff00015e8 100644
--- a/target/arm/tcg/translate.h
+++ b/target/arm/tcg/translate.h
@@ -157,6 +157,8 @@  typedef struct DisasContext {
     bool nv2_mem_be;
     /* True if FPCR.AH is 1 (alternate floating point handling) */
     bool fpcr_ah;
+    /* True if FPCR.NEP is 1 (FEAT_AFP scalar upper-element result handling) */
+    bool fpcr_nep;
     /*
      * >= 0, a copy of PSTATE.BTYPE, which will be 0 without v8.5-BTI.
      *  < 0, set by the current instruction.
diff --git a/target/arm/tcg/hflags.c b/target/arm/tcg/hflags.c
index b3a78564ec1..9e6a1869f94 100644
--- a/target/arm/tcg/hflags.c
+++ b/target/arm/tcg/hflags.c
@@ -407,6 +407,15 @@  static CPUARMTBFlags rebuild_hflags_a64(CPUARMState *env, int el, int fp_el,
     if (env->vfp.fpcr & FPCR_AH) {
         DP_TBFLAG_A64(flags, AH, 1);
     }
+    if (env->vfp.fpcr & FPCR_NEP) {
+        /*
+         * In streaming-SVE without FA64, NEP behaves as if zero;
+         * compare pseudocode IsMerging()
+         */
+        if (!(EX_TBFLAG_A64(flags, PSTATE_SM) && !sme_fa64(env, el))) {
+            DP_TBFLAG_A64(flags, NEP, 1);
+        }
+    }
 
     return rebuild_hflags_common(env, fp_el, mmu_idx, flags);
 }
diff --git a/target/arm/tcg/translate-a64.c b/target/arm/tcg/translate-a64.c
index f9b62a2c4fd..d94a0022e4c 100644
--- a/target/arm/tcg/translate-a64.c
+++ b/target/arm/tcg/translate-a64.c
@@ -9742,6 +9742,7 @@  static void aarch64_tr_init_disas_context(DisasContextBase *dcbase,
     dc->nv2_mem_e20 = EX_TBFLAG_A64(tb_flags, NV2_MEM_E20);
     dc->nv2_mem_be = EX_TBFLAG_A64(tb_flags, NV2_MEM_BE);
     dc->fpcr_ah = EX_TBFLAG_A64(tb_flags, AH);
+    dc->fpcr_nep = EX_TBFLAG_A64(tb_flags, NEP);
     dc->vec_len = 0;
     dc->vec_stride = 0;
     dc->cp_regs = arm_cpu->cp_regs;