@@ -173,3 +173,8 @@ DEF_HELPER_FLAGS_5(gvec_fmaxnum_b16, TCG_CALL_NO_RWG,
void, ptr, ptr, ptr, fpst, i32)
DEF_HELPER_FLAGS_5(gvec_fminnum_b16, TCG_CALL_NO_RWG,
void, ptr, ptr, ptr, fpst, i32)
+
+/*
+ * SME2 FDOT helpers.  Both take four pointer operands, then env and
+ * the 32-bit descriptor, and return nothing.
+ */
+DEF_HELPER_FLAGS_6(sme2_fdot_h, TCG_CALL_NO_RWG,
+ void, ptr, ptr, ptr, ptr, env, i32)
+DEF_HELPER_FLAGS_6(sme2_fdot_idx_h, TCG_CALL_NO_RWG,
+ void, ptr, ptr, ptr, ptr, env, i32)
@@ -1122,6 +1122,50 @@ void HELPER(sme_fmopa_h)(void *vza, void *vzn, void *vzm, void *vpn,
}
}
+/*
+ * SME2 FDOT, vector-by-vector form: for every 32-bit element e,
+ *     d[e] = f16_dotadd(a[e], n[e], m[e])
+ *
+ * vd and va are accessed as float32 elements; vn and vm are accessed
+ * as raw 32-bit elements (presumably a pair of float16 values each --
+ * see f16_dotadd to confirm).  simd_maxsz(desc) gives the number of
+ * bytes to process.
+ */
+void HELPER(sme2_fdot_h)(void *vd, void *vn, void *vm, void *va,
+                         CPUARMState *env, uint32_t desc)
+{
+    float_status *status_std = &env->vfp.fp_status[FPST_ZA];
+    float_status *status_f16 = &env->vfp.fp_status[FPST_ZA_F16];
+    /* Private copy of the ZA status, switched to round-to-odd below. */
+    float_status status_odd = *status_std;
+    float32 *acc_out = vd, *acc_in = va;
+    uint32_t *src_n = vn, *src_m = vm;
+    intptr_t nelem = simd_maxsz(desc) / sizeof(float32);
+    intptr_t e;
+
+    set_float_rounding_mode(float_round_to_odd, &status_odd);
+
+    for (e = 0; e < nelem; e++) {
+        /* Host-endian adjusted index of 32-bit element e. */
+        intptr_t he = H4(e);
+
+        acc_out[he] = f16_dotadd(acc_in[he], src_n[he], src_m[he],
+                                 status_f16, status_std, &status_odd);
+    }
+}
+
+/*
+ * SME2 FDOT, indexed form.  The vector is walked in segments of up to
+ * four 32-bit elements.  Within each segment every element of vn is
+ * combined with the single 32-bit element of vm selected by the index:
+ *     d[e] = f16_dotadd(a[e], n[e], m[segment_base + idx])
+ *
+ * The index is the low two bits of the descriptor's data field;
+ * simd_maxsz(desc) gives the number of bytes to process.
+ */
+void HELPER(sme2_fdot_idx_h)(void *vd, void *vn, void *vm, void *va,
+                             CPUARMState *env, uint32_t desc)
+{
+    float_status *status_std = &env->vfp.fp_status[FPST_ZA];
+    float_status *status_f16 = &env->vfp.fp_status[FPST_ZA_F16];
+    /* Private copy of the ZA status, switched to round-to-odd below. */
+    float_status status_odd = *status_std;
+    int index = extract32(desc, SIMD_DATA_SHIFT, 2);
+    intptr_t nelem = simd_maxsz(desc) / sizeof(float32);
+    intptr_t seg_len = MIN(4, nelem);
+    float32 *acc_out = vd, *acc_in = va;
+    uint32_t *src_n = vn;
+    /* Pre-offset by the index so src_m[seg] is the selected element. */
+    uint32_t *src_m = (uint32_t *)vm + H4(index);
+    intptr_t seg, k;
+
+    set_float_rounding_mode(float_round_to_odd, &status_odd);
+
+    for (seg = 0; seg < nelem; seg += seg_len) {
+        /* Read the shared operand before any result is stored. */
+        uint32_t sel = src_m[seg];
+
+        for (k = 0; k < seg_len; k++) {
+            intptr_t he = H4(seg + k);
+
+            acc_out[he] = f16_dotadd(acc_in[he], src_n[he], sel,
+                                     status_f16, status_std, &status_odd);
+        }
+    }
+}
+
void HELPER(sme_bfmopa)(void *vza, void *vzn, void *vzm,
void *vpn, void *vpm, CPUARMState *env, uint32_t desc)
{
@@ -844,3 +844,21 @@ static bool do_bfmlal_nx(DisasContext *s, arg_azx_n *a, bool sub)
TRANS_FEAT(BFMLAL_nx, aa64_sme2, do_bfmlal_nx, a, false)
TRANS_FEAT(BFMLSL_nx, aa64_sme2, do_bfmlal_nx, a, true)
+
+/*
+ * Translate FDOT for the azz_n argument set by forwarding the decoded
+ * fields (n, rv, off, zn, zm) to do_azz_acc_fp together with the
+ * sme2_fdot_h helper and FPST_ENV.  @multi is passed through unchanged;
+ * the TRANS_FEAT lines below use false for FDOT_n1 and true for FDOT_nn.
+ * Returns whatever do_azz_acc_fp returns.
+ * NOTE(review): the meaning of the literal 1, 0, 0 arguments depends on
+ * do_azz_acc_fp's signature, which is not visible here -- confirm.
+ */
+static bool do_fdot(DisasContext *s, arg_azz_n *a, bool multi)
+{
+ return do_azz_acc_fp(s, a->n, 1, a->rv, a->off, a->zn, a->zm, 0, 0,
+ multi, FPST_ENV, gen_helper_sme2_fdot_h);
+}
+
+TRANS_FEAT(FDOT_n1, aa64_sme2, do_fdot, a, false)
+TRANS_FEAT(FDOT_nn, aa64_sme2, do_fdot, a, true)
+
+/*
+ * Translate indexed FDOT for the azx_n argument set.  Same forwarding
+ * to do_azz_acc_fp as the non-indexed form, except that a->idx is
+ * passed in the data position, the multi flag is fixed to false, and
+ * the sme2_fdot_idx_h helper is used.
+ * Returns whatever do_azz_acc_fp returns.
+ */
+static bool do_fdot_nx(DisasContext *s, arg_azx_n *a)
+{
+ return do_azz_acc_fp(s, a->n, 1, a->rv, a->off, a->zn, a->zm,
+ a->idx, 0, false, FPST_ENV,
+ gen_helper_sme2_fdot_idx_h);
+}
+
+TRANS_FEAT(FDOT_nx, aa64_sme2, do_fdot_nx, a)
@@ -285,6 +285,9 @@ BFMLSL_n1 11000001 001 0 .... 0 .. 011 ..... 11 ... @azz_nx1_o3x2 n=1
BFMLSL_n1 11000001 001 0 .... 0 .. 010 ..... 110 .. @azz_nx1_o2x2 n=2
BFMLSL_n1 11000001 001 1 .... 0 .. 010 ..... 110 .. @azz_nx1_o2x2 n=4
+# FDOT_n1: the bit following the leading "001" selects n=2 (0) or n=4 (1).
+FDOT_n1 11000001 001 0 .... 0 .. 100 ..... 00 ... @azz_nx1_o3 n=2
+FDOT_n1 11000001 001 1 .... 0 .. 100 ..... 00 ... @azz_nx1_o3 n=4
+
### SME2 Multi-vector Multiple Array Vectors
%zn_ax2 6:4 !function=times_2
@@ -322,6 +325,9 @@ BFMLAL_nn 11000001 101 ...01 0 .. 010 ...00 100 .. @azz_4x4_o2x2
BFMLSL_nn 11000001 101 ....0 0 .. 010 ....0 110 .. @azz_2x2_o2x2
BFMLSL_nn 11000001 101 ...01 0 .. 010 ...00 110 .. @azz_4x4_o2x2
+# FDOT_nn: one pattern per format (@azz_2x2_o3, @azz_4x4_o3); judging by
+# the format names these are the two- and four-vector forms -- the format
+# definitions themselves are outside this hunk.
+FDOT_nn 11000001 101 ....0 0 .. 100 ....0 00 ... @azz_2x2_o3
+FDOT_nn 11000001 101 ...01 0 .. 100 ...00 00 ... @azz_4x4_o3
+
### SME2 Multi-vector Indexed
&azx_n n off rv zn zm idx
@@ -351,3 +357,11 @@ BFMLAL_nx 11000001 1001 .... 1 .. 1 .. ...00 10 ... @azx_4x1_o2x2
BFMLSL_nx 11000001 1000 .... . .. 1 .. ..... 11 ... @azx_1x1_o3x2
BFMLSL_nx 11000001 1001 .... 0 .. 1 .. ....0 11 ... @azx_2x1_o2x2
BFMLSL_nx 11000001 1001 .... 1 .. 1 .. ...00 11 ... @azx_4x1_o2x2
+
+# Formats for indexed FDOT: 4-bit zm, 2-bit element index, 3-bit offset.
+# n is the value do_fdot_nx forwards as a->n; it must agree with the
+# zn field used (%zn_ax2 -> n=2, %zn_ax4 -> n=4).
+@azx_2x1_i2_o3 ........ .... zm:4 . .. . idx:2 .... ... off:3 \
+ &azx_n n=2 rv=%mova_rv zn=%zn_ax2
+@azx_4x1_i2_o3 ........ .... zm:4 . .. . idx:2 .... ... off:3 \
+ &azx_n n=4 rv=%mova_rv zn=%zn_ax4
+
+FDOT_nx 11000001 0101 .... 0 .. 1 .. ....0 01 ... @azx_2x1_i2_o3
+FDOT_nx 11000001 0101 .... 1 .. 1 .. ...00 01 ... @azx_4x1_i2_o3
Signed-off-by: Richard Henderson <richard.henderson@linaro.org> --- target/arm/tcg/helper-sme.h | 5 ++++ target/arm/tcg/sme_helper.c | 44 ++++++++++++++++++++++++++++++++++ target/arm/tcg/translate-sme.c | 18 ++++++++++++++ target/arm/tcg/sme.decode | 14 +++++++++++ 4 files changed, 81 insertions(+)