--- a/target/arm/tcg/helper-sme.h
+++ b/target/arm/tcg/helper-sme.h
@@ -191,3 +191,25 @@ DEF_HELPER_FLAGS_4(sme2_uvdot_idx_4h, TCG_CALL_NO_RWG, void, ptr, ptr, ptr, i32)
DEF_HELPER_FLAGS_4(sme2_svdot_idx_2h, TCG_CALL_NO_RWG, void, ptr, ptr, ptr, i32)
DEF_HELPER_FLAGS_4(sme2_uvdot_idx_2h, TCG_CALL_NO_RWG, void, ptr, ptr, ptr, i32)
+
+DEF_HELPER_FLAGS_5(sme2_smlall_s, TCG_CALL_NO_RWG, void, ptr, ptr, ptr, ptr, i32)
+DEF_HELPER_FLAGS_5(sme2_smlall_d, TCG_CALL_NO_RWG, void, ptr, ptr, ptr, ptr, i32)
+DEF_HELPER_FLAGS_5(sme2_smlsll_s, TCG_CALL_NO_RWG, void, ptr, ptr, ptr, ptr, i32)
+DEF_HELPER_FLAGS_5(sme2_smlsll_d, TCG_CALL_NO_RWG, void, ptr, ptr, ptr, ptr, i32)
+DEF_HELPER_FLAGS_5(sme2_umlall_s, TCG_CALL_NO_RWG, void, ptr, ptr, ptr, ptr, i32)
+DEF_HELPER_FLAGS_5(sme2_umlall_d, TCG_CALL_NO_RWG, void, ptr, ptr, ptr, ptr, i32)
+DEF_HELPER_FLAGS_5(sme2_umlsll_s, TCG_CALL_NO_RWG, void, ptr, ptr, ptr, ptr, i32)
+DEF_HELPER_FLAGS_5(sme2_umlsll_d, TCG_CALL_NO_RWG, void, ptr, ptr, ptr, ptr, i32)
+DEF_HELPER_FLAGS_5(sme2_usmlall_s, TCG_CALL_NO_RWG, void, ptr, ptr, ptr, ptr, i32)
+DEF_HELPER_FLAGS_5(sme2_usmlall_d, TCG_CALL_NO_RWG, void, ptr, ptr, ptr, ptr, i32)
+
+DEF_HELPER_FLAGS_5(sme2_smlall_idx_s, TCG_CALL_NO_RWG, void, ptr, ptr, ptr, ptr, i32)
+DEF_HELPER_FLAGS_5(sme2_smlall_idx_d, TCG_CALL_NO_RWG, void, ptr, ptr, ptr, ptr, i32)
+DEF_HELPER_FLAGS_5(sme2_smlsll_idx_s, TCG_CALL_NO_RWG, void, ptr, ptr, ptr, ptr, i32)
+DEF_HELPER_FLAGS_5(sme2_smlsll_idx_d, TCG_CALL_NO_RWG, void, ptr, ptr, ptr, ptr, i32)
+DEF_HELPER_FLAGS_5(sme2_umlall_idx_s, TCG_CALL_NO_RWG, void, ptr, ptr, ptr, ptr, i32)
+DEF_HELPER_FLAGS_5(sme2_umlall_idx_d, TCG_CALL_NO_RWG, void, ptr, ptr, ptr, ptr, i32)
+DEF_HELPER_FLAGS_5(sme2_umlsll_idx_s, TCG_CALL_NO_RWG, void, ptr, ptr, ptr, ptr, i32)
+DEF_HELPER_FLAGS_5(sme2_umlsll_idx_d, TCG_CALL_NO_RWG, void, ptr, ptr, ptr, ptr, i32)
+DEF_HELPER_FLAGS_5(sme2_usmlall_idx_s, TCG_CALL_NO_RWG, void, ptr, ptr, ptr, ptr, i32)
+DEF_HELPER_FLAGS_5(sme2_usmlall_idx_d, TCG_CALL_NO_RWG, void, ptr, ptr, ptr, ptr, i32)
--- a/target/arm/tcg/sme_helper.c
+++ b/target/arm/tcg/sme_helper.c
@@ -1428,3 +1428,63 @@ DO_VDOT_IDX(sme2_svdot_idx_2h, int32_t, int16_t, int16_t, H4, H2)
DO_VDOT_IDX(sme2_uvdot_idx_2h, uint32_t, uint16_t, uint16_t, H4, H2)
#undef DO_VDOT_IDX
+
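+/*
+ * Widening multiply-accumulate: each wide (TYPEW) destination element
+ * adds or subtracts (per OP) the product of one narrow N element and one
+ * narrow M element.  The 2-bit SEL field in the descriptor selects which
+ * of the four narrow elements overlaying each wide element is used.
+ */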
+#define DO_MLALL(NAME, TYPEW, TYPEN, TYPEM, HW, HN, OP) \
+void HELPER(NAME)(void *vd, void *vn, void *vm, void *va, uint32_t desc) \
+{ \
+ intptr_t elements = simd_oprsz(desc) / sizeof(TYPEW); \
+ intptr_t sel = extract32(desc, SIMD_DATA_SHIFT, 2); \
+ TYPEW *d = vd, *a = va; TYPEN *n = vn; TYPEM *m = vm; \
+ for (intptr_t i = 0; i < elements; ++i) { \
+ TYPEW nn = n[HN(i * 4 + sel)]; \
+ TYPEM mm = m[HN(i * 4 + sel)]; \
+ d[HW(i)] = a[HW(i)] OP (nn * mm); \
+ } \
+}
+
+DO_MLALL(sme2_smlall_s, int32_t, int8_t, int8_t, H4, H1, +)
+DO_MLALL(sme2_smlall_d, int64_t, int16_t, int16_t, H8, H2, +)
+DO_MLALL(sme2_smlsll_s, int32_t, int8_t, int8_t, H4, H1, -)
+DO_MLALL(sme2_smlsll_d, int64_t, int16_t, int16_t, H8, H2, -)
+
+DO_MLALL(sme2_umlall_s, uint32_t, uint8_t, uint8_t, H4, H1, +)
+DO_MLALL(sme2_umlall_d, uint64_t, uint16_t, uint16_t, H8, H2, +)
+DO_MLALL(sme2_umlsll_s, uint32_t, uint8_t, uint8_t, H4, H1, -)
+DO_MLALL(sme2_umlsll_d, uint64_t, uint16_t, uint16_t, H8, H2, -)
+
+DO_MLALL(sme2_usmlall_s, uint32_t, uint8_t, int8_t, H4, H1, +)
+DO_MLALL(sme2_usmlall_d, uint64_t, uint16_t, int16_t, H8, H2, +)
+
+#undef DO_MLALL
+
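+/*
+ * As above, but the M operand is a single element per 128-bit segment,
+ * selected by the 4-bit IDX field in the descriptor.
+ */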
+#define DO_MLALL_IDX(NAME, TYPEW, TYPEN, TYPEM, HW, HN, OP) \
+void HELPER(NAME)(void *vd, void *vn, void *vm, void *va, uint32_t desc) \
+{ \
+ intptr_t elements = simd_oprsz(desc) / sizeof(TYPEW); \
+ intptr_t eltspersegment = 16 / sizeof(TYPEW); \
+ intptr_t sel = extract32(desc, SIMD_DATA_SHIFT, 2); \
+ intptr_t idx = extract32(desc, SIMD_DATA_SHIFT + 2, 4); \
+ TYPEW *d = vd, *a = va; TYPEN *n = vn; TYPEM *m = vm; \
+ for (intptr_t i = 0; i < elements; i += eltspersegment) { \
+ TYPEW mm = m[HN(i * 4 + idx)]; \
+ for (intptr_t j = 0; j < eltspersegment; ++j) { \
+ TYPEN nn = n[HN((i + j) * 4 + sel)]; \
+ d[HW(i + j)] = a[HW(i + j)] OP (nn * mm); \
+ } \
+ } \
+}
+
+DO_MLALL_IDX(sme2_smlall_idx_s, int32_t, int8_t, int8_t, H4, H1, +)
+DO_MLALL_IDX(sme2_smlall_idx_d, int64_t, int16_t, int16_t, H8, H2, +)
+DO_MLALL_IDX(sme2_smlsll_idx_s, int32_t, int8_t, int8_t, H4, H1, -)
+DO_MLALL_IDX(sme2_smlsll_idx_d, int64_t, int16_t, int16_t, H8, H2, -)
+
+DO_MLALL_IDX(sme2_umlall_idx_s, uint32_t, uint8_t, uint8_t, H4, H1, +)
+DO_MLALL_IDX(sme2_umlall_idx_d, uint64_t, uint16_t, uint16_t, H8, H2, +)
+DO_MLALL_IDX(sme2_umlsll_idx_s, uint32_t, uint8_t, uint8_t, H4, H1, -)
+DO_MLALL_IDX(sme2_umlsll_idx_d, uint64_t, uint16_t, uint16_t, H8, H2, -)
+
+DO_MLALL_IDX(sme2_usmlall_idx_s, uint32_t, uint8_t, int8_t, H4, H1, +)
+DO_MLALL_IDX(sme2_usmlall_idx_d, uint64_t, uint16_t, int16_t, H8, H2, +)
+
+#undef DO_MLALL_IDX
--- a/target/arm/tcg/translate-sme.c
+++ b/target/arm/tcg/translate-sme.c
@@ -1032,3 +1032,81 @@ TRANS_FEAT(SMLAL_nx, aa64_sme, do_smlal_nx, a, gen_helper_sve2_smlal_idx_s)
TRANS_FEAT(SMLSL_nx, aa64_sme, do_smlal_nx, a, gen_helper_sve2_smlsl_idx_s)
TRANS_FEAT(UMLAL_nx, aa64_sme, do_smlal_nx, a, gen_helper_sve2_umlal_idx_s)
TRANS_FEAT(UMLSL_nx, aa64_sme, do_smlal_nx, a, gen_helper_sve2_umlsl_idx_s)
+
+static bool do_smlall(DisasContext *s, arg_azz_n *a, bool multi,
+ gen_helper_gvec_4 *fn)
+{
+ return do_azz_acc(s, a->n, 4, a->rv, a->off, a->zn, a->zm,
+ 0, 0, multi, fn);
+}
+
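+/*
+ * There is no separate SUMLALL helper: swap the N and M operands and
+ * reuse the unsigned-by-signed USMLALL helper.
+ */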
+static void gen_helper_sme2_sumlall_s(TCGv_ptr d, TCGv_ptr n, TCGv_ptr m,
+ TCGv_ptr a, TCGv_i32 desc)
+{
+ gen_helper_sme2_usmlall_s(d, m, n, a, desc);
+}
+
+static void gen_helper_sme2_sumlall_d(TCGv_ptr d, TCGv_ptr n, TCGv_ptr m,
+ TCGv_ptr a, TCGv_i32 desc)
+{
+ gen_helper_sme2_usmlall_d(d, m, n, a, desc);
+}
+
+TRANS_FEAT(SMLALL_n1_s, aa64_sme, do_smlall, a, false, gen_helper_sme2_smlall_s)
+TRANS_FEAT(SMLSLL_n1_s, aa64_sme, do_smlall, a, false, gen_helper_sme2_smlsll_s)
+TRANS_FEAT(UMLALL_n1_s, aa64_sme, do_smlall, a, false, gen_helper_sme2_umlall_s)
+TRANS_FEAT(UMLSLL_n1_s, aa64_sme, do_smlall, a, false, gen_helper_sme2_umlsll_s)
+TRANS_FEAT(USMLALL_n1_s, aa64_sme, do_smlall, a, false, gen_helper_sme2_usmlall_s)
+TRANS_FEAT(SUMLALL_n1_s, aa64_sme, do_smlall, a, false, gen_helper_sme2_sumlall_s)
+
+TRANS_FEAT(SMLALL_n1_d, aa64_sme_i16i64, do_smlall, a, false, gen_helper_sme2_smlall_d)
+TRANS_FEAT(SMLSLL_n1_d, aa64_sme_i16i64, do_smlall, a, false, gen_helper_sme2_smlsll_d)
+TRANS_FEAT(UMLALL_n1_d, aa64_sme_i16i64, do_smlall, a, false, gen_helper_sme2_umlall_d)
+TRANS_FEAT(UMLSLL_n1_d, aa64_sme_i16i64, do_smlall, a, false, gen_helper_sme2_umlsll_d)
+TRANS_FEAT(USMLALL_n1_d, aa64_sme_i16i64, do_smlall, a, false, gen_helper_sme2_usmlall_d)
+TRANS_FEAT(SUMLALL_n1_d, aa64_sme_i16i64, do_smlall, a, false, gen_helper_sme2_sumlall_d)
+
+TRANS_FEAT(SMLALL_nn_s, aa64_sme, do_smlall, a, true, gen_helper_sme2_smlall_s)
+TRANS_FEAT(SMLSLL_nn_s, aa64_sme, do_smlall, a, true, gen_helper_sme2_smlsll_s)
+TRANS_FEAT(UMLALL_nn_s, aa64_sme, do_smlall, a, true, gen_helper_sme2_umlall_s)
+TRANS_FEAT(UMLSLL_nn_s, aa64_sme, do_smlall, a, true, gen_helper_sme2_umlsll_s)
+TRANS_FEAT(USMLALL_nn_s, aa64_sme, do_smlall, a, true, gen_helper_sme2_usmlall_s)
+
+TRANS_FEAT(SMLALL_nn_d, aa64_sme_i16i64, do_smlall, a, true, gen_helper_sme2_smlall_d)
+TRANS_FEAT(SMLSLL_nn_d, aa64_sme_i16i64, do_smlall, a, true, gen_helper_sme2_smlsll_d)
+TRANS_FEAT(UMLALL_nn_d, aa64_sme_i16i64, do_smlall, a, true, gen_helper_sme2_umlall_d)
+TRANS_FEAT(UMLSLL_nn_d, aa64_sme_i16i64, do_smlall, a, true, gen_helper_sme2_umlsll_d)
+TRANS_FEAT(USMLALL_nn_d, aa64_sme_i16i64, do_smlall, a, true, gen_helper_sme2_usmlall_d)
+
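+/*
+ * The index is shifted up by 2 so that it lands above the 2-bit SEL
+ * field in the descriptor, matching the extraction in DO_MLALL_IDX.
+ */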
+static bool do_smlall_nx(DisasContext *s, arg_azx_n *a,
+ gen_helper_gvec_4 *fn)
+{
+ return do_azz_acc(s, a->n, 4, a->rv, a->off, a->zn, a->zm,
+ a->idx << 2, 0, false, fn);
+}
+
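+/* As above, swap the operands to reuse the USMLALL indexed helpers. */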
+static void gen_helper_sme2_sumlall_idx_s(TCGv_ptr d, TCGv_ptr n, TCGv_ptr m,
+ TCGv_ptr a, TCGv_i32 desc)
+{
+ gen_helper_sme2_usmlall_idx_s(d, m, n, a, desc);
+}
+
+static void gen_helper_sme2_sumlall_idx_d(TCGv_ptr d, TCGv_ptr n, TCGv_ptr m,
+ TCGv_ptr a, TCGv_i32 desc)
+{
+ gen_helper_sme2_usmlall_idx_d(d, m, n, a, desc);
+}
+
+TRANS_FEAT(SMLALL_nx_s, aa64_sme, do_smlall_nx, a, gen_helper_sme2_smlall_idx_s)
+TRANS_FEAT(SMLSLL_nx_s, aa64_sme, do_smlall_nx, a, gen_helper_sme2_smlsll_idx_s)
+TRANS_FEAT(UMLALL_nx_s, aa64_sme, do_smlall_nx, a, gen_helper_sme2_umlall_idx_s)
+TRANS_FEAT(UMLSLL_nx_s, aa64_sme, do_smlall_nx, a, gen_helper_sme2_umlsll_idx_s)
+TRANS_FEAT(USMLALL_nx_s, aa64_sme, do_smlall_nx, a, gen_helper_sme2_usmlall_idx_s)
+TRANS_FEAT(SUMLALL_nx_s, aa64_sme, do_smlall_nx, a, gen_helper_sme2_sumlall_idx_s)
+
+TRANS_FEAT(SMLALL_nx_d, aa64_sme_i16i64, do_smlall_nx, a, gen_helper_sme2_smlall_idx_d)
+TRANS_FEAT(SMLSLL_nx_d, aa64_sme_i16i64, do_smlall_nx, a, gen_helper_sme2_smlsll_idx_d)
+TRANS_FEAT(UMLALL_nx_d, aa64_sme_i16i64, do_smlall_nx, a, gen_helper_sme2_umlall_idx_d)
+TRANS_FEAT(UMLSLL_nx_d, aa64_sme_i16i64, do_smlall_nx, a, gen_helper_sme2_umlsll_idx_d)
+TRANS_FEAT(USMLALL_nx_d, aa64_sme_i16i64, do_smlall_nx, a, gen_helper_sme2_usmlall_idx_d)
+TRANS_FEAT(SUMLALL_nx_d, aa64_sme_i16i64, do_smlall_nx, a, gen_helper_sme2_sumlall_idx_d)
--- a/target/arm/tcg/sme.decode
+++ b/target/arm/tcg/sme.decode
@@ -327,6 +327,52 @@ UMLSL_n1 11000001 011 0 .... 0 .. 011 ..... 11 ... @azz_nx1_o3x2 n=1
UMLSL_n1 11000001 011 0 .... 0 .. 010 ..... 110 .. @azz_nx1_o2x2 n=2
UMLSL_n1 11000001 011 1 .... 0 .. 010 ..... 110 .. @azz_nx1_o2x2 n=4
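+
+# The ZA vector offset is encoded divided by 4: the *LL forms address a
+# group of four consecutive ZA vectors.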
+%off2_x4 0:2 !function=times_4
+%off1_x4 0:1 !function=times_4
+
+@azz_nx1_o2x4 ........ ... . zm:4 . .. ... zn:5 ... .. \
+ &azz_n off=%off2_x4 rv=%mova_rv
+@azz_nx1_o1x4 ........ ... . zm:4 . .. ... zn:5 .... . \
+ &azz_n off=%off1_x4 rv=%mova_rv
+
+SMLALL_n1_s 11000001 001 0 .... 0 .. 001 ..... 000 .. @azz_nx1_o2x4 n=1
+SMLALL_n1_d 11000001 011 0 .... 0 .. 001 ..... 000 .. @azz_nx1_o2x4 n=1
+SMLALL_n1_s 11000001 001 0 .... 0 .. 000 ..... 0000 . @azz_nx1_o1x4 n=2
+SMLALL_n1_d 11000001 011 0 .... 0 .. 000 ..... 0000 . @azz_nx1_o1x4 n=2
+SMLALL_n1_s 11000001 001 1 .... 0 .. 000 ..... 0000 . @azz_nx1_o1x4 n=4
+SMLALL_n1_d 11000001 011 1 .... 0 .. 000 ..... 0000 . @azz_nx1_o1x4 n=4
+
+SMLSLL_n1_s 11000001 001 0 .... 0 .. 001 ..... 010 .. @azz_nx1_o2x4 n=1
+SMLSLL_n1_d 11000001 011 0 .... 0 .. 001 ..... 010 .. @azz_nx1_o2x4 n=1
+SMLSLL_n1_s 11000001 001 0 .... 0 .. 000 ..... 0100 . @azz_nx1_o1x4 n=2
+SMLSLL_n1_d 11000001 011 0 .... 0 .. 000 ..... 0100 . @azz_nx1_o1x4 n=2
+SMLSLL_n1_s 11000001 001 1 .... 0 .. 000 ..... 0100 . @azz_nx1_o1x4 n=4
+SMLSLL_n1_d 11000001 011 1 .... 0 .. 000 ..... 0100 . @azz_nx1_o1x4 n=4
+
+UMLALL_n1_s 11000001 001 0 .... 0 .. 001 ..... 100 .. @azz_nx1_o2x4 n=1
+UMLALL_n1_d 11000001 011 0 .... 0 .. 001 ..... 100 .. @azz_nx1_o2x4 n=1
+UMLALL_n1_s 11000001 001 0 .... 0 .. 000 ..... 1000 . @azz_nx1_o1x4 n=2
+UMLALL_n1_d 11000001 011 0 .... 0 .. 000 ..... 1000 . @azz_nx1_o1x4 n=2
+UMLALL_n1_s 11000001 001 1 .... 0 .. 000 ..... 1000 . @azz_nx1_o1x4 n=4
+UMLALL_n1_d 11000001 011 1 .... 0 .. 000 ..... 1000 . @azz_nx1_o1x4 n=4
+
+UMLSLL_n1_s 11000001 001 0 .... 0 .. 001 ..... 110 .. @azz_nx1_o2x4 n=1
+UMLSLL_n1_d 11000001 011 0 .... 0 .. 001 ..... 110 .. @azz_nx1_o2x4 n=1
+UMLSLL_n1_s 11000001 001 0 .... 0 .. 000 ..... 1100 . @azz_nx1_o1x4 n=2
+UMLSLL_n1_d 11000001 011 0 .... 0 .. 000 ..... 1100 . @azz_nx1_o1x4 n=2
+UMLSLL_n1_s 11000001 001 1 .... 0 .. 000 ..... 1100 . @azz_nx1_o1x4 n=4
+UMLSLL_n1_d 11000001 011 1 .... 0 .. 000 ..... 1100 . @azz_nx1_o1x4 n=4
+
+USMLALL_n1_s 11000001 001 0 .... 0 .. 000 ..... 0010 . @azz_nx1_o1x4 n=2
+USMLALL_n1_d 11000001 011 0 .... 0 .. 000 ..... 0010 . @azz_nx1_o1x4 n=2
+USMLALL_n1_s 11000001 001 1 .... 0 .. 000 ..... 0010 . @azz_nx1_o1x4 n=4
+USMLALL_n1_d 11000001 011 1 .... 0 .. 000 ..... 0010 . @azz_nx1_o1x4 n=4
+
+SUMLALL_n1_s 11000001 001 0 .... 0 .. 000 ..... 1010 . @azz_nx1_o1x4 n=2
+SUMLALL_n1_d 11000001 011 0 .... 0 .. 000 ..... 1010 . @azz_nx1_o1x4 n=2
+SUMLALL_n1_s 11000001 001 1 .... 0 .. 000 ..... 1010 . @azz_nx1_o1x4 n=4
+SUMLALL_n1_d 11000001 011 1 .... 0 .. 000 ..... 1010 . @azz_nx1_o1x4 n=4
+
### SME2 Multi-vector Multiple Array Vectors
%zn_ax2 6:4 !function=times_2
@@ -399,6 +445,36 @@ UMLAL_nn 11000001 111 ...01 0 .. 010 ...00 100 .. @azz_4x4_o2x2
UMLSL_nn 11000001 111 ....0 0 .. 010 ....0 110 .. @azz_2x2_o2x2
UMLSL_nn 11000001 111 ...01 0 .. 010 ...00 110 .. @azz_4x4_o2x2
+@azz_2x2_o1x4 ........ ... ..... . .. ... ..... ... .. \
+ &azz_n n=2 rv=%mova_rv zn=%zn_ax2 zm=%zm_ax2 off=%off1_x4
+@azz_4x4_o1x4 ........ ... ..... . .. ... ..... ... .. \
+ &azz_n n=4 rv=%mova_rv zn=%zn_ax4 zm=%zm_ax4 off=%off1_x4
+
+SMLALL_nn_s 11000001 101 ....0 0 .. 000 ....0 0000 . @azz_2x2_o1x4
+SMLALL_nn_d 11000001 111 ....0 0 .. 000 ....0 0000 . @azz_2x2_o1x4
+SMLALL_nn_s 11000001 101 ...01 0 .. 000 ...00 0000 . @azz_4x4_o1x4
+SMLALL_nn_d 11000001 111 ...01 0 .. 000 ...00 0000 . @azz_4x4_o1x4
+
+SMLSLL_nn_s 11000001 101 ....0 0 .. 000 ....0 0100 . @azz_2x2_o1x4
+SMLSLL_nn_d 11000001 111 ....0 0 .. 000 ....0 0100 . @azz_2x2_o1x4
+SMLSLL_nn_s 11000001 101 ...01 0 .. 000 ...00 0100 . @azz_4x4_o1x4
+SMLSLL_nn_d 11000001 111 ...01 0 .. 000 ...00 0100 . @azz_4x4_o1x4
+
+UMLALL_nn_s 11000001 101 ....0 0 .. 000 ....0 1000 . @azz_2x2_o1x4
+UMLALL_nn_d 11000001 111 ....0 0 .. 000 ....0 1000 . @azz_2x2_o1x4
+UMLALL_nn_s 11000001 101 ...01 0 .. 000 ...00 1000 . @azz_4x4_o1x4
+UMLALL_nn_d 11000001 111 ...01 0 .. 000 ...00 1000 . @azz_4x4_o1x4
+
+UMLSLL_nn_s 11000001 101 ....0 0 .. 000 ....0 1100 . @azz_2x2_o1x4
+UMLSLL_nn_d 11000001 111 ....0 0 .. 000 ....0 1100 . @azz_2x2_o1x4
+UMLSLL_nn_s 11000001 101 ...01 0 .. 000 ...00 1100 . @azz_4x4_o1x4
+UMLSLL_nn_d 11000001 111 ...01 0 .. 000 ...00 1100 . @azz_4x4_o1x4
+
+USMLALL_nn_s 11000001 101 ....0 0 .. 000 ....0 1010 . @azz_2x2_o1x4
+USMLALL_nn_d 11000001 111 ....0 0 .. 000 ....0 1010 . @azz_2x2_o1x4
+USMLALL_nn_s 11000001 101 ...01 0 .. 000 ...00 1010 . @azz_4x4_o1x4
+USMLALL_nn_d 11000001 111 ...01 0 .. 000 ...00 1010 . @azz_4x4_o1x4
+
### SME2 Multi-vector Indexed
&azx_n n off rv zn zm idx
@@ -493,3 +569,62 @@ UMLAL_nx 11000001 1101 .... 1 .. 1 .. ...00 10 ... @azx_4x1_o2x2
UMLSL_nx 11000001 1100 .... . .. 1 .. ..... 11 ... @azx_1x1_o3x2
UMLSL_nx 11000001 1101 .... 0 .. 1 .. ....0 11 ... @azx_2x1_o2x2
UMLSL_nx 11000001 1101 .... 1 .. 1 .. ...00 11 ... @azx_4x1_o2x2
+
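+# The element index is 4 bits for the .S forms (byte sources) and 3 bits
+# for the .D forms (halfword sources), split across non-adjacent bits.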
+%idx4_15_10 15:1 10:3
+%idx4_10_1 10:2 1:2
+%idx3_10_1 10:1 1:2
+
+@azx_1x1_i4_o2 ........ .... zm:4 . .. ... zn:5 ... .. \
+ &azx_n n=1 rv=%mova_rv off=%off2_x4 idx=%idx4_15_10
+@azx_1x1_i3_o2 ........ .... zm:4 . .. ... zn:5 ... .. \
+ &azx_n n=1 rv=%mova_rv off=%off2_x4 idx=%idx3_15_10
+@azx_2x1_i4_o1 ........ .... zm:4 . .. ... ..... ... .. \
+ &azx_n n=2 rv=%mova_rv off=%off1_x4 zn=%zn_ax2 idx=%idx4_10_1
+@azx_2x1_i3_o1 ........ .... zm:4 . .. ... ..... ... .. \
+ &azx_n n=2 rv=%mova_rv off=%off1_x4 zn=%zn_ax2 idx=%idx3_10_1
+@azx_4x1_i4_o1 ........ .... zm:4 . .. ... ..... ... .. \
+ &azx_n n=4 rv=%mova_rv off=%off1_x4 zn=%zn_ax4 idx=%idx4_10_1
+@azx_4x1_i3_o1 ........ .... zm:4 . .. ... ..... ... .. \
+ &azx_n n=4 rv=%mova_rv off=%off1_x4 zn=%zn_ax4 idx=%idx3_10_1
+
+SMLALL_nx_s 11000001 0000 .... . .. ... ..... 000 .. @azx_1x1_i4_o2
+SMLALL_nx_d 11000001 1000 .... . .. 0.. ..... 000 .. @azx_1x1_i3_o2
+SMLALL_nx_s 11000001 0001 .... 0 .. 0.. ....0 00 ... @azx_2x1_i4_o1
+SMLALL_nx_d 11000001 1001 .... 0 .. 00. ....0 00 ... @azx_2x1_i3_o1
+SMLALL_nx_s 11000001 0001 .... 1 .. 0.. ...00 00 ... @azx_4x1_i4_o1
+SMLALL_nx_d 11000001 1001 .... 1 .. 00. ...00 00 ... @azx_4x1_i3_o1
+
+SMLSLL_nx_s 11000001 0000 .... . .. ... ..... 010 .. @azx_1x1_i4_o2
+SMLSLL_nx_d 11000001 1000 .... . .. 0.. ..... 010 .. @azx_1x1_i3_o2
+SMLSLL_nx_s 11000001 0001 .... 0 .. 0.. ....0 01 ... @azx_2x1_i4_o1
+SMLSLL_nx_d 11000001 1001 .... 0 .. 00. ....0 01 ... @azx_2x1_i3_o1
+SMLSLL_nx_s 11000001 0001 .... 1 .. 0.. ...00 01 ... @azx_4x1_i4_o1
+SMLSLL_nx_d 11000001 1001 .... 1 .. 00. ...00 01 ... @azx_4x1_i3_o1
+
+UMLALL_nx_s 11000001 0000 .... . .. ... ..... 100 .. @azx_1x1_i4_o2
+UMLALL_nx_d 11000001 1000 .... . .. 0.. ..... 100 .. @azx_1x1_i3_o2
+UMLALL_nx_s 11000001 0001 .... 0 .. 0.. ....0 10 ... @azx_2x1_i4_o1
+UMLALL_nx_d 11000001 1001 .... 0 .. 00. ....0 10 ... @azx_2x1_i3_o1
+UMLALL_nx_s 11000001 0001 .... 1 .. 0.. ...00 10 ... @azx_4x1_i4_o1
+UMLALL_nx_d 11000001 1001 .... 1 .. 00. ...00 10 ... @azx_4x1_i3_o1
+
+UMLSLL_nx_s 11000001 0000 .... . .. ... ..... 110 .. @azx_1x1_i4_o2
+UMLSLL_nx_d 11000001 1000 .... . .. 0.. ..... 110 .. @azx_1x1_i3_o2
+UMLSLL_nx_s 11000001 0001 .... 0 .. 0.. ....0 11 ... @azx_2x1_i4_o1
+UMLSLL_nx_d 11000001 1001 .... 0 .. 00. ....0 11 ... @azx_2x1_i3_o1
+UMLSLL_nx_s 11000001 0001 .... 1 .. 0.. ...00 11 ... @azx_4x1_i4_o1
+UMLSLL_nx_d 11000001 1001 .... 1 .. 00. ...00 11 ... @azx_4x1_i3_o1
+
+USMLALL_nx_s 11000001 0000 .... . .. ... ..... 001 .. @azx_1x1_i4_o2
+USMLALL_nx_d 11000001 1000 .... . .. 0.. ..... 001 .. @azx_1x1_i3_o2
+USMLALL_nx_s 11000001 0001 .... 0 .. 0.. ....1 00 ... @azx_2x1_i4_o1
+USMLALL_nx_d 11000001 1001 .... 0 .. 00. ....1 00 ... @azx_2x1_i3_o1
+USMLALL_nx_s 11000001 0001 .... 1 .. 0.. ...01 00 ... @azx_4x1_i4_o1
+USMLALL_nx_d 11000001 1001 .... 1 .. 00. ...01 00 ... @azx_4x1_i3_o1
+
+SUMLALL_nx_s 11000001 0000 .... . .. ... ..... 101 .. @azx_1x1_i4_o2
+SUMLALL_nx_d 11000001 1000 .... . .. 0.. ..... 101 .. @azx_1x1_i3_o2
+SUMLALL_nx_s 11000001 0001 .... 0 .. 0.. ....1 10 ... @azx_2x1_i4_o1
+SUMLALL_nx_d 11000001 1001 .... 0 .. 00. ....1 10 ... @azx_2x1_i3_o1
+SUMLALL_nx_s 11000001 0001 .... 1 .. 0.. ...01 10 ... @azx_4x1_i4_o1
+SUMLALL_nx_d 11000001 1001 .... 1 .. 00. ...01 10 ... @azx_4x1_i3_o1
Signed-off-by: Richard Henderson <richard.henderson@linaro.org>
---
 target/arm/tcg/helper-sme.h    |  22 ++++++
 target/arm/tcg/sme_helper.c    |  60 +++++++++++++++
 target/arm/tcg/translate-sme.c |  78 +++++++++++++++++++
 target/arm/tcg/sme.decode      | 135 +++++++++++++++++++++++++++++++++
 4 files changed, 295 insertions(+)