Message ID | 87shdox0q7.fsf@linaro.org |
---|---|
State | New |
Series | Add SVE support for load/store_lanes |
Both a ping and a repost with the new VNx names. See: https://gcc.gnu.org/ml/gcc-patches/2017-11/msg00592.html for the full series. Thanks, Richard --- This patch adds support for SVE LD[234], ST[234] and associated structure modes. Unlike Advanced SIMD, these modes are extra-long vector modes instead of integer modes. 2017-11-06 Richard Sandiford <richard.sandiford@linaro.org> Alan Hayward <alan.hayward@arm.com> David Sherwood <david.sherwood@arm.com> gcc/ * config/aarch64/aarch64-modes.def: Define x2, x3 and x4 vector modes for SVE. * config/aarch64/aarch64-protos.h (aarch64_sve_struct_memory_operand_p): Declare. * config/aarch64/iterators.md (SVE_STRUCT): New mode iterator. (vector_count, insn_length, VSINGLE, vsingle): New mode attributes. (VPRED, vpred): Handle SVE structure modes. * config/aarch64/constraints.md (Utx): New constraint. * config/aarch64/predicates.md (aarch64_sve_struct_memory_operand) (aarch64_sve_struct_nonimmediate_operand): New predicates. * config/aarch64/aarch64.md (UNSPEC_LDN, UNSPEC_STN): New unspecs. * config/aarch64/aarch64-sve.md (mov<mode>, *aarch64_sve_mov<mode>_le) (*aarch64_sve_mov<mode>_be, pred_mov<mode>): New patterns for structure modes. Split into pieces after RA. (vec_load_lanes<mode><vsingle>, vec_mask_load_lanes<mode><vsingle>) (vec_store_lanes<mode><vsingle>, vec_mask_store_lanes<mode><vsingle>): New patterns. * config/aarch64/aarch64.c (aarch64_classify_vector_mode): Handle SVE structure modes. (aarch64_classify_address): Likewise. (sizetochar): Move earlier in file. (aarch64_print_operand): Handle SVE register lists. (aarch64_array_mode): New function. (aarch64_sve_struct_memory_operand_p): Likewise. (TARGET_ARRAY_MODE): Redefine. Index: gcc/config/aarch64/aarch64-modes.def =================================================================== --- gcc/config/aarch64/aarch64-modes.def 2017-12-22 16:00:58.471012631 +0000 +++ gcc/config/aarch64/aarch64-modes.def 2017-12-22 16:01:42.042358758 +0000 @@ -87,6 +87,9 @@ INT_MODE (XI, 64); /* Give SVE vectors the names normally used for 256-bit vectors. The actual number depends on command-line flags. */ SVE_MODES (1, VNx16, VNx8, VNx4, VNx2) +SVE_MODES (2, VNx32, VNx16, VNx8, VNx4) +SVE_MODES (3, VNx48, VNx24, VNx12, VNx6) +SVE_MODES (4, VNx64, VNx32, VNx16, VNx8) /* Quad float: 128-bit floating mode for long doubles. */ FLOAT_MODE (TF, 16, ieee_quad_format); Index: gcc/config/aarch64/aarch64-protos.h =================================================================== --- gcc/config/aarch64/aarch64-protos.h 2017-12-22 16:00:58.471012631 +0000 +++ gcc/config/aarch64/aarch64-protos.h 2017-12-22 16:01:42.043358720 +0000 @@ -432,6 +432,7 @@ rtx aarch64_simd_gen_const_vector_dup (m bool aarch64_simd_mem_operand_p (rtx); bool aarch64_sve_ld1r_operand_p (rtx); bool aarch64_sve_ldr_operand_p (rtx); +bool aarch64_sve_struct_memory_operand_p (rtx); rtx aarch64_simd_vect_par_cnst_half (machine_mode, int, bool); rtx aarch64_tls_get_addr (void); tree aarch64_fold_builtin (tree, int, tree *, bool); Index: gcc/config/aarch64/iterators.md =================================================================== --- gcc/config/aarch64/iterators.md 2017-12-22 16:00:58.477012402 +0000 +++ gcc/config/aarch64/iterators.md 2017-12-22 16:01:42.045358644 +0000 @@ -250,6 +250,14 @@ (define_mode_iterator VMUL_CHANGE_NLANES (define_mode_iterator SVE_ALL [VNx16QI VNx8HI VNx4SI VNx2DI VNx8HF VNx4SF VNx2DF]) +;; All SVE vector structure modes. 
+(define_mode_iterator SVE_STRUCT [VNx32QI VNx16HI VNx8SI VNx4DI + VNx16HF VNx8SF VNx4DF + VNx48QI VNx24HI VNx12SI VNx6DI + VNx24HF VNx12SF VNx6DF + VNx64QI VNx32HI VNx16SI VNx8DI + VNx32HF VNx16SF VNx8DF]) + ;; All SVE vector modes that have 8-bit or 16-bit elements. (define_mode_iterator SVE_BH [VNx16QI VNx8HI VNx8HF]) @@ -587,9 +595,16 @@ (define_mode_attr Vetype [(V8QI "b") (V1 ;; Equivalent of "size" for a vector element. (define_mode_attr Vesize [(VNx16QI "b") - (VNx8HI "h") (VNx8HF "h") - (VNx4SI "w") (VNx4SF "w") - (VNx2DI "d") (VNx2DF "d")]) + (VNx8HI "h") (VNx8HF "h") + (VNx4SI "w") (VNx4SF "w") + (VNx2DI "d") (VNx2DF "d") + (VNx32QI "b") (VNx48QI "b") (VNx64QI "b") + (VNx16HI "h") (VNx24HI "h") (VNx32HI "h") + (VNx16HF "h") (VNx24HF "h") (VNx32HF "h") + (VNx8SI "w") (VNx12SI "w") (VNx16SI "w") + (VNx8SF "w") (VNx12SF "w") (VNx16SF "w") + (VNx4DI "d") (VNx6DI "d") (VNx8DI "d") + (VNx4DF "d") (VNx6DF "d") (VNx8DF "d")]) ;; Vetype is used everywhere in scheduling type and assembly output, ;; sometimes they are not the same, for example HF modes on some @@ -957,17 +972,93 @@ (define_mode_attr insn_count [(OI "8") ( ;; No need of iterator for -fPIC as it use got_lo12 for both modes. (define_mode_attr got_modifier [(SI "gotpage_lo14") (DI "gotpage_lo15")]) -;; The predicate mode associated with an SVE data mode. +;; The number of subvectors in an SVE_STRUCT. +(define_mode_attr vector_count [(VNx32QI "2") (VNx16HI "2") + (VNx8SI "2") (VNx4DI "2") + (VNx16HF "2") (VNx8SF "2") (VNx4DF "2") + (VNx48QI "3") (VNx24HI "3") + (VNx12SI "3") (VNx6DI "3") + (VNx24HF "3") (VNx12SF "3") (VNx6DF "3") + (VNx64QI "4") (VNx32HI "4") + (VNx16SI "4") (VNx8DI "4") + (VNx32HF "4") (VNx16SF "4") (VNx8DF "4")]) + +;; The number of instruction bytes needed for an SVE_STRUCT move. This is +;; equal to vector_count * 4. +(define_mode_attr insn_length [(VNx32QI "8") (VNx16HI "8") + (VNx8SI "8") (VNx4DI "8") + (VNx16HF "8") (VNx8SF "8") (VNx4DF "8") + (VNx48QI "12") (VNx24HI "12") + (VNx12SI "12") (VNx6DI "12") + (VNx24HF "12") (VNx12SF "12") (VNx6DF "12") + (VNx64QI "16") (VNx32HI "16") + (VNx16SI "16") (VNx8DI "16") + (VNx32HF "16") (VNx16SF "16") (VNx8DF "16")]) + +;; The type of a subvector in an SVE_STRUCT. +(define_mode_attr VSINGLE [(VNx32QI "VNx16QI") + (VNx16HI "VNx8HI") (VNx16HF "VNx8HF") + (VNx8SI "VNx4SI") (VNx8SF "VNx4SF") + (VNx4DI "VNx2DI") (VNx4DF "VNx2DF") + (VNx48QI "VNx16QI") + (VNx24HI "VNx8HI") (VNx24HF "VNx8HF") + (VNx12SI "VNx4SI") (VNx12SF "VNx4SF") + (VNx6DI "VNx2DI") (VNx6DF "VNx2DF") + (VNx64QI "VNx16QI") + (VNx32HI "VNx8HI") (VNx32HF "VNx8HF") + (VNx16SI "VNx4SI") (VNx16SF "VNx4SF") + (VNx8DI "VNx2DI") (VNx8DF "VNx2DF")]) + +;; ...and again in lower case. +(define_mode_attr vsingle [(VNx32QI "vnx16qi") + (VNx16HI "vnx8hi") (VNx16HF "vnx8hf") + (VNx8SI "vnx4si") (VNx8SF "vnx4sf") + (VNx4DI "vnx2di") (VNx4DF "vnx2df") + (VNx48QI "vnx16qi") + (VNx24HI "vnx8hi") (VNx24HF "vnx8hf") + (VNx12SI "vnx4si") (VNx12SF "vnx4sf") + (VNx6DI "vnx2di") (VNx6DF "vnx2df") + (VNx64QI "vnx16qi") + (VNx32HI "vnx8hi") (VNx32HF "vnx8hf") + (VNx16SI "vnx4si") (VNx16SF "vnx4sf") + (VNx8DI "vnx2di") (VNx8DF "vnx2df")]) + +;; The predicate mode associated with an SVE data mode. For structure modes +;; this is equivalent to the <VPRED> of the subvector mode. 
(define_mode_attr VPRED [(VNx16QI "VNx16BI") (VNx8HI "VNx8BI") (VNx8HF "VNx8BI") (VNx4SI "VNx4BI") (VNx4SF "VNx4BI") - (VNx2DI "VNx2BI") (VNx2DF "VNx2BI")]) + (VNx2DI "VNx2BI") (VNx2DF "VNx2BI") + (VNx32QI "VNx16BI") + (VNx16HI "VNx8BI") (VNx16HF "VNx8BI") + (VNx8SI "VNx4BI") (VNx8SF "VNx4BI") + (VNx4DI "VNx2BI") (VNx4DF "VNx2BI") + (VNx48QI "VNx16BI") + (VNx24HI "VNx8BI") (VNx24HF "VNx8BI") + (VNx12SI "VNx4BI") (VNx12SF "VNx4BI") + (VNx6DI "VNx2BI") (VNx6DF "VNx2BI") + (VNx64QI "VNx16BI") + (VNx32HI "VNx8BI") (VNx32HF "VNx8BI") + (VNx16SI "VNx4BI") (VNx16SF "VNx4BI") + (VNx8DI "VNx2BI") (VNx8DF "VNx2BI")]) ;; ...and again in lower case. (define_mode_attr vpred [(VNx16QI "vnx16bi") (VNx8HI "vnx8bi") (VNx8HF "vnx8bi") (VNx4SI "vnx4bi") (VNx4SF "vnx4bi") - (VNx2DI "vnx2bi") (VNx2DF "vnx2bi")]) + (VNx2DI "vnx2bi") (VNx2DF "vnx2bi") + (VNx32QI "vnx16bi") + (VNx16HI "vnx8bi") (VNx16HF "vnx8bi") + (VNx8SI "vnx4bi") (VNx8SF "vnx4bi") + (VNx4DI "vnx2bi") (VNx4DF "vnx2bi") + (VNx48QI "vnx16bi") + (VNx24HI "vnx8bi") (VNx24HF "vnx8bi") + (VNx12SI "vnx4bi") (VNx12SF "vnx4bi") + (VNx6DI "vnx2bi") (VNx6DF "vnx2bi") + (VNx64QI "vnx16bi") + (VNx32HI "vnx8bi") (VNx32HF "vnx8bi") + (VNx16SI "vnx4bi") (VNx16SF "vnx4bi") + (VNx8DI "vnx2bi") (VNx8DF "vnx2bi")]) ;; ------------------------------------------------------------------- ;; Code Iterators Index: gcc/config/aarch64/constraints.md =================================================================== --- gcc/config/aarch64/constraints.md 2017-12-22 16:00:58.476012440 +0000 +++ gcc/config/aarch64/constraints.md 2017-12-22 16:01:42.045358644 +0000 @@ -237,6 +237,12 @@ (define_memory_constraint "Uty" (and (match_code "mem") (match_test "aarch64_sve_ld1r_operand_p (op)"))) +(define_memory_constraint "Utx" + "@internal + An address valid for SVE structure mov patterns (as distinct from + LD[234] and ST[234] patterns)." + (match_operand 0 "aarch64_sve_struct_memory_operand")) + (define_constraint "Ufc" "A floating point constant which can be used with an\ FMOV immediate operation." Index: gcc/config/aarch64/predicates.md =================================================================== --- gcc/config/aarch64/predicates.md 2017-12-22 16:00:58.477012402 +0000 +++ gcc/config/aarch64/predicates.md 2017-12-22 16:01:42.045358644 +0000 @@ -482,6 +482,14 @@ (define_predicate "aarch64_sve_general_o (match_operand 0 "aarch64_sve_ldr_operand") (match_test "aarch64_mov_operand_p (op, mode)")))) +(define_predicate "aarch64_sve_struct_memory_operand" + (and (match_code "mem") + (match_test "aarch64_sve_struct_memory_operand_p (op)"))) + +(define_predicate "aarch64_sve_struct_nonimmediate_operand" + (ior (match_operand 0 "register_operand") + (match_operand 0 "aarch64_sve_struct_memory_operand"))) + ;; Doesn't include immediates, since those are handled by the move ;; patterns instead.
(define_predicate "aarch64_sve_dup_operand" Index: gcc/config/aarch64/aarch64.md =================================================================== --- gcc/config/aarch64/aarch64.md 2017-12-22 16:00:58.476012440 +0000 +++ gcc/config/aarch64/aarch64.md 2017-12-22 16:01:42.045358644 +0000 @@ -161,6 +161,8 @@ (define_c_enum "unspec" [ UNSPEC_PACK UNSPEC_FLOAT_CONVERT UNSPEC_WHILE_LO + UNSPEC_LDN + UNSPEC_STN ]) (define_c_enum "unspecv" [ Index: gcc/config/aarch64/aarch64-sve.md =================================================================== --- gcc/config/aarch64/aarch64-sve.md 2017-12-22 16:00:58.471012631 +0000 +++ gcc/config/aarch64/aarch64-sve.md 2017-12-22 16:01:42.043358720 +0000 @@ -189,6 +189,105 @@ (define_insn "maskstore<mode><vpred>" "st1<Vesize>\t%1.<Vetype>, %2, %0" ) +;; SVE structure moves. +(define_expand "mov<mode>" + [(set (match_operand:SVE_STRUCT 0 "nonimmediate_operand") + (match_operand:SVE_STRUCT 1 "general_operand"))] + "TARGET_SVE" + { + /* Big-endian loads and stores need to be done via LD1 and ST1; + see the comment at the head of the file for details. */ + if ((MEM_P (operands[0]) || MEM_P (operands[1])) + && BYTES_BIG_ENDIAN) + { + gcc_assert (can_create_pseudo_p ()); + aarch64_expand_sve_mem_move (operands[0], operands[1], <VPRED>mode); + DONE; + } + + if (CONSTANT_P (operands[1])) + { + aarch64_expand_mov_immediate (operands[0], operands[1]); + DONE; + } + } +) + +;; Unpredicated structure moves (little-endian). +(define_insn "*aarch64_sve_mov<mode>_le" + [(set (match_operand:SVE_STRUCT 0 "aarch64_sve_nonimmediate_operand" "=w, Utr, w, w") + (match_operand:SVE_STRUCT 1 "aarch64_sve_general_operand" "Utr, w, w, Dn"))] + "TARGET_SVE && !BYTES_BIG_ENDIAN" + "#" + [(set_attr "length" "<insn_length>")] +) + +;; Unpredicated structure moves (big-endian). Memory accesses require +;; secondary reloads. +(define_insn "*aarch64_sve_mov<mode>_le" + [(set (match_operand:SVE_STRUCT 0 "register_operand" "=w, w") + (match_operand:SVE_STRUCT 1 "aarch64_nonmemory_operand" "w, Dn"))] + "TARGET_SVE && BYTES_BIG_ENDIAN" + "#" + [(set_attr "length" "<insn_length>")] +) + +;; Split unpredicated structure moves into pieces. This is the same +;; for both big-endian and little-endian code, although it only needs +;; to handle memory operands for little-endian code. +(define_split + [(set (match_operand:SVE_STRUCT 0 "aarch64_sve_nonimmediate_operand") + (match_operand:SVE_STRUCT 1 "aarch64_sve_general_operand"))] + "TARGET_SVE && reload_completed" + [(const_int 0)] + { + rtx dest = operands[0]; + rtx src = operands[1]; + if (REG_P (dest) && REG_P (src)) + aarch64_simd_emit_reg_reg_move (operands, <VSINGLE>mode, <vector_count>); + else + for (unsigned int i = 0; i < <vector_count>; ++i) + { + rtx subdest = simplify_gen_subreg (<VSINGLE>mode, dest, <MODE>mode, + i * BYTES_PER_SVE_VECTOR); + rtx subsrc = simplify_gen_subreg (<VSINGLE>mode, src, <MODE>mode, + i * BYTES_PER_SVE_VECTOR); + emit_insn (gen_rtx_SET (subdest, subsrc)); + } + DONE; + } +) + +;; Predicated structure moves. This works for both endiannesses but in +;; practice is only useful for big-endian. 
+(define_insn_and_split "pred_mov<mode>" + [(set (match_operand:SVE_STRUCT 0 "aarch64_sve_struct_nonimmediate_operand" "=w, Utx") + (unspec:SVE_STRUCT + [(match_operand:<VPRED> 1 "register_operand" "Upl, Upl") + (match_operand:SVE_STRUCT 2 "aarch64_sve_struct_nonimmediate_operand" "Utx, w")] + UNSPEC_MERGE_PTRUE))] + "TARGET_SVE + && (register_operand (operands[0], <MODE>mode) + || register_operand (operands[2], <MODE>mode))" + "#" + "&& reload_completed" + [(const_int 0)] + { + for (unsigned int i = 0; i < <vector_count>; ++i) + { + rtx subdest = simplify_gen_subreg (<VSINGLE>mode, operands[0], + <MODE>mode, + i * BYTES_PER_SVE_VECTOR); + rtx subsrc = simplify_gen_subreg (<VSINGLE>mode, operands[2], + <MODE>mode, + i * BYTES_PER_SVE_VECTOR); + aarch64_emit_sve_pred_move (subdest, operands[1], subsrc); + } + DONE; + } + [(set_attr "length" "<insn_length>")] +) + (define_expand "mov<mode>" [(set (match_operand:PRED_ALL 0 "nonimmediate_operand") (match_operand:PRED_ALL 1 "general_operand"))] @@ -460,6 +559,60 @@ (define_insn "*vec_series<mode>_plus" } ) +;; Unpredicated LD[234]. +(define_expand "vec_load_lanes<mode><vsingle>" + [(set (match_operand:SVE_STRUCT 0 "register_operand") + (unspec:SVE_STRUCT + [(match_dup 2) + (match_operand:SVE_STRUCT 1 "memory_operand")] + UNSPEC_LDN))] + "TARGET_SVE" + { + operands[2] = force_reg (<VPRED>mode, CONSTM1_RTX (<VPRED>mode)); + } +) + +;; Predicated LD[234]. +(define_insn "vec_mask_load_lanes<mode><vsingle>" + [(set (match_operand:SVE_STRUCT 0 "register_operand" "=w") + (unspec:SVE_STRUCT + [(match_operand:<VPRED> 2 "register_operand" "Upl") + (match_operand:SVE_STRUCT 1 "memory_operand" "m")] + UNSPEC_LDN))] + "TARGET_SVE" + "ld<vector_count><Vesize>\t%0, %2/z, %1" +) + +;; Unpredicated ST[234]. This is always a full update, so the dependence +;; on the old value of the memory location (via (match_dup 0)) is redundant. +;; There doesn't seem to be any obvious benefit to treating the all-true +;; case differently though. In particular, it's very unlikely that we'll +;; only find out during RTL that a store_lanes is dead. +(define_expand "vec_store_lanes<mode><vsingle>" + [(set (match_operand:SVE_STRUCT 0 "memory_operand") + (unspec:SVE_STRUCT + [(match_dup 2) + (match_operand:SVE_STRUCT 1 "register_operand") + (match_dup 0)] + UNSPEC_STN))] + "TARGET_SVE" + { + operands[2] = force_reg (<VPRED>mode, CONSTM1_RTX (<VPRED>mode)); + } +) + +;; Predicated ST[234]. 
+(define_insn "vec_mask_store_lanes<mode><vsingle>" + [(set (match_operand:SVE_STRUCT 0 "memory_operand" "+m") + (unspec:SVE_STRUCT + [(match_operand:<VPRED> 2 "register_operand" "Upl") + (match_operand:SVE_STRUCT 1 "register_operand" "w") + (match_dup 0)] + UNSPEC_STN))] + "TARGET_SVE" + "st<vector_count><Vesize>\t%1, %2, %0" +) + (define_expand "vec_perm<mode>" [(match_operand:SVE_ALL 0 "register_operand") (match_operand:SVE_ALL 1 "register_operand") Index: gcc/config/aarch64/aarch64.c =================================================================== --- gcc/config/aarch64/aarch64.c 2017-12-22 16:00:42.829606965 +0000 +++ gcc/config/aarch64/aarch64.c 2017-12-22 16:01:42.044358682 +0000 @@ -1178,9 +1178,15 @@ aarch64_classify_vector_mode (machine_mo || inner == DImode || inner == DFmode)) { - if (TARGET_SVE - && known_eq (GET_MODE_BITSIZE (mode), BITS_PER_SVE_VECTOR)) - return VEC_SVE_DATA; + if (TARGET_SVE) + { + if (known_eq (GET_MODE_BITSIZE (mode), BITS_PER_SVE_VECTOR)) + return VEC_SVE_DATA; + if (known_eq (GET_MODE_BITSIZE (mode), BITS_PER_SVE_VECTOR * 2) + || known_eq (GET_MODE_BITSIZE (mode), BITS_PER_SVE_VECTOR * 3) + || known_eq (GET_MODE_BITSIZE (mode), BITS_PER_SVE_VECTOR * 4)) + return VEC_SVE_DATA | VEC_STRUCT; + } /* This includes V1DF but not V1DI (which doesn't exist). */ if (TARGET_SIMD @@ -1208,6 +1214,18 @@ aarch64_sve_data_mode_p (machine_mode mo return aarch64_classify_vector_mode (mode) & VEC_SVE_DATA; } +/* Implement target hook TARGET_ARRAY_MODE. */ +static opt_machine_mode +aarch64_array_mode (machine_mode mode, unsigned HOST_WIDE_INT nelems) +{ + if (aarch64_classify_vector_mode (mode) == VEC_SVE_DATA + && IN_RANGE (nelems, 2, 4)) + return mode_for_vector (GET_MODE_INNER (mode), + GET_MODE_NUNITS (mode) * nelems); + + return opt_machine_mode (); +} + /* Implement target hook TARGET_ARRAY_MODE_SUPPORTED_P. */ static bool aarch64_array_mode_supported_p (machine_mode mode, @@ -5778,6 +5796,18 @@ aarch64_classify_address (struct aarch64 ? offset_4bit_signed_scaled_p (mode, offset) : offset_9bit_signed_scaled_p (mode, offset)); + if (vec_flags == (VEC_SVE_DATA | VEC_STRUCT)) + { + poly_int64 end_offset = (offset + + GET_MODE_SIZE (mode) + - BYTES_PER_SVE_VECTOR); + return (type == ADDR_QUERY_M + ? offset_4bit_signed_scaled_p (mode, offset) + : (offset_9bit_signed_scaled_p (SVE_BYTE_MODE, offset) + && offset_9bit_signed_scaled_p (SVE_BYTE_MODE, + end_offset))); + } + if (vec_flags == VEC_SVE_PRED) return offset_9bit_signed_scaled_p (mode, offset); @@ -6490,6 +6520,20 @@ aarch64_print_vector_float_operand (FILE return true; } +/* Return the equivalent letter for size. */ +static char +sizetochar (int size) +{ + switch (size) + { + case 64: return 'd'; + case 32: return 's'; + case 16: return 'h'; + case 8 : return 'b'; + default: gcc_unreachable (); + } +} + /* Print operand X to file F in a target specific manner according to CODE. 
The acceptable formatting commands given by CODE are: 'c': An integer or symbol address without a preceding # @@ -6777,7 +6821,18 @@ aarch64_print_operand (FILE *f, rtx x, i { case REG: if (aarch64_sve_data_mode_p (GET_MODE (x))) - asm_fprintf (f, "z%d", REGNO (x) - V0_REGNUM); + { + if (REG_NREGS (x) == 1) + asm_fprintf (f, "z%d", REGNO (x) - V0_REGNUM); + else + { + char suffix + = sizetochar (GET_MODE_UNIT_BITSIZE (GET_MODE (x))); + asm_fprintf (f, "{z%d.%c - z%d.%c}", + REGNO (x) - V0_REGNUM, suffix, + END_REGNO (x) - V0_REGNUM - 1, suffix); + } + } else asm_fprintf (f, "%s", reg_names [REGNO (x)]); break; @@ -12952,20 +13007,6 @@ aarch64_final_prescan_insn (rtx_insn *in } -/* Return the equivalent letter for size. */ -static char -sizetochar (int size) -{ - switch (size) - { - case 64: return 'd'; - case 32: return 's'; - case 16: return 'h'; - case 8 : return 'b'; - default: gcc_unreachable (); - } -} - /* Return true if BASE_OR_STEP is a valid immediate operand for an SVE INDEX instruction. */ @@ -13560,6 +13601,28 @@ aarch64_sve_ldr_operand_p (rtx op) && addr.type == ADDRESS_REG_IMM); } +/* Return true if OP is a valid MEM operand for an SVE_STRUCT mode. + We need to be able to access the individual pieces, so the range + is different from LD[234] and ST[234]. */ +bool +aarch64_sve_struct_memory_operand_p (rtx op) +{ + if (!MEM_P (op)) + return false; + + machine_mode mode = GET_MODE (op); + struct aarch64_address_info addr; + if (!aarch64_classify_address (&addr, XEXP (op, 0), SVE_BYTE_MODE, false, + ADDR_QUERY_ANY) + || addr.type != ADDRESS_REG_IMM) + return false; + + poly_int64 first = addr.const_offset; + poly_int64 last = first + GET_MODE_SIZE (mode) - BYTES_PER_SVE_VECTOR; + return (offset_4bit_signed_scaled_p (SVE_BYTE_MODE, first) + && offset_4bit_signed_scaled_p (SVE_BYTE_MODE, last)); +} + /* Emit a register copy from operand to operand, taking care not to early-clobber source registers in the process. @@ -17629,6 +17692,9 @@ #define TARGET_VECTOR_MODE_SUPPORTED_P a #define TARGET_VECTORIZE_SUPPORT_VECTOR_MISALIGNMENT \ aarch64_builtin_support_vector_misalignment +#undef TARGET_ARRAY_MODE +#define TARGET_ARRAY_MODE aarch64_array_mode + #undef TARGET_ARRAY_MODE_SUPPORTED_P #define TARGET_ARRAY_MODE_SUPPORTED_P aarch64_array_mode_supported_p
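---

For context (not part of the patch): the vec_load_lanes/vec_store_lanes patterns above are the hooks the vectorizer uses for grouped, interleaved accesses. Below is a minimal sketch of the kind of C loop they are aimed at; the function name and the flags (-O3 -march=armv8-a+sve) are illustrative assumptions, not something taken from the patch or its testsuite.

/* Illustrative only -- not part of the patch.  A two-way interleaved
   loop of this shape is the kind of grouped access that the new
   vec_load_lanes/vec_store_lanes patterns let the vectorizer implement
   with LD2W/ST2W, using the VNx8SF structure mode (two VNx4SF
   subvectors).  Assumed flags: -O3 -march=armv8-a+sve.  */

void
scale_complex (float *restrict dst, const float *restrict src,
               float rscale, float iscale, int n)
{
  for (int i = 0; i < n; i++)
    {
      /* Even elements hold "real" parts, odd elements "imaginary" parts.
         LD2W de-interleaves them into two Z registers; ST2W
         re-interleaves the scaled results on the store.  */
      dst[2 * i] = src[2 * i] * rscale;
      dst[2 * i + 1] = src[2 * i + 1] * iscale;
    }
}

Whether the vectorizer actually chooses load/store-lanes over a permute-based strategy depends on the cost model, so treat this as a sketch of the intended use rather than a guaranteed code-generation outcome.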