[ARM] 64-bit shifts in NEON.

Message ID	4F43B707.7070908@codesourcery.com
State	New
Headers	show Return-Path: <patch+caf_=linaro-patchwork=canonical.com@linaro.org> Received-SPF: neutral (google.com: 192.94.38.131 is neither permitted nor denied by best guess record for domain of Andrew_Stubbs@mentor.com) client-ip=192.94.38.131; Message-ID: <4F43B707.7070908@codesourcery.com> Date: Tue, 21 Feb 2012 15:23:51 +0000 From: Andrew Stubbs <ams@codesourcery.com> User-Agent: Mozilla/5.0 (X11; Linux x86_64; rv:10.0.2) Gecko/20120216 Thunderbird/10.0.2 MIME-Version: 1.0 To: "gcc-patches@gcc.gnu.org" <gcc-patches@gcc.gnu.org> CC: "patches@linaro.org" <patches@linaro.org> Subject: Re: [PATCH][ARM] 64-bit shifts in NEON. References: <4F2FD216.6090507@codesourcery.com> In-Reply-To: <4F2FD216.6090507@codesourcery.com> Content-Type: multipart/mixed; boundary="------------090408030205060605000208"

2012-02-21  Andrew Stubbs  <ams@codesourcery.com>

	gcc/
	* config/arm/arm.c (arm_print_operand): Add new 'E' format code.
	* config/arm/arm.h (enum reg_class): Add VFP_LO_REGS_EVEN.
	(REG_CLASS_NAMES, REG_CLASS_CONTENTS, IS_VFP_CLASS): Likewise.
	* config/arm/arm.md (ashldi3): Add TARGET_NEON case.
	(ashrdi3, lshrdi3): Likewise.
	* config/arm/constraints.md (T): New register constraint.
	(Pe, P1, Pf, Pg): New constraints.
	* config/arm/neon.md (signed_shift_di3_neon): New pattern.
	(unsigned_shift_di3_neon, ashldi3_neon): New patterns.
	(ashrdi3_neon_imm, ashrdi3_neon_reg): New patterns.
	(ashrdi3_neon): New pattern.
	(lshrdi3_neon_imm, lshrdi3_neon_reg, lshrdi3_neon): New patterns.
	* config/arm/predicates.md (int_0_to_63): New predicate.
	(shift_amount_64): New predicate.

---
 gcc/config/arm/arm.c          |   18 +++
 gcc/config/arm/arm.h          |    5 +
 gcc/config/arm/arm.md         |   33 ++++-
 gcc/config/arm/constraints.md |   30 ++++-
 gcc/config/arm/neon.md        |  271 +++++++++++++++++++++++++++++++++++++++++
 gcc/config/arm/predicates.md  |    8 +
 6 files changed, 355 insertions(+), 10 deletions(-)

diff --git a/gcc/config/arm/arm.c b/gcc/config/arm/arm.c
index 386231a..65ccd91 100644
--- a/gcc/config/arm/arm.c
+++ b/gcc/config/arm/arm.c
@@ -17661,6 +17661,24 @@ arm_print_operand (FILE *stream, rtx x, int code)
         }
       return;
 
+    /* Print the VFP/Neon double precision register name that overlaps the
+       given single-precision register.  */
+    case 'E':
+      {
+        int mode = GET_MODE (x);
+
+        if (GET_MODE_SIZE (mode) != 4
+            || GET_CODE (x) != REG
+            || !IS_VFP_REGNUM (REGNO (x)))
+          {
+            output_operand_lossage ("invalid operand for code '%c'", code);
+            return;
+          }
+
+        fprintf (stream, "d%d", (REGNO (x) - FIRST_VFP_REGNUM) >> 1);
+      }
+      return;
+
     /* These two codes print the low/high doubleword register of a Neon quad
        register, respectively.  For pair-structure types, can also print
        low/high quadword registers.  */
diff --git a/gcc/config/arm/arm.h b/gcc/config/arm/arm.h
index 5a78125..6f0df83 100644
--- a/gcc/config/arm/arm.h
+++ b/gcc/config/arm/arm.h
@@ -1061,6 +1061,7 @@ enum reg_class
   CIRRUS_REGS,
   VFP_D0_D7_REGS,
   VFP_LO_REGS,
+  VFP_LO_REGS_EVEN,
   VFP_HI_REGS,
   VFP_REGS,
   IWMMXT_GR_REGS,
@@ -1087,6 +1088,7 @@ enum reg_class
   "CIRRUS_REGS", \
   "VFP_D0_D7_REGS", \
   "VFP_LO_REGS", \
+  "VFP_LO_REGS_EVEN", \
   "VFP_HI_REGS", \
   "VFP_REGS", \
   "IWMMXT_GR_REGS", \
@@ -1112,6 +1114,7 @@ enum reg_class
   { 0xF8000000, 0x000007FF, 0x00000000, 0x00000000 }, /* CIRRUS_REGS */ \
   { 0x00000000, 0x80000000, 0x00007FFF, 0x00000000 }, /* VFP_D0_D7_REGS */ \
   { 0x00000000, 0x80000000, 0x7FFFFFFF, 0x00000000 }, /* VFP_LO_REGS */ \
+  { 0x00000000, 0x80000000, 0x2AAAAAAA, 0x00000000 }, /* VFP_LO_REGS_EVEN */ \
   { 0x00000000, 0x00000000, 0x80000000, 0x7FFFFFFF }, /* VFP_HI_REGS */ \
   { 0x00000000, 0x80000000, 0xFFFFFFFF, 0x7FFFFFFF }, /* VFP_REGS */ \
   { 0x00000000, 0x00007800, 0x00000000, 0x00000000 }, /* IWMMXT_GR_REGS */ \
@@ -1129,7 +1132,7 @@ enum reg_class
 
 /* Any of the VFP register classes.  */
 #define IS_VFP_CLASS(X) \
-  ((X) == VFP_D0_D7_REGS || (X) == VFP_LO_REGS \
+  ((X) == VFP_D0_D7_REGS || (X) == VFP_LO_REGS || (X) == VFP_LO_REGS_EVEN \
    || (X) == VFP_HI_REGS || (X) == VFP_REGS)
 
 /* The same information, inverted:
diff --git a/gcc/config/arm/arm.md b/gcc/config/arm/arm.md
index 7910bae..182c52a 100644
--- a/gcc/config/arm/arm.md
+++ b/gcc/config/arm/arm.md
@@ -3466,8 +3466,15 @@
                    (match_operand:SI 2 "reg_or_int_operand" "")))]
   "TARGET_32BIT"
   "
-  if (!CONST_INT_P (operands[2])
-      && (TARGET_REALLY_IWMMXT || (TARGET_HARD_FLOAT && TARGET_MAVERICK)))
+  if (TARGET_NEON)
+    {
+      /* Delay the decision whether to use NEON or core-regs until
+         register allocation.  */
+      emit_insn (gen_ashldi3_neon (operands[0], operands[1], operands[2]));
+      DONE;
+    }
+  else if (!CONST_INT_P (operands[2])
+           && (TARGET_REALLY_IWMMXT || (TARGET_HARD_FLOAT && TARGET_MAVERICK)))
     ; /* No special preparation statements; expand pattern as above.  */
   else
     {
@@ -3541,8 +3548,15 @@
                      (match_operand:SI 2 "reg_or_int_operand" "")))]
   "TARGET_32BIT"
   "
-  if (!CONST_INT_P (operands[2])
-      && (TARGET_REALLY_IWMMXT || (TARGET_HARD_FLOAT && TARGET_MAVERICK)))
+  if (TARGET_NEON)
+    {
+      /* Delay the decision whether to use NEON or core-regs until
+         register allocation.  */
+      emit_insn (gen_ashrdi3_neon (operands[0], operands[1], operands[2]));
+      DONE;
+    }
+  else if (!CONST_INT_P (operands[2])
+           && (TARGET_REALLY_IWMMXT || (TARGET_HARD_FLOAT && TARGET_MAVERICK)))
     ; /* No special preparation statements; expand pattern as above.  */
   else
     {
@@ -3614,8 +3628,15 @@
                      (match_operand:SI 2 "reg_or_int_operand" "")))]
   "TARGET_32BIT"
   "
-  if (!CONST_INT_P (operands[2])
-      && (TARGET_REALLY_IWMMXT || (TARGET_HARD_FLOAT && TARGET_MAVERICK)))
+  if (TARGET_NEON)
+    {
+      /* Delay the decision whether to use NEON or core-regs until
+         register allocation.  */
+      emit_insn (gen_lshrdi3_neon (operands[0], operands[1], operands[2]));
+      DONE;
+    }
+  else if (!CONST_INT_P (operands[2])
+           && (TARGET_REALLY_IWMMXT || (TARGET_HARD_FLOAT && TARGET_MAVERICK)))
     ; /* No special preparation statements; expand pattern as above.  */
   else
     {
diff --git a/gcc/config/arm/constraints.md b/gcc/config/arm/constraints.md
index 7d0269a..a1aaf43 100644
--- a/gcc/config/arm/constraints.md
+++ b/gcc/config/arm/constraints.md
@@ -19,7 +19,7 @@
 ;; <http://www.gnu.org/licenses/>.
 
 ;; The following register constraints have been used:
-;; - in ARM/Thumb-2 state: f, t, v, w, x, y, z
+;; - in ARM/Thumb-2 state: f, t, T, v, w, x, y, z
 ;; - in Thumb state: h, b
 ;; - in both states: l, c, k
 ;; In ARM state, 'l' is an alias for 'r'
@@ -29,9 +29,9 @@
 ;; in Thumb-1 state: I, J, K, L, M, N, O
 
 ;; The following multi-letter normal constraints have been used:
-;; in ARM/Thumb-2 state: Da, Db, Dc, Dn, Dl, DL, Dv, Dy, Di, Dt, Dz
+;; in ARM/Thumb-2 state: Da, Db, Dc, Dn, Dl, DL, Dv, Dy, Di, Dt, Dz, Pe, Pf, P1
 ;; in Thumb-1 state: Pa, Pb, Pc, Pd
-;; in Thumb-2 state: Pj, PJ, Ps, Pt, Pu, Pv, Pw, Px, Py
+;; in Thumb-2 state: Pg, Pj, PJ, Ps, Pt, Pu, Pv, Pw, Px, Py
 
 ;; The following memory constraints have been used:
 ;; in ARM/Thumb-2 state: Q, Ut, Uv, Uy, Un, Um, Us
@@ -45,6 +45,9 @@
 (define_register_constraint "t" "TARGET_32BIT ? VFP_LO_REGS : NO_REGS"
  "The VFP registers @code{s0}-@code{s31}.")
 
+(define_register_constraint "T" "TARGET_32BIT ? VFP_LO_REGS_EVEN : NO_REGS"
+ "The even numbered VFP registers @code{s0}-@code{s31}.")
+
 (define_register_constraint "v" "TARGET_ARM ? CIRRUS_REGS : NO_REGS"
  "The Cirrus Maverick co-processor registers.")
 
@@ -172,6 +175,27 @@
   (and (match_code "const_int")
        (match_test "TARGET_THUMB1 && ival >= 0 && ival <= 7")))
 
+(define_constraint "Pe"
+  "@internal In ARM/Thumb-2 state, a constant in the range 0 to 63"
+  (and (match_code "const_int")
+       (match_test "TARGET_32BIT && ival >= 0 && ival < 64")))
+
+(define_constraint "P1"
+  "@internal In ARM/Thumb2 state, a constant of 1"
+  (and (match_code "const_int")
+       (match_test "TARGET_32BIT && ival == 1")))
+
+(define_constraint "Pf"
+  "@internal In ARM state, a constant in the range 0 to 63, and in thumb-2 state, 32 to 63"
+  (and (match_code "const_int")
+       (match_test "(TARGET_ARM && ival >= 0 && ival < 64)
+                    || (TARGET_THUMB2 && ival >= 32 && ival < 64)")))
+
+(define_constraint "Pg"
+  "@internal In Thumb-2 state, a constant in the range 0 to 31"
+  (and (match_code "const_int")
+       (match_test "TARGET_THUMB2 && ival >= 0 && ival <= 31")))
+
 (define_constraint "Ps"
   "@internal In Thumb-2 state a constant in the range -255 to +255"
   (and (match_code "const_int")
diff --git a/gcc/config/arm/neon.md b/gcc/config/arm/neon.md
index d7caa37..93f1a22 100644
--- a/gcc/config/arm/neon.md
+++ b/gcc/config/arm/neon.md
@@ -1090,6 +1090,277 @@
   DONE;
 })
 
+;; 64-bit shifts
+
+; The shift amount needs to be negated for right-shifts
+(define_insn "signed_shift_di3_neon"
+  [(set (match_operand:DI 0 "s_register_operand" "=w")
+        (unspec:DI [(match_operand:DI 1 "s_register_operand" " w")
+                    (match_operand:SI 2 "s_register_operand" " T")]
+                   UNSPEC_ASHIFT_SIGNED))]
+  "TARGET_NEON"
+  "vshl.s64\t%P0, %P1, %E2"
+  [(set_attr "neon_type" "neon_vshl_ddd")]
+)
+
+; The shift amount needs to be negated for right-shifts
+(define_insn "unsigned_shift_di3_neon"
+  [(set (match_operand:DI 0 "s_register_operand" "=w")
+        (unspec:DI [(match_operand:DI 1 "s_register_operand" " w")
+                    (match_operand:SI 2 "s_register_operand" " T")]
+                   UNSPEC_ASHIFT_UNSIGNED))]
+  "TARGET_NEON"
+  "vshl.u64\t%P0, %P1, %E2"
+  [(set_attr "neon_type" "neon_vshl_ddd")]
+)
+
+(define_insn_and_split "ashldi3_neon"
+  [(set (match_operand:DI 0 "s_register_operand" "=w, w,?&r,?&r,?r,?r,?r,?w,?w")
+        (ashift:DI (match_operand:DI 1 "s_register_operand" " w, w, 0, r, r, r, r, w, w")
+                   (match_operand:SI 2 "shift_amount_64" " T,Pe, r, r,P1,Pf,Pg, T,Pe")))
+   (clobber (match_scratch:SI 3 "=X, X, r, r, X, X, r, X, X"))
+   (clobber (match_scratch:SI 4 "=X, X, r, r, X, X, X, X, X"))
+   (clobber (reg:CC_C CC_REGNUM))]
+  "TARGET_NEON"
+  "@
+   vshl.u64\t%P0, %P1, %E2
+   vshl.u64\t%P0, %P1, %2
+   #
+   #
+   #
+   #
+   #
+   vshl.u64\t%P0, %P1, %E2
+   vshl.u64\t%P0, %P1, %2"
+  "TARGET_NEON && reload_completed && !(IS_VFP_REGNUM (REGNO (operands[0])))"
+  [(const_int 0)]
+  "
+  {
+    if (CONST_INT_P (operands[2]) && INTVAL (operands[2]) == 1)
+      /* This clobbers CC.  */
+      emit_insn (gen_arm_ashldi3_1bit (operands[0], operands[1]));
+    else
+      arm_emit_coreregs_64bit_shift (ASHIFT, operands[0], operands[1],
+                                     operands[2], operands[3], operands[4]);
+    DONE;
+  }"
+  [(set_attr "neon_type" "neon_vshl_ddd,neon_vshl_ddd,*,*,*,*,*,neon_vshl_ddd,neon_vshl_ddd")
+   (set_attr "length" "*,*,24,24,8,12,12,*,*")
+   (set_attr "arch" "nota8,nota8,*,*,*,*,*,onlya8,onlya8")
+   (set_attr_alternative "insn_enabled"
+     [(const_string "yes")
+      (const_string "yes")
+      (if_then_else (match_test "optimize_function_for_size_p (cfun)")
+                    (const_string "no")
+                    (const_string "yes"))
+      (if_then_else (match_test "optimize_function_for_size_p (cfun)")
+                    (const_string "no")
+                    (const_string "yes"))
+      (if_then_else (match_test "optimize_function_for_size_p (cfun)")
+                    (const_string "no")
+                    (const_string "yes"))
+      (if_then_else (match_test "optimize_function_for_size_p (cfun)")
+                    (const_string "no")
+                    (const_string "yes"))
+      (if_then_else (match_test "optimize_function_for_size_p (cfun)")
+                    (const_string "no")
+                    (const_string "yes"))
+      (const_string "yes")
+      (const_string "yes")])]
+)
+
+(define_insn_and_split "ashrdi3_neon_imm"
+  [(set (match_operand:DI 0 "s_register_operand" "=w,?r,?r,?r,?w")
+        (ashiftrt:DI (match_operand:DI 1 "s_register_operand" " w, r, r, r, w")
+                     (match_operand:SI 2 "int_0_to_63" "Pe,P1,Pf,Pg,Pe")))
+   (clobber (match_scratch:SI 3 "=X, X, X, r, X"))
+   (clobber (reg:CC_C CC_REGNUM))]
+  "TARGET_NEON"
+  "@
+   vshr.s64\t%P0, %P1, %2
+   #
+   #
+   #
+   vshr.s64\t%P0, %P1, %2"
+  "TARGET_NEON && reload_completed && !(IS_VFP_REGNUM (REGNO (operands[0])))"
+  [(const_int 0)]
+  "
+  {
+    if (INTVAL (operands[2]) == 1)
+      /* This clobbers CC.  */
+      emit_insn (gen_arm_ashrdi3_1bit (operands[0], operands[1]));
+    else
+      arm_emit_coreregs_64bit_shift (ASHIFTRT, operands[0], operands[1],
+                                     operands[2], operands[3], NULL);
+    DONE;
+  }"
+  [(set_attr "neon_type" "neon_vshl_ddd,*,*,*,neon_vshl_ddd")
+   (set_attr "length" "*,8,12,12,*")
+   (set_attr "arch" "nota8,*,*,*,onlya8")
+   (set_attr_alternative "insn_enabled"
+     [(const_string "yes")
+      (if_then_else (match_test "optimize_function_for_size_p (cfun)")
+                    (const_string "no")
+                    (const_string "yes"))
+      (if_then_else (match_test "optimize_function_for_size_p (cfun)")
+                    (const_string "no")
+                    (const_string "yes"))
+      (if_then_else (match_test "optimize_function_for_size_p (cfun)")
+                    (const_string "no")
+                    (const_string "yes"))
+      (const_string "yes")])]
+)
+
+(define_insn_and_split "ashrdi3_neon_reg"
+  [(set (match_operand:DI 0 "s_register_operand" "=w,w,?&r,?&r,?w,?w")
+        (unspec:DI [(match_operand:DI 1 "s_register_operand" " w,w, 0, r, w, w")
+                    (match_operand:SI 2 "s_register_operand" " r,r, r, r, r, r")]
+                   UNSPEC_ASHIFT_SIGNED))
+   (clobber (match_scratch:SI 3 "=2,r, r, r, 2, r"))
+   (clobber (match_scratch:SI 4 "=T,T, r, r, T, T"))
+   (clobber (reg:CC CC_REGNUM))]
+  "TARGET_NEON"
+  "#"
+  "TARGET_NEON && reload_completed"
+  [(const_int 0)]
+  "
+  {
+    if (IS_VFP_REGNUM (REGNO (operands[0])))
+      {
+        emit_insn (gen_negsi2 (operands[3], operands[2]));
+        emit_insn (gen_rtx_SET (SImode, operands[4], operands[3]));
+        emit_insn (gen_signed_shift_di3_neon (operands[0], operands[1],
+                                              operands[4]));
+      }
+    else
+      /* This clobbers CC (ASHIFTRT only).  */
+      arm_emit_coreregs_64bit_shift (ASHIFTRT, operands[0], operands[1],
+                                     operands[2], operands[3], operands[4]);
+    DONE;
+  }"
+  [(set_attr "length" "12,12,24,24,12,12")
+   (set_attr "arch" "nota8,nota8,*,*,onlya8,onlya8")
+   (set_attr_alternative "insn_enabled"
+     [(const_string "yes")
+      (const_string "yes")
+      (if_then_else (match_test "optimize_function_for_size_p (cfun)")
+                    (const_string "no")
+                    (const_string "yes"))
+      (if_then_else (match_test "optimize_function_for_size_p (cfun)")
+                    (const_string "no")
+                    (const_string "yes"))
+      (const_string "yes")
+      (const_string "yes")])]
+)
+
+(define_expand "ashrdi3_neon"
+  [(match_operand:DI 0 "s_register_operand" "")
+   (match_operand:DI 1 "s_register_operand" "")
+   (match_operand:SI 2 "shift_amount_64" "")]
+  "TARGET_NEON"
+{
+  if (CONST_INT_P (operands[2]))
+    emit_insn (gen_ashrdi3_neon_imm (operands[0], operands[1], operands[2]));
+  else
+    emit_insn (gen_ashrdi3_neon_reg (operands[0], operands[1], operands[2]));
+  DONE;
+})
+
+(define_insn_and_split "lshrdi3_neon_imm"
+  [(set (match_operand:DI 0 "s_register_operand" "=w,?r,?r,?r,?w")
+        (lshiftrt:DI (match_operand:DI 1 "s_register_operand" " w, r, r, r, w")
+                     (match_operand:SI 2 "int_0_to_63" "Pe,P1,Pf,Pg,Pe")))
+   (clobber (match_scratch:SI 3 "=X, X, X, r, X"))
+   (clobber (reg:CC_C CC_REGNUM))]
+  "TARGET_NEON"
+  "@
+   vshr.u64\t%P0, %P1, %2
+   #
+   #
+   #
+   vshr.u64\t%P0, %P1, %2"
+  "TARGET_NEON && reload_completed && !(IS_VFP_REGNUM (REGNO (operands[0])))"
+  [(const_int 0)]
+  "
+  {
+    if (INTVAL (operands[2]) == 1)
+      /* This clobbers CC.  */
+      emit_insn (gen_arm_lshrdi3_1bit (operands[0], operands[1]));
+    else
+      arm_emit_coreregs_64bit_shift (LSHIFTRT, operands[0], operands[1],
+                                     operands[2], operands[3], NULL);
+    DONE;
+  }"
+  [(set_attr "neon_type" "neon_vshl_ddd,*,*,*,neon_vshl_ddd")
+   (set_attr "length" "*,8,12,12,*")
+   (set_attr "arch" "nota8,*,*,*,onlya8")
+   (set_attr_alternative "insn_enabled"
+     [(const_string "yes")
+      (if_then_else (match_test "optimize_function_for_size_p (cfun)")
+                    (const_string "no")
+                    (const_string "yes"))
+      (if_then_else (match_test "optimize_function_for_size_p (cfun)")
+                    (const_string "no")
+                    (const_string "yes"))
+      (if_then_else (match_test "optimize_function_for_size_p (cfun)")
+                    (const_string "no")
+                    (const_string "yes"))
+      (const_string "yes")])]
+)
+
+(define_insn_and_split "lshrdi3_neon_reg"
+  [(set (match_operand:DI 0 "s_register_operand" "=w,w,?&r,?&r,?w,?w")
+        (unspec:DI [(match_operand:DI 1 "s_register_operand" " w,w, 0, r, w, w")
+                    (match_operand:SI 2 "s_register_operand" " r,r, r, r, r, r")]
+                   UNSPEC_ASHIFT_UNSIGNED))
+   (clobber (match_scratch:SI 3 "=2,r, r, r, 2, r"))
+   (clobber (match_scratch:SI 4 "=T,T, r, r, T, T"))]
+  "TARGET_NEON"
+  "#"
+  "TARGET_NEON && reload_completed"
+  [(const_int 0)]
+  "
+  {
+    if (IS_VFP_REGNUM (REGNO (operands[0])))
+      {
+        emit_insn (gen_negsi2 (operands[3], operands[2]));
+        emit_insn (gen_rtx_SET (SImode, operands[4], operands[3]));
+        emit_insn (gen_unsigned_shift_di3_neon (operands[0], operands[1],
+                                                operands[4]));
+      }
+    else
+      arm_emit_coreregs_64bit_shift (LSHIFTRT, operands[0], operands[1],
+                                     operands[2], operands[3], operands[4]);
+    DONE;
+  }"
+  [(set_attr "length" "12,12,24,24,12,12")
+   (set_attr "arch" "nota8,nota8,*,*,onlya8,onlya8")
+   (set_attr_alternative "insn_enabled"
+     [(const_string "yes")
+      (const_string "yes")
+      (if_then_else (match_test "optimize_function_for_size_p (cfun)")
+                    (const_string "no")
+                    (const_string "yes"))
+      (if_then_else (match_test "optimize_function_for_size_p (cfun)")
+                    (const_string "no")
+                    (const_string "yes"))
+      (const_string "yes")
+      (const_string "yes")])]
+)
+
+(define_expand "lshrdi3_neon"
+  [(match_operand:DI 0 "s_register_operand" "")
+   (match_operand:DI 1 "s_register_operand" "")
+   (match_operand:SI 2 "shift_amount_64" "")]
+  "TARGET_NEON"
+{
+  if (CONST_INT_P (operands[2]))
+    emit_insn (gen_lshrdi3_neon_imm (operands[0], operands[1], operands[2]));
+  else
+    emit_insn (gen_lshrdi3_neon_reg (operands[0], operands[1], operands[2]));
+  DONE;
+})
+
 ;; Widening operations
 
 (define_insn "widen_ssum<mode>3"
diff --git a/gcc/config/arm/predicates.md b/gcc/config/arm/predicates.md
index b535335..64eb3b8 100644
--- a/gcc/config/arm/predicates.md
+++ b/gcc/config/arm/predicates.md
@@ -769,3 +769,11 @@
 
 (define_special_predicate "add_operator"
   (match_code "plus"))
+
+(define_predicate "int_0_to_63"
+  (and (match_code "const_int")
+       (match_test "IN_RANGE (INTVAL (op), 0, 63)")))
+
+(define_predicate "shift_amount_64"
+  (ior (match_operand 0 "s_register_operand")
+       (match_operand 0 "int_0_to_63")))

[ARM] 64-bit shifts in NEON.

Commit Message

Patch