2012-02-21 Andrew Stubbs <ams@codesourcery.com>
gcc/
* config/arm/arm.c (arm_print_operand): Add new 'E' format code.
* config/arm/arm.h (enum reg_class): Add VFP_LO_REGS_EVEN.
(REG_CLASS_NAMES, REG_CLASS_CONTENTS, IS_VFP_CLASS): Likewise.
* config/arm/arm.md (ashldi3): Add TARGET_NEON case.
(ashrdi3, lshrdi3): Likewise.
* config/arm/constraints.md (T): New register constraint.
(Pe, P1, Pf, Pg): New constraints.
* config/arm/neon.md (signed_shift_di3_neon): New pattern.
(unsigned_shift_di3_neon, ashldi3_neon): New patterns.
(ashrdi3_neon_imm, ashrdi3_neon_reg): New patterns.
(ashrdi3_neon): New pattern.
(lshrdi3_neon_imm, lshrdi3_neon_reg, lshrdi3_neon): New patterns.
* config/arm/predicates.md (int_0_to_63): New predicate.
(shift_amount_64): New predicate.
---
gcc/config/arm/arm.c | 18 +++
gcc/config/arm/arm.h | 5 +
gcc/config/arm/arm.md | 33 ++++-
gcc/config/arm/constraints.md | 30 ++++-
gcc/config/arm/neon.md | 271 +++++++++++++++++++++++++++++++++++++++++
gcc/config/arm/predicates.md | 8 +
6 files changed, 355 insertions(+), 10 deletions(-)
@@ -17661,6 +17661,24 @@ arm_print_operand (FILE *stream, rtx x, int code)
}
return;
+ /* Print the VFP/Neon double precision register name that overlaps the
+ given single-precision register. */
+ case 'E':
+ {
+ int mode = GET_MODE (x);
+
+ if (GET_MODE_SIZE (mode) != 4
+ || GET_CODE (x) != REG
+ || !IS_VFP_REGNUM (REGNO (x)))
+ {
+ output_operand_lossage ("invalid operand for code '%c'", code);
+ return;
+ }
+
+ fprintf (stream, "d%d", (REGNO (x) - FIRST_VFP_REGNUM) >> 1);
+ }
+ return;
+
/* These two codes print the low/high doubleword register of a Neon quad
register, respectively. For pair-structure types, can also print
low/high quadword registers. */
@@ -1061,6 +1061,7 @@ enum reg_class
CIRRUS_REGS,
VFP_D0_D7_REGS,
VFP_LO_REGS,
+ VFP_LO_REGS_EVEN,
VFP_HI_REGS,
VFP_REGS,
IWMMXT_GR_REGS,
@@ -1087,6 +1088,7 @@ enum reg_class
"CIRRUS_REGS", \
"VFP_D0_D7_REGS", \
"VFP_LO_REGS", \
+ "VFP_LO_REGS_EVEN", \
"VFP_HI_REGS", \
"VFP_REGS", \
"IWMMXT_GR_REGS", \
@@ -1112,6 +1114,7 @@ enum reg_class
{ 0xF8000000, 0x000007FF, 0x00000000, 0x00000000 }, /* CIRRUS_REGS */ \
{ 0x00000000, 0x80000000, 0x00007FFF, 0x00000000 }, /* VFP_D0_D7_REGS */ \
{ 0x00000000, 0x80000000, 0x7FFFFFFF, 0x00000000 }, /* VFP_LO_REGS */ \
+ { 0x00000000, 0x80000000, 0x2AAAAAAA, 0x00000000 }, /* VFP_LO_REGS_EVEN */ \
{ 0x00000000, 0x00000000, 0x80000000, 0x7FFFFFFF }, /* VFP_HI_REGS */ \
{ 0x00000000, 0x80000000, 0xFFFFFFFF, 0x7FFFFFFF }, /* VFP_REGS */ \
{ 0x00000000, 0x00007800, 0x00000000, 0x00000000 }, /* IWMMXT_GR_REGS */ \
@@ -1129,7 +1132,7 @@ enum reg_class
/* Any of the VFP register classes. */
#define IS_VFP_CLASS(X) \
- ((X) == VFP_D0_D7_REGS || (X) == VFP_LO_REGS \
+ ((X) == VFP_D0_D7_REGS || (X) == VFP_LO_REGS || (X) == VFP_LO_REGS_EVEN \
|| (X) == VFP_HI_REGS || (X) == VFP_REGS)
/* The same information, inverted:
@@ -3466,8 +3466,15 @@
(match_operand:SI 2 "reg_or_int_operand" "")))]
"TARGET_32BIT"
"
- if (!CONST_INT_P (operands[2])
- && (TARGET_REALLY_IWMMXT || (TARGET_HARD_FLOAT && TARGET_MAVERICK)))
+ if (TARGET_NEON)
+ {
+ /* Delay the decision whether to use NEON or core-regs until
+ register allocation. */
+ emit_insn (gen_ashldi3_neon (operands[0], operands[1], operands[2]));
+ DONE;
+ }
+ else if (!CONST_INT_P (operands[2])
+ && (TARGET_REALLY_IWMMXT || (TARGET_HARD_FLOAT && TARGET_MAVERICK)))
; /* No special preparation statements; expand pattern as above. */
else
{
@@ -3541,8 +3548,15 @@
(match_operand:SI 2 "reg_or_int_operand" "")))]
"TARGET_32BIT"
"
- if (!CONST_INT_P (operands[2])
- && (TARGET_REALLY_IWMMXT || (TARGET_HARD_FLOAT && TARGET_MAVERICK)))
+ if (TARGET_NEON)
+ {
+ /* Delay the decision whether to use NEON or core-regs until
+ register allocation. */
+ emit_insn (gen_ashrdi3_neon (operands[0], operands[1], operands[2]));
+ DONE;
+ }
+ else if (!CONST_INT_P (operands[2])
+ && (TARGET_REALLY_IWMMXT || (TARGET_HARD_FLOAT && TARGET_MAVERICK)))
; /* No special preparation statements; expand pattern as above. */
else
{
@@ -3614,8 +3628,15 @@
(match_operand:SI 2 "reg_or_int_operand" "")))]
"TARGET_32BIT"
"
- if (!CONST_INT_P (operands[2])
- && (TARGET_REALLY_IWMMXT || (TARGET_HARD_FLOAT && TARGET_MAVERICK)))
+ if (TARGET_NEON)
+ {
+ /* Delay the decision whether to use NEON or core-regs until
+ register allocation. */
+ emit_insn (gen_lshrdi3_neon (operands[0], operands[1], operands[2]));
+ DONE;
+ }
+ else if (!CONST_INT_P (operands[2])
+ && (TARGET_REALLY_IWMMXT || (TARGET_HARD_FLOAT && TARGET_MAVERICK)))
; /* No special preparation statements; expand pattern as above. */
else
{
@@ -19,7 +19,7 @@
;; <http://www.gnu.org/licenses/>.
;; The following register constraints have been used:
-;; - in ARM/Thumb-2 state: f, t, v, w, x, y, z
+;; - in ARM/Thumb-2 state: f, t, T, v, w, x, y, z
;; - in Thumb state: h, b
;; - in both states: l, c, k
;; In ARM state, 'l' is an alias for 'r'
@@ -29,9 +29,9 @@
;; in Thumb-1 state: I, J, K, L, M, N, O
;; The following multi-letter normal constraints have been used:
-;; in ARM/Thumb-2 state: Da, Db, Dc, Dn, Dl, DL, Dv, Dy, Di, Dt, Dz
+;; in ARM/Thumb-2 state: Da, Db, Dc, Dn, Dl, DL, Dv, Dy, Di, Dt, Dz, Pe, Pf, P1
;; in Thumb-1 state: Pa, Pb, Pc, Pd
-;; in Thumb-2 state: Pj, PJ, Ps, Pt, Pu, Pv, Pw, Px, Py
+;; in Thumb-2 state: Pg, Pj, PJ, Ps, Pt, Pu, Pv, Pw, Px, Py
;; The following memory constraints have been used:
;; in ARM/Thumb-2 state: Q, Ut, Uv, Uy, Un, Um, Us
@@ -45,6 +45,9 @@
(define_register_constraint "t" "TARGET_32BIT ? VFP_LO_REGS : NO_REGS"
"The VFP registers @code{s0}-@code{s31}.")
+(define_register_constraint "T" "TARGET_32BIT ? VFP_LO_REGS_EVEN : NO_REGS"  ; Used for the NEON shift-amount register operand, which is printed with %E.
+ "The even numbered VFP registers @code{s0}-@code{s31}.")
+
(define_register_constraint "v" "TARGET_ARM ? CIRRUS_REGS : NO_REGS"
"The Cirrus Maverick co-processor registers.")
@@ -172,6 +175,27 @@
(and (match_code "const_int")
(match_test "TARGET_THUMB1 && ival >= 0 && ival <= 7")))
+(define_constraint "Pe"  ; Immediate shift amount accepted by the NEON (w-register) 64-bit shift alternatives.
+ "@internal In ARM/Thumb-2 state, a constant in the range 0 to 63"
+ (and (match_code "const_int")
+ (match_test "TARGET_32BIT && ival >= 0 && ival < 64")))
+
+(define_constraint "P1"  ; Selects the core-register alternative that is split into a dedicated 1-bit shift.
+ "@internal In ARM/Thumb2 state, a constant of 1"
+ (and (match_code "const_int")
+ (match_test "TARGET_32BIT && ival == 1")))
+
+(define_constraint "Pf"  ; Constant amounts for the core-register alternative that takes no scratch register.
+ "@internal In ARM state, a constant in the range 0 to 63, and in thumb-2 state, 32 to 63"
+ (and (match_code "const_int")
+ (match_test "(TARGET_ARM && ival >= 0 && ival < 64)
+ || (TARGET_THUMB2 && ival >= 32 && ival < 64)")))
+
+(define_constraint "Pg"  ; Constant amounts for the core-register alternative that is given an SI scratch register.
+ "@internal In Thumb-2 state, a constant in the range 0 to 31"
+ (and (match_code "const_int")
+ (match_test "TARGET_THUMB2 && ival >= 0 && ival <= 31")))
+
(define_constraint "Ps"
"@internal In Thumb-2 state a constant in the range -255 to +255"
(and (match_code "const_int")
@@ -1090,6 +1090,277 @@
DONE;
})
+;; 64-bit shifts
+
+; Signed 64-bit NEON shift (vshl.s64) by the amount held in register operand 2; a caller implementing a right shift must pass the negated amount.
+(define_insn "signed_shift_di3_neon"
+ [(set (match_operand:DI 0 "s_register_operand" "=w")
+ (unspec:DI [(match_operand:DI 1 "s_register_operand" " w")
+ (match_operand:SI 2 "s_register_operand" " T")]
+ UNSPEC_ASHIFT_SIGNED))]
+ "TARGET_NEON"
+ "vshl.s64\t%P0, %P1, %E2"
+ [(set_attr "neon_type" "neon_vshl_ddd")]
+)
+
+; Unsigned 64-bit NEON shift (vshl.u64) by the amount held in register operand 2; a caller implementing a right shift must pass the negated amount.
+(define_insn "unsigned_shift_di3_neon"
+ [(set (match_operand:DI 0 "s_register_operand" "=w")
+ (unspec:DI [(match_operand:DI 1 "s_register_operand" " w")
+ (match_operand:SI 2 "s_register_operand" " T")]
+ UNSPEC_ASHIFT_UNSIGNED))]
+ "TARGET_NEON"
+ "vshl.u64\t%P0, %P1, %E2"
+ [(set_attr "neon_type" "neon_vshl_ddd")]
+)
+
+(define_insn_and_split "ashldi3_neon"  ; 64-bit left shift; NEON vs. core registers is chosen by the register allocator.
+ [(set (match_operand:DI 0 "s_register_operand" "=w, w,?&r,?&r,?r,?r,?r,?w,?w")
+ (ashift:DI (match_operand:DI 1 "s_register_operand" " w, w, 0, r, r, r, r, w, w")
+ (match_operand:SI 2 "shift_amount_64" " T,Pe, r, r,P1,Pf,Pg, T,Pe")))
+ (clobber (match_scratch:SI 3 "=X, X, r, r, X, X, r, X, X"))
+ (clobber (match_scratch:SI 4 "=X, X, r, r, X, X, X, X, X"))
+ (clobber (reg:CC_C CC_REGNUM))]
+ "TARGET_NEON"
+ "@
+ vshl.u64\t%P0, %P1, %E2
+ vshl.u64\t%P0, %P1, %2
+ #
+ #
+ #
+ #
+ #
+ vshl.u64\t%P0, %P1, %E2
+ vshl.u64\t%P0, %P1, %2"
+ "TARGET_NEON && reload_completed && !(IS_VFP_REGNUM (REGNO (operands[0])))"  ; Split only when the result is not in a VFP/NEON register.
+ [(const_int 0)]
+ "
+ {
+ if (CONST_INT_P (operands[2]) && INTVAL (operands[2]) == 1)
+ /* 1-bit left shift; this clobbers CC. */
+ emit_insn (gen_arm_ashldi3_1bit (operands[0], operands[1]));
+ else
+ arm_emit_coreregs_64bit_shift (ASHIFT, operands[0], operands[1],
+ operands[2], operands[3], operands[4]);
+ DONE;
+ }"
+ [(set_attr "neon_type" "neon_vshl_ddd,neon_vshl_ddd,*,*,*,*,*,neon_vshl_ddd,neon_vshl_ddd")
+ (set_attr "length" "*,*,24,24,8,12,12,*,*")
+ (set_attr "arch" "nota8,nota8,*,*,*,*,*,onlya8,onlya8")
+ (set_attr_alternative "insn_enabled"  ; The five core-register alternatives are disabled when optimizing for size.
+ [(const_string "yes")
+ (const_string "yes")
+ (if_then_else (match_test "optimize_function_for_size_p (cfun)")
+ (const_string "no")
+ (const_string "yes"))
+ (if_then_else (match_test "optimize_function_for_size_p (cfun)")
+ (const_string "no")
+ (const_string "yes"))
+ (if_then_else (match_test "optimize_function_for_size_p (cfun)")
+ (const_string "no")
+ (const_string "yes"))
+ (if_then_else (match_test "optimize_function_for_size_p (cfun)")
+ (const_string "no")
+ (const_string "yes"))
+ (if_then_else (match_test "optimize_function_for_size_p (cfun)")
+ (const_string "no")
+ (const_string "yes"))
+ (const_string "yes")
+ (const_string "yes")])]
+)
+
+(define_insn_and_split "ashrdi3_neon_imm"  ; 64-bit arithmetic right shift by a constant in 0..63.
+ [(set (match_operand:DI 0 "s_register_operand" "=w,?r,?r,?r,?w")
+ (ashiftrt:DI (match_operand:DI 1 "s_register_operand" " w, r, r, r, w")
+ (match_operand:SI 2 "int_0_to_63" "Pe,P1,Pf,Pg,Pe")))
+ (clobber (match_scratch:SI 3 "=X, X, X, r, X"))
+ (clobber (reg:CC_C CC_REGNUM))]
+ "TARGET_NEON"
+ "@
+ vshr.s64\t%P0, %P1, %2
+ #
+ #
+ #
+ vshr.s64\t%P0, %P1, %2"
+ "TARGET_NEON && reload_completed && !(IS_VFP_REGNUM (REGNO (operands[0])))"  ; Split only when the result is not in a VFP/NEON register.
+ [(const_int 0)]
+ "
+ {
+ if (INTVAL (operands[2]) == 1)
+ /* This clobbers CC. */
+ emit_insn (gen_arm_ashrdi3_1bit (operands[0], operands[1]));
+ else
+ arm_emit_coreregs_64bit_shift (ASHIFTRT, operands[0], operands[1],
+ operands[2], operands[3], NULL);
+ DONE;
+ }"
+ [(set_attr "neon_type" "neon_vshl_ddd,*,*,*,neon_vshl_ddd")
+ (set_attr "length" "*,8,12,12,*")
+ (set_attr "arch" "nota8,*,*,*,onlya8")
+ (set_attr_alternative "insn_enabled"  ; The three core-register alternatives are disabled when optimizing for size.
+ [(const_string "yes")
+ (if_then_else (match_test "optimize_function_for_size_p (cfun)")
+ (const_string "no")
+ (const_string "yes"))
+ (if_then_else (match_test "optimize_function_for_size_p (cfun)")
+ (const_string "no")
+ (const_string "yes"))
+ (if_then_else (match_test "optimize_function_for_size_p (cfun)")
+ (const_string "no")
+ (const_string "yes"))
+ (const_string "yes")])]
+)
+
+(define_insn_and_split "ashrdi3_neon_reg"  ; 64-bit arithmetic right shift by an amount held in a core register.
+ [(set (match_operand:DI 0 "s_register_operand" "=w,w,?&r,?&r,?w,?w")
+ (unspec:DI [(match_operand:DI 1 "s_register_operand" " w,w, 0, r, w, w")
+ (match_operand:SI 2 "s_register_operand" " r,r, r, r, r, r")]
+ UNSPEC_ASHIFT_SIGNED))
+ (clobber (match_scratch:SI 3 "=2,r, r, r, 2, r"))
+ (clobber (match_scratch:SI 4 "=T,T, r, r, T, T"))
+ (clobber (reg:CC CC_REGNUM))]
+ "TARGET_NEON"
+ "#"  ; No direct output template: every alternative is split after reload.
+ "TARGET_NEON && reload_completed"
+ [(const_int 0)]
+ "
+ {
+ if (IS_VFP_REGNUM (REGNO (operands[0])))
+ {
+ emit_insn (gen_negsi2 (operands[3], operands[2])); /* Negate the amount for the right shift. */
+ emit_insn (gen_rtx_SET (SImode, operands[4], operands[3])); /* Copy the negated amount into scratch operand 4. */
+ emit_insn (gen_signed_shift_di3_neon (operands[0], operands[1],
+ operands[4]));
+ }
+ else
+ /* This clobbers CC (ASHIFTRT only). */
+ arm_emit_coreregs_64bit_shift (ASHIFTRT, operands[0], operands[1],
+ operands[2], operands[3], operands[4]);
+ DONE;
+ }"
+ [(set_attr "length" "12,12,24,24,12,12")
+ (set_attr "arch" "nota8,nota8,*,*,onlya8,onlya8")
+ (set_attr_alternative "insn_enabled"  ; The two core-register alternatives are disabled when optimizing for size.
+ [(const_string "yes")
+ (const_string "yes")
+ (if_then_else (match_test "optimize_function_for_size_p (cfun)")
+ (const_string "no")
+ (const_string "yes"))
+ (if_then_else (match_test "optimize_function_for_size_p (cfun)")
+ (const_string "no")
+ (const_string "yes"))
+ (const_string "yes")
+ (const_string "yes")])]
+)
+
+(define_expand "ashrdi3_neon"  ; Emits the _imm pattern for a constant shift amount, otherwise the _reg pattern.
+ [(match_operand:DI 0 "s_register_operand" "")
+ (match_operand:DI 1 "s_register_operand" "")
+ (match_operand:SI 2 "shift_amount_64" "")]
+ "TARGET_NEON"
+{
+ if (CONST_INT_P (operands[2]))
+ emit_insn (gen_ashrdi3_neon_imm (operands[0], operands[1], operands[2]));
+ else
+ emit_insn (gen_ashrdi3_neon_reg (operands[0], operands[1], operands[2]));
+ DONE;
+})
+
+(define_insn_and_split "lshrdi3_neon_imm"  ; 64-bit logical right shift by a constant in 0..63.
+ [(set (match_operand:DI 0 "s_register_operand" "=w,?r,?r,?r,?w")
+ (lshiftrt:DI (match_operand:DI 1 "s_register_operand" " w, r, r, r, w")
+ (match_operand:SI 2 "int_0_to_63" "Pe,P1,Pf,Pg,Pe")))
+ (clobber (match_scratch:SI 3 "=X, X, X, r, X"))
+ (clobber (reg:CC_C CC_REGNUM))]
+ "TARGET_NEON"
+ "@
+ vshr.u64\t%P0, %P1, %2
+ #
+ #
+ #
+ vshr.u64\t%P0, %P1, %2"
+ "TARGET_NEON && reload_completed && !(IS_VFP_REGNUM (REGNO (operands[0])))"  ; Split only when the result is not in a VFP/NEON register.
+ [(const_int 0)]
+ "
+ {
+ if (INTVAL (operands[2]) == 1)
+ /* This clobbers CC. */
+ emit_insn (gen_arm_lshrdi3_1bit (operands[0], operands[1]));
+ else
+ arm_emit_coreregs_64bit_shift (LSHIFTRT, operands[0], operands[1],
+ operands[2], operands[3], NULL);
+ DONE;
+ }"
+ [(set_attr "neon_type" "neon_vshl_ddd,*,*,*,neon_vshl_ddd")
+ (set_attr "length" "*,8,12,12,*")
+ (set_attr "arch" "nota8,*,*,*,onlya8")
+ (set_attr_alternative "insn_enabled"  ; The three core-register alternatives are disabled when optimizing for size.
+ [(const_string "yes")
+ (if_then_else (match_test "optimize_function_for_size_p (cfun)")
+ (const_string "no")
+ (const_string "yes"))
+ (if_then_else (match_test "optimize_function_for_size_p (cfun)")
+ (const_string "no")
+ (const_string "yes"))
+ (if_then_else (match_test "optimize_function_for_size_p (cfun)")
+ (const_string "no")
+ (const_string "yes"))
+ (const_string "yes")])]
+)
+
+(define_insn_and_split "lshrdi3_neon_reg"  ; 64-bit logical right shift by an amount held in a core register.
+ [(set (match_operand:DI 0 "s_register_operand" "=w,w,?&r,?&r,?w,?w")
+ (unspec:DI [(match_operand:DI 1 "s_register_operand" " w,w, 0, r, w, w")
+ (match_operand:SI 2 "s_register_operand" " r,r, r, r, r, r")]
+ UNSPEC_ASHIFT_UNSIGNED))
+ (clobber (match_scratch:SI 3 "=2,r, r, r, 2, r"))
+ (clobber (match_scratch:SI 4 "=T,T, r, r, T, T"))]
+ "TARGET_NEON"
+ "#"  ; No direct output template: every alternative is split after reload.
+ "TARGET_NEON && reload_completed"
+ [(const_int 0)]
+ "
+ {
+ if (IS_VFP_REGNUM (REGNO (operands[0])))
+ {
+ emit_insn (gen_negsi2 (operands[3], operands[2])); /* Negate the amount for the right shift. */
+ emit_insn (gen_rtx_SET (SImode, operands[4], operands[3])); /* Copy the negated amount into scratch operand 4. */
+ emit_insn (gen_unsigned_shift_di3_neon (operands[0], operands[1],
+ operands[4]));
+ }
+ else
+ arm_emit_coreregs_64bit_shift (LSHIFTRT, operands[0], operands[1],
+ operands[2], operands[3], operands[4]);
+ DONE;
+ }"
+ [(set_attr "length" "12,12,24,24,12,12")
+ (set_attr "arch" "nota8,nota8,*,*,onlya8,onlya8")
+ (set_attr_alternative "insn_enabled"  ; The two core-register alternatives are disabled when optimizing for size.
+ [(const_string "yes")
+ (const_string "yes")
+ (if_then_else (match_test "optimize_function_for_size_p (cfun)")
+ (const_string "no")
+ (const_string "yes"))
+ (if_then_else (match_test "optimize_function_for_size_p (cfun)")
+ (const_string "no")
+ (const_string "yes"))
+ (const_string "yes")
+ (const_string "yes")])]
+)
+
+(define_expand "lshrdi3_neon"  ; Emits the _imm pattern for a constant shift amount, otherwise the _reg pattern.
+ [(match_operand:DI 0 "s_register_operand" "")
+ (match_operand:DI 1 "s_register_operand" "")
+ (match_operand:SI 2 "shift_amount_64" "")]
+ "TARGET_NEON"
+{
+ if (CONST_INT_P (operands[2]))
+ emit_insn (gen_lshrdi3_neon_imm (operands[0], operands[1], operands[2]));
+ else
+ emit_insn (gen_lshrdi3_neon_reg (operands[0], operands[1], operands[2]));
+ DONE;
+})
+
;; Widening operations
(define_insn "widen_ssum<mode>3"
@@ -769,3 +769,11 @@
(define_special_predicate "add_operator"
(match_code "plus"))
+
+(define_predicate "int_0_to_63"  ; Matches a const_int whose value is in the inclusive range 0..63.
+ (and (match_code "const_int")
+ (match_test "IN_RANGE (INTVAL (op), 0, 63)")))
+
+(define_predicate "shift_amount_64"  ; Matches either an s_register_operand or a constant accepted by int_0_to_63.
+ (ior (match_operand 0 "s_register_operand")
+ (match_operand 0 "int_0_to_63")))