Message ID | 1444175989-24944-2-git-send-email-charles.baylis@linaro.org |
---|---|
State | Superseded |
Headers | show |
On 07/10/15 00:59, charles.baylis@linaro.org wrote: > diff --git a/gcc/config/arm/arm-builtins.c b/gcc/config/arm/arm-builtins.c ... > case NEON_ARG_MEMORY: > /* Check if expand failed. */ > if (op[argc] == const0_rtx) > { > - va_end (ap); > return 0; > } ...and drop the braces? > diff --git a/gcc/config/arm/arm.c b/gcc/config/arm/arm.c > index 02f5dc3..448cde3 100644 > --- a/gcc/config/arm/arm.c > +++ b/gcc/config/arm/arm.c > @@ -30117,4 +30117,5 @@ arm_sched_fusion_priority (rtx_insn *insn, int max_pri, > *pri = tmp; > return; > } > + > #include "gt-arm.h" This looks unrelated (and is the only change to arm.c) - perhaps commit separately? (Note I am not a maintainer! But this looks "obvious"...) > diff --git a/gcc/config/arm/arm.h b/gcc/config/arm/arm.h > index 87c9f90..27ac4dc 100644 > --- a/gcc/config/arm/arm.h > +++ b/gcc/config/arm/arm.h > @@ -288,6 +288,9 @@ extern void (*arm_lang_output_object_attributes_hook)(void); > #define TARGET_BPABI false > #endif > > +#define ENDIAN_LANE_N(mode, n) \ > + (BYTES_BIG_ENDIAN ? GET_MODE_NUNITS (mode) - 1 - n : n) > + Given we are making changes here to how this all works on bigendian, have you tested armeb at all? Generally I would say this all looks sensible :) Cheers, Alan
On 12 October 2015 at 11:58, Alan Lawrence <alan.lawrence@arm.com> wrote: > On 07/10/15 00:59, charles.baylis@linaro.org wrote: >> >> diff --git a/gcc/config/arm/arm-builtins.c b/gcc/config/arm/arm-builtins.c > > ... >> >> case NEON_ARG_MEMORY: >> /* Check if expand failed. */ >> if (op[argc] == const0_rtx) >> { >> - va_end (ap); >> return 0; >> } > > > ...and drop the braces? Will do. >> diff --git a/gcc/config/arm/arm.c b/gcc/config/arm/arm.c >> index 02f5dc3..448cde3 100644 >> --- a/gcc/config/arm/arm.c >> +++ b/gcc/config/arm/arm.c >> @@ -30117,4 +30117,5 @@ arm_sched_fusion_priority (rtx_insn *insn, int >> max_pri, >> *pri = tmp; >> return; >> } >> + >> #include "gt-arm.h" > > > This looks unrelated (and is the only change to arm.c) - perhaps commit > separately? (Note I am not a maintainer! But this looks "obvious"...) It doesn't seem very useful. I'll drop it. >> diff --git a/gcc/config/arm/arm.h b/gcc/config/arm/arm.h >> index 87c9f90..27ac4dc 100644 >> --- a/gcc/config/arm/arm.h >> +++ b/gcc/config/arm/arm.h >> @@ -288,6 +288,9 @@ extern void >> (*arm_lang_output_object_attributes_hook)(void); >> #define TARGET_BPABI false >> #endif >> >> +#define ENDIAN_LANE_N(mode, n) \ >> + (BYTES_BIG_ENDIAN ? GET_MODE_NUNITS (mode) - 1 - n : n) >> + > > > Given we are making changes here to how this all works on bigendian, have > you tested armeb at all? I tested on big endian, and it passes, except for a testsuite issue with the *_f16 tests, which fail because they are built without the fp16 options on big endian. This is because check_effective_target_arm_neon_fp16_ok_nocache gets an ICE when it attempts to compile the test program. I think those fp16 intrinsics are in your area, do you want to take a look? :) Thanks for the review Charles
On 14/10/15 23:02, Charles Baylis wrote: > On 12 October 2015 at 11:58, Alan Lawrence <alan.lawrence@arm.com> wrote: > >> Given we are making changes here to how this all works on bigendian, have >> you tested armeb at all? > > I tested on big endian, and it passes, except.... Well, I asked because it seemed good to make sure that the changes/improvements to how lane-swapping was done, wasn't breaking anything on armeb by the back door, and so thank you, I'm happy with that as far as your patch is concerned ;). > for a testsuite issue > with the *_f16 tests, which fail because they are built without the > fp16 options on big endian. This is because > check_effective_target_arm_neon_fp16_ok_nocache gets an ICE when it > attempts to compile the test program. I think those fp16 intrinsics > are in your area, do you want to take a look? :) Heh, yes, I see ;). So I've dug into this a bit, and the problem seems to be that we don't define a movv4hf pattern, and hence, we fall back to emit_multi_word_move. This uses subregs, and in simplify_subreg_regno, REG_CANNOT_CHANGE_MODE_P is true on bigendian (but false on little-endian). That is, I *think* the right thing to do is just to add a movv4hf (and v8hf) pattern, I'm testing this now.... Cheers, Alan
diff --git a/gcc/config/arm/arm-builtins.c b/gcc/config/arm/arm-builtins.c index 0f5a1f1..a29f8d6 100644 --- a/gcc/config/arm/arm-builtins.c +++ b/gcc/config/arm/arm-builtins.c @@ -77,7 +77,9 @@ enum arm_type_qualifiers /* Polynomial types. */ qualifier_poly = 0x100, /* Lane indices - must be within range of previous argument = a vector. */ - qualifier_lane_index = 0x200 + qualifier_lane_index = 0x200, + /* Lane indices for single lane structure loads and stores. */ + qualifier_struct_load_store_lane_index = 0x400 }; /* The qualifier_internal allows generation of a unary builtin from @@ -1973,6 +1975,7 @@ typedef enum { NEON_ARG_COPY_TO_REG, NEON_ARG_CONSTANT, NEON_ARG_LANE_INDEX, + NEON_ARG_STRUCT_LOAD_STORE_LANE_INDEX, NEON_ARG_MEMORY, NEON_ARG_STOP } builtin_arg; @@ -2030,9 +2033,9 @@ neon_dereference_pointer (tree exp, tree type, machine_mode mem_mode, /* Expand a Neon builtin. */ static rtx arm_expand_neon_args (rtx target, machine_mode map_mode, int fcode, - int icode, int have_retval, tree exp, ...) + int icode, int have_retval, tree exp, + builtin_arg *args) { - va_list ap; rtx pat; tree arg[SIMD_MAX_BUILTIN_ARGS]; rtx op[SIMD_MAX_BUILTIN_ARGS]; @@ -2047,13 +2050,11 @@ arm_expand_neon_args (rtx target, machine_mode map_mode, int fcode, || !(*insn_data[icode].operand[0].predicate) (target, tmode))) target = gen_reg_rtx (tmode); - va_start (ap, exp); - formals = TYPE_ARG_TYPES (TREE_TYPE (arm_builtin_decls[fcode])); for (;;) { - builtin_arg thisarg = (builtin_arg) va_arg (ap, int); + builtin_arg thisarg = args[argc]; if (thisarg == NEON_ARG_STOP) break; @@ -2089,6 +2090,18 @@ arm_expand_neon_args (rtx target, machine_mode map_mode, int fcode, op[argc] = copy_to_mode_reg (mode[argc], op[argc]); break; + case NEON_ARG_STRUCT_LOAD_STORE_LANE_INDEX: + gcc_assert (argc > 1); + if (CONST_INT_P (op[argc])) + { + neon_lane_bounds (op[argc], 0, + GET_MODE_NUNITS (map_mode), exp); + /* Keep to GCC-vector-extension lane indices in the RTL. */ + op[argc] = + GEN_INT (ENDIAN_LANE_N (map_mode, INTVAL (op[argc]))); + } + goto constant_arg; + case NEON_ARG_LANE_INDEX: /* Previous argument must be a vector, which this indexes. */ gcc_assert (argc > 0); @@ -2099,17 +2112,22 @@ arm_expand_neon_args (rtx target, machine_mode map_mode, int fcode, } /* Fall through - if the lane index isn't a constant then the next case will error. */ + case NEON_ARG_CONSTANT: +constant_arg: if (!(*insn_data[icode].operand[opno].predicate) (op[argc], mode[argc])) - error_at (EXPR_LOCATION (exp), "incompatible type for argument %d, " - "expected %<const int%>", argc + 1); + { + error ("%Kargument %d must be a constant immediate", + exp, argc + 1); + return const0_rtx; + } break; + case NEON_ARG_MEMORY: /* Check if expand failed. */ if (op[argc] == const0_rtx) { - va_end (ap); return 0; } gcc_assert (MEM_P (op[argc])); @@ -2132,8 +2150,6 @@ arm_expand_neon_args (rtx target, machine_mode map_mode, int fcode, } } - va_end (ap); - if (have_retval) switch (argc) { @@ -2245,6 +2261,8 @@ arm_expand_neon_builtin (int fcode, tree exp, rtx target) if (d->qualifiers[qualifiers_k] & qualifier_lane_index) args[k] = NEON_ARG_LANE_INDEX; + else if (d->qualifiers[qualifiers_k] & qualifier_struct_load_store_lane_index) + args[k] = NEON_ARG_STRUCT_LOAD_STORE_LANE_INDEX; else if (d->qualifiers[qualifiers_k] & qualifier_immediate) args[k] = NEON_ARG_CONSTANT; else if (d->qualifiers[qualifiers_k] & qualifier_maybe_immediate) @@ -2270,11 +2288,7 @@ arm_expand_neon_builtin (int fcode, tree exp, rtx target) the function is void, and a 1 if it is not. */ return arm_expand_neon_args (target, d->mode, fcode, icode, !is_void, exp, - args[1], - args[2], - args[3], - args[4], - NEON_ARG_STOP); + &args[1]); } /* Expand an expression EXP that calls a built-in function, diff --git a/gcc/config/arm/arm.c b/gcc/config/arm/arm.c index 02f5dc3..448cde3 100644 --- a/gcc/config/arm/arm.c +++ b/gcc/config/arm/arm.c @@ -30117,4 +30117,5 @@ arm_sched_fusion_priority (rtx_insn *insn, int max_pri, *pri = tmp; return; } + #include "gt-arm.h" diff --git a/gcc/config/arm/arm.h b/gcc/config/arm/arm.h index 87c9f90..27ac4dc 100644 --- a/gcc/config/arm/arm.h +++ b/gcc/config/arm/arm.h @@ -288,6 +288,9 @@ extern void (*arm_lang_output_object_attributes_hook)(void); #define TARGET_BPABI false #endif +#define ENDIAN_LANE_N(mode, n) \ + (BYTES_BIG_ENDIAN ? GET_MODE_NUNITS (mode) - 1 - n : n) + /* Support for a compile-time default CPU, et cetera. The rules are: --with-arch is ignored if -march or -mcpu are specified. --with-cpu is ignored if -march or -mcpu are specified, and is overridden
From: Charles Baylis <charles.baylis@linaro.org> gcc/ChangeLog: <DATE> Charles Baylis <charles.baylis@linaro.org> PR target/63870 * config/arm/arm-builtins.c (enum arm_type_qualifiers): New enumerator qualifier_struct_load_store_lane_index. (builtin_arg): New enumerator NEON_ARG_STRUCT_LOAD_STORE_LANE_INDEX. (arm_expand_neon_args): New parameter. Remove ellipsis. Handle NEON argument qualifiers. (arm_expand_neon_builtin): Handle new NEON argument qualifier. * config/arm/arm.h (ENDIAN_LANE_N): New macro. Change-Id: Iaa14d8736879fa53776319977eda2089f0a26647 --- gcc/config/arm/arm-builtins.c | 46 ++++++++++++++++++++++++++++--------------- gcc/config/arm/arm.c | 1 + gcc/config/arm/arm.h | 3 +++ 3 files changed, 34 insertions(+), 16 deletions(-)