Message ID | 87o9t2x95f.fsf@linaro.org |
---|---|
State | New |
Headers | show |
Series | None | expand |
On Mon, Jul 3, 2017 at 9:38 AM, Richard Sandiford <richard.sandiford@linaro.org> wrote: > This patch records the base alignment and misalignment in > innermost_loop_behavior, to avoid the second-guessing that was > previously done in vect_compute_data_ref_alignment. It also makes > vect_analyze_data_refs use dr_analyze_innermost, instead of having an > almost-copy of the same code. > > I wasn't sure whether the alignments should be measured in bits > (for consistency with most other interfaces) or in bytes (for consistency > with DR_ALIGNED_TO, now DR_OFFSET_ALIGNMENT, and with *_ptr_info_alignment). > I went for bytes because: > > - I think in practice most consumers are going to want bytes. > E.g. using bytes avoids having to mix TYPE_ALIGN and TYPE_ALIGN_UNIT > in vect_compute_data_ref_alignment. > > - It means that any bit-level paranoia is dealt with when building > the innermost_loop_behavior and doesn't get pushed down to consumers. > > Tested on aarch64-linux-gnu and x86_64-linux-gnu. OK to install? Ok. Thanks, Richard. > Richard > > > 2017-07-03 Richard Sandiford <richard.sandiford@linaro.org> > > gcc/ > * tree-data-ref.h (innermost_loop_behavior): Add base_alignment > and base_misalignment fields. > (DR_BASE_ALIGNMENT, DR_BASE_MISALIGNMENT): New macros. > * tree-data-ref.c: Include builtins.h. > (dr_analyze_innermost): Set up the new innermost_loop_behavior fields. > * tree-vectorizer.h (STMT_VINFO_DR_BASE_ALIGNMENT): New macro. > (STMT_VINFO_DR_BASE_MISALIGNMENT): Likewise. > * tree-vect-data-refs.c: Include tree-cfg.h. > (vect_compute_data_ref_alignment): Use the new innermost_loop_behavior > fields instead of calculating an alignment here. > (vect_analyze_data_refs): Use dr_analyze_innermost. Dump the new > innermost_loop_behavior fields. 
> > Index: gcc/tree-data-ref.h > =================================================================== > --- gcc/tree-data-ref.h 2017-07-03 07:52:14.194782203 +0100 > +++ gcc/tree-data-ref.h 2017-07-03 07:52:55.920272347 +0100 > @@ -52,6 +52,42 @@ struct innermost_loop_behavior > tree init; > tree step; > > + /* BASE_ADDRESS is known to be misaligned by BASE_MISALIGNMENT bytes > + from an alignment boundary of BASE_ALIGNMENT bytes. For example, > + if we had: > + > + struct S __attribute__((aligned(16))) { ... }; > + > + char *ptr; > + ... *(struct S *) (ptr - 4) ...; > + > + the information would be: > + > + base_address: ptr > + base_aligment: 16 > + base_misalignment: 4 > + init: -4 > + > + where init cancels the base misalignment. If instead we had a > + reference to a particular field: > + > + struct S __attribute__((aligned(16))) { ... int f; ... }; > + > + char *ptr; > + ... ((struct S *) (ptr - 4))->f ...; > + > + the information would be: > + > + base_address: ptr > + base_aligment: 16 > + base_misalignment: 4 > + init: -4 + offsetof (S, f) > + > + where base_address + init might also be misaligned, and by a different > + amount from base_address. */ > + unsigned int base_alignment; > + unsigned int base_misalignment; > + > /* The largest power of two that divides OFFSET, capped to a suitably > high value if the offset is zero. This is a byte rather than a bit > quantity. 
*/ > @@ -147,6 +183,8 @@ #define DR_OFFSET(DR) (DR)- > #define DR_INIT(DR) (DR)->innermost.init > #define DR_STEP(DR) (DR)->innermost.step > #define DR_PTR_INFO(DR) (DR)->alias.ptr_info > +#define DR_BASE_ALIGNMENT(DR) (DR)->innermost.base_alignment > +#define DR_BASE_MISALIGNMENT(DR) (DR)->innermost.base_misalignment > #define DR_OFFSET_ALIGNMENT(DR) (DR)->innermost.offset_alignment > #define DR_STEP_ALIGNMENT(DR) (DR)->innermost.step_alignment > #define DR_INNERMOST(DR) (DR)->innermost > Index: gcc/tree-data-ref.c > =================================================================== > --- gcc/tree-data-ref.c 2017-07-03 07:52:14.193782226 +0100 > +++ gcc/tree-data-ref.c 2017-07-03 07:52:55.920272347 +0100 > @@ -94,6 +94,7 @@ Software Foundation; either version 3, o > #include "dumpfile.h" > #include "tree-affine.h" > #include "params.h" > +#include "builtins.h" > > static struct datadep_stats > { > @@ -802,11 +803,26 @@ dr_analyze_innermost (struct data_refere > return false; > } > > + /* Calculate the alignment and misalignment for the inner reference. */ > + unsigned int HOST_WIDE_INT base_misalignment; > + unsigned int base_alignment; > + get_object_alignment_1 (base, &base_alignment, &base_misalignment); > + > + /* There are no bitfield references remaining in BASE, so the values > + we got back must be whole bytes. */ > + gcc_assert (base_alignment % BITS_PER_UNIT == 0 > + && base_misalignment % BITS_PER_UNIT == 0); > + base_alignment /= BITS_PER_UNIT; > + base_misalignment /= BITS_PER_UNIT; > + > if (TREE_CODE (base) == MEM_REF) > { > if (!integer_zerop (TREE_OPERAND (base, 1))) > { > + /* Subtract MOFF from the base and add it to POFFSET instead. > + Adjust the misalignment to reflect the amount we subtracted. 
*/ > offset_int moff = mem_ref_offset (base); > + base_misalignment -= moff.to_short_addr (); > tree mofft = wide_int_to_tree (sizetype, moff); > if (!poffset) > poffset = mofft; > @@ -855,20 +871,46 @@ dr_analyze_innermost (struct data_refere > } > > init = ssize_int (pbitpos / BITS_PER_UNIT); > + > + /* Subtract any constant component from the base and add it to INIT instead. > + Adjust the misalignment to reflect the amount we subtracted. */ > split_constant_offset (base_iv.base, &base_iv.base, &dinit); > - init = size_binop (PLUS_EXPR, init, dinit); > + init = size_binop (PLUS_EXPR, init, dinit); > + base_misalignment -= TREE_INT_CST_LOW (dinit); > + > split_constant_offset (offset_iv.base, &offset_iv.base, &dinit); > - init = size_binop (PLUS_EXPR, init, dinit); > + init = size_binop (PLUS_EXPR, init, dinit); > > step = size_binop (PLUS_EXPR, > fold_convert (ssizetype, base_iv.step), > fold_convert (ssizetype, offset_iv.step)); > > - drb->base_address = canonicalize_base_object_address (base_iv.base); > + base = canonicalize_base_object_address (base_iv.base); > + > + /* See if get_pointer_alignment can guarantee a higher alignment than > + the one we calculated above. */ > + unsigned int HOST_WIDE_INT alt_misalignment; > + unsigned int alt_alignment; > + get_pointer_alignment_1 (base, &alt_alignment, &alt_misalignment); > + > + /* As above, these values must be whole bytes. 
*/ > + gcc_assert (alt_alignment % BITS_PER_UNIT == 0 > + && alt_misalignment % BITS_PER_UNIT == 0); > + alt_alignment /= BITS_PER_UNIT; > + alt_misalignment /= BITS_PER_UNIT; > + > + if (base_alignment < alt_alignment) > + { > + base_alignment = alt_alignment; > + base_misalignment = alt_misalignment; > + } > > + drb->base_address = base; > drb->offset = fold_convert (ssizetype, offset_iv.base); > drb->init = init; > drb->step = step; > + drb->base_alignment = base_alignment; > + drb->base_misalignment = base_misalignment & (base_alignment - 1); > drb->offset_alignment = highest_pow2_factor (offset_iv.base); > drb->step_alignment = highest_pow2_factor (step); > > @@ -1084,6 +1126,9 @@ create_data_ref (loop_p nest, loop_p loo > print_generic_expr (dump_file, DR_INIT (dr), TDF_SLIM); > fprintf (dump_file, "\n\tstep: "); > print_generic_expr (dump_file, DR_STEP (dr), TDF_SLIM); > + fprintf (dump_file, "\n\tbase alignment: %d", DR_BASE_ALIGNMENT (dr)); > + fprintf (dump_file, "\n\tbase misalignment: %d", > + DR_BASE_MISALIGNMENT (dr)); > fprintf (dump_file, "\n\toffset alignment: %d", > DR_OFFSET_ALIGNMENT (dr)); > fprintf (dump_file, "\n\tstep alignment: %d", DR_STEP_ALIGNMENT (dr)); > Index: gcc/tree-vectorizer.h > =================================================================== > --- gcc/tree-vectorizer.h 2017-07-03 07:52:14.196782157 +0100 > +++ gcc/tree-vectorizer.h 2017-07-03 07:52:55.921272300 +0100 > @@ -707,6 +707,9 @@ #define STMT_VINFO_DR_BASE_ADDRESS(S) > #define STMT_VINFO_DR_INIT(S) (S)->dr_wrt_vec_loop.init > #define STMT_VINFO_DR_OFFSET(S) (S)->dr_wrt_vec_loop.offset > #define STMT_VINFO_DR_STEP(S) (S)->dr_wrt_vec_loop.step > +#define STMT_VINFO_DR_BASE_ALIGNMENT(S) (S)->dr_wrt_vec_loop.base_alignment > +#define STMT_VINFO_DR_BASE_MISALIGNMENT(S) \ > + (S)->dr_wrt_vec_loop.base_misalignment > #define STMT_VINFO_DR_OFFSET_ALIGNMENT(S) \ > (S)->dr_wrt_vec_loop.offset_alignment > #define STMT_VINFO_DR_STEP_ALIGNMENT(S) \ > Index: 
gcc/tree-vect-data-refs.c > =================================================================== > --- gcc/tree-vect-data-refs.c 2017-07-03 07:52:14.194782203 +0100 > +++ gcc/tree-vect-data-refs.c 2017-07-03 07:52:55.921272300 +0100 > @@ -50,6 +50,7 @@ Software Foundation; either version 3, o > #include "expr.h" > #include "builtins.h" > #include "params.h" > +#include "tree-cfg.h" > > /* Return true if load- or store-lanes optab OPTAB is implemented for > COUNT vectors of type VECTYPE. NAME is the name of OPTAB. */ > @@ -667,8 +668,6 @@ vect_compute_data_ref_alignment (struct > struct loop *loop = NULL; > tree ref = DR_REF (dr); > tree vectype = STMT_VINFO_VECTYPE (stmt_info); > - tree base; > - unsigned HOST_WIDE_INT alignment; > > if (dump_enabled_p ()) > dump_printf_loc (MSG_NOTE, vect_location, > @@ -728,48 +727,18 @@ vect_compute_data_ref_alignment (struct > dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location, > "step doesn't divide the vector-size.\n"); > } > - tree base_addr = drb->base_address; > > - /* To look at alignment of the base we have to preserve an inner MEM_REF > - as that carries alignment information of the actual access. */ > - base = ref; > - while (handled_component_p (base)) > - base = TREE_OPERAND (base, 0); > - unsigned int base_alignment = 0; > - unsigned HOST_WIDE_INT base_bitpos; > - get_object_alignment_1 (base, &base_alignment, &base_bitpos); > - /* As data-ref analysis strips the MEM_REF down to its base operand > - to form DR_BASE_ADDRESS and adds the offset to DR_INIT we have to > - adjust things to make base_alignment valid as the alignment of > - DR_BASE_ADDRESS. */ > - if (TREE_CODE (base) == MEM_REF) > - { > - /* Note all this only works if DR_BASE_ADDRESS is the same as > - MEM_REF operand zero, otherwise DR/SCEV analysis might have factored > - in other offsets. We need to rework DR to compute the alingment > - of DR_BASE_ADDRESS as long as all information is still available. 
*/ > - if (operand_equal_p (TREE_OPERAND (base, 0), base_addr, 0)) > - { > - base_bitpos -= mem_ref_offset (base).to_short_addr () * BITS_PER_UNIT; > - base_bitpos &= (base_alignment - 1); > - } > - else > - base_bitpos = BITS_PER_UNIT; > - } > - if (base_bitpos != 0) > - base_alignment = base_bitpos & -base_bitpos; > - /* Also look at the alignment of the base address DR analysis > - computed. */ > - unsigned int base_addr_alignment = get_pointer_alignment (base_addr); > - if (base_addr_alignment > base_alignment) > - base_alignment = base_addr_alignment; > + unsigned int base_alignment = drb->base_alignment; > + unsigned int base_misalignment = drb->base_misalignment; > + unsigned HOST_WIDE_INT vector_alignment = TYPE_ALIGN_UNIT (vectype); > + unsigned HOST_WIDE_INT element_alignment > + = TYPE_ALIGN_UNIT (TREE_TYPE (vectype)); > > - if (base_alignment >= TYPE_ALIGN (TREE_TYPE (vectype))) > + if (base_alignment >= element_alignment > + && (base_misalignment & (element_alignment - 1)) == 0) > DR_VECT_AUX (dr)->base_element_aligned = true; > > - alignment = TYPE_ALIGN_UNIT (vectype); > - > - if (drb->offset_alignment < alignment > + if (drb->offset_alignment < vector_alignment > || !step_preserves_misalignment_p > /* We need to know whether the step wrt the vectorized loop is > negative when computing the starting misalignment below. 
*/ > @@ -785,12 +754,13 @@ vect_compute_data_ref_alignment (struct > return true; > } > > - if (base_alignment < TYPE_ALIGN (vectype)) > + if (base_alignment < vector_alignment) > { > - base = base_addr; > + tree base = drb->base_address; > if (TREE_CODE (base) == ADDR_EXPR) > base = TREE_OPERAND (base, 0); > - if (!vect_can_force_dr_alignment_p (base, TYPE_ALIGN (vectype))) > + if (!vect_can_force_dr_alignment_p (base, > + vector_alignment * BITS_PER_UNIT)) > { > if (dump_enabled_p ()) > { > @@ -828,24 +798,20 @@ vect_compute_data_ref_alignment (struct > DR_VECT_AUX (dr)->base_decl = base; > DR_VECT_AUX (dr)->base_misaligned = true; > DR_VECT_AUX (dr)->base_element_aligned = true; > + base_misalignment = 0; > } > + unsigned int misalignment = (base_misalignment > + + TREE_INT_CST_LOW (drb->init)); > > /* If this is a backward running DR then first access in the larger > vectype actually is N-1 elements before the address in the DR. > Adjust misalign accordingly. */ > - tree misalign = drb->init; > if (tree_int_cst_sgn (drb->step) < 0) > - { > - tree offset = ssize_int (TYPE_VECTOR_SUBPARTS (vectype) - 1); > - /* DR_STEP(dr) is the same as -TYPE_SIZE of the scalar type, > - otherwise we wouldn't be here. */ > - offset = fold_build2 (MULT_EXPR, ssizetype, offset, drb->step); > - /* PLUS because STEP was negative. */ > - misalign = size_binop (PLUS_EXPR, misalign, offset); > - } > + /* PLUS because STEP is negative. */ > + misalignment += ((TYPE_VECTOR_SUBPARTS (vectype) - 1) > + * TREE_INT_CST_LOW (drb->step)); > > - SET_DR_MISALIGNMENT (dr, > - wi::mod_floor (misalign, alignment, SIGNED).to_uhwi ()); > + SET_DR_MISALIGNMENT (dr, misalignment & (vector_alignment - 1)); > > if (dump_enabled_p ()) > { > @@ -3554,100 +3520,27 @@ vect_analyze_data_refs (vec_info *vinfo, > the outer-loop. 
*/ > if (loop && nested_in_vect_loop_p (loop, stmt)) > { > - tree outer_step, outer_base, outer_init; > - HOST_WIDE_INT pbitsize, pbitpos; > - tree poffset; > - machine_mode pmode; > - int punsignedp, preversep, pvolatilep; > - affine_iv base_iv, offset_iv; > - tree dinit; > - > /* Build a reference to the first location accessed by the > - inner-loop: *(BASE+INIT). (The first location is actually > - BASE+INIT+OFFSET, but we add OFFSET separately later). */ > - tree inner_base = build_fold_indirect_ref > - (fold_build_pointer_plus (base, init)); > + inner loop: *(BASE + INIT + OFFSET). By construction, > + this address must be invariant in the inner loop, so we > + can consider it as being used in the outer loop. */ > + tree init_offset = fold_build2 (PLUS_EXPR, TREE_TYPE (offset), > + init, offset); > + tree init_addr = fold_build_pointer_plus (base, init_offset); > + tree init_ref = build_fold_indirect_ref (init_addr); > > if (dump_enabled_p ()) > { > dump_printf_loc (MSG_NOTE, vect_location, > - "analyze in outer-loop: "); > - dump_generic_expr (MSG_NOTE, TDF_SLIM, inner_base); > + "analyze in outer loop: "); > + dump_generic_expr (MSG_NOTE, TDF_SLIM, init_ref); > dump_printf (MSG_NOTE, "\n"); > } > > - outer_base = get_inner_reference (inner_base, &pbitsize, &pbitpos, > - &poffset, &pmode, &punsignedp, > - &preversep, &pvolatilep); > - gcc_assert (outer_base != NULL_TREE); > - > - if (pbitpos % BITS_PER_UNIT != 0) > - { > - if (dump_enabled_p ()) > - dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location, > - "failed: bit offset alignment.\n"); > - return false; > - } > - > - if (preversep) > - { > - if (dump_enabled_p ()) > - dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location, > - "failed: reverse storage order.\n"); > - return false; > - } > - > - outer_base = build_fold_addr_expr (outer_base); > - if (!simple_iv (loop, loop_containing_stmt (stmt), outer_base, > - &base_iv, false)) > - { > - if (dump_enabled_p ()) > - dump_printf_loc 
(MSG_MISSED_OPTIMIZATION, vect_location, > - "failed: evolution of base is not affine.\n"); > - return false; > - } > - > - if (offset) > - { > - if (poffset) > - poffset = fold_build2 (PLUS_EXPR, TREE_TYPE (offset), offset, > - poffset); > - else > - poffset = offset; > - } > - > - if (!poffset) > - { > - offset_iv.base = ssize_int (0); > - offset_iv.step = ssize_int (0); > - } > - else if (!simple_iv (loop, loop_containing_stmt (stmt), poffset, > - &offset_iv, false)) > - { > - if (dump_enabled_p ()) > - dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location, > - "evolution of offset is not affine.\n"); > - return false; > - } > - > - outer_init = ssize_int (pbitpos / BITS_PER_UNIT); > - split_constant_offset (base_iv.base, &base_iv.base, &dinit); > - outer_init = size_binop (PLUS_EXPR, outer_init, dinit); > - split_constant_offset (offset_iv.base, &offset_iv.base, &dinit); > - outer_init = size_binop (PLUS_EXPR, outer_init, dinit); > - > - outer_step = size_binop (PLUS_EXPR, > - fold_convert (ssizetype, base_iv.step), > - fold_convert (ssizetype, offset_iv.step)); > - > - STMT_VINFO_DR_STEP (stmt_info) = outer_step; > - /* FIXME: Use canonicalize_base_object_address (base_iv.base); */ > - STMT_VINFO_DR_BASE_ADDRESS (stmt_info) = base_iv.base; > - STMT_VINFO_DR_INIT (stmt_info) = outer_init; > - STMT_VINFO_DR_OFFSET (stmt_info) = > - fold_convert (ssizetype, offset_iv.base); > - STMT_VINFO_DR_OFFSET_ALIGNMENT (stmt_info) > - = highest_pow2_factor (offset_iv.base); > + if (!dr_analyze_innermost (&STMT_VINFO_DR_WRT_VEC_LOOP (stmt_info), > + init_ref, loop)) > + /* dr_analyze_innermost already explained the failure. 
*/ > + return false; > > if (dump_enabled_p ()) > { > @@ -3665,6 +3558,10 @@ vect_analyze_data_refs (vec_info *vinfo, > dump_printf (MSG_NOTE, "\n\touter step: "); > dump_generic_expr (MSG_NOTE, TDF_SLIM, > STMT_VINFO_DR_STEP (stmt_info)); > + dump_printf (MSG_NOTE, "\n\touter base alignment: %d\n", > + STMT_VINFO_DR_BASE_ALIGNMENT (stmt_info)); > + dump_printf (MSG_NOTE, "\n\touter base misalignment: %d\n", > + STMT_VINFO_DR_BASE_MISALIGNMENT (stmt_info)); > dump_printf (MSG_NOTE, "\n\touter offset alignment: %d\n", > STMT_VINFO_DR_OFFSET_ALIGNMENT (stmt_info)); > dump_printf (MSG_NOTE, "\n\touter step alignment: %d\n",
Index: gcc/tree-data-ref.h =================================================================== --- gcc/tree-data-ref.h 2017-07-03 07:52:14.194782203 +0100 +++ gcc/tree-data-ref.h 2017-07-03 07:52:55.920272347 +0100 @@ -52,6 +52,42 @@ struct innermost_loop_behavior tree init; tree step; + /* BASE_ADDRESS is known to be misaligned by BASE_MISALIGNMENT bytes + from an alignment boundary of BASE_ALIGNMENT bytes. For example, + if we had: + + struct S __attribute__((aligned(16))) { ... }; + + char *ptr; + ... *(struct S *) (ptr - 4) ...; + + the information would be: + + base_address: ptr + base_aligment: 16 + base_misalignment: 4 + init: -4 + + where init cancels the base misalignment. If instead we had a + reference to a particular field: + + struct S __attribute__((aligned(16))) { ... int f; ... }; + + char *ptr; + ... ((struct S *) (ptr - 4))->f ...; + + the information would be: + + base_address: ptr + base_aligment: 16 + base_misalignment: 4 + init: -4 + offsetof (S, f) + + where base_address + init might also be misaligned, and by a different + amount from base_address. */ + unsigned int base_alignment; + unsigned int base_misalignment; + /* The largest power of two that divides OFFSET, capped to a suitably high value if the offset is zero. This is a byte rather than a bit quantity. 
*/ @@ -147,6 +183,8 @@ #define DR_OFFSET(DR) (DR)- #define DR_INIT(DR) (DR)->innermost.init #define DR_STEP(DR) (DR)->innermost.step #define DR_PTR_INFO(DR) (DR)->alias.ptr_info +#define DR_BASE_ALIGNMENT(DR) (DR)->innermost.base_alignment +#define DR_BASE_MISALIGNMENT(DR) (DR)->innermost.base_misalignment #define DR_OFFSET_ALIGNMENT(DR) (DR)->innermost.offset_alignment #define DR_STEP_ALIGNMENT(DR) (DR)->innermost.step_alignment #define DR_INNERMOST(DR) (DR)->innermost Index: gcc/tree-data-ref.c =================================================================== --- gcc/tree-data-ref.c 2017-07-03 07:52:14.193782226 +0100 +++ gcc/tree-data-ref.c 2017-07-03 07:52:55.920272347 +0100 @@ -94,6 +94,7 @@ Software Foundation; either version 3, o #include "dumpfile.h" #include "tree-affine.h" #include "params.h" +#include "builtins.h" static struct datadep_stats { @@ -802,11 +803,26 @@ dr_analyze_innermost (struct data_refere return false; } + /* Calculate the alignment and misalignment for the inner reference. */ + unsigned int HOST_WIDE_INT base_misalignment; + unsigned int base_alignment; + get_object_alignment_1 (base, &base_alignment, &base_misalignment); + + /* There are no bitfield references remaining in BASE, so the values + we got back must be whole bytes. */ + gcc_assert (base_alignment % BITS_PER_UNIT == 0 + && base_misalignment % BITS_PER_UNIT == 0); + base_alignment /= BITS_PER_UNIT; + base_misalignment /= BITS_PER_UNIT; + if (TREE_CODE (base) == MEM_REF) { if (!integer_zerop (TREE_OPERAND (base, 1))) { + /* Subtract MOFF from the base and add it to POFFSET instead. + Adjust the misalignment to reflect the amount we subtracted. 
*/ offset_int moff = mem_ref_offset (base); + base_misalignment -= moff.to_short_addr (); tree mofft = wide_int_to_tree (sizetype, moff); if (!poffset) poffset = mofft; @@ -855,20 +871,46 @@ dr_analyze_innermost (struct data_refere } init = ssize_int (pbitpos / BITS_PER_UNIT); + + /* Subtract any constant component from the base and add it to INIT instead. + Adjust the misalignment to reflect the amount we subtracted. */ split_constant_offset (base_iv.base, &base_iv.base, &dinit); - init = size_binop (PLUS_EXPR, init, dinit); + init = size_binop (PLUS_EXPR, init, dinit); + base_misalignment -= TREE_INT_CST_LOW (dinit); + split_constant_offset (offset_iv.base, &offset_iv.base, &dinit); - init = size_binop (PLUS_EXPR, init, dinit); + init = size_binop (PLUS_EXPR, init, dinit); step = size_binop (PLUS_EXPR, fold_convert (ssizetype, base_iv.step), fold_convert (ssizetype, offset_iv.step)); - drb->base_address = canonicalize_base_object_address (base_iv.base); + base = canonicalize_base_object_address (base_iv.base); + + /* See if get_pointer_alignment can guarantee a higher alignment than + the one we calculated above. */ + unsigned int HOST_WIDE_INT alt_misalignment; + unsigned int alt_alignment; + get_pointer_alignment_1 (base, &alt_alignment, &alt_misalignment); + + /* As above, these values must be whole bytes. 
*/ + gcc_assert (alt_alignment % BITS_PER_UNIT == 0 + && alt_misalignment % BITS_PER_UNIT == 0); + alt_alignment /= BITS_PER_UNIT; + alt_misalignment /= BITS_PER_UNIT; + + if (base_alignment < alt_alignment) + { + base_alignment = alt_alignment; + base_misalignment = alt_misalignment; + } + drb->base_address = base; drb->offset = fold_convert (ssizetype, offset_iv.base); drb->init = init; drb->step = step; + drb->base_alignment = base_alignment; + drb->base_misalignment = base_misalignment & (base_alignment - 1); drb->offset_alignment = highest_pow2_factor (offset_iv.base); drb->step_alignment = highest_pow2_factor (step); @@ -1084,6 +1126,9 @@ create_data_ref (loop_p nest, loop_p loo print_generic_expr (dump_file, DR_INIT (dr), TDF_SLIM); fprintf (dump_file, "\n\tstep: "); print_generic_expr (dump_file, DR_STEP (dr), TDF_SLIM); + fprintf (dump_file, "\n\tbase alignment: %d", DR_BASE_ALIGNMENT (dr)); + fprintf (dump_file, "\n\tbase misalignment: %d", + DR_BASE_MISALIGNMENT (dr)); fprintf (dump_file, "\n\toffset alignment: %d", DR_OFFSET_ALIGNMENT (dr)); fprintf (dump_file, "\n\tstep alignment: %d", DR_STEP_ALIGNMENT (dr)); Index: gcc/tree-vectorizer.h =================================================================== --- gcc/tree-vectorizer.h 2017-07-03 07:52:14.196782157 +0100 +++ gcc/tree-vectorizer.h 2017-07-03 07:52:55.921272300 +0100 @@ -707,6 +707,9 @@ #define STMT_VINFO_DR_BASE_ADDRESS(S) #define STMT_VINFO_DR_INIT(S) (S)->dr_wrt_vec_loop.init #define STMT_VINFO_DR_OFFSET(S) (S)->dr_wrt_vec_loop.offset #define STMT_VINFO_DR_STEP(S) (S)->dr_wrt_vec_loop.step +#define STMT_VINFO_DR_BASE_ALIGNMENT(S) (S)->dr_wrt_vec_loop.base_alignment +#define STMT_VINFO_DR_BASE_MISALIGNMENT(S) \ + (S)->dr_wrt_vec_loop.base_misalignment #define STMT_VINFO_DR_OFFSET_ALIGNMENT(S) \ (S)->dr_wrt_vec_loop.offset_alignment #define STMT_VINFO_DR_STEP_ALIGNMENT(S) \ Index: gcc/tree-vect-data-refs.c =================================================================== --- 
gcc/tree-vect-data-refs.c 2017-07-03 07:52:14.194782203 +0100 +++ gcc/tree-vect-data-refs.c 2017-07-03 07:52:55.921272300 +0100 @@ -50,6 +50,7 @@ Software Foundation; either version 3, o #include "expr.h" #include "builtins.h" #include "params.h" +#include "tree-cfg.h" /* Return true if load- or store-lanes optab OPTAB is implemented for COUNT vectors of type VECTYPE. NAME is the name of OPTAB. */ @@ -667,8 +668,6 @@ vect_compute_data_ref_alignment (struct struct loop *loop = NULL; tree ref = DR_REF (dr); tree vectype = STMT_VINFO_VECTYPE (stmt_info); - tree base; - unsigned HOST_WIDE_INT alignment; if (dump_enabled_p ()) dump_printf_loc (MSG_NOTE, vect_location, @@ -728,48 +727,18 @@ vect_compute_data_ref_alignment (struct dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location, "step doesn't divide the vector-size.\n"); } - tree base_addr = drb->base_address; - /* To look at alignment of the base we have to preserve an inner MEM_REF - as that carries alignment information of the actual access. */ - base = ref; - while (handled_component_p (base)) - base = TREE_OPERAND (base, 0); - unsigned int base_alignment = 0; - unsigned HOST_WIDE_INT base_bitpos; - get_object_alignment_1 (base, &base_alignment, &base_bitpos); - /* As data-ref analysis strips the MEM_REF down to its base operand - to form DR_BASE_ADDRESS and adds the offset to DR_INIT we have to - adjust things to make base_alignment valid as the alignment of - DR_BASE_ADDRESS. */ - if (TREE_CODE (base) == MEM_REF) - { - /* Note all this only works if DR_BASE_ADDRESS is the same as - MEM_REF operand zero, otherwise DR/SCEV analysis might have factored - in other offsets. We need to rework DR to compute the alingment - of DR_BASE_ADDRESS as long as all information is still available. 
*/ - if (operand_equal_p (TREE_OPERAND (base, 0), base_addr, 0)) - { - base_bitpos -= mem_ref_offset (base).to_short_addr () * BITS_PER_UNIT; - base_bitpos &= (base_alignment - 1); - } - else - base_bitpos = BITS_PER_UNIT; - } - if (base_bitpos != 0) - base_alignment = base_bitpos & -base_bitpos; - /* Also look at the alignment of the base address DR analysis - computed. */ - unsigned int base_addr_alignment = get_pointer_alignment (base_addr); - if (base_addr_alignment > base_alignment) - base_alignment = base_addr_alignment; + unsigned int base_alignment = drb->base_alignment; + unsigned int base_misalignment = drb->base_misalignment; + unsigned HOST_WIDE_INT vector_alignment = TYPE_ALIGN_UNIT (vectype); + unsigned HOST_WIDE_INT element_alignment + = TYPE_ALIGN_UNIT (TREE_TYPE (vectype)); - if (base_alignment >= TYPE_ALIGN (TREE_TYPE (vectype))) + if (base_alignment >= element_alignment + && (base_misalignment & (element_alignment - 1)) == 0) DR_VECT_AUX (dr)->base_element_aligned = true; - alignment = TYPE_ALIGN_UNIT (vectype); - - if (drb->offset_alignment < alignment + if (drb->offset_alignment < vector_alignment || !step_preserves_misalignment_p /* We need to know whether the step wrt the vectorized loop is negative when computing the starting misalignment below. 
*/ @@ -785,12 +754,13 @@ vect_compute_data_ref_alignment (struct return true; } - if (base_alignment < TYPE_ALIGN (vectype)) + if (base_alignment < vector_alignment) { - base = base_addr; + tree base = drb->base_address; if (TREE_CODE (base) == ADDR_EXPR) base = TREE_OPERAND (base, 0); - if (!vect_can_force_dr_alignment_p (base, TYPE_ALIGN (vectype))) + if (!vect_can_force_dr_alignment_p (base, + vector_alignment * BITS_PER_UNIT)) { if (dump_enabled_p ()) { @@ -828,24 +798,20 @@ vect_compute_data_ref_alignment (struct DR_VECT_AUX (dr)->base_decl = base; DR_VECT_AUX (dr)->base_misaligned = true; DR_VECT_AUX (dr)->base_element_aligned = true; + base_misalignment = 0; } + unsigned int misalignment = (base_misalignment + + TREE_INT_CST_LOW (drb->init)); /* If this is a backward running DR then first access in the larger vectype actually is N-1 elements before the address in the DR. Adjust misalign accordingly. */ - tree misalign = drb->init; if (tree_int_cst_sgn (drb->step) < 0) - { - tree offset = ssize_int (TYPE_VECTOR_SUBPARTS (vectype) - 1); - /* DR_STEP(dr) is the same as -TYPE_SIZE of the scalar type, - otherwise we wouldn't be here. */ - offset = fold_build2 (MULT_EXPR, ssizetype, offset, drb->step); - /* PLUS because STEP was negative. */ - misalign = size_binop (PLUS_EXPR, misalign, offset); - } + /* PLUS because STEP is negative. */ + misalignment += ((TYPE_VECTOR_SUBPARTS (vectype) - 1) + * TREE_INT_CST_LOW (drb->step)); - SET_DR_MISALIGNMENT (dr, - wi::mod_floor (misalign, alignment, SIGNED).to_uhwi ()); + SET_DR_MISALIGNMENT (dr, misalignment & (vector_alignment - 1)); if (dump_enabled_p ()) { @@ -3554,100 +3520,27 @@ vect_analyze_data_refs (vec_info *vinfo, the outer-loop. 
*/ if (loop && nested_in_vect_loop_p (loop, stmt)) { - tree outer_step, outer_base, outer_init; - HOST_WIDE_INT pbitsize, pbitpos; - tree poffset; - machine_mode pmode; - int punsignedp, preversep, pvolatilep; - affine_iv base_iv, offset_iv; - tree dinit; - /* Build a reference to the first location accessed by the - inner-loop: *(BASE+INIT). (The first location is actually - BASE+INIT+OFFSET, but we add OFFSET separately later). */ - tree inner_base = build_fold_indirect_ref - (fold_build_pointer_plus (base, init)); + inner loop: *(BASE + INIT + OFFSET). By construction, + this address must be invariant in the inner loop, so we + can consider it as being used in the outer loop. */ + tree init_offset = fold_build2 (PLUS_EXPR, TREE_TYPE (offset), + init, offset); + tree init_addr = fold_build_pointer_plus (base, init_offset); + tree init_ref = build_fold_indirect_ref (init_addr); if (dump_enabled_p ()) { dump_printf_loc (MSG_NOTE, vect_location, - "analyze in outer-loop: "); - dump_generic_expr (MSG_NOTE, TDF_SLIM, inner_base); + "analyze in outer loop: "); + dump_generic_expr (MSG_NOTE, TDF_SLIM, init_ref); dump_printf (MSG_NOTE, "\n"); } - outer_base = get_inner_reference (inner_base, &pbitsize, &pbitpos, - &poffset, &pmode, &punsignedp, - &preversep, &pvolatilep); - gcc_assert (outer_base != NULL_TREE); - - if (pbitpos % BITS_PER_UNIT != 0) - { - if (dump_enabled_p ()) - dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location, - "failed: bit offset alignment.\n"); - return false; - } - - if (preversep) - { - if (dump_enabled_p ()) - dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location, - "failed: reverse storage order.\n"); - return false; - } - - outer_base = build_fold_addr_expr (outer_base); - if (!simple_iv (loop, loop_containing_stmt (stmt), outer_base, - &base_iv, false)) - { - if (dump_enabled_p ()) - dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location, - "failed: evolution of base is not affine.\n"); - return false; - } - - if (offset) - { - if 
(poffset) - poffset = fold_build2 (PLUS_EXPR, TREE_TYPE (offset), offset, - poffset); - else - poffset = offset; - } - - if (!poffset) - { - offset_iv.base = ssize_int (0); - offset_iv.step = ssize_int (0); - } - else if (!simple_iv (loop, loop_containing_stmt (stmt), poffset, - &offset_iv, false)) - { - if (dump_enabled_p ()) - dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location, - "evolution of offset is not affine.\n"); - return false; - } - - outer_init = ssize_int (pbitpos / BITS_PER_UNIT); - split_constant_offset (base_iv.base, &base_iv.base, &dinit); - outer_init = size_binop (PLUS_EXPR, outer_init, dinit); - split_constant_offset (offset_iv.base, &offset_iv.base, &dinit); - outer_init = size_binop (PLUS_EXPR, outer_init, dinit); - - outer_step = size_binop (PLUS_EXPR, - fold_convert (ssizetype, base_iv.step), - fold_convert (ssizetype, offset_iv.step)); - - STMT_VINFO_DR_STEP (stmt_info) = outer_step; - /* FIXME: Use canonicalize_base_object_address (base_iv.base); */ - STMT_VINFO_DR_BASE_ADDRESS (stmt_info) = base_iv.base; - STMT_VINFO_DR_INIT (stmt_info) = outer_init; - STMT_VINFO_DR_OFFSET (stmt_info) = - fold_convert (ssizetype, offset_iv.base); - STMT_VINFO_DR_OFFSET_ALIGNMENT (stmt_info) - = highest_pow2_factor (offset_iv.base); + if (!dr_analyze_innermost (&STMT_VINFO_DR_WRT_VEC_LOOP (stmt_info), + init_ref, loop)) + /* dr_analyze_innermost already explained the failure. 
*/ + return false; if (dump_enabled_p ()) { @@ -3665,6 +3558,10 @@ vect_analyze_data_refs (vec_info *vinfo, dump_printf (MSG_NOTE, "\n\touter step: "); dump_generic_expr (MSG_NOTE, TDF_SLIM, STMT_VINFO_DR_STEP (stmt_info)); + dump_printf (MSG_NOTE, "\n\touter base alignment: %d\n", + STMT_VINFO_DR_BASE_ALIGNMENT (stmt_info)); + dump_printf (MSG_NOTE, "\n\touter base misalignment: %d\n", + STMT_VINFO_DR_BASE_MISALIGNMENT (stmt_info)); dump_printf (MSG_NOTE, "\n\touter offset alignment: %d\n", STMT_VINFO_DR_OFFSET_ALIGNMENT (stmt_info)); dump_printf (MSG_NOTE, "\n\touter step alignment: %d\n",