Message ID | g48vvffutg.fsf@linaro.org |
---|---|
State | Accepted |
Headers | show |
On Tue, Apr 12, 2011 at 3:44 PM, Richard Sandiford <richard.sandiford@linaro.org> wrote:
> NEON has vld3 and vst3 instructions, which support an interleaving of
> three vectors. This patch therefore removes the blanket power-of-two
> requirement for interleaving and enforces it on a per-operation
> basis instead.
>
> The patch also replaces:
>
> /* Check that the operation is supported. */
> if (!vect_strided_store_supported (vectype))
> return false;
>
> with:
>
> gcc_assert (vect_strided_store_supported (vectype, length));
>
> because it was vectorizable_store's responsibility to check this upfront.
> Likewise for loads.
>
> Tested on x86_64-linux-gnu and arm-linux-gnueabi. OK to install?

Ok.

Thanks,
Richard.

> Richard
>
>
> gcc/
> * tree-vectorizer.h (vect_strided_store_supported): Add a
> HOST_WIDE_INT argument.
> (vect_strided_load_supported): Likewise.
> (vect_permute_store_chain): Return void.
> (vect_transform_strided_load): Likewise.
> (vect_permute_load_chain): Delete.
> * tree-vect-data-refs.c (vect_strided_store_supported): Take a
> count argument. Check that the count is a power of two.
> (vect_strided_load_supported): Likewise.
> (vect_permute_store_chain): Return void. Update after above changes.
> Assert that the access is supported.
> (vect_permute_load_chain): Likewise.
> (vect_transform_strided_load): Return void.
> * tree-vect-stmts.c (vectorizable_store): Update calls after
> above interface changes.
> (vectorizable_load): Likewise.
> (vect_analyze_stmt): Don't check for strided powers of two here.
> > Index: gcc/tree-vectorizer.h > =================================================================== > --- gcc/tree-vectorizer.h 2011-04-12 11:55:07.000000000 +0100 > +++ gcc/tree-vectorizer.h 2011-04-12 11:55:09.000000000 +0100 > @@ -828,16 +828,14 @@ extern tree vect_create_data_ref_ptr (gi > gimple *, bool, bool *); > extern tree bump_vector_ptr (tree, gimple, gimple_stmt_iterator *, gimple, tree); > extern tree vect_create_destination_var (tree, tree); > -extern bool vect_strided_store_supported (tree); > -extern bool vect_strided_load_supported (tree); > -extern bool vect_permute_store_chain (VEC(tree,heap) *,unsigned int, gimple, > +extern bool vect_strided_store_supported (tree, unsigned HOST_WIDE_INT); > +extern bool vect_strided_load_supported (tree, unsigned HOST_WIDE_INT); > +extern void vect_permute_store_chain (VEC(tree,heap) *,unsigned int, gimple, > gimple_stmt_iterator *, VEC(tree,heap) **); > extern tree vect_setup_realignment (gimple, gimple_stmt_iterator *, tree *, > enum dr_alignment_support, tree, > struct loop **); > -extern bool vect_permute_load_chain (VEC(tree,heap) *,unsigned int, gimple, > - gimple_stmt_iterator *, VEC(tree,heap) **); > -extern bool vect_transform_strided_load (gimple, VEC(tree,heap) *, int, > +extern void vect_transform_strided_load (gimple, VEC(tree,heap) *, int, > gimple_stmt_iterator *); > extern int vect_get_place_in_interleaving_chain (gimple, gimple); > extern tree vect_get_new_vect_var (tree, enum vect_var_kind, const char *); > Index: gcc/tree-vect-data-refs.c > =================================================================== > --- gcc/tree-vect-data-refs.c 2011-04-12 11:55:07.000000000 +0100 > +++ gcc/tree-vect-data-refs.c 2011-04-12 11:55:09.000000000 +0100 > @@ -2196,19 +2196,6 @@ vect_analyze_group_access (struct data_r > return false; > } > > - /* FORNOW: we handle only interleaving that is a power of 2. > - We don't fail here if it may be still possible to vectorize the > - group using SLP. 
If not, the size of the group will be checked in > - vect_analyze_operations, and the vectorization will fail. */ > - if (exact_log2 (stride) == -1) > - { > - if (vect_print_dump_info (REPORT_DETAILS)) > - fprintf (vect_dump, "interleaving is not a power of 2"); > - > - if (slp_impossible) > - return false; > - } > - > if (stride == 0) > stride = count; > > @@ -3349,13 +3336,22 @@ vect_create_destination_var (tree scalar > and FALSE otherwise. */ > > bool > -vect_strided_store_supported (tree vectype) > +vect_strided_store_supported (tree vectype, unsigned HOST_WIDE_INT count) > { > optab interleave_high_optab, interleave_low_optab; > enum machine_mode mode; > > mode = TYPE_MODE (vectype); > > + /* vect_permute_store_chain requires the group size to be a power of two. */ > + if (exact_log2 (count) == -1) > + { > + if (vect_print_dump_info (REPORT_DETAILS)) > + fprintf (vect_dump, "the size of the group of strided accesses" > + " is not a power of 2"); > + return false; > + } > + > /* Check that the operation is supported. */ > interleave_high_optab = optab_for_tree_code (VEC_INTERLEAVE_HIGH_EXPR, > vectype, optab_default); > @@ -3441,7 +3437,7 @@ vect_strided_store_supported (tree vecty > I3: 4 12 20 28 5 13 21 30 > I4: 6 14 22 30 7 15 23 31. */ > > -bool > +void > vect_permute_store_chain (VEC(tree,heap) *dr_chain, > unsigned int length, > gimple stmt, > @@ -3455,9 +3451,7 @@ vect_permute_store_chain (VEC(tree,heap) > unsigned int j; > enum tree_code high_code, low_code; > > - /* Check that the operation is supported. 
*/ > - if (!vect_strided_store_supported (vectype)) > - return false; > + gcc_assert (vect_strided_store_supported (vectype, length)); > > *result_chain = VEC_copy (tree, heap, dr_chain); > > @@ -3510,7 +3504,6 @@ vect_permute_store_chain (VEC(tree,heap) > } > dr_chain = VEC_copy (tree, heap, *result_chain); > } > - return true; > } > > /* Function vect_setup_realignment > @@ -3787,13 +3780,22 @@ vect_setup_realignment (gimple stmt, gim > and FALSE otherwise. */ > > bool > -vect_strided_load_supported (tree vectype) > +vect_strided_load_supported (tree vectype, unsigned HOST_WIDE_INT count) > { > optab perm_even_optab, perm_odd_optab; > enum machine_mode mode; > > mode = TYPE_MODE (vectype); > > + /* vect_permute_load_chain requires the group size to be a power of two. */ > + if (exact_log2 (count) == -1) > + { > + if (vect_print_dump_info (REPORT_DETAILS)) > + fprintf (vect_dump, "the size of the group of strided accesses" > + " is not a power of 2"); > + return false; > + } > + > perm_even_optab = optab_for_tree_code (VEC_EXTRACT_EVEN_EXPR, vectype, > optab_default); > if (!perm_even_optab) > @@ -3905,7 +3907,7 @@ vect_strided_load_supported (tree vectyp > 3rd vec (E2): 2 6 10 14 18 22 26 30 > 4th vec (E4): 3 7 11 15 19 23 27 31. */ > > -bool > +static void > vect_permute_load_chain (VEC(tree,heap) *dr_chain, > unsigned int length, > gimple stmt, > @@ -3918,9 +3920,7 @@ vect_permute_load_chain (VEC(tree,heap) > int i; > unsigned int j; > > - /* Check that the operation is supported. */ > - if (!vect_strided_load_supported (vectype)) > - return false; > + gcc_assert (vect_strided_load_supported (vectype, length)); > > *result_chain = VEC_copy (tree, heap, dr_chain); > for (i = 0; i < exact_log2 (length); i++) > @@ -3963,7 +3963,6 @@ vect_permute_load_chain (VEC(tree,heap) > } > dr_chain = VEC_copy (tree, heap, *result_chain); > } > - return true; > } > > > @@ -3974,7 +3973,7 @@ vect_permute_load_chain (VEC(tree,heap) > the scalar statements. 
> */ > > -bool > +void > vect_transform_strided_load (gimple stmt, VEC(tree,heap) *dr_chain, int size, > gimple_stmt_iterator *gsi) > { > @@ -3990,8 +3989,7 @@ vect_transform_strided_load (gimple stmt > vectors, that are ready for vector computation. */ > result_chain = VEC_alloc (tree, heap, size); > /* Permute. */ > - if (!vect_permute_load_chain (dr_chain, size, stmt, gsi, &result_chain)) > - return false; > + vect_permute_load_chain (dr_chain, size, stmt, gsi, &result_chain); > > /* Put a permuted data-ref in the VECTORIZED_STMT field. > Since we scan the chain starting from it's first node, their order > @@ -4055,7 +4053,6 @@ vect_transform_strided_load (gimple stmt > } > > VEC_free (tree, heap, result_chain); > - return true; > } > > /* Function vect_force_dr_alignment_p. > Index: gcc/tree-vect-stmts.c > =================================================================== > --- gcc/tree-vect-stmts.c 2011-04-12 11:55:09.000000000 +0100 > +++ gcc/tree-vect-stmts.c 2011-04-12 11:55:09.000000000 +0100 > @@ -3412,9 +3412,12 @@ vectorizable_store (gimple stmt, gimple_ > { > strided_store = true; > first_stmt = DR_GROUP_FIRST_DR (stmt_info); > - if (!vect_strided_store_supported (vectype) > - && !PURE_SLP_STMT (stmt_info) && !slp) > - return false; > + if (!slp && !PURE_SLP_STMT (stmt_info)) > + { > + group_size = DR_GROUP_SIZE (vinfo_for_stmt (first_stmt)); > + if (!vect_strided_store_supported (vectype, group_size)) > + return false; > + } > > if (first_stmt == stmt) > { > @@ -3617,9 +3620,8 @@ vectorizable_store (gimple stmt, gimple_ > { > result_chain = VEC_alloc (tree, heap, group_size); > /* Permute. */ > - if (!vect_permute_store_chain (dr_chain, group_size, stmt, gsi, > - &result_chain)) > - return false; > + vect_permute_store_chain (dr_chain, group_size, stmt, gsi, > + &result_chain); > } > > next_stmt = first_stmt; > @@ -3912,10 +3914,13 @@ vectorizable_load (gimple stmt, gimple_s > /* FORNOW */ > gcc_assert (! 
nested_in_vect_loop); > > - /* Check if interleaving is supported. */ > - if (!vect_strided_load_supported (vectype) > - && !PURE_SLP_STMT (stmt_info) && !slp) > - return false; > + first_stmt = DR_GROUP_FIRST_DR (stmt_info); > + if (!slp && !PURE_SLP_STMT (stmt_info)) > + { > + group_size = DR_GROUP_SIZE (vinfo_for_stmt (first_stmt)); > + if (!vect_strided_load_supported (vectype, group_size)) > + return false; > + } > } > > if (negative) > @@ -4344,10 +4349,7 @@ vectorizable_load (gimple stmt, gimple_s > { > if (strided_load) > { > - if (!vect_transform_strided_load (stmt, dr_chain, > - group_size, gsi)) > - return false; > - > + vect_transform_strided_load (stmt, dr_chain, group_size, gsi); > *vec_stmt = STMT_VINFO_VEC_STMT (stmt_info); > } > else > @@ -4766,27 +4768,6 @@ vect_analyze_stmt (gimple stmt, bool *ne > return false; > } > > - if (!PURE_SLP_STMT (stmt_info)) > - { > - /* Groups of strided accesses whose size is not a power of 2 are not > - vectorizable yet using loop-vectorization. Therefore, if this stmt > - feeds non-SLP-able stmts (i.e., this stmt has to be both SLPed and > - loop-based vectorized), the loop cannot be vectorized. */ > - if (STMT_VINFO_STRIDED_ACCESS (stmt_info) > - && exact_log2 (DR_GROUP_SIZE (vinfo_for_stmt ( > - DR_GROUP_FIRST_DR (stmt_info)))) == -1) > - { > - if (vect_print_dump_info (REPORT_DETAILS)) > - { > - fprintf (vect_dump, "not vectorized: the size of group " > - "of strided accesses is not a power of 2"); > - print_gimple_stmt (vect_dump, stmt, 0, TDF_SLIM); > - } > - > - return false; > - } > - } > - > return true; > } > >
Index: gcc/tree-vectorizer.h =================================================================== --- gcc/tree-vectorizer.h 2011-04-12 11:55:07.000000000 +0100 +++ gcc/tree-vectorizer.h 2011-04-12 11:55:09.000000000 +0100 @@ -828,16 +828,14 @@ extern tree vect_create_data_ref_ptr (gi gimple *, bool, bool *); extern tree bump_vector_ptr (tree, gimple, gimple_stmt_iterator *, gimple, tree); extern tree vect_create_destination_var (tree, tree); -extern bool vect_strided_store_supported (tree); -extern bool vect_strided_load_supported (tree); -extern bool vect_permute_store_chain (VEC(tree,heap) *,unsigned int, gimple, +extern bool vect_strided_store_supported (tree, unsigned HOST_WIDE_INT); +extern bool vect_strided_load_supported (tree, unsigned HOST_WIDE_INT); +extern void vect_permute_store_chain (VEC(tree,heap) *,unsigned int, gimple, gimple_stmt_iterator *, VEC(tree,heap) **); extern tree vect_setup_realignment (gimple, gimple_stmt_iterator *, tree *, enum dr_alignment_support, tree, struct loop **); -extern bool vect_permute_load_chain (VEC(tree,heap) *,unsigned int, gimple, - gimple_stmt_iterator *, VEC(tree,heap) **); -extern bool vect_transform_strided_load (gimple, VEC(tree,heap) *, int, +extern void vect_transform_strided_load (gimple, VEC(tree,heap) *, int, gimple_stmt_iterator *); extern int vect_get_place_in_interleaving_chain (gimple, gimple); extern tree vect_get_new_vect_var (tree, enum vect_var_kind, const char *); Index: gcc/tree-vect-data-refs.c =================================================================== --- gcc/tree-vect-data-refs.c 2011-04-12 11:55:07.000000000 +0100 +++ gcc/tree-vect-data-refs.c 2011-04-12 11:55:09.000000000 +0100 @@ -2196,19 +2196,6 @@ vect_analyze_group_access (struct data_r return false; } - /* FORNOW: we handle only interleaving that is a power of 2. - We don't fail here if it may be still possible to vectorize the - group using SLP. 
If not, the size of the group will be checked in - vect_analyze_operations, and the vectorization will fail. */ - if (exact_log2 (stride) == -1) - { - if (vect_print_dump_info (REPORT_DETAILS)) - fprintf (vect_dump, "interleaving is not a power of 2"); - - if (slp_impossible) - return false; - } - if (stride == 0) stride = count; @@ -3349,13 +3336,22 @@ vect_create_destination_var (tree scalar and FALSE otherwise. */ bool -vect_strided_store_supported (tree vectype) +vect_strided_store_supported (tree vectype, unsigned HOST_WIDE_INT count) { optab interleave_high_optab, interleave_low_optab; enum machine_mode mode; mode = TYPE_MODE (vectype); + /* vect_permute_store_chain requires the group size to be a power of two. */ + if (exact_log2 (count) == -1) + { + if (vect_print_dump_info (REPORT_DETAILS)) + fprintf (vect_dump, "the size of the group of strided accesses" + " is not a power of 2"); + return false; + } + /* Check that the operation is supported. */ interleave_high_optab = optab_for_tree_code (VEC_INTERLEAVE_HIGH_EXPR, vectype, optab_default); @@ -3441,7 +3437,7 @@ vect_strided_store_supported (tree vecty I3: 4 12 20 28 5 13 21 30 I4: 6 14 22 30 7 15 23 31. */ -bool +void vect_permute_store_chain (VEC(tree,heap) *dr_chain, unsigned int length, gimple stmt, @@ -3455,9 +3451,7 @@ vect_permute_store_chain (VEC(tree,heap) unsigned int j; enum tree_code high_code, low_code; - /* Check that the operation is supported. */ - if (!vect_strided_store_supported (vectype)) - return false; + gcc_assert (vect_strided_store_supported (vectype, length)); *result_chain = VEC_copy (tree, heap, dr_chain); @@ -3510,7 +3504,6 @@ vect_permute_store_chain (VEC(tree,heap) } dr_chain = VEC_copy (tree, heap, *result_chain); } - return true; } /* Function vect_setup_realignment @@ -3787,13 +3780,22 @@ vect_setup_realignment (gimple stmt, gim and FALSE otherwise. 
*/ bool -vect_strided_load_supported (tree vectype) +vect_strided_load_supported (tree vectype, unsigned HOST_WIDE_INT count) { optab perm_even_optab, perm_odd_optab; enum machine_mode mode; mode = TYPE_MODE (vectype); + /* vect_permute_load_chain requires the group size to be a power of two. */ + if (exact_log2 (count) == -1) + { + if (vect_print_dump_info (REPORT_DETAILS)) + fprintf (vect_dump, "the size of the group of strided accesses" + " is not a power of 2"); + return false; + } + perm_even_optab = optab_for_tree_code (VEC_EXTRACT_EVEN_EXPR, vectype, optab_default); if (!perm_even_optab) @@ -3905,7 +3907,7 @@ vect_strided_load_supported (tree vectyp 3rd vec (E2): 2 6 10 14 18 22 26 30 4th vec (E4): 3 7 11 15 19 23 27 31. */ -bool +static void vect_permute_load_chain (VEC(tree,heap) *dr_chain, unsigned int length, gimple stmt, @@ -3918,9 +3920,7 @@ vect_permute_load_chain (VEC(tree,heap) int i; unsigned int j; - /* Check that the operation is supported. */ - if (!vect_strided_load_supported (vectype)) - return false; + gcc_assert (vect_strided_load_supported (vectype, length)); *result_chain = VEC_copy (tree, heap, dr_chain); for (i = 0; i < exact_log2 (length); i++) @@ -3963,7 +3963,6 @@ vect_permute_load_chain (VEC(tree,heap) } dr_chain = VEC_copy (tree, heap, *result_chain); } - return true; } @@ -3974,7 +3973,7 @@ vect_permute_load_chain (VEC(tree,heap) the scalar statements. */ -bool +void vect_transform_strided_load (gimple stmt, VEC(tree,heap) *dr_chain, int size, gimple_stmt_iterator *gsi) { @@ -3990,8 +3989,7 @@ vect_transform_strided_load (gimple stmt vectors, that are ready for vector computation. */ result_chain = VEC_alloc (tree, heap, size); /* Permute. */ - if (!vect_permute_load_chain (dr_chain, size, stmt, gsi, &result_chain)) - return false; + vect_permute_load_chain (dr_chain, size, stmt, gsi, &result_chain); /* Put a permuted data-ref in the VECTORIZED_STMT field. 
Since we scan the chain starting from it's first node, their order @@ -4055,7 +4053,6 @@ vect_transform_strided_load (gimple stmt } VEC_free (tree, heap, result_chain); - return true; } /* Function vect_force_dr_alignment_p. Index: gcc/tree-vect-stmts.c =================================================================== --- gcc/tree-vect-stmts.c 2011-04-12 11:55:09.000000000 +0100 +++ gcc/tree-vect-stmts.c 2011-04-12 11:55:09.000000000 +0100 @@ -3412,9 +3412,12 @@ vectorizable_store (gimple stmt, gimple_ { strided_store = true; first_stmt = DR_GROUP_FIRST_DR (stmt_info); - if (!vect_strided_store_supported (vectype) - && !PURE_SLP_STMT (stmt_info) && !slp) - return false; + if (!slp && !PURE_SLP_STMT (stmt_info)) + { + group_size = DR_GROUP_SIZE (vinfo_for_stmt (first_stmt)); + if (!vect_strided_store_supported (vectype, group_size)) + return false; + } if (first_stmt == stmt) { @@ -3617,9 +3620,8 @@ vectorizable_store (gimple stmt, gimple_ { result_chain = VEC_alloc (tree, heap, group_size); /* Permute. */ - if (!vect_permute_store_chain (dr_chain, group_size, stmt, gsi, - &result_chain)) - return false; + vect_permute_store_chain (dr_chain, group_size, stmt, gsi, + &result_chain); } next_stmt = first_stmt; @@ -3912,10 +3914,13 @@ vectorizable_load (gimple stmt, gimple_s /* FORNOW */ gcc_assert (! nested_in_vect_loop); - /* Check if interleaving is supported. 
*/ - if (!vect_strided_load_supported (vectype) - && !PURE_SLP_STMT (stmt_info) && !slp) - return false; + first_stmt = DR_GROUP_FIRST_DR (stmt_info); + if (!slp && !PURE_SLP_STMT (stmt_info)) + { + group_size = DR_GROUP_SIZE (vinfo_for_stmt (first_stmt)); + if (!vect_strided_load_supported (vectype, group_size)) + return false; + } } if (negative) @@ -4344,10 +4349,7 @@ vectorizable_load (gimple stmt, gimple_s { if (strided_load) { - if (!vect_transform_strided_load (stmt, dr_chain, - group_size, gsi)) - return false; - + vect_transform_strided_load (stmt, dr_chain, group_size, gsi); *vec_stmt = STMT_VINFO_VEC_STMT (stmt_info); } else @@ -4766,27 +4768,6 @@ vect_analyze_stmt (gimple stmt, bool *ne return false; } - if (!PURE_SLP_STMT (stmt_info)) - { - /* Groups of strided accesses whose size is not a power of 2 are not - vectorizable yet using loop-vectorization. Therefore, if this stmt - feeds non-SLP-able stmts (i.e., this stmt has to be both SLPed and - loop-based vectorized), the loop cannot be vectorized. */ - if (STMT_VINFO_STRIDED_ACCESS (stmt_info) - && exact_log2 (DR_GROUP_SIZE (vinfo_for_stmt ( - DR_GROUP_FIRST_DR (stmt_info)))) == -1) - { - if (vect_print_dump_info (REPORT_DETAILS)) - { - fprintf (vect_dump, "not vectorized: the size of group " - "of strided accesses is not a power of 2"); - print_gimple_stmt (vect_dump, stmt, 0, TDF_SLIM); - } - - return false; - } - } - return true; }