Message ID | 87muwqlvr3.fsf@linaro.org |
---|---|
State | Accepted |
Commit | 159440699bf6f97dccc94377d9d69e540a1904dc |
Headers | show |
Series | Fix SLP def type when computing masks (PR85853) |
On Wed, May 23, 2018 at 8:41 AM Richard Sandiford < richard.sandiford@linaro.org> wrote: > In this PR, SLP failed to include a comparison node in the SLP > tree and so marked the node as external. It then went on to call > vect_is_simple_use on the comparison with its STMT_VINFO_DEF_TYPE > still claiming that it was an internal definition. > We already avoid that for vect_analyze_stmt by temporarily copying > the node's definition type to each STMT_VINFO_DEF_TYPE. This patch > extends that to the vector type calculation. The easiest thing > seemed to be to split the analysis of the root node out into > a subroutine, so that it's possible to return false early without > awkward control flow. > Tested on aarch64-linux-gnu (with and without SLP), aarch64_be-elf > and x86_64-linux-gnu. OK to install? OK. Richard. > Richard > 2018-05-23 Richard Sandiford <richard.sandiford@linaro.org> > gcc/ > PR tree-optimization/85853 > * tree-vect-slp.c (vect_slp_analyze_node_operations): Split out > handling of the root of the node to... > (vect_slp_analyze_node_operations_1): ...this new function, > and run the whole thing with the child nodes' def types > set according to their SLP node's def type. > gcc/testsuite/ > PR tree-optimization/85853 > * gfortran.dg/vect/pr85853.f90: New test. > Index: gcc/tree-vect-slp.c > =================================================================== > --- gcc/tree-vect-slp.c 2018-05-17 11:50:31.609158213 +0100 > +++ gcc/tree-vect-slp.c 2018-05-23 07:37:12.480578116 +0100 > @@ -2476,49 +2476,16 @@ _bb_vec_info::~_bb_vec_info () > bb->aux = NULL; > } > - > -/* Analyze statements contained in SLP tree NODE after recursively analyzing > - the subtree. NODE_INSTANCE contains NODE and VINFO contains INSTANCE. > - > - Return true if the operations are supported. */ > +/* Subroutine of vect_slp_analyze_node_operations. 
Handle the root of NODE, > + given then that child nodes have already been processed, and that > + their def types currently match their SLP node's def type. */ > static bool > -vect_slp_analyze_node_operations (vec_info *vinfo, slp_tree node, > - slp_instance node_instance, > - scalar_stmts_to_slp_tree_map_t *visited, > - scalar_stmts_to_slp_tree_map_t *lvisited, > - stmt_vector_for_cost *cost_vec) > +vect_slp_analyze_node_operations_1 (vec_info *vinfo, slp_tree node, > + slp_instance node_instance, > + stmt_vector_for_cost *cost_vec) > { > - bool dummy; > - int i, j; > - gimple *stmt; > - slp_tree child; > - > - if (SLP_TREE_DEF_TYPE (node) != vect_internal_def) > - return true; > - > - /* If we already analyzed the exact same set of scalar stmts we're done. > - We share the generated vector stmts for those. */ > - slp_tree *leader; > - if ((leader = visited->get (SLP_TREE_SCALAR_STMTS (node))) > - || (leader = lvisited->get (SLP_TREE_SCALAR_STMTS (node)))) > - { > - SLP_TREE_NUMBER_OF_VEC_STMTS (node) > - = SLP_TREE_NUMBER_OF_VEC_STMTS (*leader); > - return true; > - } > - > - /* The SLP graph is acyclic so not caching whether we failed or succeeded > - doesn't result in any issue since we throw away the lvisited set > - when we fail. 
*/ > - lvisited->put (SLP_TREE_SCALAR_STMTS (node).copy (), node); > - > - FOR_EACH_VEC_ELT (SLP_TREE_CHILDREN (node), i, child) > - if (!vect_slp_analyze_node_operations (vinfo, child, node_instance, > - visited, lvisited, cost_vec)) > - return false; > - > - stmt = SLP_TREE_SCALAR_STMTS (node)[0]; > + gimple *stmt = SLP_TREE_SCALAR_STMTS (node)[0]; > stmt_vec_info stmt_info = vinfo_for_stmt (stmt); > gcc_assert (stmt_info); > gcc_assert (STMT_SLP_TYPE (stmt_info) != loop_vect); > @@ -2545,6 +2512,7 @@ vect_slp_analyze_node_operations (vec_in > } > gimple *sstmt; > + unsigned int i; > FOR_EACH_VEC_ELT (SLP_TREE_SCALAR_STMTS (node), i, sstmt) > STMT_VINFO_VECTYPE (vinfo_for_stmt (sstmt)) = vectype; > } > @@ -2572,12 +2540,56 @@ vect_slp_analyze_node_operations (vec_in > = vect_get_num_vectors (vf * group_size, vectype); > } > + bool dummy; > + return vect_analyze_stmt (stmt, &dummy, node, node_instance, cost_vec); > +} > + > +/* Analyze statements contained in SLP tree NODE after recursively analyzing > + the subtree. NODE_INSTANCE contains NODE and VINFO contains INSTANCE. > + > + Return true if the operations are supported. */ > + > +static bool > +vect_slp_analyze_node_operations (vec_info *vinfo, slp_tree node, > + slp_instance node_instance, > + scalar_stmts_to_slp_tree_map_t *visited, > + scalar_stmts_to_slp_tree_map_t *lvisited, > + stmt_vector_for_cost *cost_vec) > +{ > + int i, j; > + slp_tree child; > + > + if (SLP_TREE_DEF_TYPE (node) != vect_internal_def) > + return true; > + > + /* If we already analyzed the exact same set of scalar stmts we're done. > + We share the generated vector stmts for those. 
*/ > + slp_tree *leader; > + if ((leader = visited->get (SLP_TREE_SCALAR_STMTS (node))) > + || (leader = lvisited->get (SLP_TREE_SCALAR_STMTS (node)))) > + { > + SLP_TREE_NUMBER_OF_VEC_STMTS (node) > + = SLP_TREE_NUMBER_OF_VEC_STMTS (*leader); > + return true; > + } > + > + /* The SLP graph is acyclic so not caching whether we failed or succeeded > + doesn't result in any issue since we throw away the lvisited set > + when we fail. */ > + lvisited->put (SLP_TREE_SCALAR_STMTS (node).copy (), node); > + > + FOR_EACH_VEC_ELT (SLP_TREE_CHILDREN (node), i, child) > + if (!vect_slp_analyze_node_operations (vinfo, child, node_instance, > + visited, lvisited, cost_vec)) > + return false; > + > /* Push SLP node def-type to stmt operands. */ > FOR_EACH_VEC_ELT (SLP_TREE_CHILDREN (node), j, child) > if (SLP_TREE_DEF_TYPE (child) != vect_internal_def) > STMT_VINFO_DEF_TYPE (vinfo_for_stmt (SLP_TREE_SCALAR_STMTS (child)[0])) > = SLP_TREE_DEF_TYPE (child); > - bool res = vect_analyze_stmt (stmt, &dummy, node, node_instance, cost_vec); > + bool res = vect_slp_analyze_node_operations_1 (vinfo, node, node_instance, > + cost_vec); > /* Restore def-types. */ > FOR_EACH_VEC_ELT (SLP_TREE_CHILDREN (node), j, child) > if (SLP_TREE_DEF_TYPE (child) != vect_internal_def) > Index: gcc/testsuite/gfortran.dg/vect/pr85853.f90 > =================================================================== > --- /dev/null 2018-04-20 16:19:46.369131350 +0100 > +++ gcc/testsuite/gfortran.dg/vect/pr85853.f90 2018-05-23 07:37:12.477578485 +0100 > @@ -0,0 +1,26 @@ > +! Taken from execute/where_2.f90, but with special flags. > +! { dg-do run } > +! { dg-additional-options "-O3 -fno-tree-loop-vectorize" } > + > +! Program to test the WHERE constructs > +program where_2 > + integer temp(10), reduce(10) > + > + temp = 10 > + reduce(1:3) = -1 > + reduce(4:6) = 0 > + reduce(7:8) = 5 > + reduce(9:10) = 10 > + > + WHERE (reduce < 0) > + temp = 100 > + ELSE WHERE (reduce .EQ. 
0) > + temp = 200 + temp > + ELSE WHERE > + WHERE (reduce > 6) temp = temp + sum(reduce) > + temp = 300 + temp > + END WHERE > + > + if (any (temp .ne. (/100, 100, 100, 210, 210, 210, 310, 310, 337, 337/))) & > + STOP 1 > +end program
Index: gcc/tree-vect-slp.c =================================================================== --- gcc/tree-vect-slp.c 2018-05-17 11:50:31.609158213 +0100 +++ gcc/tree-vect-slp.c 2018-05-23 07:37:12.480578116 +0100 @@ -2476,49 +2476,16 @@ _bb_vec_info::~_bb_vec_info () bb->aux = NULL; } - -/* Analyze statements contained in SLP tree NODE after recursively analyzing - the subtree. NODE_INSTANCE contains NODE and VINFO contains INSTANCE. - - Return true if the operations are supported. */ +/* Subroutine of vect_slp_analyze_node_operations. Handle the root of NODE, + given then that child nodes have already been processed, and that + their def types currently match their SLP node's def type. */ static bool -vect_slp_analyze_node_operations (vec_info *vinfo, slp_tree node, - slp_instance node_instance, - scalar_stmts_to_slp_tree_map_t *visited, - scalar_stmts_to_slp_tree_map_t *lvisited, - stmt_vector_for_cost *cost_vec) +vect_slp_analyze_node_operations_1 (vec_info *vinfo, slp_tree node, + slp_instance node_instance, + stmt_vector_for_cost *cost_vec) { - bool dummy; - int i, j; - gimple *stmt; - slp_tree child; - - if (SLP_TREE_DEF_TYPE (node) != vect_internal_def) - return true; - - /* If we already analyzed the exact same set of scalar stmts we're done. - We share the generated vector stmts for those. */ - slp_tree *leader; - if ((leader = visited->get (SLP_TREE_SCALAR_STMTS (node))) - || (leader = lvisited->get (SLP_TREE_SCALAR_STMTS (node)))) - { - SLP_TREE_NUMBER_OF_VEC_STMTS (node) - = SLP_TREE_NUMBER_OF_VEC_STMTS (*leader); - return true; - } - - /* The SLP graph is acyclic so not caching whether we failed or succeeded - doesn't result in any issue since we throw away the lvisited set - when we fail. 
*/ - lvisited->put (SLP_TREE_SCALAR_STMTS (node).copy (), node); - - FOR_EACH_VEC_ELT (SLP_TREE_CHILDREN (node), i, child) - if (!vect_slp_analyze_node_operations (vinfo, child, node_instance, - visited, lvisited, cost_vec)) - return false; - - stmt = SLP_TREE_SCALAR_STMTS (node)[0]; + gimple *stmt = SLP_TREE_SCALAR_STMTS (node)[0]; stmt_vec_info stmt_info = vinfo_for_stmt (stmt); gcc_assert (stmt_info); gcc_assert (STMT_SLP_TYPE (stmt_info) != loop_vect); @@ -2545,6 +2512,7 @@ vect_slp_analyze_node_operations (vec_in } gimple *sstmt; + unsigned int i; FOR_EACH_VEC_ELT (SLP_TREE_SCALAR_STMTS (node), i, sstmt) STMT_VINFO_VECTYPE (vinfo_for_stmt (sstmt)) = vectype; } @@ -2572,12 +2540,56 @@ vect_slp_analyze_node_operations (vec_in = vect_get_num_vectors (vf * group_size, vectype); } + bool dummy; + return vect_analyze_stmt (stmt, &dummy, node, node_instance, cost_vec); +} + +/* Analyze statements contained in SLP tree NODE after recursively analyzing + the subtree. NODE_INSTANCE contains NODE and VINFO contains INSTANCE. + + Return true if the operations are supported. */ + +static bool +vect_slp_analyze_node_operations (vec_info *vinfo, slp_tree node, + slp_instance node_instance, + scalar_stmts_to_slp_tree_map_t *visited, + scalar_stmts_to_slp_tree_map_t *lvisited, + stmt_vector_for_cost *cost_vec) +{ + int i, j; + slp_tree child; + + if (SLP_TREE_DEF_TYPE (node) != vect_internal_def) + return true; + + /* If we already analyzed the exact same set of scalar stmts we're done. + We share the generated vector stmts for those. */ + slp_tree *leader; + if ((leader = visited->get (SLP_TREE_SCALAR_STMTS (node))) + || (leader = lvisited->get (SLP_TREE_SCALAR_STMTS (node)))) + { + SLP_TREE_NUMBER_OF_VEC_STMTS (node) + = SLP_TREE_NUMBER_OF_VEC_STMTS (*leader); + return true; + } + + /* The SLP graph is acyclic so not caching whether we failed or succeeded + doesn't result in any issue since we throw away the lvisited set + when we fail. 
*/ + lvisited->put (SLP_TREE_SCALAR_STMTS (node).copy (), node); + + FOR_EACH_VEC_ELT (SLP_TREE_CHILDREN (node), i, child) + if (!vect_slp_analyze_node_operations (vinfo, child, node_instance, + visited, lvisited, cost_vec)) + return false; + /* Push SLP node def-type to stmt operands. */ FOR_EACH_VEC_ELT (SLP_TREE_CHILDREN (node), j, child) if (SLP_TREE_DEF_TYPE (child) != vect_internal_def) STMT_VINFO_DEF_TYPE (vinfo_for_stmt (SLP_TREE_SCALAR_STMTS (child)[0])) = SLP_TREE_DEF_TYPE (child); - bool res = vect_analyze_stmt (stmt, &dummy, node, node_instance, cost_vec); + bool res = vect_slp_analyze_node_operations_1 (vinfo, node, node_instance, + cost_vec); /* Restore def-types. */ FOR_EACH_VEC_ELT (SLP_TREE_CHILDREN (node), j, child) if (SLP_TREE_DEF_TYPE (child) != vect_internal_def) Index: gcc/testsuite/gfortran.dg/vect/pr85853.f90 =================================================================== --- /dev/null 2018-04-20 16:19:46.369131350 +0100 +++ gcc/testsuite/gfortran.dg/vect/pr85853.f90 2018-05-23 07:37:12.477578485 +0100 @@ -0,0 +1,26 @@ +! Taken from execute/where_2.f90, but with special flags. +! { dg-do run } +! { dg-additional-options "-O3 -fno-tree-loop-vectorize" } + +! Program to test the WHERE constructs +program where_2 + integer temp(10), reduce(10) + + temp = 10 + reduce(1:3) = -1 + reduce(4:6) = 0 + reduce(7:8) = 5 + reduce(9:10) = 10 + + WHERE (reduce < 0) + temp = 100 + ELSE WHERE (reduce .EQ. 0) + temp = 200 + temp + ELSE WHERE + WHERE (reduce > 6) temp = temp + sum(reduce) + temp = 300 + temp + END WHERE + + if (any (temp .ne. (/100, 100, 100, 210, 210, 210, 310, 310, 337, 337/))) & + STOP 1 +end program