diff mbox

Support a choice of vector size in SLP

Message ID CAKSNEw5jqUqSsmTKQDE18Nkg7BJLCHjOFkHKLoMFDf07UcQbyg@mail.gmail.com
State New
Headers show

Commit Message

Ira Rosen Sept. 25, 2011, 10:59 a.m. UTC
Hi,

This patch supports an automatic choice of vector size in basic block
vectorization similar to the loop vectorization case.

I am not sure about the name of the new testsuite effective-target
keyword (vect_half_size).

Bootstrapped on powerpc64-suse-linux, tested on powerpc64-suse-linux
and arm-linux-gnueabi.

Thanks,
Ira

ChangeLog:

	* tree-vect-slp.c (vect_slp_analyze_bb_1): Split out core part
	of vect_slp_analyze_bb here.
	(vect_slp_analyze_bb): Loop over vector sizes calling
	vect_slp_analyze_bb_1.

testsuite/ChangeLog:

	* lib/target-supports.exp (check_effective_target_vect_half_size): New.
	* gcc.dg/vect/bb-slp-11.c: Expect the error message twice in case
	of multiple vector sizes.
	* gcc.dg/vect/bb-slp-26.c: New.

Comments

Richard Biener Sept. 25, 2011, 11:45 a.m. UTC | #1
On Sun, Sep 25, 2011 at 12:59 PM, Ira Rosen <ira.rosen@linaro.org> wrote:
> Hi,
>
> This patch supports an automatic choice of vector size in basic block
> vectorization similar to the loop vectorization case.
>
> I am not sure about the new keyword.

The testsuite one?  I guess we should name them vect128, vect256, etc.,
as testcases will be looking for an absolute size, not a relative ("half") one.

Richard.

> Bootstrapped on powerpc64-suse-linux, tested on powerpc64-suse-linux
> and arm-linux-gnueabi.
>
> Thanks,
> Ira
>
> ChangeLog:
>
>        * tree-vect-slp.c (vect_slp_analyze_bb_1): Split out core part
>        of vect_analyze_bb here.
>        (vect_analyze_bb): Loop over vector sizes calling vect_analyze_bb_1.
>
> testsuite/ChangeLog:
>
>        * lib/target-supports.exp (check_effective_target_vect_half_size): New.
>        * gcc.dg/vect/bb-slp-11.c: Expect the error message twice in case
>        of multiple vector sizes.
>        * gcc.dg/vect/bb-slp-26.c: New.
>
> Index: testsuite/lib/target-supports.exp
> ===================================================================
> --- testsuite/lib/target-supports.exp   (revision 179159)
> +++ testsuite/lib/target-supports.exp   (working copy)
> @@ -3393,6 +3393,24 @@ proc check_effective_target_vect_multiple_sizes {
>     return $et_vect_multiple_sizes_saved
>  }
>
> +# Return 1 if the target supports vectors of 8 chars, 4 shorts and 2 ints.
> +
> +proc check_effective_target_vect_half_size { } {
> +    global et_vect_half_size
> +
> +    if [info exists et_vect_half_size_saved] {
> +        verbose "check_effective_target_vect_half_size: using cached result" 2
> +    } else {
> +        set et_vect_half_size_saved 0
> +        if { ([istarget arm*-*-*] && [check_effective_target_arm_neon_ok]) } {
> +           set et_vect_half_size_saved 1
> +        }
> +    }
> +
> +    verbose "check_effective_target_vect_half_size: returning
> $et_vect_half_size_saved" 2
> +    return $et_vect_half_size_saved
> +}
> +
>  # Return 1 if the target supports section-anchors
>
>  proc check_effective_target_section_anchors { } {
> Index: testsuite/gcc.dg/vect/bb-slp-26.c
> ===================================================================
> --- testsuite/gcc.dg/vect/bb-slp-26.c   (revision 0)
> +++ testsuite/gcc.dg/vect/bb-slp-26.c   (revision 0)
> @@ -0,0 +1,59 @@
> +/* { dg-require-effective-target vect_int } */
> +
> +#include <stdarg.h>
> +#include "tree-vect.h"
> +
> +#define A 3
> +#define B 4
> +#define N 256
> +
> +char src[N], dst[N];
> +
> +void foo (char * __restrict__ dst, char * __restrict__ src, int h,
> int stride, int dummy)
> +{
> +  int i;
> +  h /= 16;
> +  for (i = 0; i < h; i++)
> +    {
> +      dst[0] += A*src[0] + src[stride];
> +      dst[1] += A*src[1] + src[1+stride];
> +      dst[2] += A*src[2] + src[2+stride];
> +      dst[3] += A*src[3] + src[3+stride];
> +      dst[4] += A*src[4] + src[4+stride];
> +      dst[5] += A*src[5] + src[5+stride];
> +      dst[6] += A*src[6] + src[6+stride];
> +      dst[7] += A*src[7] + src[7+stride];
> +      dst += 8;
> +      src += 8;
> +      if (dummy == 32)
> +        abort ();
> +   }
> +}
> +
> +
> +int main (void)
> +{
> +  int i;
> +
> +  check_vect ();
> +
> +  for (i = 0; i < N; i++)
> +    {
> +       dst[i] = 0;
> +       src[i] = i/8;
> +    }
> +
> +  foo (dst, src, N, 8, 0);
> +
> +  for (i = 0; i < N/2; i++)
> +    {
> +      if (dst[i] != A * src[i] + src[i+8])
> +        abort ();
> +    }
> +
> +  return 0;
> +}
> +
> +/* { dg-final { scan-tree-dump-times "basic block vectorized using
> SLP" 1 "slp" { target vect_half_size } } } */
> +/* { dg-final { cleanup-tree-dump "slp" } } */
> +
> Index: testsuite/gcc.dg/vect/bb-slp-11.c
> ===================================================================
> --- testsuite/gcc.dg/vect/bb-slp-11.c   (revision 179159)
> +++ testsuite/gcc.dg/vect/bb-slp-11.c   (working copy)
> @@ -49,6 +49,7 @@ int main (void)
>  }
>
>  /* { dg-final { scan-tree-dump-times "basic block vectorized using
> SLP" 0 "slp" } } */
> -/* { dg-final { scan-tree-dump-times "SLP with multiple types" 1 "slp" } } */
> +/* { dg-final { scan-tree-dump-times "SLP with multiple types" 1
> "slp" { xfail vect_multiple_sizes } } } */
> +/* { dg-final { scan-tree-dump-times "SLP with multiple types" 2
> "slp" { target vect_multiple_sizes } } } */
>  /* { dg-final { cleanup-tree-dump "slp" } } */
>
> Index: tree-vect-slp.c
> ===================================================================
> --- tree-vect-slp.c     (revision 179159)
> +++ tree-vect-slp.c     (working copy)
> @@ -1694,42 +1694,18 @@ vect_bb_vectorization_profitable_p (bb_vec_info bb
>
>  /* Check if the basic block can be vectorized.  */
>
> -bb_vec_info
> -vect_slp_analyze_bb (basic_block bb)
> +static bb_vec_info
> +vect_slp_analyze_bb_1 (basic_block bb)
>  {
>   bb_vec_info bb_vinfo;
>   VEC (ddr_p, heap) *ddrs;
>   VEC (slp_instance, heap) *slp_instances;
>   slp_instance instance;
> -  int i, insns = 0;
> -  gimple_stmt_iterator gsi;
> +  int i;
>   int min_vf = 2;
>   int max_vf = MAX_VECTORIZATION_FACTOR;
>   bool data_dependence_in_bb = false;
>
> -  current_vector_size = 0;
> -
> -  if (vect_print_dump_info (REPORT_DETAILS))
> -    fprintf (vect_dump, "===vect_slp_analyze_bb===\n");
> -
> -  for (gsi = gsi_start_bb (bb); !gsi_end_p (gsi); gsi_next (&gsi))
> -    {
> -      gimple stmt = gsi_stmt (gsi);
> -      if (!is_gimple_debug (stmt)
> -         && !gimple_nop_p (stmt)
> -         && gimple_code (stmt) != GIMPLE_LABEL)
> -       insns++;
> -    }
> -
> -  if (insns > PARAM_VALUE (PARAM_SLP_MAX_INSNS_IN_BB))
> -    {
> -      if (vect_print_dump_info (REPORT_UNVECTORIZED_LOCATIONS))
> -        fprintf (vect_dump, "not vectorized: too many instructions in basic "
> -                            "block.\n");
> -
> -      return NULL;
> -    }
> -
>   bb_vinfo = new_bb_vec_info (bb);
>   if (!bb_vinfo)
>     return NULL;
> @@ -1849,6 +1825,61 @@ vect_bb_vectorization_profitable_p (bb_vec_info bb
>  }
>
>
> +bb_vec_info
> +vect_slp_analyze_bb (basic_block bb)
> +{
> +  bb_vec_info bb_vinfo;
> +  int insns = 0;
> +  gimple_stmt_iterator gsi;
> +  unsigned int vector_sizes;
> +
> +  if (vect_print_dump_info (REPORT_DETAILS))
> +    fprintf (vect_dump, "===vect_slp_analyze_bb===\n");
> +
> +  for (gsi = gsi_start_bb (bb); !gsi_end_p (gsi); gsi_next (&gsi))
> +    {
> +      gimple stmt = gsi_stmt (gsi);
> +      if (!is_gimple_debug (stmt)
> +          && !gimple_nop_p (stmt)
> +          && gimple_code (stmt) != GIMPLE_LABEL)
> +        insns++;
> +    }
> +
> +  if (insns > PARAM_VALUE (PARAM_SLP_MAX_INSNS_IN_BB))
> +    {
> +      if (vect_print_dump_info (REPORT_UNVECTORIZED_LOCATIONS))
> +        fprintf (vect_dump, "not vectorized: too many instructions in basic "
> +                            "block.\n");
> +
> +      return NULL;
> +    }
> +
> +  /* Autodetect first vector size we try.  */
> +  current_vector_size = 0;
> +  vector_sizes = targetm.vectorize.autovectorize_vector_sizes ();
> +
> +  while (1)
> +    {
> +      bb_vinfo = vect_slp_analyze_bb_1 (bb);
> +      if (bb_vinfo)
> +        return bb_vinfo;
> +
> +      destroy_bb_vec_info (bb_vinfo);
> +
> +      vector_sizes &= ~current_vector_size;
> +      if (vector_sizes == 0
> +          || current_vector_size == 0)
> +        return NULL;
> +
> +      /* Try the next biggest vector size.  */
> +      current_vector_size = 1 << floor_log2 (vector_sizes);
> +      if (vect_print_dump_info (REPORT_DETAILS))
> +        fprintf (vect_dump, "***** Re-trying analysis with "
> +                 "vector size %d\n", current_vector_size);
> +    }
> +}
> +
> +
>  /* SLP costs are calculated according to SLP instance unrolling factor (i.e.,
>    the number of created vector stmts depends on the unrolling factor).
>    However, the actual number of vector stmts for every SLP node depends on
>
diff mbox

Patch

Index: testsuite/lib/target-supports.exp
===================================================================
--- testsuite/lib/target-supports.exp   (revision 179159)
+++ testsuite/lib/target-supports.exp   (working copy)
@@ -3393,6 +3393,24 @@  proc check_effective_target_vect_multiple_sizes {
     return $et_vect_multiple_sizes_saved
 }

+# Return 1 if the target supports vectors of 8 chars, 4 shorts and 2 ints.
+
+proc check_effective_target_vect_half_size { } {
+    global et_vect_half_size
+
+    if [info exists et_vect_half_size_saved] {
+        verbose "check_effective_target_vect_half_size: using cached result" 2
+    } else {
+        set et_vect_half_size_saved 0
+        if { ([istarget arm*-*-*] && [check_effective_target_arm_neon_ok]) } {
+           set et_vect_half_size_saved 1
+        }
+    }
+
+    verbose "check_effective_target_vect_half_size: returning $et_vect_half_size_saved" 2
+    return $et_vect_half_size_saved
+}
+
 # Return 1 if the target supports section-anchors

 proc check_effective_target_section_anchors { } {
Index: testsuite/gcc.dg/vect/bb-slp-26.c
===================================================================
--- testsuite/gcc.dg/vect/bb-slp-26.c   (revision 0)
+++ testsuite/gcc.dg/vect/bb-slp-26.c   (revision 0)
@@ -0,0 +1,59 @@ 
+/* { dg-require-effective-target vect_int } */
+
+#include <stdarg.h>
+#include "tree-vect.h"
+
+#define A 3
+#define B 4
+#define N 256
+
+char src[N], dst[N];
+
+void foo (char * __restrict__ dst, char * __restrict__ src, int h, int stride, int dummy)
+{
+  int i;
+  h /= 16;
+  for (i = 0; i < h; i++)
+    {
+      dst[0] += A*src[0] + src[stride];
+      dst[1] += A*src[1] + src[1+stride];
+      dst[2] += A*src[2] + src[2+stride];
+      dst[3] += A*src[3] + src[3+stride];
+      dst[4] += A*src[4] + src[4+stride];
+      dst[5] += A*src[5] + src[5+stride];
+      dst[6] += A*src[6] + src[6+stride];
+      dst[7] += A*src[7] + src[7+stride];
+      dst += 8;
+      src += 8;
+      if (dummy == 32)
+        abort ();
+   }
+}
+
+
+int main (void)
+{
+  int i;
+
+  check_vect ();
+
+  for (i = 0; i < N; i++)
+    {
+       dst[i] = 0;
+       src[i] = i/8;
+    }
+
+  foo (dst, src, N, 8, 0);
+
+  for (i = 0; i < N/2; i++)
+    {
+      if (dst[i] != A * src[i] + src[i+8])
+        abort ();
+    }
+
+  return 0;
+}
+
+/* { dg-final { scan-tree-dump-times "basic block vectorized using SLP" 1 "slp" { target vect_half_size } } } */
+/* { dg-final { cleanup-tree-dump "slp" } } */
+
Index: testsuite/gcc.dg/vect/bb-slp-11.c
===================================================================
--- testsuite/gcc.dg/vect/bb-slp-11.c   (revision 179159)
+++ testsuite/gcc.dg/vect/bb-slp-11.c   (working copy)
@@ -49,6 +49,7 @@  int main (void)
 }

 /* { dg-final { scan-tree-dump-times "basic block vectorized using SLP" 0 "slp" } } */
-/* { dg-final { scan-tree-dump-times "SLP with multiple types" 1 "slp" } } */
+/* { dg-final { scan-tree-dump-times "SLP with multiple types" 1 "slp" { xfail vect_multiple_sizes } } } */
+/* { dg-final { scan-tree-dump-times "SLP with multiple types" 2 "slp" { target vect_multiple_sizes } } } */
 /* { dg-final { cleanup-tree-dump "slp" } } */

Index: tree-vect-slp.c
===================================================================
--- tree-vect-slp.c     (revision 179159)
+++ tree-vect-slp.c     (working copy)
@@ -1694,42 +1694,18 @@  vect_bb_vectorization_profitable_p (bb_vec_info bb

 /* Check if the basic block can be vectorized.  */

-bb_vec_info
-vect_slp_analyze_bb (basic_block bb)
+static bb_vec_info
+vect_slp_analyze_bb_1 (basic_block bb)
 {
   bb_vec_info bb_vinfo;
   VEC (ddr_p, heap) *ddrs;
   VEC (slp_instance, heap) *slp_instances;
   slp_instance instance;
-  int i, insns = 0;
-  gimple_stmt_iterator gsi;
+  int i;
   int min_vf = 2;
   int max_vf = MAX_VECTORIZATION_FACTOR;
   bool data_dependence_in_bb = false;

-  current_vector_size = 0;
-
-  if (vect_print_dump_info (REPORT_DETAILS))
-    fprintf (vect_dump, "===vect_slp_analyze_bb===\n");
-
-  for (gsi = gsi_start_bb (bb); !gsi_end_p (gsi); gsi_next (&gsi))
-    {
-      gimple stmt = gsi_stmt (gsi);
-      if (!is_gimple_debug (stmt)
-         && !gimple_nop_p (stmt)
-         && gimple_code (stmt) != GIMPLE_LABEL)
-       insns++;
-    }
-
-  if (insns > PARAM_VALUE (PARAM_SLP_MAX_INSNS_IN_BB))
-    {
-      if (vect_print_dump_info (REPORT_UNVECTORIZED_LOCATIONS))
-        fprintf (vect_dump, "not vectorized: too many instructions in basic "
-                            "block.\n");
-
-      return NULL;
-    }
-
   bb_vinfo = new_bb_vec_info (bb);
   if (!bb_vinfo)
     return NULL;
@@ -1849,6 +1825,61 @@  vect_bb_vectorization_profitable_p (bb_vec_info bb
 }


+bb_vec_info
+vect_slp_analyze_bb (basic_block bb)
+{
+  bb_vec_info bb_vinfo;
+  int insns = 0;
+  gimple_stmt_iterator gsi;
+  unsigned int vector_sizes;
+
+  if (vect_print_dump_info (REPORT_DETAILS))
+    fprintf (vect_dump, "===vect_slp_analyze_bb===\n");
+
+  for (gsi = gsi_start_bb (bb); !gsi_end_p (gsi); gsi_next (&gsi))
+    {
+      gimple stmt = gsi_stmt (gsi);
+      if (!is_gimple_debug (stmt)
+          && !gimple_nop_p (stmt)
+          && gimple_code (stmt) != GIMPLE_LABEL)
+        insns++;
+    }
+
+  if (insns > PARAM_VALUE (PARAM_SLP_MAX_INSNS_IN_BB))
+    {
+      if (vect_print_dump_info (REPORT_UNVECTORIZED_LOCATIONS))
+        fprintf (vect_dump, "not vectorized: too many instructions in basic "
+                            "block.\n");
+
+      return NULL;
+    }
+
+  /* Autodetect first vector size we try.  */
+  current_vector_size = 0;
+  vector_sizes = targetm.vectorize.autovectorize_vector_sizes ();
+
+  while (1)
+    {
+      bb_vinfo = vect_slp_analyze_bb_1 (bb);
+      if (bb_vinfo)
+        return bb_vinfo;
+
+      destroy_bb_vec_info (bb_vinfo);
+
+      vector_sizes &= ~current_vector_size;
+      if (vector_sizes == 0
+          || current_vector_size == 0)
+        return NULL;
+
+      /* Try the next biggest vector size.  */
+      current_vector_size = 1 << floor_log2 (vector_sizes);
+      if (vect_print_dump_info (REPORT_DETAILS))
+        fprintf (vect_dump, "***** Re-trying analysis with "
+                 "vector size %d\n", current_vector_size);
+    }
+}
+
+
 /* SLP costs are calculated according to SLP instance unrolling factor (i.e.,
    the number of created vector stmts depends on the unrolling factor).
    However, the actual number of vector stmts for every SLP node depends on