From patchwork Sun Sep 25 10:59:10 2011
Content-Type: text/plain; charset="utf-8"
MIME-Version: 1.0
Content-Transfer-Encoding: 7bit
X-Patchwork-Submitter: Ira Rosen <ira.rosen@linaro.org>
X-Patchwork-Id: 4315
Return-Path: <patch+caf_=linaro-patchwork=canonical.com@linaro.org>
X-Original-To: patchwork@peony.canonical.com
Delivered-To: patchwork@peony.canonical.com
Received: from fiordland.canonical.com (fiordland.canonical.com
 [91.189.94.145])
 by peony.canonical.com (Postfix) with ESMTP id 13B5023F6F
 for <patchwork@peony.canonical.com>;
 Sun, 25 Sep 2011 10:59:15 +0000 (UTC)
Received: from mail-fx0-f52.google.com (mail-fx0-f52.google.com
 [209.85.161.52])
 by fiordland.canonical.com (Postfix) with ESMTP id F0EC2A18846
 for <linaro-patchwork@canonical.com>;
 Sun, 25 Sep 2011 10:59:14 +0000 (UTC)
Received: by fxe23 with SMTP id 23so7112822fxe.11
 for <linaro-patchwork@canonical.com>;
 Sun, 25 Sep 2011 03:59:14 -0700 (PDT)
Received: by 10.223.33.19 with SMTP id f19mr8389016fad.122.1316948354742;
 Sun, 25 Sep 2011 03:59:14 -0700 (PDT)
X-Forwarded-To: linaro-patchwork@canonical.com
X-Forwarded-For: patch@linaro.org linaro-patchwork@canonical.com
Delivered-To: patches@linaro.org
Received: by 10.152.3.234 with SMTP id f10cs6649laf;
 Sun, 25 Sep 2011 03:59:14 -0700 (PDT)
Received: by 10.236.115.70 with SMTP id d46mr32992153yhh.83.1316948352094;
 Sun, 25 Sep 2011 03:59:12 -0700 (PDT)
Received: from mail-yi0-f50.google.com (mail-yi0-f50.google.com
 [209.85.218.50]) by mx.google.com with ESMTPS id
 z9si9693278ank.105.2011.09.25.03.59.11
 (version=TLSv1/SSLv3 cipher=OTHER);
 Sun, 25 Sep 2011 03:59:11 -0700 (PDT)
Received-SPF: neutral (google.com: 209.85.218.50 is neither permitted nor
 denied by best guess record for domain of
 ira.rosen@linaro.org) client-ip=209.85.218.50; 
Authentication-Results: mx.google.com;
 spf=neutral (google.com: 209.85.218.50 is neither
 permitted nor denied by best guess record for domain of
 ira.rosen@linaro.org) smtp.mail=ira.rosen@linaro.org
Received: by yib25 with SMTP id 25so5363855yib.37
 for <patches@linaro.org>; Sun, 25 Sep 2011 03:59:11 -0700 (PDT)
MIME-Version: 1.0
Received: by 10.150.75.14 with SMTP id x14mr4933138yba.179.1316948351038;
 Sun, 25 Sep 2011 03:59:11 -0700 (PDT)
Received: by 10.151.113.18 with HTTP; Sun, 25 Sep 2011 03:59:10 -0700 (PDT)
Date: Sun, 25 Sep 2011 13:59:10 +0300
Message-ID: <CAKSNEw5jqUqSsmTKQDE18Nkg7BJLCHjOFkHKLoMFDf07UcQbyg@mail.gmail.com>
Subject: [patch] Support a choice of vector size in SLP
From: Ira Rosen <ira.rosen@linaro.org>
To: gcc-patches@gcc.gnu.org
Cc: Patch Tracking <patches@linaro.org>

Hi,

This patch supports an automatic choice of vector size in basic block
vectorization similar to the loop vectorization case.

I am not sure about the new effective-target keyword (vect_half_size).

Bootstrapped on powerpc64-suse-linux, tested on powerpc64-suse-linux
and arm-linux-gnueabi.

Thanks,
Ira

ChangeLog:

	* tree-vect-slp.c (vect_slp_analyze_bb_1): Split out core part
	of vect_slp_analyze_bb here.
	(vect_slp_analyze_bb): Loop over vector sizes calling
	vect_slp_analyze_bb_1.

testsuite/ChangeLog:

	* lib/target-supports.exp (check_effective_target_vect_half_size): New.
	* gcc.dg/vect/bb-slp-11.c: Expect the error message twice in case
	of multiple vector sizes.
	* gcc.dg/vect/bb-slp-26.c: New.

Index: testsuite/lib/target-supports.exp
===================================================================
--- testsuite/lib/target-supports.exp   (revision 179159)
+++ testsuite/lib/target-supports.exp   (working copy)
@@ -3393,6 +3393,24 @@ proc check_effective_target_vect_multiple_sizes {
     return $et_vect_multiple_sizes_saved
 }

+# Return 1 if the target supports vectors of 8 chars, 4 shorts and 2 ints.
+
+proc check_effective_target_vect_half_size { } {
+    global et_vect_half_size_saved ;# must name the cached variable, or the cache is proc-local
+
+    if [info exists et_vect_half_size_saved] {
+        verbose "check_effective_target_vect_half_size: using cached result" 2
+    } else {
+        set et_vect_half_size_saved 0
+        if { ([istarget arm*-*-*] && [check_effective_target_arm_neon_ok]) } {
+           set et_vect_half_size_saved 1
+        }
+    }
+
+    verbose "check_effective_target_vect_half_size: returning $et_vect_half_size_saved" 2
+    return $et_vect_half_size_saved
+}
+
 # Return 1 if the target supports section-anchors

 proc check_effective_target_section_anchors { } {
Index: testsuite/gcc.dg/vect/bb-slp-26.c
===================================================================
--- testsuite/gcc.dg/vect/bb-slp-26.c   (revision 0)
+++ testsuite/gcc.dg/vect/bb-slp-26.c   (revision 0)
@@ -0,0 +1,59 @@
+/* { dg-require-effective-target vect_int } */
+
+#include <stdarg.h>
+#include "tree-vect.h"
+
+#define A 3  /* Multiplier applied to src[] in foo and in main's check.  */
+#define B 4  /* NOTE(review): not referenced anywhere in this test.  */
+#define N 256  /* Number of elements in src[] and dst[].  */
+
+char src[N], dst[N];
+
+void foo (char * __restrict__ dst, char * __restrict__ src, int h, int stride, int dummy)
+{
+  int i;
+  h /= 16;  /* Run h/16 iterations; each one updates 8 bytes of dst.  */
+  for (i = 0; i < h; i++)
+    {
+      dst[0] += A*src[0] + src[stride];
+      dst[1] += A*src[1] + src[1+stride];
+      dst[2] += A*src[2] + src[2+stride];
+      dst[3] += A*src[3] + src[3+stride];
+      dst[4] += A*src[4] + src[4+stride];
+      dst[5] += A*src[5] + src[5+stride];
+      dst[6] += A*src[6] + src[6+stride];
+      dst[7] += A*src[7] + src[7+stride];
+      dst += 8;
+      src += 8;
+      if (dummy == 32)  /* NOTE(review): presumably only here to add control flow - confirm.  */
+        abort ();
+   }
+}
+
+
+int main (void)  /* Runs foo on known data and aborts on a wrong result.  */
+{
+  int i;
+
+  check_vect ();
+
+  for (i = 0; i < N; i++)
+    {
+       dst[i] = 0;
+       src[i] = i/8;
+    }
+
+  foo (dst, src, N, 8, 0);  /* h = N, stride = 8, dummy = 0.  */
+
+  for (i = 0; i < N/2; i++)  /* foo writes 8 bytes in each of its N/16 iterations.  */
+    {
+      if (dst[i] != A * src[i] + src[i+8])
+        abort ();
+    }
+
+  return 0;
+}
+
+/* { dg-final { scan-tree-dump-times "basic block vectorized using SLP" 1 "slp" { target vect_half_size } } } */
+/* { dg-final { cleanup-tree-dump "slp" } } */
+
Index: testsuite/gcc.dg/vect/bb-slp-11.c
===================================================================
--- testsuite/gcc.dg/vect/bb-slp-11.c   (revision 179159)
+++ testsuite/gcc.dg/vect/bb-slp-11.c   (working copy)
@@ -49,6 +49,7 @@ int main (void)
 }

 /* { dg-final { scan-tree-dump-times "basic block vectorized using SLP" 0 "slp" } } */
-/* { dg-final { scan-tree-dump-times "SLP with multiple types" 1 "slp" } } */
+/* { dg-final { scan-tree-dump-times "SLP with multiple types" 1 "slp" { xfail vect_multiple_sizes } } } */
+/* { dg-final { scan-tree-dump-times "SLP with multiple types" 2 "slp" { target vect_multiple_sizes } } } */
 /* { dg-final { cleanup-tree-dump "slp" } } */

Index: tree-vect-slp.c
===================================================================
--- tree-vect-slp.c     (revision 179159)
+++ tree-vect-slp.c     (working copy)
@@ -1694,42 +1694,18 @@ vect_bb_vectorization_profitable_p (bb_vec_info bb

 /* Check if the basic block can be vectorized.  */

-bb_vec_info
-vect_slp_analyze_bb (basic_block bb)
+static bb_vec_info
+vect_slp_analyze_bb_1 (basic_block bb)
 {
   bb_vec_info bb_vinfo;
   VEC (ddr_p, heap) *ddrs;
   VEC (slp_instance, heap) *slp_instances;
   slp_instance instance;
-  int i, insns = 0;
-  gimple_stmt_iterator gsi;
+  int i;
   int min_vf = 2;
   int max_vf = MAX_VECTORIZATION_FACTOR;
   bool data_dependence_in_bb = false;

-  current_vector_size = 0;
-
-  if (vect_print_dump_info (REPORT_DETAILS))
-    fprintf (vect_dump, "===vect_slp_analyze_bb===\n");
-
-  for (gsi = gsi_start_bb (bb); !gsi_end_p (gsi); gsi_next (&gsi))
-    {
-      gimple stmt = gsi_stmt (gsi);
-      if (!is_gimple_debug (stmt)
-         && !gimple_nop_p (stmt)
-         && gimple_code (stmt) != GIMPLE_LABEL)
-       insns++;
-    }
-
-  if (insns > PARAM_VALUE (PARAM_SLP_MAX_INSNS_IN_BB))
-    {
-      if (vect_print_dump_info (REPORT_UNVECTORIZED_LOCATIONS))
-        fprintf (vect_dump, "not vectorized: too many instructions in basic "
-                            "block.\n");
-
-      return NULL;
-    }
-
   bb_vinfo = new_bb_vec_info (bb);
   if (!bb_vinfo)
     return NULL;
@@ -1849,6 +1825,61 @@ vect_bb_vectorization_profitable_p (bb_vec_info bb
 }


+/* Check if BB can be vectorized, retrying with each target vector size.  */
+bb_vec_info
+vect_slp_analyze_bb (basic_block bb)
+{
+  bb_vec_info bb_vinfo;
+  int insns = 0;
+  gimple_stmt_iterator gsi;
+  unsigned int vector_sizes;
+
+  if (vect_print_dump_info (REPORT_DETAILS))
+    fprintf (vect_dump, "===vect_slp_analyze_bb===\n");
+
+  for (gsi = gsi_start_bb (bb); !gsi_end_p (gsi); gsi_next (&gsi))
+    {
+      gimple stmt = gsi_stmt (gsi);
+      if (!is_gimple_debug (stmt)
+          && !gimple_nop_p (stmt)
+          && gimple_code (stmt) != GIMPLE_LABEL)
+        insns++;
+    }
+
+  if (insns > PARAM_VALUE (PARAM_SLP_MAX_INSNS_IN_BB))
+    {
+      if (vect_print_dump_info (REPORT_UNVECTORIZED_LOCATIONS))
+        fprintf (vect_dump, "not vectorized: too many instructions in basic "
+                            "block.\n");
+
+      return NULL;
+    }
+
+  /* Autodetect first vector size we try.  */
+  current_vector_size = 0;
+  vector_sizes = targetm.vectorize.autovectorize_vector_sizes ();
+
+  while (1)
+    {
+      bb_vinfo = vect_slp_analyze_bb_1 (bb);
+      if (bb_vinfo)
+        return bb_vinfo;
+
+      /* Analysis failed; drop the size just tried from the candidates.  */
+      vector_sizes &= ~current_vector_size;
+      if (vector_sizes == 0
+          || current_vector_size == 0)
+        return NULL;
+
+      /* Try the next biggest vector size.  */
+      current_vector_size = 1 << floor_log2 (vector_sizes);
+      if (vect_print_dump_info (REPORT_DETAILS))
+        fprintf (vect_dump, "***** Re-trying analysis with "
+                 "vector size %d\n", current_vector_size);
+    }
+}
+
+
 /* SLP costs are calculated according to SLP instance unrolling factor (i.e.,
    the number of created vector stmts depends on the unrolling factor).
    However, the actual number of vector stmts for every SLP node depends on