From patchwork Sun Sep 25 10:59:10 2011 Content-Type: text/plain; charset="utf-8" MIME-Version: 1.0 Content-Transfer-Encoding: 7bit X-Patchwork-Submitter: Ira Rosen X-Patchwork-Id: 4315 Return-Path: X-Original-To: patchwork@peony.canonical.com Delivered-To: patchwork@peony.canonical.com Received: from fiordland.canonical.com (fiordland.canonical.com [91.189.94.145]) by peony.canonical.com (Postfix) with ESMTP id 13B5023F6F for ; Sun, 25 Sep 2011 10:59:15 +0000 (UTC) Received: from mail-fx0-f52.google.com (mail-fx0-f52.google.com [209.85.161.52]) by fiordland.canonical.com (Postfix) with ESMTP id F0EC2A18846 for ; Sun, 25 Sep 2011 10:59:14 +0000 (UTC) Received: by fxe23 with SMTP id 23so7112822fxe.11 for ; Sun, 25 Sep 2011 03:59:14 -0700 (PDT) Received: by 10.223.33.19 with SMTP id f19mr8389016fad.122.1316948354742; Sun, 25 Sep 2011 03:59:14 -0700 (PDT) X-Forwarded-To: linaro-patchwork@canonical.com X-Forwarded-For: patch@linaro.org linaro-patchwork@canonical.com Delivered-To: patches@linaro.org Received: by 10.152.3.234 with SMTP id f10cs6649laf; Sun, 25 Sep 2011 03:59:14 -0700 (PDT) Received: by 10.236.115.70 with SMTP id d46mr32992153yhh.83.1316948352094; Sun, 25 Sep 2011 03:59:12 -0700 (PDT) Received: from mail-yi0-f50.google.com (mail-yi0-f50.google.com [209.85.218.50]) by mx.google.com with ESMTPS id z9si9693278ank.105.2011.09.25.03.59.11 (version=TLSv1/SSLv3 cipher=OTHER); Sun, 25 Sep 2011 03:59:11 -0700 (PDT) Received-SPF: neutral (google.com: 209.85.218.50 is neither permitted nor denied by best guess record for domain of ira.rosen@linaro.org) client-ip=209.85.218.50; Authentication-Results: mx.google.com; spf=neutral (google.com: 209.85.218.50 is neither permitted nor denied by best guess record for domain of ira.rosen@linaro.org) smtp.mail=ira.rosen@linaro.org Received: by yib25 with SMTP id 25so5363855yib.37 for ; Sun, 25 Sep 2011 03:59:11 -0700 (PDT) MIME-Version: 1.0 Received: by 10.150.75.14 with SMTP id x14mr4933138yba.179.1316948351038; Sun, 25 Sep 
2011 03:59:11 -0700 (PDT) Received: by 10.151.113.18 with HTTP; Sun, 25 Sep 2011 03:59:10 -0700 (PDT) Date: Sun, 25 Sep 2011 13:59:10 +0300 Message-ID: Subject: [patch] Support a choice of vector size in SLP From: Ira Rosen To: gcc-patches@gcc.gnu.org Cc: Patch Tracking Hi, This patch supports an automatic choice of vector size in basic block vectorization, similar to the loop vectorization case. I am not sure about the new effective-target keyword (vect_half_size). Bootstrapped on powerpc64-suse-linux, tested on powerpc64-suse-linux and arm-linux-gnueabi. Thanks, Ira ChangeLog: * tree-vect-slp.c (vect_slp_analyze_bb_1): Split out core part of vect_slp_analyze_bb here. (vect_slp_analyze_bb): Loop over vector sizes calling vect_slp_analyze_bb_1. testsuite/ChangeLog: * lib/target-supports.exp (check_effective_target_vect_half_size): New. * gcc.dg/vect/bb-slp-11.c: Expect the error message twice in case of multiple vector sizes. * gcc.dg/vect/bb-slp-26.c: New. Index: testsuite/lib/target-supports.exp =================================================================== --- testsuite/lib/target-supports.exp (revision 179159) +++ testsuite/lib/target-supports.exp (working copy) @@ -3393,6 +3393,24 @@ proc check_effective_target_vect_multiple_sizes { return $et_vect_multiple_sizes_saved } +# Return 1 if the target supports vectors of 8 chars, 4 shorts and 2 ints. 
+ +proc check_effective_target_vect_half_size { } { + global et_vect_half_size + + if [info exists et_vect_half_size_saved] { + verbose "check_effective_target_vect_half_size: using cached result" 2 + } else { + set et_vect_half_size_saved 0 + if { ([istarget arm*-*-*] && [check_effective_target_arm_neon_ok]) } { + set et_vect_half_size_saved 1 + } + } + + verbose "check_effective_target_vect_half_size: returning $et_vect_half_size_saved" 2 + return $et_vect_half_size_saved +} + # Return 1 if the target supports section-anchors proc check_effective_target_section_anchors { } { Index: testsuite/gcc.dg/vect/bb-slp-26.c =================================================================== --- testsuite/gcc.dg/vect/bb-slp-26.c (revision 0) +++ testsuite/gcc.dg/vect/bb-slp-26.c (revision 0) @@ -0,0 +1,59 @@ +/* { dg-require-effective-target vect_int } */ + +#include +#include "tree-vect.h" + +#define A 3 +#define B 4 +#define N 256 + +char src[N], dst[N]; + +void foo (char * __restrict__ dst, char * __restrict__ src, int h, int stride, int dummy) +{ + int i; + h /= 16; + for (i = 0; i < h; i++) + { + dst[0] += A*src[0] + src[stride]; + dst[1] += A*src[1] + src[1+stride]; + dst[2] += A*src[2] + src[2+stride]; + dst[3] += A*src[3] + src[3+stride]; + dst[4] += A*src[4] + src[4+stride]; + dst[5] += A*src[5] + src[5+stride]; + dst[6] += A*src[6] + src[6+stride]; + dst[7] += A*src[7] + src[7+stride]; + dst += 8; + src += 8; + if (dummy == 32) + abort (); + } +} + + +int main (void) +{ + int i; + + check_vect (); + + for (i = 0; i < N; i++) + { + dst[i] = 0; + src[i] = i/8; + } + + foo (dst, src, N, 8, 0); + + for (i = 0; i < N/2; i++) + { + if (dst[i] != A * src[i] + src[i+8]) + abort (); + } + + return 0; +} + +/* { dg-final { scan-tree-dump-times "basic block vectorized using SLP" 1 "slp" { target vect_half_size } } } */ +/* { dg-final { cleanup-tree-dump "slp" } } */ + Index: testsuite/gcc.dg/vect/bb-slp-11.c 
=================================================================== --- testsuite/gcc.dg/vect/bb-slp-11.c (revision 179159) +++ testsuite/gcc.dg/vect/bb-slp-11.c (working copy) @@ -49,6 +49,7 @@ int main (void) } /* { dg-final { scan-tree-dump-times "basic block vectorized using SLP" 0 "slp" } } */ -/* { dg-final { scan-tree-dump-times "SLP with multiple types" 1 "slp" } } */ +/* { dg-final { scan-tree-dump-times "SLP with multiple types" 1 "slp" { xfail vect_multiple_sizes } } } */ +/* { dg-final { scan-tree-dump-times "SLP with multiple types" 2 "slp" { target vect_multiple_sizes } } } */ /* { dg-final { cleanup-tree-dump "slp" } } */ Index: tree-vect-slp.c =================================================================== --- tree-vect-slp.c (revision 179159) +++ tree-vect-slp.c (working copy) @@ -1694,42 +1694,18 @@ vect_bb_vectorization_profitable_p (bb_vec_info bb /* Check if the basic block can be vectorized. */ -bb_vec_info -vect_slp_analyze_bb (basic_block bb) +static bb_vec_info +vect_slp_analyze_bb_1 (basic_block bb) { bb_vec_info bb_vinfo; VEC (ddr_p, heap) *ddrs; VEC (slp_instance, heap) *slp_instances; slp_instance instance; - int i, insns = 0; - gimple_stmt_iterator gsi; + int i; int min_vf = 2; int max_vf = MAX_VECTORIZATION_FACTOR; bool data_dependence_in_bb = false; - current_vector_size = 0; - - if (vect_print_dump_info (REPORT_DETAILS)) - fprintf (vect_dump, "===vect_slp_analyze_bb===\n"); - - for (gsi = gsi_start_bb (bb); !gsi_end_p (gsi); gsi_next (&gsi)) - { - gimple stmt = gsi_stmt (gsi); - if (!is_gimple_debug (stmt) - && !gimple_nop_p (stmt) - && gimple_code (stmt) != GIMPLE_LABEL) - insns++; - } - - if (insns > PARAM_VALUE (PARAM_SLP_MAX_INSNS_IN_BB)) - { - if (vect_print_dump_info (REPORT_UNVECTORIZED_LOCATIONS)) - fprintf (vect_dump, "not vectorized: too many instructions in basic " - "block.\n"); - - return NULL; - } - bb_vinfo = new_bb_vec_info (bb); if (!bb_vinfo) return NULL; @@ -1849,6 +1825,61 @@ 
vect_bb_vectorization_profitable_p (bb_vec_info bb } +bb_vec_info +vect_slp_analyze_bb (basic_block bb) +{ + bb_vec_info bb_vinfo; + int insns = 0; + gimple_stmt_iterator gsi; + unsigned int vector_sizes; + + if (vect_print_dump_info (REPORT_DETAILS)) + fprintf (vect_dump, "===vect_slp_analyze_bb===\n"); + + for (gsi = gsi_start_bb (bb); !gsi_end_p (gsi); gsi_next (&gsi)) + { + gimple stmt = gsi_stmt (gsi); + if (!is_gimple_debug (stmt) + && !gimple_nop_p (stmt) + && gimple_code (stmt) != GIMPLE_LABEL) + insns++; + } + + if (insns > PARAM_VALUE (PARAM_SLP_MAX_INSNS_IN_BB)) + { + if (vect_print_dump_info (REPORT_UNVECTORIZED_LOCATIONS)) + fprintf (vect_dump, "not vectorized: too many instructions in basic " + "block.\n"); + + return NULL; + } + + /* Autodetect first vector size we try. */ + current_vector_size = 0; + vector_sizes = targetm.vectorize.autovectorize_vector_sizes (); + + while (1) + { + bb_vinfo = vect_slp_analyze_bb_1 (bb); + if (bb_vinfo) + return bb_vinfo; + + destroy_bb_vec_info (bb_vinfo); + + vector_sizes &= ~current_vector_size; + if (vector_sizes == 0 + || current_vector_size == 0) + return NULL; + + /* Try the next biggest vector size. */ + current_vector_size = 1 << floor_log2 (vector_sizes); + if (vect_print_dump_info (REPORT_DETAILS)) + fprintf (vect_dump, "***** Re-trying analysis with " + "vector size %d\n", current_vector_size); + } +} + + /* SLP costs are calculated according to SLP instance unrolling factor (i.e., the number of created vector stmts depends on the unrolling factor). However, the actual number of vector stmts for every SLP node depends on