[ARM] Enable auto-detection of vector size for NEON

Message ID	AANLkTim8X-J9p6MB5-8m4sFWxkHqTgQuAAdjXdqqtsgg@mail.gmail.com
State	Accepted
Headers	show Return-Path: <ira.rosen@linaro.org> Received-SPF: neutral (google.com: 209.85.213.178 is neither permitted nor denied by best guess record for domain of ira.rosen@linaro.org) client-ip=209.85.213.178; MIME-Version: 1.0 Date: Thu, 24 Mar 2011 11:06:21 +0200 Message-ID: <AANLkTim8X-J9p6MB5-8m4sFWxkHqTgQuAAdjXdqqtsgg@mail.gmail.com> Subject: [patch, ARM] Enable auto-detection of vector size for NEON From: Ira Rosen <ira.rosen@linaro.org> To: gcc-patches@gcc.gnu.org Cc: Patch Tracking <patches@linaro.org> Content-Type: text/plain; charset=ISO-8859-1

Message ID

AANLkTim8X-J9p6MB5-8m4sFWxkHqTgQuAAdjXdqqtsgg@mail.gmail.com

State

Accepted

Headers

Received-SPF: neutral (google.com: 209.85.213.178 is neither permitted nor
	denied by best guess record for domain of
	ira.rosen@linaro.org) client-ip=209.85.213.178; 
MIME-Version: 1.0
Date: Thu, 24 Mar 2011 11:06:21 +0200
Message-ID: <AANLkTim8X-J9p6MB5-8m4sFWxkHqTgQuAAdjXdqqtsgg@mail.gmail.com>
Subject: [patch, ARM] Enable auto-detection of vector size for NEON
From: Ira Rosen <ira.rosen@linaro.org>
To: gcc-patches@gcc.gnu.org
Cc: Patch Tracking <patches@linaro.org>
Content-Type: text/plain; charset=ISO-8859-1

Commit Message

Ira Rosen March 24, 2011, 9:06 a.m. UTC

Hi,

This patch implements TARGET_VECTORIZE_AUTOVECTORIZE_VECTOR_SIZES for ARM NEON.

Regtested on arm-linux-gnueabi.
OK for trunk?

Thanks,
Ira

ChangeLog:

	* config/arm/arm.c (arm_autovectorize_vector_sizes): New
	function.
	(TARGET_VECTORIZE_AUTOVECTORIZE_VECTOR_SIZES): Define.

testsuite/ChangeLog:

	* gcc.dg/vect/vect-outer-5.c: Reduce the distance between data
	accesses to preserve the meaning of the test for doubleword vectors.
	* gcc.dg/vect/no-vfa-pr29145.c: Likewise.
	* gcc.dg/vect/slp-3.c: Reduce the loop bound for the same reason.

Comments

Joseph Myers March 24, 2011, 11:03 a.m. UTC | #1

On Thu, 24 Mar 2011, Ira Rosen wrote:

> Hi,
> 
> This patch implements TARGET_VECTORIZE_AUTOVECTORIZE_VECTOR_SIZES for ARM NEON.

Given the multiple vector sizes support, is there a reason not to enable 
-mvectorize-with-neon-quad by default?

Ira Rosen March 24, 2011, 11:07 a.m. UTC | #2

On 24 March 2011 13:03, Joseph S. Myers <joseph@codesourcery.com> wrote:
> On Thu, 24 Mar 2011, Ira Rosen wrote:
>
>> Hi,
>>
>> This patch implements TARGET_VECTORIZE_AUTOVECTORIZE_VECTOR_SIZES for ARM NEON.
>
> Given the multiple vector sizes support, is there a reason not to enable
> -mvectorize-with-neon-quad by default?

I don't see any reason, and I am going to submit a follow-up patch
that does that.

Ira

>
> --
> Joseph S. Myers
> joseph@codesourcery.com
>

Ramana Radhakrishnan March 24, 2011, 11:33 a.m. UTC | #3

On 24/03/11 09:06, Ira Rosen wrote:
> Hi,
>
> This patch implements TARGET_VECTORIZE_AUTOVECTORIZE_VECTOR_SIZES for ARM NEON.
>
> Regtested on arm-linux-gnueabi.
> OK for trunk?
>

This is OK for trunk if there are no regressions.

cheers
Ramana

> Thanks,
> Ira
>
> ChangeLog:
>
> 	* config/arm/arm.c (arm_autovectorize_vector_sizes): New
> 	function.
> 	(TARGET_VECTORIZE_AUTOVECTORIZE_VECTOR_SIZES): Define.
>
> testsuite/ChangeLog:
>
>          * gcc.dg/vect/vect-outer-5.c: Reduce the distance between data
>          accesses to preserve the meaning of the test for doubleword vectors.
>          * gcc.dg/vect/no-vfa-pr29145.c: Likewise.
>          * gcc.dg/vect/slp-3.c: Reduce the loop bound for the same reason.
>
> Index: config/arm/arm.c
> ===================================================================
> --- config/arm/arm.c    (revision 171339)
> +++ config/arm/arm.c    (working copy)
> @@ -252,6 +252,7 @@ static bool arm_builtin_support_vector_misalignmen
>                                                       bool is_packed);
>   static void arm_conditional_register_usage (void);
>   static reg_class_t arm_preferred_rename_class (reg_class_t rclass);
> +static unsigned int arm_autovectorize_vector_sizes (void);
>
>   ^L
>   /* Table of machine attributes.  */
> @@ -404,6 +405,9 @@ static const struct default_options arm_option_opt
>   #define TARGET_VECTOR_MODE_SUPPORTED_P arm_vector_mode_supported_p
>   #undef TARGET_VECTORIZE_PREFERRED_SIMD_MODE
>   #define TARGET_VECTORIZE_PREFERRED_SIMD_MODE arm_preferred_simd_mode
> +#undef TARGET_VECTORIZE_AUTOVECTORIZE_VECTOR_SIZES
> +#define TARGET_VECTORIZE_AUTOVECTORIZE_VECTOR_SIZES \
> +  arm_autovectorize_vector_sizes
>
>   #undef  TARGET_MACHINE_DEPENDENT_REORG
>   #define TARGET_MACHINE_DEPENDENT_REORG arm_reorg
> @@ -23528,6 +23532,12 @@ arm_expand_sync (enum machine_mode mode,
>       }
>   }
>
> +static unsigned int
> +arm_autovectorize_vector_sizes (void)
> +{
> +  return TARGET_NEON_VECTORIZE_QUAD ? 16 | 8 : 0;
> +}
> +
>   static bool
>   arm_vector_alignment_reachable (const_tree type, bool is_packed)
>   {
> Index: testsuite/gcc.dg/vect/vect-outer-5.c
> ===================================================================
> --- testsuite/gcc.dg/vect/vect-outer-5.c        (revision 171339)
> +++ testsuite/gcc.dg/vect/vect-outer-5.c        (working copy)
> @@ -17,7 +17,7 @@ int main1 ()
>     float B[N] __attribute__ ((__aligned__(__BIGGEST_ALIGNMENT__)));
>     float C[N] __attribute__ ((__aligned__(__BIGGEST_ALIGNMENT__)));
>     float D[N] __attribute__ ((__aligned__(__BIGGEST_ALIGNMENT__)));
> -  float E[4] = {0,1,2,480};
> +  float E[4] = {0,480,960,1440};
>     float s;
>
>     int i, j;
> @@ -55,7 +55,7 @@ int main1 ()
>         s = 0;
>         for (j=0; j<N; j+=4)
>          s += C[j];
> -      B[i+3] = B[i] + s;
> +      B[i+1] = B[i] + s;
>       }
>
>     /* check results:  */
> Index: testsuite/gcc.dg/vect/slp-3.c
> ===================================================================
> --- testsuite/gcc.dg/vect/slp-3.c       (revision 171339)
> +++ testsuite/gcc.dg/vect/slp-3.c       (working copy)
> @@ -101,7 +101,7 @@ main1 ()
>       }
>
>     /* SLP with unrolling by 8.  */
> -  for (i = 0; i<  N/2; i++)
> +  for (i = 0; i<  N/4; i++)
>       {
>         out[i*9] = in[i*9];
>         out[i*9 + 1] = in[i*9 + 1];
> @@ -115,7 +115,7 @@ main1 ()
>       }
>
>     /* check results:  */
> -  for (i = 0; i<  N/2; i++)
> +  for (i = 0; i<  N/4; i++)
>       {
>         if (out[i*9] !=  in[i*9]
>            || out[i*9 + 1] != in[i*9 + 1]
> Index: testsuite/gcc.dg/vect/no-vfa-pr29145.c
> ===================================================================
> --- testsuite/gcc.dg/vect/no-vfa-pr29145.c      (revision 171339)
> +++ testsuite/gcc.dg/vect/no-vfa-pr29145.c      (working copy)
> @@ -8,7 +8,7 @@ __attribute__ ((noinline))
>   void with_restrict(int * __restrict p)
>   {
>     int i;
> -  int *q = p - 2;
> +  int *q = p - 1;
>
>     for (i = 0; i<  1000; ++i) {
>       p[i] = q[i];
> @@ -19,7 +19,7 @@ __attribute__ ((noinline))
>   void without_restrict(int * p)
>   {
>     int i;
> -  int *q = p - 2;
> +  int *q = p - 1;
>
>     for (i = 0; i<  1000; ++i) {
>       p[i] = q[i];
> @@ -38,8 +38,8 @@ int main(void)
>       a[i] = b[i] = i;
>     }
>
> -  with_restrict(a + 2);
> -  without_restrict(b + 2);
> +  with_restrict(a + 1);
> +  without_restrict(b + 1);
>
>     for (i = 0; i<  1002; ++i) {
>       if (a[i] != b[i])

Index: config/arm/arm.c
===================================================================
--- config/arm/arm.c    (revision 171339)
+++ config/arm/arm.c    (working copy)
@@ -252,6 +252,7 @@  static bool arm_builtin_support_vector_misalignmen
                                                     bool is_packed);
 static void arm_conditional_register_usage (void);
 static reg_class_t arm_preferred_rename_class (reg_class_t rclass);
+static unsigned int arm_autovectorize_vector_sizes (void);

 ^L
 /* Table of machine attributes.  */
@@ -404,6 +405,9 @@  static const struct default_options arm_option_opt
 #define TARGET_VECTOR_MODE_SUPPORTED_P arm_vector_mode_supported_p
 #undef TARGET_VECTORIZE_PREFERRED_SIMD_MODE
 #define TARGET_VECTORIZE_PREFERRED_SIMD_MODE arm_preferred_simd_mode
+#undef TARGET_VECTORIZE_AUTOVECTORIZE_VECTOR_SIZES
+#define TARGET_VECTORIZE_AUTOVECTORIZE_VECTOR_SIZES \
+  arm_autovectorize_vector_sizes

 #undef  TARGET_MACHINE_DEPENDENT_REORG
 #define TARGET_MACHINE_DEPENDENT_REORG arm_reorg
@@ -23528,6 +23532,12 @@  arm_expand_sync (enum machine_mode mode,
     }
 }

+static unsigned int
+arm_autovectorize_vector_sizes (void)
+{
+  return TARGET_NEON_VECTORIZE_QUAD ? 16 | 8 : 0;
+}
+
 static bool
 arm_vector_alignment_reachable (const_tree type, bool is_packed)
 {
Index: testsuite/gcc.dg/vect/vect-outer-5.c
===================================================================
--- testsuite/gcc.dg/vect/vect-outer-5.c        (revision 171339)
+++ testsuite/gcc.dg/vect/vect-outer-5.c        (working copy)
@@ -17,7 +17,7 @@  int main1 ()
   float B[N] __attribute__ ((__aligned__(__BIGGEST_ALIGNMENT__)));
   float C[N] __attribute__ ((__aligned__(__BIGGEST_ALIGNMENT__)));
   float D[N] __attribute__ ((__aligned__(__BIGGEST_ALIGNMENT__)));
-  float E[4] = {0,1,2,480};
+  float E[4] = {0,480,960,1440};
   float s;

   int i, j;
@@ -55,7 +55,7 @@  int main1 ()
       s = 0;
       for (j=0; j<N; j+=4)
        s += C[j];
-      B[i+3] = B[i] + s;
+      B[i+1] = B[i] + s;
     }

   /* check results:  */
Index: testsuite/gcc.dg/vect/slp-3.c
===================================================================
--- testsuite/gcc.dg/vect/slp-3.c       (revision 171339)
+++ testsuite/gcc.dg/vect/slp-3.c       (working copy)
@@ -101,7 +101,7 @@  main1 ()
     }

   /* SLP with unrolling by 8.  */
-  for (i = 0; i < N/2; i++)
+  for (i = 0; i < N/4; i++)
     {
       out[i*9] = in[i*9];
       out[i*9 + 1] = in[i*9 + 1];
@@ -115,7 +115,7 @@  main1 ()
     }

   /* check results:  */
-  for (i = 0; i < N/2; i++)
+  for (i = 0; i < N/4; i++)
     {
       if (out[i*9] !=  in[i*9]
          || out[i*9 + 1] != in[i*9 + 1]
Index: testsuite/gcc.dg/vect/no-vfa-pr29145.c
===================================================================
--- testsuite/gcc.dg/vect/no-vfa-pr29145.c      (revision 171339)
+++ testsuite/gcc.dg/vect/no-vfa-pr29145.c      (working copy)
@@ -8,7 +8,7 @@  __attribute__ ((noinline))
 void with_restrict(int * __restrict p)
 {
   int i;
-  int *q = p - 2;
+  int *q = p - 1;

   for (i = 0; i < 1000; ++i) {
     p[i] = q[i];
@@ -19,7 +19,7 @@  __attribute__ ((noinline))
 void without_restrict(int * p)
 {
   int i;
-  int *q = p - 2;
+  int *q = p - 1;

   for (i = 0; i < 1000; ++i) {
     p[i] = q[i];
@@ -38,8 +38,8 @@  int main(void)
     a[i] = b[i] = i;
   }

-  with_restrict(a + 2);
-  without_restrict(b + 2);
+  with_restrict(a + 1);
+  without_restrict(b + 1);

   for (i = 0; i < 1002; ++i) {
     if (a[i] != b[i])