Message ID | AANLkTim8X-J9p6MB5-8m4sFWxkHqTgQuAAdjXdqqtsgg@mail.gmail.com |
---|---|
State | Accepted |
Headers | show |
On Thu, 24 Mar 2011, Ira Rosen wrote:

> Hi,
>
> This patch implements TARGET_VECTORIZE_AUTOVECTORIZE_VECTOR_SIZES for ARM NEON.

Given the multiple vector sizes support, is there a reason not to enable -mvectorize-with-neon-quad by default?
On 24 March 2011 13:03, Joseph S. Myers <joseph@codesourcery.com> wrote:

> On Thu, 24 Mar 2011, Ira Rosen wrote:
>
>> Hi,
>>
>> This patch implements TARGET_VECTORIZE_AUTOVECTORIZE_VECTOR_SIZES for ARM NEON.
>
> Given the multiple vector sizes support, is there a reason not to enable
> -mvectorize-with-neon-quad by default?

I don't see any reason, and I am going to submit a follow-up patch that does that.

Ira

>
> --
> Joseph S. Myers
> joseph@codesourcery.com
>
On 24/03/11 09:06, Ira Rosen wrote: > Hi, > > This patch implements TARGET_VECTORIZE_AUTOVECTORIZE_VECTOR_SIZES for ARM NEON. > > Regtested on arm-linux-gnueabi. > OK for trunk? > This is OK for trunk if there are no regressions. cheers Ramana > Thanks, > Ira > > ChangeLog: > > * config/arm/arm.c (arm_autovectorize_vector_sizes): New > function. > (TARGET_VECTORIZE_AUTOVECTORIZE_VECTOR_SIZES): Define. > > testsuite/ChangeLog: > > * gcc.dg/vect/vect-outer-5.c: Reduce the distance between data > accesses to preserve the meaning of the test for doubleword vectors. > * gcc.dg/vect/no-vfa-pr29145.c: Likewise. > * gcc.dg/vect/slp-3.c: Reduce the loop bound for the same reason. > > Index: config/arm/arm.c > =================================================================== > --- config/arm/arm.c (revision 171339) > +++ config/arm/arm.c (working copy) > @@ -252,6 +252,7 @@ static bool arm_builtin_support_vector_misalignmen > bool is_packed); > static void arm_conditional_register_usage (void); > static reg_class_t arm_preferred_rename_class (reg_class_t rclass); > +static unsigned int arm_autovectorize_vector_sizes (void); > > ^L > /* Table of machine attributes. */ > @@ -404,6 +405,9 @@ static const struct default_options arm_option_opt > #define TARGET_VECTOR_MODE_SUPPORTED_P arm_vector_mode_supported_p > #undef TARGET_VECTORIZE_PREFERRED_SIMD_MODE > #define TARGET_VECTORIZE_PREFERRED_SIMD_MODE arm_preferred_simd_mode > +#undef TARGET_VECTORIZE_AUTOVECTORIZE_VECTOR_SIZES > +#define TARGET_VECTORIZE_AUTOVECTORIZE_VECTOR_SIZES \ > + arm_autovectorize_vector_sizes > > #undef TARGET_MACHINE_DEPENDENT_REORG > #define TARGET_MACHINE_DEPENDENT_REORG arm_reorg > @@ -23528,6 +23532,12 @@ arm_expand_sync (enum machine_mode mode, > } > } > > +static unsigned int > +arm_autovectorize_vector_sizes (void) > +{ > + return TARGET_NEON_VECTORIZE_QUAD ? 
16 | 8 : 0; > +} > + > static bool > arm_vector_alignment_reachable (const_tree type, bool is_packed) > { > Index: testsuite/gcc.dg/vect/vect-outer-5.c > =================================================================== > --- testsuite/gcc.dg/vect/vect-outer-5.c (revision 171339) > +++ testsuite/gcc.dg/vect/vect-outer-5.c (working copy) > @@ -17,7 +17,7 @@ int main1 () > float B[N] __attribute__ ((__aligned__(__BIGGEST_ALIGNMENT__))); > float C[N] __attribute__ ((__aligned__(__BIGGEST_ALIGNMENT__))); > float D[N] __attribute__ ((__aligned__(__BIGGEST_ALIGNMENT__))); > - float E[4] = {0,1,2,480}; > + float E[4] = {0,480,960,1440}; > float s; > > int i, j; > @@ -55,7 +55,7 @@ int main1 () > s = 0; > for (j=0; j<N; j+=4) > s += C[j]; > - B[i+3] = B[i] + s; > + B[i+1] = B[i] + s; > } > > /* check results: */ > Index: testsuite/gcc.dg/vect/slp-3.c > =================================================================== > --- testsuite/gcc.dg/vect/slp-3.c (revision 171339) > +++ testsuite/gcc.dg/vect/slp-3.c (working copy) > @@ -101,7 +101,7 @@ main1 () > } > > /* SLP with unrolling by 8. 
*/ > - for (i = 0; i< N/2; i++) > + for (i = 0; i< N/4; i++) > { > out[i*9] = in[i*9]; > out[i*9 + 1] = in[i*9 + 1]; > @@ -115,7 +115,7 @@ main1 () > } > > /* check results: */ > - for (i = 0; i< N/2; i++) > + for (i = 0; i< N/4; i++) > { > if (out[i*9] != in[i*9] > || out[i*9 + 1] != in[i*9 + 1] > Index: testsuite/gcc.dg/vect/no-vfa-pr29145.c > =================================================================== > --- testsuite/gcc.dg/vect/no-vfa-pr29145.c (revision 171339) > +++ testsuite/gcc.dg/vect/no-vfa-pr29145.c (working copy) > @@ -8,7 +8,7 @@ __attribute__ ((noinline)) > void with_restrict(int * __restrict p) > { > int i; > - int *q = p - 2; > + int *q = p - 1; > > for (i = 0; i< 1000; ++i) { > p[i] = q[i]; > @@ -19,7 +19,7 @@ __attribute__ ((noinline)) > void without_restrict(int * p) > { > int i; > - int *q = p - 2; > + int *q = p - 1; > > for (i = 0; i< 1000; ++i) { > p[i] = q[i]; > @@ -38,8 +38,8 @@ int main(void) > a[i] = b[i] = i; > } > > - with_restrict(a + 2); > - without_restrict(b + 2); > + with_restrict(a + 1); > + without_restrict(b + 1); > > for (i = 0; i< 1002; ++i) { > if (a[i] != b[i])
Index: config/arm/arm.c =================================================================== --- config/arm/arm.c (revision 171339) +++ config/arm/arm.c (working copy) @@ -252,6 +252,7 @@ static bool arm_builtin_support_vector_misalignmen bool is_packed); static void arm_conditional_register_usage (void); static reg_class_t arm_preferred_rename_class (reg_class_t rclass); +static unsigned int arm_autovectorize_vector_sizes (void); ^L /* Table of machine attributes. */ @@ -404,6 +405,9 @@ static const struct default_options arm_option_opt #define TARGET_VECTOR_MODE_SUPPORTED_P arm_vector_mode_supported_p #undef TARGET_VECTORIZE_PREFERRED_SIMD_MODE #define TARGET_VECTORIZE_PREFERRED_SIMD_MODE arm_preferred_simd_mode +#undef TARGET_VECTORIZE_AUTOVECTORIZE_VECTOR_SIZES +#define TARGET_VECTORIZE_AUTOVECTORIZE_VECTOR_SIZES \ + arm_autovectorize_vector_sizes #undef TARGET_MACHINE_DEPENDENT_REORG #define TARGET_MACHINE_DEPENDENT_REORG arm_reorg @@ -23528,6 +23532,12 @@ arm_expand_sync (enum machine_mode mode, } } +static unsigned int +arm_autovectorize_vector_sizes (void) +{ + return TARGET_NEON_VECTORIZE_QUAD ? 
16 | 8 : 0; +} + static bool arm_vector_alignment_reachable (const_tree type, bool is_packed) { Index: testsuite/gcc.dg/vect/vect-outer-5.c =================================================================== --- testsuite/gcc.dg/vect/vect-outer-5.c (revision 171339) +++ testsuite/gcc.dg/vect/vect-outer-5.c (working copy) @@ -17,7 +17,7 @@ int main1 () float B[N] __attribute__ ((__aligned__(__BIGGEST_ALIGNMENT__))); float C[N] __attribute__ ((__aligned__(__BIGGEST_ALIGNMENT__))); float D[N] __attribute__ ((__aligned__(__BIGGEST_ALIGNMENT__))); - float E[4] = {0,1,2,480}; + float E[4] = {0,480,960,1440}; float s; int i, j; @@ -55,7 +55,7 @@ int main1 () s = 0; for (j=0; j<N; j+=4) s += C[j]; - B[i+3] = B[i] + s; + B[i+1] = B[i] + s; } /* check results: */ Index: testsuite/gcc.dg/vect/slp-3.c =================================================================== --- testsuite/gcc.dg/vect/slp-3.c (revision 171339) +++ testsuite/gcc.dg/vect/slp-3.c (working copy) @@ -101,7 +101,7 @@ main1 () } /* SLP with unrolling by 8. 
*/ - for (i = 0; i < N/2; i++) + for (i = 0; i < N/4; i++) { out[i*9] = in[i*9]; out[i*9 + 1] = in[i*9 + 1]; @@ -115,7 +115,7 @@ main1 () } /* check results: */ - for (i = 0; i < N/2; i++) + for (i = 0; i < N/4; i++) { if (out[i*9] != in[i*9] || out[i*9 + 1] != in[i*9 + 1] Index: testsuite/gcc.dg/vect/no-vfa-pr29145.c =================================================================== --- testsuite/gcc.dg/vect/no-vfa-pr29145.c (revision 171339) +++ testsuite/gcc.dg/vect/no-vfa-pr29145.c (working copy) @@ -8,7 +8,7 @@ __attribute__ ((noinline)) void with_restrict(int * __restrict p) { int i; - int *q = p - 2; + int *q = p - 1; for (i = 0; i < 1000; ++i) { p[i] = q[i]; @@ -19,7 +19,7 @@ __attribute__ ((noinline)) void without_restrict(int * p) { int i; - int *q = p - 2; + int *q = p - 1; for (i = 0; i < 1000; ++i) { p[i] = q[i]; @@ -38,8 +38,8 @@ int main(void) a[i] = b[i] = i; } - with_restrict(a + 2); - without_restrict(b + 2); + with_restrict(a + 1); + without_restrict(b + 1); for (i = 0; i < 1002; ++i) { if (a[i] != b[i])