Message ID: 1421162314-25779-4-git-send-email-christophe.lyon@linaro.org
State: New
On 16 January 2015 at 14:56, Tejas Belagod <tejas.belagod@arm.com> wrote: >> +#ifndef NO_FLOAT_VARIANT >> + VLOAD(vector, buffer, , float, f, 32, 2); >> + VLOAD(vector, buffer, q, float, f, 32, 4); >> +#endif >> > .... >> >> +#ifndef NO_FLOAT_VARIANT >> + VDUP(vector2, , float, f, 32, 2, -15.5f); >> + VDUP(vector2, q, float, f, 32, 4, -14.5f); >> +#endif >> + >> +#ifndef NO_FLOAT_VARIANT >> +#define FLOAT_VARIANT(MACRO, VAR) \ >> + MACRO(VAR, , float, f, 32, 2); \ >> + MACRO(VAR, q, float, f, 32, 4) >> +#else >> +#define FLOAT_VARIANT(MACRO, VAR) >> +#endif > > > Double negative! :-) Probably easier on the reader to avoid it, but your > call. Oh yes... I am importing my existing code, so I try to minimize changes. >> diff --git a/gcc/testsuite/gcc.target/aarch64/advsimd-intrinsics/vmax.c >> b/gcc/testsuite/gcc.target/aarch64/advsimd-intrinsics/vmax.c >> new file mode 100644 >> index 0000000..2591b16 >> --- /dev/null >> +++ b/gcc/testsuite/gcc.target/aarch64/advsimd-intrinsics/vmax.c >> @@ -0,0 +1,64 @@ >> +#include <arm_neon.h> >> +#include "arm-neon-ref.h" >> +#include "compute-ref-data.h" >> + >> +#define INSN_NAME vmax >> +#define TEST_MSG "VMAX/VMAXQ" >> + >> +/* Expected results. */ >> +VECT_VAR_DECL(expected,int,8,8) [] = { 0xf3, 0xf3, 0xf3, 0xf3, >> + 0xf4, 0xf5, 0xf6, 0xf7 }; >> +VECT_VAR_DECL(expected,int,16,4) [] = { 0xfff2, 0xfff2, 0xfff2, 0xfff3 }; >> +VECT_VAR_DECL(expected,int,32,2) [] = { 0xfffffff0, 0xfffffff1 }; >> +VECT_VAR_DECL(expected,int,64,1) [] = { 0x3333333333333333 }; >> +VECT_VAR_DECL(expected,uint,8,8) [] = { 0xf3, 0xf3, 0xf3, 0xf3, >> + 0xf4, 0xf5, 0xf6, 0xf7 }; >> +VECT_VAR_DECL(expected,uint,16,4) [] = { 0xfff1, 0xfff1, 0xfff2, 0xfff3 >> }; >> +VECT_VAR_DECL(expected,uint,32,2) [] = { 0xfffffff0, 0xfffffff1 }; >> +VECT_VAR_DECL(expected,uint,64,1) [] = { 0x3333333333333333 }; >> +VECT_VAR_DECL(expected,poly,8,8) [] = { 0x33, 0x33, 0x33, 0x33, >> + 0x33, 0x33, 0x33, 0x33 }; >> +VECT_VAR_DECL(expected,poly,16,4) [] = { 0x3333, 0x3333, 0x3333, 0x3333 >> }; >> +VECT_VAR_DECL(expected,hfloat,32,2) [] = { 0xc1780000, 0xc1700000 }; >> +VECT_VAR_DECL(expected,int,8,16) [] = { 0xf4, 0xf4, 0xf4, 0xf4, >> + 0xf4, 0xf5, 0xf6, 0xf7, >> + 0xf8, 0xf9, 0xfa, 0xfb, >> + 0xfc, 0xfd, 0xfe, 0xff }; >> +VECT_VAR_DECL(expected,int,16,8) [] = { 0xfff3, 0xfff3, 0xfff3, 0xfff3, >> + 0xfff4, 0xfff5, 0xfff6, 0xfff7 }; >> +VECT_VAR_DECL(expected,int,32,4) [] = { 0xfffffff1, 0xfffffff1, >> + 0xfffffff2, 0xfffffff3 }; >> +VECT_VAR_DECL(expected,int,64,2) [] = { 0x3333333333333333, >> + 0x3333333333333333 }; >> +VECT_VAR_DECL(expected,uint,8,16) [] = { 0xf9, 0xf9, 0xf9, 0xf9, >> + 0xf9, 0xf9, 0xf9, 0xf9, >> + 0xf9, 0xf9, 0xfa, 0xfb, >> + 0xfc, 0xfd, 0xfe, 0xff }; >> +VECT_VAR_DECL(expected,uint,16,8) [] = { 0xfff2, 0xfff2, 0xfff2, 0xfff3, >> + 0xfff4, 0xfff5, 0xfff6, 0xfff7 }; >> +VECT_VAR_DECL(expected,uint,32,4) [] = { 0xfffffff1, 0xfffffff1, >> + 0xfffffff2, 0xfffffff3 }; >> +VECT_VAR_DECL(expected,uint,64,2) [] = { 0x3333333333333333, >> + 0x3333333333333333 }; >> +VECT_VAR_DECL(expected,poly,8,16) [] = { 0x33, 0x33, 0x33, 0x33, >> + 0x33, 0x33, 0x33, 0x33, >> + 0x33, 0x33, 0x33, 0x33, >> + 0x33, 0x33, 0x33, 0x33 }; >> +VECT_VAR_DECL(expected,poly,16,8) [] = { 0x3333, 0x3333, 0x3333, 0x3333, >> + 0x3333, 0x3333, 0x3333, 0x3333 }; >> +VECT_VAR_DECL(expected,hfloat,32,4) [] = { 0xc1680000, 0xc1680000, >> + 0xc1600000, 0xc1500000 }; >> + >> +/* Expected results with special FP values. 
*/ >> +VECT_VAR_DECL(expected_nan,hfloat,32,4) [] = { 0x7fc00000, 0x7fc00000, >> + 0x7fc00000, 0x7fc00000 }; >> +VECT_VAR_DECL(expected_mnan,hfloat,32,4) [] = { 0x7fc00000, 0x7fc00000, >> + 0x7fc00000, 0x7fc00000 }; >> +VECT_VAR_DECL(expected_inf,hfloat,32,4) [] = { 0x7f800000, 0x7f800000, >> + 0x7f800000, 0x7f800000 }; >> +VECT_VAR_DECL(expected_minf,hfloat,32,4) [] = { 0x3f800000, 0x3f800000, >> + 0x3f800000, 0x3f800000 }; >> +VECT_VAR_DECL(expected_zero1,hfloat,32,4) [] = { 0x0, 0x0, 0x0, 0x0 }; >> +VECT_VAR_DECL(expected_zero2,hfloat,32,4) [] = { 0x0, 0x0, 0x0, 0x0 }; >> + >> +#include "binary_op_no64.inc" >> diff --git a/gcc/testsuite/gcc.target/aarch64/advsimd-intrinsics/vmin.c >> b/gcc/testsuite/gcc.target/aarch64/advsimd-intrinsics/vmin.c >> new file mode 100644 >> index 0000000..2b5e87c >> --- /dev/null >> +++ b/gcc/testsuite/gcc.target/aarch64/advsimd-intrinsics/vmin.c >> @@ -0,0 +1,66 @@ >> +#include <arm_neon.h> >> +#include "arm-neon-ref.h" >> +#include "compute-ref-data.h" >> + >> +#define INSN_NAME vmin >> +#define TEST_MSG "VMIN/VMINQ" >> + >> +/* Expected results. */ >> +VECT_VAR_DECL(expected,int,8,8) [] = { 0xf0, 0xf1, 0xf2, 0xf3, >> + 0xf3, 0xf3, 0xf3, 0xf3 }; >> +VECT_VAR_DECL(expected,int,16,4) [] = { 0xfff0, 0xfff1, 0xfff2, 0xfff2 }; >> +VECT_VAR_DECL(expected,int,32,2) [] = { 0xfffffff0, 0xfffffff0 }; >> +VECT_VAR_DECL(expected,int,64,1) [] = { 0x3333333333333333 }; >> +VECT_VAR_DECL(expected,uint,8,8) [] = { 0xf0, 0xf1, 0xf2, 0xf3, >> + 0xf3, 0xf3, 0xf3, 0xf3 }; >> +VECT_VAR_DECL(expected,uint,16,4) [] = { 0xfff0, 0xfff1, 0xfff1, 0xfff1 >> }; >> +VECT_VAR_DECL(expected,uint,32,2) [] = { 0xfffffff0, 0xfffffff0 }; >> +VECT_VAR_DECL(expected,uint,64,1) [] = { 0x3333333333333333 }; >> +VECT_VAR_DECL(expected,poly,8,8) [] = { 0x33, 0x33, 0x33, 0x33, >> + 0x33, 0x33, 0x33, 0x33 }; >> +VECT_VAR_DECL(expected,poly,16,4) [] = { 0x3333, 0x3333, 0x3333, 0x3333 >> }; >> +VECT_VAR_DECL(expected,hfloat,32,2) [] = { 0xc1800000, 0xc1780000 }; >> +VECT_VAR_DECL(expected,int,8,16) [] = { 0xf0, 0xf1, 0xf2, 0xf3, >> + 0xf4, 0xf4, 0xf4, 0xf4, >> + 0xf4, 0xf4, 0xf4, 0xf4, >> + 0xf4, 0xf4, 0xf4, 0xf4 }; >> +VECT_VAR_DECL(expected,int,16,8) [] = { 0xfff0, 0xfff1, 0xfff2, 0xfff3, >> + 0xfff3, 0xfff3, 0xfff3, 0xfff3 }; >> +VECT_VAR_DECL(expected,int,32,4) [] = { 0xfffffff0, 0xfffffff1, >> + 0xfffffff1, 0xfffffff1 }; >> +VECT_VAR_DECL(expected,int,64,2) [] = { 0x3333333333333333, >> + 0x3333333333333333 }; >> +VECT_VAR_DECL(expected,uint,8,16) [] = { 0xf0, 0xf1, 0xf2, 0xf3, >> + 0xf4, 0xf5, 0xf6, 0xf7, >> + 0xf8, 0xf9, 0xf9, 0xf9, >> + 0xf9, 0xf9, 0xf9, 0xf9 }; >> +VECT_VAR_DECL(expected,uint,16,8) [] = { 0xfff0, 0xfff1, 0xfff2, 0xfff2, >> + 0xfff2, 0xfff2, 0xfff2, 0xfff2 }; >> +VECT_VAR_DECL(expected,uint,32,4) [] = { 0xfffffff0, 0xfffffff1, >> + 0xfffffff1, 0xfffffff1 }; >> +VECT_VAR_DECL(expected,uint,64,2) [] = { 0x3333333333333333, >> + 0x3333333333333333 }; >> +VECT_VAR_DECL(expected,poly,8,16) [] = { 0x33, 0x33, 0x33, 0x33, >> + 0x33, 0x33, 0x33, 0x33, >> + 0x33, 0x33, 0x33, 0x33, >> + 0x33, 0x33, 0x33, 0x33 }; >> +VECT_VAR_DECL(expected,poly,16,8) [] = { 0x3333, 0x3333, 0x3333, 0x3333, >> + 0x3333, 0x3333, 0x3333, 0x3333 }; >> + >> +VECT_VAR_DECL(expected,hfloat,32,4) [] = { 0xc1800000, 0xc1700000, >> + 0xc1680000, 0xc1680000 }; >> +/* Expected results with special FP values. 
*/ >> +VECT_VAR_DECL(expected_nan,hfloat,32,4) [] = { 0x7fc00000, 0x7fc00000, >> + 0x7fc00000, 0x7fc00000 }; >> +VECT_VAR_DECL(expected_mnan,hfloat,32,4) [] = { 0x7fc00000, 0x7fc00000, >> + 0x7fc00000, 0x7fc00000 }; >> +VECT_VAR_DECL(expected_inf,hfloat,32,4) [] = { 0x3f800000, 0x3f800000, >> + 0x3f800000, 0x3f800000 }; >> +VECT_VAR_DECL(expected_minf,hfloat,32,4) [] = { 0xff800000, 0xff800000, >> + 0xff800000, 0xff800000 }; >> +VECT_VAR_DECL(expected_zero1,hfloat,32,4) [] = { 0x80000000, 0x80000000, >> + 0x80000000, 0x80000000 }; >> +VECT_VAR_DECL(expected_zero2,hfloat,32,4) [] = { 0x80000000, 0x80000000, >> + 0x80000000, 0x80000000 }; >> + >> +#include "binary_op_no64.inc" > > > vmax and vmin do have v<maxmin>_f64 and v<maxmin>q_f64 variants. My existing tests only cover armv7 so far. I do plan to expand them once they are all in GCC. > Otherwise, they look good to me(but I can't approve it). > > Tejas. >
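Tejas's "double negative" remark above refers to the `#ifndef NO_FLOAT_VARIANT` guards quoted from binary_op_no64.inc. A minimal sketch of the positive-logic spelling he alludes to — purely hypothetical, since the posted patch deliberately keeps NO_FLOAT_VARIANT to minimize changes to the imported code:

```c
/* Hypothetical positive-logic variant of the guard: per-intrinsic test
   files that do have float variants would define HAS_FLOAT_VARIANT
   before including binary_op_no64.inc, instead of integer-only tests
   defining NO_FLOAT_VARIANT.  */
#ifdef HAS_FLOAT_VARIANT
#define FLOAT_VARIANT(MACRO, VAR)  \
  MACRO(VAR, , float, f, 32, 2);   \
  MACRO(VAR, q, float, f, 32, 4)
#else
#define FLOAT_VARIANT(MACRO, VAR)
#endif
```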
On 16 January 2015 at 18:14, Marcus Shawcroft <marcus.shawcroft@gmail.com> wrote: > On 16 January 2015 at 16:21, Christophe Lyon <christophe.lyon@linaro.org> wrote: > >> My existing tests only cover armv7 so far. >> I do plan to expand them once they are all in GCC. >> >>> Otherwise, they look good to me(but I can't approve it). >>> >>> Tejas. >>> > > OK provided, as per the previous couple, that we don;t regression or > introduce new fails on aarch64[_be] or aarch32. This patch shows failures on aarch64 and aarch64_be for vmax and vmin when the input is -NaN. It's a corner case, and my reading of the ARM ARM is that the result should be the same as on aarch32. I haven't had time to look at it in more detail, though. So, not OK? > /Marcus
On 19 January 2015 at 14:29, Marcus Shawcroft <marcus.shawcroft@gmail.com> wrote: > On 16 January 2015 at 17:52, Christophe Lyon <christophe.lyon@linaro.org> wrote: > >>> OK provided, as per the previous couple, that we don;t regression or >>> introduce new fails on aarch64[_be] or aarch32. >> >> This patch shows failures on aarch64 and aarch64_be for vmax and vmin >> when the input is -NaN. >> It's a corner case, and my reading of the ARM ARM is that the result >> should the same as on aarch32. >> I haven't had time to look at it in more details though. >> So, not OK? > > They should have the same behaviour in aarch32 and aarch64. Did you > test on HW or a model? > I ran the tests on qemu for aarch32 and aarch64-linux, and on the foundation model for aarch64*-elf. > /Marcus
On 19 January 2015 at 17:54, Marcus Shawcroft <marcus.shawcroft@gmail.com> wrote: > On 19 January 2015 at 15:43, Christophe Lyon <christophe.lyon@linaro.org> wrote: >> On 19 January 2015 at 14:29, Marcus Shawcroft >> <marcus.shawcroft@gmail.com> wrote: >>> On 16 January 2015 at 17:52, Christophe Lyon <christophe.lyon@linaro.org> wrote: >>> >>>>> OK provided, as per the previous couple, that we don;t regression or >>>>> introduce new fails on aarch64[_be] or aarch32. >>>> >>>> This patch shows failures on aarch64 and aarch64_be for vmax and vmin >>>> when the input is -NaN. >>>> It's a corner case, and my reading of the ARM ARM is that the result >>>> should the same as on aarch32. >>>> I haven't had time to look at it in more details though. >>>> So, not OK? >>> >>> They should have the same behaviour in aarch32 and aarch64. Did you >>> test on HW or a model? >>> >> I ran the tests on qemu for aarch32 and aarch64-linux, and on the >> foundation model for aarch64*-elf. > > Leave this one out until we understand why it fails. /Marcus I've looked at this a bit more. We have fmax v0.4s, v0.4s, v1.4s where v0 is a vector of -NaN (0xffc00000) and v1 is a vector of 1. The output is still -NaN (0xffc00000), while the test expects defaultNaN (0x7fc00000). I have executed the test under GDB on AArch64 HW, and noticed that fpcr was 0. I forced it to have DN==1: set $fpcr=0x1000000 but this didn't change the result. Does setting fpcr.dn under gdb actually work? Christophe.
On 22 January 2015 at 12:19, Tejas Belagod <tejas.belagod@arm.com> wrote: > On 21/01/15 15:07, Christophe Lyon wrote: >> >> On 19 January 2015 at 17:54, Marcus Shawcroft >> <marcus.shawcroft@gmail.com> wrote: >>> >>> On 19 January 2015 at 15:43, Christophe Lyon <christophe.lyon@linaro.org> >>> wrote: >>>> >>>> On 19 January 2015 at 14:29, Marcus Shawcroft >>>> <marcus.shawcroft@gmail.com> wrote: >>>>> >>>>> On 16 January 2015 at 17:52, Christophe Lyon >>>>> <christophe.lyon@linaro.org> wrote: >>>>> >>>>>>> OK provided, as per the previous couple, that we don;t regression or >>>>>>> introduce new fails on aarch64[_be] or aarch32. >>>>>> >>>>>> >>>>>> This patch shows failures on aarch64 and aarch64_be for vmax and vmin >>>>>> when the input is -NaN. >>>>>> It's a corner case, and my reading of the ARM ARM is that the result >>>>>> should the same as on aarch32. >>>>>> I haven't had time to look at it in more details though. >>>>>> So, not OK? >>>>> >>>>> >>>>> They should have the same behaviour in aarch32 and aarch64. Did you >>>>> test on HW or a model? >>>>> >>>> I ran the tests on qemu for aarch32 and aarch64-linux, and on the >>>> foundation model for aarch64*-elf. >>> >>> >>> Leave this one out until we understand why it fails. /Marcus >> >> >> I've looked at this a bit more. >> We have >> fmax v0.4s, v0.4s, v1.4s >> where v0 is a vector of -NaN (0xffc00000) and v1 is a vector of 1. >> >> The output is still -NaN (0xffc00000), while the test expects >> defaultNaN (0x7fc00000). >> > > In the AArch32 execution state, Advanced SIMD FP arithmetic always uses the > DefaultNaN setting regardless of the DN-bit value in the FPSCR. In AArch64 > execution state, result of Advanced SIMD FP arithmetic operations depend on > the value of the DN-bit i.e. either propagate the input NaN or generate > DefaultNaN depending on the value of DN. Maybe I'm using an outdated doc. On page 2282 of ARMv8 ARM rev C, I can see only the latter (no diff between aarch32 and aarch64 in FPProcessNan pseudo-code) > If you're running your test in the AArch64 execution state, you'd want to > define the DN bit and modify the expected results accordingly or have the > test poll at runtime what the DN-bit is set to and check expected results > dynamically. Makes sense, I hadn't noticed the different aarch64 spec here. > I think the test already has expected behaviour for AArch32 execution state > by expecting DefaultNaN regardless. Yes. >> I have executed the test under GDB on AArch64 HW, and noticed that fpcr >> was 0. >> I forced it to have DN==1: >> set $fpcr=0x1000000 >> but this didn't change the result. >> >> Does setting fpcr.dn under gdb actually work? >> > > It should. Possibly a bug, patches welcome :-). > :-)
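For reference, the case under discussion can be reproduced outside the testsuite framework with a small standalone program (a sketch, not part of the patch): per Tejas's explanation, AArch32 Advanced SIMD always produces the default NaN 0x7fc00000, while AArch64 by default propagates the input -NaN 0xffc00000 unless FPCR.DN is set.

```c
/* Standalone sketch of the failing case: vmax of a vector of -NaN
   against a vector of 1.0f.  Expected lane value: 0x7fc00000 (default
   NaN) on AArch32 Advanced SIMD; 0xffc00000 (input NaN propagated) on
   AArch64 unless FPCR.DN is set.  */
#include <arm_neon.h>
#include <math.h>
#include <stdio.h>
#include <string.h>

int main (void)
{
  float32x4_t v0 = vdupq_n_f32 (-NAN);   /* -NaN, 0xffc00000 as in the test.  */
  float32x4_t v1 = vdupq_n_f32 (1.0f);
  float32x4_t r = vmaxq_f32 (v0, v1);

  float lane0 = vgetq_lane_f32 (r, 0);
  unsigned int bits;
  memcpy (&bits, &lane0, sizeof bits);
  printf ("vmaxq_f32(-NaN, 1.0f) lane 0 = 0x%08x\n", bits);
  return 0;
}
```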
On 22 January 2015 at 16:22, Tejas Belagod <tejas.belagod@arm.com> wrote: > On 22/01/15 14:28, Christophe Lyon wrote: >> >> On 22 January 2015 at 12:19, Tejas Belagod <tejas.belagod@arm.com> wrote: >>> >>> On 21/01/15 15:07, Christophe Lyon wrote: >>>> >>>> >>>> On 19 January 2015 at 17:54, Marcus Shawcroft >>>> <marcus.shawcroft@gmail.com> wrote: >>>>> >>>>> >>>>> On 19 January 2015 at 15:43, Christophe Lyon >>>>> <christophe.lyon@linaro.org> >>>>> wrote: >>>>>> >>>>>> >>>>>> On 19 January 2015 at 14:29, Marcus Shawcroft >>>>>> <marcus.shawcroft@gmail.com> wrote: >>>>>>> >>>>>>> >>>>>>> On 16 January 2015 at 17:52, Christophe Lyon >>>>>>> <christophe.lyon@linaro.org> wrote: >>>>>>> >>>>>>>>> OK provided, as per the previous couple, that we don;t regression >>>>>>>>> or >>>>>>>>> introduce new fails on aarch64[_be] or aarch32. >>>>>>>> >>>>>>>> >>>>>>>> >>>>>>>> This patch shows failures on aarch64 and aarch64_be for vmax and >>>>>>>> vmin >>>>>>>> when the input is -NaN. >>>>>>>> It's a corner case, and my reading of the ARM ARM is that the result >>>>>>>> should the same as on aarch32. >>>>>>>> I haven't had time to look at it in more details though. >>>>>>>> So, not OK? >>>>>>> >>>>>>> >>>>>>> >>>>>>> They should have the same behaviour in aarch32 and aarch64. Did you >>>>>>> test on HW or a model? >>>>>>> >>>>>> I ran the tests on qemu for aarch32 and aarch64-linux, and on the >>>>>> foundation model for aarch64*-elf. >>>>> >>>>> >>>>> >>>>> Leave this one out until we understand why it fails. /Marcus >>>> >>>> >>>> >>>> I've looked at this a bit more. >>>> We have >>>> fmax v0.4s, v0.4s, v1.4s >>>> where v0 is a vector of -NaN (0xffc00000) and v1 is a vector of 1. >>>> >>>> The output is still -NaN (0xffc00000), while the test expects >>>> defaultNaN (0x7fc00000). >>>> >>> >>> In the AArch32 execution state, Advanced SIMD FP arithmetic always uses >>> the >>> DefaultNaN setting regardless of the DN-bit value in the FPSCR. In >>> AArch64 >>> execution state, result of Advanced SIMD FP arithmetic operations depend >>> on >>> the value of the DN-bit i.e. either propagate the input NaN or generate >>> DefaultNaN depending on the value of DN. >> >> >> Maybe I'm using an outdated doc. On page 2282 of ARMv8 ARM rev C, I >> can see only the latter (no diff between aarch32 and aarch64 in >> FPProcessNan pseudo-code) >> > > If you see pg. 4005 in the same doc(rev C), you'll see the FPSCR spec - > under DN: > > "The value of this bit only controls scalar floating-point arithmetic. > Advanced SIMD arithmetic always uses the Default NaN setting, regardless of > the value of the DN bit." > > Also on page 3180 for the description of VMAX(vector FP), it says: > " > * max(+0.0, -0.0) = +0.0 > * If any input is a NaN, the corresponding result element is the default > NaN. > " > Oops I was looking at FMAX (vector) pg 936. > The pseudocode for FPMax () on pg. 3180 passes StandardFPSCRValue() to > FPMax() which is on pg. 2285 > > // StandardFPSCRValue() > // ==================== > FPCRType StandardFPSCRValue() > return ‘00000’ : FPSCR.AHP : ‘11000000000000000000000000’ > > Here bit-25(FPSCR.DN) is set to 1. > So, we should get defaultNaN too on aarch64, and no need to try to force DN to 1 in gdb? What can be wrong? > Thanks, > Tejas. > > >>> If you're running your test in the AArch64 execution state, you'd want to >>> define the DN bit and modify the expected results accordingly or have the >>> test poll at runtime what the DN-bit is set to and check expected results >>> dynamically. 
>> >> Makes sense, I hadn't noticed the different aarch64 spec here. >> >>> I think the test already has expected behaviour for AArch32 execution >>> state >>> by expecting DefaultNaN regardless. >> >> Yes. >> >>>> I have executed the test under GDB on AArch64 HW, and noticed that fpcr >>>> was 0. >>>> I forced it to have DN==1: >>>> set $fpcr=0x1000000 >>>> but this didn't change the result. >>>> >>>> Does setting fpcr.dn under gdb actually work? >>>> >>> >>> It should. Possibly a bug, patches welcome :-). >>> >> :-) >> > >
On 23 January 2015 at 11:18, Tejas Belagod <tejas.belagod@arm.com> wrote: > On 22/01/15 21:31, Christophe Lyon wrote: >> >> On 22 January 2015 at 16:22, Tejas Belagod <tejas.belagod@arm.com> wrote: >>> >>> On 22/01/15 14:28, Christophe Lyon wrote: >>>> >>>> >>>> On 22 January 2015 at 12:19, Tejas Belagod <tejas.belagod@arm.com> >>>> wrote: >>>>> >>>>> >>>>> On 21/01/15 15:07, Christophe Lyon wrote: >>>>>> >>>>>> >>>>>> >>>>>> On 19 January 2015 at 17:54, Marcus Shawcroft >>>>>> <marcus.shawcroft@gmail.com> wrote: >>>>>>> >>>>>>> >>>>>>> >>>>>>> On 19 January 2015 at 15:43, Christophe Lyon >>>>>>> <christophe.lyon@linaro.org> >>>>>>> wrote: >>>>>>>> >>>>>>>> >>>>>>>> >>>>>>>> On 19 January 2015 at 14:29, Marcus Shawcroft >>>>>>>> <marcus.shawcroft@gmail.com> wrote: >>>>>>>>> >>>>>>>>> >>>>>>>>> >>>>>>>>> On 16 January 2015 at 17:52, Christophe Lyon >>>>>>>>> <christophe.lyon@linaro.org> wrote: >>>>>>>>> >>>>>>>>>>> OK provided, as per the previous couple, that we don;t regression >>>>>>>>>>> or >>>>>>>>>>> introduce new fails on aarch64[_be] or aarch32. >>>>>>>>>> >>>>>>>>>> >>>>>>>>>> >>>>>>>>>> >>>>>>>>>> This patch shows failures on aarch64 and aarch64_be for vmax and >>>>>>>>>> vmin >>>>>>>>>> when the input is -NaN. >>>>>>>>>> It's a corner case, and my reading of the ARM ARM is that the >>>>>>>>>> result >>>>>>>>>> should the same as on aarch32. >>>>>>>>>> I haven't had time to look at it in more details though. >>>>>>>>>> So, not OK? >>>>>>>>> >>>>>>>>> >>>>>>>>> >>>>>>>>> >>>>>>>>> They should have the same behaviour in aarch32 and aarch64. Did you >>>>>>>>> test on HW or a model? >>>>>>>>> >>>>>>>> I ran the tests on qemu for aarch32 and aarch64-linux, and on the >>>>>>>> foundation model for aarch64*-elf. >>>>>>> >>>>>>> >>>>>>> >>>>>>> >>>>>>> Leave this one out until we understand why it fails. /Marcus >>>>>> >>>>>> >>>>>> >>>>>> >>>>>> I've looked at this a bit more. >>>>>> We have >>>>>> fmax v0.4s, v0.4s, v1.4s >>>>>> where v0 is a vector of -NaN (0xffc00000) and v1 is a vector of 1. >>>>>> >>>>>> The output is still -NaN (0xffc00000), while the test expects >>>>>> defaultNaN (0x7fc00000). >>>>>> >>>>> >>>>> In the AArch32 execution state, Advanced SIMD FP arithmetic always uses >>>>> the >>>>> DefaultNaN setting regardless of the DN-bit value in the FPSCR. In >>>>> AArch64 >>>>> execution state, result of Advanced SIMD FP arithmetic operations >>>>> depend >>>>> on >>>>> the value of the DN-bit i.e. either propagate the input NaN or generate >>>>> DefaultNaN depending on the value of DN. >>>> >>>> >>>> >>>> Maybe I'm using an outdated doc. On page 2282 of ARMv8 ARM rev C, I >>>> can see only the latter (no diff between aarch32 and aarch64 in >>>> FPProcessNan pseudo-code) >>>> >>> >>> If you see pg. 4005 in the same doc(rev C), you'll see the FPSCR spec - >>> under DN: >>> >>> "The value of this bit only controls scalar floating-point arithmetic. >>> Advanced SIMD arithmetic always uses the Default NaN setting, regardless >>> of >>> the value of the DN bit." >>> >>> Also on page 3180 for the description of VMAX(vector FP), it says: >>> " >>> * max(+0.0, -0.0) = +0.0 >>> * If any input is a NaN, the corresponding result element is the default >>> NaN. >>> " >>> >> Oops I was looking at FMAX (vector) pg 936. >> >>> The pseudocode for FPMax () on pg. 3180 passes StandardFPSCRValue() to >>> FPMax() which is on pg. 
2285 >>> >>> // StandardFPSCRValue() >>> // ==================== >>> FPCRType StandardFPSCRValue() >>> return ‘00000’ : FPSCR.AHP : ‘11000000000000000000000000’ >>> >>> Here bit-25(FPSCR.DN) is set to 1. >>> >> >> So, we should get defaultNaN too on aarch64, and no need to try to >> force DN to 1 in gdb? >> >> What can be wrong? >> > > On pg 3180, I see VMAX(FPSIMD) for A32/T32, not A64. I hope we're reading > the same document. > > Regardless of the page number, if you see the pseudocode for VMAX(FPSIMD) > for AArch32, StandardFPSCRValue() (i.e. DN = 1) is passed to FPMax() which > means generate DefaultNaN() regardless. > > OTOH, on pg 936, you have FMAX(vector) for A64 where FPMax() in the > pseudocode gets just FPCR. > > Ok, that was my initial understanding but our discussion confused me. And that's why I tried to force DN = 1 in gdb before single-stepping over fmax v0.4s, v0.4s, v1.4s but it changed nothing :-( Hence my question about a gdb possible bug or misuse. I'll try modifying the test to have it force DN=1. > Thanks, > Tejas. > > >>> Thanks, >>> Tejas. >>> >>> >>>>> If you're running your test in the AArch64 execution state, you'd want >>>>> to >>>>> define the DN bit and modify the expected results accordingly or have >>>>> the >>>>> test poll at runtime what the DN-bit is set to and check expected >>>>> results >>>>> dynamically. >>>> >>>> >>>> Makes sense, I hadn't noticed the different aarch64 spec here. >>>> >>>>> I think the test already has expected behaviour for AArch32 execution >>>>> state >>>>> by expecting DefaultNaN regardless. >>>> >>>> >>>> Yes. >>>> >>>>>> I have executed the test under GDB on AArch64 HW, and noticed that >>>>>> fpcr >>>>>> was 0. >>>>>> I forced it to have DN==1: >>>>>> set $fpcr=0x1000000 >>>>>> but this didn't change the result. >>>>>> >>>>>> Does setting fpcr.dn under gdb actually work? >>>>>> >>>>> >>>>> It should. Possibly a bug, patches welcome :-). >>>>> >>>> :-) >>>> >>> >>> >> > >
On 23 January 2015 at 12:42, Christophe Lyon <christophe.lyon@linaro.org> wrote: > On 23 January 2015 at 11:18, Tejas Belagod <tejas.belagod@arm.com> wrote: >> On 22/01/15 21:31, Christophe Lyon wrote: >>> >>> On 22 January 2015 at 16:22, Tejas Belagod <tejas.belagod@arm.com> wrote: >>>> >>>> On 22/01/15 14:28, Christophe Lyon wrote: >>>>> >>>>> >>>>> On 22 January 2015 at 12:19, Tejas Belagod <tejas.belagod@arm.com> >>>>> wrote: >>>>>> >>>>>> >>>>>> On 21/01/15 15:07, Christophe Lyon wrote: >>>>>>> >>>>>>> >>>>>>> >>>>>>> On 19 January 2015 at 17:54, Marcus Shawcroft >>>>>>> <marcus.shawcroft@gmail.com> wrote: >>>>>>>> >>>>>>>> >>>>>>>> >>>>>>>> On 19 January 2015 at 15:43, Christophe Lyon >>>>>>>> <christophe.lyon@linaro.org> >>>>>>>> wrote: >>>>>>>>> >>>>>>>>> >>>>>>>>> >>>>>>>>> On 19 January 2015 at 14:29, Marcus Shawcroft >>>>>>>>> <marcus.shawcroft@gmail.com> wrote: >>>>>>>>>> >>>>>>>>>> >>>>>>>>>> >>>>>>>>>> On 16 January 2015 at 17:52, Christophe Lyon >>>>>>>>>> <christophe.lyon@linaro.org> wrote: >>>>>>>>>> >>>>>>>>>>>> OK provided, as per the previous couple, that we don;t regression >>>>>>>>>>>> or >>>>>>>>>>>> introduce new fails on aarch64[_be] or aarch32. >>>>>>>>>>> >>>>>>>>>>> >>>>>>>>>>> >>>>>>>>>>> >>>>>>>>>>> This patch shows failures on aarch64 and aarch64_be for vmax and >>>>>>>>>>> vmin >>>>>>>>>>> when the input is -NaN. >>>>>>>>>>> It's a corner case, and my reading of the ARM ARM is that the >>>>>>>>>>> result >>>>>>>>>>> should the same as on aarch32. >>>>>>>>>>> I haven't had time to look at it in more details though. >>>>>>>>>>> So, not OK? >>>>>>>>>> >>>>>>>>>> >>>>>>>>>> >>>>>>>>>> >>>>>>>>>> They should have the same behaviour in aarch32 and aarch64. Did you >>>>>>>>>> test on HW or a model? >>>>>>>>>> >>>>>>>>> I ran the tests on qemu for aarch32 and aarch64-linux, and on the >>>>>>>>> foundation model for aarch64*-elf. >>>>>>>> >>>>>>>> >>>>>>>> >>>>>>>> >>>>>>>> Leave this one out until we understand why it fails. /Marcus >>>>>>> >>>>>>> >>>>>>> >>>>>>> >>>>>>> I've looked at this a bit more. >>>>>>> We have >>>>>>> fmax v0.4s, v0.4s, v1.4s >>>>>>> where v0 is a vector of -NaN (0xffc00000) and v1 is a vector of 1. >>>>>>> >>>>>>> The output is still -NaN (0xffc00000), while the test expects >>>>>>> defaultNaN (0x7fc00000). >>>>>>> >>>>>> >>>>>> In the AArch32 execution state, Advanced SIMD FP arithmetic always uses >>>>>> the >>>>>> DefaultNaN setting regardless of the DN-bit value in the FPSCR. In >>>>>> AArch64 >>>>>> execution state, result of Advanced SIMD FP arithmetic operations >>>>>> depend >>>>>> on >>>>>> the value of the DN-bit i.e. either propagate the input NaN or generate >>>>>> DefaultNaN depending on the value of DN. >>>>> >>>>> >>>>> >>>>> Maybe I'm using an outdated doc. On page 2282 of ARMv8 ARM rev C, I >>>>> can see only the latter (no diff between aarch32 and aarch64 in >>>>> FPProcessNan pseudo-code) >>>>> >>>> >>>> If you see pg. 4005 in the same doc(rev C), you'll see the FPSCR spec - >>>> under DN: >>>> >>>> "The value of this bit only controls scalar floating-point arithmetic. >>>> Advanced SIMD arithmetic always uses the Default NaN setting, regardless >>>> of >>>> the value of the DN bit." >>>> >>>> Also on page 3180 for the description of VMAX(vector FP), it says: >>>> " >>>> * max(+0.0, -0.0) = +0.0 >>>> * If any input is a NaN, the corresponding result element is the default >>>> NaN. >>>> " >>>> >>> Oops I was looking at FMAX (vector) pg 936. >>> >>>> The pseudocode for FPMax () on pg. 
3180 passes StandardFPSCRValue() to >>>> FPMax() which is on pg. 2285 >>>> >>>> // StandardFPSCRValue() >>>> // ==================== >>>> FPCRType StandardFPSCRValue() >>>> return ‘00000’ : FPSCR.AHP : ‘11000000000000000000000000’ >>>> >>>> Here bit-25(FPSCR.DN) is set to 1. >>>> >>> >>> So, we should get defaultNaN too on aarch64, and no need to try to >>> force DN to 1 in gdb? >>> >>> What can be wrong? >>> >> >> On pg 3180, I see VMAX(FPSIMD) for A32/T32, not A64. I hope we're reading >> the same document. >> >> Regardless of the page number, if you see the pseudocode for VMAX(FPSIMD) >> for AArch32, StandardFPSCRValue() (i.e. DN = 1) is passed to FPMax() which >> means generate DefaultNaN() regardless. >> >> OTOH, on pg 936, you have FMAX(vector) for A64 where FPMax() in the >> pseudocode gets just FPCR. >> >> > Ok, that was my initial understanding but our discussion confused me. > > And that's why I tried to force DN = 1 in gdb before single-stepping over > fmax v0.4s, v0.4s, v1.4s > > but it changed nothing :-( > Hence my question about a gdb possible bug or misuse. Hmm... user error, I missed one bit set $fpcr=0x2000000 works under gdb. > I'll try modifying the test to have it force DN=1. > Forcing DN=1 in the test makes it pass. I am going to look at adding that cleanly to my test, and resubmit it. Thanks, and sorry for the noise. >> Thanks, >> Tejas. >> >> >>>> Thanks, >>>> Tejas. >>>> >>>> >>>>>> If you're running your test in the AArch64 execution state, you'd want >>>>>> to >>>>>> define the DN bit and modify the expected results accordingly or have >>>>>> the >>>>>> test poll at runtime what the DN-bit is set to and check expected >>>>>> results >>>>>> dynamically. >>>>> >>>>> >>>>> Makes sense, I hadn't noticed the different aarch64 spec here. >>>>> >>>>>> I think the test already has expected behaviour for AArch32 execution >>>>>> state >>>>>> by expecting DefaultNaN regardless. >>>>> >>>>> >>>>> Yes. >>>>> >>>>>>> I have executed the test under GDB on AArch64 HW, and noticed that >>>>>>> fpcr >>>>>>> was 0. >>>>>>> I forced it to have DN==1: >>>>>>> set $fpcr=0x1000000 >>>>>>> but this didn't change the result. >>>>>>> >>>>>>> Does setting fpcr.dn under gdb actually work? >>>>>>> >>>>>> >>>>>> It should. Possibly a bug, patches welcome :-). >>>>>> >>>>> :-) >>>>> >>>> >>>> >>> >> >>
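"Adding that cleanly to my test" amounts to setting FPCR.DN before the Advanced SIMD operations when the test is built for AArch64. A minimal sketch of such a helper, with a made-up name and making no claim about how the resubmitted patch actually spells it:

```c
/* Sketch of a helper to force default-NaN mode on AArch64, so that
   vmax/vmin of a NaN input yields 0x7fc00000 and matches the AArch32
   expectation already encoded in expected_nan/expected_mnan.  */
#ifdef __aarch64__
static inline void
force_default_nan_mode (void)
{
  unsigned long long fpcr;
  __asm__ volatile ("mrs %0, fpcr" : "=r" (fpcr));
  fpcr |= 1ULL << 25;                    /* FPCR.DN is bit 25.  */
  __asm__ volatile ("msr fpcr, %0" : : "r" (fpcr));
}
#else
static inline void
force_default_nan_mode (void)
{
  /* AArch32 Advanced SIMD always uses the default NaN; nothing to do.  */
}
#endif
```

Calling such a helper before the FP special-value checks in binary_op_no64.inc is consistent with Christophe's observation that forcing DN=1 makes the test pass on AArch64.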
diff --git a/gcc/testsuite/gcc.target/aarch64/advsimd-intrinsics/binary_op_no64.inc b/gcc/testsuite/gcc.target/aarch64/advsimd-intrinsics/binary_op_no64.inc new file mode 100644 index 0000000..36efe3a --- /dev/null +++ b/gcc/testsuite/gcc.target/aarch64/advsimd-intrinsics/binary_op_no64.inc @@ -0,0 +1,120 @@ +/* Can't use the standard binary_op.inc template because vmax has no + 64 bits variant. */ + +#include <math.h> + +#define FNNAME1(NAME) exec_ ## NAME +#define FNNAME(NAME) FNNAME1(NAME) + +void FNNAME (INSN_NAME) (void) +{ + int i; + + /* Basic test: y=vmax(x,x), then store the result. */ +#define TEST_BINARY_OP1(INSN, Q, T1, T2, W, N) \ + VECT_VAR(vector_res, T1, W, N) = \ + INSN##Q##_##T2##W(VECT_VAR(vector, T1, W, N), \ + VECT_VAR(vector2, T1, W, N)); \ + vst1##Q##_##T2##W(VECT_VAR(result, T1, W, N), VECT_VAR(vector_res, T1, W, N)) + +#define TEST_BINARY_OP(INSN, Q, T1, T2, W, N) \ + TEST_BINARY_OP1(INSN, Q, T1, T2, W, N) \ + + DECL_VARIABLE_ALL_VARIANTS(vector); + DECL_VARIABLE_ALL_VARIANTS(vector2); + DECL_VARIABLE_ALL_VARIANTS(vector_res); + + clean_results (); + + /* Initialize input "vector" from "buffer". */ + TEST_MACRO_ALL_VARIANTS_2_5(VLOAD, vector, buffer); +#ifndef NO_FLOAT_VARIANT + VLOAD(vector, buffer, , float, f, 32, 2); + VLOAD(vector, buffer, q, float, f, 32, 4); +#endif + + /* Choose init value arbitrarily, will be used as comparison value. */ + VDUP(vector2, , int, s, 8, 8, -13); + VDUP(vector2, , int, s, 16, 4, -14); + VDUP(vector2, , int, s, 32, 2, -16); + VDUP(vector2, , uint, u, 8, 8, 0xf3); + VDUP(vector2, , uint, u, 16, 4, 0xfff1); + VDUP(vector2, , uint, u, 32, 2, 0xfffffff0); + VDUP(vector2, q, int, s, 8, 16, -12); + VDUP(vector2, q, int, s, 16, 8, -13); + VDUP(vector2, q, int, s, 32, 4, -15); + VDUP(vector2, q, uint, u, 8, 16, 0xf9); + VDUP(vector2, q, uint, u, 16, 8, 0xfff2); + VDUP(vector2, q, uint, u, 32, 4, 0xfffffff1); +#ifndef NO_FLOAT_VARIANT + VDUP(vector2, , float, f, 32, 2, -15.5f); + VDUP(vector2, q, float, f, 32, 4, -14.5f); +#endif + +#ifndef NO_FLOAT_VARIANT +#define FLOAT_VARIANT(MACRO, VAR) \ + MACRO(VAR, , float, f, 32, 2); \ + MACRO(VAR, q, float, f, 32, 4) +#else +#define FLOAT_VARIANT(MACRO, VAR) +#endif + +#define TEST_MACRO_NO64BIT_VARIANT_1_5(MACRO, VAR) \ + MACRO(VAR, , int, s, 8, 8); \ + MACRO(VAR, , int, s, 16, 4); \ + MACRO(VAR, , int, s, 32, 2); \ + MACRO(VAR, , uint, u, 8, 8); \ + MACRO(VAR, , uint, u, 16, 4); \ + MACRO(VAR, , uint, u, 32, 2); \ + MACRO(VAR, q, int, s, 8, 16); \ + MACRO(VAR, q, int, s, 16, 8); \ + MACRO(VAR, q, int, s, 32, 4); \ + MACRO(VAR, q, uint, u, 8, 16); \ + MACRO(VAR, q, uint, u, 16, 8); \ + MACRO(VAR, q, uint, u, 32, 4); \ + FLOAT_VARIANT(MACRO, VAR) + + /* Apply a binary operator named INSN_NAME. */ + TEST_MACRO_NO64BIT_VARIANT_1_5(TEST_BINARY_OP, INSN_NAME); + + CHECK_RESULTS (TEST_MSG, ""); + +#ifndef NO_FLOAT_VARIANT + /* Extra FP tests with special values (NaN, ....) 
*/ + VDUP(vector, q, float, f, 32, 4, 1.0f); + VDUP(vector2, q, float, f, 32, 4, NAN); + TEST_BINARY_OP(INSN_NAME, q, float, f, 32, 4); + CHECK_FP(TEST_MSG, float, 32, 4, PRIx32, expected_nan, " FP special (NaN)"); + + VDUP(vector, q, float, f, 32, 4, -NAN); + VDUP(vector2, q, float, f, 32, 4, 1.0f); + TEST_BINARY_OP(INSN_NAME, q, float, f, 32, 4); + CHECK_FP(TEST_MSG, float, 32, 4, PRIx32, expected_mnan, " FP special (-NaN)"); + + VDUP(vector, q, float, f, 32, 4, 1.0f); + VDUP(vector2, q, float, f, 32, 4, HUGE_VALF); + TEST_BINARY_OP(INSN_NAME, q, float, f, 32, 4); + CHECK_FP(TEST_MSG, float, 32, 4, PRIx32, expected_inf, " FP special (inf)"); + + VDUP(vector, q, float, f, 32, 4, -HUGE_VALF); + VDUP(vector2, q, float, f, 32, 4, 1.0f); + TEST_BINARY_OP(INSN_NAME, q, float, f, 32, 4); + CHECK_FP(TEST_MSG, float, 32, 4, PRIx32, expected_minf, " FP special (-inf)"); + + VDUP(vector, q, float, f, 32, 4, 0.0f); + VDUP(vector2, q, float, f, 32, 4, -0.0f); + TEST_BINARY_OP(INSN_NAME, q, float, f, 32, 4); + CHECK_FP(TEST_MSG, float, 32, 4, PRIx32, expected_zero1, " FP special (-0.0)"); + + VDUP(vector, q, float, f, 32, 4, -0.0f); + VDUP(vector2, q, float, f, 32, 4, 0.0f); + TEST_BINARY_OP(INSN_NAME, q, float, f, 32, 4); + CHECK_FP(TEST_MSG, float, 32, 4, PRIx32, expected_zero2, " FP special (-0.0)"); +#endif +} + +int main (void) +{ + FNNAME (INSN_NAME) (); + return 0; +} diff --git a/gcc/testsuite/gcc.target/aarch64/advsimd-intrinsics/vhadd.c b/gcc/testsuite/gcc.target/aarch64/advsimd-intrinsics/vhadd.c new file mode 100644 index 0000000..0c67df9 --- /dev/null +++ b/gcc/testsuite/gcc.target/aarch64/advsimd-intrinsics/vhadd.c @@ -0,0 +1,54 @@ +#include <arm_neon.h> +#include "arm-neon-ref.h" +#include "compute-ref-data.h" + +#define INSN_NAME vhadd +#define TEST_MSG "VHADD/VHADDQ" + +#define NO_FLOAT_VARIANT + +/* Expected results. 
*/ +VECT_VAR_DECL(expected,int,8,8) [] = { 0xf1, 0xf2, 0xf2, 0xf3, + 0xf3, 0xf4, 0xf4, 0xf5 }; +VECT_VAR_DECL(expected,int,16,4) [] = { 0xfff1, 0xfff1, 0xfff2, 0xfff2 }; +VECT_VAR_DECL(expected,int,32,2) [] = { 0xfffffff0, 0xfffffff0 }; +VECT_VAR_DECL(expected,int,64,1) [] = { 0x3333333333333333 }; +VECT_VAR_DECL(expected,uint,8,8) [] = { 0xf1, 0xf2, 0xf2, 0xf3, + 0xf3, 0xf4, 0xf4, 0xf5 }; +VECT_VAR_DECL(expected,uint,16,4) [] = { 0xfff0, 0xfff1, 0xfff1, 0xfff2 }; +VECT_VAR_DECL(expected,uint,32,2) [] = { 0xfffffff0, 0xfffffff0 }; +VECT_VAR_DECL(expected,uint,64,1) [] = { 0x3333333333333333 }; +VECT_VAR_DECL(expected,poly,8,8) [] = { 0x33, 0x33, 0x33, 0x33, + 0x33, 0x33, 0x33, 0x33 }; +VECT_VAR_DECL(expected,poly,16,4) [] = { 0x3333, 0x3333, 0x3333, 0x3333 }; +VECT_VAR_DECL(expected,hfloat,32,2) [] = { 0x33333333, 0x33333333 }; +VECT_VAR_DECL(expected,int,8,16) [] = { 0xf2, 0xf2, 0xf3, 0xf3, + 0xf4, 0xf4, 0xf5, 0xf5, + 0xf6, 0xf6, 0xf7, 0xf7, + 0xf8, 0xf8, 0xf9, 0xf9 }; +VECT_VAR_DECL(expected,int,16,8) [] = { 0xfff1, 0xfff2, 0xfff2, 0xfff3, + 0xfff3, 0xfff4, 0xfff4, 0xfff5 }; +VECT_VAR_DECL(expected,int,32,4) [] = { 0xfffffff0, 0xfffffff1, + 0xfffffff1, 0xfffffff2 }; +VECT_VAR_DECL(expected,int,64,2) [] = { 0x3333333333333333, + 0x3333333333333333 }; +VECT_VAR_DECL(expected,uint,8,16) [] = { 0xf4, 0xf5, 0xf5, 0xf6, + 0xf6, 0xf7, 0xf7, 0xf8, + 0xf8, 0xf9, 0xf9, 0xfa, + 0xfa, 0xfb, 0xfb, 0xfc }; +VECT_VAR_DECL(expected,uint,16,8) [] = { 0xfff1, 0xfff1, 0xfff2, 0xfff2, + 0xfff3, 0xfff3, 0xfff4, 0xfff4 }; +VECT_VAR_DECL(expected,uint,32,4) [] = { 0xfffffff0, 0xfffffff1, + 0xfffffff1, 0xfffffff2 }; +VECT_VAR_DECL(expected,uint,64,2) [] = { 0x3333333333333333, + 0x3333333333333333 }; +VECT_VAR_DECL(expected,poly,8,16) [] = { 0x33, 0x33, 0x33, 0x33, + 0x33, 0x33, 0x33, 0x33, + 0x33, 0x33, 0x33, 0x33, + 0x33, 0x33, 0x33, 0x33 }; +VECT_VAR_DECL(expected,poly,16,8) [] = { 0x3333, 0x3333, 0x3333, 0x3333, + 0x3333, 0x3333, 0x3333, 0x3333 }; +VECT_VAR_DECL(expected,hfloat,32,4) [] = { 0x33333333, 0x33333333, + 0x33333333, 0x33333333 }; + +#include "binary_op_no64.inc" diff --git a/gcc/testsuite/gcc.target/aarch64/advsimd-intrinsics/vhsub.c b/gcc/testsuite/gcc.target/aarch64/advsimd-intrinsics/vhsub.c new file mode 100644 index 0000000..2431288 --- /dev/null +++ b/gcc/testsuite/gcc.target/aarch64/advsimd-intrinsics/vhsub.c @@ -0,0 +1,52 @@ +#include <arm_neon.h> +#include "arm-neon-ref.h" +#include "compute-ref-data.h" + +#define INSN_NAME vhsub +#define TEST_MSG "VHSUB/VHSUBQ" + +#define NO_FLOAT_VARIANT + +/* Expected results. 
*/ +VECT_VAR_DECL(expected,int,8,8) [] = { 0xfe, 0xff, 0xff, 0x0, + 0x0, 0x1, 0x1, 0x2 }; +VECT_VAR_DECL(expected,int,16,4) [] = { 0xffff, 0xffff, 0x0, 0x0 }; +VECT_VAR_DECL(expected,int,32,2) [] = { 0x0, 0x0 }; +VECT_VAR_DECL(expected,int,64,1) [] = { 0x3333333333333333 }; +VECT_VAR_DECL(expected,uint,8,8) [] = { 0xfe, 0xff, 0xff, 0x0, + 0x0, 0x1, 0x1, 0x2 }; +VECT_VAR_DECL(expected,uint,16,4) [] = { 0xffff, 0x0, 0x0, 0x1 }; +VECT_VAR_DECL(expected,uint,32,2) [] = { 0x0, 0x0 }; +VECT_VAR_DECL(expected,uint,64,1) [] = { 0x3333333333333333 }; +VECT_VAR_DECL(expected,poly,8,8) [] = { 0x33, 0x33, 0x33, 0x33, + 0x33, 0x33, 0x33, 0x33 }; +VECT_VAR_DECL(expected,poly,16,4) [] = { 0x3333, 0x3333, 0x3333, 0x3333 }; +VECT_VAR_DECL(expected,hfloat,32,2) [] = { 0x33333333, 0x33333333 }; +VECT_VAR_DECL(expected,int,8,16) [] = { 0xfe, 0xfe, 0xff, 0xff, + 0x0, 0x0, 0x1, 0x1, + 0x2, 0x2, 0x3, 0x3, + 0x4, 0x4, 0x5, 0x5 }; +VECT_VAR_DECL(expected,int,16,8) [] = { 0xfffe, 0xffff, 0xffff, 0x0, + 0x0, 0x1, 0x1, 0x2 }; +VECT_VAR_DECL(expected,int,32,4) [] = { 0xffffffff, 0x0, 0x0, 0x1 }; +VECT_VAR_DECL(expected,int,64,2) [] = { 0x3333333333333333, + 0x3333333333333333 }; +VECT_VAR_DECL(expected,uint,8,16) [] = { 0xfb, 0xfc, 0xfc, 0xfd, + 0xfd, 0xfe, 0xfe, 0xff, + 0xff, 0x0, 0x0, 0x1, + 0x1, 0x2, 0x2, 0x3 }; +VECT_VAR_DECL(expected,uint,16,8) [] = { 0xffff, 0xffff, 0x0, 0x0, + 0x1, 0x1, 0x2, 0x2 }; +VECT_VAR_DECL(expected,uint,32,4) [] = { 0xffffffff, 0x0, 0x0, 0x1 }; +VECT_VAR_DECL(expected,uint,64,2) [] = { 0x3333333333333333, + 0x3333333333333333 }; +VECT_VAR_DECL(expected,poly,8,16) [] = { 0x33, 0x33, 0x33, 0x33, + 0x33, 0x33, 0x33, 0x33, + 0x33, 0x33, 0x33, 0x33, + 0x33, 0x33, 0x33, 0x33 }; +VECT_VAR_DECL(expected,poly,16,8) [] = { 0x3333, 0x3333, 0x3333, 0x3333, + 0x3333, 0x3333, 0x3333, 0x3333 }; +VECT_VAR_DECL(expected,hfloat,32,4) [] = { 0x33333333, 0x33333333, + 0x33333333, 0x33333333 }; + +#include "binary_op_no64.inc" diff --git a/gcc/testsuite/gcc.target/aarch64/advsimd-intrinsics/vmax.c b/gcc/testsuite/gcc.target/aarch64/advsimd-intrinsics/vmax.c new file mode 100644 index 0000000..2591b16 --- /dev/null +++ b/gcc/testsuite/gcc.target/aarch64/advsimd-intrinsics/vmax.c @@ -0,0 +1,64 @@ +#include <arm_neon.h> +#include "arm-neon-ref.h" +#include "compute-ref-data.h" + +#define INSN_NAME vmax +#define TEST_MSG "VMAX/VMAXQ" + +/* Expected results. 
*/ +VECT_VAR_DECL(expected,int,8,8) [] = { 0xf3, 0xf3, 0xf3, 0xf3, + 0xf4, 0xf5, 0xf6, 0xf7 }; +VECT_VAR_DECL(expected,int,16,4) [] = { 0xfff2, 0xfff2, 0xfff2, 0xfff3 }; +VECT_VAR_DECL(expected,int,32,2) [] = { 0xfffffff0, 0xfffffff1 }; +VECT_VAR_DECL(expected,int,64,1) [] = { 0x3333333333333333 }; +VECT_VAR_DECL(expected,uint,8,8) [] = { 0xf3, 0xf3, 0xf3, 0xf3, + 0xf4, 0xf5, 0xf6, 0xf7 }; +VECT_VAR_DECL(expected,uint,16,4) [] = { 0xfff1, 0xfff1, 0xfff2, 0xfff3 }; +VECT_VAR_DECL(expected,uint,32,2) [] = { 0xfffffff0, 0xfffffff1 }; +VECT_VAR_DECL(expected,uint,64,1) [] = { 0x3333333333333333 }; +VECT_VAR_DECL(expected,poly,8,8) [] = { 0x33, 0x33, 0x33, 0x33, + 0x33, 0x33, 0x33, 0x33 }; +VECT_VAR_DECL(expected,poly,16,4) [] = { 0x3333, 0x3333, 0x3333, 0x3333 }; +VECT_VAR_DECL(expected,hfloat,32,2) [] = { 0xc1780000, 0xc1700000 }; +VECT_VAR_DECL(expected,int,8,16) [] = { 0xf4, 0xf4, 0xf4, 0xf4, + 0xf4, 0xf5, 0xf6, 0xf7, + 0xf8, 0xf9, 0xfa, 0xfb, + 0xfc, 0xfd, 0xfe, 0xff }; +VECT_VAR_DECL(expected,int,16,8) [] = { 0xfff3, 0xfff3, 0xfff3, 0xfff3, + 0xfff4, 0xfff5, 0xfff6, 0xfff7 }; +VECT_VAR_DECL(expected,int,32,4) [] = { 0xfffffff1, 0xfffffff1, + 0xfffffff2, 0xfffffff3 }; +VECT_VAR_DECL(expected,int,64,2) [] = { 0x3333333333333333, + 0x3333333333333333 }; +VECT_VAR_DECL(expected,uint,8,16) [] = { 0xf9, 0xf9, 0xf9, 0xf9, + 0xf9, 0xf9, 0xf9, 0xf9, + 0xf9, 0xf9, 0xfa, 0xfb, + 0xfc, 0xfd, 0xfe, 0xff }; +VECT_VAR_DECL(expected,uint,16,8) [] = { 0xfff2, 0xfff2, 0xfff2, 0xfff3, + 0xfff4, 0xfff5, 0xfff6, 0xfff7 }; +VECT_VAR_DECL(expected,uint,32,4) [] = { 0xfffffff1, 0xfffffff1, + 0xfffffff2, 0xfffffff3 }; +VECT_VAR_DECL(expected,uint,64,2) [] = { 0x3333333333333333, + 0x3333333333333333 }; +VECT_VAR_DECL(expected,poly,8,16) [] = { 0x33, 0x33, 0x33, 0x33, + 0x33, 0x33, 0x33, 0x33, + 0x33, 0x33, 0x33, 0x33, + 0x33, 0x33, 0x33, 0x33 }; +VECT_VAR_DECL(expected,poly,16,8) [] = { 0x3333, 0x3333, 0x3333, 0x3333, + 0x3333, 0x3333, 0x3333, 0x3333 }; +VECT_VAR_DECL(expected,hfloat,32,4) [] = { 0xc1680000, 0xc1680000, + 0xc1600000, 0xc1500000 }; + +/* Expected results with special FP values. */ +VECT_VAR_DECL(expected_nan,hfloat,32,4) [] = { 0x7fc00000, 0x7fc00000, + 0x7fc00000, 0x7fc00000 }; +VECT_VAR_DECL(expected_mnan,hfloat,32,4) [] = { 0x7fc00000, 0x7fc00000, + 0x7fc00000, 0x7fc00000 }; +VECT_VAR_DECL(expected_inf,hfloat,32,4) [] = { 0x7f800000, 0x7f800000, + 0x7f800000, 0x7f800000 }; +VECT_VAR_DECL(expected_minf,hfloat,32,4) [] = { 0x3f800000, 0x3f800000, + 0x3f800000, 0x3f800000 }; +VECT_VAR_DECL(expected_zero1,hfloat,32,4) [] = { 0x0, 0x0, 0x0, 0x0 }; +VECT_VAR_DECL(expected_zero2,hfloat,32,4) [] = { 0x0, 0x0, 0x0, 0x0 }; + +#include "binary_op_no64.inc" diff --git a/gcc/testsuite/gcc.target/aarch64/advsimd-intrinsics/vmin.c b/gcc/testsuite/gcc.target/aarch64/advsimd-intrinsics/vmin.c new file mode 100644 index 0000000..2b5e87c --- /dev/null +++ b/gcc/testsuite/gcc.target/aarch64/advsimd-intrinsics/vmin.c @@ -0,0 +1,66 @@ +#include <arm_neon.h> +#include "arm-neon-ref.h" +#include "compute-ref-data.h" + +#define INSN_NAME vmin +#define TEST_MSG "VMIN/VMINQ" + +/* Expected results. 
*/ +VECT_VAR_DECL(expected,int,8,8) [] = { 0xf0, 0xf1, 0xf2, 0xf3, + 0xf3, 0xf3, 0xf3, 0xf3 }; +VECT_VAR_DECL(expected,int,16,4) [] = { 0xfff0, 0xfff1, 0xfff2, 0xfff2 }; +VECT_VAR_DECL(expected,int,32,2) [] = { 0xfffffff0, 0xfffffff0 }; +VECT_VAR_DECL(expected,int,64,1) [] = { 0x3333333333333333 }; +VECT_VAR_DECL(expected,uint,8,8) [] = { 0xf0, 0xf1, 0xf2, 0xf3, + 0xf3, 0xf3, 0xf3, 0xf3 }; +VECT_VAR_DECL(expected,uint,16,4) [] = { 0xfff0, 0xfff1, 0xfff1, 0xfff1 }; +VECT_VAR_DECL(expected,uint,32,2) [] = { 0xfffffff0, 0xfffffff0 }; +VECT_VAR_DECL(expected,uint,64,1) [] = { 0x3333333333333333 }; +VECT_VAR_DECL(expected,poly,8,8) [] = { 0x33, 0x33, 0x33, 0x33, + 0x33, 0x33, 0x33, 0x33 }; +VECT_VAR_DECL(expected,poly,16,4) [] = { 0x3333, 0x3333, 0x3333, 0x3333 }; +VECT_VAR_DECL(expected,hfloat,32,2) [] = { 0xc1800000, 0xc1780000 }; +VECT_VAR_DECL(expected,int,8,16) [] = { 0xf0, 0xf1, 0xf2, 0xf3, + 0xf4, 0xf4, 0xf4, 0xf4, + 0xf4, 0xf4, 0xf4, 0xf4, + 0xf4, 0xf4, 0xf4, 0xf4 }; +VECT_VAR_DECL(expected,int,16,8) [] = { 0xfff0, 0xfff1, 0xfff2, 0xfff3, + 0xfff3, 0xfff3, 0xfff3, 0xfff3 }; +VECT_VAR_DECL(expected,int,32,4) [] = { 0xfffffff0, 0xfffffff1, + 0xfffffff1, 0xfffffff1 }; +VECT_VAR_DECL(expected,int,64,2) [] = { 0x3333333333333333, + 0x3333333333333333 }; +VECT_VAR_DECL(expected,uint,8,16) [] = { 0xf0, 0xf1, 0xf2, 0xf3, + 0xf4, 0xf5, 0xf6, 0xf7, + 0xf8, 0xf9, 0xf9, 0xf9, + 0xf9, 0xf9, 0xf9, 0xf9 }; +VECT_VAR_DECL(expected,uint,16,8) [] = { 0xfff0, 0xfff1, 0xfff2, 0xfff2, + 0xfff2, 0xfff2, 0xfff2, 0xfff2 }; +VECT_VAR_DECL(expected,uint,32,4) [] = { 0xfffffff0, 0xfffffff1, + 0xfffffff1, 0xfffffff1 }; +VECT_VAR_DECL(expected,uint,64,2) [] = { 0x3333333333333333, + 0x3333333333333333 }; +VECT_VAR_DECL(expected,poly,8,16) [] = { 0x33, 0x33, 0x33, 0x33, + 0x33, 0x33, 0x33, 0x33, + 0x33, 0x33, 0x33, 0x33, + 0x33, 0x33, 0x33, 0x33 }; +VECT_VAR_DECL(expected,poly,16,8) [] = { 0x3333, 0x3333, 0x3333, 0x3333, + 0x3333, 0x3333, 0x3333, 0x3333 }; + +VECT_VAR_DECL(expected,hfloat,32,4) [] = { 0xc1800000, 0xc1700000, + 0xc1680000, 0xc1680000 }; +/* Expected results with special FP values. */ +VECT_VAR_DECL(expected_nan,hfloat,32,4) [] = { 0x7fc00000, 0x7fc00000, + 0x7fc00000, 0x7fc00000 }; +VECT_VAR_DECL(expected_mnan,hfloat,32,4) [] = { 0x7fc00000, 0x7fc00000, + 0x7fc00000, 0x7fc00000 }; +VECT_VAR_DECL(expected_inf,hfloat,32,4) [] = { 0x3f800000, 0x3f800000, + 0x3f800000, 0x3f800000 }; +VECT_VAR_DECL(expected_minf,hfloat,32,4) [] = { 0xff800000, 0xff800000, + 0xff800000, 0xff800000 }; +VECT_VAR_DECL(expected_zero1,hfloat,32,4) [] = { 0x80000000, 0x80000000, + 0x80000000, 0x80000000 }; +VECT_VAR_DECL(expected_zero2,hfloat,32,4) [] = { 0x80000000, 0x80000000, + 0x80000000, 0x80000000 }; + +#include "binary_op_no64.inc" diff --git a/gcc/testsuite/gcc.target/aarch64/advsimd-intrinsics/vrhadd.c b/gcc/testsuite/gcc.target/aarch64/advsimd-intrinsics/vrhadd.c new file mode 100644 index 0000000..8629beb --- /dev/null +++ b/gcc/testsuite/gcc.target/aarch64/advsimd-intrinsics/vrhadd.c @@ -0,0 +1,54 @@ +#include <arm_neon.h> +#include "arm-neon-ref.h" +#include "compute-ref-data.h" + +#define INSN_NAME vrhadd +#define TEST_MSG "VRHADD/VRHADDQ" + +#define NO_FLOAT_VARIANT + +/* Expected results. 
*/ +VECT_VAR_DECL(expected,int,8,8) [] = { 0xf2, 0xf2, 0xf3, 0xf3, + 0xf4, 0xf4, 0xf5, 0xf5 }; +VECT_VAR_DECL(expected,int,16,4) [] = { 0xfff1, 0xfff2, 0xfff2, 0xfff3 }; +VECT_VAR_DECL(expected,int,32,2) [] = { 0xfffffff0, 0xfffffff1 }; +VECT_VAR_DECL(expected,int,64,1) [] = { 0x3333333333333333 }; +VECT_VAR_DECL(expected,uint,8,8) [] = { 0xf2, 0xf2, 0xf3, 0xf3, + 0xf4, 0xf4, 0xf5, 0xf5 }; +VECT_VAR_DECL(expected,uint,16,4) [] = { 0xfff1, 0xfff1, 0xfff2, 0xfff2 }; +VECT_VAR_DECL(expected,uint,32,2) [] = { 0xfffffff0, 0xfffffff1 }; +VECT_VAR_DECL(expected,uint,64,1) [] = { 0x3333333333333333 }; +VECT_VAR_DECL(expected,poly,8,8) [] = { 0x33, 0x33, 0x33, 0x33, + 0x33, 0x33, 0x33, 0x33 }; +VECT_VAR_DECL(expected,poly,16,4) [] = { 0x3333, 0x3333, 0x3333, 0x3333 }; +VECT_VAR_DECL(expected,hfloat,32,2) [] = { 0x33333333, 0x33333333 }; +VECT_VAR_DECL(expected,int,8,16) [] = { 0xf2, 0xf3, 0xf3, 0xf4, + 0xf4, 0xf5, 0xf5, 0xf6, + 0xf6, 0xf7, 0xf7, 0xf8, + 0xf8, 0xf9, 0xf9, 0xfa }; +VECT_VAR_DECL(expected,int,16,8) [] = { 0xfff2, 0xfff2, 0xfff3, 0xfff3, + 0xfff4, 0xfff4, 0xfff5, 0xfff5 }; +VECT_VAR_DECL(expected,int,32,4) [] = { 0xfffffff1, 0xfffffff1, + 0xfffffff2, 0xfffffff2 }; +VECT_VAR_DECL(expected,int,64,2) [] = { 0x3333333333333333, + 0x3333333333333333 }; +VECT_VAR_DECL(expected,uint,8,16) [] = { 0xf5, 0xf5, 0xf6, 0xf6, + 0xf7, 0xf7, 0xf8, 0xf8, + 0xf9, 0xf9, 0xfa, 0xfa, + 0xfb, 0xfb, 0xfc, 0xfc }; +VECT_VAR_DECL(expected,uint,16,8) [] = { 0xfff1, 0xfff2, 0xfff2, 0xfff3, + 0xfff3, 0xfff4, 0xfff4, 0xfff5 }; +VECT_VAR_DECL(expected,uint,32,4) [] = { 0xfffffff1, 0xfffffff1, + 0xfffffff2, 0xfffffff2 }; +VECT_VAR_DECL(expected,uint,64,2) [] = { 0x3333333333333333, + 0x3333333333333333 }; +VECT_VAR_DECL(expected,poly,8,16) [] = { 0x33, 0x33, 0x33, 0x33, + 0x33, 0x33, 0x33, 0x33, + 0x33, 0x33, 0x33, 0x33, + 0x33, 0x33, 0x33, 0x33 }; +VECT_VAR_DECL(expected,poly,16,8) [] = { 0x3333, 0x3333, 0x3333, 0x3333, + 0x3333, 0x3333, 0x3333, 0x3333 }; +VECT_VAR_DECL(expected,hfloat,32,4) [] = { 0x33333333, 0x33333333, + 0x33333333, 0x33333333 }; + +#include "binary_op_no64.inc"
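As a sanity check on the float expectations above: the "hfloat" arrays hold raw IEEE-754 single-precision bit patterns. For example, in vmax.c the first 2-lane expected value 0xc1780000 decodes to -15.5f, i.e. the max of the first buffer element (-16.0f, as implied by the corresponding vmin expectation 0xc1800000) and the VDUPed comparison value -15.5f. A short decoding sketch:

```c
/* Decode an expected "hfloat" bit pattern back to its float value,
   e.g. 0xc1780000 -> -15.5f (vmax lane 0) and 0xc1800000 -> -16.0f
   (vmin lane 0).  */
#include <stdio.h>
#include <string.h>

static float decode (unsigned int bits)
{
  float f;
  memcpy (&f, &bits, sizeof f);
  return f;
}

int main (void)
{
  printf ("0xc1780000 -> %g\n", decode (0xc1780000u));  /* -15.5 */
  printf ("0xc1800000 -> %g\n", decode (0xc1800000u));  /* -16 */
  return 0;
}
```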