Message ID | 20250203212546.911043-2-adhemerval.zanella@linaro.org |
---|---|
State | New |
Headers | show |
Series | Optimize CORE-MATH internal tables | expand |
Am Montag, 3. Februar 2025, 22:24:17 Mitteleuropäische Normalzeit schrieb Adhemerval Zanella: > The libm size improvement built with "--enable-stack-protector=strong > --enable-bind-now=yes --enable-fortify-source=2": > > From: > > text data bss dec hex filename > 587896 860 12 588768 8fbe0 aarch64-linux-gnu-master/math/libm.so > 963175 1068 12 964255 eb69f x86_64-linux-gnu-master/math/libm.so > 1191542 5544 368 1197454 12458e powerpc64le-linux-gnu-master/math/libm.so > > To: > > text data bss dec hex filename > 587304 860 12 588176 8f990 aarch64-linux-gnu/math/libm.so > 962855 1068 12 963935 eb55f x86_64-linux-gnu/math/libm.so > 1191222 5544 368 1197134 12444e powerpc64le-linux-gnu/math/libm.so > > The are not code changes for x86_64 and powerpc64le, but on aarch64 > with gcc-14 I see a slight better code generation due the usage of > ldq for floating point constant loading. The b polynomial coefficients are different for asinf and acosf in the original. (And I see no changes in the polynomial itself.) If they can be changed like this, maybe add a remark why? > --- > math/Makefile | 1 + > sysdeps/ieee754/flt-32/e_acosf.c | 36 ++++------------ > sysdeps/ieee754/flt-32/e_asincosf_data.c | 53 ++++++++++++++++++++++++ > sysdeps/ieee754/flt-32/e_asincosf_data.h | 37 +++++++++++++++++ > sysdeps/ieee754/flt-32/e_asinf.c | 38 ++++------------- > 5 files changed, 106 insertions(+), 59 deletions(-) > create mode 100644 sysdeps/ieee754/flt-32/e_asincosf_data.c > create mode 100644 sysdeps/ieee754/flt-32/e_asincosf_data.h > > diff --git a/math/Makefile b/math/Makefile > index f24cee5c39..a6da38a135 100644 > --- a/math/Makefile > +++ b/math/Makefile > @@ -362,6 +362,7 @@ type-double-routines := \ > # float support > type-float-suffix := f > type-float-routines := \ > + e_asincosf_data \ > e_exp2f_data \ > e_log2f_data \ > e_logf_data \ ok > diff --git a/sysdeps/ieee754/flt-32/e_acosf.c b/sysdeps/ieee754/flt-32/e_acosf.c > index a5a4de4fc2..90196ac61e 100644 > --- a/sysdeps/ieee754/flt-32/e_acosf.c > +++ b/sysdeps/ieee754/flt-32/e_acosf.c > @@ -30,6 +30,7 @@ SOFTWARE. > #include <libm-alias-finite.h> > #include <math-barriers.h> > #include "math_config.h" > +#include "e_asincosf_data.h" > > static __attribute__ ((noinline)) float > as_special (float x) ok > @@ -77,15 +78,6 @@ __ieee754_acosf (float x) > return as_special (x); > if (__glibc_likely (ax < 0x7ec2a1dcu)) /* |x| < 0x1.c2a1dcp-1 */ > { > - static const double b[] = > - { > - 0x1.fffffffd9ccb8p-1, 0x1.5555c94838007p-3, 0x1.32ded4b7c20fap-4, > - 0x1.8566df703309ep-5, -0x1.980c959bec9a3p-6, 0x1.56fbb04998344p-1, > - -0x1.403d8e4c49f52p+2, 0x1.b06c3e9f311eap+4, -0x1.9ea97c4e2c21fp+6, > - 0x1.200b8261cc61bp+8, -0x1.2274c2799a5c7p+9, 0x1.a558a59cc19d3p+9, > - -0x1.aca4b6a529ffp+9, 0x1.228744703f813p+9, -0x1.d7dbb0b322228p+7, > - 0x1.5c2018c0c0105p+5 > - }; > /* Avoid spurious underflow exception. */ > if (__glibc_unlikely (ax <= 0x40000000u)) /* |x| < 2^-63 */ > /* GCC <= 11 wrongly assumes the rounding is to nearest and ok moved below > @@ -97,11 +89,11 @@ __ieee754_acosf (float x) > double z4 = z2 * z2; > double z8 = z4 * z4; > double z16 = z8 * z8; > - r = z * ((((b[0] + z2 * b[1]) + z4 * (b[2] + z2 * b[3])) > - + z8 * ((b[4] + z2 * b[5]) + z4 * (b[6] + z2 * b[7]))) > - + z16 * (((b[8] + z2 * b[9]) + z4 * (b[10] + z2 * b[11])) > + r = z * ((((B[0] + z2 * B[1]) + z4 * (B[2] + z2 * B[3])) > + + z8 * ((B[4] + z2 * B[5]) + z4 * (B[6] + z2 * B[7]))) > + + z16 * (((B[8] + z2 * B[9]) + z4 * (B[10] + z2 * B[11])) > + z8 > - * ((b[12] + z2 * b[13])+ z4 * (b[14] + z2 * b[15])))); > + * ((B[12] + z2 * B[13])+ z4 * (B[14] + z2 * B[15])))); > float ub = 0x1.921fb54574191p+0 - r; > float lb = 0x1.921fb543118ap+0 - r; > if (ub == lb) ok > @@ -110,33 +102,19 @@ __ieee754_acosf (float x) > /* accurate path */ > if (ax < (0x7eu << 24)) > { > - static const double c[] = > - { > - 0x1.555555555529cp-3, 0x1.333333337e0ddp-4, 0x1.6db6db3b4465ep-5, > - 0x1.f1c72e13ac306p-6, 0x1.6e89cebe06bc4p-6, 0x1.1c6dcf5289094p-6, > - 0x1.c6dbbcc7c6315p-7, 0x1.8f8dc2615e996p-7, 0x1.a5833b7bf15e8p-8, > - 0x1.43f44ace1665cp-6, -0x1.0fb17df881c73p-6, 0x1.07520c026b2d6p-5 > - }; > if (t == 0x328885a3u) > return 0x1.921fb6p+0f + 0x1p-25; > if (t == 0x39826222u) > return 0x1.920f6ap+0f + 0x1p-25; > double x2 = xs * xs; > - r = (pi2 - xs) - (xs * x2) * poly12 (x2, c); > + r = (pi2 - xs) - (xs * x2) * poly12 (x2, C0); > } > else > { > - static const double c[] = > - { > - 0x1.6a09e667f3bcbp+0, 0x1.e2b7dddff2db9p-4, 0x1.b27247ab42dbcp-6, > - 0x1.02995cc4e0744p-7, 0x1.5ffb0276ec8eap-9, 0x1.033885a928decp-10, > - 0x1.911f2be23f8c7p-12, 0x1.4c3c55d2437fdp-13, 0x1.af477e1d7b461p-15, > - 0x1.abd6bdff67dcbp-15, -0x1.1717e86d0fa28p-16, 0x1.6ff526de46023p-16 > - }; > double bx = fabs (xs); > double z = 1.0 - bx; > double s = copysign (sqrt (z), xs); > - r = o[t >> 31] + s * poly12 (z, c); > + r = o[t >> 31] + s * poly12 (z, C1); > } > return r; > } ok moved below > diff --git a/sysdeps/ieee754/flt-32/e_asincosf_data.c b/sysdeps/ieee754/flt-32/e_asincosf_data.c > new file mode 100644 > index 0000000000..2ffc2c28f3 > --- /dev/null > +++ b/sysdeps/ieee754/flt-32/e_asincosf_data.c > @@ -0,0 +1,53 @@ > +/* Common data for asinpif/acospif implementations. > + > +Copyright (c) 2022-2025 Alexei Sibidanov. > + > +The original version of this file was copied from the CORE-MATH > +project (src/binary32/sinpi/sinpif.c, revision f786e13). > + > +Permission is hereby granted, free of charge, to any person obtaining a copy > +of this software and associated documentation files (the "Software"), to deal > +in the Software without restriction, including without limitation the rights > +to use, copy, modify, merge, publish, distribute, sublicense, and/or sell > +copies of the Software, and to permit persons to whom the Software is > +furnished to do so, subject to the following conditions: > + > +The above copyright notice and this permission notice shall be included in all > +copies or substantial portions of the Software. > + > +THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR > +IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, > +FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE > +AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER > +LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, > +OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE > +SOFTWARE. > +*/ > + > +#include "e_asincosf_data.h" > + > +const double __asincosf_b[] = > + { > + 0x1.fffffffd9ccb8p-1, 0x1.5555c94838007p-3, 0x1.32ded4b7c20fap-4, > + 0x1.8566df703309ep-5, -0x1.980c959bec9a3p-6, 0x1.56fbb04998344p-1, > + -0x1.403d8e4c49f52p+2, 0x1.b06c3e9f311eap+4, -0x1.9ea97c4e2c21fp+6, > + 0x1.200b8261cc61bp+8, -0x1.2274c2799a5c7p+9, 0x1.a558a59cc19d3p+9, > + -0x1.aca4b6a529ffp+9, 0x1.228744703f813p+9, -0x1.d7dbb0b322228p+7, > + 0x1.5c2018c0c0105p+5 > + }; ok > + > +const double __asincosf_c0[] = > + { > + 0x1.555555555529cp-3, 0x1.333333337e0ddp-4, 0x1.6db6db3b4465ep-5, > + 0x1.f1c72e13ac306p-6, 0x1.6e89cebe06bc4p-6, 0x1.1c6dcf5289094p-6, > + 0x1.c6dbbcc7c6315p-7, 0x1.8f8dc2615e996p-7, 0x1.a5833b7bf15e8p-8, > + 0x1.43f44ace1665cp-6, -0x1.0fb17df881c73p-6, 0x1.07520c026b2d6p-5 > + }; ok > + > +const double __asincosf_c1[] = > + { > + 0x1.6a09e667f3bcbp+0, 0x1.e2b7dddff2db9p-4, 0x1.b27247ab42dbcp-6, > + 0x1.02995cc4e0744p-7, 0x1.5ffb0276ec8eap-9, 0x1.033885a928decp-10, > + 0x1.911f2be23f8c7p-12, 0x1.4c3c55d2437fdp-13, 0x1.af477e1d7b461p-15, > + 0x1.abd6bdff67dcbp-15, -0x1.1717e86d0fa28p-16, 0x1.6ff526de46023p-16 > + }; ok, copied from above > diff --git a/sysdeps/ieee754/flt-32/e_asincosf_data.h b/sysdeps/ieee754/flt-32/e_asincosf_data.h > new file mode 100644 > index 0000000000..7dffb00dbe > --- /dev/null > +++ b/sysdeps/ieee754/flt-32/e_asincosf_data.h > @@ -0,0 +1,37 @@ > +/* Common data for asinpif/acospif implementations. > + > +Copyright (c) 2022-2025 Alexei Sibidanov. > + > +The original version of this file was copied from the CORE-MATH > +project (src/binary32/sinpi/sinpif.c, revision f786e13). > + > +Permission is hereby granted, free of charge, to any person obtaining a copy > +of this software and associated documentation files (the "Software"), to deal > +in the Software without restriction, including without limitation the rights > +to use, copy, modify, merge, publish, distribute, sublicense, and/or sell > +copies of the Software, and to permit persons to whom the Software is > +furnished to do so, subject to the following conditions: > + > +The above copyright notice and this permission notice shall be included in all > +copies or substantial portions of the Software. > + > +THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR > +IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, > +FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE > +AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER > +LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, > +OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE > +SOFTWARE. > +*/ > + > +#ifndef _ASINCOSF_DATAH > +#define _ASINCOSF_DATAH > + > +extern const double __asincosf_b[] attribute_hidden; > +extern const double __asincosf_c0[] attribute_hidden; > +extern const double __asincosf_c1[] attribute_hidden; > +#define B __asincosf_b > +#define C0 __asincosf_c0 > +#define C1 __asincosf_c1 > + > +#endif ok > diff --git a/sysdeps/ieee754/flt-32/e_asinf.c b/sysdeps/ieee754/flt-32/e_asinf.c > index 944bf6f5ce..854466361e 100644 > --- a/sysdeps/ieee754/flt-32/e_asinf.c > +++ b/sysdeps/ieee754/flt-32/e_asinf.c > @@ -28,6 +28,7 @@ SOFTWARE. > #include <errno.h> > #include <libm-alias-finite.h> > #include "math_config.h" > +#include "e_asincosf_data.h" > > static __attribute__ ((noinline)) float > as_special (float x) ok > @@ -69,25 +70,16 @@ __ieee754_asinf (float x) > { > if (__glibc_unlikely (ax < 115 << 24)) > return fmaf (x, 0x1p-25, x); > - static const double b[] = > - { > - 0x1.0000000000005p+0, 0x1.55557aeca105dp-3, 0x1.3314ec3db7d12p-4, > - 0x1.775738a5a6f92p-5, 0x1.5d5f7ce1c8538p-8, 0x1.605c6d58740fp-2, > - -0x1.5728b732d73c6p+1, 0x1.f152170f151ebp+3, -0x1.f962ea3ca992ep+5, > - 0x1.71971e17375ap+7, -0x1.860512b4ba23p+8, 0x1.26a3b8d4bdb14p+9, > - -0x1.36f2ea5698b51p+9, 0x1.b3d722aebfa2ep+8, -0x1.6cf89703b1289p+7, > - 0x1.1518af6a65e2dp+5 > - }; different from new common one > double z = xs; > double z2 = z * z; > double z4 = z2 * z2; > double z8 = z4 * z4; > double z16 = z8 * z8; > - r = z * ((((b[0] + z2 * b[1]) + z4 * (b[2] + z2 * b[3])) > - + z8 * ((b[4] + z2 * b[5]) + z4 * (b[6] + z2 * b[7]))) > - + z16 * (((b[8] + z2 * b[9]) + z4 * (b[10] + z2 * b[11])) > - + z8 * ((b[12] + z2 * b[13]) > - + z4 * (b[14] + z2 * b[15])))); > + r = z * ((((B[0] + z2 * B[1]) + z4 * (B[2] + z2 * B[3])) > + + z8 * ((B[4] + z2 * B[5]) + z4 * (B[6] + z2 * B[7]))) > + + z16 * (((B[8] + z2 * B[9]) + z4 * (B[10] + z2 * B[11])) > + + z8 * ((B[12] + z2 * B[13]) > + + z4 * (B[14] + z2 * B[15])))); > float ub = r; > float lb = r - z * 0x1.efa8ebp-31; > if (ub == lb) > @@ -95,16 +87,9 @@ __ieee754_asinf (float x) > } > if (ax < (0x7eu << 24)) > { > - static const double c[] = > - { > - 0x1.555555555529cp-3, 0x1.333333337e0ddp-4, 0x1.6db6db3b4465ep-5, > - 0x1.f1c72e13ac306p-6, 0x1.6e89cebe06bc4p-6, 0x1.1c6dcf5289094p-6, > - 0x1.c6dbbcc7c6315p-7, 0x1.8f8dc2615e996p-7, 0x1.a5833b7bf15e8p-8, > - 0x1.43f44ace1665cp-6, -0x1.0fb17df881c73p-6, 0x1.07520c026b2d6p-5 > - }; > double z = xs; > double z2 = z * z; > - double c0 = poly12 (z2, c); > + double c0 = poly12 (z2, C0); > r = z + (z * z2) * c0; > } > else ok > @@ -116,14 +101,7 @@ __ieee754_asinf (float x) > double bx = fabs (xs); > double z = 1.0 - bx; > double s = sqrt (z); > - static const double c[] = > - { > - 0x1.6a09e667f3bcbp+0, 0x1.e2b7dddff2db9p-4, 0x1.b27247ab42dbcp-6, > - 0x1.02995cc4e0744p-7, 0x1.5ffb0276ec8eap-9, 0x1.033885a928decp-10, > - 0x1.911f2be23f8c7p-12, 0x1.4c3c55d2437fdp-13, 0x1.af477e1d7b461p-15, > - 0x1.abd6bdff67dcbp-15, -0x1.1717e86d0fa28p-16, 0x1.6ff526de46023p-16 > - }; > - r = pi2 - s * poly12 (z, c); > + r = pi2 - s * poly12 (z, C1); > r = copysign (r, xs); > } > return r; > ok
On 10/02/25 17:55, Andreas K. Huettel wrote: > Am Montag, 3. Februar 2025, 22:24:17 Mitteleuropäische Normalzeit schrieb Adhemerval Zanella: >> The libm size improvement built with "--enable-stack-protector=strong >> --enable-bind-now=yes --enable-fortify-source=2": >> >> From: >> >> text data bss dec hex filename >> 587896 860 12 588768 8fbe0 aarch64-linux-gnu-master/math/libm.so >> 963175 1068 12 964255 eb69f x86_64-linux-gnu-master/math/libm.so >> 1191542 5544 368 1197454 12458e powerpc64le-linux-gnu-master/math/libm.so >> >> To: >> >> text data bss dec hex filename >> 587304 860 12 588176 8f990 aarch64-linux-gnu/math/libm.so >> 962855 1068 12 963935 eb55f x86_64-linux-gnu/math/libm.so >> 1191222 5544 368 1197134 12444e powerpc64le-linux-gnu/math/libm.so >> >> The are not code changes for x86_64 and powerpc64le, but on aarch64 >> with gcc-14 I see a slight better code generation due the usage of >> ldq for floating point constant loading. > > > The b polynomial coefficients are different for asinf and acosf in the original. > (And I see no changes in the polynomial itself.) > > If they can be changed like this, maybe add a remark why? Because it is wrong, I will send a v2 with a correct consolidation.
diff --git a/math/Makefile b/math/Makefile index f24cee5c39..a6da38a135 100644 --- a/math/Makefile +++ b/math/Makefile @@ -362,6 +362,7 @@ type-double-routines := \ # float support type-float-suffix := f type-float-routines := \ + e_asincosf_data \ e_exp2f_data \ e_log2f_data \ e_logf_data \ diff --git a/sysdeps/ieee754/flt-32/e_acosf.c b/sysdeps/ieee754/flt-32/e_acosf.c index a5a4de4fc2..90196ac61e 100644 --- a/sysdeps/ieee754/flt-32/e_acosf.c +++ b/sysdeps/ieee754/flt-32/e_acosf.c @@ -30,6 +30,7 @@ SOFTWARE. #include <libm-alias-finite.h> #include <math-barriers.h> #include "math_config.h" +#include "e_asincosf_data.h" static __attribute__ ((noinline)) float as_special (float x) @@ -77,15 +78,6 @@ __ieee754_acosf (float x) return as_special (x); if (__glibc_likely (ax < 0x7ec2a1dcu)) /* |x| < 0x1.c2a1dcp-1 */ { - static const double b[] = - { - 0x1.fffffffd9ccb8p-1, 0x1.5555c94838007p-3, 0x1.32ded4b7c20fap-4, - 0x1.8566df703309ep-5, -0x1.980c959bec9a3p-6, 0x1.56fbb04998344p-1, - -0x1.403d8e4c49f52p+2, 0x1.b06c3e9f311eap+4, -0x1.9ea97c4e2c21fp+6, - 0x1.200b8261cc61bp+8, -0x1.2274c2799a5c7p+9, 0x1.a558a59cc19d3p+9, - -0x1.aca4b6a529ffp+9, 0x1.228744703f813p+9, -0x1.d7dbb0b322228p+7, - 0x1.5c2018c0c0105p+5 - }; /* Avoid spurious underflow exception. */ if (__glibc_unlikely (ax <= 0x40000000u)) /* |x| < 2^-63 */ /* GCC <= 11 wrongly assumes the rounding is to nearest and @@ -97,11 +89,11 @@ __ieee754_acosf (float x) double z4 = z2 * z2; double z8 = z4 * z4; double z16 = z8 * z8; - r = z * ((((b[0] + z2 * b[1]) + z4 * (b[2] + z2 * b[3])) - + z8 * ((b[4] + z2 * b[5]) + z4 * (b[6] + z2 * b[7]))) - + z16 * (((b[8] + z2 * b[9]) + z4 * (b[10] + z2 * b[11])) + r = z * ((((B[0] + z2 * B[1]) + z4 * (B[2] + z2 * B[3])) + + z8 * ((B[4] + z2 * B[5]) + z4 * (B[6] + z2 * B[7]))) + + z16 * (((B[8] + z2 * B[9]) + z4 * (B[10] + z2 * B[11])) + z8 - * ((b[12] + z2 * b[13])+ z4 * (b[14] + z2 * b[15])))); + * ((B[12] + z2 * B[13])+ z4 * (B[14] + z2 * B[15])))); float ub = 0x1.921fb54574191p+0 - r; float lb = 0x1.921fb543118ap+0 - r; if (ub == lb) @@ -110,33 +102,19 @@ __ieee754_acosf (float x) /* accurate path */ if (ax < (0x7eu << 24)) { - static const double c[] = - { - 0x1.555555555529cp-3, 0x1.333333337e0ddp-4, 0x1.6db6db3b4465ep-5, - 0x1.f1c72e13ac306p-6, 0x1.6e89cebe06bc4p-6, 0x1.1c6dcf5289094p-6, - 0x1.c6dbbcc7c6315p-7, 0x1.8f8dc2615e996p-7, 0x1.a5833b7bf15e8p-8, - 0x1.43f44ace1665cp-6, -0x1.0fb17df881c73p-6, 0x1.07520c026b2d6p-5 - }; if (t == 0x328885a3u) return 0x1.921fb6p+0f + 0x1p-25; if (t == 0x39826222u) return 0x1.920f6ap+0f + 0x1p-25; double x2 = xs * xs; - r = (pi2 - xs) - (xs * x2) * poly12 (x2, c); + r = (pi2 - xs) - (xs * x2) * poly12 (x2, C0); } else { - static const double c[] = - { - 0x1.6a09e667f3bcbp+0, 0x1.e2b7dddff2db9p-4, 0x1.b27247ab42dbcp-6, - 0x1.02995cc4e0744p-7, 0x1.5ffb0276ec8eap-9, 0x1.033885a928decp-10, - 0x1.911f2be23f8c7p-12, 0x1.4c3c55d2437fdp-13, 0x1.af477e1d7b461p-15, - 0x1.abd6bdff67dcbp-15, -0x1.1717e86d0fa28p-16, 0x1.6ff526de46023p-16 - }; double bx = fabs (xs); double z = 1.0 - bx; double s = copysign (sqrt (z), xs); - r = o[t >> 31] + s * poly12 (z, c); + r = o[t >> 31] + s * poly12 (z, C1); } return r; } diff --git a/sysdeps/ieee754/flt-32/e_asincosf_data.c b/sysdeps/ieee754/flt-32/e_asincosf_data.c new file mode 100644 index 0000000000..2ffc2c28f3 --- /dev/null +++ b/sysdeps/ieee754/flt-32/e_asincosf_data.c @@ -0,0 +1,53 @@ +/* Common data for asinpif/acospif implementations. + +Copyright (c) 2022-2025 Alexei Sibidanov. + +The original version of this file was copied from the CORE-MATH +project (src/binary32/sinpi/sinpif.c, revision f786e13). + +Permission is hereby granted, free of charge, to any person obtaining a copy +of this software and associated documentation files (the "Software"), to deal +in the Software without restriction, including without limitation the rights +to use, copy, modify, merge, publish, distribute, sublicense, and/or sell +copies of the Software, and to permit persons to whom the Software is +furnished to do so, subject to the following conditions: + +The above copyright notice and this permission notice shall be included in all +copies or substantial portions of the Software. + +THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR +IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, +FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE +AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER +LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, +OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE +SOFTWARE. +*/ + +#include "e_asincosf_data.h" + +const double __asincosf_b[] = + { + 0x1.fffffffd9ccb8p-1, 0x1.5555c94838007p-3, 0x1.32ded4b7c20fap-4, + 0x1.8566df703309ep-5, -0x1.980c959bec9a3p-6, 0x1.56fbb04998344p-1, + -0x1.403d8e4c49f52p+2, 0x1.b06c3e9f311eap+4, -0x1.9ea97c4e2c21fp+6, + 0x1.200b8261cc61bp+8, -0x1.2274c2799a5c7p+9, 0x1.a558a59cc19d3p+9, + -0x1.aca4b6a529ffp+9, 0x1.228744703f813p+9, -0x1.d7dbb0b322228p+7, + 0x1.5c2018c0c0105p+5 + }; + +const double __asincosf_c0[] = + { + 0x1.555555555529cp-3, 0x1.333333337e0ddp-4, 0x1.6db6db3b4465ep-5, + 0x1.f1c72e13ac306p-6, 0x1.6e89cebe06bc4p-6, 0x1.1c6dcf5289094p-6, + 0x1.c6dbbcc7c6315p-7, 0x1.8f8dc2615e996p-7, 0x1.a5833b7bf15e8p-8, + 0x1.43f44ace1665cp-6, -0x1.0fb17df881c73p-6, 0x1.07520c026b2d6p-5 + }; + +const double __asincosf_c1[] = + { + 0x1.6a09e667f3bcbp+0, 0x1.e2b7dddff2db9p-4, 0x1.b27247ab42dbcp-6, + 0x1.02995cc4e0744p-7, 0x1.5ffb0276ec8eap-9, 0x1.033885a928decp-10, + 0x1.911f2be23f8c7p-12, 0x1.4c3c55d2437fdp-13, 0x1.af477e1d7b461p-15, + 0x1.abd6bdff67dcbp-15, -0x1.1717e86d0fa28p-16, 0x1.6ff526de46023p-16 + }; diff --git a/sysdeps/ieee754/flt-32/e_asincosf_data.h b/sysdeps/ieee754/flt-32/e_asincosf_data.h new file mode 100644 index 0000000000..7dffb00dbe --- /dev/null +++ b/sysdeps/ieee754/flt-32/e_asincosf_data.h @@ -0,0 +1,37 @@ +/* Common data for asinpif/acospif implementations. + +Copyright (c) 2022-2025 Alexei Sibidanov. + +The original version of this file was copied from the CORE-MATH +project (src/binary32/sinpi/sinpif.c, revision f786e13). + +Permission is hereby granted, free of charge, to any person obtaining a copy +of this software and associated documentation files (the "Software"), to deal +in the Software without restriction, including without limitation the rights +to use, copy, modify, merge, publish, distribute, sublicense, and/or sell +copies of the Software, and to permit persons to whom the Software is +furnished to do so, subject to the following conditions: + +The above copyright notice and this permission notice shall be included in all +copies or substantial portions of the Software. + +THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR +IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, +FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE +AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER +LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, +OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE +SOFTWARE. +*/ + +#ifndef _ASINCOSF_DATAH +#define _ASINCOSF_DATAH + +extern const double __asincosf_b[] attribute_hidden; +extern const double __asincosf_c0[] attribute_hidden; +extern const double __asincosf_c1[] attribute_hidden; +#define B __asincosf_b +#define C0 __asincosf_c0 +#define C1 __asincosf_c1 + +#endif diff --git a/sysdeps/ieee754/flt-32/e_asinf.c b/sysdeps/ieee754/flt-32/e_asinf.c index 944bf6f5ce..854466361e 100644 --- a/sysdeps/ieee754/flt-32/e_asinf.c +++ b/sysdeps/ieee754/flt-32/e_asinf.c @@ -28,6 +28,7 @@ SOFTWARE. #include <errno.h> #include <libm-alias-finite.h> #include "math_config.h" +#include "e_asincosf_data.h" static __attribute__ ((noinline)) float as_special (float x) @@ -69,25 +70,16 @@ __ieee754_asinf (float x) { if (__glibc_unlikely (ax < 115 << 24)) return fmaf (x, 0x1p-25, x); - static const double b[] = - { - 0x1.0000000000005p+0, 0x1.55557aeca105dp-3, 0x1.3314ec3db7d12p-4, - 0x1.775738a5a6f92p-5, 0x1.5d5f7ce1c8538p-8, 0x1.605c6d58740fp-2, - -0x1.5728b732d73c6p+1, 0x1.f152170f151ebp+3, -0x1.f962ea3ca992ep+5, - 0x1.71971e17375ap+7, -0x1.860512b4ba23p+8, 0x1.26a3b8d4bdb14p+9, - -0x1.36f2ea5698b51p+9, 0x1.b3d722aebfa2ep+8, -0x1.6cf89703b1289p+7, - 0x1.1518af6a65e2dp+5 - }; double z = xs; double z2 = z * z; double z4 = z2 * z2; double z8 = z4 * z4; double z16 = z8 * z8; - r = z * ((((b[0] + z2 * b[1]) + z4 * (b[2] + z2 * b[3])) - + z8 * ((b[4] + z2 * b[5]) + z4 * (b[6] + z2 * b[7]))) - + z16 * (((b[8] + z2 * b[9]) + z4 * (b[10] + z2 * b[11])) - + z8 * ((b[12] + z2 * b[13]) - + z4 * (b[14] + z2 * b[15])))); + r = z * ((((B[0] + z2 * B[1]) + z4 * (B[2] + z2 * B[3])) + + z8 * ((B[4] + z2 * B[5]) + z4 * (B[6] + z2 * B[7]))) + + z16 * (((B[8] + z2 * B[9]) + z4 * (B[10] + z2 * B[11])) + + z8 * ((B[12] + z2 * B[13]) + + z4 * (B[14] + z2 * B[15])))); float ub = r; float lb = r - z * 0x1.efa8ebp-31; if (ub == lb) @@ -95,16 +87,9 @@ __ieee754_asinf (float x) } if (ax < (0x7eu << 24)) { - static const double c[] = - { - 0x1.555555555529cp-3, 0x1.333333337e0ddp-4, 0x1.6db6db3b4465ep-5, - 0x1.f1c72e13ac306p-6, 0x1.6e89cebe06bc4p-6, 0x1.1c6dcf5289094p-6, - 0x1.c6dbbcc7c6315p-7, 0x1.8f8dc2615e996p-7, 0x1.a5833b7bf15e8p-8, - 0x1.43f44ace1665cp-6, -0x1.0fb17df881c73p-6, 0x1.07520c026b2d6p-5 - }; double z = xs; double z2 = z * z; - double c0 = poly12 (z2, c); + double c0 = poly12 (z2, C0); r = z + (z * z2) * c0; } else @@ -116,14 +101,7 @@ __ieee754_asinf (float x) double bx = fabs (xs); double z = 1.0 - bx; double s = sqrt (z); - static const double c[] = - { - 0x1.6a09e667f3bcbp+0, 0x1.e2b7dddff2db9p-4, 0x1.b27247ab42dbcp-6, - 0x1.02995cc4e0744p-7, 0x1.5ffb0276ec8eap-9, 0x1.033885a928decp-10, - 0x1.911f2be23f8c7p-12, 0x1.4c3c55d2437fdp-13, 0x1.af477e1d7b461p-15, - 0x1.abd6bdff67dcbp-15, -0x1.1717e86d0fa28p-16, 0x1.6ff526de46023p-16 - }; - r = pi2 - s * poly12 (z, c); + r = pi2 - s * poly12 (z, C1); r = copysign (r, xs); } return r;