diff mbox series

[11/11] math: Use tanf from CORE-MATH

Message ID 20241111134740.1410635-12-adhemerval.zanella@linaro.org
State New
Headers show
Series Add more CORE-math implementations to libm | expand

Commit Message

Adhemerval Zanella Nov. 11, 2024, 1:45 p.m. UTC
The CORE-MATH implementation is correctly rounded (for any rounding mode)
and shows better performance to the generic tanf.

The code was adapted to glibc style, to use the definition of
math_config.h, to remove errno handling, and to use a generic
128 bit routine for ABIs that do not support it natively.

Benchtest on x86_64 (Ryzen 9 5900X, gcc 14.2.1), aarch64 (neoverse1,
gcc 13.2.1), and powerpc (POWER10, gcc 13.2.1):

latency                       master       patched  improvement
x86_64                       82.3961       54.8052       33.49%
x86_64v2                     82.3415       54.8052       33.44%
x86_64v3                     69.3661       50.4864       27.22%
i686                         219.271       45.5396       79.23%
aarch64                      29.2127       19.1951       34.29%
power10                      19.5060       16.2760       16.56%

reciprocal-throughput         master       patched  improvement
x86_64                       28.3976       19.7334       30.51%
x86_64v2                     28.4568       19.7334       30.65%
x86_64v3                     21.1815       16.1811       23.61%
i686                         105.016       15.1426       85.58%
aarch64                      18.1573       10.7681       40.70%
power10                       8.7207        8.7097        0.13%

Signed-off-by: Alexei Sibidanov <sibid@uvic.ca>
Signed-off-by: Paul Zimmermann <Paul.Zimmermann@inria.fr>
Signed-off-by: Adhemerval Zanella <adhemerval.zanella@linaro.org>
---
 SHARED-FILES                                  |   6 +
 sysdeps/aarch64/libm-test-ulps                |   4 -
 sysdeps/alpha/fpu/libm-test-ulps              |   4 -
 sysdeps/arc/fpu/libm-test-ulps                |   4 -
 sysdeps/arc/nofpu/libm-test-ulps              |   3 -
 sysdeps/arm/libm-test-ulps                    |   6 -
 sysdeps/csky/fpu/libm-test-ulps               |   6 -
 sysdeps/csky/nofpu/libm-test-ulps             |   6 -
 sysdeps/generic/math_int128.h                 | 144 ++++++++++++
 sysdeps/hppa/fpu/libm-test-ulps               |   4 -
 sysdeps/i386/fpu/libm-test-ulps               |   4 -
 .../i386/i686/fpu/multiarch/libm-test-ulps    |   4 -
 sysdeps/ieee754/flt-32/k_tanf.c               | 102 +-------
 sysdeps/ieee754/flt-32/s_tanf.c               | 220 +++++++++++++-----
 sysdeps/loongarch/lp64/libm-test-ulps         |   4 -
 sysdeps/microblaze/libm-test-ulps             |   3 -
 sysdeps/mips/mips32/libm-test-ulps            |   6 -
 sysdeps/mips/mips64/libm-test-ulps            |   4 -
 sysdeps/nios2/libm-test-ulps                  |   3 -
 sysdeps/or1k/fpu/libm-test-ulps               |   6 -
 sysdeps/or1k/nofpu/libm-test-ulps             |   6 -
 sysdeps/powerpc/fpu/libm-test-ulps            |   4 -
 sysdeps/powerpc/nofpu/libm-test-ulps          |   4 -
 sysdeps/riscv/nofpu/libm-test-ulps            |   4 -
 sysdeps/riscv/rvd/libm-test-ulps              |   4 -
 sysdeps/s390/fpu/libm-test-ulps               |   4 -
 sysdeps/sh/libm-test-ulps                     |   4 -
 sysdeps/sparc/fpu/libm-test-ulps              |   4 -
 sysdeps/x86_64/fpu/libm-test-ulps             |   4 -
 29 files changed, 311 insertions(+), 270 deletions(-)
 create mode 100644 sysdeps/generic/math_int128.h

Comments

Joseph Myers Nov. 12, 2024, 12:23 a.m. UTC | #1
On Mon, 11 Nov 2024, Adhemerval Zanella wrote:

> +#ifdef __SIZEOF_INT128__
> +typedef unsigned __int128 u128;
> +# define u128_high(__x)         (uint64_t)((__x) >> 64)
> +# define u128_low(__x)          (uint64_t)(__x)
> +# define u128_from_u64(__x)     (u128)(__x)
> +# define u128_mul(__x, __y)     (__x) * (__y)
> +# define u128_add(__x, __y)     (__x) + (__y)
> +# define u128_lshift(__x, __y)  (__x) << (__y)
> +# define u128_rshift(__x, __y)  (__x) >> (__y)
> +#else
> +typedef struct

I'd suggest also having an option for the case of defined 
__BITINT_MAXWIDTH__ && __BITINT_MAXWIDTH__ >= 128, that uses unsigned 
_BitInt (128) in that case (and would look essentially like the unsigned 
__int128 case).  (You can test that with GCC 14 or later on 32-bit x86.)
Paul Zimmermann Nov. 12, 2024, 1:17 p.m. UTC | #2
Hi Adhemerval,

some typos and possible improvements (this is not a review since I am in
the "Signed-off-by" fields):

> +  (src/binary32/tan/tanf.cc in CORE-MATH)

should be src/binary32/tan/tanf.c

> +  - The code was adapted to use glibc code style and internal
> +    functions to handle errno, overflow, and underflow.  It was changed
> +    to use an internal wrapper for 128 bit unsigned integer operation

maybe "operations"?

> +/* Limited support for internal 128 bit integer, used on some math
> +   implementations.  It uses compiler builtin type if supported, otherwise
> +   it is emulate.  Only unsigned and some operations are currently supported:

emulate -> emulated

> +
> +   - u128_t:         the 128 bit unsigned type.
> +   - u128_high:      return the high part of the number.
> +   - u128_low:       return the low part of the number.
> +   - u128_from_u64:  createa 128 bit number from a 64 bit one.

create a

> +   - u128_mul:       multiple two 128 bit numbers.

multiply

> +   - u128_add:       add two 128 bit numbers.
> +   - u128_lshift:    left shift a number.
> +   - u128_rshift:    right shift a number.
> + */
> +
> +#ifdef __SIZEOF_INT128__
> +typedef unsigned __int128 u128;
> +# define u128_high(__x)         (uint64_t)((__x) >> 64)
> +# define u128_low(__x)          (uint64_t)(__x)
> +# define u128_from_u64(__x)     (u128)(__x)
> +# define u128_mul(__x, __y)     (__x) * (__y)
> +# define u128_add(__x, __y)     (__x) + (__y)
> +# define u128_lshift(__x, __y)  (__x) << (__y)
> +# define u128_rshift(__x, __y)  (__x) >> (__y)
> +#else
> +typedef struct
> +{
> +  uint64_t low;
> +  uint64_t high;
> +} u128;
> +
> +# define u128_high(__x)         (__x).high
> +# define u128_low(__x)          (__x).low
> +
> +# define u128_from_u64(__x)     (u128){.low = (__x), .high = 0}
> +
> +# define TOPBIT                 (UINT64_C(1) << 63)
> +# define MASK32                 (UINT64_C(0xffffffff))
> +
> +static u128 u128_add (u128 x, u128 y)
> +{
> +  uint64_t lower = (x.low & ~TOPBIT) + (y.low & ~TOPBIT);
> +  bool carry = (lower >> 63) + (x.low >> 63) + (y.low >> 63) > 1;
> +  return (u128) { .high = x.high + y.high + carry, .low = x.low + y.low };

why not simply the following?

    bool carry = x.low + y.low < x.low;
    return (u128) { ... };

> +}
> +
> +static u128 u128_lshift (u128 x, unsigned int n)
> +{
> +  switch (n)
> +    {
> +    case 0:         return x;
> +    case 1 ... 63:  return (u128) { .high = (x.high << n) | (x.low >> (64 - n)),
> +				    .low = x.low << n };
> +    case 64 ...127: return (u128) { .high = x.low << (n - 64), .low = 0};
> +    default:        return (u128) { .high = 0, .low = 0 };
> +    }
> +}
> +
> +static u128 u128_rshift (u128 x, unsigned int n)
> +{
> +  switch (n)
> +    {
> +    case 0:         return x;
> +    case 1 ... 63:  return (u128) { .high = x.high >> n,
> +				    .low = (x.high << (64 - n)) | (x.low >> n) };
> +    case 64 ...127: return (u128) { .high = 0, .low = x.high >> (n - 64) };
> +    default:        return (u128) { .high = 0, .low = 0 };
> +    }
> +}
> +
> +static u128 u128_mul (u128 x, u128 y)
> +{
> +  if (x.high == 0 && y.high == 0)
> +    {
> +      uint64_t x0 = x.low & MASK32;
> +      uint64_t x1 = x.low >> 32;
> +      uint64_t y0 = y.low & MASK32;
> +      uint64_t y1 = y.low >> 32;
> +      u128 x0y0 = { .high = 0, .low = x0 * y0 };
> +      u128 x0y1 = { .high = 0, .low = x0 * y1 };
> +      u128 x1y0 = { .high = 0, .low = x1 * y0 };
> +      u128 x1y1 = { .high = 0, .low = x1 * y1 };
> +      /* x0y0 + ((x0y1 + x1y0) << 32) + (x1y1 << 64)  */
> +      return u128_add (u128_add (x0y0, u128_lshift (u128_add (x0y1,
> +							      x1y0),
> +						    32)),
> +		       u128_lshift (x1y1, 64));

maybe you can form x1y1 = { .high = x1 * y1, .low = 0 } to avoid the left
shift of x1y1

Paul
Adhemerval Zanella Nov. 12, 2024, 5:42 p.m. UTC | #3
On 11/11/24 21:23, Joseph Myers wrote:
> On Mon, 11 Nov 2024, Adhemerval Zanella wrote:
> 
>> +#ifdef __SIZEOF_INT128__
>> +typedef unsigned __int128 u128;
>> +# define u128_high(__x)         (uint64_t)((__x) >> 64)
>> +# define u128_low(__x)          (uint64_t)(__x)
>> +# define u128_from_u64(__x)     (u128)(__x)
>> +# define u128_mul(__x, __y)     (__x) * (__y)
>> +# define u128_add(__x, __y)     (__x) + (__y)
>> +# define u128_lshift(__x, __y)  (__x) << (__y)
>> +# define u128_rshift(__x, __y)  (__x) >> (__y)
>> +#else
>> +typedef struct
> 
> I'd suggest also having an option for the case of defined 
> __BITINT_MAXWIDTH__ && __BITINT_MAXWIDTH__ >= 128, that uses unsigned 
> _BitInt (128) in that case (and would look essentially like the unsigned 
> __int128 case).  (You can test that with GCC 14 or later on 32-bit x86.)
> 

Sounds reasonable, I checked on some other ABIs and it seems that not
everyone that supports __int128 also supports _BitInt (128).
Adhemerval Zanella Nov. 12, 2024, 6:41 p.m. UTC | #4
On 12/11/24 10:17, Paul Zimmermann wrote:
>        Hi Adhemerval,
> 
> some typos and possible improvements (this is not a review since I am in
> the "Signed-off-by" fields):
> 
>> +  (src/binary32/tan/tanf.cc in CORE-MATH)
> 
> should be src/binary32/tan/tanf.c

Ack.

> 
>> +  - The code was adapted to use glibc code style and internal
>> +    functions to handle errno, overflow, and underflow.  It was changed
>> +    to use an internal wrapper for 128 bit unsigned integer operation
> 
> maybe "operations"?
>

Ack.

 
>> +/* Limited support for internal 128 bit integer, used on some math
>> +   implementations.  It uses compiler builtin type if supported, otherwise
>> +   it is emulate.  Only unsigned and some operations are currently supported:
> 
> emulate -> emulated
> 

Ack.

>> +
>> +   - u128_t:         the 128 bit unsigned type.
>> +   - u128_high:      return the high part of the number.
>> +   - u128_low:       return the low part of the number.
>> +   - u128_from_u64:  createa 128 bit number from a 64 bit one.
> 
> create a

Ack.

> 
>> +   - u128_mul:       multiple two 128 bit numbers.
> 
> multiply
> 

Ack.

>> +   - u128_add:       add two 128 bit numbers.
>> +   - u128_lshift:    left shift a number.
>> +   - u128_rshift:    right shift a number.
>> + */
>> +
>> +#ifdef __SIZEOF_INT128__
>> +typedef unsigned __int128 u128;
>> +# define u128_high(__x)         (uint64_t)((__x) >> 64)
>> +# define u128_low(__x)          (uint64_t)(__x)
>> +# define u128_from_u64(__x)     (u128)(__x)
>> +# define u128_mul(__x, __y)     (__x) * (__y)
>> +# define u128_add(__x, __y)     (__x) + (__y)
>> +# define u128_lshift(__x, __y)  (__x) << (__y)
>> +# define u128_rshift(__x, __y)  (__x) >> (__y)
>> +#else
>> +typedef struct
>> +{
>> +  uint64_t low;
>> +  uint64_t high;
>> +} u128;
>> +
>> +# define u128_high(__x)         (__x).high
>> +# define u128_low(__x)          (__x).low
>> +
>> +# define u128_from_u64(__x)     (u128){.low = (__x), .high = 0}
>> +
>> +# define TOPBIT                 (UINT64_C(1) << 63)
>> +# define MASK32                 (UINT64_C(0xffffffff))
>> +
>> +static u128 u128_add (u128 x, u128 y)
>> +{
>> +  uint64_t lower = (x.low & ~TOPBIT) + (y.low & ~TOPBIT);
>> +  bool carry = (lower >> 63) + (x.low >> 63) + (y.low >> 63) > 1;
>> +  return (u128) { .high = x.high + y.high + carry, .low = x.low + y.low };
> 
> why not simply the following?
> 
>     bool carry = x.low + y.low < x.low;
>     return (u128) { ... };

Because I forgot these are unsigned valued. Ack.

> 
>> +}
>> +
>> +static u128 u128_lshift (u128 x, unsigned int n)
>> +{
>> +  switch (n)
>> +    {
>> +    case 0:         return x;
>> +    case 1 ... 63:  return (u128) { .high = (x.high << n) | (x.low >> (64 - n)),
>> +				    .low = x.low << n };
>> +    case 64 ...127: return (u128) { .high = x.low << (n - 64), .low = 0};
>> +    default:        return (u128) { .high = 0, .low = 0 };
>> +    }
>> +}
>> +
>> +static u128 u128_rshift (u128 x, unsigned int n)
>> +{
>> +  switch (n)
>> +    {
>> +    case 0:         return x;
>> +    case 1 ... 63:  return (u128) { .high = x.high >> n,
>> +				    .low = (x.high << (64 - n)) | (x.low >> n) };
>> +    case 64 ...127: return (u128) { .high = 0, .low = x.high >> (n - 64) };
>> +    default:        return (u128) { .high = 0, .low = 0 };
>> +    }
>> +}
>> +
>> +static u128 u128_mul (u128 x, u128 y)
>> +{
>> +  if (x.high == 0 && y.high == 0)
>> +    {
>> +      uint64_t x0 = x.low & MASK32;
>> +      uint64_t x1 = x.low >> 32;
>> +      uint64_t y0 = y.low & MASK32;
>> +      uint64_t y1 = y.low >> 32;
>> +      u128 x0y0 = { .high = 0, .low = x0 * y0 };
>> +      u128 x0y1 = { .high = 0, .low = x0 * y1 };
>> +      u128 x1y0 = { .high = 0, .low = x1 * y0 };
>> +      u128 x1y1 = { .high = 0, .low = x1 * y1 };
>> +      /* x0y0 + ((x0y1 + x1y0) << 32) + (x1y1 << 64)  */
>> +      return u128_add (u128_add (x0y0, u128_lshift (u128_add (x0y1,
>> +							      x1y0),
>> +						    32)),
>> +		       u128_lshift (x1y1, 64));
> 
> maybe you can form x1y1 = { .high = x1 * y1, .low = 0 } to avoid the left
> shift of x1y1

Indeed, I will change it.

> 
> Paul
diff mbox series

Patch

diff --git a/SHARED-FILES b/SHARED-FILES
index 033ce7f092..580e6b231a 100644
--- a/SHARED-FILES
+++ b/SHARED-FILES
@@ -288,3 +288,9 @@  sysdeps/ieee754/flt-32/e_lgammaf_r.c:
   - remove the errno stuff (this is done by the wrapper)
   - replace 0x1p127f * 0x1p127f by math_narrow_eval (x * 0x1p127f)
   - add libm_alias_finite (__ieee754_lgammaf_r, __lgammaf_r) at the end
+sysdeps/ieee754/flt-32/s_tanf.c:
+  (src/binary32/tan/tanf.c in CORE-MATH)
+  - The code was adapted to use glibc code style and internal
+    functions to handle errno, overflow, and underflow.  It was changed
+    to use an internal wrapper for 128 bit unsigned integer operations
+    for ABIs that do not support the type natively.
diff --git a/sysdeps/aarch64/libm-test-ulps b/sysdeps/aarch64/libm-test-ulps
index 1d3d1f9b6a..89b166b71b 100644
--- a/sysdeps/aarch64/libm-test-ulps
+++ b/sysdeps/aarch64/libm-test-ulps
@@ -1561,7 +1561,6 @@  float: 3
 ldouble: 4
 
 Function: "tan":
-float: 1
 ldouble: 1
 
 Function: "tan_advsimd":
@@ -1570,7 +1569,6 @@  float: 2
 
 Function: "tan_downward":
 double: 1
-float: 2
 ldouble: 1
 
 Function: "tan_sve":
@@ -1579,12 +1577,10 @@  float: 2
 
 Function: "tan_towardzero":
 double: 1
-float: 1
 ldouble: 1
 
 Function: "tan_upward":
 double: 1
-float: 1
 ldouble: 1
 
 Function: "tanh":
diff --git a/sysdeps/alpha/fpu/libm-test-ulps b/sysdeps/alpha/fpu/libm-test-ulps
index 7256e674bb..0f7628b75b 100644
--- a/sysdeps/alpha/fpu/libm-test-ulps
+++ b/sysdeps/alpha/fpu/libm-test-ulps
@@ -1342,22 +1342,18 @@  float: 3
 ldouble: 4
 
 Function: "tan":
-float: 1
 ldouble: 1
 
 Function: "tan_downward":
 double: 1
-float: 2
 ldouble: 1
 
 Function: "tan_towardzero":
 double: 1
-float: 1
 ldouble: 1
 
 Function: "tan_upward":
 double: 1
-float: 1
 ldouble: 1
 
 Function: "tanh":
diff --git a/sysdeps/arc/fpu/libm-test-ulps b/sysdeps/arc/fpu/libm-test-ulps
index 66a2b541c6..4d4b22db47 100644
--- a/sysdeps/arc/fpu/libm-test-ulps
+++ b/sysdeps/arc/fpu/libm-test-ulps
@@ -1081,19 +1081,15 @@  float: 3
 
 Function: "tan":
 double: 1
-float: 1
 
 Function: "tan_downward":
 double: 1
-float: 2
 
 Function: "tan_towardzero":
 double: 1
-float: 2
 
 Function: "tan_upward":
 double: 1
-float: 2
 
 Function: "tanh":
 double: 3
diff --git a/sysdeps/arc/nofpu/libm-test-ulps b/sysdeps/arc/nofpu/libm-test-ulps
index 38836ddc38..4faf784aad 100644
--- a/sysdeps/arc/nofpu/libm-test-ulps
+++ b/sysdeps/arc/nofpu/libm-test-ulps
@@ -259,9 +259,6 @@  Function: "sinh":
 double: 2
 float: 2
 
-Function: "tan":
-float: 1
-
 Function: "tanh":
 double: 2
 float: 2
diff --git a/sysdeps/arm/libm-test-ulps b/sysdeps/arm/libm-test-ulps
index 2651046cfa..c80122de79 100644
--- a/sysdeps/arm/libm-test-ulps
+++ b/sysdeps/arm/libm-test-ulps
@@ -1078,20 +1078,14 @@  Function: "sinh_upward":
 double: 3
 float: 3
 
-Function: "tan":
-float: 1
-
 Function: "tan_downward":
 double: 1
-float: 2
 
 Function: "tan_towardzero":
 double: 1
-float: 1
 
 Function: "tan_upward":
 double: 1
-float: 1
 
 Function: "tanh":
 double: 2
diff --git a/sysdeps/csky/fpu/libm-test-ulps b/sysdeps/csky/fpu/libm-test-ulps
index 02b4cb4934..d67cfe1785 100644
--- a/sysdeps/csky/fpu/libm-test-ulps
+++ b/sysdeps/csky/fpu/libm-test-ulps
@@ -1000,20 +1000,14 @@  Function: "sinh_upward":
 double: 3
 float: 3
 
-Function: "tan":
-float: 1
-
 Function: "tan_downward":
 double: 1
-float: 2
 
 Function: "tan_towardzero":
 double: 1
-float: 1
 
 Function: "tan_upward":
 double: 1
-float: 1
 
 Function: "tanh":
 double: 2
diff --git a/sysdeps/csky/nofpu/libm-test-ulps b/sysdeps/csky/nofpu/libm-test-ulps
index 34312f5a06..6cdf9fd034 100644
--- a/sysdeps/csky/nofpu/libm-test-ulps
+++ b/sysdeps/csky/nofpu/libm-test-ulps
@@ -1031,20 +1031,14 @@  Function: "sinh_upward":
 double: 3
 float: 3
 
-Function: "tan":
-float: 1
-
 Function: "tan_downward":
 double: 1
-float: 2
 
 Function: "tan_towardzero":
 double: 1
-float: 1
 
 Function: "tan_upward":
 double: 1
-float: 1
 
 Function: "tanh":
 double: 2
diff --git a/sysdeps/generic/math_int128.h b/sysdeps/generic/math_int128.h
new file mode 100644
index 0000000000..16bd34dd60
--- /dev/null
+++ b/sysdeps/generic/math_int128.h
@@ -0,0 +1,144 @@ 
+/* Internal 128 bit int support.
+   Copyright (C) 2024 Free Software Foundation, Inc.
+   This file is part of the GNU C Library.
+
+   The GNU C Library is free software; you can redistribute it and/or
+   modify it under the terms of the GNU Lesser General Public
+   License as published by the Free Software Foundation; either
+   version 2.1 of the License, or (at your option) any later version.
+
+   The GNU C Library is distributed in the hope that it will be useful,
+   but WITHOUT ANY WARRANTY; without even the implied warranty of
+   MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
+   Lesser General Public License for more details.
+
+   You should have received a copy of the GNU Lesser General Public
+   License along with the GNU C Library; if not, see
+   <https://www.gnu.org/licenses/>.  */
+
+#ifndef _MATH_INT128_H
+#define _MATH_INT128_H
+
+/* Limited support for internal 128 bit integer, used on some math
+   implementations.  It uses compiler builtin type if supported, otherwise
+   it is emulated.  Only unsigned and some operations are currently supported:
+
+   - u128:           the 128 bit unsigned type.
+   - u128_high:      return the high part of the number.
+   - u128_low:       return the low part of the number.
+   - u128_from_u64:  create a 128 bit number from a 64 bit one.
+   - u128_mul:       multiply two 128 bit numbers.
+   - u128_add:       add two 128 bit numbers.
+   - u128_lshift:    left shift a number.
+   - u128_rshift:    right shift a number.
+ */
+
+#ifdef __SIZEOF_INT128__
+typedef unsigned __int128 u128;
+# define u128_high(__x)         (uint64_t)((__x) >> 64)
+# define u128_low(__x)          (uint64_t)(__x)
+# define u128_from_u64(__x)     (u128)(__x)
+# define u128_mul(__x, __y)     (__x) * (__y)
+# define u128_add(__x, __y)     (__x) + (__y)
+# define u128_lshift(__x, __y)  (__x) << (__y)
+# define u128_rshift(__x, __y)  (__x) >> (__y)
+#else
+typedef struct
+{
+  uint64_t low;
+  uint64_t high;
+} u128;
+
+# define u128_high(__x)         (__x).high
+# define u128_low(__x)          (__x).low
+
+# define u128_from_u64(__x)     (u128){.low = (__x), .high = 0}
+
+# define TOPBIT                 (UINT64_C(1) << 63)
+# define MASK32                 (UINT64_C(0xffffffff))
+
+static u128 u128_add (u128 x, u128 y)
+{
+  uint64_t lower = (x.low & ~TOPBIT) + (y.low & ~TOPBIT);
+  bool carry = (lower >> 63) + (x.low >> 63) + (y.low >> 63) > 1;
+  return (u128) { .high = x.high + y.high + carry, .low = x.low + y.low };
+}
+
+static u128 u128_lshift (u128 x, unsigned int n)
+{
+  switch (n)
+    {
+    case 0:         return x;
+    case 1 ... 63:  return (u128) { .high = (x.high << n) | (x.low >> (64 - n)),
+				    .low = x.low << n };
+    case 64 ...127: return (u128) { .high = x.low << (n - 64), .low = 0};
+    default:        return (u128) { .high = 0, .low = 0 };
+    }
+}
+
+static u128 u128_rshift (u128 x, unsigned int n)
+{
+  switch (n)
+    {
+    case 0:         return x;
+    case 1 ... 63:  return (u128) { .high = x.high >> n,
+				    .low = (x.high << (64 - n)) | (x.low >> n) };
+    case 64 ...127: return (u128) { .high = 0, .low = x.high >> (n - 64) };
+    default:        return (u128) { .high = 0, .low = 0 };
+    }
+}
+
+static u128 u128_mul (u128 x, u128 y)
+{
+  if (x.high == 0 && y.high == 0)
+    {
+      uint64_t x0 = x.low & MASK32;
+      uint64_t x1 = x.low >> 32;
+      uint64_t y0 = y.low & MASK32;
+      uint64_t y1 = y.low >> 32;
+      u128 x0y0 = { .high = 0, .low = x0 * y0 };
+      u128 x0y1 = { .high = 0, .low = x0 * y1 };
+      u128 x1y0 = { .high = 0, .low = x1 * y0 };
+      u128 x1y1 = { .high = 0, .low = x1 * y1 };
+      /* x0y0 + ((x0y1 + x1y0) << 32) + (x1y1 << 64)  */
+      return u128_add (u128_add (x0y0, u128_lshift (u128_add (x0y1,
+							      x1y0),
+						    32)),
+		       u128_lshift (x1y1, 64));
+    }
+  else
+    {
+      uint64_t x0 = x.low & MASK32;
+      uint64_t x1 = x.low >> 32;
+      uint64_t x2 = x.high & MASK32;
+      uint64_t x3 = x.high >> 32;
+      uint64_t y0 = y.low & MASK32;
+      uint64_t y1 = y.low >> 32;
+      uint64_t y2 = y.high & MASK32;
+      uint64_t y3 = y.high >> 32;
+      u128 x0y0 = { .high = 0, .low = x0 * y0 };
+      u128 x0y1 = { .high = 0, .low = x0 * y1 };
+      u128 x0y2 = { .high = 0, .low = x0 * y2 };
+      u128 x0y3 = { .high = 0, .low = x0 * y3 };
+      u128 x1y0 = { .high = 0, .low = x1 * y0 };
+      u128 x1y1 = { .high = 0, .low = x1 * y1 };
+      u128 x1y2 = { .high = 0, .low = x1 * y2 };
+      u128 x2y0 = { .high = 0, .low = x2 * y0 };
+      u128 x2y1 = { .high = 0, .low = x2 * y1 };
+      u128 x3y0 = { .high = 0, .low = x3 * y0 };
+      /* x0y0 + ((x0y1 + x1y0) << 32) + ((x0y2 + x1y1 + x2y0) << 64) +
+          ((x0y3 + x1y2 + x2y1 + x3y0) << 96)  */
+      u128 r0 = u128_add (x0y0,
+			  u128_lshift (u128_add (x0y1, x1y0),
+				       32));
+      u128 r1 = u128_add (u128_lshift (u128_add (u128_add (x0y2, x1y1), x2y0),
+				       64),
+			  u128_lshift (u128_add (u128_add (x0y3, x1y2),
+						 u128_add (x2y1, x3y0)),
+				       96));
+      return u128_add (r0, r1);
+   }
+}
+#endif /* __SIZEOF_INT128__ */
+
+#endif
diff --git a/sysdeps/hppa/fpu/libm-test-ulps b/sysdeps/hppa/fpu/libm-test-ulps
index 47bdd48e7f..d40d7e8d82 100644
--- a/sysdeps/hppa/fpu/libm-test-ulps
+++ b/sysdeps/hppa/fpu/libm-test-ulps
@@ -1104,20 +1104,16 @@  float: 3
 
 Function: "tan":
 double: 1
-float: 1
 ldouble: 1
 
 Function: "tan_downward":
 double: 1
-float: 2
 
 Function: "tan_towardzero":
 double: 1
-float: 1
 
 Function: "tan_upward":
 double: 1
-float: 1
 
 Function: "tanh":
 double: 2
diff --git a/sysdeps/i386/fpu/libm-test-ulps b/sysdeps/i386/fpu/libm-test-ulps
index 170e7cfc65..c06da68b45 100644
--- a/sysdeps/i386/fpu/libm-test-ulps
+++ b/sysdeps/i386/fpu/libm-test-ulps
@@ -1614,25 +1614,21 @@  float128: 4
 ldouble: 5
 
 Function: "tan":
-float: 1
 float128: 1
 ldouble: 2
 
 Function: "tan_downward":
 double: 1
-float: 2
 float128: 1
 ldouble: 3
 
 Function: "tan_towardzero":
 double: 1
-float: 2
 float128: 1
 ldouble: 3
 
 Function: "tan_upward":
 double: 1
-float: 2
 float128: 1
 ldouble: 2
 
diff --git a/sysdeps/i386/i686/fpu/multiarch/libm-test-ulps b/sysdeps/i386/i686/fpu/multiarch/libm-test-ulps
index a9ce632e6a..43ffbd7978 100644
--- a/sysdeps/i386/i686/fpu/multiarch/libm-test-ulps
+++ b/sysdeps/i386/i686/fpu/multiarch/libm-test-ulps
@@ -1619,25 +1619,21 @@  float128: 4
 ldouble: 5
 
 Function: "tan":
-float: 1
 float128: 1
 ldouble: 2
 
 Function: "tan_downward":
 double: 1
-float: 2
 float128: 1
 ldouble: 3
 
 Function: "tan_towardzero":
 double: 1
-float: 2
 float128: 1
 ldouble: 3
 
 Function: "tan_upward":
 double: 1
-float: 2
 float128: 1
 ldouble: 2
 
diff --git a/sysdeps/ieee754/flt-32/k_tanf.c b/sysdeps/ieee754/flt-32/k_tanf.c
index e1c9d14104..1cc8931700 100644
--- a/sysdeps/ieee754/flt-32/k_tanf.c
+++ b/sysdeps/ieee754/flt-32/k_tanf.c
@@ -1,101 +1 @@ 
-/* k_tanf.c -- float version of k_tan.c
- */
-
-/*
- * ====================================================
- * Copyright (C) 1993 by Sun Microsystems, Inc. All rights reserved.
- *
- * Developed at SunPro, a Sun Microsystems, Inc. business.
- * Permission to use, copy, modify, and distribute this
- * software is freely granted, provided that this notice
- * is preserved.
- * ====================================================
- */
-
-#if defined(LIBM_SCCS) && !defined(lint)
-static char rcsid[] = "$NetBSD: k_tanf.c,v 1.4 1995/05/10 20:46:39 jtc Exp $";
-#endif
-
-#include <float.h>
-#include <math.h>
-#include <math_private.h>
-#include <math-underflow.h>
-static const float
-one   =  1.0000000000e+00, /* 0x3f800000 */
-pio4  =  7.8539812565e-01, /* 0x3f490fda */
-pio4lo=  3.7748947079e-08, /* 0x33222168 */
-T[] =  {
-  3.3333334327e-01, /* 0x3eaaaaab */
-  1.3333334029e-01, /* 0x3e088889 */
-  5.3968254477e-02, /* 0x3d5d0dd1 */
-  2.1869488060e-02, /* 0x3cb327a4 */
-  8.8632395491e-03, /* 0x3c11371f */
-  3.5920790397e-03, /* 0x3b6b6916 */
-  1.4562094584e-03, /* 0x3abede48 */
-  5.8804126456e-04, /* 0x3a1a26c8 */
-  2.4646313977e-04, /* 0x398137b9 */
-  7.8179444245e-05, /* 0x38a3f445 */
-  7.1407252108e-05, /* 0x3895c07a */
- -1.8558637748e-05, /* 0xb79bae5f */
-  2.5907305826e-05, /* 0x37d95384 */
-};
-
-float __kernel_tanf(float x, float y, int iy)
-{
-	float z,r,v,w,s;
-	int32_t ix,hx;
-	GET_FLOAT_WORD(hx,x);
-	ix = hx&0x7fffffff;	/* high word of |x| */
-	if(ix<0x39000000)			/* x < 2**-13 */
-	    {if((int)x==0) {			/* generate inexact */
-		if((ix|(iy+1))==0) return one/fabsf(x);
-		else if (iy == 1)
-		  {
-		    math_check_force_underflow (x);
-		    return x;
-		  }
-		else
-		  return -one / x;
-	    }
-	    }
-	if(ix>=0x3f2ca140) { 			/* |x|>=0.6744 */
-	    if(hx<0) {x = -x; y = -y;}
-	    z = pio4-x;
-	    w = pio4lo-y;
-	    x = z+w; y = 0.0;
-	    if (fabsf (x) < 0x1p-13f)
-		return (1 - ((hx >> 30) & 2)) * iy * (1.0f - 2 * iy * x);
-	}
-	z	=  x*x;
-	w 	=  z*z;
-    /* Break x^5*(T[1]+x^2*T[2]+...) into
-     *	  x^5(T[1]+x^4*T[3]+...+x^20*T[11]) +
-     *	  x^5(x^2*(T[2]+x^4*T[4]+...+x^22*[T12]))
-     */
-	r = T[1]+w*(T[3]+w*(T[5]+w*(T[7]+w*(T[9]+w*T[11]))));
-	v = z*(T[2]+w*(T[4]+w*(T[6]+w*(T[8]+w*(T[10]+w*T[12])))));
-	s = z*x;
-	r = y + z*(s*(r+v)+y);
-	r += T[0]*s;
-	w = x+r;
-	if(ix>=0x3f2ca140) {
-	    v = (float)iy;
-	    return (float)(1-((hx>>30)&2))*(v-(float)2.0*(x-(w*w/(w+v)-r)));
-	}
-	if(iy==1) return w;
-	else {		/* if allow error up to 2 ulp,
-			   simply return -1.0/(x+r) here */
-     /*  compute -1.0/(x+r) accurately */
-	    float a,t;
-	    int32_t i;
-	    z  = w;
-	    GET_FLOAT_WORD(i,z);
-	    SET_FLOAT_WORD(z,i&0xfffff000);
-	    v  = r-(z - x); 	/* z+v = r+x */
-	    t = a  = -(float)1.0/w;	/* a = -1.0/w */
-	    GET_FLOAT_WORD(i,t);
-	    SET_FLOAT_WORD(t,i&0xfffff000);
-	    s  = (float)1.0+t*z;
-	    return t+a*(s+t*v);
-	}
-}
+/* Not needed.  */
diff --git a/sysdeps/ieee754/flt-32/s_tanf.c b/sysdeps/ieee754/flt-32/s_tanf.c
index ae6600bd57..41fd8fe496 100644
--- a/sysdeps/ieee754/flt-32/s_tanf.c
+++ b/sysdeps/ieee754/flt-32/s_tanf.c
@@ -1,76 +1,176 @@ 
-/* s_tanf.c -- float version of s_tan.c.
- */
+/* Correctly-rounded tangent of binary32 value.
 
-/*
- * ====================================================
- * Copyright (C) 1993 by Sun Microsystems, Inc. All rights reserved.
- *
- * Developed at SunPro, a Sun Microsystems, Inc. business.
- * Permission to use, copy, modify, and distribute this
- * software is freely granted, provided that this notice
- * is preserved.
- * ====================================================
- */
+Copyright (c) 2022-2024 Alexei Sibidanov.
 
-#if defined(LIBM_SCCS) && !defined(lint)
-static char rcsid[] = "$NetBSD: s_tanf.c,v 1.4 1995/05/10 20:48:20 jtc Exp $";
-#endif
+The original version of this file was copied from the CORE-MATH
+project (file src/binary32/tan/tanf.c, revision 59d21d7).
 
-#include <errno.h>
-#include <math.h>
-#include <math_private.h>
+Permission is hereby granted, free of charge, to any person obtaining a copy
+of this software and associated documentation files (the "Software"), to deal
+in the Software without restriction, including without limitation the rights
+to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
+copies of the Software, and to permit persons to whom the Software is
+furnished to do so, subject to the following conditions:
+
+The above copyright notice and this permission notice shall be included in all
+copies or substantial portions of the Software.
+
+THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
+AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
+OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
+SOFTWARE.
+*/
+
+#include <array_length.h>
+#include <stdint.h>
 #include <libm-alias-float.h>
-#include "s_sincosf.h"
+#include "math_config.h"
+#include <math_int128.h>
 
-/* Reduce range of X to a multiple of PI/2.  The modulo result is between
-   -PI/4 and PI/4 and returned as a high part y[0] and a low part y[1].
-   The low bit in the return value indicates the first or 2nd half of tanf.  */
-static inline int32_t
-rem_pio2f (float x, float *y)
+static inline double
+rltl (float z, int *q)
 {
-  double dx = x;
-  int n;
-  const sincos_t *p = &__sincosf_table[0];
+  double x = z;
+  double idl = -0x1.b1bbead603d8bp-32 * x;
+  double idh = 0x1.45f306ep-1 * x;
+  double id = roundeven (idh);
+  *q = (int64_t) id;
+  return (idh - id) + idl;
+}
 
-  if (__glibc_likely (abstop12 (x) < abstop12 (120.0f)))
-    dx = reduce_fast (dx, p, &n);
-  else
+static double __attribute__ ((noinline))
+rbig (uint32_t u, int *q)
+{
+  static const uint64_t ipi[] =
     {
-      uint32_t xi = asuint (x);
-      int sign = xi >> 31;
-
-      dx = reduce_large (xi, &n);
-      dx = sign ? -dx : dx;
+      0xfe5163abdebbc562, 0xdb6295993c439041,
+      0xfc2757d1f534ddc0, 0xa2f9836e4e441529
+    };
+  int e = (u >> 23) & 0xff, i;
+  uint64_t m = (u & (~0u >> 9)) | 1 << 23;
+  u128 p0 = u128_mul (u128_from_u64 (m), u128_from_u64 (ipi[0]));
+  u128 p1 = u128_mul (u128_from_u64 (m), u128_from_u64 (ipi[1]));
+  p1 = u128_add (p1, u128_rshift (p0, 64));
+  u128 p2 = u128_mul (u128_from_u64 (m), u128_from_u64 (ipi[2]));
+  p2 = u128_add (p2, u128_rshift (p1, 64));
+  u128 p3 = u128_mul (u128_from_u64 (m), u128_from_u64 (ipi[3]));
+  p3 = u128_add (p3, u128_rshift (p2, 64));
+  uint64_t p3h = u128_high (p3);
+  uint64_t p3l = u128_low (p3);
+  uint64_t p2l = u128_low (p2);
+  uint64_t p1l = u128_low (p1);
+  int64_t a;
+  int k = e - 127, s = k - 23;
+  /* in __tanf(), rbig() is called in the case 127+28 <= e < 0xff
+     thus 155 <= e <= 254, which yields 28 <= k <= 127 and 5 <= s <= 104 */
+  if (s < 64)
+    {
+      i = p3h << s | p3l >> (64 - s);
+      a = p3l << s | p2l >> (64 - s);
     }
-
-  y[0] = dx;
-  y[1] = dx - y[0];
-  return n;
+  else if (s == 64)
+    {
+      i = p3l;
+      a = p2l;
+    }
+  else
+    { /* s > 64 */
+      i = p3l << (s - 64) | p2l >> (128 - s);
+      a = p2l << (s - 64) | p1l >> (128 - s);
+    }
+  int sgn = u;
+  sgn >>= 31;
+  int64_t sm = a >> 63;
+  i -= sm;
+  double z = (a ^ sgn) * 0x1p-64;
+  i = (i ^ sgn) - sgn;
+  *q = i;
+  return z;
 }
 
-float __tanf(float x)
+float
+__tanf (float x)
 {
-	float y[2],z=0.0;
-	int32_t n, ix;
-
-	GET_FLOAT_WORD(ix,x);
-
-    /* |x| ~< pi/4 */
-	ix &= 0x7fffffff;
-	if(ix <= 0x3f490fda) return __kernel_tanf(x,z,1);
-
-    /* tan(Inf or NaN) is NaN */
-	else if (ix>=0x7f800000) {
-	  if (ix==0x7f800000)
-	    __set_errno (EDOM);
-	  return x-x;		/* NaN */
+  uint32_t t = asuint (x);
+  int e = (t >> 23) & 0xff;
+  int i;
+  double z;
+  if (__glibc_likely (e < 127 + 28))
+    {
+      if (__glibc_unlikely (e < 115))
+	{
+	  if (__glibc_unlikely (e < 102))
+	    return fmaf (x, fabsf (x), x);
+	  float x2 = x * x;
+	  return fmaf (x, 0x1.555556p-2f * x2, x);
 	}
-
-    /* argument reduction needed */
-	else {
-	    n = rem_pio2f(x,y);
-	    return __kernel_tanf(y[0],y[1],1-((n&1)<<1)); /*   1 -- n even
-							      -1 -- n odd */
+      z = rltl (x, &i);
+    }
+  else if (e < 0xff)
+    z = rbig (t, &i);
+  else
+    {
+      if (t << 9)
+	return x + x; /* nan */
+      return __math_invalidf (x);
+    }
+  double z2 = z * z;
+  double z4 = z2 * z2;
+  static const double cn[] =
+    {
+      0x1.921fb54442d18p+0, -0x1.fd226e573289fp-2,
+      0x1.b7a60c8dac9f6p-6, -0x1.725beb40f33e5p-13
+    };
+  static const double cd[] =
+    {
+      0x1p+0,               -0x1.2395347fb829dp+0,
+      0x1.2313660f29c36p-3, -0x1.9a707ab98d1c1p-9
+    };
+  static const double s[] = { 0, 1 };
+  double n = cn[0] + z2 * cn[1];
+  double n2 = cn[2] + z2 * cn[3];
+  n += z4 * n2;
+  double d = cd[0] + z2 * cd[1];
+  double d2 = cd[2] + z2 * cd[3];
+  d += z4 * d2;
+  n *= z;
+  double s0 = s[i & 1];
+  double s1 = s[1 - (i & 1)];
+  double r1 = (n * s1 - d * s0) / (n * s0 + d * s1);
+  uint64_t tail = (asuint64 (r1) + 7) & (~UINT64_C(0) >> 35);
+  if (__glibc_unlikely (tail <= 14))
+    {
+      static const struct
+      {
+	float arg;
+	float rh;
+        float rl;
+      } st[] = {
+	{ 0x1.143ec4p+0f,    0x1.ddf9f6p+0f, -0x1.891d24p-52f },
+	{ 0x1.ada6aap+27f,   0x1.e80304p-3f,  0x1.419f46p-58f },
+	{ 0x1.af61dap+48f,   0x1.60d1c8p-2f, -0x1.2d6c3ap-55f },
+	{ 0x1.0088bcp+52f,   0x1.ca1edp+0f,   0x1.f6053p-53f },
+	{ 0x1.f90dfcp+72f,   0x1.597f9cp-1f,  0x1.925978p-53f },
+	{ 0x1.cc4e22p+85f,  -0x1.f33584p+1f,  0x1.d7254ap-51f },
+	{ 0x1.a6ce12p+86f,  -0x1.c5612ep-1f, -0x1.26c33ep-53f },
+	{ 0x1.6a0b76p+102f, -0x1.e42a1ep+0f, -0x1.1dc906p-52f },
+      };
+      uint32_t ax = t & (~0u >> 1);
+      uint32_t sgn = t >> 31;
+      for (int j = 0; j < array_length (st); j++)
+	{
+	  if (__glibc_unlikely (asfloat (st[j].arg) == ax))
+	    {
+	      if (sgn)
+		return -st[j].rh - st[j].rl;
+	      else
+		return st[j].rh + st[j].rl;
+	    }
 	}
+    }
+  return r1;
 }
 libm_alias_float (__tan, tan)
diff --git a/sysdeps/loongarch/lp64/libm-test-ulps b/sysdeps/loongarch/lp64/libm-test-ulps
index fbb590f98a..a626f32736 100644
--- a/sysdeps/loongarch/lp64/libm-test-ulps
+++ b/sysdeps/loongarch/lp64/libm-test-ulps
@@ -1348,22 +1348,18 @@  ldouble: 4
 
 Function: "tan":
 double: 1
-float: 1
 ldouble: 1
 
 Function: "tan_downward":
 double: 1
-float: 2
 ldouble: 1
 
 Function: "tan_towardzero":
 double: 1
-float: 1
 ldouble: 1
 
 Function: "tan_upward":
 double: 1
-float: 1
 ldouble: 1
 
 Function: "tanh":
diff --git a/sysdeps/microblaze/libm-test-ulps b/sysdeps/microblaze/libm-test-ulps
index e7dda11ac3..c96652a022 100644
--- a/sysdeps/microblaze/libm-test-ulps
+++ b/sysdeps/microblaze/libm-test-ulps
@@ -241,9 +241,6 @@  Function: "sinh":
 double: 2
 float: 2
 
-Function: "tan":
-float: 1
-
 Function: "tanh":
 double: 2
 float: 2
diff --git a/sysdeps/mips/mips32/libm-test-ulps b/sysdeps/mips/mips32/libm-test-ulps
index 8a9140e6f9..17034d44ba 100644
--- a/sysdeps/mips/mips32/libm-test-ulps
+++ b/sysdeps/mips/mips32/libm-test-ulps
@@ -1079,20 +1079,14 @@  Function: "sinh_upward":
 double: 3
 float: 3
 
-Function: "tan":
-float: 1
-
 Function: "tan_downward":
 double: 1
-float: 2
 
 Function: "tan_towardzero":
 double: 1
-float: 1
 
 Function: "tan_upward":
 double: 1
-float: 1
 
 Function: "tanh":
 double: 2
diff --git a/sysdeps/mips/mips64/libm-test-ulps b/sysdeps/mips/mips64/libm-test-ulps
index 6e60768b5f..a757f69a9a 100644
--- a/sysdeps/mips/mips64/libm-test-ulps
+++ b/sysdeps/mips/mips64/libm-test-ulps
@@ -1360,22 +1360,18 @@  float: 3
 ldouble: 4
 
 Function: "tan":
-float: 1
 ldouble: 1
 
 Function: "tan_downward":
 double: 1
-float: 2
 ldouble: 1
 
 Function: "tan_towardzero":
 double: 1
-float: 1
 ldouble: 1
 
 Function: "tan_upward":
 double: 1
-float: 1
 ldouble: 1
 
 Function: "tanh":
diff --git a/sysdeps/nios2/libm-test-ulps b/sysdeps/nios2/libm-test-ulps
index f9f5edbe63..a0a2b6923f 100644
--- a/sysdeps/nios2/libm-test-ulps
+++ b/sysdeps/nios2/libm-test-ulps
@@ -250,9 +250,6 @@  Function: "sinh":
 double: 2
 float: 2
 
-Function: "tan":
-float: 1
-
 Function: "tanh":
 double: 2
 float: 2
diff --git a/sysdeps/or1k/fpu/libm-test-ulps b/sysdeps/or1k/fpu/libm-test-ulps
index 46605505f1..115a73ef26 100644
--- a/sysdeps/or1k/fpu/libm-test-ulps
+++ b/sysdeps/or1k/fpu/libm-test-ulps
@@ -1013,20 +1013,14 @@  Function: "sinh_upward":
 double: 3
 float: 3
 
-Function: "tan":
-float: 1
-
 Function: "tan_downward":
 double: 1
-float: 2
 
 Function: "tan_towardzero":
 double: 1
-float: 1
 
 Function: "tan_upward":
 double: 1
-float: 1
 
 Function: "tanh":
 double: 2
diff --git a/sysdeps/or1k/nofpu/libm-test-ulps b/sysdeps/or1k/nofpu/libm-test-ulps
index b00a55a2a3..e130fd8a1d 100644
--- a/sysdeps/or1k/nofpu/libm-test-ulps
+++ b/sysdeps/or1k/nofpu/libm-test-ulps
@@ -1003,20 +1003,14 @@  Function: "sinh_upward":
 double: 3
 float: 3
 
-Function: "tan":
-float: 1
-
 Function: "tan_downward":
 double: 1
-float: 2
 
 Function: "tan_towardzero":
 double: 1
-float: 1
 
 Function: "tan_upward":
 double: 1
-float: 1
 
 Function: "tanh":
 double: 2
diff --git a/sysdeps/powerpc/fpu/libm-test-ulps b/sysdeps/powerpc/fpu/libm-test-ulps
index 56ca580497..adaa10fb56 100644
--- a/sysdeps/powerpc/fpu/libm-test-ulps
+++ b/sysdeps/powerpc/fpu/libm-test-ulps
@@ -1737,25 +1737,21 @@  double: 1
 float: 1
 
 Function: "tan":
-float: 3
 float128: 1
 ldouble: 2
 
 Function: "tan_downward":
 double: 1
-float: 3
 float128: 1
 ldouble: 3
 
 Function: "tan_towardzero":
 double: 1
-float: 3
 float128: 1
 ldouble: 2
 
 Function: "tan_upward":
 double: 1
-float: 3
 float128: 1
 ldouble: 3
 
diff --git a/sysdeps/powerpc/nofpu/libm-test-ulps b/sysdeps/powerpc/nofpu/libm-test-ulps
index 752d1937c6..2c44e39c2c 100644
--- a/sysdeps/powerpc/nofpu/libm-test-ulps
+++ b/sysdeps/powerpc/nofpu/libm-test-ulps
@@ -1476,22 +1476,18 @@  double: 1
 float: 1
 
 Function: "tan":
-float: 1
 ldouble: 2
 
 Function: "tan_downward":
 double: 1
-float: 2
 ldouble: 3
 
 Function: "tan_towardzero":
 double: 1
-float: 1
 ldouble: 2
 
 Function: "tan_upward":
 double: 1
-float: 1
 ldouble: 3
 
 Function: "tanh":
diff --git a/sysdeps/riscv/nofpu/libm-test-ulps b/sysdeps/riscv/nofpu/libm-test-ulps
index acb3db4045..84b9e15f43 100644
--- a/sysdeps/riscv/nofpu/libm-test-ulps
+++ b/sysdeps/riscv/nofpu/libm-test-ulps
@@ -1289,22 +1289,18 @@  float: 3
 ldouble: 4
 
 Function: "tan":
-float: 1
 ldouble: 1
 
 Function: "tan_downward":
 double: 1
-float: 2
 ldouble: 1
 
 Function: "tan_towardzero":
 double: 1
-float: 1
 ldouble: 1
 
 Function: "tan_upward":
 double: 1
-float: 1
 ldouble: 1
 
 Function: "tanh":
diff --git a/sysdeps/riscv/rvd/libm-test-ulps b/sysdeps/riscv/rvd/libm-test-ulps
index 3f7673ccc5..b9446e5a7c 100644
--- a/sysdeps/riscv/rvd/libm-test-ulps
+++ b/sysdeps/riscv/rvd/libm-test-ulps
@@ -1347,22 +1347,18 @@  float: 3
 ldouble: 4
 
 Function: "tan":
-float: 1
 ldouble: 1
 
 Function: "tan_downward":
 double: 1
-float: 2
 ldouble: 1
 
 Function: "tan_towardzero":
 double: 1
-float: 1
 ldouble: 1
 
 Function: "tan_upward":
 double: 1
-float: 1
 ldouble: 1
 
 Function: "tanh":
diff --git a/sysdeps/s390/fpu/libm-test-ulps b/sysdeps/s390/fpu/libm-test-ulps
index 3a1ad5c4e9..62d3f145fc 100644
--- a/sysdeps/s390/fpu/libm-test-ulps
+++ b/sysdeps/s390/fpu/libm-test-ulps
@@ -1346,22 +1346,18 @@  float: 3
 ldouble: 4
 
 Function: "tan":
-float: 1
 ldouble: 1
 
 Function: "tan_downward":
 double: 1
-float: 2
 ldouble: 1
 
 Function: "tan_towardzero":
 double: 1
-float: 1
 ldouble: 1
 
 Function: "tan_upward":
 double: 1
-float: 1
 ldouble: 1
 
 Function: "tanh":
diff --git a/sysdeps/sh/libm-test-ulps b/sysdeps/sh/libm-test-ulps
index 810a73648c..a6d271eb3f 100644
--- a/sysdeps/sh/libm-test-ulps
+++ b/sysdeps/sh/libm-test-ulps
@@ -501,12 +501,8 @@  Function: "sinh_towardzero":
 double: 3
 float: 2
 
-Function: "tan":
-float: 1
-
 Function: "tan_towardzero":
 double: 1
-float: 1
 
 Function: "tanh":
 double: 2
diff --git a/sysdeps/sparc/fpu/libm-test-ulps b/sysdeps/sparc/fpu/libm-test-ulps
index 9c6ddd10c1..9276ff9726 100644
--- a/sysdeps/sparc/fpu/libm-test-ulps
+++ b/sysdeps/sparc/fpu/libm-test-ulps
@@ -1360,22 +1360,18 @@  float: 3
 ldouble: 4
 
 Function: "tan":
-float: 1
 ldouble: 1
 
 Function: "tan_downward":
 double: 1
-float: 2
 ldouble: 1
 
 Function: "tan_towardzero":
 double: 1
-float: 1
 ldouble: 1
 
 Function: "tan_upward":
 double: 1
-float: 1
 ldouble: 1
 
 Function: "tanh":
diff --git a/sysdeps/x86_64/fpu/libm-test-ulps b/sysdeps/x86_64/fpu/libm-test-ulps
index 8f531e2992..f4360598a9 100644
--- a/sysdeps/x86_64/fpu/libm-test-ulps
+++ b/sysdeps/x86_64/fpu/libm-test-ulps
@@ -2136,25 +2136,21 @@  Function: "sinh_vlen8_avx2":
 float: 1
 
 Function: "tan":
-float: 1
 float128: 1
 ldouble: 2
 
 Function: "tan_downward":
 double: 1
-float: 2
 float128: 1
 ldouble: 3
 
 Function: "tan_towardzero":
 double: 1
-float: 1
 float128: 1
 ldouble: 3
 
 Function: "tan_upward":
 double: 1
-float: 1
 float128: 1
 ldouble: 2