[3/7] math: Simplify and optimize modff implementation

Message ID	20250528180100.172042-4-adhemerval.zanella@linaro.org
State	New
Headers	show Delivered-To: patch@linaro.org Received-SPF: pass (google.com: domain of libc-alpha-bounces~patch=linaro.org@sourceware.org designates 2620:52:3:1:0:246e:9693:128c as permitted sender) client-ip=2620:52:3:1:0:246e:9693:128c; DMARC-Filter: OpenDMARC Filter v1.4.2 sourceware.org 6A079385B52F From: Adhemerval Zanella <adhemerval.zanella@linaro.org> To: libc-alpha@sourceware.org Subject: [PATCH 3/7] math: Simplify and optimize modff implementation Date: Wed, 28 May 2025 14:59:29 -0300 Message-ID: <20250528180100.172042-4-adhemerval.zanella@linaro.org> In-Reply-To: <20250528180100.172042-1-adhemerval.zanella@linaro.org> References: <20250528180100.172042-1-adhemerval.zanella@linaro.org> MIME-Version: 1.0 Content-Transfer-Encoding: 8bit Precedence: list Errors-To: libc-alpha-bounces~patch=linaro.org@sourceware.org
Series	Simplify and optimize modf/modff \| expand [0/7] Simplify and optimize modf/modff [1/7] benchtests: Add modff benchtest [2/7] benchtests: Improve modf benchtest [3/7] math: Simplify and optimize modff implementation [4/7] math: Simplify and optimize modf implementation [5/7] powerpc: Remove modff optimization [6/7] powerpc: Remove modf optimization [7/7] x86_64: Optimize modf/modff for x86_64-v2

Message ID

20250528180100.172042-4-adhemerval.zanella@linaro.org

State

New

Headers

Received-SPF: pass (google.com: domain of
 libc-alpha-bounces~patch=linaro.org@sourceware.org designates
 2620:52:3:1:0:246e:9693:128c as permitted sender)
 client-ip=2620:52:3:1:0:246e:9693:128c;
DMARC-Filter: OpenDMARC Filter v1.4.2 sourceware.org 6A079385B52F
From: Adhemerval Zanella <adhemerval.zanella@linaro.org>
To: libc-alpha@sourceware.org
Subject: [PATCH 3/7] math: Simplify and optimize modff implementation
Date: Wed, 28 May 2025 14:59:29 -0300
Message-ID: <20250528180100.172042-4-adhemerval.zanella@linaro.org>
In-Reply-To: <20250528180100.172042-1-adhemerval.zanella@linaro.org>
References: <20250528180100.172042-1-adhemerval.zanella@linaro.org>
MIME-Version: 1.0
Content-Transfer-Encoding: 8bit
Precedence: list
Errors-To: libc-alpha-bounces~patch=linaro.org@sourceware.org

Series

Simplify and optimize modf/modff | expand

Commit Message

Adhemerval Zanella Netto May 28, 2025, 5:59 p.m. UTC

Refactor the generic implementation to use math_config.h definitions,
and add an alternative one if the ABI supports truncf instructions
(gated through math-use-builtins-trunc.h).

The generic implementation generates similar code for x86_64, while
the optimization path aarch64 (where truncf is supported as a builtin)
through frintz), the improvements are:

reciprocal-throughput           master     patch    difference
workload-0_1                    3.0740    3.0326         1.35%
workload-1_maxint               5.2231    3.0436        41.73%
workload-maxint_maxfloat        4.0962    3.0551        25.42%
workload-integral               3.7093    3.0612        17.47%

latency                         master     patch    difference
workload-0_1                    3.5521    4.7313       -33.20%
workload-1_maxint               6.7148    4.7314        29.54%
workload-maxint_maxfloat        4.0458    4.7518       -17.45%
workload-integral               3.9719    4.7427       -19.40%

Checked on aarch64-linux-gnu and x86_64-linux-gnu.
---
 sysdeps/ieee754/flt-32/math_config.h |  13 ++++
 sysdeps/ieee754/flt-32/s_modff.c     | 101 +++++++++++++++------------
 2 files changed, 70 insertions(+), 44 deletions(-)

diff --git a/sysdeps/ieee754/flt-32/math_config.h b/sysdeps/ieee754/flt-32/math_config.h
index 8d9c8ee3ad..e3e49835ce 100644
--- a/sysdeps/ieee754/flt-32/math_config.h
+++ b/sysdeps/ieee754/flt-32/math_config.h
@@ -165,6 +165,7 @@  issignalingf_inline (float x)
 #define BIT_WIDTH       32
 #define MANTISSA_WIDTH  23
 #define EXPONENT_WIDTH  8
+#define EXPONENT_BIAS   127
 #define MANTISSA_MASK   0x007fffff
 #define EXPONENT_MASK   0x7f800000
 #define EXP_MANT_MASK   0x7fffffff
@@ -177,12 +178,24 @@  is_nan (uint32_t x)
   return (x & EXP_MANT_MASK) > EXPONENT_MASK;
 }
 
+static inline bool
+is_inf (uint32_t x)
+{
+  return (x << 1) == (EXPONENT_MASK << 1);
+}
+
 static inline uint32_t
 get_mantissa (uint32_t x)
 {
   return x & MANTISSA_MASK;
 }
 
+static inline int
+get_exponent (uint32_t x)
+{
+  return (int)(x >> MANTISSA_WIDTH & 0xff) - EXPONENT_BIAS;
+}
+
 /* Convert integer number X, unbiased exponent EP, and sign S to double:
 
    result = X * 2^(EP+1 - exponent_bias)
diff --git a/sysdeps/ieee754/flt-32/s_modff.c b/sysdeps/ieee754/flt-32/s_modff.c
index ad2e91de25..6eb7fac989 100644
--- a/sysdeps/ieee754/flt-32/s_modff.c
+++ b/sysdeps/ieee754/flt-32/s_modff.c
@@ -1,54 +1,67 @@ 
-/* s_modff.c -- float version of s_modf.c.
- */
+/* Extract signed integral and fractional values.
+   Copyright (C) 1993-2025 Free Software Foundation, Inc.
+   This file is part of the GNU C Library.
 
-/*
- * ====================================================
- * Copyright (C) 1993 by Sun Microsystems, Inc. All rights reserved.
- *
- * Developed at SunPro, a Sun Microsystems, Inc. business.
- * Permission to use, copy, modify, and distribute this
- * software is freely granted, provided that this notice
- * is preserved.
- * ====================================================
- */
+   The GNU C Library is free software; you can redistribute it and/or
+   modify it under the terms of the GNU Lesser General Public
+   License as published by the Free Software Foundation; either
+   version 2.1 of the License, or (at your option) any later version.
+
+   The GNU C Library is distributed in the hope that it will be useful,
+   but WITHOUT ANY WARRANTY; without even the implied warranty of
+   MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
+   Lesser General Public License for more details.
+
+   You should have received a copy of the GNU Lesser General Public
+   License along with the GNU C Library; if not, see
+   <https://www.gnu.org/licenses/>.  */
 
 #include <math.h>
-#include <math_private.h>
 #include <libm-alias-float.h>
-
-static const float one = 1.0;
+#include "math_config.h"
+#include <math-use-builtins-trunc.h>
 
 float
-__modff(float x, float *iptr)
+__modff (float x, float *iptr)
 {
-	int32_t i0,j0;
-	uint32_t i;
-	GET_FLOAT_WORD(i0,x);
-	j0 = ((i0>>23)&0xff)-0x7f;	/* exponent of x */
-	if(__builtin_expect(j0<23, 1)) {		/* integer part in x */
-	    if(j0<0) {			/* |x|<1 */
-		SET_FLOAT_WORD(*iptr,i0&0x80000000);	/* *iptr = +-0 */
-		return x;
-	    } else {
-		i = (0x007fffff)>>j0;
-		if((i0&i)==0) {			/* x is integral */
-		    uint32_t ix;
-		    *iptr = x;
-		    GET_FLOAT_WORD(ix,x);
-		    SET_FLOAT_WORD(x,ix&0x80000000);	/* return +-0 */
-		    return x;
-		} else {
-		    SET_FLOAT_WORD(*iptr,i0&(~i));
-		    return x - *iptr;
-		}
-	    }
-	} else {			/* no fraction part */
-	    *iptr = x*one;
-	    /* We must handle NaNs separately.  */
-	    if (j0 == 0x80 && (i0 & 0x7fffff))
-	      return x*one;
-	    SET_FLOAT_WORD(x,i0&0x80000000);	/* return +-0 */
-	    return x;
+  uint32_t t = asuint (x);
+#if USE_TRUNCF_BUILTIN
+  if (is_inf (t))
+    {
+      *iptr = x;
+      return copysign (0.0, x);
+    }
+  *iptr = truncf (x);
+  return copysignf (x - *iptr, x);
+#else
+  int e = get_exponent (t);
+  /* No fraction part.  */
+  if (e < MANTISSA_WIDTH)
+    {
+      if (e < 0)
+	{
+	  /* |x|<1 -> *iptr = +-0 */
+	  *iptr = asfloat (t & SIGN_MASK);;
+	  return x;
 	}
+
+      uint32_t i = 0x007fffff >> e;
+      if ((t & i) == 0)
+	{
+	  /* x in integral, return +-0  */
+	  *iptr = x;
+	  return asfloat (t & SIGN_MASK);
+	}
+
+      *iptr = asfloat (t & ~i);
+      return x - *iptr;
+    }
+
+  /* Set invalid operation for sNaN.  */
+  *iptr = x * 1.0f;
+  if ((e == 0x80) && (t & 0x7fffffu))
+    return *iptr;
+  return asfloat (t & SIGN_MASK);
+#endif
 }
 libm_alias_float (__modf, modf)

[3/7] math: Simplify and optimize modff implementation

Commit Message

Patch