aboutsummaryrefslogtreecommitdiff
diff options
context:
space:
mode:
author Joe Ramsay <Joe.Ramsay@arm.com> 2024-04-30 11:09:11 +0100
committer Joe Ramsay <joe.ramsay@arm.com> 2024-04-30 11:09:11 +0100
commit 8928e471b062b008717c9c2958240c446317de53 (patch)
tree 6a91dacdf1cd2f5612871bddd38403d42d1c58f2
parent 233453aae6dbf8ef37074edeac2018e505523599 (diff)
download arm-optimized-routines-upstream-master.tar.gz
pl/math: Remove some unnecessary indirection in pow scalar fallback [upstream-master]
On AArch64, eval_as_double is an identity and is therefore unnecessary, and the opt barriers are only relevant for scalar, not vector, routines.
-rw-r--r-- pl/math/finite_pow.h 20
-rw-r--r-- pl/math/sv_pow_1u5.c 6
2 files changed, 10 insertions, 16 deletions
diff --git a/pl/math/finite_pow.h b/pl/math/finite_pow.h
index 8944d4f..67dd01d 100644
--- a/pl/math/finite_pow.h
+++ b/pl/math/finite_pow.h
@@ -1,7 +1,7 @@
/*
* Double-precision x^y function.
*
- * Copyright (c) 2018-2023, Arm Limited.
+ * Copyright (c) 2018-2024, Arm Limited.
* SPDX-License-Identifier: MIT OR Apache-2.0 WITH LLVM-exception
*/
@@ -108,7 +108,7 @@ special_case (double tmp, uint64_t sbits, uint64_t ki)
sbits -= 1009ull << 52;
scale = asdouble (sbits);
y = 0x1p1009 * (scale + scale * tmp);
- return check_oflow (eval_as_double (y));
+ return y;
}
/* k < 0, need special care in the subnormal range. */
sbits += 1022ull << 52;
@@ -128,7 +128,7 @@ special_case (double tmp, uint64_t sbits, uint64_t ki)
lo = scale - y + scale * tmp;
hi = one + y;
lo = one - hi + y + lo;
- y = eval_as_double (hi + lo) - one;
+ y = (hi + lo) - one;
/* Fix the sign of 0. */
if (y == 0.0)
y = asdouble (sbits & 0x8000000000000000);
@@ -137,7 +137,7 @@ special_case (double tmp, uint64_t sbits, uint64_t ki)
}
#endif
y = 0x1p-1022 * y;
- return check_uflow (eval_as_double (y));
+ return y;
}
/* Computes sign*exp(x+xtail) where |xtail| < 2^-8/N and |xtail| <= |x|.
@@ -192,7 +192,7 @@ exp_inline (double x, double xtail, uint32_t sign_bias)
double scale = asdouble (sbits);
/* Note: tmp == 0 or |tmp| > 2^-200 and scale > 2^-739, so there
is no spurious underflow here even without fma. */
- return eval_as_double (scale + scale * tmp);
+ return scale + scale * tmp;
}
/* Computes exp(x+xtail) where |xtail| < 2^-8/N and |xtail| <= |x|.
@@ -239,7 +239,7 @@ exp_nosignbias (double x, double xtail)
double scale = asdouble (sbits);
/* Note: tmp == 0 or |tmp| > 2^-200 and scale > 2^-739, so there
is no spurious underflow here even without fma. */
- return eval_as_double (scale + scale * tmp);
+ return scale + scale * tmp;
}
/* Returns 0 if not int, 1 if odd int, 2 if even int. The argument is
@@ -311,9 +311,7 @@ __pl_finite_pow (double x, double y)
if (2 * ix == 0 && iy >> 63)
return __math_divzero (sign_bias);
#endif
- /* Without the barrier some versions of clang hoist the 1/x2 and
- thus division by zero exception can be signaled spuriously. */
- return iy >> 63 ? opt_barrier_double (1 / x2) : x2;
+ return iy >> 63 ? 1 / x2 : x2;
}
/* Here x and y are non-zero finite. */
if (ix >> 63)
@@ -349,9 +347,7 @@ __pl_finite_pow (double x, double y)
if (topx == 0)
{
/* Normalize subnormal x so exponent becomes negative. */
- /* Without the barrier some versions of clang evalutate the mul
- unconditionally causing spurious overflow exceptions. */
- ix = asuint64 (opt_barrier_double (x) * 0x1p52);
+ ix = asuint64 (x * 0x1p52);
ix &= 0x7fffffffffffffff;
ix -= 52ULL << 52;
}
diff --git a/pl/math/sv_pow_1u5.c b/pl/math/sv_pow_1u5.c
index 0838810..6b8efee 100644
--- a/pl/math/sv_pow_1u5.c
+++ b/pl/math/sv_pow_1u5.c
@@ -1,7 +1,7 @@
/*
* Double-precision SVE pow(x, y) function.
*
- * Copyright (c) 2022-2023, Arm Limited.
+ * Copyright (c) 2022-2024, Arm Limited.
* SPDX-License-Identifier: MIT OR Apache-2.0 WITH LLVM-exception
*/
@@ -323,9 +323,7 @@ pow_sc (double x, double y)
double_t x2 = x * x;
if (ix >> 63 && checkint (iy) == 1)
x2 = -x2;
- /* Without the barrier some versions of clang hoist the 1/x2 and
- thus division by zero exception can be signaled spuriously. */
- return (iy >> 63) ? opt_barrier_double (1 / x2) : x2;
+ return (iy >> 63) ? 1 / x2 : x2;
}
return x;
}