author     Joe Ramsay <joe.ramsay@arm.com>   2022-12-13 10:21:39 +0000
committer  Joe Ramsay <joe.ramsay@arm.com>   2022-12-13 10:21:39 +0000
commit     80abd605ee62de59fbfbaba397028326a1148a16 (patch)
tree       7db53b4941866fafd31c61a899b3820246176549
parent     0d1cc42cec9fb097102bd603dea29c6581a4b7cd (diff)
pl/math: Set fenv flags in Neon tanf
The new behaviour is hidden behind the WANT_ERRNO config option.
-rwxr-xr-x  pl/math/test/runulp.sh |  8
-rw-r--r--  pl/math/v_tanf_3u2.c   | 36
2 files changed, 33 insertions(+), 11 deletions(-)
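Before the diffs, a note on the detection trick the patch relies on: subtracting TinyBound from the absolute bit pattern makes tiny inputs wrap around to huge unsigned values, so a single unsigned comparison flags tiny, large, infinite and NaN lanes at once. A minimal scalar sketch of the idea (the helper name is illustrative, not from the patch):

#include <stdint.h>
#include <string.h>

/* Scalar sketch of the single-compare special-case test used in the
   patch.  The bit patterns match RangeVal and TinyBound below.  */
static int
is_special (float x)
{
  uint32_t ix;
  memcpy (&ix, &x, sizeof ix); /* asuint32 (x).  */
  uint32_t iax = ix & 0x7fffffff;
  /* If iax < TinyBound, the subtraction wraps around to a huge value,
     so one unsigned compare catches tiny, large, inf and NaN inputs.  */
  return iax - 0x30000000 >= 0x48000000 - 0x30000000;
}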
diff --git a/pl/math/test/runulp.sh b/pl/math/test/runulp.sh
index 9f49270..21d0a8c 100755
--- a/pl/math/test/runulp.sh
+++ b/pl/math/test/runulp.sh
@@ -759,10 +759,10 @@ log2f __s_log2f $runs fenv
log2f __v_log2f $runv fenv
log2f __vn_log2f $runvn fenv
log2f _ZGVnN4v_log2f $runvn fenv
-tanf __s_tanf $runs
-tanf __v_tanf $runv
-tanf __vn_tanf $runvn
-tanf _ZGVnN4v_tanf $runvn
+tanf __s_tanf $runs fenv
+tanf __v_tanf $runv fenv
+tanf __vn_tanf $runvn fenv
+tanf _ZGVnN4v_tanf $runvn fenv
log1p __s_log1p $runs
log1p __v_log1p $runv
log1p __vn_log1p $runvn
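The added fenv keyword asks the ULP runner to also check floating-point status flags for these tanf variants. As a rough standalone illustration of what flag checking looks like (this is not the harness's code; compile with -lm):

#include <fenv.h>
#include <math.h>
#include <stdio.h>

int
main (void)
{
  feclearexcept (FE_ALL_EXCEPT);
  float y = tanf (INFINITY); /* Domain error: expect NaN and FE_INVALID.  */
  printf ("tanf(inf) = %f, FE_INVALID %s\n", (double) y,
          fetestexcept (FE_INVALID) ? "raised" : "not raised");
  return 0;
}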
diff --git a/pl/math/v_tanf_3u2.c b/pl/math/v_tanf_3u2.c
index 01f7f65..8b3869c 100644
--- a/pl/math/v_tanf_3u2.c
+++ b/pl/math/v_tanf_3u2.c
@@ -15,8 +15,10 @@
#define NegPio2_2 (v_f32 (0x1.777a5cp-25f))
#define NegPio2_3 (v_f32 (0x1.ee59dap-50f))
#define InvPio2 (v_f32 (0x1.45f306p-1f))
-#define RangeVal (v_f32 (0x1p17f))
+#define RangeVal (0x48000000) /* asuint32 (0x1p17f).  */
+#define TinyBound (0x30000000) /* asuint32 (0x1p-31). */
#define Shift (v_f32 (0x1.8p+23f))
+#define AbsMask (v_u32 (0x7fffffff))
#define poly(i) v_f32 (__tanf_poly_data.poly_tan[i])
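RangeVal changes from a float vector to a raw bit pattern so that the comparisons below can happen in the integer domain. Both constants are easy to sanity-check in isolation (a standalone snippet, not part of the patch):

#include <assert.h>
#include <stdint.h>
#include <string.h>

int
main (void)
{
  float range = 0x1p17f, tiny = 0x1p-31f;
  uint32_t u;
  memcpy (&u, &range, sizeof u);
  assert (u == 0x48000000); /* RangeVal.  */
  memcpy (&u, &tiny, sizeof u);
  assert (u == 0x30000000); /* TinyBound.  */
  return 0;
}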
@@ -33,6 +35,13 @@ static inline v_f32_t
eval_poly (v_f32_t z)
{
v_f32_t z2 = z * z;
+#if WANT_ERRNO
+ /* Tiny z (<= 0x1p-31) will underflow when calculating z^4. If errno is to be
+     set correctly, sidestep this by fixing such lanes to 0.  */
+ v_u32_t will_uflow = v_cond_u32 ((v_as_u32_f32 (z) & AbsMask) <= TinyBound);
+ if (unlikely (v_any_u32 (will_uflow)))
+ z2 = v_sel_f32 (will_uflow, v_f32 (0), z2);
+#endif
v_f32_t z4 = z2 * z2;
return ESTRIN_6 (z, z2, z4, poly);
}
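The rationale for the lane fix-up: when the polynomial argument is at most about 0x1p-31, its fourth power can fall into the subnormal range, raising a spurious underflow even though those terms are negligible in the final result. Zeroing z2 on the affected lanes makes z4 exactly zero with no exception. A scalar sketch of the same guard (names are illustrative):

#include <stdint.h>
#include <string.h>

/* Zero z2 when z is tiny enough that z^4 could underflow; the higher
   polynomial terms contribute nothing meaningful for such z anyway.
   0x30000000 is asuint32 (0x1p-31), matching TinyBound above.  */
static float
guarded_z2 (float z)
{
  uint32_t iz;
  memcpy (&iz, &z, sizeof iz);
  if ((iz & 0x7fffffff) <= 0x30000000)
    return 0.0f;
  return z * z;
}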
@@ -44,8 +53,23 @@ eval_poly (v_f32_t z)
VPCS_ATTR
v_f32_t V_NAME (tanf) (v_f32_t x)
{
- /* Determine whether input is too large to perform fast regression. */
- v_u32_t cmp = v_cage_f32 (x, RangeVal);
+ v_f32_t special_arg = x;
+ v_u32_t ix = v_as_u32_f32 (x);
+ v_u32_t iax = ix & AbsMask;
+
+ /* iax >= RangeVal means x, if not inf or NaN, is too large to perform fast
+     reduction.  */
+#if WANT_ERRNO
+  /* If errno is to be set correctly, also special-case tiny input, as this
+     will lead to underflow later. Fix any special lanes to 1 to prevent any
+     exceptions being triggered.  */
+ v_u32_t special = v_cond_u32 (iax - TinyBound >= RangeVal - TinyBound);
+ if (unlikely (v_any_u32 (special)))
+ x = v_sel_f32 (special, v_f32 (1.0f), x);
+#else
+  /* Otherwise, special-case large and non-finite values.  */
+ v_u32_t special = v_cond_u32 (iax >= RangeVal);
+#endif
/* n = rint(x/(pi/2)). */
v_f32_t q = v_fma_f32 (InvPio2, x, Shift);
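As context for the rint comment above: Shift (0x1.8p+23f) implements round-to-nearest without a rounding instruction. Adding it forces the fractional bits of x/(pi/2) out of the mantissa, and subtracting it back recovers the rounded quotient; the fma here folds the multiply by InvPio2 into the same step. A scalar sketch of the trick, assuming default round-to-nearest mode and no fast-math reassociation:

/* Round x to the nearest integer-valued float via the shift trick;
   valid for |x| well below 0x1p22 in round-to-nearest mode.  */
static float
round_via_shift (float x)
{
  const float shift = 0x1.8p23f;
  return (x + shift) - shift;
}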
@@ -85,10 +109,8 @@ v_f32_t V_NAME (tanf) (v_f32_t x)
therefore it is fixed here. */
y = v_sel_f32 (x == v_f32 (-0.0), x, y);
- /* No need to pass pg to specialcase here since cmp is a strict subset,
- guaranteed by the cmpge above. */
- if (unlikely (v_any_u32 (cmp)))
- return specialcase (x, y, cmp);
+ if (unlikely (v_any_u32 (special)))
+ return specialcase (special_arg, y, special);
return y;
}
VPCS_ALIAS
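With WANT_ERRNO, the lanes routed to specialcase fall back to the scalar routine, which reports errors through the usual libm path. A quick way to observe errno behaviour for tanf in general (a standalone sketch, not tied to this routine's internals; errno is only set if the libm advertises MATH_ERRNO in math_errhandling):

#include <errno.h>
#include <math.h>
#include <stdio.h>

int
main (void)
{
  errno = 0;
  float y = tanf (INFINITY); /* Domain error: errno should become EDOM.  */
  printf ("tanf(inf) = %f, errno = %d (EDOM = %d)\n", (double) y, errno, EDOM);
  return 0;
}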