aboutsummaryrefslogtreecommitdiff
diff options
context:
space:
mode:
authorJoe Ramsay <Joe.Ramsay@arm.com>2024-04-24 09:32:00 +0100
committerJoe Ramsay <joe.ramsay@arm.com>2024-04-24 09:32:00 +0100
commit3aba7ed692806936d13235444268aad618726ae5 (patch)
treeb131de2e5d7a61600ee1ddf0c807a8971ca1f9f2
parent1fa1ac8a40920ea3034bf510044e51bec77ec70a (diff)
downloadarm-optimized-routines-3aba7ed692806936d13235444268aad618726ae5.tar.gz
test/ulp: Enable user to specify predicate for SVE routines
User can now pass a predicate to the ULP tool. The expected format is as a uint64, where if bit N is 1 then the corresponding lane in the predicate will be active, otherwise inactive (SVE max vector length is 64 lanes for single-precision). For example -p 2 tests only the second lane, -p 0xf tests the first 4 lanes. This revealed that SVE log2 and log2f were being tested for correct FP exceptions, despite getting them wrong. Prior to this the flag had been ignored, so this was not detected; however, it now needs to be removed. Expose the -p argument via the 'pred' environment variable for runulp.sh, and improve func handling.
-rw-r--r--math/test/ulp.c173
-rw-r--r--math/test/ulp.h35
-rw-r--r--pl/math/Dir.mk1
-rw-r--r--pl/math/sv_log2_3u.c3
-rw-r--r--pl/math/sv_log2f_2u5.c3
-rwxr-xr-xpl/math/test/runulp.sh21
-rw-r--r--pl/math/test/ulp_funcs.h34
-rw-r--r--pl/math/test/ulp_wrappers.h34
8 files changed, 182 insertions, 122 deletions
diff --git a/math/test/ulp.c b/math/test/ulp.c
index 22ac6fe..0baef89 100644
--- a/math/test/ulp.c
+++ b/math/test/ulp.c
@@ -198,19 +198,6 @@ next_d2 (void *g)
return (struct args_d2){asdouble (x), asdouble (x2)};
}
-struct conf
-{
- int r;
- int rc;
- int quiet;
- int mpfr;
- int fenv;
- unsigned long long n;
- double softlim;
- double errlim;
- int ignore_zero_sign;
-};
-
/* A bit of a hack: call vector functions twice with the same
input in lane 0 but a different value in other lanes: once
with an in-range value and then with a special case value. */
@@ -246,20 +233,53 @@ static inline sv_double svargd(double x) {
base[n-1] = dv[secondcall];
return svld1(svptrue_b64(), base);
}
-static inline float svretf(sv_float vec) {
- int n = svcntw();
- float res[n];
- svst1(svptrue_b32(), res, vec);
- return res[0];
+static inline float
+svretf (sv_float vec, svbool_t pg)
+{
+ return svlastb_f32 (svpfirst (pg, svpfalse ()), vec);
}
-static inline double svretd(sv_double vec) {
- int n = svcntd();
- double res[n];
- svst1(svptrue_b64(), res, vec);
- return res[0];
+static inline double
+svretd (sv_double vec, svbool_t pg)
+{
+ return svlastb_f64 (svpfirst (pg, svpfalse ()), vec);
+}
+
+static inline svbool_t
+parse_pg (uint64_t p, int is_single)
+{
+ if (is_single)
+ {
+ uint32_t tmp[svcntw ()];
+ for (unsigned i = 0; i < svcntw (); i++)
+ tmp[i] = (p >> i) & 1;
+ return svcmpne (svptrue_b32 (), svld1 (svptrue_b32 (), tmp), 0);
+ }
+ else
+ {
+ uint64_t tmp[svcntd ()];
+ for (unsigned i = 0; i < svcntd (); i++)
+ tmp[i] = (p >> i) & 1;
+ return svcmpne (svptrue_b64 (), svld1 (svptrue_b64 (), tmp), 0);
+ }
}
+# endif
#endif
+
+struct conf
+{
+ int r;
+ int rc;
+ int quiet;
+ int mpfr;
+ int fenv;
+ unsigned long long n;
+ double softlim;
+ double errlim;
+ int ignore_zero_sign;
+#if WANT_SVE_MATH
+ svbool_t *pg;
#endif
+};
#include "test/ulp_wrappers.h"
@@ -269,12 +289,19 @@ struct fun
int arity;
int singleprec;
int twice;
+ int is_predicated;
union
{
float (*f1) (float);
float (*f2) (float, float);
double (*d1) (double);
double (*d2) (double, double);
+#if WANT_SVE_MATH
+ float (*f1_pred) (svbool_t, float);
+ float (*f2_pred) (svbool_t, float, float);
+ double (*d1_pred) (svbool_t, double);
+ double (*d2_pred) (svbool_t, double, double);
+#endif
} fun;
union
{
@@ -294,44 +321,33 @@ struct fun
#endif
};
+// clang-format off
static const struct fun fun[] = {
#if USE_MPFR
-# define F(x, x_wrap, x_long, x_mpfr, a, s, t, twice) \
- {#x, a, s, twice, {.t = x_wrap}, {.t = x_long}, {.t = x_mpfr}},
+# define F(x, x_wrap, x_long, x_mpfr, a, s, t, twice) \
+ { #x, a, s, twice, 0, { .t = x_wrap }, { .t = x_long }, { .t = x_mpfr } },
+# define SVF(x, x_wrap, x_long, x_mpfr, a, s, t, twice) \
+ { #x, a, s, twice, 1, { .t##_pred = x_wrap }, { .t = x_long }, { .t = x_mpfr } },
#else
-# define F(x, x_wrap, x_long, x_mpfr, a, s, t, twice) \
- {#x, a, s, twice, {.t = x_wrap}, {.t = x_long}},
+# define F(x, x_wrap, x_long, x_mpfr, a, s, t, twice) \
+ { #x, a, s, twice, 0, { .t = x_wrap }, { .t = x_long } },
+# define SVF(x, x_wrap, x_long, x_mpfr, a, s, t, twice) \
+ { #x, a, s, twice, 1, { .t##_pred = x_wrap }, { .t = x_long } },
#endif
#define F1(x) F (x##f, x##f, x, mpfr_##x, 1, 1, f1, 0)
#define F2(x) F (x##f, x##f, x, mpfr_##x, 2, 1, f2, 0)
#define D1(x) F (x, x, x##l, mpfr_##x, 1, 0, d1, 0)
#define D2(x) F (x, x, x##l, mpfr_##x, 2, 0, d2, 0)
/* Neon routines. */
-#define VF1(x) F (__v_##x##f, v_##x##f, x, mpfr_##x, 1, 1, f1, 0)
-#define VF2(x) F (__v_##x##f, v_##x##f, x, mpfr_##x, 2, 1, f2, 0)
-#define VD1(x) F (__v_##x, v_##x, x##l, mpfr_##x, 1, 0, d1, 0)
-#define VD2(x) F (__v_##x, v_##x, x##l, mpfr_##x, 2, 0, d2, 0)
-#define VNF1(x) F (__vn_##x##f, vn_##x##f, x, mpfr_##x, 1, 1, f1, 0)
-#define VNF2(x) F (__vn_##x##f, vn_##x##f, x, mpfr_##x, 2, 1, f2, 0)
-#define VND1(x) F (__vn_##x, vn_##x, x##l, mpfr_##x, 1, 0, d1, 0)
-#define VND2(x) F (__vn_##x, vn_##x, x##l, mpfr_##x, 2, 0, d2, 0)
-#define ZVF1(x) F (_ZGVnN4v_##x##f, Z_##x##f, x, mpfr_##x, 1, 1, f1, 0)
-#define ZVF2(x) F (_ZGVnN4vv_##x##f, Z_##x##f, x, mpfr_##x, 2, 1, f2, 0)
-#define ZVD1(x) F (_ZGVnN2v_##x, Z_##x, x##l, mpfr_##x, 1, 0, d1, 0)
-#define ZVD2(x) F (_ZGVnN2vv_##x, Z_##x, x##l, mpfr_##x, 2, 0, d2, 0)
-#define ZVNF1(x) VNF1 (x) ZVF1 (x)
-#define ZVNF2(x) VNF2 (x) ZVF2 (x)
-#define ZVND1(x) VND1 (x) ZVD1 (x)
-#define ZVND2(x) VND2 (x) ZVD2 (x)
+#define ZVNF1(x) F (_ZGVnN4v_##x##f, Z_##x##f, x, mpfr_##x, 1, 1, f1, 0)
+#define ZVNF2(x) F (_ZGVnN4vv_##x##f, Z_##x##f, x, mpfr_##x, 2, 1, f2, 0)
+#define ZVND1(x) F (_ZGVnN2v_##x, Z_##x, x##l, mpfr_##x, 1, 0, d1, 0)
+#define ZVND2(x) F (_ZGVnN2vv_##x, Z_##x, x##l, mpfr_##x, 2, 0, d2, 0)
/* SVE routines. */
-#define SVF1(x) F (__sv_##x##f, sv_##x##f, x, mpfr_##x, 1, 1, f1, 0)
-#define SVF2(x) F (__sv_##x##f, sv_##x##f, x, mpfr_##x, 2, 1, f2, 0)
-#define SVD1(x) F (__sv_##x, sv_##x, x##l, mpfr_##x, 1, 0, d1, 0)
-#define SVD2(x) F (__sv_##x, sv_##x, x##l, mpfr_##x, 2, 0, d2, 0)
-#define ZSVF1(x) F (_ZGVsMxv_##x##f, Z_sv_##x##f, x, mpfr_##x, 1, 1, f1, 0)
-#define ZSVF2(x) F (_ZGVsMxvv_##x##f, Z_sv_##x##f, x, mpfr_##x, 2, 1, f2, 0)
-#define ZSVD1(x) F (_ZGVsMxv_##x, Z_sv_##x, x##l, mpfr_##x, 1, 0, d1, 0)
-#define ZSVD2(x) F (_ZGVsMxvv_##x, Z_sv_##x, x##l, mpfr_##x, 2, 0, d2, 0)
+#define ZSVF1(x) SVF (_ZGVsMxv_##x##f, Z_sv_##x##f, x, mpfr_##x, 1, 1, f1, 0)
+#define ZSVF2(x) SVF (_ZGVsMxvv_##x##f, Z_sv_##x##f, x, mpfr_##x, 2, 1, f2, 0)
+#define ZSVD1(x) SVF (_ZGVsMxv_##x, Z_sv_##x, x##l, mpfr_##x, 1, 0, d1, 0)
+#define ZSVD2(x) SVF (_ZGVsMxvv_##x, Z_sv_##x, x##l, mpfr_##x, 2, 0, d2, 0)
#include "test/ulp_funcs.h"
@@ -340,11 +356,13 @@ static const struct fun fun[] = {
#undef F2
#undef D1
#undef D2
-#undef SVF1
-#undef SVF2
-#undef SVD1
-#undef SVD2
- {0}};
+#undef ZSVF1
+#undef ZSVF2
+#undef ZSVD1
+#undef ZSVD2
+ { 0 }
+};
+// clang-format on
/* Boilerplate for generic calls. */
@@ -365,24 +383,40 @@ ulpscale_d (double x)
return e - 0x3ff - 52;
}
static inline float
-call_f1 (const struct fun *f, struct args_f1 a)
+call_f1 (const struct fun *f, struct args_f1 a, const struct conf *conf)
{
+#if WANT_SVE_MATH
+ if (f->is_predicated)
+ return f->fun.f1_pred (*conf->pg, a.x);
+#endif
return f->fun.f1 (a.x);
}
static inline float
-call_f2 (const struct fun *f, struct args_f2 a)
+call_f2 (const struct fun *f, struct args_f2 a, const struct conf *conf)
{
+#if WANT_SVE_MATH
+ if (f->is_predicated)
+ return f->fun.f2_pred (*conf->pg, a.x, a.x2);
+#endif
return f->fun.f2 (a.x, a.x2);
}
static inline double
-call_d1 (const struct fun *f, struct args_d1 a)
+call_d1 (const struct fun *f, struct args_d1 a, const struct conf *conf)
{
+#if WANT_SVE_MATH
+ if (f->is_predicated)
+ return f->fun.d1_pred (*conf->pg, a.x);
+#endif
return f->fun.d1 (a.x);
}
static inline double
-call_d2 (const struct fun *f, struct args_d2 a)
+call_d2 (const struct fun *f, struct args_d2 a, const struct conf *conf)
{
+#if WANT_SVE_MATH
+ if (f->is_predicated)
+ return f->fun.d2_pred (*conf->pg, a.x, a.x2);
+#endif
return f->fun.d2 (a.x, a.x2);
}
static inline double
@@ -595,6 +629,11 @@ usage (void)
" This should be different from tested input in other lanes, and non-special \n"
" (i.e. should not trigger fenv exceptions). Default is 1.");
#endif
+#if WANT_SVE_MATH
+ puts ("-p: integer input for controlling predicate passed to SVE function. "
+ "If bit N is set, lane N is activated (bits past the vector length "
+ "are ignored). Default is UINT64_MAX (ptrue).");
+#endif
puts ("-z: ignore sign of 0.");
puts ("Supported func:");
for (const struct fun *f = fun; f->name; f++)
@@ -740,6 +779,9 @@ main (int argc, char *argv[])
conf.softlim = 0;
conf.errlim = INFINITY;
conf.ignore_zero_sign = 0;
+#if WANT_SVE_MATH
+ uint64_t pg_int = UINT64_MAX;
+#endif
for (;;)
{
argc--;
@@ -795,6 +837,13 @@ main (int argc, char *argv[])
dv[0] = strtod(argv[0], 0);
break;
#endif
+#if WANT_SVE_MATH
+ case 'p':
+ argc--;
+ argv++;
+ pg_int = strtoull (argv[0], 0, 0);
+ break;
+#endif
default:
usage ();
}
@@ -844,5 +893,9 @@ main (int argc, char *argv[])
argv++;
parsegen (&gen, argc, argv, f);
conf.n = gen.cnt;
+#if WANT_SVE_MATH
+ svbool_t pg = parse_pg (pg_int, f->singleprec);
+ conf.pg = &pg;
+#endif
return cmp (f, &gen, &conf);
}
diff --git a/math/test/ulp.h b/math/test/ulp.h
index b0bc59a..a360092 100644
--- a/math/test/ulp.h
+++ b/math/test/ulp.h
@@ -1,13 +1,13 @@
/*
* Generic functions for ULP error estimation.
*
- * Copyright (c) 2019-2023, Arm Limited.
+ * Copyright (c) 2019-2024, Arm Limited.
* SPDX-License-Identifier: MIT OR Apache-2.0 WITH LLVM-exception
*/
/* For each different math function type,
T(x) should add a different suffix to x.
- RT(x) should add a return type specific suffix to x. */
+ RT(x) should add a return type specific suffix to x. */
#ifdef NEW_RT
#undef NEW_RT
@@ -80,7 +80,7 @@ static double RT (ulperr) (RT (float) got, const struct RT (ret) * p, int r,
// TODO: incorrect when got vs want cross a powof2 boundary
/* error = got > want
? got - want - tail ulp - 0.5 ulp
- : got - want - tail ulp + 0.5 ulp; */
+ : got - want - tail ulp + 0.5 ulp. */
d = got - want;
e = d > 0 ? -p->tail - 0.5 : -p->tail + 0.5;
}
@@ -108,32 +108,34 @@ static int RT(isok_nofenv) (RT(float) ygot, RT(float) ywant)
}
#endif
-static inline void T(call_fenv) (const struct fun *f, struct T(args) a, int r,
- RT(float) * y, int *ex)
+static inline void T (call_fenv) (const struct fun *f, struct T (args) a,
+ int r, RT (float) * y, int *ex,
+ const struct conf *conf)
{
if (r != FE_TONEAREST)
fesetround (r);
feclearexcept (FE_ALL_EXCEPT);
- *y = T(call) (f, a);
+ *y = T (call) (f, a, conf);
*ex = fetestexcept (FE_ALL_EXCEPT);
if (r != FE_TONEAREST)
fesetround (FE_TONEAREST);
}
-static inline void T(call_nofenv) (const struct fun *f, struct T(args) a,
- int r, RT(float) * y, int *ex)
+static inline void T (call_nofenv) (const struct fun *f, struct T (args) a,
+ int r, RT (float) * y, int *ex,
+ const struct conf *conf)
{
if (r != FE_TONEAREST)
fesetround (r);
- *y = T(call) (f, a);
+ *y = T (call) (f, a, conf);
*ex = 0;
if (r != FE_TONEAREST)
fesetround (FE_TONEAREST);
}
-static inline int T(call_long_fenv) (const struct fun *f, struct T(args) a,
- int r, struct RT(ret) * p,
- RT(float) ygot, int exgot)
+static inline int T (call_long_fenv) (const struct fun *f, struct T (args) a,
+ int r, struct RT (ret) * p,
+ RT (float) ygot, int exgot)
{
if (r != FE_TONEAREST)
fesetround (r);
@@ -269,6 +271,7 @@ static int T(cmp) (const struct fun *f, struct gen *gen,
int r = conf->r;
int use_mpfr = conf->mpfr;
int fenv = conf->fenv;
+
for (;;)
{
struct RT(ret) want;
@@ -279,15 +282,15 @@ static int T(cmp) (const struct fun *f, struct gen *gen,
RT(float) ygot2;
int fail = 0;
if (fenv)
- T(call_fenv) (f, a, r, &ygot, &exgot);
+ T (call_fenv) (f, a, r, &ygot, &exgot, conf);
else
- T(call_nofenv) (f, a, r, &ygot, &exgot);
+ T (call_nofenv) (f, a, r, &ygot, &exgot, conf);
if (f->twice) {
secondcall = 1;
if (fenv)
- T(call_fenv) (f, a, r, &ygot2, &exgot2);
+ T (call_fenv) (f, a, r, &ygot2, &exgot2, conf);
else
- T(call_nofenv) (f, a, r, &ygot2, &exgot2);
+ T (call_nofenv) (f, a, r, &ygot2, &exgot2, conf);
secondcall = 0;
if (RT(asuint) (ygot) != RT(asuint) (ygot2))
{
diff --git a/pl/math/Dir.mk b/pl/math/Dir.mk
index 94b26cf..c38837c 100644
--- a/pl/math/Dir.mk
+++ b/pl/math/Dir.mk
@@ -193,6 +193,7 @@ check-pl/math-ulp: $(math-tools) $(ulp-lims) $(fenv-exps) $(ulp-itvs)
INTERVALS=../../../$(ulp-itvs) \
FENV=../../../$(fenv-exps) \
FUNC=$(func) \
+ PRED=$(pred) \
build/pl/bin/runulp.sh $(EMULATOR)
check-pl/math: check-pl/math-test check-pl/math-rtest check-pl/math-ulp
diff --git a/pl/math/sv_log2_3u.c b/pl/math/sv_log2_3u.c
index 0775a39..3b88a7b 100644
--- a/pl/math/sv_log2_3u.c
+++ b/pl/math/sv_log2_3u.c
@@ -1,7 +1,7 @@
/*
* Double-precision SVE log2 function.
*
- * Copyright (c) 2022-2023, Arm Limited.
+ * Copyright (c) 2022-2024, Arm Limited.
* SPDX-License-Identifier: MIT OR Apache-2.0 WITH LLVM-exception
*/
@@ -64,7 +64,6 @@ svfloat64_t SV_NAME_D1 (log2) (svfloat64_t x, const svbool_t pg)
PL_SIG (SV, D, 1, log2, 0.01, 11.1)
PL_TEST_ULP (SV_NAME_D1 (log2), 2.09)
-PL_TEST_EXPECT_FENV_ALWAYS (SV_NAME_D1 (log2))
PL_TEST_INTERVAL (SV_NAME_D1 (log2), -0.0, -0x1p126, 1000)
PL_TEST_INTERVAL (SV_NAME_D1 (log2), 0.0, 0x1p-126, 4000)
PL_TEST_INTERVAL (SV_NAME_D1 (log2), 0x1p-126, 0x1p-23, 50000)
diff --git a/pl/math/sv_log2f_2u5.c b/pl/math/sv_log2f_2u5.c
index 9e96c62..c28217c 100644
--- a/pl/math/sv_log2f_2u5.c
+++ b/pl/math/sv_log2f_2u5.c
@@ -1,7 +1,7 @@
/*
* Single-precision vector/SVE log2 function.
*
- * Copyright (c) 2022-2023, Arm Limited.
+ * Copyright (c) 2022-2024, Arm Limited.
* SPDX-License-Identifier: MIT OR Apache-2.0 WITH LLVM-exception
*/
@@ -77,7 +77,6 @@ svfloat32_t SV_NAME_F1 (log2) (svfloat32_t x, const svbool_t pg)
PL_SIG (SV, F, 1, log2, 0.01, 11.1)
PL_TEST_ULP (SV_NAME_F1 (log2), 1.99)
-PL_TEST_EXPECT_FENV_ALWAYS (SV_NAME_F1 (log2))
PL_TEST_INTERVAL (SV_NAME_F1 (log2), -0.0, -0x1p126, 4000)
PL_TEST_INTERVAL (SV_NAME_F1 (log2), 0.0, 0x1p-126, 4000)
PL_TEST_INTERVAL (SV_NAME_F1 (log2), 0x1p-126, 0x1p-23, 50000)
diff --git a/pl/math/test/runulp.sh b/pl/math/test/runulp.sh
index 4f7fe67..adc0f02 100755
--- a/pl/math/test/runulp.sh
+++ b/pl/math/test/runulp.sh
@@ -25,6 +25,7 @@ t() {
L=$(cat $LIMITS | grep "^$routine " | awk '{print $2}')
[[ $L =~ ^[0-9]+\.[0-9]+$ ]]
extra_flags=
+ [[ -z "${PRED:-}" ]] || extra_flags="$extra_flags -p $PRED"
[[ -z "${5:-}" ]] || extra_flags="$extra_flags -c $5"
grep -q "^$routine$" $FENV || extra_flags="$extra_flags -f"
IFS=',' read -ra LO <<< "$2"
@@ -55,14 +56,18 @@ runsv=
if [ $WANT_SVE_MATH -eq 1 ]; then
# No guarantees about powi accuracy, so regression-test for exactness
# w.r.t. the custom reference impl in ulp_wrappers.h
-check -q -f -e 0 _ZGVsMxvv_powi 0 inf x 0 1000 100000 && runsv=1
-check -q -f -e 0 _ZGVsMxvv_powi -0 -inf x 0 1000 100000 && runsv=1
-check -q -f -e 0 _ZGVsMxvv_powi 0 inf x -0 -1000 100000 && runsv=1
-check -q -f -e 0 _ZGVsMxvv_powi -0 -inf x -0 -1000 100000 && runsv=1
-check -q -f -e 0 _ZGVsMxvv_powk 0 inf x 0 1000 100000 && runsv=1
-check -q -f -e 0 _ZGVsMxvv_powk -0 -inf x 0 1000 100000 && runsv=1
-check -q -f -e 0 _ZGVsMxvv_powk 0 inf x -0 -1000 100000 && runsv=1
-check -q -f -e 0 _ZGVsMxvv_powk -0 -inf x -0 -1000 100000 && runsv=1
+ if [ -z "$FUNC" ] || [ "$FUNC" == "_ZGVsMxvv_powi" ]; then
+ check -q -f -e 0 _ZGVsMxvv_powi 0 inf x 0 1000 100000 && runsv=1
+ check -q -f -e 0 _ZGVsMxvv_powi -0 -inf x 0 1000 100000 && runsv=1
+ check -q -f -e 0 _ZGVsMxvv_powi 0 inf x -0 -1000 100000 && runsv=1
+ check -q -f -e 0 _ZGVsMxvv_powi -0 -inf x -0 -1000 100000 && runsv=1
+ fi
+ if [ -z "$FUNC" ] || [ "$FUNC" == "_ZGVsMxvv_powk" ]; then
+ check -q -f -e 0 _ZGVsMxvv_powk 0 inf x 0 1000 100000 && runsv=1
+ check -q -f -e 0 _ZGVsMxvv_powk -0 -inf x 0 1000 100000 && runsv=1
+ check -q -f -e 0 _ZGVsMxvv_powk 0 inf x -0 -1000 100000 && runsv=1
+ check -q -f -e 0 _ZGVsMxvv_powk -0 -inf x -0 -1000 100000 && runsv=1
+ fi
fi
while read F LO HI N C
diff --git a/pl/math/test/ulp_funcs.h b/pl/math/test/ulp_funcs.h
index 4929b48..3607ce8 100644
--- a/pl/math/test/ulp_funcs.h
+++ b/pl/math/test/ulp_funcs.h
@@ -1,16 +1,16 @@
/*
* Function entries for ulp.
*
- * Copyright (c) 2022-2023, Arm Limited.
+ * Copyright (c) 2022-2024, Arm Limited.
* SPDX-License-Identifier: MIT OR Apache-2.0 WITH LLVM-exception
*/
#if defined(__vpcs) && __aarch64__
-#define _ZVF1(f) ZVF1 (f)
-#define _ZVD1(f) ZVD1 (f)
-#define _ZVF2(f) ZVF2 (f)
-#define _ZVD2(f) ZVD2 (f)
+#define _ZVF1(f) ZVNF1 (f)
+#define _ZVD1(f) ZVND1 (f)
+#define _ZVF2(f) ZVNF2 (f)
+#define _ZVD2(f) ZVND2 (f)
#else
@@ -55,16 +55,16 @@ F (_ZGVnN2v_cexpi_sin, v_cexpi_sin, sinl, mpfr_sin, 1, 0, d1, 0)
F (_ZGVnN2v_cexpi_cos, v_cexpi_cos, cosl, mpfr_cos, 1, 0, d1, 0)
#if WANT_SVE_MATH
-F (_ZGVsMxvv_powk, Z_sv_powk, ref_powi, mpfr_powi, 2, 0, d2, 0)
-F (_ZGVsMxvv_powi, Z_sv_powi, ref_powif, mpfr_powi, 2, 1, f2, 0)
-
-F (_ZGVsMxv_sincosf_sin, sv_sincosf_sin, sin, mpfr_sin, 1, 1, f1, 0)
-F (_ZGVsMxv_sincosf_cos, sv_sincosf_cos, cos, mpfr_cos, 1, 1, f1, 0)
-F (_ZGVsMxv_cexpif_sin, sv_cexpif_sin, sin, mpfr_sin, 1, 1, f1, 0)
-F (_ZGVsMxv_cexpif_cos, sv_cexpif_cos, cos, mpfr_cos, 1, 1, f1, 0)
-
-F (_ZGVsMxv_sincos_sin, sv_sincos_sin, sinl, mpfr_sin, 1, 0, d1, 0)
-F (_ZGVsMxv_sincos_cos, sv_sincos_cos, cosl, mpfr_cos, 1, 0, d1, 0)
-F (_ZGVsMxv_cexpi_sin, sv_cexpi_sin, sinl, mpfr_sin, 1, 0, d1, 0)
-F (_ZGVsMxv_cexpi_cos, sv_cexpi_cos, cosl, mpfr_cos, 1, 0, d1, 0)
+SVF (_ZGVsMxvv_powk, Z_sv_powk, ref_powi, mpfr_powi, 2, 0, d2, 0)
+SVF (_ZGVsMxvv_powi, Z_sv_powi, ref_powif, mpfr_powi, 2, 1, f2, 0)
+
+SVF (_ZGVsMxv_sincosf_sin, sv_sincosf_sin, sin, mpfr_sin, 1, 1, f1, 0)
+SVF (_ZGVsMxv_sincosf_cos, sv_sincosf_cos, cos, mpfr_cos, 1, 1, f1, 0)
+SVF (_ZGVsMxv_cexpif_sin, sv_cexpif_sin, sin, mpfr_sin, 1, 1, f1, 0)
+SVF (_ZGVsMxv_cexpif_cos, sv_cexpif_cos, cos, mpfr_cos, 1, 1, f1, 0)
+
+SVF (_ZGVsMxv_sincos_sin, sv_sincos_sin, sinl, mpfr_sin, 1, 0, d1, 0)
+SVF (_ZGVsMxv_sincos_cos, sv_sincos_cos, cosl, mpfr_cos, 1, 0, d1, 0)
+SVF (_ZGVsMxv_cexpi_sin, sv_cexpi_sin, sinl, mpfr_sin, 1, 0, d1, 0)
+SVF (_ZGVsMxv_cexpi_cos, sv_cexpi_cos, cosl, mpfr_cos, 1, 0, d1, 0)
#endif
diff --git a/pl/math/test/ulp_wrappers.h b/pl/math/test/ulp_wrappers.h
index 0f7b689..565bce5 100644
--- a/pl/math/test/ulp_wrappers.h
+++ b/pl/math/test/ulp_wrappers.h
@@ -2,7 +2,7 @@
/*
* Function wrappers for ulp.
*
- * Copyright (c) 2022-2023, Arm Limited.
+ * Copyright (c) 2022-2024, Arm Limited.
* SPDX-License-Identifier: MIT OR Apache-2.0 WITH LLVM-exception
*/
@@ -83,10 +83,10 @@ DECL_POW_INT_REF(ref_powi, long double, double, int)
#endif
-#define ZSVF1_WRAP(func) static float Z_sv_##func##f(float x) { return svretf(_ZGVsMxv_##func##f(svargf(x), svptrue_b32())); }
-#define ZSVF2_WRAP(func) static float Z_sv_##func##f(float x, float y) { return svretf(_ZGVsMxvv_##func##f(svargf(x), svargf(y), svptrue_b32())); }
-#define ZSVD1_WRAP(func) static double Z_sv_##func(double x) { return svretd(_ZGVsMxv_##func(svargd(x), svptrue_b64())); }
-#define ZSVD2_WRAP(func) static double Z_sv_##func(double x, double y) { return svretd(_ZGVsMxvv_##func(svargd(x), svargd(y), svptrue_b64())); }
+#define ZSVF1_WRAP(func) static float Z_sv_##func##f(svbool_t pg, float x) { return svretf(_ZGVsMxv_##func##f(svargf(x), pg), pg); }
+#define ZSVF2_WRAP(func) static float Z_sv_##func##f(svbool_t pg, float x, float y) { return svretf(_ZGVsMxvv_##func##f(svargf(x), svargf(y), pg), pg); }
+#define ZSVD1_WRAP(func) static double Z_sv_##func(svbool_t pg, double x) { return svretd(_ZGVsMxv_##func(svargd(x), pg), pg); }
+#define ZSVD2_WRAP(func) static double Z_sv_##func(svbool_t pg, double x, double y) { return svretd(_ZGVsMxvv_##func(svargd(x), svargd(y), pg), pg); }
#if WANT_SVE_MATH
@@ -123,18 +123,18 @@ double v_cexpi_sin(double x) { return _ZGVnN2v_cexpi(vdupq_n_f64(x)).val[0][0];
double v_cexpi_cos(double x) { return _ZGVnN2v_cexpi(vdupq_n_f64(x)).val[1][0]; }
#if WANT_SVE_MATH
-static float Z_sv_powi(float x, float y) { return svretf(_ZGVsMxvv_powi(svargf(x), svdup_s32((int)round(y)), svptrue_b32())); }
-static double Z_sv_powk(double x, double y) { return svretd(_ZGVsMxvv_powk(svargd(x), svdup_s64((long)round(y)), svptrue_b64())); }
-
-float sv_sincosf_sin(float x) { float s[svcntw()], c[svcntw()]; _ZGVsMxvl4l4_sincosf(svdup_f32(x), s, c, svptrue_b32()); return s[0]; }
-float sv_sincosf_cos(float x) { float s[svcntw()], c[svcntw()]; _ZGVsMxvl4l4_sincosf(svdup_f32(x), s, c, svptrue_b32()); return c[0]; }
-float sv_cexpif_sin(float x) { return svretf(svget2(_ZGVsMxv_cexpif(svdup_f32(x), svptrue_b32()), 0)); }
-float sv_cexpif_cos(float x) { return svretf(svget2(_ZGVsMxv_cexpif(svdup_f32(x), svptrue_b32()), 1)); }
-
-double sv_sincos_sin(double x) { double s[svcntd()], c[svcntd()]; _ZGVsMxvl8l8_sincos(svdup_f64(x), s, c, svptrue_b64()); return s[0]; }
-double sv_sincos_cos(double x) { double s[svcntd()], c[svcntd()]; _ZGVsMxvl8l8_sincos(svdup_f64(x), s, c, svptrue_b64()); return c[0]; }
-double sv_cexpi_sin(double x) { return svretd(svget2(_ZGVsMxv_cexpi(svdup_f64(x), svptrue_b64()), 0)); }
-double sv_cexpi_cos(double x) { return svretd(svget2(_ZGVsMxv_cexpi(svdup_f64(x), svptrue_b64()), 1)); }
+static float Z_sv_powi(svbool_t pg, float x, float y) { return svretf(_ZGVsMxvv_powi(svargf(x), svdup_s32((int)round(y)), pg), pg); }
+static double Z_sv_powk(svbool_t pg, double x, double y) { return svretd(_ZGVsMxvv_powk(svargd(x), svdup_s64((long)round(y)), pg), pg); }
+
+float sv_sincosf_sin(svbool_t pg, float x) { float s[svcntw()], c[svcntw()]; _ZGVsMxvl4l4_sincosf(svdup_f32(x), s, c, pg); return svretf(svld1(pg, s), pg); }
+float sv_sincosf_cos(svbool_t pg, float x) { float s[svcntw()], c[svcntw()]; _ZGVsMxvl4l4_sincosf(svdup_f32(x), s, c, pg); return svretf(svld1(pg, c), pg); }
+float sv_cexpif_sin(svbool_t pg, float x) { return svretf(svget2(_ZGVsMxv_cexpif(svdup_f32(x), pg), 0), pg); }
+float sv_cexpif_cos(svbool_t pg, float x) { return svretf(svget2(_ZGVsMxv_cexpif(svdup_f32(x), pg), 1), pg); }
+
+double sv_sincos_sin(svbool_t pg, double x) { double s[svcntd()], c[svcntd()]; _ZGVsMxvl8l8_sincos(svdup_f64(x), s, c, pg); return svretd(svld1(pg, s), pg); }
+double sv_sincos_cos(svbool_t pg, double x) { double s[svcntd()], c[svcntd()]; _ZGVsMxvl8l8_sincos(svdup_f64(x), s, c, pg); return svretd(svld1(pg, c), pg); }
+double sv_cexpi_sin(svbool_t pg, double x) { return svretd(svget2(_ZGVsMxv_cexpi(svdup_f64(x), pg), 0), pg); }
+double sv_cexpi_cos(svbool_t pg, double x) { return svretd(svget2(_ZGVsMxv_cexpi(svdup_f64(x), pg), 1), pg); }
#endif
// clang-format on