diff options
Diffstat (limited to 'math/test/mathbench.c')
-rw-r--r-- | math/test/mathbench.c | 281 |
1 files changed, 105 insertions, 176 deletions
diff --git a/math/test/mathbench.c b/math/test/mathbench.c index 0c17826..6e18e36 100644 --- a/math/test/mathbench.c +++ b/math/test/mathbench.c @@ -1,8 +1,8 @@ /* * Microbenchmark for math functions. * - * Copyright (c) 2018-2020, Arm Limited. - * SPDX-License-Identifier: MIT + * Copyright (c) 2018-2022, Arm Limited. + * SPDX-License-Identifier: MIT OR Apache-2.0 WITH LLVM-exception */ #undef _GNU_SOURCE @@ -66,6 +66,43 @@ v_float_dup (float x) { return (v_float){x, x, x, x}; } +#if WANT_SVE_MATH +#include <arm_sve.h> +typedef svbool_t sv_bool; +typedef svfloat64_t sv_double; + +#define sv_double_len() svcntd() + +static inline sv_double +sv_double_load (const double *p) +{ + svbool_t pg = svptrue_b64(); + return svld1(pg, p); +} + +static inline sv_double +sv_double_dup (double x) +{ + return svdup_n_f64(x); +} + +typedef svfloat32_t sv_float; + +#define sv_float_len() svcntw() + +static inline sv_float +sv_float_load (const float *p) +{ + svbool_t pg = svptrue_b32(); + return svld1(pg, p); +} + +static inline sv_float +sv_float_dup (float x) +{ + return svdup_n_f32(x); +} +#endif #else /* dummy definitions to make things compile. */ typedef double v_double; @@ -89,7 +126,6 @@ dummyf (float x) { return x; } - #if WANT_VMATH #if __aarch64__ static v_double @@ -116,101 +152,25 @@ __vn_dummyf (v_float x) { return x; } - -__vpcs static v_float -xy__vn_powf (v_float x) -{ - return __vn_powf (x, x); -} - -__vpcs static v_float -xy_Z_powf (v_float x) +#endif +#if WANT_SVE_MATH +static sv_double +__sv_dummy (sv_double x, sv_bool pg) { - return _ZGVnN4vv_powf (x, x); + return x; } -__vpcs static v_double -xy__vn_pow (v_double x) +static sv_float +__sv_dummyf (sv_float x, sv_bool pg) { - return __vn_pow (x, x); + return x; } -__vpcs static v_double -xy_Z_pow (v_double x) -{ - return _ZGVnN2vv_pow (x, x); -} #endif - -static v_float -xy__v_powf (v_float x) -{ - return __v_powf (x, x); -} - -static v_double -xy__v_pow (v_double x) -{ - return __v_pow (x, x); -} #endif - -static float -xy__s_powf (float x) -{ - return __s_powf (x, x); -} - -static double -xy__s_pow (double x) -{ - return __s_pow (x, x); -} #endif -static double -xypow (double x) -{ - return pow (x, x); -} - -static float -xypowf (float x) -{ - return powf (x, x); -} - -static double -xpow (double x) -{ - return pow (x, 23.4); -} - -static float -xpowf (float x) -{ - return powf (x, 23.4f); -} - -static double -ypow (double x) -{ - return pow (2.34, x); -} - -static float -ypowf (float x) -{ - return powf (2.34f, x); -} - -static float -sincosf_wrap (float x) -{ - float s, c; - sincosf (x, &s, &c); - return s + c; -} +#include "test/mathbench_wrappers.h" static const struct fun { @@ -229,6 +189,10 @@ static const struct fun __vpcs v_double (*vnd) (v_double); __vpcs v_float (*vnf) (v_float); #endif +#if WANT_SVE_MATH + sv_double (*svd) (sv_double, sv_bool); + sv_float (*svf) (sv_float, sv_bool); +#endif } fun; } funtab[] = { #define D(func, lo, hi) {#func, 'd', 0, lo, hi, {.d = func}}, @@ -237,106 +201,25 @@ static const struct fun #define VF(func, lo, hi) {#func, 'f', 'v', lo, hi, {.vf = func}}, #define VND(func, lo, hi) {#func, 'd', 'n', lo, hi, {.vnd = func}}, #define VNF(func, lo, hi) {#func, 'f', 'n', lo, hi, {.vnf = func}}, +#define SVD(func, lo, hi) {#func, 'd', 's', lo, hi, {.svd = func}}, +#define SVF(func, lo, hi) {#func, 'f', 's', lo, hi, {.svf = func}}, D (dummy, 1.0, 2.0) -D (exp, -9.9, 9.9) -D (exp, 0.5, 1.0) -D (exp2, -9.9, 9.9) -D (log, 0.01, 11.1) -D (log, 0.999, 1.001) -D (log2, 0.01, 11.1) -D (log2, 0.999, 1.001) -{"pow", 'd', 0, 0.01, 11.1, {.d = xypow}}, -D (xpow, 0.01, 11.1) -D (ypow, -9.9, 9.9) -D (erf, -6.0, 6.0) - F (dummyf, 1.0, 2.0) -F (expf, -9.9, 9.9) -F (exp2f, -9.9, 9.9) -F (logf, 0.01, 11.1) -F (log2f, 0.01, 11.1) -{"powf", 'f', 0, 0.01, 11.1, {.f = xypowf}}, -F (xpowf, 0.01, 11.1) -F (ypowf, -9.9, 9.9) -{"sincosf", 'f', 0, 0.1, 0.7, {.f = sincosf_wrap}}, -{"sincosf", 'f', 0, 0.8, 3.1, {.f = sincosf_wrap}}, -{"sincosf", 'f', 0, -3.1, 3.1, {.f = sincosf_wrap}}, -{"sincosf", 'f', 0, 3.3, 33.3, {.f = sincosf_wrap}}, -{"sincosf", 'f', 0, 100, 1000, {.f = sincosf_wrap}}, -{"sincosf", 'f', 0, 1e6, 1e32, {.f = sincosf_wrap}}, -F (sinf, 0.1, 0.7) -F (sinf, 0.8, 3.1) -F (sinf, -3.1, 3.1) -F (sinf, 3.3, 33.3) -F (sinf, 100, 1000) -F (sinf, 1e6, 1e32) -F (cosf, 0.1, 0.7) -F (cosf, 0.8, 3.1) -F (cosf, -3.1, 3.1) -F (cosf, 3.3, 33.3) -F (cosf, 100, 1000) -F (cosf, 1e6, 1e32) -F (erff, -4.0, 4.0) #if WANT_VMATH -D (__s_sin, -3.1, 3.1) -D (__s_cos, -3.1, 3.1) -D (__s_exp, -9.9, 9.9) -D (__s_log, 0.01, 11.1) -{"__s_pow", 'd', 0, 0.01, 11.1, {.d = xy__s_pow}}, -F (__s_expf, -9.9, 9.9) -F (__s_expf_1u, -9.9, 9.9) -F (__s_exp2f, -9.9, 9.9) -F (__s_exp2f_1u, -9.9, 9.9) -F (__s_logf, 0.01, 11.1) -{"__s_powf", 'f', 0, 0.01, 11.1, {.f = xy__s_powf}}, -F (__s_sinf, -3.1, 3.1) -F (__s_cosf, -3.1, 3.1) #if __aarch64__ VD (__v_dummy, 1.0, 2.0) -VD (__v_sin, -3.1, 3.1) -VD (__v_cos, -3.1, 3.1) -VD (__v_exp, -9.9, 9.9) -VD (__v_log, 0.01, 11.1) -{"__v_pow", 'd', 'v', 0.01, 11.1, {.vd = xy__v_pow}}, VF (__v_dummyf, 1.0, 2.0) -VF (__v_expf, -9.9, 9.9) -VF (__v_expf_1u, -9.9, 9.9) -VF (__v_exp2f, -9.9, 9.9) -VF (__v_exp2f_1u, -9.9, 9.9) -VF (__v_logf, 0.01, 11.1) -{"__v_powf", 'f', 'v', 0.01, 11.1, {.vf = xy__v_powf}}, -VF (__v_sinf, -3.1, 3.1) -VF (__v_cosf, -3.1, 3.1) #ifdef __vpcs VND (__vn_dummy, 1.0, 2.0) -VND (__vn_exp, -9.9, 9.9) -VND (_ZGVnN2v_exp, -9.9, 9.9) -VND (__vn_log, 0.01, 11.1) -VND (_ZGVnN2v_log, 0.01, 11.1) -{"__vn_pow", 'd', 'n', 0.01, 11.1, {.vnd = xy__vn_pow}}, -{"_ZGVnN2vv_pow", 'd', 'n', 0.01, 11.1, {.vnd = xy_Z_pow}}, -VND (__vn_sin, -3.1, 3.1) -VND (_ZGVnN2v_sin, -3.1, 3.1) -VND (__vn_cos, -3.1, 3.1) -VND (_ZGVnN2v_cos, -3.1, 3.1) VNF (__vn_dummyf, 1.0, 2.0) -VNF (__vn_expf, -9.9, 9.9) -VNF (_ZGVnN4v_expf, -9.9, 9.9) -VNF (__vn_expf_1u, -9.9, 9.9) -VNF (__vn_exp2f, -9.9, 9.9) -VNF (_ZGVnN4v_exp2f, -9.9, 9.9) -VNF (__vn_exp2f_1u, -9.9, 9.9) -VNF (__vn_logf, 0.01, 11.1) -VNF (_ZGVnN4v_logf, 0.01, 11.1) -{"__vn_powf", 'f', 'n', 0.01, 11.1, {.vnf = xy__vn_powf}}, -{"_ZGVnN4vv_powf", 'f', 'n', 0.01, 11.1, {.vnf = xy_Z_powf}}, -VNF (__vn_sinf, -3.1, 3.1) -VNF (_ZGVnN4v_sinf, -3.1, 3.1) -VNF (__vn_cosf, -3.1, 3.1) -VNF (_ZGVnN4v_cosf, -3.1, 3.1) +#endif +#if WANT_SVE_MATH +SVD (__sv_dummy, 1.0, 2.0) +SVF (__sv_dummyf, 1.0, 2.0) #endif #endif #endif +#include "test/mathbench_funcs.h" {0}, #undef F #undef D @@ -344,6 +227,8 @@ VNF (_ZGVnN4v_cosf, -3.1, 3.1) #undef VD #undef VNF #undef VND +#undef SVF +#undef SVD }; static void @@ -508,6 +393,40 @@ runf_vn_latency (__vpcs v_float f (v_float)) } #endif +#if WANT_SVE_MATH +static void +run_sv_thruput (sv_double f (sv_double, sv_bool)) +{ + for (int i = 0; i < N; i += sv_double_len ()) + f (sv_double_load (A+i), svptrue_b64 ()); +} + +static void +runf_sv_thruput (sv_float f (sv_float, sv_bool)) +{ + for (int i = 0; i < N; i += sv_float_len ()) + f (sv_float_load (Af+i), svptrue_b32 ()); +} + +static void +run_sv_latency (sv_double f (sv_double, sv_bool)) +{ + sv_double z = sv_double_dup (zero); + sv_double prev = z; + for (int i = 0; i < N; i += sv_double_len ()) + prev = f (svmad_f64_x (svptrue_b64 (), prev, z, sv_double_load (A+i)), svptrue_b64 ()); +} + +static void +runf_sv_latency (sv_float f (sv_float, sv_bool)) +{ + sv_float z = sv_float_dup (zero); + sv_float prev = z; + for (int i = 0; i < N; i += sv_float_len ()) + prev = f (svmad_f32_x (svptrue_b32 (), prev, z, sv_float_load (Af+i)), svptrue_b32 ()); +} +#endif + static uint64_t tic (void) { @@ -570,6 +489,16 @@ bench1 (const struct fun *f, int type, double lo, double hi) else if (f->prec == 'f' && type == 'l' && f->vec == 'n') TIMEIT (runf_vn_latency, f->fun.vnf); #endif +#if WANT_SVE_MATH + else if (f->prec == 'd' && type == 't' && f->vec == 's') + TIMEIT (run_sv_thruput, f->fun.svd); + else if (f->prec == 'd' && type == 'l' && f->vec == 's') + TIMEIT (run_sv_latency, f->fun.svd); + else if (f->prec == 'f' && type == 't' && f->vec == 's') + TIMEIT (runf_sv_thruput, f->fun.svf); + else if (f->prec == 'f' && type == 'l' && f->vec == 's') + TIMEIT (runf_sv_latency, f->fun.svf); +#endif if (type == 't') { |