Skip to content

Commit e8784db

Browse files
committed
math/aarch64: Tidy up all vector powers.
Consistently document accuracy in vector pow(r)(f). Adjust error threshold to match comments.
1 parent 5045516 commit e8784db

File tree

9 files changed

+77
-57
lines changed

9 files changed

+77
-57
lines changed

math/aarch64/advsimd/pow.c

Lines changed: 6 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -1,5 +1,5 @@
11
/*
2-
* Double-precision vector pow function.
2+
* Double-precision vector x^y function.
33
*
44
* Copyright (c) 2020-2025, Arm Limited.
55
* SPDX-License-Identifier: MIT OR Apache-2.0 WITH LLVM-exception
@@ -12,6 +12,11 @@
1212
#define WANT_V_POW_SIGN_BIAS 1
1313
#include "v_pow_inline.h"
1414

15+
/* Implementation of AdvSIMD pow.
16+
Maximum measured error is 1.04 ULPs:
17+
_ZGVnN2vv_pow(0x1.024a3e56b3c3p-136, 0x1.87910248b58acp-13)
18+
got 0x1.f71162f473251p-1
19+
want 0x1.f71162f473252p-1. */
1520
float64x2_t VPCS_ATTR V_NAME_D2 (pow) (float64x2_t x, float64x2_t y)
1621
{
1722
return v_pow_inline (x, y);

math/aarch64/advsimd/powf.c

Lines changed: 2 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -1,5 +1,5 @@
11
/*
2-
* Single-precision vector powf function.
2+
* Single-precision vector x^y function.
33
*
44
* Copyright (c) 2019-2025, Arm Limited.
55
* SPDX-License-Identifier: MIT OR Apache-2.0 WITH LLVM-exception
@@ -117,7 +117,6 @@ v_powf_x_is_neg_or_small (float32x4_t x, float32x4_t y, const struct data *d)
117117
}
118118

119119
/* Implementation of AdvSIMD powf.
120-
The theoretical maximum error is under 2.60 ULPs.
121120
Maximum measured error is 2.57 ULPs:
122121
V_NAME_F2 (pow) (0x1.031706p+0, 0x1.ce2ec2p+12)
123122
got 0x1.fff868p+127
@@ -153,7 +152,7 @@ float32x4_t VPCS_ATTR NOINLINE V_NAME_F2 (pow) (float32x4_t x, float32x4_t y)
153152
HALF_WIDTH_ALIAS_F2 (pow)
154153

155154
TEST_SIG (V, F, 2, pow)
156-
TEST_ULP (V_NAME_F2 (pow), 2.1)
155+
TEST_ULP (V_NAME_F2 (pow), 2.08)
157156
#define V_POWF_INTERVAL2(xlo, xhi, ylo, yhi, n) \
158157
TEST_INTERVAL2 (V_NAME_F2 (pow), xlo, xhi, ylo, yhi, n) \
159158
TEST_INTERVAL2 (V_NAME_F2 (pow), xlo, xhi, -ylo, -yhi, n)

math/aarch64/advsimd/powr.c

Lines changed: 6 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -1,5 +1,5 @@
11
/*
2-
* Double-precision vector powr function.
2+
* Double-precision vector exp(y * log(x)) function.
33
*
44
* Copyright (c) 2025, Arm Limited.
55
* SPDX-License-Identifier: MIT OR Apache-2.0 WITH LLVM-exception
@@ -11,6 +11,11 @@
1111
#define WANT_V_POW_SIGN_BIAS 0
1212
#include "v_pow_inline.h"
1313

14+
/* Implementation of AdvSIMD powr.
15+
Maximum measured error is 1.04 ULPs:
16+
_ZGVnN2vv_powr(0x1.024a3e56b3c3p-136, 0x1.87910248b58acp-13)
17+
got 0x1.f71162f473251p-1
18+
want 0x1.f71162f473252p-1. */
1419
float64x2_t VPCS_ATTR V_NAME_D2 (powr) (float64x2_t x, float64x2_t y)
1520
{
1621
return v_pow_inline (x, y);

math/aarch64/advsimd/powrf.c

Lines changed: 6 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -1,5 +1,5 @@
11
/*
2-
* Single-precision vector powrf function.
2+
* Single-precision vector exp(y * log(x)) function.
33
*
44
* Copyright (c) 2025, Arm Limited.
55
* SPDX-License-Identifier: MIT OR Apache-2.0 WITH LLVM-exception
@@ -66,12 +66,14 @@ v_powrf_x_is_neg_or_sub (float32x4_t x, float32x4_t y, const struct data *d)
6666
}
6767

6868
/* Implementation of AdvSIMD powrf.
69+
6970
powr(x,y) := exp(y * log (x))
71+
7072
This means powr(x,y) core computation matches that of pow(x,y)
7173
but powr returns NaN for negative x even if y is an integer.
72-
The theoretical maximum error is under 2.60 ULPs.
74+
7375
Maximum measured error is 2.57 ULPs:
74-
V_NAME_F2 (pow) (0x1.031706p+0, 0x1.ce2ec2p+12)
76+
V_NAME_F2 (powr) (0x1.031706p+0, 0x1.ce2ec2p+12)
7577
got 0x1.fff868p+127
7678
want 0x1.fff862p+127. */
7779
float32x4_t VPCS_ATTR NOINLINE V_NAME_F2 (powr) (float32x4_t x, float32x4_t y)
@@ -104,7 +106,7 @@ float32x4_t VPCS_ATTR NOINLINE V_NAME_F2 (powr) (float32x4_t x, float32x4_t y)
104106
HALF_WIDTH_ALIAS_F2 (powr)
105107

106108
#if WANT_C23_TESTS
107-
TEST_ULP (V_NAME_F2 (powr), 2.1)
109+
TEST_ULP (V_NAME_F2 (powr), 2.08)
108110
# define V_POWRF_INTERVAL2(xlo, xhi, ylo, yhi, n) \
109111
TEST_INTERVAL2 (V_NAME_F2 (powr), xlo, xhi, ylo, yhi, n) \
110112
TEST_INTERVAL2 (V_NAME_F2 (powr), xlo, xhi, -ylo, -yhi, n)

math/aarch64/advsimd/v_pow_inline.h

Lines changed: 5 additions & 11 deletions
Original file line numberDiff line numberDiff line change
@@ -56,17 +56,6 @@ static const struct data
5656
.ln2_lo_n = -0x1.c610ca86c3899p-45,
5757
};
5858

59-
/* This version implements an algorithm close to scalar pow but
60-
- does not implement the trick in the exp's specialcase subroutine to avoid
61-
double-rounding,
62-
- does not use a tail in the exponential core computation,
63-
- and pow's exp polynomial order and table bits might differ.
64-
65-
Maximum measured error is 1.04 ULPs:
66-
_ZGVnN2vv_pow(0x1.024a3e56b3c3p-136, 0x1.87910248b58acp-13)
67-
got 0x1.f71162f473251p-1
68-
want 0x1.f71162f473252p-1. */
69-
7059
static inline float64x2_t VPCS_ATTR
7160
v_masked_lookup_f64 (const double *table, uint64x2_t i)
7261
{
@@ -181,6 +170,11 @@ scalar_fallback (float64x2_t x, float64x2_t y)
181170
pow_scalar_special_case (x[1], y[1]) };
182171
}
183172

173+
/* This version of AdvSIMD pow implements an algorithm close to AOR scalar pow
174+
but:
175+
- it does not prevent double-rounding in the exp's specialcase subroutine,
176+
- it does not use a tail in the exponential core computation,
177+
- and pow's exp polynomial order and table bits might differ. */
184178
static inline float64x2_t VPCS_ATTR
185179
v_pow_inline (float64x2_t x, float64x2_t y)
186180
{

math/aarch64/sve/pow.c

Lines changed: 27 additions & 23 deletions
Original file line numberDiff line numberDiff line change
@@ -1,5 +1,5 @@
11
/*
2-
* Double-precision SVE pow(x, y) function.
2+
* Double-precision SVE x^y function.
33
*
44
* Copyright (c) 2022-2025, Arm Limited.
55
* SPDX-License-Identifier: MIT OR Apache-2.0 WITH LLVM-exception
@@ -9,28 +9,6 @@
99
#include "test_sig.h"
1010
#include "test_defs.h"
1111

12-
/* This version share a similar algorithm as AOR scalar pow.
13-
14-
The core computation consists in computing pow(x, y) as
15-
16-
exp (y * log (x)).
17-
18-
The algorithms for exp and log are very similar to scalar exp and log.
19-
The log relies on table lookup for 3 variables and an order 8 polynomial.
20-
It returns a high and a low contribution that are then passed to the exp,
21-
to minimise the loss of accuracy in both routines.
22-
The exp is based on 8-bit table lookup for scale and order-4 polynomial.
23-
The SVE algorithm drops the tail in the exp computation at the price of
24-
a lower accuracy, slightly above 1ULP.
25-
The SVE algorithm also drops the special treatement of small (< 2^-65) and
26-
large (> 2^63) finite values of |y|, as they only affect non-round to
27-
nearest modes.
28-
29-
Maximum measured error is 1.04 ULPs:
30-
SV_NAME_D2 (pow) (0x1.3d2d45bc848acp+63, -0x1.a48a38b40cd43p-12)
31-
got 0x1.f7116284221fcp-1
32-
want 0x1.f7116284221fdp-1. */
33-
3412
#define WANT_SV_POW_SIGN_BIAS 1
3513
#include "sv_pow_inline.h"
3614

@@ -72,6 +50,32 @@ sv_pow_specialcase (svfloat64_t x1, svfloat64_t x2, svfloat64_t y,
7250
return sv_call2_f64 (pow_specialcase, x1, x2, y, cmp);
7351
}
7452

53+
/* Implementation of SVE pow.
54+
55+
This version share a similar algorithm as AOR scalar pow.
56+
57+
The core computation consists in computing pow(x, y) as
58+
59+
exp (y * log (x)).
60+
61+
The algorithms for exp and log are very similar to scalar exp and log.
62+
The log relies on table lookup for 3 variables and an order 8 polynomial.
63+
It returns a high and a low contribution that are then passed to the exp,
64+
to minimise the loss of accuracy in both routines.
65+
The exp is based on 8-bit table lookup for scale and order-4 polynomial.
66+
The SVE algorithm drops the tail in the exp computation at the price of
67+
a lower accuracy, slightly above 1ULP.
68+
The SVE algorithm also drops the special treatement of small (< 2^-65) and
69+
large (> 2^63) finite values of |y|, as they only affect non-round to
70+
nearest modes.
71+
72+
Provides the same accuracy as AdvSIMD powf, since it relies on the same
73+
algorithm.
74+
75+
Maximum measured error is 1.04 ULPs:
76+
SV_NAME_D2 (pow) (0x1.3d2d45bc848acp+63, -0x1.a48a38b40cd43p-12)
77+
got 0x1.f7116284221fcp-1
78+
want 0x1.f7116284221fdp-1. */
7579
svfloat64_t SV_NAME_D2 (pow) (svfloat64_t x, svfloat64_t y, const svbool_t pg)
7680
{
7781
const struct data *d = ptr_barrier (&data);

math/aarch64/sve/powf.c

Lines changed: 7 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -1,5 +1,5 @@
11
/*
2-
* Single-precision SVE powf function.
2+
* Single-precision SVE x^y function.
33
*
44
* Copyright (c) 2023-2025, Arm Limited.
55
* SPDX-License-Identifier: MIT OR Apache-2.0 WITH LLVM-exception
@@ -52,11 +52,14 @@ sv_call_powf_sc (svfloat32_t x1, svfloat32_t x2, svfloat32_t y, svbool_t cmp)
5252
}
5353

5454
/* Implementation of SVE powf.
55+
5556
Provides the same accuracy as AdvSIMD powf, since it relies on the same
56-
algorithm. The theoretical maximum error is under 2.60 ULPs.
57+
algorithm.
58+
5759
Maximum measured error is 2.57 ULPs:
58-
SV_NAME_F2 (pow) (0x1.031706p+0, 0x1.ce2ec2p+12) got 0x1.fff868p+127
59-
want 0x1.fff862p+127. */
60+
SV_NAME_F2 (pow) (0x1.031706p+0, 0x1.ce2ec2p+12)
61+
got 0x1.fff868p+127
62+
want 0x1.fff862p+127. */
6063
svfloat32_t SV_NAME_F2 (pow) (svfloat32_t x, svfloat32_t y, const svbool_t pg)
6164
{
6265
const struct data *d = ptr_barrier (&data);

math/aarch64/sve/powr.c

Lines changed: 10 additions & 5 deletions
Original file line numberDiff line numberDiff line change
@@ -1,5 +1,5 @@
11
/*
2-
* Double-precision SVE powr function.
2+
* Double-precision SVE exp(y * log(x)) function.
33
*
44
* Copyright (c) 2025, Arm Limited.
55
* SPDX-License-Identifier: MIT OR Apache-2.0 WITH LLVM-exception
@@ -54,9 +54,14 @@ sv_powr_specialcase (svfloat64_t x1, svfloat64_t x2, svfloat64_t y,
5454
}
5555

5656
/* Implementation of SVE powr.
57-
Provides the same accuracy as SVE pow, since it relies on the same
58-
algorithm.
59-
Maximum measured error is below 1 ULP. */
57+
58+
Provides the same accuracy as AdvSIMD pow and powr, since it relies on the
59+
same algorithm.
60+
61+
Maximum measured error is 1.04 ULPs:
62+
SV_NAME_D2 (powr) (0x1.3d2d45bc848acp+63, -0x1.a48a38b40cd43p-12)
63+
got 0x1.f7116284221fcp-1
64+
want 0x1.f7116284221fdp-1. */
6065
svfloat64_t SV_NAME_D2 (powr) (svfloat64_t x, svfloat64_t y, const svbool_t pg)
6166
{
6267
const struct data *d = ptr_barrier (&data);
@@ -99,7 +104,7 @@ svfloat64_t SV_NAME_D2 (powr) (svfloat64_t x, svfloat64_t y, const svbool_t pg)
99104
}
100105

101106
#if WANT_C23_TESTS
102-
TEST_ULP (SV_NAME_D2 (powr), 1.0)
107+
TEST_ULP (SV_NAME_D2 (powr), 0.55)
103108
/* Wide intervals spanning the positive domain. */
104109
# define SV_POWR_INTERVAL2(xlo, xhi, ylo, yhi, n) \
105110
TEST_INTERVAL2 (SV_NAME_D2 (powr), xlo, xhi, ylo, yhi, n) \

math/aarch64/sve/powrf.c

Lines changed: 8 additions & 5 deletions
Original file line numberDiff line numberDiff line change
@@ -1,5 +1,5 @@
11
/*
2-
* Single-precision SVE powr function.
2+
* Single-precision SVE exp(y * log(x)) function.
33
*
44
* Copyright (c) 2025, Arm Limited.
55
* SPDX-License-Identifier: MIT OR Apache-2.0 WITH LLVM-exception
@@ -53,11 +53,14 @@ sv_call_powrf_sc (svfloat32_t x1, svfloat32_t x2, svfloat32_t y, svbool_t cmp)
5353
}
5454

5555
/* Implementation of SVE powrf.
56-
Provides the same accuracy as AdvSIMD powf, since it relies on the same
57-
algorithm.
56+
57+
Provides the same accuracy as AdvSIMD powf and powrf, since it relies on the
58+
same algorithm.
59+
5860
Maximum measured error is 2.57 ULPs:
59-
SV_NAME_F2 (pow) (0x1.031706p+0, 0x1.ce2ec2p+12) got 0x1.fff868p+127
60-
want 0x1.fff862p+127. */
61+
SV_NAME_F2 (powr) (0x1.031706p+0, 0x1.ce2ec2p+12)
62+
got 0x1.fff868p+127
63+
want 0x1.fff862p+127. */
6164
svfloat32_t SV_NAME_F2 (powr) (svfloat32_t x, svfloat32_t y, const svbool_t pg)
6265
{
6366
const struct data *d = ptr_barrier (&data);

0 commit comments

Comments
 (0)