math/aarch64: Tidy up all vector powers.

blapie · blapie · commit e8784db1f85a · 2025-11-28T10:53:55.000Z
Consistently document accuracy in vector pow(r)(f).
Adjust error threshold to match comments.
diff --git a/math/aarch64/advsimd/pow.c b/math/aarch64/advsimd/pow.c
@@ -1,5 +1,5 @@
 /*
- * Double-precision vector pow function.
+ * Double-precision vector x^y function.
  *
  * Copyright (c) 2020-2025, Arm Limited.
  * SPDX-License-Identifier: MIT OR Apache-2.0 WITH LLVM-exception
@@ -12,6 +12,11 @@
 #define WANT_V_POW_SIGN_BIAS 1
 #include "v_pow_inline.h"
 
+/* Implementation of AdvSIMD pow.
+   Maximum measured error is 1.04 ULPs:
+   _ZGVnN2vv_pow(0x1.024a3e56b3c3p-136, 0x1.87910248b58acp-13)
+     got 0x1.f71162f473251p-1
+    want 0x1.f71162f473252p-1.  */
 float64x2_t VPCS_ATTR V_NAME_D2 (pow) (float64x2_t x, float64x2_t y)
 {
   return v_pow_inline (x, y);
diff --git a/math/aarch64/advsimd/powf.c b/math/aarch64/advsimd/powf.c
@@ -1,5 +1,5 @@
 /*
- * Single-precision vector powf function.
+ * Single-precision vector x^y function.
  *
  * Copyright (c) 2019-2025, Arm Limited.
  * SPDX-License-Identifier: MIT OR Apache-2.0 WITH LLVM-exception
@@ -117,7 +117,6 @@ v_powf_x_is_neg_or_small (float32x4_t x, float32x4_t y, const struct data *d)
 }
 
 /* Implementation of AdvSIMD powf.
-   The theoretical maximum error is under 2.60 ULPs.
    Maximum measured error is 2.57 ULPs:
    V_NAME_F2 (pow) (0x1.031706p+0, 0x1.ce2ec2p+12)
      got 0x1.fff868p+127
@@ -153,7 +152,7 @@ float32x4_t VPCS_ATTR NOINLINE V_NAME_F2 (pow) (float32x4_t x, float32x4_t y)
 HALF_WIDTH_ALIAS_F2 (pow)
 
 TEST_SIG (V, F, 2, pow)
-TEST_ULP (V_NAME_F2 (pow), 2.1)
+TEST_ULP (V_NAME_F2 (pow), 2.08)
 #define V_POWF_INTERVAL2(xlo, xhi, ylo, yhi, n)                               \
   TEST_INTERVAL2 (V_NAME_F2 (pow), xlo, xhi, ylo, yhi, n)                     \
   TEST_INTERVAL2 (V_NAME_F2 (pow), xlo, xhi, -ylo, -yhi, n)
diff --git a/math/aarch64/advsimd/powr.c b/math/aarch64/advsimd/powr.c
@@ -1,5 +1,5 @@
 /*
- * Double-precision vector powr function.
+ * Double-precision vector exp(y * log(x)) function.
  *
  * Copyright (c) 2025, Arm Limited.
  * SPDX-License-Identifier: MIT OR Apache-2.0 WITH LLVM-exception
@@ -11,6 +11,11 @@
 #define WANT_V_POW_SIGN_BIAS 0
 #include "v_pow_inline.h"
 
+/* Implementation of AdvSIMD powr.
+   Maximum measured error is 1.04 ULPs:
+   _ZGVnN2vv_powr(0x1.024a3e56b3c3p-136, 0x1.87910248b58acp-13)
+     got 0x1.f71162f473251p-1
+    want 0x1.f71162f473252p-1.  */
 float64x2_t VPCS_ATTR V_NAME_D2 (powr) (float64x2_t x, float64x2_t y)
 {
   return v_pow_inline (x, y);
diff --git a/math/aarch64/advsimd/powrf.c b/math/aarch64/advsimd/powrf.c
@@ -1,5 +1,5 @@
 /*
- * Single-precision vector powrf function.
+ * Single-precision vector exp(y * log(x)) function.
  *
  * Copyright (c) 2025, Arm Limited.
  * SPDX-License-Identifier: MIT OR Apache-2.0 WITH LLVM-exception
@@ -66,12 +66,14 @@ v_powrf_x_is_neg_or_sub (float32x4_t x, float32x4_t y, const struct data *d)
 }
 
 /* Implementation of AdvSIMD powrf.
+
      powr(x,y) := exp(y * log (x))
+
    This means powr(x,y) core computation matches that of pow(x,y)
    but powr returns NaN for negative x even if y is an integer.
-   The theoretical maximum error is under 2.60 ULPs.
+
    Maximum measured error is 2.57 ULPs:
-   V_NAME_F2 (pow) (0x1.031706p+0, 0x1.ce2ec2p+12)
+   V_NAME_F2 (powr) (0x1.031706p+0, 0x1.ce2ec2p+12)
      got 0x1.fff868p+127
     want 0x1.fff862p+127.  */
 float32x4_t VPCS_ATTR NOINLINE V_NAME_F2 (powr) (float32x4_t x, float32x4_t y)
@@ -104,7 +106,7 @@ float32x4_t VPCS_ATTR NOINLINE V_NAME_F2 (powr) (float32x4_t x, float32x4_t y)
 HALF_WIDTH_ALIAS_F2 (powr)
 
 #if WANT_C23_TESTS
-TEST_ULP (V_NAME_F2 (powr), 2.1)
+TEST_ULP (V_NAME_F2 (powr), 2.08)
 #  define V_POWRF_INTERVAL2(xlo, xhi, ylo, yhi, n)                            \
     TEST_INTERVAL2 (V_NAME_F2 (powr), xlo, xhi, ylo, yhi, n)                  \
     TEST_INTERVAL2 (V_NAME_F2 (powr), xlo, xhi, -ylo, -yhi, n)
diff --git a/math/aarch64/advsimd/v_pow_inline.h b/math/aarch64/advsimd/v_pow_inline.h
@@ -56,17 +56,6 @@ static const struct data
   .ln2_lo_n = -0x1.c610ca86c3899p-45,
 };
 
-/* This version implements an algorithm close to scalar pow but
-   - does not implement the trick in the exp's specialcase subroutine to avoid
-     double-rounding,
-   - does not use a tail in the exponential core computation,
-   - and pow's exp polynomial order and table bits might differ.
-
-   Maximum measured error is 1.04 ULPs:
-   _ZGVnN2vv_pow(0x1.024a3e56b3c3p-136, 0x1.87910248b58acp-13)
-     got 0x1.f71162f473251p-1
-    want 0x1.f71162f473252p-1.  */
-
 static inline float64x2_t VPCS_ATTR
 v_masked_lookup_f64 (const double *table, uint64x2_t i)
 {
@@ -181,6 +170,11 @@ scalar_fallback (float64x2_t x, float64x2_t y)
 			pow_scalar_special_case (x[1], y[1]) };
 }
 
+/* This version of AdvSIMD pow implements an algorithm close to AOR scalar pow
+   but:
+   - it does not prevent double-rounding in the exp's specialcase subroutine,
+   - it does not use a tail in the exponential core computation,
+   - and pow's exp polynomial order and table bits might differ.  */
 static inline float64x2_t VPCS_ATTR
 v_pow_inline (float64x2_t x, float64x2_t y)
 {
diff --git a/math/aarch64/sve/pow.c b/math/aarch64/sve/pow.c
@@ -1,5 +1,5 @@
 /*
- * Double-precision SVE pow(x, y) function.
+ * Double-precision SVE x^y function.
  *
  * Copyright (c) 2022-2025, Arm Limited.
  * SPDX-License-Identifier: MIT OR Apache-2.0 WITH LLVM-exception
@@ -9,28 +9,6 @@
 #include "test_sig.h"
 #include "test_defs.h"
 
-/* This version share a similar algorithm as AOR scalar pow.
-
-   The core computation consists in computing pow(x, y) as
-
-     exp (y * log (x)).
-
-   The algorithms for exp and log are very similar to scalar exp and log.
-   The log relies on table lookup for 3 variables and an order 8 polynomial.
-   It returns a high and a low contribution that are then passed to the exp,
-   to minimise the loss of accuracy in both routines.
-   The exp is based on 8-bit table lookup for scale and order-4 polynomial.
-   The SVE algorithm drops the tail in the exp computation at the price of
-   a lower accuracy, slightly above 1ULP.
-   The SVE algorithm also drops the special treatement of small (< 2^-65) and
-   large (> 2^63) finite values of |y|, as they only affect non-round to
-   nearest modes.
-
-   Maximum measured error is 1.04 ULPs:
-   SV_NAME_D2 (pow) (0x1.3d2d45bc848acp+63, -0x1.a48a38b40cd43p-12)
-     got 0x1.f7116284221fcp-1
-    want 0x1.f7116284221fdp-1.  */
-
 #define WANT_SV_POW_SIGN_BIAS 1
 #include "sv_pow_inline.h"
 
@@ -72,6 +50,32 @@ sv_pow_specialcase (svfloat64_t x1, svfloat64_t x2, svfloat64_t y,
   return sv_call2_f64 (pow_specialcase, x1, x2, y, cmp);
 }
 
+/* Implementation of SVE pow.
+
+   This version shares a similar algorithm with AOR scalar pow.
+
+   The core computation consists in computing pow(x, y) as
+
+     exp (y * log (x)).
+
+   The algorithms for exp and log are very similar to scalar exp and log.
+   The log relies on table lookup for 3 variables and an order 8 polynomial.
+   It returns a high and a low contribution that are then passed to the exp,
+   to minimise the loss of accuracy in both routines.
+   The exp is based on 8-bit table lookup for scale and order-4 polynomial.
+   The SVE algorithm drops the tail in the exp computation at the price of
+   a lower accuracy, slightly above 1 ULP.
+   The SVE algorithm also drops the special treatment of small (< 2^-65) and
+   large (> 2^63) finite values of |y|, as they only affect non-round to
+   nearest modes.
+
+   Provides the same accuracy as AdvSIMD pow, since it relies on the same
+   algorithm.
+
+   Maximum measured error is 1.04 ULPs:
+   SV_NAME_D2 (pow) (0x1.3d2d45bc848acp+63, -0x1.a48a38b40cd43p-12)
+     got 0x1.f7116284221fcp-1
+    want 0x1.f7116284221fdp-1.  */
 svfloat64_t SV_NAME_D2 (pow) (svfloat64_t x, svfloat64_t y, const svbool_t pg)
 {
   const struct data *d = ptr_barrier (&data);
diff --git a/math/aarch64/sve/powf.c b/math/aarch64/sve/powf.c
@@ -1,5 +1,5 @@
 /*
- * Single-precision SVE powf function.
+ * Single-precision SVE x^y function.
  *
  * Copyright (c) 2023-2025, Arm Limited.
  * SPDX-License-Identifier: MIT OR Apache-2.0 WITH LLVM-exception
@@ -52,11 +52,14 @@ sv_call_powf_sc (svfloat32_t x1, svfloat32_t x2, svfloat32_t y, svbool_t cmp)
 }
 
 /* Implementation of SVE powf.
+
    Provides the same accuracy as AdvSIMD powf, since it relies on the same
-   algorithm. The theoretical maximum error is under 2.60 ULPs.
+   algorithm.
+
    Maximum measured error is 2.57 ULPs:
-   SV_NAME_F2 (pow) (0x1.031706p+0, 0x1.ce2ec2p+12) got 0x1.fff868p+127
-						   want 0x1.fff862p+127.  */
+   SV_NAME_F2 (pow) (0x1.031706p+0, 0x1.ce2ec2p+12)
+     got 0x1.fff868p+127
+    want 0x1.fff862p+127.  */
 svfloat32_t SV_NAME_F2 (pow) (svfloat32_t x, svfloat32_t y, const svbool_t pg)
 {
   const struct data *d = ptr_barrier (&data);
diff --git a/math/aarch64/sve/powr.c b/math/aarch64/sve/powr.c
@@ -1,5 +1,5 @@
 /*
- * Double-precision SVE powr function.
+ * Double-precision SVE exp(y * log(x)) function.
  *
  * Copyright (c) 2025, Arm Limited.
  * SPDX-License-Identifier: MIT OR Apache-2.0 WITH LLVM-exception
@@ -54,9 +54,14 @@ sv_powr_specialcase (svfloat64_t x1, svfloat64_t x2, svfloat64_t y,
 }
 
 /* Implementation of SVE powr.
-   Provides the same accuracy as SVE pow, since it relies on the same
-   algorithm.
-   Maximum measured error is below 1 ULP.  */
+
+   Provides the same accuracy as AdvSIMD pow and powr, since it relies on the
+   same algorithm.
+
+   Maximum measured error is 1.04 ULPs:
+   SV_NAME_D2 (powr) (0x1.3d2d45bc848acp+63, -0x1.a48a38b40cd43p-12)
+     got 0x1.f7116284221fcp-1
+    want 0x1.f7116284221fdp-1.  */
 svfloat64_t SV_NAME_D2 (powr) (svfloat64_t x, svfloat64_t y, const svbool_t pg)
 {
   const struct data *d = ptr_barrier (&data);
@@ -99,7 +104,7 @@ svfloat64_t SV_NAME_D2 (powr) (svfloat64_t x, svfloat64_t y, const svbool_t pg)
 }
 
 #if WANT_C23_TESTS
-TEST_ULP (SV_NAME_D2 (powr), 1.0)
+TEST_ULP (SV_NAME_D2 (powr), 0.55)
 /* Wide intervals spanning the positive domain.  */
 #  define SV_POWR_INTERVAL2(xlo, xhi, ylo, yhi, n)                            \
     TEST_INTERVAL2 (SV_NAME_D2 (powr), xlo, xhi, ylo, yhi, n)                 \
diff --git a/math/aarch64/sve/powrf.c b/math/aarch64/sve/powrf.c
@@ -1,5 +1,5 @@
 /*
- * Single-precision SVE powr function.
+ * Single-precision SVE exp(y * log(x)) function.
  *
  * Copyright (c) 2025, Arm Limited.
  * SPDX-License-Identifier: MIT OR Apache-2.0 WITH LLVM-exception
@@ -53,11 +53,14 @@ sv_call_powrf_sc (svfloat32_t x1, svfloat32_t x2, svfloat32_t y, svbool_t cmp)
 }
 
 /* Implementation of SVE powrf.
-   Provides the same accuracy as AdvSIMD powf, since it relies on the same
-   algorithm.
+
+   Provides the same accuracy as AdvSIMD powf and powrf, since it relies on the
+   same algorithm.
+
    Maximum measured error is 2.57 ULPs:
-   SV_NAME_F2 (pow) (0x1.031706p+0, 0x1.ce2ec2p+12) got 0x1.fff868p+127
-						   want 0x1.fff862p+127.  */
+   SV_NAME_F2 (powr) (0x1.031706p+0, 0x1.ce2ec2p+12)
+     got 0x1.fff868p+127
+    want 0x1.fff862p+127.  */
 svfloat32_t SV_NAME_F2 (powr) (svfloat32_t x, svfloat32_t y, const svbool_t pg)
 {
   const struct data *d = ptr_barrier (&data);