|
1 | 1 | /* |
2 | | - * Double-precision SVE pow(x, y) function. |
| 2 | + * Double-precision SVE x^y function. |
3 | 3 | * |
4 | 4 | * Copyright (c) 2022-2025, Arm Limited. |
5 | 5 | * SPDX-License-Identifier: MIT OR Apache-2.0 WITH LLVM-exception |
|
9 | 9 | #include "test_sig.h" |
10 | 10 | #include "test_defs.h" |
11 | 11 |
|
12 | | -/* This version share a similar algorithm as AOR scalar pow. |
13 | | -
|
14 | | - The core computation consists in computing pow(x, y) as |
15 | | -
|
16 | | - exp (y * log (x)). |
17 | | -
|
18 | | - The algorithms for exp and log are very similar to scalar exp and log. |
19 | | - The log relies on table lookup for 3 variables and an order 8 polynomial. |
20 | | - It returns a high and a low contribution that are then passed to the exp, |
21 | | - to minimise the loss of accuracy in both routines. |
22 | | - The exp is based on 8-bit table lookup for scale and order-4 polynomial. |
23 | | - The SVE algorithm drops the tail in the exp computation at the price of |
24 | | - a lower accuracy, slightly above 1ULP. |
25 | | - The SVE algorithm also drops the special treatement of small (< 2^-65) and |
26 | | - large (> 2^63) finite values of |y|, as they only affect non-round to |
27 | | - nearest modes. |
28 | | -
|
29 | | - Maximum measured error is 1.04 ULPs: |
30 | | - SV_NAME_D2 (pow) (0x1.3d2d45bc848acp+63, -0x1.a48a38b40cd43p-12) |
31 | | - got 0x1.f7116284221fcp-1 |
32 | | - want 0x1.f7116284221fdp-1. */ |
33 | | - |
34 | 12 | #define WANT_SV_POW_SIGN_BIAS 1 |
35 | 13 | #include "sv_pow_inline.h" |
36 | 14 |
|
@@ -72,6 +50,32 @@ sv_pow_specialcase (svfloat64_t x1, svfloat64_t x2, svfloat64_t y, |
72 | 50 | return sv_call2_f64 (pow_specialcase, x1, x2, y, cmp); |
73 | 51 | } |
74 | 52 |
|
| 53 | +/* Implementation of SVE pow. |
| 54 | +
|
| 55 | + This version shares a similar algorithm with AOR scalar pow. |
| 56 | +
|
| 57 | + The core computation consists in computing pow(x, y) as |
| 58 | +
|
| 59 | + exp (y * log (x)). |
| 60 | +
|
| 61 | + The algorithms for exp and log are very similar to scalar exp and log. |
| 62 | + The log relies on table lookup for 3 variables and an order 8 polynomial. |
| 63 | + It returns a high and a low contribution that are then passed to the exp, |
| 64 | + to minimise the loss of accuracy in both routines. |
| 65 | + The exp is based on 8-bit table lookup for scale and order-4 polynomial. |
| 66 | + The SVE algorithm drops the tail in the exp computation at the price of |
| 67 | + a lower accuracy, slightly above 1 ULP. |
| 68 | + The SVE algorithm also drops the special treatment of small (< 2^-65) and |
| 69 | + large (> 2^63) finite values of |y|, as they only affect rounding modes |
| 70 | + other than round-to-nearest. |
| 71 | +
|
| 72 | + Provides the same accuracy as AdvSIMD pow, since it relies on the same |
| 73 | + algorithm. |
| 74 | +
|
| 75 | + Maximum measured error is 1.04 ULPs: |
| 76 | + SV_NAME_D2 (pow) (0x1.3d2d45bc848acp+63, -0x1.a48a38b40cd43p-12) |
| 77 | + got 0x1.f7116284221fcp-1 |
| 78 | + want 0x1.f7116284221fdp-1. */ |
75 | 79 | svfloat64_t SV_NAME_D2 (pow) (svfloat64_t x, svfloat64_t y, const svbool_t pg) |
76 | 80 | { |
77 | 81 | const struct data *d = ptr_barrier (&data); |
|
0 commit comments