Skip to content

Commit 0efcc07

Browse files
committed
math/aarch64/sve: Use shorter buffers in sv_call if VL is fixed
GCC and Clang define __ARM_FEATURE_SVE_BITS in response to -msve-vector-bits; use it, when available, to reduce the size of the buffers.
1 parent 1c3c85d commit 0efcc07

File tree

1 file changed

+19
-14
lines changed

1 file changed

+19
-14
lines changed

math/aarch64/sve/sv_math.h

Lines changed: 19 additions & 14 deletions
Original file line numberDiff line numberDiff line change
@@ -26,6 +26,17 @@
2626

2727
#include "math_config.h"
2828

29+
#if !defined(__ARM_FEATURE_SVE_BITS) || __ARM_FEATURE_SVE_BITS == 0
30+
/* If not specified by -msve-vector-bits, assume maximum vector length. */
31+
# define SVE_VECTOR_BYTES 256
32+
#else
33+
# define SVE_VECTOR_BYTES (__ARM_FEATURE_SVE_BITS / 8)
34+
#endif
35+
#define SVE_NUM_FLTS (SVE_VECTOR_BYTES / sizeof (float))
36+
#define SVE_NUM_DBLS (SVE_VECTOR_BYTES / sizeof (double))
37+
/* A predicate holds one bit per byte of the vector, so for a vector length of VL bits it occupies VL / 64 bytes, i.e. SVE_VECTOR_BYTES / 8. */
38+
#define SVE_NUM_PG_BYTES (SVE_VECTOR_BYTES / sizeof (uint64_t))
39+
2940
#define SV_NAME_F1(fun) _ZGVsMxv_##fun##f
3041
#define SV_NAME_D1(fun) _ZGVsMxv_##fun
3142
#define SV_NAME_F2(fun) _ZGVsMxvv_##fun##f
@@ -63,9 +74,8 @@ sv_f64 (double x)
6374
static inline svfloat64_t
6475
sv_call_f64 (double (*f) (double), svfloat64_t x, svfloat64_t y, svbool_t cmp)
6576
{
66-
/* Buffer size corresponds to maximum possible vector length. */
67-
double tmp[32];
68-
uint8_t pg_bits[32];
77+
double tmp[SVE_NUM_DBLS];
78+
uint8_t pg_bits[SVE_NUM_PG_BYTES];
6979
svstr_p (pg_bits, cmp);
7080
svst1 (svptrue_b64 (), tmp, svsel (cmp, x, y));
7181

@@ -83,9 +93,8 @@ static inline svfloat64_t
8393
sv_call2_f64 (double (*f) (double, double), svfloat64_t x1, svfloat64_t x2,
8494
svfloat64_t y, svbool_t cmp)
8595
{
86-
/* Buffer size corresponds to maximum possible vector length. */
87-
double tmp1[32], tmp2[32];
88-
uint8_t pg_bits[32];
96+
double tmp1[SVE_NUM_DBLS], tmp2[SVE_NUM_DBLS];
97+
uint8_t pg_bits[SVE_NUM_PG_BYTES];
8998
svstr_p (pg_bits, cmp);
9099
svst1 (svptrue_b64 (), tmp1, svsel (cmp, x1, y));
91100
svst1 (cmp, tmp2, x2);
@@ -129,11 +138,8 @@ sv_f32 (float x)
129138
static inline svfloat32_t
130139
sv_call_f32 (float (*f) (float), svfloat32_t x, svfloat32_t y, svbool_t cmp)
131140
{
132-
/* Buffer size corresponds to maximum vector length. */
133-
float tmp[64];
134-
/* 32, not 64, is correct for pg_bits because each bit of pg_bits maps to 1
135-
byte of the vector, so a uint8_t indicates predication of two floats. */
136-
uint8_t pg_bits[32];
141+
float tmp[SVE_NUM_FLTS];
142+
uint8_t pg_bits[SVE_NUM_PG_BYTES];
137143
svstr_p (pg_bits, cmp);
138144
svst1 (svptrue_b32 (), tmp, svsel (cmp, x, y));
139145

@@ -156,9 +162,8 @@ static inline svfloat32_t
156162
sv_call2_f32 (float (*f) (float, float), svfloat32_t x1, svfloat32_t x2,
157163
svfloat32_t y, svbool_t cmp)
158164
{
159-
/* Buffer size corresponds to maximum vector length. */
160-
float tmp1[64], tmp2[64];
161-
uint8_t pg_bits[32];
165+
float tmp1[SVE_NUM_FLTS], tmp2[SVE_NUM_FLTS];
166+
uint8_t pg_bits[SVE_NUM_PG_BYTES];
162167
svstr_p (pg_bits, cmp);
163168
svst1 (svptrue_b32 (), tmp1, svsel (cmp, x1, y));
164169
svst1 (cmp, tmp2, x2);

0 commit comments

Comments
 (0)