Skip to content

Commit 02267d8

Browse files
authored
Merge pull request #5288 from guoyuanplct/develop
kernel/riscv64: Optimized the implementation of axpby on TARGET=RISCV64_ZVL256B.
2 parents 0163143 + d2003dc commit 02267d8

File tree

2 files changed

+153
-0
lines changed

2 files changed

+153
-0
lines changed

kernel/riscv64/KERNEL.RISCV64_ZVL256B

Lines changed: 4 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -169,6 +169,7 @@ SSYMV_U_KERNEL = symv_U_vector.c
169169
SSYMV_L_KERNEL = symv_L_vector.c
170170
DSYMV_U_KERNEL = symv_U_vector.c
171171
DSYMV_L_KERNEL = symv_L_vector.c
172+
172173
CSYMV_U_KERNEL = ../generic/zsymv_k.c
173174
CSYMV_L_KERNEL = ../generic/zsymv_k.c
174175
ZSYMV_U_KERNEL = ../generic/zsymv_k.c
@@ -207,3 +208,6 @@ COMATCOPY_CN = zomatcopy_cn_vector.c
207208

208209
DOMATCOPY_CN = omatcopy_cn_vector.c
209210
SOMATCOPY_CN = omatcopy_cn_vector.c
211+
212+
SAXPBYKERNEL = axpby_vector_v2.c
213+
DAXPBYKERNEL = axpby_vector_v2.c

kernel/riscv64/axpby_vector_v2.c

Lines changed: 149 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,149 @@
1+
/***************************************************************************
2+
Copyright (c) 2022, The OpenBLAS Project
3+
All rights reserved.
4+
Redistribution and use in source and binary forms, with or without
5+
modification, are permitted provided that the following conditions are
6+
met:
7+
1. Redistributions of source code must retain the above copyright
8+
notice, this list of conditions and the following disclaimer.
9+
2. Redistributions in binary form must reproduce the above copyright
10+
notice, this list of conditions and the following disclaimer in
11+
the documentation and/or other materials provided with the
12+
distribution.
13+
3. Neither the name of the OpenBLAS project nor the names of
14+
its contributors may be used to endorse or promote products
15+
derived from this software without specific prior written permission.
16+
THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
17+
AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
18+
IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
19+
ARE DISCLAIMED. IN NO EVENT SHALL THE OPENBLAS PROJECT OR CONTRIBUTORS BE
20+
LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
21+
DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR
22+
SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER
23+
CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY,
24+
OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE
25+
USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
26+
*****************************************************************************/
27+
28+
#include "common.h"
29+
30+
/*
 * Precision-dependent aliases for the RVV intrinsics used by the kernel below.
 * When DOUBLE is not defined the aliases select the e32m8 / f32m8 intrinsic
 * variants; when DOUBLE is defined they select the e64m4 / f64m4 variants.
 * NOTE(review): RISCV_RVV() is not defined in this file; presumably it is
 * provided through common.h and maps onto the toolchain's intrinsic naming
 * scheme -- confirm.
 * NOTE(review): VFMVVF_FLOAT is defined in both branches but is not referenced
 * by the kernel function in this file.
 */
#if !defined(DOUBLE)
31+
#define VSETVL(n) RISCV_RVV(vsetvl_e32m8)(n)
32+
#define FLOAT_V_T vfloat32m8_t
33+
#define VLEV_FLOAT RISCV_RVV(vle32_v_f32m8)
34+
#define VLSEV_FLOAT RISCV_RVV(vlse32_v_f32m8)
35+
#define VSEV_FLOAT RISCV_RVV(vse32_v_f32m8)
36+
#define VSSEV_FLOAT RISCV_RVV(vsse32_v_f32m8)
37+
#define VFMACCVF_FLOAT RISCV_RVV(vfmacc_vf_f32m8)
38+
#define VFMULVF_FLOAT RISCV_RVV(vfmul_vf_f32m8)
39+
#define VFMVVF_FLOAT RISCV_RVV(vfmv_v_f_f32m8)
40+
#else
41+
#define VSETVL(n) RISCV_RVV(vsetvl_e64m4)(n)
42+
#define FLOAT_V_T vfloat64m4_t
43+
#define VLEV_FLOAT RISCV_RVV(vle64_v_f64m4)
44+
#define VLSEV_FLOAT RISCV_RVV(vlse64_v_f64m4)
45+
#define VSEV_FLOAT RISCV_RVV(vse64_v_f64m4)
46+
#define VSSEV_FLOAT RISCV_RVV(vsse64_v_f64m4)
47+
#define VFMACCVF_FLOAT RISCV_RVV(vfmacc_vf_f64m4)
48+
#define VFMULVF_FLOAT RISCV_RVV(vfmul_vf_f64m4)
49+
#define VFMVVF_FLOAT RISCV_RVV(vfmv_v_f_f64m4)
50+
#endif
51+
52+
/*
 * axpby kernel: y := alpha * x + beta * y over n elements.
 *
 *   n      number of elements to process; n <= 0 is a no-op.
 *   alpha  scalar applied to every element read from x.
 *   x      input vector, walked with an element stride of inc_x.
 *   inc_x  element stride of x.
 *   beta   scalar applied to every element read from y.
 *   y      input/output vector, walked with an element stride of inc_y.
 *   inc_y  element stride of y.
 *
 * Always returns 0.
 *
 * The work is dispatched on the strides so that unit-stride operands use
 * contiguous vector loads/stores and only genuinely strided operands use the
 * strided forms. Each loop iteration asks VSETVL for the number of elements
 * it may process (gvl) and advances by that many elements.
 *
 * inc_y == 0 makes every update target y[0], so the result is the sequential
 * recurrence y[0] = beta * y[0] + alpha * x[i]. That case is handled in scalar
 * code and must be tested before any strided-vector branch: element accesses
 * of a strided vector store are not ordered with respect to each other, so a
 * zero stride cannot reproduce the recurrence.
 *
 * NOTE(review): y is always loaded and scaled by beta, even when beta is
 * zero, so NaN/Inf already present in y propagate to the result -- confirm
 * this matches the intended axpby semantics.
 */
int CNAME(BLASLONG n, FLOAT alpha, FLOAT *x, BLASLONG inc_x, FLOAT beta, FLOAT *y, BLASLONG inc_y)
{
    FLOAT_V_T vx, vy;
    unsigned int gvl;

    if (n <= 0)
        return (0);

    if (inc_x == 1 && inc_y == 1)
    {
        /* Both operands contiguous. */
        while (n > 0)
        {
            gvl = VSETVL(n);

            vx = VLEV_FLOAT(x, gvl);
            vy = VLEV_FLOAT(y, gvl);

            vy = VFMULVF_FLOAT(vy, beta, gvl);
            vy = VFMACCVF_FLOAT(vy, alpha, vx, gvl);

            VSEV_FLOAT(y, vy, gvl);

            x += gvl;
            y += gvl;
            n -= gvl;
        }
    }
    else if (inc_y == 0)
    {
        /*
         * All updates hit y[0]: evaluate the recurrence in order, for any
         * inc_x (including inc_x == 1, which previously fell through to the
         * zero-stride vector path).
         */
        FLOAT vf = y[0];
        for (; n > 0; n--)
        {
            vf = (vf * beta) + (x[0] * alpha);
            x += inc_x;
        }
        y[0] = vf;
    }
    else if (inc_x == 1)
    {
        /* x contiguous, y strided; the strided forms take a byte stride. */
        BLASLONG stride_y = inc_y * sizeof(FLOAT);
        while (n > 0)
        {
            gvl = VSETVL(n);

            vy = VLSEV_FLOAT(y, stride_y, gvl);
            vx = VLEV_FLOAT(x, gvl);

            vy = VFMULVF_FLOAT(vy, beta, gvl);
            vy = VFMACCVF_FLOAT(vy, alpha, vx, gvl);

            VSSEV_FLOAT(y, stride_y, vy, gvl);

            x += gvl;
            /* Signed multiply so a negative inc_y steps backwards. */
            y += (BLASLONG)gvl * inc_y;
            n -= gvl;
        }
    }
    else if (inc_y == 1)
    {
        /* x strided, y contiguous. */
        BLASLONG stride_x = inc_x * sizeof(FLOAT);
        while (n > 0)
        {
            gvl = VSETVL(n);

            vx = VLSEV_FLOAT(x, stride_x, gvl);
            vy = VLEV_FLOAT(y, gvl);

            vy = VFMULVF_FLOAT(vy, beta, gvl);
            vy = VFMACCVF_FLOAT(vy, alpha, vx, gvl);

            VSEV_FLOAT(y, vy, gvl);

            x += (BLASLONG)gvl * inc_x;
            y += gvl;
            n -= gvl;
        }
    }
    else
    {
        /* Both operands strided (inc_y is non-zero here). */
        BLASLONG stride_x = inc_x * sizeof(FLOAT);
        BLASLONG stride_y = inc_y * sizeof(FLOAT);
        while (n > 0)
        {
            gvl = VSETVL(n);

            vy = VLSEV_FLOAT(y, stride_y, gvl);
            vx = VLSEV_FLOAT(x, stride_x, gvl);

            vy = VFMULVF_FLOAT(vy, beta, gvl);
            vy = VFMACCVF_FLOAT(vy, alpha, vx, gvl);

            VSSEV_FLOAT(y, stride_y, vy, gvl);

            x += (BLASLONG)gvl * inc_x;
            y += (BLASLONG)gvl * inc_y;
            n -= gvl;
        }
    }

    return (0);
}

0 commit comments

Comments
 (0)