Skip to content

Commit 50b7117

Browse files
authored
Use intrinsics for all sifive_x280 kernels (flame#822)
Details:
- Replace all assembly kernels in the `sifive_x280` kernel set with intrinsic versions.
- Fix the bug encountered in flame#805.
- Update the RISC-V toolchain used in CI testing.
- Special thanks to Michael Yeh (@myeh01) and SiFive.
1 parent 827c50b commit 50b7117

File tree

62 files changed

+6902
-12931
lines changed

Some content is hidden

Large commits have some content hidden by default. Use the search box below to find content that may be hidden.

62 files changed

+6902
-12931
lines changed

config/sifive_x280/bli_cntx_init_sifive_x280.c

Lines changed: 53 additions & 53 deletions
Original file line number | Diff line number | Diff line change
@@ -54,10 +54,10 @@ void bli_cntx_init_sifive_x280( cntx_t* cntx )
5454
BLIS_ADDV_KER, BLIS_SCOMPLEX, bli_caddv_sifive_x280_intr,
5555
BLIS_ADDV_KER, BLIS_DCOMPLEX, bli_zaddv_sifive_x280_intr,
5656

57-
BLIS_AMAXV_KER, BLIS_FLOAT, bli_samaxv_sifive_x280_asm,
58-
BLIS_AMAXV_KER, BLIS_DOUBLE, bli_damaxv_sifive_x280_asm,
59-
BLIS_AMAXV_KER, BLIS_SCOMPLEX, bli_camaxv_sifive_x280_asm,
60-
BLIS_AMAXV_KER, BLIS_DCOMPLEX, bli_zamaxv_sifive_x280_asm,
57+
BLIS_AMAXV_KER, BLIS_FLOAT, bli_samaxv_sifive_x280_intr,
58+
BLIS_AMAXV_KER, BLIS_DOUBLE, bli_damaxv_sifive_x280_intr,
59+
BLIS_AMAXV_KER, BLIS_SCOMPLEX, bli_camaxv_sifive_x280_intr,
60+
BLIS_AMAXV_KER, BLIS_DCOMPLEX, bli_zamaxv_sifive_x280_intr,
6161

6262
BLIS_AXPBYV_KER, BLIS_FLOAT, bli_saxpbyv_sifive_x280_intr,
6363
BLIS_AXPBYV_KER, BLIS_DOUBLE, bli_daxpbyv_sifive_x280_intr,
@@ -69,10 +69,10 @@ void bli_cntx_init_sifive_x280( cntx_t* cntx )
6969
BLIS_AXPYV_KER, BLIS_SCOMPLEX, bli_caxpyv_sifive_x280_intr,
7070
BLIS_AXPYV_KER, BLIS_DCOMPLEX, bli_zaxpyv_sifive_x280_intr,
7171

72-
BLIS_COPYV_KER, BLIS_FLOAT, bli_scopyv_sifive_x280_asm,
73-
BLIS_COPYV_KER, BLIS_DOUBLE, bli_dcopyv_sifive_x280_asm,
74-
BLIS_COPYV_KER, BLIS_SCOMPLEX, bli_ccopyv_sifive_x280_asm,
75-
BLIS_COPYV_KER, BLIS_DCOMPLEX, bli_zcopyv_sifive_x280_asm,
72+
BLIS_COPYV_KER, BLIS_FLOAT, bli_scopyv_sifive_x280_intr,
73+
BLIS_COPYV_KER, BLIS_DOUBLE, bli_dcopyv_sifive_x280_intr,
74+
BLIS_COPYV_KER, BLIS_SCOMPLEX, bli_ccopyv_sifive_x280_intr,
75+
BLIS_COPYV_KER, BLIS_DCOMPLEX, bli_zcopyv_sifive_x280_intr,
7676

7777
BLIS_DOTV_KER, BLIS_FLOAT, bli_sdotv_sifive_x280_intr,
7878
BLIS_DOTV_KER, BLIS_DOUBLE, bli_ddotv_sifive_x280_intr,
@@ -84,15 +84,15 @@ void bli_cntx_init_sifive_x280( cntx_t* cntx )
8484
BLIS_DOTXV_KER, BLIS_SCOMPLEX, bli_cdotxv_sifive_x280_intr,
8585
BLIS_DOTXV_KER, BLIS_DCOMPLEX, bli_zdotxv_sifive_x280_intr,
8686

87-
BLIS_INVERTV_KER, BLIS_FLOAT, bli_sinvertv_sifive_x280_asm,
88-
BLIS_INVERTV_KER, BLIS_DOUBLE, bli_dinvertv_sifive_x280_asm,
89-
BLIS_INVERTV_KER, BLIS_SCOMPLEX, bli_cinvertv_sifive_x280_asm,
90-
BLIS_INVERTV_KER, BLIS_DCOMPLEX, bli_zinvertv_sifive_x280_asm,
87+
BLIS_INVERTV_KER, BLIS_FLOAT, bli_sinvertv_sifive_x280_intr,
88+
BLIS_INVERTV_KER, BLIS_DOUBLE, bli_dinvertv_sifive_x280_intr,
89+
BLIS_INVERTV_KER, BLIS_SCOMPLEX, bli_cinvertv_sifive_x280_intr,
90+
BLIS_INVERTV_KER, BLIS_DCOMPLEX, bli_zinvertv_sifive_x280_intr,
9191

92-
BLIS_INVSCALV_KER, BLIS_FLOAT, bli_sinvscalv_sifive_x280_asm,
93-
BLIS_INVSCALV_KER, BLIS_DOUBLE, bli_dinvscalv_sifive_x280_asm,
94-
BLIS_INVSCALV_KER, BLIS_SCOMPLEX, bli_cinvscalv_sifive_x280_asm,
95-
BLIS_INVSCALV_KER, BLIS_DCOMPLEX, bli_zinvscalv_sifive_x280_asm,
92+
BLIS_INVSCALV_KER, BLIS_FLOAT, bli_sinvscalv_sifive_x280_intr,
93+
BLIS_INVSCALV_KER, BLIS_DOUBLE, bli_dinvscalv_sifive_x280_intr,
94+
BLIS_INVSCALV_KER, BLIS_SCOMPLEX, bli_cinvscalv_sifive_x280_intr,
95+
BLIS_INVSCALV_KER, BLIS_DCOMPLEX, bli_zinvscalv_sifive_x280_intr,
9696

9797
BLIS_SCAL2V_KER, BLIS_FLOAT, bli_sscal2v_sifive_x280_intr,
9898
BLIS_SCAL2V_KER, BLIS_DOUBLE, bli_dscal2v_sifive_x280_intr,
@@ -104,20 +104,20 @@ void bli_cntx_init_sifive_x280( cntx_t* cntx )
104104
BLIS_SCALV_KER, BLIS_SCOMPLEX, bli_cscalv_sifive_x280_intr,
105105
BLIS_SCALV_KER, BLIS_DCOMPLEX, bli_zscalv_sifive_x280_intr,
106106

107-
BLIS_SETV_KER, BLIS_FLOAT, bli_ssetv_sifive_x280_asm,
108-
BLIS_SETV_KER, BLIS_DOUBLE, bli_dsetv_sifive_x280_asm,
109-
BLIS_SETV_KER, BLIS_SCOMPLEX, bli_csetv_sifive_x280_asm,
110-
BLIS_SETV_KER, BLIS_DCOMPLEX, bli_zsetv_sifive_x280_asm,
107+
BLIS_SETV_KER, BLIS_FLOAT, bli_ssetv_sifive_x280_intr,
108+
BLIS_SETV_KER, BLIS_DOUBLE, bli_dsetv_sifive_x280_intr,
109+
BLIS_SETV_KER, BLIS_SCOMPLEX, bli_csetv_sifive_x280_intr,
110+
BLIS_SETV_KER, BLIS_DCOMPLEX, bli_zsetv_sifive_x280_intr,
111111

112112
BLIS_SUBV_KER, BLIS_FLOAT, bli_ssubv_sifive_x280_intr,
113113
BLIS_SUBV_KER, BLIS_DOUBLE, bli_dsubv_sifive_x280_intr,
114114
BLIS_SUBV_KER, BLIS_SCOMPLEX, bli_csubv_sifive_x280_intr,
115115
BLIS_SUBV_KER, BLIS_DCOMPLEX, bli_zsubv_sifive_x280_intr,
116116

117-
BLIS_SWAPV_KER, BLIS_FLOAT, bli_sswapv_sifive_x280_asm,
118-
BLIS_SWAPV_KER, BLIS_DOUBLE, bli_dswapv_sifive_x280_asm,
119-
BLIS_SWAPV_KER, BLIS_SCOMPLEX, bli_cswapv_sifive_x280_asm,
120-
BLIS_SWAPV_KER, BLIS_DCOMPLEX, bli_zswapv_sifive_x280_asm,
117+
BLIS_SWAPV_KER, BLIS_FLOAT, bli_sswapv_sifive_x280_intr,
118+
BLIS_SWAPV_KER, BLIS_DOUBLE, bli_dswapv_sifive_x280_intr,
119+
BLIS_SWAPV_KER, BLIS_SCOMPLEX, bli_cswapv_sifive_x280_intr,
120+
BLIS_SWAPV_KER, BLIS_DCOMPLEX, bli_zswapv_sifive_x280_intr,
121121

122122
BLIS_XPBYV_KER, BLIS_FLOAT, bli_sxpbyv_sifive_x280_intr,
123123
BLIS_XPBYV_KER, BLIS_DOUBLE, bli_dxpbyv_sifive_x280_intr,
@@ -130,46 +130,46 @@ void bli_cntx_init_sifive_x280( cntx_t* cntx )
130130
BLIS_AXPY2V_KER, BLIS_SCOMPLEX, bli_caxpy2v_sifive_x280_intr,
131131
BLIS_AXPY2V_KER, BLIS_DCOMPLEX, bli_zaxpy2v_sifive_x280_intr,
132132

133-
BLIS_AXPYF_KER, BLIS_FLOAT, bli_saxpyf_sifive_x280_asm,
134-
BLIS_AXPYF_KER, BLIS_DOUBLE, bli_daxpyf_sifive_x280_asm,
135-
BLIS_AXPYF_KER, BLIS_SCOMPLEX, bli_caxpyf_sifive_x280_asm,
136-
BLIS_AXPYF_KER, BLIS_DCOMPLEX, bli_zaxpyf_sifive_x280_asm,
133+
BLIS_AXPYF_KER, BLIS_FLOAT, bli_saxpyf_sifive_x280_intr,
134+
BLIS_AXPYF_KER, BLIS_DOUBLE, bli_daxpyf_sifive_x280_intr,
135+
BLIS_AXPYF_KER, BLIS_SCOMPLEX, bli_caxpyf_sifive_x280_intr,
136+
BLIS_AXPYF_KER, BLIS_DCOMPLEX, bli_zaxpyf_sifive_x280_intr,
137137

138-
BLIS_DOTXF_KER, BLIS_FLOAT, bli_sdotxf_sifive_x280_asm,
139-
BLIS_DOTXF_KER, BLIS_DOUBLE, bli_ddotxf_sifive_x280_asm,
140-
BLIS_DOTXF_KER, BLIS_SCOMPLEX, bli_cdotxf_sifive_x280_asm,
141-
BLIS_DOTXF_KER, BLIS_DCOMPLEX, bli_zdotxf_sifive_x280_asm,
138+
BLIS_DOTXF_KER, BLIS_FLOAT, bli_sdotxf_sifive_x280_intr,
139+
BLIS_DOTXF_KER, BLIS_DOUBLE, bli_ddotxf_sifive_x280_intr,
140+
BLIS_DOTXF_KER, BLIS_SCOMPLEX, bli_cdotxf_sifive_x280_intr,
141+
BLIS_DOTXF_KER, BLIS_DCOMPLEX, bli_zdotxf_sifive_x280_intr,
142142

143143
BLIS_DOTAXPYV_KER, BLIS_FLOAT, bli_sdotaxpyv_sifive_x280_intr,
144144
BLIS_DOTAXPYV_KER, BLIS_DOUBLE, bli_ddotaxpyv_sifive_x280_intr,
145145
BLIS_DOTAXPYV_KER, BLIS_SCOMPLEX, bli_cdotaxpyv_sifive_x280_intr,
146146
BLIS_DOTAXPYV_KER, BLIS_DCOMPLEX, bli_zdotaxpyv_sifive_x280_intr,
147147

148-
BLIS_DOTXAXPYF_KER, BLIS_FLOAT, bli_sdotxaxpyf_sifive_x280_asm,
149-
BLIS_DOTXAXPYF_KER, BLIS_DOUBLE, bli_ddotxaxpyf_sifive_x280_asm,
150-
BLIS_DOTXAXPYF_KER, BLIS_SCOMPLEX, bli_cdotxaxpyf_sifive_x280_asm,
151-
BLIS_DOTXAXPYF_KER, BLIS_DCOMPLEX, bli_zdotxaxpyf_sifive_x280_asm,
148+
BLIS_DOTXAXPYF_KER, BLIS_FLOAT, bli_sdotxaxpyf_sifive_x280_intr,
149+
BLIS_DOTXAXPYF_KER, BLIS_DOUBLE, bli_ddotxaxpyf_sifive_x280_intr,
150+
BLIS_DOTXAXPYF_KER, BLIS_SCOMPLEX, bli_cdotxaxpyf_sifive_x280_intr,
151+
BLIS_DOTXAXPYF_KER, BLIS_DCOMPLEX, bli_zdotxaxpyf_sifive_x280_intr,
152152

153153
// Level 1m
154-
BLIS_PACKM_KER, BLIS_FLOAT, bli_spackm_sifive_x280_asm_7m4,
155-
BLIS_PACKM_KER, BLIS_DOUBLE, bli_dpackm_sifive_x280_asm_7m4,
156-
BLIS_PACKM_KER, BLIS_SCOMPLEX, bli_cpackm_sifive_x280_asm_6m2,
157-
BLIS_PACKM_KER, BLIS_DCOMPLEX, bli_zpackm_sifive_x280_asm_6m2,
154+
BLIS_PACKM_KER, BLIS_FLOAT, bli_spackm_sifive_x280_intr,
155+
BLIS_PACKM_KER, BLIS_DOUBLE, bli_dpackm_sifive_x280_intr,
156+
BLIS_PACKM_KER, BLIS_SCOMPLEX, bli_cpackm_sifive_x280_intr,
157+
BLIS_PACKM_KER, BLIS_DCOMPLEX, bli_zpackm_sifive_x280_intr,
158158

159159
// Level 3
160-
BLIS_GEMM_UKR, BLIS_FLOAT, bli_sgemm_sifive_x280_asm_7m4,
161-
BLIS_GEMM_UKR, BLIS_DOUBLE, bli_dgemm_sifive_x280_asm_7m4,
162-
BLIS_GEMM_UKR, BLIS_SCOMPLEX, bli_cgemm_sifive_x280_asm_6m2,
163-
BLIS_GEMM_UKR, BLIS_DCOMPLEX, bli_zgemm_sifive_x280_asm_6m2,
164-
165-
BLIS_GEMMTRSM_L_UKR, BLIS_FLOAT, bli_sgemmtrsm_l_sifive_x280_asm,
166-
BLIS_GEMMTRSM_L_UKR, BLIS_DOUBLE, bli_dgemmtrsm_l_sifive_x280_asm,
167-
BLIS_GEMMTRSM_L_UKR, BLIS_SCOMPLEX, bli_cgemmtrsm_l_sifive_x280_asm,
168-
BLIS_GEMMTRSM_L_UKR, BLIS_DCOMPLEX, bli_zgemmtrsm_l_sifive_x280_asm,
169-
BLIS_GEMMTRSM_U_UKR, BLIS_FLOAT, bli_sgemmtrsm_u_sifive_x280_asm,
170-
BLIS_GEMMTRSM_U_UKR, BLIS_DOUBLE, bli_dgemmtrsm_u_sifive_x280_asm,
171-
BLIS_GEMMTRSM_U_UKR, BLIS_SCOMPLEX, bli_cgemmtrsm_u_sifive_x280_asm,
172-
BLIS_GEMMTRSM_U_UKR, BLIS_DCOMPLEX, bli_zgemmtrsm_u_sifive_x280_asm,
160+
BLIS_GEMM_UKR, BLIS_FLOAT, bli_sgemm_sifive_x280_intr,
161+
BLIS_GEMM_UKR, BLIS_DOUBLE, bli_dgemm_sifive_x280_intr,
162+
BLIS_GEMM_UKR, BLIS_SCOMPLEX, bli_cgemm_sifive_x280_intr,
163+
BLIS_GEMM_UKR, BLIS_DCOMPLEX, bli_zgemm_sifive_x280_intr,
164+
165+
BLIS_GEMMTRSM_L_UKR, BLIS_FLOAT, bli_sgemmtrsm_l_sifive_x280_intr,
166+
BLIS_GEMMTRSM_L_UKR, BLIS_DOUBLE, bli_dgemmtrsm_l_sifive_x280_intr,
167+
BLIS_GEMMTRSM_L_UKR, BLIS_SCOMPLEX, bli_cgemmtrsm_l_sifive_x280_intr,
168+
BLIS_GEMMTRSM_L_UKR, BLIS_DCOMPLEX, bli_zgemmtrsm_l_sifive_x280_intr,
169+
BLIS_GEMMTRSM_U_UKR, BLIS_FLOAT, bli_sgemmtrsm_u_sifive_x280_intr,
170+
BLIS_GEMMTRSM_U_UKR, BLIS_DOUBLE, bli_dgemmtrsm_u_sifive_x280_intr,
171+
BLIS_GEMMTRSM_U_UKR, BLIS_SCOMPLEX, bli_cgemmtrsm_u_sifive_x280_intr,
172+
BLIS_GEMMTRSM_U_UKR, BLIS_DCOMPLEX, bli_zgemmtrsm_u_sifive_x280_intr,
173173

174174
BLIS_VA_END
175175
);

config/sifive_x280/make_defs.mk

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -61,7 +61,7 @@ endif
6161
ifeq ($(DEBUG_TYPE),noopt)
6262
COPTFLAGS := -O0
6363
else
64-
COPTFLAGS := -Ofast
64+
COPTFLAGS := -O3
6565
endif
6666

6767
# Flags specific to optimized kernels.

0 commit comments

Comments
 (0)