Skip to content

Commit 46db353

Browse files
qinxk-interWu Zhongmin
andauthored
Enable SIMD for AARCH64 Platform (#11) (#610)
Signed-off-by: Wu Zhongmin <[email protected]> Signed-off-by: Xiaokang Qin <[email protected]> Co-authored-by: Wu Zhongmin <[email protected]> Co-authored-by: Wu Zhongmin <[email protected]>
1 parent 8b96f4f commit 46db353

File tree

7 files changed

+557
-8
lines changed

7 files changed

+557
-8
lines changed

core/iwasm/aot/aot_loader.c

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -285,7 +285,7 @@ check_machine_info(AOTTargetInfo *target_info,
285285
error_buf, error_buf_size))
286286
return false;
287287

288-
if (strcmp(target_expected, target_got)) {
288+
if (strncmp(target_expected, target_got, strlen(target_expected))) {
289289
set_error_buf_v(error_buf, error_buf_size,
290290
"invalid target type, expected %s but got %s",
291291
target_expected, target_got);
Lines changed: 79 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,79 @@
1+
/*
2+
* Copyright (C) 2020 Intel Corporation Corporation. All rights reserved.
3+
* SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
4+
*/
5+
.text
6+
.align 2
7+
#ifndef BH_PLATFORM_DARWIN
8+
.globl invokeNative
9+
.type invokeNative, function
10+
invokeNative:
11+
#else
12+
.globl _invokeNative
13+
_invokeNative:
14+
#endif /* end of BH_PLATFORM_DARWIN */
15+
16+
/*
17+
* Arguments passed in:
18+
*
19+
* x0 function ptr
20+
* x1 argv
21+
* x2 nstacks
22+
*/
23+
24+
sub sp, sp, #0x30
25+
stp x19, x20, [sp, #0x20] /* save the registers */
26+
stp x21, x22, [sp, #0x10]
27+
stp x23, x24, [sp, #0x0]
28+
29+
mov x19, x0 /* x19 = function ptr */
30+
mov x20, x1 /* x20 = argv */
31+
mov x21, x2 /* x21 = nstacks */
32+
mov x22, sp /* save the sp before call function */
33+
34+
/* Fill in float-point registers */
35+
ld1 {v0.2D, v1.2D, v2.2D, v3.2D}, [x20], #64 /* v0 = argv[0], v1 = argv[1], v2 = argv[2], v3 = argv[3]*/
36+
ld1 {v4.2D, v5.2D, v6.2D, v7.2D}, [x20], #64 /* v4 = argv[4], v5 = argv[5], v6 = argv[6], v7 = argv[7]*/
37+
38+
/* Fill inteter registers */
39+
ldp x0, x1, [x20], #16 /* x0 = argv[8] = exec_env, x1 = argv[9] */
40+
ldp x2, x3, [x20], #16 /* x2 = argv[10], x3 = argv[11] */
41+
ldp x4, x5, [x20], #16 /* x4 = argv[12], x5 = argv[13] */
42+
ldp x6, x7, [x20], #16 /* x6 = argv[14], x7 = argv[15] */
43+
44+
/* Now x20 points to stack args */
45+
46+
/* Directly call the fucntion if no args in stack */
47+
cmp x21, #0
48+
beq call_func
49+
50+
/* Fill all stack args: reserve stack space and fill one by one */
51+
mov x23, sp
52+
bic sp, x23, #15 /* Ensure stack is 16 bytes aligned */
53+
lsl x23, x21, #3 /* x23 = nstacks * 8 */
54+
add x23, x23, #15 /* x23 = (x23 + 15) & ~15 */
55+
bic x23, x23, #15
56+
sub sp, sp, x23 /* reserved stack space for stack arguments */
57+
mov x23, sp
58+
59+
loop_stack_args: /* copy stack arguments to stack */
60+
cmp x21, #0
61+
beq call_func
62+
ldr x24, [x20], #8
63+
str x24, [x23], #8
64+
sub x21, x21, #1
65+
b loop_stack_args
66+
67+
call_func:
68+
mov x20, x30 /* save x30(lr) */
69+
blr x19
70+
mov sp, x22 /* restore sp which is saved before calling fuction*/
71+
72+
return:
73+
mov x30, x20 /* restore x30(lr) */
74+
ldp x19, x20, [sp, #0x20] /* restore the registers in stack */
75+
ldp x21, x22, [sp, #0x10]
76+
ldp x23, x24, [sp, #0x0]
77+
add sp, sp, #0x30 /* restore sp */
78+
ret
79+

core/iwasm/common/iwasm_common.cmake

Lines changed: 5 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -43,7 +43,11 @@ elseif (WAMR_BUILD_TARGET MATCHES "THUMB.*")
4343
set (source_all ${c_source_all} ${IWASM_COMMON_DIR}/arch/invokeNative_thumb.s)
4444
endif ()
4545
elseif (WAMR_BUILD_TARGET MATCHES "AARCH64.*")
46-
set (source_all ${c_source_all} ${IWASM_COMMON_DIR}/arch/invokeNative_aarch64.s)
46+
if (NOT WAMR_BUILD_SIMD EQUAL 1)
47+
set (source_all ${c_source_all} ${IWASM_COMMON_DIR}/arch/invokeNative_aarch64.s)
48+
else()
49+
set (source_all ${c_source_all} ${IWASM_COMMON_DIR}/arch/invokeNative_aarch64_simd.s)
50+
endif()
4751
elseif (WAMR_BUILD_TARGET STREQUAL "MIPS")
4852
set (source_all ${c_source_all} ${IWASM_COMMON_DIR}/arch/invokeNative_mips.s)
4953
elseif (WAMR_BUILD_TARGET STREQUAL "XTENSA")

core/iwasm/common/wasm_runtime_common.c

Lines changed: 6 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -3414,10 +3414,14 @@ typedef union __declspec(intrin_type) __declspec(align(8)) v128 {
34143414
unsigned __int32 m128i_u32[4];
34153415
unsigned __int64 m128i_u64[2];
34163416
} v128;
3417-
#else
3417+
#elif defined(BUILD_TARGET_X86_64) || defined(BUILD_TARGET_AMD_64)
34183418
typedef long long v128 __attribute__ ((__vector_size__ (16),
34193419
__may_alias__, __aligned__ (1)));
3420-
#endif /* end of defined(_WIN32) || defined(_WIN32_) */
3420+
#elif defined(BUILD_TARGET_AARCH64)
3421+
#include <arm_neon.h>
3422+
typedef uint32x4_t __m128i;
3423+
#define v128 __m128i
3424+
#endif
34213425

34223426
#endif /* end of WASM_ENABLE_SIMD != 0 */
34233427

core/iwasm/compilation/aot_llvm.c

Lines changed: 2 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -1496,7 +1496,8 @@ aot_create_comp_context(AOTCompData *comp_data,
14961496
}
14971497

14981498
if (option->enable_simd
1499-
&& strcmp(comp_ctx->target_arch, "x86_64") != 0) {
1499+
&& strcmp(comp_ctx->target_arch, "x86_64") != 0
1500+
&& strncmp(comp_ctx->target_arch, "aarch64", 7) != 0) {
15001501
/* Disable simd if it isn't supported by target arch */
15011502
option->enable_simd = false;
15021503
}

core/iwasm/compilation/simd/simd_access_lanes.c

Lines changed: 111 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -8,6 +8,13 @@
88
#include "../aot_emit_exception.h"
99
#include "../../aot/aot_runtime.h"
1010

11+
static bool
12+
is_target_x86(AOTCompContext *comp_ctx)
13+
{
14+
return !strncmp(comp_ctx->target_arch, "x86_64", 6) ||
15+
!strncmp(comp_ctx->target_arch, "i386", 4);
16+
}
17+
1118
static LLVMValueRef
1219
build_intx16_vector(const AOTCompContext *comp_ctx,
1320
const LLVMTypeRef element_type,
@@ -86,7 +93,7 @@ aot_compile_simd_shuffle(AOTCompContext *comp_ctx,
8693
/* TODO: instructions for other CPUs */
8794
/* shufflevector is not an option, since it requires *mask as a const */
8895
bool
89-
aot_compile_simd_swizzle(AOTCompContext *comp_ctx, AOTFuncContext *func_ctx)
96+
aot_compile_simd_swizzle_x86(AOTCompContext *comp_ctx, AOTFuncContext *func_ctx)
9097
{
9198
LLVMValueRef vector, mask, max_lanes, condition, mask_lanes, result;
9299
LLVMTypeRef param_types[2];
@@ -151,6 +158,109 @@ aot_compile_simd_swizzle(AOTCompContext *comp_ctx, AOTFuncContext *func_ctx)
151158
return false;
152159
}
153160

161+
bool
162+
aot_compile_simd_swizzle(AOTCompContext *comp_ctx, AOTFuncContext *func_ctx)
163+
{
164+
LLVMValueRef vector, mask, default_lane_value, condition, max_lane_id,
165+
result, idx, id, replace_with_zero, elem, elem_or_zero, undef;
166+
uint8 i;
167+
168+
if (is_target_x86(comp_ctx)) {
169+
return aot_compile_simd_swizzle_x86(comp_ctx, func_ctx);
170+
}
171+
172+
int const_lane_ids[16] = { 16, 16, 16, 16, 16, 16, 16, 16,
173+
16, 16, 16, 16, 16, 16, 16, 16 },
174+
const_zeors[16] = { 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0,
175+
0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0 };
176+
177+
if (!(mask = simd_pop_v128_and_bitcast(comp_ctx, func_ctx, V128_i8x16_TYPE,
178+
"mask"))) {
179+
goto fail;
180+
}
181+
182+
if (!(vector = simd_pop_v128_and_bitcast(comp_ctx, func_ctx,
183+
V128_i8x16_TYPE, "vec"))) {
184+
goto fail;
185+
}
186+
187+
if (!(undef = LLVMGetUndef(V128_i8x16_TYPE))) {
188+
HANDLE_FAILURE("LLVMGetUndef");
189+
goto fail;
190+
}
191+
192+
/* icmp uge <16 x i8> mask, <16, 16, 16, 16, ...> */
193+
if (!(max_lane_id =
194+
build_intx16_vector(comp_ctx, INT8_TYPE, const_lane_ids))) {
195+
goto fail;
196+
}
197+
198+
if (!(condition = LLVMBuildICmp(comp_ctx->builder, LLVMIntUGE, mask,
199+
max_lane_id, "out_of_range"))) {
200+
HANDLE_FAILURE("LLVMBuldICmp");
201+
goto fail;
202+
}
203+
204+
/* if the id is out of range (>=16), set the id as 0 */
205+
if (!(default_lane_value =
206+
build_intx16_vector(comp_ctx, INT8_TYPE, const_zeors))) {
207+
goto fail;
208+
}
209+
210+
if (!(idx = LLVMBuildSelect(comp_ctx->builder, condition,
211+
default_lane_value, mask, "mask"))) {
212+
HANDLE_FAILURE("LLVMBuildSelect");
213+
goto fail;
214+
}
215+
216+
for (i = 0; i < 16; i++) {
217+
if (!(id = LLVMBuildExtractElement(comp_ctx->builder, idx, I8_CONST(i),
218+
"id"))) {
219+
HANDLE_FAILURE("LLVMBuildExtractElement");
220+
goto fail;
221+
}
222+
223+
if (!(replace_with_zero =
224+
LLVMBuildExtractElement(comp_ctx->builder, condition,
225+
I8_CONST(i), "replace_with_zero"))) {
226+
HANDLE_FAILURE("LLVMBuildExtractElement");
227+
goto fail;
228+
}
229+
230+
if (!(elem = LLVMBuildExtractElement(comp_ctx->builder, vector, id,
231+
"vector[mask[i]]"))) {
232+
HANDLE_FAILURE("LLVMBuildExtractElement");
233+
goto fail;
234+
}
235+
236+
if (!(elem_or_zero =
237+
LLVMBuildSelect(comp_ctx->builder, replace_with_zero,
238+
I8_CONST(0), elem, "elem_or_zero"))) {
239+
HANDLE_FAILURE("LLVMBuildSelect");
240+
goto fail;
241+
}
242+
243+
if (!(undef =
244+
LLVMBuildInsertElement(comp_ctx->builder, undef, elem_or_zero,
245+
I8_CONST(i), "new_vector"))) {
246+
HANDLE_FAILURE("LLVMBuildInsertElement");
247+
goto fail;
248+
}
249+
}
250+
251+
if (!(result = LLVMBuildBitCast(comp_ctx->builder, undef, V128_i64x2_TYPE,
252+
"ret"))) {
253+
HANDLE_FAILURE("LLVMBuildBitCast");
254+
goto fail;
255+
}
256+
257+
PUSH_V128(result);
258+
259+
return true;
260+
fail:
261+
return false;
262+
}
263+
154264
static bool
155265
aot_compile_simd_extract(AOTCompContext *comp_ctx,
156266
AOTFuncContext *func_ctx,

0 commit comments

Comments
 (0)