Skip to content

Commit 240ca2e

Browse files
authored
Implement performance profiler and call stack dump, and update toolchain document (#501)
Also remove the redundant FAST_INTERP macros in wasm_interp_fast.c, and fix the wrong line order in the `wamrc --help` output. Signed-off-by: Wenyong Huang <[email protected]>
1 parent 794028a commit 240ca2e

26 files changed

+752
-109
lines changed

build-scripts/SConscript_config

Lines changed: 5 additions & 6 deletions
Original file line numberDiff line numberDiff line change
@@ -1,15 +1,14 @@
1-
2-
3-
import os
4-
import re
5-
6-
from building import *
71
#
82
# Copyright (c) 2021, RT-Thread Development Team
93
#
104
# SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
115
#
126

7+
import os
8+
import re
9+
10+
from building import *
11+
1312
Import('rtconfig')
1413

1514
src = Split('''

build-scripts/config_common.cmake

Lines changed: 8 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -173,13 +173,21 @@ if (WAMR_BUILD_MEMORY_PROFILING EQUAL 1)
173173
add_definitions (-DWASM_ENABLE_MEMORY_PROFILING=1)
174174
message (" Memory profiling enabled")
175175
endif ()
176+
if (WAMR_BUILD_PERF_PROFILING EQUAL 1)
177+
add_definitions (-DWASM_ENABLE_PERF_PROFILING=1)
178+
message (" Performance profiling enabled")
179+
endif ()
176180
if (DEFINED WAMR_APP_THREAD_STACK_SIZE_MAX)
177181
add_definitions (-DAPP_THREAD_STACK_SIZE_MAX=${WAMR_APP_THREAD_STACK_SIZE_MAX})
178182
endif ()
179183
if (WAMR_BUILD_CUSTOM_NAME_SECTION EQUAL 1)
180184
add_definitions (-DWASM_ENABLE_CUSTOM_NAME_SECTION=1)
181185
message (" Custom name section enabled")
182186
endif ()
187+
if (WAMR_BUILD_DUMP_CALL_STACK EQUAL 1)
188+
add_definitions (-DWASM_ENABLE_DUMP_CALL_STACK=1)
189+
message (" Dump call stack enabled")
190+
endif ()
183191
if (WAMR_BUILD_TAIL_CALL EQUAL 1)
184192
add_definitions (-DWASM_ENABLE_TAIL_CALL=1)
185193
message (" Tail call enabled")

core/config.h

Lines changed: 10 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -180,6 +180,16 @@
180180
#define WASM_ENABLE_MEMORY_TRACING 0
181181
#endif
182182

183+
/* Performance profiling */
184+
#ifndef WASM_ENABLE_PERF_PROFILING
185+
#define WASM_ENABLE_PERF_PROFILING 0
186+
#endif
187+
188+
/* Dump call stack */
189+
#ifndef WASM_ENABLE_DUMP_CALL_STACK
190+
#define WASM_ENABLE_DUMP_CALL_STACK 0
191+
#endif
192+
183193
/* Heap verification */
184194
#ifndef BH_ENABLE_GC_VERIFY
185195
#define BH_ENABLE_GC_VERIFY 0

core/iwasm/aot/aot_loader.c

Lines changed: 3 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -2174,6 +2174,9 @@ aot_convert_wasm_module(WASMModule *wasm_module,
21742174
#endif
21752175
#if WASM_ENABLE_SIMD != 0
21762176
option.enable_simd = true;
2177+
#endif
2178+
#if (WASM_ENABLE_PERF_PROFILING != 0) || (WASM_ENABLE_DUMP_CALL_STACK != 0)
2179+
option.enable_aux_stack_frame = true;
21772180
#endif
21782181
comp_ctx = aot_create_comp_context(comp_data, &option);
21792182
if (!comp_ctx) {

core/iwasm/aot/aot_reloc.h

Lines changed: 12 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -29,6 +29,14 @@ typedef struct {
2929
#define REG_ATOMIC_WAIT_SYM()
3030
#endif
3131

32+
#if (WASM_ENABLE_PERF_PROFILING != 0) || (WASM_ENABLE_DUMP_CALL_STACK != 0)
33+
#define REG_AOT_TRACE_SYM() \
34+
REG_SYM(aot_alloc_frame), \
35+
REG_SYM(aot_free_frame),
36+
#else
37+
#define REG_AOT_TRACE_SYM()
38+
#endif
39+
3240
#if (defined(_WIN32) || defined(_WIN32_)) && defined(NDEBUG)
3341
#define REG_COMMON_SYMBOLS \
3442
REG_SYM(aot_set_exception_with_id), \
@@ -39,7 +47,8 @@ typedef struct {
3947
REG_SYM(aot_memset), \
4048
REG_SYM(aot_memmove), \
4149
REG_BULK_MEMORY_SYM() \
42-
REG_ATOMIC_WAIT_SYM()
50+
REG_ATOMIC_WAIT_SYM() \
51+
REG_AOT_TRACE_SYM()
4352
#else /* else of (defined(_WIN32) || defined(_WIN32_)) && defined(NDEBUG) */
4453
#define REG_COMMON_SYMBOLS \
4554
REG_SYM(aot_set_exception_with_id), \
@@ -62,7 +71,8 @@ typedef struct {
6271
REG_SYM(rint), \
6372
REG_SYM(rintf), \
6473
REG_BULK_MEMORY_SYM() \
65-
REG_ATOMIC_WAIT_SYM()
74+
REG_ATOMIC_WAIT_SYM() \
75+
REG_AOT_TRACE_SYM()
6676
#endif /* end of (defined(_WIN32) || defined(_WIN32_)) && defined(NDEBUG) */
6777

6878
#define CHECK_RELOC_OFFSET(data_size) do { \

core/iwasm/aot/aot_runtime.c

Lines changed: 188 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -814,6 +814,15 @@ aot_instantiate(AOTModule *module, bool is_sub_inst,
814814
#endif
815815
module_inst->default_wasm_stack_size = stack_size;
816816

817+
#if WASM_ENABLE_PERF_PROFILING != 0
818+
total_size = (uint64)sizeof(AOTFuncPerfProfInfo) *
819+
(module->import_func_count + module->func_count);
820+
if (!(module_inst->func_perf_profilings.ptr =
821+
runtime_malloc(total_size, error_buf, error_buf_size))) {
822+
goto fail;
823+
}
824+
#endif
825+
817826
/* Execute __post_instantiate function and start function*/
818827
if (!execute_post_inst_function(module_inst)
819828
|| !execute_start_function(module_inst)) {
@@ -866,6 +875,11 @@ aot_deinstantiate(AOTModuleInstance *module_inst, bool is_sub_inst)
866875
wasm_runtime_destroy_wasi((WASMModuleInstanceCommon*)module_inst);
867876
#endif
868877

878+
#if WASM_ENABLE_PERF_PROFILING != 0
879+
if (module_inst->func_perf_profilings.ptr)
880+
wasm_runtime_free(module_inst->func_perf_profilings.ptr);
881+
#endif
882+
869883
if (module_inst->memories.ptr)
870884
memories_deinstantiate(module_inst);
871885

@@ -1128,16 +1142,38 @@ aot_call_function(WASMExecEnv *exec_env,
11281142
cell_num += wasm_value_type_cell_num(ext_ret_types[i]);
11291143
}
11301144

1145+
#if (WASM_ENABLE_DUMP_CALL_STACK != 0) || (WASM_ENABLE_PERF_PROFILING != 0)
1146+
if (!aot_alloc_frame(exec_env, function->func_index)) {
1147+
wasm_runtime_free(argv1);
1148+
return false;
1149+
}
1150+
#endif
1151+
11311152
ret = invoke_native_internal(exec_env, function->u.func.func_ptr,
11321153
func_type, NULL, NULL, argv1, argc, argv);
1154+
11331155
if (!ret || aot_get_exception(module_inst)) {
11341156
if (argv1 != argv1_buf)
11351157
wasm_runtime_free(argv1);
1158+
11361159
if (clear_wasi_proc_exit_exception(module_inst))
1137-
return true;
1138-
return false;
1160+
ret = true;
1161+
else
1162+
ret = false;
11391163
}
11401164

1165+
#if WASM_ENABLE_DUMP_CALL_STACK != 0
1166+
if (!ret) {
1167+
aot_dump_call_stack(exec_env);
1168+
}
1169+
#endif
1170+
1171+
#if (WASM_ENABLE_DUMP_CALL_STACK != 0) || (WASM_ENABLE_PERF_PROFILING != 0)
1172+
aot_free_frame(exec_env);
1173+
#endif
1174+
if (!ret)
1175+
return ret;
1176+
11411177
/* Get extra result values */
11421178
switch (func_type->types[func_type->param_count]) {
11431179
case VALUE_TYPE_I32:
@@ -1161,10 +1197,28 @@ aot_call_function(WASMExecEnv *exec_env,
11611197
return true;
11621198
}
11631199
else {
1200+
#if (WASM_ENABLE_DUMP_CALL_STACK != 0) || (WASM_ENABLE_PERF_PROFILING != 0)
1201+
if (!aot_alloc_frame(exec_env, function->func_index)) {
1202+
return false;
1203+
}
1204+
#endif
1205+
11641206
ret = invoke_native_internal(exec_env, function->u.func.func_ptr,
11651207
func_type, NULL, NULL, argv, argc, argv);
1208+
11661209
if (clear_wasi_proc_exit_exception(module_inst))
1167-
return true;
1210+
ret = true;
1211+
1212+
#if WASM_ENABLE_DUMP_CALL_STACK != 0
1213+
if (aot_get_exception(module_inst)) {
1214+
aot_dump_call_stack(exec_env);
1215+
}
1216+
#endif
1217+
1218+
#if (WASM_ENABLE_DUMP_CALL_STACK != 0) || (WASM_ENABLE_PERF_PROFILING != 0)
1219+
aot_free_frame(exec_env);
1220+
#endif
1221+
11681222
return ret && !aot_get_exception(module_inst) ? true : false;
11691223
}
11701224
}
@@ -2224,3 +2278,134 @@ aot_get_module_inst_mem_consumption(const AOTModuleInstance *module_inst,
22242278
}
22252279
#endif /* end of (WASM_ENABLE_MEMORY_PROFILING != 0)
22262280
|| (WASM_ENABLE_MEMORY_TRACING != 0) */
2281+
2282+
#if (WASM_ENABLE_DUMP_CALL_STACK != 0) || (WASM_ENABLE_PERF_PROFILING != 0)
2283+
static const char *
2284+
get_func_name_from_index(const AOTModuleInstance *module_inst,
2285+
uint32 func_index)
2286+
{
2287+
const char *func_name = NULL;
2288+
AOTModule *module = module_inst->aot_module.ptr;
2289+
2290+
if (func_index < module->import_func_count) {
2291+
func_name = module->import_funcs[func_index].func_name;
2292+
}
2293+
else {
2294+
uint32 i;
2295+
2296+
for (i = 0; i < module->export_count; i++) {
2297+
AOTExport export = module->exports[i];
2298+
if (export.index == func_index
2299+
&& export.kind == EXPORT_KIND_FUNC) {
2300+
func_name = export.name;
2301+
break;
2302+
}
2303+
}
2304+
}
2305+
2306+
return func_name;
2307+
}
2308+
2309+
bool
2310+
aot_alloc_frame(WASMExecEnv *exec_env, uint32 func_index)
2311+
{
2312+
AOTFrame *frame =
2313+
wasm_exec_env_alloc_wasm_frame(exec_env, sizeof(AOTFrame));
2314+
#if WASM_ENABLE_PERF_PROFILING != 0
2315+
AOTModuleInstance *module_inst =
2316+
(AOTModuleInstance*)exec_env->module_inst;
2317+
AOTFuncPerfProfInfo *func_perf_prof =
2318+
(AOTFuncPerfProfInfo*)module_inst->func_perf_profilings.ptr + func_index;
2319+
#endif
2320+
2321+
if (!frame) {
2322+
aot_set_exception((AOTModuleInstance*)exec_env->module_inst,
2323+
"auxiliary call stack overflow");
2324+
return false;
2325+
}
2326+
2327+
#if WASM_ENABLE_PERF_PROFILING != 0
2328+
frame->time_started = os_time_get_boot_microsecond();
2329+
frame->func_perf_prof_info = func_perf_prof;
2330+
#endif
2331+
2332+
frame->prev_frame = (AOTFrame *)exec_env->cur_frame;
2333+
exec_env->cur_frame = (struct WASMInterpFrame *)frame;
2334+
2335+
frame->func_index = func_index;
2336+
return true;
2337+
}
2338+
2339+
void
2340+
aot_free_frame(WASMExecEnv *exec_env)
2341+
{
2342+
AOTFrame *cur_frame = (AOTFrame *)exec_env->cur_frame;
2343+
AOTFrame *prev_frame = cur_frame->prev_frame;
2344+
2345+
#if WASM_ENABLE_PERF_PROFILING != 0
2346+
cur_frame->func_perf_prof_info->total_exec_time +=
2347+
os_time_get_boot_microsecond() - cur_frame->time_started;
2348+
cur_frame->func_perf_prof_info->total_exec_cnt++;
2349+
#endif
2350+
2351+
wasm_exec_env_free_wasm_frame(exec_env, cur_frame);
2352+
exec_env->cur_frame = (struct WASMInterpFrame *)prev_frame;
2353+
}
2354+
#endif /* end of (WASM_ENABLE_DUMP_CALL_STACK != 0)
2355+
|| (WASM_ENABLE_PERF_PROFILING != 0) */
2356+
2357+
#if WASM_ENABLE_DUMP_CALL_STACK != 0
2358+
void
2359+
aot_dump_call_stack(WASMExecEnv *exec_env)
2360+
{
2361+
AOTFrame *cur_frame = (AOTFrame *)exec_env->cur_frame;
2362+
AOTModuleInstance *module_inst =
2363+
(AOTModuleInstance *)exec_env->module_inst;
2364+
const char *func_name;
2365+
uint32 n = 0;
2366+
2367+
os_printf("\n");
2368+
while (cur_frame) {
2369+
func_name =
2370+
get_func_name_from_index(module_inst, cur_frame->func_index);
2371+
2372+
/* function name not exported, print number instead */
2373+
if (func_name == NULL) {
2374+
os_printf("#%02d $f%d \n", n, cur_frame->func_index);
2375+
}
2376+
else {
2377+
os_printf("#%02d %s \n", n, func_name);
2378+
}
2379+
2380+
cur_frame = cur_frame->prev_frame;
2381+
n++;
2382+
}
2383+
os_printf("\n");
2384+
}
2385+
#endif /* end of WASM_ENABLE_DUMP_CALL_STACK */
2386+
2387+
#if WASM_ENABLE_PERF_PROFILING != 0
2388+
void
2389+
aot_dump_perf_profiling(const AOTModuleInstance *module_inst)
2390+
{
2391+
AOTFuncPerfProfInfo *perf_prof = (AOTFuncPerfProfInfo *)
2392+
module_inst->func_perf_profilings.ptr;
2393+
AOTModule *module = (AOTModule *)module_inst->aot_module.ptr;
2394+
uint32 total_func_count = module->import_func_count + module->func_count, i;
2395+
const char *func_name;
2396+
2397+
os_printf("Performance profiler data:\n");
2398+
for (i = 0; i < total_func_count; i++, perf_prof++) {
2399+
func_name = get_func_name_from_index(module_inst, i);
2400+
2401+
if (func_name)
2402+
os_printf(" func %s, execution time: %.3f ms, execution count: %d times\n",
2403+
func_name, perf_prof->total_exec_time / 1000.0f,
2404+
perf_prof->total_exec_cnt);
2405+
else
2406+
os_printf(" func %d, execution time: %.3f ms, execution count: %d times\n",
2407+
i, perf_prof->total_exec_time / 1000.0f,
2408+
perf_prof->total_exec_cnt);
2409+
}
2410+
}
2411+
#endif /* end of WASM_ENABLE_PERF_PROFILING */

core/iwasm/aot/aot_runtime.h

Lines changed: 36 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -281,8 +281,13 @@ typedef struct AOTModuleInstance {
281281
uint32 llvm_stack;
282282
uint32 default_wasm_stack_size;
283283

284+
uint32 __padding;
285+
286+
/* function performance profiling info list */
287+
AOTPointer func_perf_profilings;
288+
284289
/* reserved */
285-
uint32 reserved[11];
290+
uint32 reserved[8];
286291

287292
union {
288293
uint64 _make_it_8_byte_aligned_;
@@ -311,6 +316,24 @@ typedef struct AOTTargetInfo {
311316
char arch[16];
312317
} AOTTargetInfo;
313318

319+
typedef struct AOTFuncPerfProfInfo
320+
{
321+
/* total execution time */
322+
uint64 total_exec_time;
323+
/* total execution count */
324+
uint32 total_exec_cnt;
325+
} AOTFuncPerfProfInfo;
326+
327+
/* AOT auxiliary call stack */
328+
typedef struct AOTFrame {
329+
struct AOTFrame *prev_frame;
330+
uint32 func_index;
331+
#if WASM_ENABLE_PERF_PROFILING != 0
332+
uint64 time_started;
333+
AOTFuncPerfProfInfo *func_perf_prof_info;
334+
#endif
335+
} AOTFrame;
336+
314337
/**
315338
* Load a AOT module from aot file buffer
316339
* @param buf the byte buffer which contains the AOT file data
@@ -568,6 +591,18 @@ void
568591
aot_get_module_inst_mem_consumption(const AOTModuleInstance *module_inst,
569592
WASMModuleInstMemConsumption *mem_conspn);
570593

594+
/**
 * Allocate a frame of the AOT auxiliary call stack for a function and
 * make it the current frame of the exec env.
 * Only defined when WASM_ENABLE_DUMP_CALL_STACK or
 * WASM_ENABLE_PERF_PROFILING is non-zero.
 *
 * @param exec_env the execution environment
 * @param func_index index of the function being entered
 *
 * @return true if success, false if the frame cannot be allocated, in
 *         which case an exception is set on the module instance
 */
bool
595+
aot_alloc_frame(WASMExecEnv *exec_env, uint32 func_index);
596+
597+
/**
 * Free the current frame of the AOT auxiliary call stack and restore
 * the previous frame as the current one.
 * Only defined when WASM_ENABLE_DUMP_CALL_STACK or
 * WASM_ENABLE_PERF_PROFILING is non-zero.
 *
 * @param exec_env the execution environment
 */
void
598+
aot_free_frame(WASMExecEnv *exec_env);
599+
600+
/**
 * Print the AOT auxiliary call stack, starting from the current frame.
 * Only defined when WASM_ENABLE_DUMP_CALL_STACK is non-zero.
 *
 * @param exec_env the execution environment
 */
void
601+
aot_dump_call_stack(WASMExecEnv *exec_env);
602+
603+
/**
 * Print the execution time and execution count recorded for each
 * function of the module instance.
 * Only defined when WASM_ENABLE_PERF_PROFILING is non-zero.
 *
 * @param module_inst the AOT module instance
 */
void
604+
aot_dump_perf_profiling(const AOTModuleInstance *module_inst);
605+
571606
#ifdef __cplusplus
572607
} /* end of extern "C" */
573608
#endif

0 commit comments

Comments
 (0)