Skip to content

Commit 73a7ff8

Browse files
committed
mem props API POC
1 parent c6d8e11 commit 73a7ff8

39 files changed

+975
-77
lines changed

.github/workflows/.spellcheck-conf.toml

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -1,6 +1,6 @@
11
[default]
22
# Don't correct the following words:
3-
extend-ignore-words-re = ["ASSER", "Tne", "ba", "BA", "PN"]
3+
extend-ignore-words-re = ["ASSER", "Tne", "ba", "BA", "PN", "usm"]
44

55
[files]
66
# completely exclude those files from consideration:

.github/workflows/nightly.yml

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -204,7 +204,7 @@ jobs:
204204
VCPKG_PATH: "${{github.workspace}}/build/vcpkg/packages/hwloc_x64-windows;${{github.workspace}}/build/vcpkg/packages/tbb_x64-windows;${{github.workspace}}/build/vcpkg/packages/jemalloc_x64-windows"
205205
strategy:
206206
matrix:
207-
os: ['windows-2019', 'windows-2022']
207+
os: ['windows-2022']
208208
build_type: [Debug]
209209
compiler: [{c: icx, cxx: icx}]
210210
shared_library: ['ON', 'OFF']

.github/workflows/reusable_basic.yml

Lines changed: 3 additions & 11 deletions
Original file line numberDiff line numberDiff line change
@@ -195,11 +195,12 @@ jobs:
195195
${{ matrix.compiler.cxx == 'icpx' && '. /opt/intel/oneapi/setvars.sh' || true }}
196196
cmake --build ${{env.BUILD_DIR}} -j $(nproc)
197197
198+
# UMF_LOG="level:debug;flush:debug;output:stderr;pid:no"
198199
- name: Run tests
199200
working-directory: ${{env.BUILD_DIR}}
200201
run: |
201202
${{ matrix.compiler.cxx == 'icpx' && '. /opt/intel/oneapi/setvars.sh' || true }}
202-
LD_LIBRARY_PATH="${{env.BUILD_DIR}}/lib/:${LD_LIBRARY_PATH}" ctest --output-on-failure
203+
LD_LIBRARY_PATH="${{env.BUILD_DIR}}/lib/:${LD_LIBRARY_PATH}" ctest --output-on-failure -R "test_provider_os_memory"
203204
204205
- name: Check coverage
205206
if: ${{ matrix.build_type == 'Debug' && matrix.compiler.c == 'gcc' }}
@@ -237,23 +238,14 @@ jobs:
237238
VCPKG_PATH_BIN: "${{github.workspace}}/build/vcpkg/packages/hwloc_x64-windows/bin;${{github.workspace}}/build/vcpkg/packages/tbb_x64-windows/bin;${{github.workspace}}/build/vcpkg/packages/jemalloc_x64-windows/bin"
238239
strategy:
239240
matrix:
240-
os: ['windows-2019', 'windows-2022']
241+
os: ['windows-2022']
241242
build_type: [Debug, Release]
242243
compiler: [{c: cl, cxx: cl}]
243244
shared_library: ['ON', 'OFF']
244245
level_zero_provider: ['ON']
245246
cuda_provider: ['ON']
246247
cmake_ver: ['default']
247248
include:
248-
- os: 'windows-2019'
249-
# clang build fails on Windows 2022
250-
build_type: Release
251-
compiler: {c: clang-cl, cxx: clang-cl}
252-
shared_library: 'ON'
253-
level_zero_provider: 'ON'
254-
cuda_provider: 'ON'
255-
toolset: "-T ClangCL"
256-
cmake_ver: '3.14.0-win64-x64'
257249
- os: 'windows-2022'
258250
build_type: Release
259251
compiler: {c: cl, cxx: cl}

FlameGraph

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1 @@
1+
Subproject commit 41fee1f99f9276008b7cd112fca19dc3ea84ac32

bench.sh

Lines changed: 56 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,56 @@
#!/bin/bash
#
# Copyright (C) 2025 Intel Corporation
#
# Under the Apache License v2.0 with LLVM Exceptions. See LICENSE.TXT.
# SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
#

# Records a perf profile of the given application and renders it as a flame
# graph (flamegraph.svg) using the scripts from the FlameGraph repository.
#
# Usage: bench.sh <application> [args...]

# Check if at least one argument is provided
if [ "$#" -lt 1 ]; then
    echo "Usage: $0 <application> [args...]"
    exit 1
fi

# Variables
APP="$1"                      # The application to run
shift                         # Remove the application from the arguments list
ARGS=("$@")                   # Remaining arguments, kept as an array so that
                              # arguments containing spaces are not re-split
# The user to own the generated perf data: the invoking user, also when the
# whole script is started through sudo. A separate variable is used so the
# shell's own $USER is not overwritten.
PERF_DATA_OWNER="${SUDO_USER:-$(id -un)}"
FLAMEGRAPH_DIR="./FlameGraph" # Path to the FlameGraph repository

# Check if FlameGraph repository exists
if [ ! -d "$FLAMEGRAPH_DIR" ]; then
    echo "Error: FlameGraph directory not found at $FLAMEGRAPH_DIR."
    echo "Clone it using: git clone https://github.com/brendangregg/FlameGraph.git"
    exit 1
fi

# Run application under perf
echo "Recording performance data..."
sudo perf record -F 99 -g --call-graph dwarf -- "$APP" "${ARGS[@]}"

# Change ownership of the generated perf data (perf ran as root above)
echo "Changing ownership of perf data..."
sudo chown "$PERF_DATA_OWNER" perf.data

# Process perf.data into a readable format
echo "Processing perf data..."
perf script > out.perf

# Generate folded stacks
echo "Generating folded stacks..."
"$FLAMEGRAPH_DIR/stackcollapse-perf.pl" out.perf > out.folded

# Generate the flame graph
echo "Generating flame graph..."
"$FLAMEGRAPH_DIR/flamegraph.pl" out.folded > flamegraph.svg

# Open the flame graph in Firefox
echo "Opening flame graph in Firefox..."
firefox flamegraph.svg &

echo "Done! The flame graph is saved as flamegraph.svg."
55+
56+

benchmark/ubench.c

Lines changed: 234 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -15,6 +15,7 @@
1515

1616
#include <umf/ipc.h>
1717
#include <umf/memory_pool.h>
18+
#include <umf/memory_props.h>
1819
#include <umf/pools/pool_disjoint.h>
1920
#include <umf/pools/pool_proxy.h>
2021
#include <umf/pools/pool_scalable.h>
@@ -438,6 +439,47 @@ static void do_ipc_get_put_benchmark(alloc_t *allocs, size_t num_allocs,
438439
}
439440
}
440441

442+
// Queries the memory type of every allocation in `allocs`, either through the
// UMF memory properties API (use_umf == true) or through Level Zero via
// utils_ze_get_mem_props() (use_umf == false). Every allocation is expected to
// report device memory; any other type prints an error and terminates the
// process. The full pass over `allocs` is executed `repeats * 10` times.
static void do_umf_mem_props_benchmark(ze_context_handle_t context,
                                       bool use_umf, alloc_t *allocs,
                                       size_t num_allocs, size_t repeats) {
    assert(context != NULL);

    const size_t total_rounds = repeats * 10;

    for (size_t round = 0; round < total_rounds; ++round) {
        for (size_t idx = 0; idx < num_allocs; ++idx) {
            if (!use_umf) {
                // Level Zero path
                ze_memory_allocation_properties_t ze_props = {0};
                ze_device_handle_t ze_device = 0;
                // calls zeMemGetAllocProperties()
                utils_ze_get_mem_props(context, allocs[idx].ptr, &ze_props,
                                       &ze_device);
                if (ze_props.type != ZE_MEMORY_TYPE_DEVICE) {
                    fprintf(stderr,
                            "error: unexpected alloc_props.type value: %d\n",
                            ze_props.type);
                    exit(-1);
                }
                continue;
            }

            // UMF path: fetch the properties handle, then read the pointer
            // type from it
            umf_memory_properties_handle_t handle = NULL;
            umf_result_t status =
                umfGetMemoryPropertiesHandle(allocs[idx].ptr, &handle);
            (void)status; // only inspected by assert(), which may be disabled
            assert(status == UMF_RESULT_SUCCESS);

            umf_usm_memory_type_t mem_type = UMF_MEMORY_TYPE_UNKNOWN;
            status = umfGetMemoryProperty(
                handle, UMF_MEMORY_PROPERTY_POINTER_TYPE, &mem_type);
            assert(status == UMF_RESULT_SUCCESS);

            if (mem_type != UMF_MEMORY_TYPE_DEVICE) {
                fprintf(stderr,
                        "error: unexpected alloc_props.type value: %d\n",
                        mem_type);
                exit(-1);
            }
        }
    }
}
482+
441483
static int create_level_zero_params(ze_context_handle_t *context,
442484
ze_device_handle_t *device) {
443485
uint32_t driver_idx = 0;
@@ -623,6 +665,198 @@ UBENCH_EX(ipc, disjoint_pool_with_level_zero_provider) {
623665
err_destroy_context:
624666
utils_ze_destroy_context(context);
625667
}
668+
669+
UBENCH_EX(mem_props, level_zero) {
670+
const size_t BUFFER_SIZE = 100;
671+
const size_t N_BUFFERS = 1000;
672+
673+
alloc_t *allocs = alloc_array(N_BUFFERS);
674+
if (allocs == NULL) {
675+
fprintf(stderr, "error: alloc_array() failed\n");
676+
}
677+
678+
ze_context_handle_t context = NULL;
679+
ze_device_handle_t device = NULL;
680+
int ret = create_level_zero_params(&context, &device);
681+
if (ret != 0) {
682+
fprintf(stderr, "error: create_level_zero_params() failed\n");
683+
exit(-1);
684+
}
685+
686+
ze_device_mem_alloc_desc_t dev_desc = {
687+
.stype = ZE_STRUCTURE_TYPE_DEVICE_MEM_ALLOC_DESC,
688+
.pNext = NULL,
689+
.flags = 0,
690+
.ordinal = 0};
691+
692+
for (size_t i = 0; i < N_BUFFERS; ++i) {
693+
ze_result_t ze_result = zeMemAllocDevice(
694+
context, &dev_desc, BUFFER_SIZE, 0, device, &allocs[i].ptr);
695+
if (ze_result != ZE_RESULT_SUCCESS) {
696+
fprintf(stderr, "error: zeMemAllocDevice() failed\n");
697+
}
698+
allocs[i].size = BUFFER_SIZE;
699+
}
700+
701+
do_umf_mem_props_benchmark(context, false, allocs, N_BUFFERS,
702+
1); // WARMUP
703+
UBENCH_DO_BENCHMARK() {
704+
do_umf_mem_props_benchmark(context, false, allocs, N_BUFFERS,
705+
N_ITERATIONS);
706+
}
707+
708+
for (size_t i = 0; i < N_BUFFERS; ++i) {
709+
zeMemFree(context, allocs[i].ptr);
710+
}
711+
712+
free(allocs);
713+
utils_ze_destroy_context(context);
714+
}
715+
716+
UBENCH_EX(mem_props, disjoint_pool_with_level_zero_provider_use_umf) {
717+
const size_t BUFFER_SIZE = 4 * 1024;
718+
const size_t N_BUFFERS = 1000;
719+
umf_result_t umf_result;
720+
ze_context_handle_t context = NULL;
721+
ze_device_handle_t device = NULL;
722+
umf_level_zero_memory_provider_params_handle_t level_zero_params = NULL;
723+
724+
int ret = create_level_zero_params(&context, &device);
725+
if (ret != 0) {
726+
fprintf(stderr, "error: create_level_zero_params() failed\n");
727+
exit(-1);
728+
}
729+
730+
umf_result = umfLevelZeroMemoryProviderParamsCreate(&level_zero_params);
731+
if (umf_result != UMF_RESULT_SUCCESS) {
732+
fprintf(stderr,
733+
"error: umfLevelZeroMemoryProviderParamsCreate() failed\n");
734+
goto err_destroy_context;
735+
}
736+
737+
umf_result =
738+
umfLevelZeroMemoryProviderParamsSetContext(level_zero_params, context);
739+
if (umf_result != UMF_RESULT_SUCCESS) {
740+
fprintf(stderr,
741+
"error: umfLevelZeroMemoryProviderParamsSetContext() failed\n");
742+
goto err_destroy_params;
743+
}
744+
745+
umf_result =
746+
umfLevelZeroMemoryProviderParamsSetDevice(level_zero_params, device);
747+
if (umf_result != UMF_RESULT_SUCCESS) {
748+
fprintf(stderr,
749+
"error: umfLevelZeroMemoryProviderParamsSetDevice() failed\n");
750+
goto err_destroy_params;
751+
}
752+
753+
umf_result = umfLevelZeroMemoryProviderParamsSetMemoryType(
754+
level_zero_params, UMF_MEMORY_TYPE_DEVICE);
755+
if (umf_result != UMF_RESULT_SUCCESS) {
756+
fprintf(
757+
stderr,
758+
"error: umfLevelZeroMemoryProviderParamsSetMemoryType() failed\n");
759+
goto err_destroy_params;
760+
}
761+
762+
alloc_t *allocs = alloc_array(N_BUFFERS);
763+
if (allocs == NULL) {
764+
fprintf(stderr, "error: alloc_array() failed\n");
765+
goto err_destroy_context;
766+
}
767+
768+
umf_memory_provider_handle_t provider = NULL;
769+
umf_result = umfMemoryProviderCreate(umfLevelZeroMemoryProviderOps(),
770+
level_zero_params, &provider);
771+
if (umf_result != UMF_RESULT_SUCCESS) {
772+
fprintf(stderr, "error: umfMemoryProviderCreate() failed\n");
773+
goto err_free_allocs;
774+
}
775+
776+
umf_disjoint_pool_params_handle_t disjoint_params = NULL;
777+
umf_result = umfDisjointPoolParamsCreate(&disjoint_params);
778+
if (umf_result != UMF_RESULT_SUCCESS) {
779+
fprintf(stderr, "ERROR: umfDisjointPoolParamsCreate failed\n");
780+
goto err_provider_destroy;
781+
}
782+
783+
umf_result =
784+
umfDisjointPoolParamsSetSlabMinSize(disjoint_params, BUFFER_SIZE * 10);
785+
if (umf_result != UMF_RESULT_SUCCESS) {
786+
fprintf(stderr,
787+
"error: umfDisjointPoolParamsSetSlabMinSize() failed\n");
788+
goto err_params_destroy;
789+
}
790+
791+
umf_result = umfDisjointPoolParamsSetMaxPoolableSize(
792+
disjoint_params, 4ull * 1024ull * 1024ull);
793+
if (umf_result != UMF_RESULT_SUCCESS) {
794+
fprintf(stderr,
795+
"error: umfDisjointPoolParamsSetMaxPoolableSize() failed\n");
796+
goto err_params_destroy;
797+
}
798+
799+
umf_result =
800+
umfDisjointPoolParamsSetCapacity(disjoint_params, 64ull * 1024ull);
801+
if (umf_result != UMF_RESULT_SUCCESS) {
802+
fprintf(stderr, "error: umfDisjointPoolParamsSetCapacity() failed\n");
803+
goto err_params_destroy;
804+
}
805+
806+
umf_result = umfDisjointPoolParamsSetMinBucketSize(disjoint_params, 64);
807+
if (umf_result != UMF_RESULT_SUCCESS) {
808+
fprintf(stderr,
809+
"error: umfDisjointPoolParamsSetMinBucketSize() failed\n");
810+
goto err_params_destroy;
811+
}
812+
813+
umf_pool_create_flags_t flags = UMF_POOL_CREATE_FLAG_NONE;
814+
umf_memory_pool_handle_t pool;
815+
umf_result = umfPoolCreate(umfDisjointPoolOps(), provider, disjoint_params,
816+
flags, &pool);
817+
if (umf_result != UMF_RESULT_SUCCESS) {
818+
fprintf(stderr, "error: umfPoolCreate() failed\n");
819+
goto err_params_destroy;
820+
}
821+
822+
for (size_t i = 0; i < N_BUFFERS; ++i) {
823+
allocs[i].ptr = umfPoolMalloc(pool, BUFFER_SIZE);
824+
if (allocs[i].ptr == NULL) {
825+
goto err_buffer_destroy;
826+
}
827+
allocs[i].size = BUFFER_SIZE;
828+
}
829+
830+
do_umf_mem_props_benchmark(context, true, allocs, N_BUFFERS,
831+
1); // WARMUP
832+
UBENCH_DO_BENCHMARK() {
833+
do_umf_mem_props_benchmark(context, true, allocs, N_BUFFERS,
834+
N_ITERATIONS);
835+
}
836+
837+
err_buffer_destroy:
838+
for (size_t i = 0; i < N_BUFFERS; ++i) {
839+
umfPoolFree(pool, allocs[i].ptr);
840+
}
841+
842+
umfPoolDestroy(pool);
843+
844+
err_params_destroy:
845+
umfDisjointPoolParamsDestroy(disjoint_params);
846+
847+
err_provider_destroy:
848+
umfMemoryProviderDestroy(provider);
849+
850+
err_free_allocs:
851+
free(allocs);
852+
853+
err_destroy_params:
854+
umfLevelZeroMemoryProviderParamsDestroy(level_zero_params);
855+
856+
err_destroy_context:
857+
//utils_ze_destroy_context(context);
858+
}
859+
626860
#endif /* (defined UMF_BUILD_LEVEL_ZERO_PROVIDER && defined UMF_BUILD_GPU_TESTS) */
627861

628862
// TODO add IPC benchmark for CUDA

docs/config/api.rst

Lines changed: 8 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -28,6 +28,14 @@ memory as well as functions that create, destroy and operate on the pool.
2828
.. doxygenfile:: memory_pool.h
2929
:sections: define enum typedef func var
3030

31+
Memory Properties
32+
------------------------------------------
33+
34+
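The memory properties API retrieves a properties handle for a pointer and
queries individual properties of the underlying allocation, such as its
memory type.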
35+
36+
.. doxygenfile:: memory_props.h
37+
:sections: define enum typedef func var
38+
3139
Disjoint Pool
3240
------------------------------------------
3341

0 commit comments

Comments
 (0)