Skip to content

Commit 5ad9790

Browse files
authored
[SYCL][Graph] Fix profiling info when bypassing scheduler (#14678)
- Fix not recording submission time when bypassing scheduler for graph execution - Add a new test for profiling info which better covers this case
1 parent d99277f commit 5ad9790

File tree

4 files changed

+198
-65
lines changed

4 files changed

+198
-65
lines changed

sycl/source/detail/graph_impl.cpp

Lines changed: 3 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -907,8 +907,10 @@ exec_graph_impl::enqueue(const std::shared_ptr<sycl::detail::queue_impl> &Queue,
907907
// If we have no requirements or dependent events for the command buffer,
908908
// enqueue it directly
909909
if (CGData.MRequirements.empty() && CGData.MEvents.empty()) {
910-
if (NewEvent != nullptr)
910+
if (NewEvent != nullptr) {
911+
NewEvent->setSubmissionTime();
911912
NewEvent->setHostEnqueueTime();
913+
}
912914
pi_result Res =
913915
Queue->getPlugin()
914916
->call_nocheck<

sycl/test-e2e/Graph/Profiling/event_profiling_info.cpp

Lines changed: 1 addition & 64 deletions
Original file line numberDiff line numberDiff line change
@@ -10,75 +10,12 @@
1010
// from graph submission with event::get_profiling_info().
1111
// It first tests a graph made exclusively of memory operations,
1212
// then tests a graph made of kernels.
13+
#define GRAPH_TESTS_VERBOSE_PRINT 0
1314

1415
#include "../graph_common.hpp"
1516

1617
#include <sycl/properties/all_properties.hpp>
1718

18-
#define GRAPH_TESTS_VERBOSE_PRINT 0
19-
20-
#if GRAPH_TESTS_VERBOSE_PRINT
21-
#include <chrono>
22-
#endif
23-
24-
bool verifyProfiling(event Event) {
25-
auto Submit =
26-
Event.get_profiling_info<sycl::info::event_profiling::command_submit>();
27-
auto Start =
28-
Event.get_profiling_info<sycl::info::event_profiling::command_start>();
29-
auto End =
30-
Event.get_profiling_info<sycl::info::event_profiling::command_end>();
31-
32-
#if GRAPH_TESTS_VERBOSE_PRINT
33-
std::cout << "Submit = " << Submit << std::endl;
34-
std::cout << "Start = " << Start << std::endl;
35-
std::cout << "End = " << End << " ( " << (End - Start) << " ) "
36-
<< " => full ( " << (End - Submit) << " ) " << std::endl;
37-
#endif
38-
39-
assert((Submit && Start && End) && "Profiling information failed.");
40-
assert(Submit <= Start);
41-
assert(Start < End);
42-
43-
bool Pass = sycl::info::event_command_status::complete ==
44-
Event.get_info<sycl::info::event::command_execution_status>();
45-
46-
return Pass;
47-
}
48-
49-
bool compareProfiling(event Event1, event Event2) {
50-
assert(Event1 != Event2);
51-
52-
auto SubmitEvent1 =
53-
Event1.get_profiling_info<sycl::info::event_profiling::command_submit>();
54-
auto StartEvent1 =
55-
Event1.get_profiling_info<sycl::info::event_profiling::command_start>();
56-
auto EndEvent1 =
57-
Event1.get_profiling_info<sycl::info::event_profiling::command_end>();
58-
assert((SubmitEvent1 && StartEvent1 && EndEvent1) &&
59-
"Profiling information failed.");
60-
61-
auto SubmitEvent2 =
62-
Event2.get_profiling_info<sycl::info::event_profiling::command_submit>();
63-
auto StartEvent2 =
64-
Event2.get_profiling_info<sycl::info::event_profiling::command_start>();
65-
auto EndEvent2 =
66-
Event2.get_profiling_info<sycl::info::event_profiling::command_end>();
67-
assert((SubmitEvent2 && StartEvent2 && EndEvent2) &&
68-
"Profiling information failed.");
69-
70-
assert(SubmitEvent1 != SubmitEvent2);
71-
assert(StartEvent1 != StartEvent2);
72-
assert(EndEvent1 != EndEvent2);
73-
74-
bool Pass1 = sycl::info::event_command_status::complete ==
75-
Event1.get_info<sycl::info::event::command_execution_status>();
76-
bool Pass2 = sycl::info::event_command_status::complete ==
77-
Event2.get_info<sycl::info::event::command_execution_status>();
78-
79-
return (Pass1 && Pass2);
80-
}
81-
8219
// The test checks that get_profiling_info waits for command associated with
8320
// event to complete execution.
8421
int main() {
Lines changed: 132 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,132 @@
1+
// REQUIRES: level_zero || cuda, gpu
2+
// RUN: %{build} -o %t.out
3+
// RUN: %{run} %t.out
4+
// Extra run to check for leaks in Level Zero using UR_L0_LEAKS_DEBUG
5+
// RUN: %if level_zero %{env SYCL_PI_LEVEL_ZERO_USE_IMMEDIATE_COMMANDLISTS=0 %{l0_leak_check} %{run} %t.out 2>&1 | FileCheck %s --implicit-check-not=LEAK %}
6+
// Extra run to check for immediate-command-list in Level Zero
7+
// RUN: %if level_zero %{env SYCL_PI_LEVEL_ZERO_USE_IMMEDIATE_COMMANDLISTS=1 %{l0_leak_check} %{run} %t.out 2>&1 | FileCheck %s --implicit-check-not=LEAK %}
8+
9+
// This test checks the profiling of an event returned
10+
// from graph submission with event::get_profiling_info().
11+
// It first tests a graph made exclusively of memory operations,
12+
// then tests a graph made of kernels. This test uses USM isntead of buffers to
13+
// test the path with no implicit dependencies which bypasses the SYCL
14+
// scheduler.
15+
#define GRAPH_TESTS_VERBOSE_PRINT 0
16+
17+
#include "../graph_common.hpp"
18+
19+
#include <sycl/properties/all_properties.hpp>
20+
21+
// The test checks that get_profiling_info waits for command associated with
22+
// event to complete execution.
23+
int main() {
24+
device Dev;
25+
26+
// Queue used for graph recording
27+
queue Queue{Dev};
28+
29+
// Queue that will be used for execution
30+
queue ExecutionQueue{Queue.get_device(),
31+
{sycl::property::queue::enable_profiling()}};
32+
33+
const size_t Size = 100000;
34+
int Data[Size] = {0};
35+
for (size_t I = 0; I < Size; ++I) {
36+
Data[I] = I;
37+
}
38+
int Values[Size] = {0};
39+
int *PtrFrom = malloc_device<int>(Size, Queue);
40+
int *PtrTo = malloc_device<int>(Size, Queue);
41+
Queue.copy(Data, PtrFrom, Size);
42+
Queue.copy(Values, PtrTo, Size);
43+
44+
int *PtrA = malloc_device<int>(Size, Queue);
45+
int *PtrB = malloc_device<int>(Size, Queue);
46+
int *PtrC = malloc_device<int>(Size, Queue);
47+
48+
Queue.copy(Data, PtrA, Size);
49+
Queue.copy(Values, PtrB, Size);
50+
Queue.copy(Values, PtrC, Size);
51+
52+
Queue.wait_and_throw();
53+
54+
{ // USM copy
55+
exp_ext::command_graph CopyGraph{Queue.get_context(), Queue.get_device()};
56+
CopyGraph.begin_recording(Queue);
57+
58+
Queue.submit([&](sycl::handler &Cgh) {
59+
Cgh.memcpy(PtrTo, PtrFrom, Size * sizeof(int));
60+
});
61+
62+
CopyGraph.end_recording(Queue);
63+
64+
// kernel launch
65+
exp_ext::command_graph KernelGraph{Queue.get_context(), Queue.get_device()};
66+
KernelGraph.begin_recording(Queue);
67+
68+
run_kernels_usm(Queue, Size, PtrA, PtrB, PtrC);
69+
70+
KernelGraph.end_recording(Queue);
71+
72+
auto CopyGraphExec =
73+
CopyGraph.finalize({exp_ext::property::graph::enable_profiling{}});
74+
auto KernelGraphExec =
75+
KernelGraph.finalize({exp_ext::property::graph::enable_profiling{}});
76+
77+
event CopyEvent, KernelEvent1, KernelEvent2;
78+
// Run graphs
79+
#if GRAPH_TESTS_VERBOSE_PRINT
80+
auto StartCopyGraph = std::chrono::high_resolution_clock::now();
81+
#endif
82+
CopyEvent = ExecutionQueue.submit(
83+
[&](handler &CGH) { CGH.ext_oneapi_graph(CopyGraphExec); });
84+
ExecutionQueue.wait_and_throw();
85+
#if GRAPH_TESTS_VERBOSE_PRINT
86+
auto EndCopyGraph = std::chrono::high_resolution_clock::now();
87+
auto StartKernelSubmit1 = std::chrono::high_resolution_clock::now();
88+
#endif
89+
KernelEvent1 = ExecutionQueue.submit(
90+
[&](handler &CGH) { CGH.ext_oneapi_graph(KernelGraphExec); });
91+
ExecutionQueue.wait_and_throw();
92+
#if GRAPH_TESTS_VERBOSE_PRINT
93+
auto endKernelSubmit1 = std::chrono::high_resolution_clock::now();
94+
auto StartKernelSubmit2 = std::chrono::high_resolution_clock::now();
95+
#endif
96+
KernelEvent2 = ExecutionQueue.submit(
97+
[&](handler &CGH) { CGH.ext_oneapi_graph(KernelGraphExec); });
98+
ExecutionQueue.wait_and_throw();
99+
#if GRAPH_TESTS_VERBOSE_PRINT
100+
auto endKernelSubmit2 = std::chrono::high_resolution_clock::now();
101+
102+
double DelayCopy = std::chrono::duration_cast<std::chrono::nanoseconds>(
103+
EndCopyGraph - StartCopyGraph)
104+
.count();
105+
std::cout << "Copy Graph delay (in ns) : " << DelayCopy << std::endl;
106+
double DelayKernel1 = std::chrono::duration_cast<std::chrono::nanoseconds>(
107+
endKernelSubmit1 - StartKernelSubmit1)
108+
.count();
109+
std::cout << "Kernel 1st Execution delay (in ns) : " << DelayKernel1
110+
<< std::endl;
111+
double DelayKernel2 = std::chrono::duration_cast<std::chrono::nanoseconds>(
112+
endKernelSubmit2 - StartKernelSubmit2)
113+
.count();
114+
std::cout << "Kernel 2nd Execution delay (in ns) : " << DelayKernel2
115+
<< std::endl;
116+
#endif
117+
118+
// Checks profiling times
119+
assert(verifyProfiling(CopyEvent) && verifyProfiling(KernelEvent1) &&
120+
verifyProfiling(KernelEvent2) &&
121+
compareProfiling(KernelEvent1, KernelEvent2));
122+
}
123+
124+
std::vector<int> HostData(Size);
125+
Queue.copy(PtrTo, HostData.data(), Size).wait_and_throw();
126+
127+
for (size_t I = 0; I < Size; ++I) {
128+
assert(HostData[I] == Data[I]);
129+
}
130+
131+
return 0;
132+
}

sycl/test-e2e/Graph/graph_common.hpp

Lines changed: 62 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -8,6 +8,10 @@
88
#include <mutex> // std::mutex, std::unique_lock
99
#include <numeric>
1010

11+
#if GRAPH_TESTS_VERBOSE_PRINT
12+
#include <chrono>
13+
#endif
14+
1115
// Test constants.
1216
constexpr size_t Size = 1024; // Number of data elements in a buffer.
1317
constexpr unsigned Iterations = 5; // Iterations of graph to execute.
@@ -480,3 +484,61 @@ loadKernelsFromFile(queue &Q, std::string FileName) {
480484
exp_ext::build(KernelBundleSrc);
481485
return KernelBundleExe;
482486
}
487+
488+
bool verifyProfiling(event Event) {
489+
auto Submit =
490+
Event.get_profiling_info<sycl::info::event_profiling::command_submit>();
491+
auto Start =
492+
Event.get_profiling_info<sycl::info::event_profiling::command_start>();
493+
auto End =
494+
Event.get_profiling_info<sycl::info::event_profiling::command_end>();
495+
496+
#if GRAPH_TESTS_VERBOSE_PRINT
497+
std::cout << "Submit = " << Submit << std::endl;
498+
std::cout << "Start = " << Start << std::endl;
499+
std::cout << "End = " << End << " ( " << (End - Start) << " ) "
500+
<< " => full ( " << (End - Submit) << " ) " << std::endl;
501+
#endif
502+
503+
assert((Submit && Start && End) && "Profiling information failed.");
504+
assert(Submit <= Start);
505+
assert(Start < End);
506+
507+
bool Pass = sycl::info::event_command_status::complete ==
508+
Event.get_info<sycl::info::event::command_execution_status>();
509+
510+
return Pass;
511+
}
512+
513+
bool compareProfiling(event Event1, event Event2) {
514+
assert(Event1 != Event2);
515+
516+
auto SubmitEvent1 =
517+
Event1.get_profiling_info<sycl::info::event_profiling::command_submit>();
518+
auto StartEvent1 =
519+
Event1.get_profiling_info<sycl::info::event_profiling::command_start>();
520+
auto EndEvent1 =
521+
Event1.get_profiling_info<sycl::info::event_profiling::command_end>();
522+
assert((SubmitEvent1 && StartEvent1 && EndEvent1) &&
523+
"Profiling information failed.");
524+
525+
auto SubmitEvent2 =
526+
Event2.get_profiling_info<sycl::info::event_profiling::command_submit>();
527+
auto StartEvent2 =
528+
Event2.get_profiling_info<sycl::info::event_profiling::command_start>();
529+
auto EndEvent2 =
530+
Event2.get_profiling_info<sycl::info::event_profiling::command_end>();
531+
assert((SubmitEvent2 && StartEvent2 && EndEvent2) &&
532+
"Profiling information failed.");
533+
534+
assert(SubmitEvent1 != SubmitEvent2);
535+
assert(StartEvent1 != StartEvent2);
536+
assert(EndEvent1 != EndEvent2);
537+
538+
bool Pass1 = sycl::info::event_command_status::complete ==
539+
Event1.get_info<sycl::info::event::command_execution_status>();
540+
bool Pass2 = sycl::info::event_command_status::complete ==
541+
Event2.get_info<sycl::info::event::command_execution_status>();
542+
543+
return (Pass1 && Pass2);
544+
}

0 commit comments

Comments
 (0)