@@ -227,15 +227,15 @@ extern "C" EXPORT_FUNC PyObject *load_binary(PyObject *args) {
227
227
compileLevelZeroObjects (binary_ptr , binary_size , kernel_name , l0_device ,
228
228
l0_context , build_flags (), is_spv );
229
229
230
+ const bool debugEnabled = getBoolEnv ("TRITON_DEBUG" );
231
+
230
232
if (is_spv ) {
231
233
constexpr int32_t max_reg_spill = 1000 ;
232
234
const bool is_GRF_mode_specified = build_flags .hasGRFSizeFlag ();
233
235
234
236
// If the register mode isn't set, and the number of spills is greater
235
237
// than the threshold, recompile the kernel using large GRF mode.
236
238
if (!is_GRF_mode_specified && n_spills > max_reg_spill ) {
237
- const std ::optional < bool > debugEnabled =
238
- isEnvValueBool (getStrEnv ("TRITON_DEBUG" ));
239
239
if (debugEnabled )
240
240
std ::cout << "(I): Detected " << n_spills
241
241
<< " spills, recompiling the kernel using large GRF mode"
@@ -244,13 +244,32 @@ extern "C" EXPORT_FUNC PyObject *load_binary(PyObject *args) {
244
244
build_flags .addLargeGRFSizeFlag ();
245
245
246
246
try {
247
- auto [l0_module , l0_kernel , n_spills ] = compileLevelZeroObjects (
248
- binary_ptr , binary_size , kernel_name , l0_device , l0_context ,
249
- build_flags (), is_spv );
247
+ auto [l0_module_dgrf , l0_kernel_dgrf , n_spills_dgrf ] =
248
+ compileLevelZeroObjects (binary_ptr , binary_size , kernel_name ,
249
+ l0_device , l0_context , build_flags (),
250
+ is_spv );
250
251
251
252
if (debugEnabled )
252
- std ::cout << "(I): Kernel has now " << n_spills << " spills"
253
+ std ::cout << "(I): Kernel has now " << n_spills_dgrf << " spills"
253
254
<< std ::endl ;
255
+ if (n_spills_dgrf < n_spills ) {
256
+ std ::swap (l0_module , l0_module_dgrf );
257
+ std ::swap (l0_kernel , l0_kernel_dgrf );
258
+ std ::swap (n_spills , n_spills_dgrf );
259
+ }
260
+ // clean up the unused module and kernel.
261
+ auto error_no = zeKernelDestroy (l0_kernel_dgrf );
262
+ if (error_no != ZE_RESULT_SUCCESS ) {
263
+ std ::cerr
264
+ << "[Ignoring] Intel - Error during destroy unused L0 kernel"
265
+ << std ::endl ;
266
+ }
267
+ error_no = zeModuleDestroy (l0_module_dgrf );
268
+ if (error_no != ZE_RESULT_SUCCESS ) {
269
+ std ::cerr
270
+ << "[Ignoring] Intel - Error during destroy unused L0 module"
271
+ << std ::endl ;
272
+ }
254
273
} catch (const std ::exception & e ) {
255
274
std ::cerr << "[Ignoring] Error during Intel loadBinary with large "
256
275
"registers: "
@@ -261,6 +280,11 @@ extern "C" EXPORT_FUNC PyObject *load_binary(PyObject *args) {
261
280
}
262
281
}
263
282
283
+ if (debugEnabled && n_spills ) {
284
+ std ::cout << "(I): Detected " << n_spills << " spills for \""
285
+ << kernel_name << "\"" << std ::endl ;
286
+ }
287
+
264
288
auto n_regs = build_flags .n_regs ();
265
289
266
290
auto mod = new sycl ::kernel_bundle < sycl ::bundle_state ::executable > (
0 commit comments