Skip to content

Commit 1b4ed66

Browse files
authored
Handle MPI init/finalize failures (#447)
- check the return codes of `MPI_Init` and `MPI_Finalize`
- abort and return the error code if MPI setup/teardown fails
- document new behaviour in runners header
1 parent 58f945d commit 1b4ed66

File tree

2 files changed

+14
-3
lines changed

2 files changed

+14
-3
lines changed

modules/core/runners/include/runners.hpp

Lines changed: 2 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -39,7 +39,8 @@ class WorkerTestFailurePrinter : public ::testing::EmptyTestEventListener {
3939
/// @brief Initializes the testing environment (e.g., MPI, logging).
4040
/// @param argc Argument count.
4141
/// @param argv Argument vector.
42-
/// @return Exit code: 0 for success, non-zero for failure.
42+
/// @return Exit code from RUN_ALL_TESTS or MPI error code if initialization/
43+
/// finalization fails.
4344
int Init(int argc, char** argv);
4445

4546
} // namespace ppc::core

modules/core/runners/src/runners.cpp

Lines changed: 12 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -62,7 +62,12 @@ void WorkerTestFailurePrinter::PrintProcessRank() {
6262
}
6363

6464
int Init(int argc, char** argv) {
65-
MPI_Init(&argc, &argv);
65+
const int init_res = MPI_Init(&argc, &argv);
66+
if (init_res != MPI_SUCCESS) {
67+
std::cerr << std::format("[ ERROR ] MPI_Init failed with code {}", init_res) << '\n';
68+
MPI_Abort(MPI_COMM_WORLD, init_res);
69+
return init_res;
70+
}
6671

6772
// Limit the number of threads in TBB
6873
tbb::global_control control(tbb::global_control::max_allowed_parallelism, ppc::util::GetNumThreads());
@@ -79,7 +84,12 @@ int Init(int argc, char** argv) {
7984
listeners.Append(new ppc::core::UnreadMessagesDetector());
8085
auto status = RUN_ALL_TESTS();
8186

82-
MPI_Finalize();
87+
const int finalize_res = MPI_Finalize();
88+
if (finalize_res != MPI_SUCCESS) {
89+
std::cerr << std::format("[ ERROR ] MPI_Finalize failed with code {}", finalize_res) << '\n';
90+
MPI_Abort(MPI_COMM_WORLD, finalize_res);
91+
return finalize_res;
92+
}
8393
return status;
8494
}
8595

0 commit comments

Comments
 (0)