Skip to content

Commit a6acfed

Browse files
authored
Support more than 1024 CPUs for CPUPool (#3716) (#3758)
Use extended APIs to support more CPUs.
1 parent 216d20b commit a6acfed

File tree

1 file changed

+41
-14
lines changed

1 file changed

+41
-14
lines changed

csrc/cpu/runtime/CPUPool.cpp

Lines changed: 41 additions & 14 deletions
Original file line numberDiff line numberDiff line change
@@ -144,29 +144,56 @@ std::vector<int32_t> init_process_available_cores() {
144144
// But this information makes no sense and shouldn't be used without preload
145145
// IOMP.
146146
// Step1: Get the main thread affinity
147-
cpu_set_t main_thread_pre_set;
148-
CPU_ZERO(&main_thread_pre_set);
149-
if (sched_getaffinity(0, sizeof(cpu_set_t), &main_thread_pre_set) != 0) {
150-
throw std::runtime_error("Fail to get the thread affinity information");
151-
}
152147

153-
// Step2:
154148
// https://man7.org/linux/man-pages/man3/sysconf.3.html
155149
// Please note these values may not be standard.
156150
// _SC_NPROCESSORS_ONLN: processors available, may be less than
157151
// _SC_NPROCESSORS_CONF because processors may be offline.
158152
// _SC_NPROCESSORS_CONF: processors configured.
159153
int nproc_online = sysconf(_SC_NPROCESSORS_CONF);
160-
for (int i = 0; i < nproc_online; i++) {
161-
if (CPU_ISSET(i, &main_thread_pre_set)) {
162-
available_cpu_cores_internal.emplace_back(i);
154+
cpu_set_t main_thread_pre_set;
155+
CPU_ZERO(&main_thread_pre_set);
156+
if (sched_getaffinity(0, sizeof(cpu_set_t), &main_thread_pre_set) == 0) {
157+
// Step2:
158+
for (int i = 0; i < nproc_online; i++) {
159+
if (CPU_ISSET(i, &main_thread_pre_set)) {
160+
available_cpu_cores_internal.emplace_back(i);
161+
}
163162
}
164-
}
165163

166-
// Step3: restore main thread affinity
167-
if (sched_setaffinity(0, sizeof(cpu_set_t), &main_thread_pre_set) != 0) {
168-
throw std::runtime_error(
169-
"Fail to restore the main thread affinity in step3.");
164+
// Step3: restore main thread affinity
165+
if (sched_setaffinity(0, sizeof(cpu_set_t), &main_thread_pre_set) != 0) {
166+
throw std::runtime_error("Fail to restore the main thread affinity");
167+
}
168+
} else {
169+
// sched_getaffinity() may fail because the fixed-size affinity
170+
// mask is too small when the system has more than 1024 CPUs.
171+
// Allocate a mask large enough for more CPUs.
172+
size_t setsize = CPU_ALLOC_SIZE(nproc_online);
173+
cpu_set_t* dymain_thread_pre_set = CPU_ALLOC(nproc_online);
174+
if (dymain_thread_pre_set == NULL) {
175+
throw std::runtime_error("Failed to allocate CPU mask");
176+
}
177+
178+
CPU_ZERO_S(setsize, dymain_thread_pre_set);
179+
if (sched_getaffinity(0, setsize, dymain_thread_pre_set) != 0) {
180+
CPU_FREE(dymain_thread_pre_set);
181+
throw std::runtime_error("Fail to get the thread affinity information");
182+
}
183+
184+
// Step2:
185+
for (int i = 0; i < nproc_online; i++) {
186+
if (CPU_ISSET_S(i, setsize, dymain_thread_pre_set)) {
187+
available_cpu_cores_internal.emplace_back(i);
188+
}
189+
}
190+
191+
// Step3: restore main thread affinity
192+
if (sched_setaffinity(0, setsize, dymain_thread_pre_set) != 0) {
193+
CPU_FREE(dymain_thread_pre_set);
194+
throw std::runtime_error("Fail to restore the main thread affinity");
195+
}
196+
CPU_FREE(dymain_thread_pre_set);
170197
}
171198
#endif
172199
}

0 commit comments

Comments
 (0)