Skip to content

Commit a8fba6b

Browse files
add parallel_range_blocks_multi
1 parent e33d770 commit a8fba6b

File tree

2 files changed

+94
-15
lines changed

2 files changed

+94
-15
lines changed

src/Tools.hh

Lines changed: 37 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -158,6 +158,9 @@ IntT parallel_range_blocks(
158158
if (num_threads == 0) {
159159
num_threads = std::thread::hardware_concurrency();
160160
}
161+
if (num_threads < 1) {
162+
throw std::logic_error("thread count must be at least 1");
163+
}
161164

162165
std::atomic<IntT> current_value(start_value);
163166
std::atomic<IntT> result_value(end_value);
@@ -189,4 +192,38 @@ IntT parallel_range_blocks(
189192
return result_value;
190193
}
191194

195+
// Like parallel_range_blocks, but returns all values for which fn returned
196+
// true. (Unlike the other parallel_range functions, this one does not return
197+
// early.)
198+
template <typename IntT = uint64_t, typename RetT = std::unordered_set<IntT>>
199+
std::unordered_set<IntT> parallel_range_blocks_multi(
200+
std::function<bool(IntT value, size_t thread_num)> fn,
201+
IntT start_value,
202+
IntT end_value,
203+
IntT block_size,
204+
size_t num_threads = 0,
205+
std::function<void(IntT start_value, IntT end_value, IntT current_value, uint64_t start_time_usecs)> progress_fn = parallel_range_default_progress_fn<IntT>) {
206+
207+
if (num_threads == 0) {
208+
num_threads = std::thread::hardware_concurrency();
209+
}
210+
211+
std::vector<RetT> thread_rets(num_threads);
212+
parallel_range_blocks<IntT>([&](IntT z, size_t thread_num) {
213+
if (fn(z, thread_num)) {
214+
thread_rets[thread_num].emplace(z);
215+
}
216+
return false;
217+
},
218+
start_value, end_value, block_size, num_threads, progress_fn);
219+
220+
RetT ret = std::move(thread_rets[0]);
221+
for (size_t z = 1; z < thread_rets.size(); z++) {
222+
auto& thread_ret = thread_rets[z];
223+
ret.insert(std::make_move_iterator(thread_ret.begin()), std::make_move_iterator(thread_ret.end()));
224+
}
225+
226+
return ret;
227+
}
228+
192229
} // namespace phosg

src/ToolsTest.cc

Lines changed: 57 additions & 15 deletions
Original file line numberDiff line numberDiff line change
@@ -48,6 +48,22 @@ int main(int, char**) {
4848
expect_eq(sum, hits.size());
4949
}
5050

51+
{
52+
printf("-- parallel_range return value\n");
53+
uint64_t target_value = 0xC349;
54+
auto is_equal = [&](uint64_t v, size_t) -> bool {
55+
return (v == target_value);
56+
};
57+
expect_eq((parallel_range<uint64_t>(is_equal, 0, 0x10000, num_threads, nullptr)), target_value);
58+
// Note: We can't check that parallel_range ends early when fn returns true
59+
// because it's not actually guaranteed to do so - it's only guaranteed to
60+
// return any of the values for which fn returns true. One could imagine a sequence of events in
61+
// which the target value's call takes a very long time, and all other threads
62+
// could finish checking all other values before the target one returns true.
63+
target_value = 0xCC349; // > end_value; should not be found
64+
expect_eq((parallel_range<uint64_t>(is_equal, 0, 0x10000, num_threads, nullptr)), 0x10000);
65+
}
66+
5167
{
5268
printf("-- parallel_range_blocks\n");
5369
vector<uint8_t> hits(0x1000000, 0);
@@ -76,35 +92,61 @@ int main(int, char**) {
7692
}
7793

7894
{
79-
printf("-- parallel_range return value\n");
95+
printf("-- parallel_range_blocks return value\n");
8096
uint64_t target_value = 0xC349;
8197
auto is_equal = [&](uint64_t v, size_t) -> bool {
8298
return (v == target_value);
8399
};
84-
expect_eq((parallel_range<uint64_t>(is_equal, 0, 0x10000, num_threads, nullptr)), target_value);
100+
expect_eq((parallel_range_blocks<uint64_t>(is_equal, 0, 0x100000, 0x1000, num_threads, nullptr)), target_value);
85101
// Note: We can't check that parallel_range ends early when fn returns true
86102
// because it's not actually guaranteed to do so - it's only guaranteed to
87103
// return any of the values for which fn returns true. One could imagine a sequence of events in
88104
// which the target value's call takes a very long time, and all other threads
89105
// could finish checking all other values before the target one returns true.
90-
target_value = 0xCC349; // > end_value; should not be found
91-
expect_eq((parallel_range<uint64_t>(is_equal, 0, 0x10000, num_threads, nullptr)), 0x10000);
106+
target_value = 0xCCC349; // > end_value; should not be found
107+
expect_eq((parallel_range_blocks<uint64_t>(is_equal, 0, 0x100000, 0x1000, num_threads, nullptr)), 0x100000);
92108
}
93109

94110
{
95-
printf("-- parallel_range_blocks return value\n");
96-
uint64_t target_value = 0xC349;
111+
printf("-- parallel_range_blocks_multi\n");
112+
vector<uint8_t> hits(0x1000000, 0);
113+
auto handle_value = [&](uint64_t v, size_t thread_num) -> bool {
114+
hits[v] = thread_num + 1;
115+
return false;
116+
};
117+
uint64_t start_time = now();
118+
parallel_range_blocks_multi<uint64_t>(handle_value, 0, hits.size(), 0x1000, num_threads, nullptr);
119+
uint64_t duration = now() - start_time;
120+
fprintf(stderr, "---- time: %" PRIu64 "\n", duration);
121+
122+
vector<size_t> thread_counts(num_threads, 0);
123+
for (size_t x = 0; x < hits.size(); x++) {
124+
expect_ne(hits[x], 0);
125+
thread_counts.at(hits[x] - 1)++;
126+
}
127+
128+
size_t sum = 0;
129+
for (size_t x = 0; x < thread_counts.size(); x++) {
130+
expect_ne(thread_counts[x], 0);
131+
fprintf(stderr, "---- thread %zu: %zu\n", x, thread_counts[x]);
132+
sum += thread_counts[x];
133+
}
134+
expect_eq(sum, hits.size());
135+
}
136+
137+
{
138+
printf("-- parallel_range_blocks_multi return value\n");
139+
uint64_t target_value1 = 0xC349;
140+
uint64_t target_value2 = 0x53A0;
141+
uint64_t target_value3 = 0x034D;
97142
auto is_equal = [&](uint64_t v, size_t) -> bool {
98-
return (v == target_value);
143+
return ((v == target_value1) || (v == target_value2) || (v == target_value3));
99144
};
100-
expect_eq((parallel_range_blocks<uint64_t>(is_equal, 0, 0x100000, 0x1000, num_threads, nullptr)), target_value);
101-
// Note: We can't check that parallel_range ends early when fn returns true
102-
// because it's not actually guaranteed to do so - it's only guaranteed to
103-
// return any of the values for which fn returns true. One could imagine a sequence of events in
104-
// which the target value's call takes a very long time, and all other threads
105-
// could finish checking all other values before the target one returns true.
106-
target_value = 0xCCC349; // > end_value; should not be found
107-
expect_eq((parallel_range_blocks<uint64_t>(is_equal, 0, 0x100000, 0x1000, num_threads, nullptr)), 0x100000);
145+
auto found = parallel_range_blocks_multi<uint64_t>(is_equal, 0, 0x100000, 0x1000, num_threads, nullptr);
146+
expect_eq(3, found.size());
147+
expect(found.count(target_value1));
148+
expect(found.count(target_value2));
149+
expect(found.count(target_value3));
108150
}
109151

110152
printf("ToolsTest: all tests passed\n");

0 commit comments

Comments
 (0)