Skip to content

Commit 451ef42

Browse files
authored
Improved performance (#31)
* fix * lto * bind * fix * fix * fix * fix * fix * fix * fix * fix * fix
1 parent a8998f5 commit 451ef42

File tree

9 files changed

+43
-32
lines changed

9 files changed

+43
-32
lines changed

CMakeLists.txt

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -39,4 +39,5 @@ set_target_properties(PRTree PROPERTIES
3939
POSITION_INDEPENDENT_CODE ON
4040
C_VISIBILITY_PRESET hidden
4141
CXX_VISIBILITY_PRESET hidden
42+
INTERPROCEDURAL_OPTIMIZATION TRUE
4243
)

cpp/parallel.h

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -25,7 +25,7 @@ void parallel_for_each(const Iter first, const Iter last, T &result, const F &fu
2525
iters.emplace_back(last);
2626

2727
result.reserve(total);
28-
for (auto &&r : rr)
28+
for (auto &r : rr)
2929
{
3030
r.reserve(total / nthreads + 1);
3131
}

cpp/prtree.h

Lines changed: 41 additions & 31 deletions
Original file line numberDiff line numberDiff line change
@@ -161,6 +161,12 @@ class BB
161161
}
162162
}
163163

164+
Real val_for_comp(const int &axis) const
165+
{
166+
const int axis2 = (axis + 1) % (2 * D);
167+
return values[axis] + values[axis2];
168+
}
169+
164170
BB operator+(const BB &rhs) const
165171
{
166172
Real result[2 * D];
@@ -189,18 +195,28 @@ class BB
189195
}
190196
}
191197

192-
inline bool operator()(const BB &target) const
198+
bool operator()(const BB &target) const
193199
{ // whether this and target have any intersection
200+
201+
Real minima[D];
202+
Real maxima[D];
203+
bool flags[D];
204+
bool flag = true;
205+
194206
for (int i = 0; i < D; ++i)
195207
{
196-
Real m = std::min(values[i], target.values[i]);
197-
Real M = std::min(values[i + D], target.values[i + D]);
198-
if (-m > M)
199-
{
200-
return false;
201-
}
208+
minima[i] = std::min(values[i], target.values[i]);
209+
maxima[i] = std::min(values[i + D], target.values[i + D]);
210+
}
211+
for (int i = 0; i < D; ++i)
212+
{
213+
flags[i] = -minima[i] < maxima[i];
202214
}
203-
return true;
215+
for (int i = 0; i < D; ++i)
216+
{
217+
flag &= flags[i];
218+
}
219+
return flag;
204220
}
205221

206222
Real area() const
@@ -257,10 +273,10 @@ template <class T, int B = 6, int D = 2>
257273
class Leaf
258274
{
259275
public:
260-
int axis = 0;
261-
Real min_val = 1e100;
262276
BB<D> mbb;
263277
svec<DataType<T, D>, B> data; // You can swap when filtering
278+
int axis = 0;
279+
264280
// T is type of keys(ids) which will be returned when you post a query.
265281
Leaf()
266282
{
@@ -283,41 +299,36 @@ class Leaf
283299
void update_mbb()
284300
{
285301
mbb.clear();
286-
min_val = 1e100;
287302
for (const auto &datum : data)
288303
{
289304
mbb += datum.second;
290-
min_val = std::min(min_val, datum.second[axis]);
291305
}
292306
}
293307

294-
inline auto find_swapee()
295-
{
296-
auto it = std::min_element(data.begin(), data.end(), [&](const auto &a, const auto &b) noexcept
297-
{ return a.second[axis] < b.second[axis]; });
298-
return it;
299-
}
300-
301308
bool filter(DataType<T, D> &value)
302309
{ // false means given value is ignored
310+
auto comp = [=](const auto &a, const auto &b) noexcept
311+
{ return a.second.val_for_comp(axis) < b.second.val_for_comp(axis); };
312+
303313
if (data.size() < B)
304314
{ // if there is room, just push the candidate
305-
data.push_back(value);
315+
auto iter = std::lower_bound(data.begin(), data.end(), value, comp);
316+
DataType<T, D> tmp_value = DataType<T, D>(value);
317+
data.insert(iter, std::move(tmp_value));
306318
mbb += value.second;
307-
min_val = std::min(min_val, value.second[axis]);
308319
return true;
309320
}
310321
else
311322
{ // if there is no room, check the priority and swap if needed
312-
/*
313-
auto iter = std::upper_bound(data.begin(), data.end(), value, [&](const auto &a, const auto &b) noexcept
314-
{ return a.second[axis] < b.second[axis]; });
315-
if (iter != data.end())
316-
*/
317-
if (min_val < value.second[axis])
323+
if (data[0].second.val_for_comp(axis) < value.second.val_for_comp(axis))
318324
{
319-
auto iter = find_swapee();
320-
std::swap(*iter, value);
325+
size_t n_swap = std::lower_bound(data.begin(), data.end(), value, comp) - data.begin();
326+
std::swap(*data.begin(), value);
327+
auto iter = data.begin();
328+
for (size_t i = 0; i < n_swap - 1; ++i)
329+
{
330+
std::swap(*(iter + i), *(iter + i + 1));
331+
}
321332
update_mbb();
322333
}
323334
return false;
@@ -1230,9 +1241,8 @@ class PRTree
12301241
X.push_back(std::move(bb));
12311242
}
12321243
}
1233-
T length = X.size();
12341244
vec<vec<T>> out;
1235-
out.reserve(length);
1245+
out.reserve(X.size());
12361246
#ifdef MY_DEBUG
12371247
std::for_each(X.begin(), X.end(),
12381248
[&](const BB<D> &x)

docs/images/2d_fig1.png

-268 Bytes
Loading

docs/images/2d_fig2.png

-206 Bytes
Loading

docs/images/2d_fig3.png

341 Bytes
Loading

docs/images/3d_fig1.png

356 Bytes
Loading

docs/images/3d_fig2.png

203 Bytes
Loading

docs/images/3d_fig3.png

1.04 KB
Loading

0 commit comments

Comments
 (0)