Skip to content

Commit b2596a7

Browse files
committed
libutil: Add custom PeekSort implementation
Unlike std::sort and std::stable_sort, this implementation does not lead to out-of-bounds memory reads or other undefined behavior when the predicate is not strict weak ordering. This makes it possible to use this function in libexpr for builtins.sort, where an incorrectly implemented comparator in the user nix code currently can crash and burn the evaluator by invoking C++ UB.
1 parent e73fcd0 commit b2596a7

File tree

4 files changed

+575
-0
lines changed

4 files changed

+575
-0
lines changed

src/libutil-tests/meson.build

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -65,6 +65,7 @@ sources = files(
6565
'position.cc',
6666
'processes.cc',
6767
'references.cc',
68+
'sort.cc',
6869
'spawn.cc',
6970
'strings.cc',
7071
'suggestions.cc',

src/libutil-tests/sort.cc

Lines changed: 274 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,274 @@
1+
#include <gtest/gtest.h>
2+
#include <rapidcheck/gtest.h>
3+
#include "nix/util/sort.hh"
4+
5+
#include <vector>
6+
#include <list>
7+
#include <algorithm>
8+
#include <random>
9+
10+
namespace nix {
11+
12+
struct MonotonicSubranges : public ::testing::Test
13+
{
14+
std::vector<int> empty_;
15+
std::vector<int> basic_ = {1, 0, -1, -100, 10, 10, 20, 40, 5, 5, 20, 10, 10, 1, -5};
16+
};
17+
18+
TEST_F(MonotonicSubranges, empty)
19+
{
20+
ASSERT_EQ(weaklyIncreasingPrefix(empty_.begin(), empty_.end()), empty_.begin());
21+
ASSERT_EQ(weaklyIncreasingSuffix(empty_.begin(), empty_.end()), empty_.begin());
22+
ASSERT_EQ(strictlyDecreasingPrefix(empty_.begin(), empty_.end()), empty_.begin());
23+
ASSERT_EQ(strictlyDecreasingSuffix(empty_.begin(), empty_.end()), empty_.begin());
24+
}
25+
26+
TEST_F(MonotonicSubranges, basic)
27+
{
28+
ASSERT_EQ(strictlyDecreasingPrefix(basic_.begin(), basic_.end()), basic_.begin() + 4);
29+
ASSERT_EQ(strictlyDecreasingSuffix(basic_.begin(), basic_.end()), basic_.begin() + 12);
30+
std::reverse(basic_.begin(), basic_.end());
31+
ASSERT_EQ(weaklyIncreasingPrefix(basic_.begin(), basic_.end()), basic_.begin() + 5);
32+
ASSERT_EQ(weaklyIncreasingSuffix(basic_.begin(), basic_.end()), basic_.begin() + 11);
33+
}
34+
35+
template<typename T>
36+
class SortTestPermutations : public ::testing::Test
37+
{
38+
std::vector<T> initialData = {std::numeric_limits<T>::max(), std::numeric_limits<T>::min(), 0, 0, 42, 126, 36};
39+
std::vector<T> vectorData;
40+
std::list<T> listData;
41+
42+
public:
43+
std::vector<T> scratchVector;
44+
std::list<T> scratchList;
45+
std::vector<T> empty;
46+
47+
void SetUp() override
48+
{
49+
vectorData = initialData;
50+
std::sort(vectorData.begin(), vectorData.end());
51+
listData = std::list(vectorData.begin(), vectorData.end());
52+
}
53+
54+
bool nextPermutation()
55+
{
56+
std::next_permutation(vectorData.begin(), vectorData.end());
57+
std::next_permutation(listData.begin(), listData.end());
58+
scratchList = listData;
59+
scratchVector = vectorData;
60+
return vectorData == initialData;
61+
}
62+
};
63+
64+
using SortPermutationsTypes = ::testing::Types<int, long long, short, unsigned, unsigned long>;
65+
66+
TYPED_TEST_SUITE(SortTestPermutations, SortPermutationsTypes);
67+
68+
TYPED_TEST(SortTestPermutations, insertionsort)
69+
{
70+
while (!this->nextPermutation()) {
71+
auto & list = this->scratchList;
72+
insertionsort(list.begin(), list.end());
73+
ASSERT_TRUE(std::is_sorted(list.begin(), list.end()));
74+
auto & vector = this->scratchVector;
75+
insertionsort(vector.begin(), vector.end());
76+
ASSERT_TRUE(std::is_sorted(vector.begin(), vector.end()));
77+
}
78+
}
79+
80+
TYPED_TEST(SortTestPermutations, peeksort)
81+
{
82+
while (!this->nextPermutation()) {
83+
auto & vector = this->scratchVector;
84+
peeksort(vector.begin(), vector.end());
85+
ASSERT_TRUE(std::is_sorted(vector.begin(), vector.end()));
86+
}
87+
}
88+
89+
TEST(InsertionSort, empty)
90+
{
91+
std::vector<int> empty;
92+
insertionsort(empty.begin(), empty.end());
93+
}
94+
95+
struct RandomPeekSort : public ::testing::TestWithParam<
96+
std::tuple</*maxSize*/ std::size_t, /*min*/ int, /*max*/ int, /*iterations*/ std::size_t>>
97+
{
98+
using ValueType = int;
99+
std::vector<ValueType> data_;
100+
std::mt19937 urng_;
101+
std::uniform_int_distribution<int> distribution_;
102+
103+
void SetUp() override
104+
{
105+
auto [maxSize, min, max, iterations] = GetParam();
106+
urng_ = std::mt19937(GTEST_FLAG_GET(random_seed));
107+
distribution_ = std::uniform_int_distribution<int>(min, max);
108+
}
109+
110+
auto regenerate()
111+
{
112+
auto [maxSize, min, max, iterations] = GetParam();
113+
std::size_t dataSize = std::uniform_int_distribution<std::size_t>(0, maxSize)(urng_);
114+
data_.resize(dataSize);
115+
std::generate(data_.begin(), data_.end(), [&]() { return distribution_(urng_); });
116+
}
117+
};
118+
119+
TEST_P(RandomPeekSort, defaultComparator)
120+
{
121+
auto [maxSize, min, max, iterations] = GetParam();
122+
123+
for (std::size_t i = 0; i < iterations; ++i) {
124+
regenerate();
125+
peeksort(data_.begin(), data_.end());
126+
ASSERT_TRUE(std::is_sorted(data_.begin(), data_.end()));
127+
/* Sorting is idempotent */
128+
peeksort(data_.begin(), data_.end());
129+
ASSERT_TRUE(std::is_sorted(data_.begin(), data_.end()));
130+
}
131+
}
132+
133+
TEST_P(RandomPeekSort, greater)
134+
{
135+
auto [maxSize, min, max, iterations] = GetParam();
136+
137+
for (std::size_t i = 0; i < iterations; ++i) {
138+
regenerate();
139+
peeksort(data_.begin(), data_.end(), std::greater<int>{});
140+
ASSERT_TRUE(std::is_sorted(data_.begin(), data_.end(), std::greater<int>{}));
141+
/* Sorting is idempotent */
142+
peeksort(data_.begin(), data_.end(), std::greater<int>{});
143+
ASSERT_TRUE(std::is_sorted(data_.begin(), data_.end(), std::greater<int>{}));
144+
}
145+
}
146+
147+
TEST_P(RandomPeekSort, brokenComparator)
148+
{
149+
auto [maxSize, min, max, iterations] = GetParam();
150+
151+
/* This is a pretty nice way of modeling a worst-case scenario for a broken comparator.
152+
If the sorting algorithm doesn't break in such case, then surely all deterministic
153+
predicates won't break it. */
154+
auto comp = [&]([[maybe_unused]] const auto & lhs, [[maybe_unused]] const auto & rhs) -> bool {
155+
return std::uniform_int_distribution<unsigned>(0, 1)(urng_);
156+
};
157+
158+
for (std::size_t i = 0; i < iterations; ++i) {
159+
regenerate();
160+
auto originalData = data_;
161+
peeksort(data_.begin(), data_.end(), comp);
162+
/* Check that the output is just a reordering of the input. This is the
163+
contract of the implementation in regard to comparators that don't
164+
define a strict weak order. */
165+
std::sort(data_.begin(), data_.end());
166+
std::sort(originalData.begin(), originalData.end());
167+
ASSERT_EQ(originalData, data_);
168+
}
169+
}
170+
171+
TEST_P(RandomPeekSort, stability)
172+
{
173+
auto [maxSize, min, max, iterations] = GetParam();
174+
175+
for (std::size_t i = 0; i < iterations; ++i) {
176+
regenerate();
177+
std::vector<std::pair<int, int>> pairs;
178+
179+
/* Assign sequential ids to objects. After the sort ids for equivalent
180+
elements should be in ascending order. */
181+
std::transform(
182+
data_.begin(), data_.end(), std::back_inserter(pairs), [id = std::size_t{0}](auto && val) mutable {
183+
return std::pair{val, ++id};
184+
});
185+
186+
auto comp = [&]([[maybe_unused]] const auto & lhs, [[maybe_unused]] const auto & rhs) -> bool {
187+
return lhs.first > rhs.first;
188+
};
189+
190+
peeksort(pairs.begin(), pairs.end(), comp);
191+
ASSERT_TRUE(std::is_sorted(pairs.begin(), pairs.end(), comp));
192+
193+
for (auto begin = pairs.begin(), end = pairs.end(); begin < end; ++begin) {
194+
auto key = begin->first;
195+
auto innerEnd = std::find_if_not(begin, end, [key](const auto & lhs) { return lhs.first == key; });
196+
ASSERT_TRUE(std::is_sorted(begin, innerEnd, [](const auto & lhs, const auto & rhs) {
197+
return lhs.second < rhs.second;
198+
}));
199+
begin = innerEnd;
200+
}
201+
}
202+
}
203+
204+
using RandomPeekSortParamType = RandomPeekSort::ParamType;
205+
206+
INSTANTIATE_TEST_SUITE_P(
207+
PeekSort,
208+
RandomPeekSort,
209+
::testing::Values(
210+
RandomPeekSortParamType{128, std::numeric_limits<int>::min(), std::numeric_limits<int>::max(), 1024},
211+
RandomPeekSortParamType{7753, -32, 32, 128},
212+
RandomPeekSortParamType{11719, std::numeric_limits<int>::min(), std::numeric_limits<int>::max(), 64},
213+
RandomPeekSortParamType{4063, 0, 32, 256},
214+
RandomPeekSortParamType{771, -8, 8, 2048},
215+
RandomPeekSortParamType{433, 0, 1, 2048},
216+
RandomPeekSortParamType{0, 0, 0, 1}, /* empty case */
217+
RandomPeekSortParamType{
218+
1, std::numeric_limits<int>::min(), std::numeric_limits<int>::max(), 1}, /* single element */
219+
RandomPeekSortParamType{
220+
2, std::numeric_limits<int>::min(), std::numeric_limits<int>::max(), 2}, /* two elements */
221+
RandomPeekSortParamType{55425, std::numeric_limits<int>::min(), std::numeric_limits<int>::max(), 128}));
222+
223+
template<typename T>
224+
struct SortProperty : public ::testing::Test
225+
{};
226+
227+
using SortPropertyTypes = ::testing::Types<int, unsigned, long long, short, std::string>;
228+
TYPED_TEST_SUITE(SortProperty, SortPropertyTypes);
229+
230+
RC_GTEST_TYPED_FIXTURE_PROP(SortProperty, peeksortSorted, (std::vector<TypeParam> vec))
231+
{
232+
peeksort(vec.begin(), vec.end());
233+
RC_ASSERT(std::is_sorted(vec.begin(), vec.end()));
234+
}
235+
236+
RC_GTEST_TYPED_FIXTURE_PROP(SortProperty, peeksortSortedGreater, (std::vector<TypeParam> vec))
237+
{
238+
auto comp = std::greater<TypeParam>();
239+
peeksort(vec.begin(), vec.end(), comp);
240+
RC_ASSERT(std::is_sorted(vec.begin(), vec.end(), comp));
241+
}
242+
243+
RC_GTEST_TYPED_FIXTURE_PROP(SortProperty, insertionsortSorted, (std::vector<TypeParam> vec))
244+
{
245+
insertionsort(vec.begin(), vec.end());
246+
RC_ASSERT(std::is_sorted(vec.begin(), vec.end()));
247+
}
248+
249+
RC_GTEST_PROP(SortProperty, peeksortStability, (std::vector<std::pair<char, char>> vec))
250+
{
251+
auto comp = [](auto lhs, auto rhs) { return lhs.first < rhs.first; };
252+
auto copy = vec;
253+
std::stable_sort(copy.begin(), copy.end(), comp);
254+
peeksort(vec.begin(), vec.end(), comp);
255+
RC_ASSERT(copy == vec);
256+
}
257+
258+
RC_GTEST_TYPED_FIXTURE_PROP(SortProperty, peeksortSortedLinearComparisonComplexity, (std::vector<TypeParam> vec))
259+
{
260+
peeksort(vec.begin(), vec.end());
261+
RC_ASSERT(std::is_sorted(vec.begin(), vec.end()));
262+
std::size_t comparisonCount = 0;
263+
auto countingComp = [&](auto lhs, auto rhs) {
264+
++comparisonCount;
265+
return lhs < rhs;
266+
};
267+
268+
peeksort(vec.begin(), vec.end(), countingComp);
269+
270+
/* In the sorted case comparison complexify should be linear. */
271+
RC_ASSERT(comparisonCount <= vec.size());
272+
}
273+
274+
} // namespace nix

src/libutil/include/nix/util/meson.build

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -59,6 +59,7 @@ headers = files(
5959
'signals.hh',
6060
'signature/local-keys.hh',
6161
'signature/signer.hh',
62+
'sort.hh',
6263
'source-accessor.hh',
6364
'source-path.hh',
6465
'split.hh',

0 commit comments

Comments
 (0)