Skip to content

Commit 34ae5ef

Browse files
vnepogodinjochil
authored andcommitted
optimize and cleanup
- use algorithms instead of 'raw loops' - use std::string_view instead of std::string in some places - don't initialize std::regex multiple times in the loop - use move iterators - remove fnmatch from header - remove unused headers - move our glob header to the top - separate C lib header from C++ headers improvements 2.80-4.93x some benches Ryzen 5 1600 original: ``` ❯ hyperfine -r 412 -N --warmup 10 './build/standalone/glob -i /home/vl/Downloads/**/*.cpp' Benchmark 1: ./build/standalone/glob -i /home/vl/Downloads/**/*.cpp Time (mean ± σ): 51.3 ms ± 10.4 ms [User: 47.3 ms, System: 3.3 ms] Range (min … max): 37.2 ms … 75.5 ms 412 runs ``` optimized version: ``` ❯ hyperfine -r 412 -N --warmup 10 './build/standalone/glob -i /home/vl/Downloads/**/*.cpp' Benchmark 1: ./build/standalone/glob -i /home/vl/Downloads/**/*.cpp Time (mean ± σ): 10.4 ms ± 3.9 ms [User: 6.4 ms, System: 3.6 ms] Range (min … max): 6.3 ms … 17.7 ms 412 run ``` Ryzen 9 7950X3D original version recursive on a lot of files: peak ram usage 391M ``` ❯ hyperfine -N './glob -r -i /var/lib/temppath/**/*.tar.zst' Benchmark 1: ./glob -r -i /var/lib/temppath/**/*.tar.zst Time (mean ± σ): 16.163 s ± 1.225 s [User: 13.867 s, System: 2.273 s] Range (min … max): 15.344 s … 18.979 s 10 runs ``` optimized version recursive on a lot of files: peak ram usage 261M ``` ❯ hyperfine -N './glob -r -i /var/lib/temppath/**/*.tar.zst' Benchmark 1: ./glob -r -i /var/lib/temppath/**/*.tar.zst Time (mean ± σ): 5.757 s ± 0.602 s [User: 3.469 s, System: 2.278 s] Range (min … max): 4.519 s … 6.659 s 10 runs ```
1 parent 91060de commit 34ae5ef

File tree

2 files changed

+72
-96
lines changed

2 files changed

+72
-96
lines changed

include/glob/glob.h

Lines changed: 0 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -45,7 +45,4 @@ std::vector<fs::path> glob(const std::initializer_list<std::string> &pathnames);
4545
/// Initializer list overload for convenience
4646
std::vector<fs::path> rglob(const std::initializer_list<std::string> &pathnames);
4747

48-
/// Returns true if the input path matche the glob pattern
49-
bool fnmatch(const fs::path &name, const std::string &pattern);
50-
5148
} // namespace glob

source/glob.cpp

Lines changed: 72 additions & 93 deletions
Original file line numberDiff line numberDiff line change
@@ -1,24 +1,29 @@
1+
#include <glob/glob.h>
12

23
#include <cassert>
3-
#include <functional>
4-
#include <glob/glob.h>
5-
#include <iostream>
4+
5+
#include <algorithm>
66
#include <map>
77
#include <regex>
8+
#include <string_view>
89

910
namespace glob {
1011

1112
namespace {
1213

13-
bool string_replace(std::string &str, const std::string &from, const std::string &to) {
14+
static constexpr auto SPECIAL_CHARACTERS = std::string_view{"()[]{}?*+-|^$\\.&~# \t\n\r\v\f"};
15+
static const auto ESCAPE_SET_OPER = std::regex(std::string{R"([&~|])"});
16+
static const auto ESCAPE_REPL_STR = std::string{R"(\\\1)"};
17+
18+
bool string_replace(std::string &str, std::string_view from, std::string_view to) {
1419
std::size_t start_pos = str.find(from);
1520
if (start_pos == std::string::npos)
1621
return false;
1722
str.replace(start_pos, from.length(), to);
1823
return true;
1924
}
2025

21-
std::string translate(const std::string &pattern) {
26+
std::string translate(std::string_view pattern) {
2227
std::size_t i = 0, n = pattern.size();
2328
std::string result_string;
2429

@@ -45,7 +50,7 @@ std::string translate(const std::string &pattern) {
4550
} else {
4651
auto stuff = std::string(pattern.begin() + i, pattern.begin() + j);
4752
if (stuff.find("--") == std::string::npos) {
48-
string_replace(stuff, std::string{"\\"}, std::string{R"(\\)"});
53+
string_replace(stuff, std::string_view{"\\"}, std::string_view{R"(\\)"});
4954
} else {
5055
std::vector<std::string> chunks;
5156
std::size_t k = 0;
@@ -57,7 +62,7 @@ std::string translate(const std::string &pattern) {
5762

5863
while (true) {
5964
k = pattern.find("-", k, j);
60-
if (k == std::string::npos) {
65+
if (k == std::string_view::npos) {
6166
break;
6267
}
6368
chunks.push_back(std::string(pattern.begin() + i, pattern.begin() + k));
@@ -69,24 +74,24 @@ std::string translate(const std::string &pattern) {
6974
// Escape backslashes and hyphens for set difference (--).
7075
// Hyphens that create ranges shouldn't be escaped.
7176
bool first = true;
72-
for (auto &s : chunks) {
73-
string_replace(s, std::string{"\\"}, std::string{R"(\\)"});
74-
string_replace(s, std::string{"-"}, std::string{R"(\-)"});
77+
for (auto &chunk : chunks) {
78+
string_replace(chunk, std::string_view{"\\"}, std::string_view{R"(\\)"});
79+
string_replace(chunk, std::string_view{"-"}, std::string_view{R"(\-)"});
7580
if (first) {
76-
stuff += s;
81+
stuff += chunk;
7782
first = false;
7883
} else {
79-
stuff += "-" + s;
84+
stuff += "-" + chunk;
8085
}
8186
}
8287
}
8388

8489
// Escape set operations (&&, ~~ and ||).
85-
std::string result;
86-
std::regex_replace(std::back_inserter(result), // ressult
87-
stuff.begin(), stuff.end(), // string
88-
std::regex(std::string{R"([&~|])"}), // pattern
89-
std::string{R"(\\\1)"}); // repl
90+
std::string result{};
91+
std::regex_replace(std::back_inserter(result), // result
92+
stuff.begin(), stuff.end(), // string
93+
ESCAPE_SET_OPER, // pattern
94+
ESCAPE_REPL_STR); // repl
9095
stuff = result;
9196
i = j + 1;
9297
if (stuff[0] == '!') {
@@ -102,16 +107,14 @@ std::string translate(const std::string &pattern) {
102107
// '-' (a range in character set)
103108
// '&', '~', (extended character set operations)
104109
// '#' (comment) and WHITESPACE (ignored) in verbose mode
105-
static std::string special_characters = "()[]{}?*+-|^$\\.&~# \t\n\r\v\f";
106110
static std::map<int, std::string> special_characters_map;
107111
if (special_characters_map.empty()) {
108-
for (auto &sc : special_characters) {
109-
special_characters_map.insert(
110-
std::make_pair(static_cast<int>(sc), std::string{"\\"} + std::string(1, sc)));
112+
for (auto &&sc : SPECIAL_CHARACTERS) {
113+
special_characters_map.emplace(static_cast<int>(sc), std::string{"\\"} + std::string(1, sc));
111114
}
112115
}
113116

114-
if (special_characters.find(c) != std::string::npos) {
117+
if (SPECIAL_CHARACTERS.find(c) != std::string_view::npos) {
115118
result_string += special_characters_map[static_cast<int>(c)];
116119
} else {
117120
result_string += c;
@@ -121,24 +124,22 @@ std::string translate(const std::string &pattern) {
121124
return std::string{"(("} + result_string + std::string{R"()|[\r\n])$)"};
122125
}
123126

124-
std::regex compile_pattern(const std::string &pattern) {
127+
std::regex compile_pattern(std::string_view pattern) {
125128
return std::regex(translate(pattern), std::regex::ECMAScript);
126129
}
127130

128-
bool fnmatch(const fs::path &name, const std::string &pattern) {
129-
return std::regex_match(name.string(), compile_pattern(pattern));
131+
bool fnmatch(std::string&& name, const std::regex& pattern) {
132+
return std::regex_match(std::move(name), pattern);
130133
}
131134

132135
std::vector<fs::path> filter(const std::vector<fs::path> &names,
133-
const std::string &pattern) {
136+
std::string_view pattern) {
134137
// std::cout << "Pattern: " << pattern << "\n";
138+
const auto pattern_re = compile_pattern(pattern);
135139
std::vector<fs::path> result;
136-
for (auto &name : names) {
137-
// std::cout << "Checking for " << name.string() << "\n";
138-
if (fnmatch(name, pattern)) {
139-
result.push_back(name);
140-
}
141-
}
140+
std::copy_if(std::make_move_iterator(names.begin()), std::make_move_iterator(names.end()),
141+
std::back_inserter(result),
142+
[&pattern_re](const fs::path& name) { return fnmatch(name.string(), pattern_re); });
142143
return result;
143144
}
144145

@@ -158,21 +159,20 @@ fs::path expand_tilde(fs::path path) {
158159

159160
std::string s = path.string();
160161
if (s[0] == '~') {
161-
s = std::string(home) + s.substr(1, s.size() - 1);
162+
s = std::string{home} + s.substr(1, s.size() - 1);
162163
return fs::path(s);
163-
} else {
164-
return path;
165164
}
165+
return path;
166166
}
167167

168168
bool has_magic(const std::string &pathname) {
169169
static const auto magic_check = std::regex("([*?[])");
170170
return std::regex_search(pathname, magic_check);
171171
}
172172

173-
bool is_hidden(const std::string &pathname) { return pathname[0] == '.'; }
173+
constexpr bool is_hidden(std::string_view pathname) noexcept { return pathname[0] == '.'; }
174174

175-
bool is_recursive(const std::string &pattern) { return pattern == "**"; }
175+
constexpr bool is_recursive(std::string_view pattern) noexcept { return pattern == std::string_view{"**"}; }
176176

177177
std::vector<fs::path> iter_directory(const fs::path &dirname, bool dironly) {
178178
std::vector<fs::path> result;
@@ -208,12 +208,11 @@ std::vector<fs::path> iter_directory(const fs::path &dirname, bool dironly) {
208208
std::vector<fs::path> rlistdir(const fs::path &dirname, bool dironly) {
209209
std::vector<fs::path> result;
210210
auto names = iter_directory(dirname, dironly);
211-
for (auto &x : names) {
212-
if (!is_hidden(x.string())) {
213-
result.push_back(x);
214-
for (auto &y : rlistdir(x, dironly)) {
215-
result.push_back(y);
216-
}
211+
for (auto &&name : names) {
212+
if (!is_hidden(name.string())) {
213+
result.push_back(name);
214+
auto matched_dirs = rlistdir(name, dironly);
215+
std::copy(std::make_move_iterator(matched_dirs.begin()), std::make_move_iterator(matched_dirs.end()), std::back_inserter(result));
217216
}
218217
}
219218
return result;
@@ -226,9 +225,8 @@ std::vector<fs::path> glob2(const fs::path &dirname, [[maybe_unused]] const fs::
226225
// std::cout << "In glob2\n";
227226
std::vector<fs::path> result{"."};
228227
assert(is_recursive(pattern.string()));
229-
for (auto &dir : rlistdir(dirname, dironly)) {
230-
result.push_back(dir);
231-
}
228+
auto matched_dirs = rlistdir(dirname, dironly);
229+
std::copy(std::make_move_iterator(matched_dirs.begin()), std::make_move_iterator(matched_dirs.end()), std::back_inserter(result));
232230
return result;
233231
}
234232

@@ -239,17 +237,17 @@ std::vector<fs::path> glob2(const fs::path &dirname, [[maybe_unused]] const fs::
239237
std::vector<fs::path> glob1(const fs::path &dirname, const fs::path &pattern,
240238
bool dironly) {
241239
// std::cout << "In glob1\n";
242-
auto names = iter_directory(dirname, dironly);
243240
std::vector<fs::path> filtered_names;
244-
for (auto &n : names) {
245-
if (!is_hidden(n.string())) {
246-
filtered_names.push_back(n.filename());
247-
// if (n.is_relative()) {
248-
// // std::cout << "Filtered (Relative): " << n << "\n";
249-
// filtered_names.push_back(fs::relative(n));
241+
auto names = iter_directory(dirname, dironly);
242+
for (auto &&name : names) {
243+
if (!is_hidden(name.string())) {
244+
filtered_names.push_back(name.filename());
245+
// if (name.is_relative()) {
246+
// // std::cout << "Filtered (Relative): " << name << "\n";
247+
// filtered_names.push_back(fs::relative(name));
250248
// } else {
251-
// // std::cout << "Filtered (Absolute): " << n << "\n";
252-
// filtered_names.push_back(n.filename());
249+
// // std::cout << "Filtered (Absolute): " << name << "\n";
250+
// filtered_names.push_back(name.filename());
253251
// }
254252
}
255253
}
@@ -259,18 +257,12 @@ std::vector<fs::path> glob1(const fs::path &dirname, const fs::path &pattern,
259257
std::vector<fs::path> glob0(const fs::path &dirname, const fs::path &basename,
260258
bool /*dironly*/) {
261259
// std::cout << "In glob0\n";
262-
std::vector<fs::path> result;
263-
if (basename.empty()) {
264-
// 'q*x/' should match only directories.
265-
if (fs::is_directory(dirname)) {
266-
result = {basename};
267-
}
268-
} else {
269-
if (fs::exists(dirname / basename)) {
270-
result = {basename};
271-
}
260+
261+
// 'q*x/' should match only directories.
262+
if ((basename.empty() && fs::is_directory(dirname)) || (!basename.empty() && fs::exists(dirname / basename))) {
263+
return {basename};
272264
}
273-
return result;
265+
return {};
274266
}
275267

276268
std::vector<fs::path> glob(const fs::path &inpath, bool recursive = false,
@@ -290,48 +282,37 @@ std::vector<fs::path> glob(const fs::path &inpath, bool recursive = false,
290282

291283
if (!has_magic(pathname)) {
292284
assert(!dironly);
293-
if (!basename.empty()) {
294-
if (fs::exists(path)) {
295-
result.push_back(path);
296-
}
297-
} else {
298-
// Patterns ending with a slash should match only directories
299-
if (fs::is_directory(dirname)) {
300-
result.push_back(path);
301-
}
285+
286+
// Patterns ending with a slash should match only directories
287+
if ((!basename.empty() && fs::exists(path)) || (basename.empty() && fs::is_directory(dirname))) {
288+
result.push_back(path);
302289
}
303290
return result;
304291
}
305292

306293
if (dirname.empty()) {
307294
if (recursive && is_recursive(basename.string())) {
308295
return glob2(dirname, basename, dironly);
309-
} else {
310-
return glob1(dirname, basename, dironly);
311296
}
297+
return glob1(dirname, basename, dironly);
312298
}
313299

314-
std::vector<fs::path> dirs;
300+
std::vector<fs::path> dirs{dirname};
315301
if (dirname != fs::path(pathname) && has_magic(dirname.string())) {
316302
dirs = glob(dirname, recursive, true);
317-
} else {
318-
dirs = {dirname};
319303
}
320304

321-
std::function<std::vector<fs::path>(const fs::path &, const fs::path &, bool)>
322-
glob_in_dir;
305+
auto glob_in_dir = glob0;
323306
if (has_magic(basename.string())) {
324307
if (recursive && is_recursive(basename.string())) {
325308
glob_in_dir = glob2;
326309
} else {
327310
glob_in_dir = glob1;
328311
}
329-
} else {
330-
glob_in_dir = glob0;
331312
}
332313

333314
for (auto &d : dirs) {
334-
for (auto &name : glob_in_dir(d, basename, dironly)) {
315+
for (auto &&name : glob_in_dir(d, basename, dironly)) {
335316
fs::path subresult = name;
336317
if (name.parent_path().empty()) {
337318
subresult = d / name;
@@ -355,20 +336,18 @@ std::vector<fs::path> rglob(const std::string &pathname) {
355336

356337
std::vector<fs::path> glob(const std::vector<std::string> &pathnames) {
357338
std::vector<fs::path> result;
358-
for (auto &pathname : pathnames) {
359-
for (auto &match : glob(pathname, false)) {
360-
result.push_back(std::move(match));
361-
}
339+
for (const auto &pathname : pathnames) {
340+
auto matched_res = glob(pathname, false);
341+
std::copy(std::make_move_iterator(matched_res.begin()), std::make_move_iterator(matched_res.end()), std::back_inserter(result));
362342
}
363343
return result;
364344
}
365345

366346
std::vector<fs::path> rglob(const std::vector<std::string> &pathnames) {
367347
std::vector<fs::path> result;
368-
for (auto &pathname : pathnames) {
369-
for (auto &match : glob(pathname, true)) {
370-
result.push_back(std::move(match));
371-
}
348+
for (const auto &pathname : pathnames) {
349+
auto matched_res = glob(pathname, true);
350+
std::copy(std::make_move_iterator(matched_res.begin()), std::make_move_iterator(matched_res.end()), std::back_inserter(result));
372351
}
373352
return result;
374353
}

0 commit comments

Comments
 (0)