Skip to content

Commit 70e424e

Browse files
authored
Update web-crawler-multithreaded.cpp
1 parent f0fae44 commit 70e424e

File tree

1 file changed

+72
-0
lines changed

1 file changed

+72
-0
lines changed

C++/web-crawler-multithreaded.cpp

Lines changed: 72 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -77,3 +77,75 @@ class Solution {
7777
mutex m_;
7878
condition_variable cv_;
7979
};
80+
81+
// Time: O(|V| + |E|)
82+
// Space: O(|V|)
83+
class Solution2 {
84+
public:
85+
vector<string> crawl(string startUrl, HtmlParser htmlParser) {
86+
q_.emplace(startUrl);
87+
unordered_set<string> lookup = {startUrl};
88+
vector<thread> workers;
89+
for (int i = 0; i < NUMBER_OF_WORKERS; ++i) {
90+
workers.emplace_back(bind(&Solution::worker, this, &htmlParser, &lookup));
91+
}
92+
{
93+
unique_lock<mutex> lock{m_};
94+
cv_.wait(lock, [this]() { return q_.empty() && !working_count_; });
95+
for (const auto& t : workers) {
96+
q_.emplace();
97+
}
98+
cv_.notify_all();
99+
}
100+
for (auto& t : workers) {
101+
t.join();
102+
}
103+
return vector<string>(lookup.cbegin(), lookup.cend());
104+
}
105+
106+
private:
107+
void worker(HtmlParser *htmlParser, unordered_set<string> *lookup) {
108+
while (true) {
109+
string from_url;
110+
{
111+
unique_lock<mutex> lock{m_};
112+
cv_.wait(lock, [this]() { return !q_.empty(); });
113+
from_url = q_.front(); q_.pop();
114+
if (from_url.empty()) {
115+
break;
116+
}
117+
++working_count_;
118+
}
119+
const auto& name = hostname(from_url);
120+
for (const auto& to_url: htmlParser->getUrls(from_url)) {
121+
if (name != hostname(to_url)) {
122+
continue;
123+
}
124+
unique_lock<mutex> lock{m_};
125+
if (!lookup->count(to_url)) {
126+
lookup->emplace(to_url);
127+
q_.emplace(to_url);
128+
cv_.notify_all();
129+
}
130+
}
131+
{
132+
unique_lock<mutex> lock{m_};
133+
--working_count_;
134+
if (q_.empty() && !working_count_) {
135+
cv_.notify_all();
136+
}
137+
}
138+
}
139+
}
140+
141+
string hostname(const string& url) {
142+
static const string scheme = "http://";
143+
return url.substr(0, url.find('/', scheme.length()));
144+
}
145+
146+
static const int NUMBER_OF_WORKERS = 4;
147+
queue<string> q_;
148+
int working_count_ = 0;
149+
mutex m_;
150+
condition_variable cv_;
151+
};

0 commit comments

Comments
 (0)