Skip to content

Commit ea13d34

Browse files
committed
breaking: change Handler match
http.Response as new parameter type
1 parent a73af58 commit ea13d34

File tree

1 file changed

+22
-11
lines changed

1 file changed

+22
-11
lines changed

crawler.go

Lines changed: 22 additions & 11 deletions
Original file line numberDiff line numberDiff line change
@@ -103,7 +103,8 @@ func (c *Crawler) enqueue(req *http.Request, timeout time.Duration) error {
103103
}
104104

105105
// Handle registers the Handler for the given pattern.
106-
// If pattern is "*" means matches all requests.
106+
// If pattern is "*", the handler matches all requests when
107+
// no other pattern matches.
107108
func (c *Crawler) Handle(pattern string, handler Handler) {
108109
c.mu.Lock()
109110
defer c.mu.Unlock()
@@ -120,9 +121,9 @@ func (c *Crawler) Handle(pattern string, handler Handler) {
120121
c.m[pattern] = muxEntry{pattern: pattern, h: handler}
121122
}
122123

123-
// Handler returns a Handler for the give URL.
124-
func (c *Crawler) Handler(u *url.URL) (h Handler, pattern string) {
125-
return c.handler(u)
124+
// Handler returns the Handler for the given HTTP response.
125+
func (c *Crawler) Handler(res *http.Response) (h Handler, pattern string) {
126+
return c.handler(res.Request.Host, res.Request.URL.Path)
126127
}
127128

128129
// UseMiddleware adds a Middleware to the crawler.
@@ -192,10 +193,18 @@ func (f roundTripperFunc) RoundTrip(req *http.Request) (*http.Response, error) {
192193
return f(req)
193194
}
194195

195-
func (c *Crawler) pathMatch(path string) (h Handler, pattern string) {
196+
func (c *Crawler) pathMatch(pattern, path string) bool {
197+
n := len(pattern)
198+
if pattern[n-1] == '/' {
199+
pattern = pattern[:n-1]
200+
}
201+
return strings.Index(path, pattern) >= 0
202+
}
203+
204+
func (c *Crawler) matchHandler(path string) (h Handler, pattern string) {
196205
var n = 0
197206
for k, v := range c.m {
198-
if strings.Index(k, path) == -1 {
207+
if !c.pathMatch(k, path) {
199208
continue
200209
}
201210
if h == nil || len(k) > n {
@@ -207,14 +216,16 @@ func (c *Crawler) pathMatch(path string) (h Handler, pattern string) {
207216
return
208217
}
209218

210-
func (c *Crawler) handler(u *url.URL) (h Handler, pattern string) {
219+
func (c *Crawler) handler(host, path string) (h Handler, pattern string) {
211220
c.mu.RLock()
212221
defer c.mu.RUnlock()
213222

214-
host, _, _ := net.SplitHostPort(u.Host)
215-
h, pattern = c.pathMatch(host)
223+
h, pattern = c.matchHandler(host + path)
224+
if h == nil {
225+
h, pattern = c.matchHandler(host)
226+
}
216227
if h == nil {
217-
h, pattern = c.pathMatch("*")
228+
h, pattern = c.matchHandler("*")
218229
}
219230
if h == nil {
220231
h, pattern = VoidHandler(), ""
@@ -298,7 +309,7 @@ func (c *Crawler) scanRequestWork(workCh chan chan *http.Request, closeCh chan i
298309
logrus.Panicf("antch: Handler got panic error: %v", r)
299310
}
300311
}()
301-
h, _ := c.Handler(res.Request.URL)
312+
h, _ := c.Handler(res)
302313
h.ServeSpider(c.writeCh, res)
303314
}(re.res)
304315
}

0 commit comments

Comments
 (0)