@@ -103,7 +103,8 @@ func (c *Crawler) enqueue(req *http.Request, timeout time.Duration) error {
103
103
}
104
104
105
105
// Handle registers the Handler for the given pattern.
106
- // If pattern is "*" means matches all requests.
106
+ // If pattern is "*", the handler is used for every request that
107
+ // no other pattern matches.
107
108
func (c * Crawler ) Handle (pattern string , handler Handler ) {
108
109
c .mu .Lock ()
109
110
defer c .mu .Unlock ()
@@ -120,9 +121,9 @@ func (c *Crawler) Handle(pattern string, handler Handler) {
120
121
c .m [pattern ] = muxEntry {pattern : pattern , h : handler }
121
122
}
122
123
123
- // Handler returns a Handler for the give URL .
124
- func (c * Crawler ) Handler (u * url. URL ) (h Handler , pattern string ) {
125
- return c .handler (u )
124
+ // Handler returns a Handler for the give HTTP Response .
125
+ func (c * Crawler ) Handler (res * http. Response ) (h Handler , pattern string ) {
126
+ return c .handler (res . Request . Host , res . Request . URL . Path )
126
127
}
127
128
128
129
// UseMiddleware adds a Middleware to the crawler.
@@ -192,10 +193,18 @@ func (f roundTripperFunc) RoundTrip(req *http.Request) (*http.Response, error) {
192
193
return f (req )
193
194
}
194
195
195
- func (c * Crawler ) pathMatch (path string ) (h Handler , pattern string ) {
196
+ func (c * Crawler ) pathMatch (pattern , path string ) bool {
197
+ n := len (pattern )
198
+ if pattern [n - 1 ] == '/' {
199
+ pattern = pattern [:n - 1 ]
200
+ }
201
+ return strings .Index (path , pattern ) >= 0
202
+ }
203
+
204
+ func (c * Crawler ) matchHandler (path string ) (h Handler , pattern string ) {
196
205
var n = 0
197
206
for k , v := range c .m {
198
- if strings . Index (k , path ) == - 1 {
207
+ if ! c . pathMatch (k , path ) {
199
208
continue
200
209
}
201
210
if h == nil || len (k ) > n {
@@ -207,14 +216,16 @@ func (c *Crawler) pathMatch(path string) (h Handler, pattern string) {
207
216
return
208
217
}
209
218
210
- func (c * Crawler ) handler (u * url. URL ) (h Handler , pattern string ) {
219
+ func (c * Crawler ) handler (host , path string ) (h Handler , pattern string ) {
211
220
c .mu .RLock ()
212
221
defer c .mu .RUnlock ()
213
222
214
- host , _ , _ := net .SplitHostPort (u .Host )
215
- h , pattern = c .pathMatch (host )
223
+ h , pattern = c .matchHandler (host + path )
224
+ if h == nil {
225
+ h , pattern = c .matchHandler (host )
226
+ }
216
227
if h == nil {
217
- h , pattern = c .pathMatch ("*" )
228
+ h , pattern = c .matchHandler ("*" )
218
229
}
219
230
if h == nil {
220
231
h , pattern = VoidHandler (), ""
@@ -298,7 +309,7 @@ func (c *Crawler) scanRequestWork(workCh chan chan *http.Request, closeCh chan i
298
309
logrus .Panicf ("antch: Handler got panic error: %v" , r )
299
310
}
300
311
}()
301
- h , _ := c .Handler (res . Request . URL )
312
+ h , _ := c .Handler (res )
302
313
h .ServeSpider (c .writeCh , res )
303
314
}(re .res )
304
315
}
0 commit comments