@@ -80,50 +80,52 @@ async function crawlAvito(task1Collection) {
80
80
81
81
let totalPageCount = 0 ;
82
82
83
- for ( let metroSet of metroSets ) {
84
- console . log ( `pmin: ${ metroSet . pmin } pmax: ${ metroSet . pmax } ` ) ;
85
-
86
- let p = 1 , pageCount = 0 ;
87
- while ( true ) {
88
- try {
89
- let response = await request ( {
90
- uri : ( metroSet . pmax ?
91
- `https://www.avito.ru/moskva/kollektsionirovanie/monety?p=${ p } &view=list&pmin=${ metroSet . pmin } &pmax=${ metroSet . pmax } ` :
92
- `https://www.avito.ru/moskva/kollektsionirovanie/monety?p=${ p } &view=list&pmin=${ metroSet . pmin } ` ) ,
93
- resolveWithFullResponse : true ,
94
- followRedirect : function ( ) {
95
- return false ;
96
- } ,
97
- headers : {
98
- 'User-Agent' :
'Vadim Vinogradov ([email protected] ). Avito BI contest. Sorry :)'
99
- } ,
100
- simple : false
101
- } ) ;
102
-
103
- if ( response . statusCode === 200 ) {
104
- const $ = cheerio . load ( response . body ) ;
105
- await grepData ( $ , task1Collection ) ;
106
- pageCount = getActualPageCountFromCurrentPage ( $ ) ;
107
- console . log ( p , pageCount ) ;
108
- if ( pageCount === undefined ) {
109
- // we are on the last page
110
- totalPageCount += p ;
111
- break ;
83
+ while ( true ) {
84
+ for ( let metroSet of metroSets ) {
85
+ console . log ( `pmin: ${ metroSet . pmin } pmax: ${ metroSet . pmax } ` ) ;
86
+
87
+ let p = 1 , pageCount = 0 ;
88
+ while ( true ) {
89
+ try {
90
+ let response = await request ( {
91
+ uri : ( metroSet . pmax ?
92
+ `https://www.avito.ru/moskva/kollektsionirovanie/monety?p=${ p } &view=list&pmin=${ metroSet . pmin } &pmax=${ metroSet . pmax } ` :
93
+ `https://www.avito.ru/moskva/kollektsionirovanie/monety?p=${ p } &view=list&pmin=${ metroSet . pmin } ` ) ,
94
+ resolveWithFullResponse : true ,
95
+ followRedirect : function ( ) {
96
+ return false ;
97
+ } ,
98
+ headers : {
99
+ 'User-Agent' :
'Vadim Vinogradov ([email protected] ). Avito BI contest. Sorry :)'
100
+ } ,
101
+ simple : false
102
+ } ) ;
103
+
104
+ if ( response . statusCode === 200 ) {
105
+ const $ = cheerio . load ( response . body ) ;
106
+ await grepData ( $ , task1Collection ) ;
107
+ pageCount = getActualPageCountFromCurrentPage ( $ ) ;
108
+ console . log ( p , pageCount ) ;
109
+ if ( pageCount === undefined ) {
110
+ // we are on the last page
111
+ totalPageCount += p ;
112
+ break ;
113
+ } else {
114
+ assert ( p < pageCount ) ;
115
+ p ++ ;
116
+ }
112
117
} else {
113
- assert ( p < pageCount ) ;
114
- p ++ ;
118
+ console . log ( `FAIL: ` ) ;
119
+ process . exit ( 0 ) ;
115
120
}
116
- } else {
117
- console . log ( `FAIL: ` ) ;
118
- process . exit ( 0 ) ;
121
+ } catch ( exception ) {
122
+ console . log ( `Caught exception: ${ exception } . Waiting... ` ) ;
123
+ await mySetTimeout ( ) ;
119
124
}
120
- } catch ( exception ) {
121
- console . log ( `Caught exception: ${ exception } . Waiting...` ) ;
122
- await mySetTimeout ( ) ;
123
125
}
124
- }
125
126
126
- console . log ( `totalPageCount: ${ totalPageCount } ` ) ;
127
+ console . log ( `totalPageCount: ${ totalPageCount } ` ) ;
128
+ }
127
129
}
128
130
}
129
131
0 commit comments