@@ -5,6 +5,8 @@ use futures::AsyncReadExt;
55use hf_xet_wasm:: blob_reader:: BlobReader ;
66use hf_xet_wasm:: configurations:: { DataConfig , RepoSalt , ShardConfig , TranslatorConfig } ;
77use hf_xet_wasm:: wasm_file_upload_session:: FileUploadSession ;
8+ use hf_xet_wasm:: wasm_timer:: Timer ;
9+ use log:: Level ;
810use tokio:: sync:: mpsc;
911use utils:: auth:: AuthConfig ;
1012use wasm_bindgen:: prelude:: * ;
@@ -13,7 +15,7 @@ use wasm_thread as thread;
1315fn main ( ) {
1416 #[ cfg( target_arch = "wasm32" ) ]
1517 {
16- console_log:: init ( ) . unwrap ( ) ;
18+ console_log:: init_with_level ( Level :: Info ) . unwrap ( ) ;
1719 console_error_panic_hook:: set_once ( ) ;
1820 }
1921
@@ -142,7 +144,10 @@ pub async fn test_async_blob_reader(file: web_sys::File) -> String {
142144pub async fn clean_file ( file : web_sys:: File , endpoint : String , jwt_token : String , expiration : u64 ) -> String {
143145 log:: debug!( "clean_file called with {file:?}, {endpoint}, {jwt_token}, {expiration}" ) ;
144146
147+ let _timer = Timer :: new_enforce_report ( "clean file main" ) ;
148+
145149 let filename = file. name ( ) ;
150+ let filesize = file. size ( ) ;
146151
147152 let Ok ( blob) = file. slice ( ) else {
148153 log:: error!( "failed to convert a file to blob" ) ;
@@ -169,16 +174,19 @@ pub async fn clean_file(file: web_sys::File, endpoint: String, jwt_token: String
169174
170175 let upload_session = Arc :: new ( FileUploadSession :: new ( Arc :: new ( config) ) ) ;
171176
172- let mut handle = upload_session. start_clean ( filename) ;
177+ let mut handle = upload_session. start_clean ( filename, None ) ;
173178
174179 const READ_BUF_SIZE : usize = 8 * 1024 * 1024 ;
175180 let mut buf = vec ! [ 0u8 ; READ_BUF_SIZE ] ;
176181 let mut total_read = 0 ;
182+ let mut last_report = 0. ;
177183 loop {
184+ let _timer = Timer :: new ( format ! ( "read file at {total_read}" ) ) ;
178185 let Ok ( bytes) = reader. read ( & mut buf) . await else {
179186 log:: error!( "failed to read from reader" ) ;
180187 return "" . to_owned ( ) ;
181188 } ;
189+ drop ( _timer) ;
182190 if bytes == 0 {
183191 break ;
184192 }
@@ -192,7 +200,13 @@ pub async fn clean_file(file: web_sys::File, endpoint: String, jwt_token: String
192200 return "" . to_owned ( ) ;
193201 } ;
194202
195- log:: debug!( "processed {total_read} bytes" ) ;
203+ log:: debug!( "read {total_read} bytes" ) ;
204+
205+ let percentage = total_read as f64 / filesize * 100. ;
206+ if ( percentage - last_report) > 10. {
207+ log:: info!( "processing {percentage:.2}% of file" ) ;
208+ last_report = percentage;
209+ }
196210 }
197211 let Ok ( ( file_hash, sha256, _metrics) ) = handle. finish ( ) . await else {
198212 log:: error!( "failed to finish cleaner" ) ;
0 commit comments