@@ -18,6 +18,7 @@ import {
18
18
19
19
import semanticTypes from './semantic-types' ;
20
20
import { AnyIterable } from './types' ;
21
+ import { allowAbort , ALLOW_ABORT_INTERVAL_COUNT } from './util' ;
21
22
22
23
type TypeCastMap = {
23
24
Array : unknown [ ] ;
@@ -484,6 +485,10 @@ export class SchemaAnalyzer {
484
485
fields : [ ]
485
486
} ;
486
487
488
+ // Increments when every field or type is analyzed.
489
+ // Useful for occasionally checking if the analysis should be aborted.
490
+ fieldAndTypeAnalysisCounter = 0 ;
491
+
487
492
constructor ( options ?: SchemaParseOptions ) {
488
493
// Set default options.
489
494
this . options = { ...defaultSchemaParseOptions , ...options } ;
@@ -512,6 +517,13 @@ export class SchemaAnalyzer {
512
517
}
513
518
}
514
519
520
+ allowAbort ( ) {
521
+ // Allow aborting the analysis.
522
+ if ( this . fieldAndTypeAnalysisCounter ++ % ALLOW_ABORT_INTERVAL_COUNT === 0 ) {
523
+ allowAbort ( ) ;
524
+ }
525
+ }
526
+
515
527
increaseFieldCount ( ) {
516
528
if ( ! this . options . distinctFieldsAbortThreshold ) return ;
517
529
this . fieldsCount ++ ;
@@ -531,14 +543,15 @@ export class SchemaAnalyzer {
531
543
return returnValue ;
532
544
}
533
545
534
- analyzeDoc ( doc : Document ) {
546
+ async analyzeDoc ( doc : Document ) {
535
547
this . finalized = false ;
536
548
/**
537
549
* Takes a field value, determines the correct type, handles recursion into
538
550
* nested arrays and documents, and passes the value down to `addToValue`.
539
551
* Note: This mutates the `schema` argument.
540
552
*/
541
- const addToType = ( path : string [ ] , value : BSONValue , schema : SchemaAnalysisFieldTypes ) => {
553
+ const addToType = async ( path : string [ ] , value : BSONValue , schema : SchemaAnalysisFieldTypes ) => {
554
+ await this . allowAbort ( ) ;
542
555
const bsonType = getBSONType ( value ) ;
543
556
// If semantic type detection is enabled, the type is the semantic type
544
557
// or the original bson type if no semantic type was detected. If disabled,
@@ -560,13 +573,16 @@ export class SchemaAnalyzer {
560
573
type . types = type . types ?? Object . create ( null ) ;
561
574
type . lengths = type . lengths ?? [ ] ;
562
575
type . lengths . push ( ( value as BSONValue [ ] ) . length ) ;
563
- ( value as BSONValue [ ] ) . forEach ( ( v : BSONValue ) => addToType ( path , v , type . types ) ) ;
576
+ for ( const v of ( value as BSONValue [ ] ) ) {
577
+ await addToType ( path , v , type . types ) ;
578
+ }
564
579
} else if ( isDocumentType ( type ) ) {
565
580
// Recurse into nested documents by calling `addToField` for all sub-fields.
566
581
type . fields = type . fields ?? Object . create ( null ) ;
567
- Object . entries ( value as Document ) . forEach (
568
- ( [ fieldName , v ] ) => addToField ( fieldName , [ ...path , fieldName ] , v , type . fields )
569
- ) ;
582
+
583
+ for ( const [ fieldName , v ] of Object . entries ( value as Document ) ) {
584
+ await addToField ( fieldName , [ ...path , fieldName ] , v , type . fields ) ;
585
+ }
570
586
} else if ( this . options . storeValues && ! isNullType ( type ) ) {
571
587
// When the `storeValues` option is enabled, store some example values.
572
588
if ( ! type . values ) {
@@ -584,7 +600,8 @@ export class SchemaAnalyzer {
584
600
* Handles a field from a document. Passes the value to `addToType`.
585
601
* Note: This mutates the `schema` argument.
586
602
*/
587
- const addToField = ( fieldName : string , path : string [ ] , value : BSONValue , schema : SchemaAnalysisFieldsMap ) => {
603
+ const addToField = async ( fieldName : string , path : string [ ] , value : BSONValue , schema : SchemaAnalysisFieldsMap ) => {
604
+ await this . allowAbort ( ) ;
588
605
if ( ! schema [ fieldName ] ) {
589
606
schema [ fieldName ] = {
590
607
name : fieldName ,
@@ -597,11 +614,11 @@ export class SchemaAnalyzer {
597
614
const field = schema [ fieldName ] ;
598
615
599
616
field . count ++ ;
600
- addToType ( path , value , field . types ) ;
617
+ await addToType ( path , value , field . types ) ;
601
618
} ;
602
619
603
620
for ( const key of Object . keys ( doc ) ) {
604
- addToField ( key , [ key ] , doc [ key ] , this . schemaAnalysisRoot . fields ) ;
621
+ await addToField ( key , [ key ] , doc [ key ] , this . schemaAnalysisRoot . fields ) ;
605
622
}
606
623
this . schemaAnalysisRoot . count += 1 ;
607
624
}
@@ -652,7 +669,7 @@ export async function getCompletedSchemaAnalyzer(
652
669
const analyzer = new SchemaAnalyzer ( options ) ;
653
670
for await ( const doc of verifyStreamSource ( source ) ) {
654
671
if ( options ?. signal ?. aborted ) throw options . signal . reason ;
655
- analyzer . analyzeDoc ( doc ) ;
672
+ await analyzer . analyzeDoc ( doc ) ;
656
673
}
657
674
return analyzer ;
658
675
}
0 commit comments