Skip to content

Commit 7bf5720

Browse files
authored
feat: optional field count threshold (#231)
1 parent d693c0d commit 7bf5720

File tree

2 files changed

+104
-31
lines changed

2 files changed

+104
-31
lines changed

src/schema-analyzer.ts

Lines changed: 13 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -167,6 +167,9 @@ type AllSchemaParseOptions = {
167167
storeValues: boolean;
168168
signal?: AbortSignal;
169169
storedValuesLengthLimit: number;
170+
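/** Complexity limit: the maximum number of distinct fields to analyze.
171+
* The analysis is aborted (an error is thrown) once this threshold is exceeded; if unset or 0, no limit is applied. */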
172+
distinctFieldsAbortThreshold?: number;
170173
};
171174
export type SchemaParseOptions = Partial<AllSchemaParseOptions>;
172175

@@ -469,6 +472,7 @@ export class SchemaAnalyzer {
469472
semanticTypes: SemanticTypeMap;
470473
options: AllSchemaParseOptions;
471474
documentsAnalyzed = 0;
475+
fieldsCount = 0;
472476
schemaAnalysisRoot: SchemaAnalysisRoot = {
473477
fields: Object.create(null),
474478
count: 0
@@ -508,6 +512,14 @@ export class SchemaAnalyzer {
508512
}
509513
}
510514

515+
/**
 * Registers one more field against the optional complexity limit.
 *
 * Does nothing (the counter is not even incremented) when
 * `options.distinctFieldsAbortThreshold` is unset or 0. Otherwise it
 * increments `fieldsCount` and aborts once the count is strictly greater
 * than the threshold.
 *
 * @throws Error with the message
 * `Schema analysis aborted: Fields count above <threshold>` when the
 * incremented count exceeds the configured threshold.
 */
increaseFieldCount() {
516+
if (!this.options.distinctFieldsAbortThreshold) return;
517+
this.fieldsCount++;
518+
if (this.fieldsCount > this.options.distinctFieldsAbortThreshold) {
519+
throw new Error(`Schema analysis aborted: Fields count above ${this.options.distinctFieldsAbortThreshold}`);
520+
}
521+
}
522+
511523
getSemanticType(value: BSONValue, path: string[]) {
512524
// Pass value to semantic type detectors, return first match or undefined.
513525
const returnValue = Object.entries(this.semanticTypes)
@@ -580,6 +592,7 @@ export class SchemaAnalyzer {
580592
count: 0,
581593
types: Object.create(null)
582594
};
595+
this.increaseFieldCount();
583596
}
584597
const field = schema[fieldName];
585598

test/bloated.test.ts

Lines changed: 91 additions & 31 deletions
Original file line numberDiff line numberDiff line change
@@ -14,40 +14,100 @@ function generateRandomString(length: number) {
1414
}
1515

1616
describe('bloated documents', function() {
17-
it('really long string is cropped', async function() {
18-
const documents = [{
19-
str: generateRandomString(20000)
20-
}];
21-
const schema = await getSchema(documents);
22-
const stringLength = ((schema.fields[0].types[0] as PrimitiveSchemaType).values[0] as string).length;
23-
assert.ok(stringLength <= 10000);
24-
});
17+
describe('sizeable sample values', function() {
18+
it('really long string is cropped', async function() {
19+
const documents = [{
20+
str: generateRandomString(20000)
21+
}];
22+
const schema = await getSchema(documents);
23+
const stringLength = ((schema.fields[0].types[0] as PrimitiveSchemaType).values[0] as string).length;
24+
assert.ok(stringLength <= 10000);
25+
});
2526

26-
it('really long code is cropped', async function() {
27-
const documents = [{
28-
code: new Code(generateRandomString(20000))
29-
}];
30-
const schema = await getSchema(documents);
31-
const codeLength = ((schema.fields[0].types[0] as PrimitiveSchemaType).values[0] as Code).code.length;
32-
assert.ok(codeLength <= 10000);
33-
});
27+
it('really long code is cropped', async function() {
28+
const documents = [{
29+
code: new Code(generateRandomString(20000))
30+
}];
31+
const schema = await getSchema(documents);
32+
const codeLength = ((schema.fields[0].types[0] as PrimitiveSchemaType).values[0] as Code).code.length;
33+
assert.ok(codeLength <= 10000);
34+
});
35+
36+
it('really long binary is cropped', async function() {
37+
const documents = [{
38+
binData: new Binary(Buffer.from(generateRandomString(20000)), 2)
39+
}];
40+
const schema = await getSchema(documents);
41+
const binary = ((schema.fields[0].types[0] as PrimitiveSchemaType).values[0] as Binary);
42+
assert.ok(binary.length() <= 10000);
43+
assert.strictEqual(binary.sub_type, 2);
44+
});
3445

35-
it('really long binary is cropped', async function() {
36-
const documents = [{
37-
binData: new Binary(Buffer.from(generateRandomString(20000)), 2)
38-
}];
39-
const schema = await getSchema(documents);
40-
const binary = ((schema.fields[0].types[0] as PrimitiveSchemaType).values[0] as Binary);
41-
assert.ok(binary.length() <= 10000);
42-
assert.strictEqual(binary.sub_type, 2);
46+
it('the limit is configurable', async function() {
47+
const documents = [{
48+
str: generateRandomString(20000)
49+
}];
50+
const schema = await getSchema(documents, { storedValuesLengthLimit: 5 });
51+
const stringLength = ((schema.fields[0].types[0] as PrimitiveSchemaType).values[0] as string).length;
52+
assert.ok(stringLength === 5);
53+
});
4354
});
4455

45-
it('the limit is configurable', async function() {
46-
const documents = [{
47-
str: generateRandomString(20000)
48-
}];
49-
const schema = await getSchema(documents, { storedValuesLengthLimit: 5 });
50-
const stringLength = ((schema.fields[0].types[0] as PrimitiveSchemaType).values[0] as string).length;
51-
assert.ok(stringLength === 5);
56+
describe('high complexity', function() {
57+
it('aborts after reaching the given limit', async function() {
58+
const documents = [{
59+
field1: 'abc',
60+
field2: 'bca',
61+
field3: 'cba',
62+
field4: 'cab',
63+
field5: 'bac'
64+
}];
65+
try {
66+
await getSchema(documents, { distinctFieldsAbortThreshold: 4 });
67+
assert.fail('Analysis did not throw');
68+
} catch (error) {
69+
assert.strictEqual((error as Error).message, 'Schema analysis aborted: Fields count above 4');
70+
}
71+
});
72+
73+
it('aborts after reaching the given limit - nested', async function() {
74+
const documents = [{
75+
field1: {
76+
field2: {
77+
field3: 'abc',
78+
field4: 'bca'
79+
},
80+
field5: 'cab'
81+
}
82+
}];
83+
try {
84+
await getSchema(documents, { distinctFieldsAbortThreshold: 4 });
85+
assert.fail('Analysis did not throw');
86+
} catch (error) {
87+
assert.strictEqual((error as Error).message, 'Schema analysis aborted: Fields count above 4');
88+
}
89+
});
90+
91+
it('does not count the same field in different documents', async function() {
92+
const documents = [{
93+
field1: {
94+
field2: {
95+
field3: 'abc'
96+
}
97+
}
98+
}, {
99+
field1: {
100+
field2: {
101+
field3: 'bca'
102+
}
103+
}
104+
}];
105+
try {
106+
await getSchema(documents, { distinctFieldsAbortThreshold: 4 });
107+
assert.ok('Analysis finished');
108+
} catch (error) {
109+
assert.fail('Analysis aborted unexpectedly');
110+
}
111+
});
52112
});
53113
});

0 commit comments

Comments
 (0)