-
Notifications
You must be signed in to change notification settings - Fork 260
perf(NODE-6246): Significantly improve memory usage and performance of ObjectId #703
New issue
Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.
By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.
Already on GitHub? Sign in to your account
Changes from 8 commits
8986b69
4c5d5dc
1f07ba1
296ac15
21dc61d
80526c7
b858ab1
32e0a9b
639cc3c
File filter
Filter by extension
Conversations
Jump to
Diff view
Diff view
There are no files selected for viewing
Original file line number | Diff line number | Diff line change |
---|---|---|
|
@@ -3,12 +3,15 @@ import { BSONError } from './error'; | |
import { type InspectFn, defaultInspect } from './parser/utils'; | ||
import { ByteUtils } from './utils/byte_utils'; | ||
import { NumberUtils } from './utils/number_utils'; | ||
import { flattenString } from './utils/string_utils'; | ||
|
||
// Regular expression that checks for hex value | ||
const checkForHexRegExp = new RegExp('^[0-9a-fA-F]{24}$'); | ||
const checkForHexRegExp = new RegExp('^[0-9a-f]{24}$'); | ||
|
||
// Unique sequence for the current process (initialized on first use) | ||
let PROCESS_UNIQUE: Uint8Array | null = null; | ||
let PROCESS_UNIQUE: string | null = null; | ||
|
||
const OID_SKIP_VALIDATE = Symbol(); | ||
|
||
/** @public */ | ||
export interface ObjectIdLike { | ||
|
@@ -34,13 +37,24 @@ export class ObjectId extends BSONValue { | |
|
||
/** @internal */ | ||
private static index = Math.floor(Math.random() * 0xffffff); | ||
/** @internal */ | ||
private static lastTimeGenerate?: number; | ||
/** @internal */ | ||
private static timeHexCache?: string; | ||
|
||
/** @deprecated Hex string is always cached */ | ||
static cacheHexString: boolean; | ||
SeanReece marked this conversation as resolved.
Show resolved
Hide resolved
|
||
|
||
/** | ||
* Cache buffer internally | ||
* Uses much more memory but can speed up performance if performing lots of buffer specific tasks | ||
*/ | ||
static cacheBuffer: boolean; | ||
|
||
/** ObjectId Bytes @internal */ | ||
private buffer!: Uint8Array; | ||
private buffer?: Uint8Array; | ||
/** ObjectId hexString cache @internal */ | ||
private __id?: string; | ||
private __id!: string; | ||
|
||
SeanReece marked this conversation as resolved.
Show resolved
Hide resolved
|
||
/** | ||
* Create ObjectId from a number. | ||
|
@@ -55,6 +69,8 @@ export class ObjectId extends BSONValue { | |
* @param inputId - A 24 character hex string. | ||
*/ | ||
constructor(inputId: string); | ||
/** @internal */ | ||
constructor(inputId: string, _internalFlag?: symbol); | ||
/** | ||
* Create ObjectId from the BSON ObjectId type. | ||
* | ||
|
@@ -72,7 +88,7 @@ export class ObjectId extends BSONValue { | |
* | ||
* @param inputId - A 12 byte binary Buffer. | ||
*/ | ||
constructor(inputId: Uint8Array); | ||
constructor(inputId: Uint8Array, offset?: number); | ||
/** To generate a new ObjectId, use ObjectId() with no argument. */ | ||
constructor(); | ||
/** | ||
|
@@ -86,7 +102,11 @@ export class ObjectId extends BSONValue { | |
* | ||
* @param inputId - An input value to create a new ObjectId from. | ||
*/ | ||
constructor(inputId?: string | number | ObjectId | ObjectIdLike | Uint8Array) { | ||
constructor( | ||
inputId?: string | number | ObjectId | ObjectIdLike | Uint8Array, | ||
option?: symbol | number | ||
) { | ||
let bufferCache: Uint8Array | undefined; | ||
super(); | ||
// workingId is set based on type of input and whether valid id exists for the input | ||
let workingId; | ||
|
@@ -95,7 +115,8 @@ export class ObjectId extends BSONValue { | |
throw new BSONError('Argument passed in must have an id that is of type string or Buffer'); | ||
} | ||
if ('toHexString' in inputId && typeof inputId.toHexString === 'function') { | ||
workingId = ByteUtils.fromHex(inputId.toHexString()); | ||
workingId = inputId.toHexString(); | ||
option = OID_SKIP_VALIDATE; | ||
} else { | ||
workingId = inputId.id; | ||
} | ||
|
@@ -104,27 +125,34 @@ export class ObjectId extends BSONValue { | |
} | ||
|
||
// The following cases use workingId to construct an ObjectId | ||
if (workingId == null || typeof workingId === 'number') { | ||
if (typeof workingId === 'string') { | ||
if (option === OID_SKIP_VALIDATE) { | ||
this.__id = workingId; | ||
} else { | ||
const validString = ObjectId.validateHexString(workingId); | ||
if (validString) { | ||
this.__id = validString; | ||
} else { | ||
throw new BSONError( | ||
'input must be a 24 character hex string, 12 byte Uint8Array, or an integer' | ||
); | ||
} | ||
} | ||
} else if (workingId == null || typeof workingId === 'number') { | ||
// The most common use case (blank id, new objectId instance) | ||
// Generate a new id | ||
this.buffer = ObjectId.generate(typeof workingId === 'number' ? workingId : undefined); | ||
} else if (ArrayBuffer.isView(workingId) && workingId.byteLength === 12) { | ||
this.__id = ObjectId.generate(typeof workingId === 'number' ? workingId : undefined); | ||
} else if (ArrayBuffer.isView(workingId)) { | ||
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. Shouldn't we keep the There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. This change goes along with the change to deserialization code here Instead of allocating a new UInt8Array(12), copying over the 12 bytes from the parent buffer, then passing in that new buffer, only to convert it to a string and throw away that temporary buffer. You can pass in a buffer and an offset to It's probably worth enforcing the buffer size === 12 if no offset was passed in, also if There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more.
Yea those sound like the correct assertions. It's important that when inspecting the ObjectId you get the same visual results as you do when serializing. So whether it is a view on a larger arrayBuffer or exactly 12 bytes, we just do not want to permit an OID to carry around a buffer that implies more or less than exactly that amount of data. |
||
// If instanceof matches we can escape calling ensure buffer in Node.js environments | ||
this.buffer = ByteUtils.toLocalBufferType(workingId); | ||
} else if (typeof workingId === 'string') { | ||
if (workingId.length === 24 && checkForHexRegExp.test(workingId)) { | ||
this.buffer = ByteUtils.fromHex(workingId); | ||
} else { | ||
throw new BSONError( | ||
'input must be a 24 character hex string, 12 byte Uint8Array, or an integer' | ||
); | ||
} | ||
bufferCache = ByteUtils.toLocalBufferType(workingId); | ||
const offset = (option as number) || 0; | ||
this.__id = ByteUtils.toHex(bufferCache, offset, offset + 12); | ||
} else { | ||
throw new BSONError('Argument passed in does not match the accepted types'); | ||
} | ||
// If we are caching the hex string | ||
if (ObjectId.cacheHexString) { | ||
this.__id = ByteUtils.toHex(this.id); | ||
// If we are caching the buffer | ||
if (ObjectId.cacheBuffer) { | ||
SeanReece marked this conversation as resolved.
Show resolved
Hide resolved
|
||
this.buffer = bufferCache || ByteUtils.fromHex(this.__id); | ||
} | ||
} | ||
|
||
|
@@ -133,29 +161,31 @@ export class ObjectId extends BSONValue { | |
* @readonly | ||
*/ | ||
get id(): Uint8Array { | ||
return this.buffer; | ||
return this.buffer || ByteUtils.fromHex(this.__id); | ||
} | ||
|
||
set id(value: Uint8Array) { | ||
this.buffer = value; | ||
if (ObjectId.cacheHexString) { | ||
this.__id = ByteUtils.toHex(value); | ||
} | ||
this.__id = ByteUtils.toHex(value); | ||
} | ||
|
||
/** Returns the ObjectId id as a 24 lowercase character hex string representation */ | ||
toHexString(): string { | ||
if (ObjectId.cacheHexString && this.__id) { | ||
return this.__id; | ||
} | ||
|
||
const hexString = ByteUtils.toHex(this.id); | ||
|
||
if (ObjectId.cacheHexString && !this.__id) { | ||
this.__id = hexString; | ||
} | ||
return this.__id; | ||
} | ||
|
||
return hexString; | ||
/** | ||
* @internal | ||
* Validates the input string is a valid hex representation of an ObjectId. | ||
* If valid, returns the input string. Otherwise, returns false. | ||
* Returned string is lowercase. | ||
*/ | ||
private static validateHexString(input: string): false | string { | ||
if (input == null) return false; | ||
if (input.length !== 24) return false; | ||
if (checkForHexRegExp.test(input)) return input; | ||
const inputLower = input.toLowerCase(); | ||
if (checkForHexRegExp.test(inputLower)) return inputLower; | ||
return false; | ||
} | ||
|
||
/** | ||
|
@@ -167,39 +197,49 @@ export class ObjectId extends BSONValue { | |
} | ||
|
||
/** | ||
* Generate a 12 byte id buffer used in ObjectId's | ||
* | ||
* @param time - pass in a second based timestamp. | ||
* Generates the hex timestamp from a second based number or the current time. | ||
* @internal | ||
*/ | ||
static generate(time?: number): Uint8Array { | ||
private static getTimeHex(time?: number): string { | ||
if ('number' !== typeof time) { | ||
time = Math.floor(Date.now() / 1000); | ||
} else { | ||
time = time % 0xffffffff; | ||
SeanReece marked this conversation as resolved.
Show resolved
Hide resolved
|
||
} | ||
|
||
if (!ObjectId.timeHexCache || time !== ObjectId.lastTimeGenerate) { | ||
ObjectId.lastTimeGenerate = time; | ||
// This is moderately expensive so we can cache this for repetitive calls | ||
ObjectId.timeHexCache = time.toString(16); | ||
SeanReece marked this conversation as resolved.
Show resolved
Hide resolved
|
||
// Timestamps before 1978-07-04T21:24:16.000Z (0x10000000 seconds) are represented in fewer than 8 hex digits so we need to padStart | ||
if (ObjectId.timeHexCache.length < 8) { | ||
ObjectId.timeHexCache = ObjectId.timeHexCache.padStart(8, '0'); | ||
} | ||
} | ||
return ObjectId.timeHexCache; | ||
} | ||
|
||
/** | ||
* Generate a 24 character hex string id (4-byte timestamp, 5-byte process unique, 3-byte counter) used in ObjectIds | ||
* | ||
* @param time - pass in a second based timestamp. | ||
*/ | ||
static generate(time?: number): string { | ||
const inc = ObjectId.getInc(); | ||
const buffer = ByteUtils.allocateUnsafe(12); | ||
|
||
// 4-byte timestamp | ||
NumberUtils.setInt32BE(buffer, 0, time); | ||
const timeString = ObjectId.getTimeHex(time); | ||
|
||
// set PROCESS_UNIQUE if yet not initialized | ||
if (PROCESS_UNIQUE === null) { | ||
PROCESS_UNIQUE = ByteUtils.randomBytes(5); | ||
PROCESS_UNIQUE = ByteUtils.toHex(ByteUtils.randomBytes(5)); | ||
} | ||
|
||
// 5-byte process unique | ||
buffer[4] = PROCESS_UNIQUE[0]; | ||
buffer[5] = PROCESS_UNIQUE[1]; | ||
buffer[6] = PROCESS_UNIQUE[2]; | ||
buffer[7] = PROCESS_UNIQUE[3]; | ||
buffer[8] = PROCESS_UNIQUE[4]; | ||
|
||
// 3-byte counter | ||
buffer[11] = inc & 0xff; | ||
buffer[10] = (inc >> 8) & 0xff; | ||
buffer[9] = (inc >> 16) & 0xff; | ||
const incString = inc.toString(16).padStart(6, '0'); | ||
|
||
return buffer; | ||
// Flatten concatenated string to save memory | ||
return flattenString(timeString + PROCESS_UNIQUE + incString); | ||
} | ||
|
||
/** | ||
|
@@ -209,13 +249,13 @@ export class ObjectId extends BSONValue { | |
toString(encoding?: 'hex' | 'base64'): string { | ||
// Is the id a buffer then use the buffer toString method to return the format | ||
if (encoding === 'base64') return ByteUtils.toBase64(this.id); | ||
if (encoding === 'hex') return this.toHexString(); | ||
return this.toHexString(); | ||
if (encoding === 'hex') return this.__id; | ||
return this.__id; | ||
} | ||
|
||
/** Converts to its JSON the 24 character hex string representation. */ | ||
toJSON(): string { | ||
return this.toHexString(); | ||
return this.__id; | ||
} | ||
|
||
/** @internal */ | ||
|
@@ -239,18 +279,16 @@ export class ObjectId extends BSONValue { | |
} | ||
|
||
if (ObjectId.is(otherId)) { | ||
return ( | ||
this.buffer[11] === otherId.buffer[11] && ByteUtils.equals(this.buffer, otherId.buffer) | ||
); | ||
return this.__id === otherId.__id; | ||
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. The tests: Should we continue to check the LSB? or maybe just the least significant hex character? Unsure if the same optimization applies to strings. There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. It looks like performing a character comparison with strings is a little slower, so I think we can update/remove these tests. const a = crypto.randomBytes(12).toString("hex");
const b = crypto.randomBytes(12).toString("hex");
console.log(a, b);
suite
.add("string compare", function () {
return a === b;
})
.add("String char compare", function () {
return a[0] === b[0] && a === b;
}) ![]() |
||
} | ||
|
||
if (typeof otherId === 'string') { | ||
return otherId.toLowerCase() === this.toHexString(); | ||
return otherId === this.__id || otherId.toLowerCase() === this.__id; | ||
} | ||
|
||
if (typeof otherId === 'object' && typeof otherId.toHexString === 'function') { | ||
const otherIdString = otherId.toHexString(); | ||
const thisIdString = this.toHexString(); | ||
const thisIdString = this.__id; | ||
return typeof otherIdString === 'string' && otherIdString.toLowerCase() === thisIdString; | ||
} | ||
|
||
|
@@ -259,10 +297,7 @@ export class ObjectId extends BSONValue { | |
|
||
/** Returns the generation date (accurate up to the second) that this ID was generated. */ | ||
getTimestamp(): Date { | ||
const timestamp = new Date(); | ||
const time = NumberUtils.getUint32BE(this.buffer, 0); | ||
timestamp.setTime(Math.floor(time) * 1000); | ||
return timestamp; | ||
return new Date(parseInt(this.__id.substring(0, 8), 16) * 1000); | ||
} | ||
|
||
/** @internal */ | ||
|
@@ -272,18 +307,26 @@ export class ObjectId extends BSONValue { | |
|
||
/** @internal */ | ||
serializeInto(uint8array: Uint8Array, index: number): 12 { | ||
uint8array[index] = this.buffer[0]; | ||
uint8array[index + 1] = this.buffer[1]; | ||
uint8array[index + 2] = this.buffer[2]; | ||
uint8array[index + 3] = this.buffer[3]; | ||
uint8array[index + 4] = this.buffer[4]; | ||
uint8array[index + 5] = this.buffer[5]; | ||
uint8array[index + 6] = this.buffer[6]; | ||
uint8array[index + 7] = this.buffer[7]; | ||
uint8array[index + 8] = this.buffer[8]; | ||
uint8array[index + 9] = this.buffer[9]; | ||
uint8array[index + 10] = this.buffer[10]; | ||
uint8array[index + 11] = this.buffer[11]; | ||
let temp = parseInt(this.__id.substring(0, 8), 16); | ||
|
||
uint8array[index + 3] = temp & 0xff; | ||
uint8array[index + 2] = (temp >> 8) & 0xff; | ||
uint8array[index + 1] = (temp >> 16) & 0xff; | ||
uint8array[index + 0] = (temp >> 24) & 0xff; | ||
|
||
SeanReece marked this conversation as resolved.
Show resolved
Hide resolved
|
||
temp = parseInt(this.__id.substring(8, 16), 16); | ||
|
||
uint8array[index + 7] = temp & 0xff; | ||
uint8array[index + 6] = (temp >> 8) & 0xff; | ||
uint8array[index + 5] = (temp >> 16) & 0xff; | ||
uint8array[index + 4] = (temp >> 24) & 0xff; | ||
|
||
temp = parseInt(this.__id.substring(16, 24), 16); | ||
|
||
uint8array[index + 11] = temp & 0xff; | ||
uint8array[index + 10] = (temp >> 8) & 0xff; | ||
uint8array[index + 9] = (temp >> 16) & 0xff; | ||
uint8array[index + 8] = (temp >> 24) & 0xff; | ||
return 12; | ||
} | ||
|
||
|
@@ -293,12 +336,8 @@ export class ObjectId extends BSONValue { | |
* @param time - an integer number representing a number of seconds. | ||
*/ | ||
static createFromTime(time: number): ObjectId { | ||
const buffer = ByteUtils.allocate(12); | ||
for (let i = 11; i >= 4; i--) buffer[i] = 0; | ||
// Encode time into first 4 bytes | ||
NumberUtils.setInt32BE(buffer, 0, time); | ||
// Return the new objectId | ||
return new ObjectId(buffer); | ||
return new ObjectId(time); | ||
} | ||
|
||
/** | ||
|
@@ -311,7 +350,7 @@ export class ObjectId extends BSONValue { | |
throw new BSONError('hex string must be 24 characters'); | ||
} | ||
|
||
return new ObjectId(ByteUtils.fromHex(hexString)); | ||
return new ObjectId(hexString); | ||
} | ||
|
||
/** Creates an ObjectId instance from a base64 string */ | ||
|
@@ -329,6 +368,7 @@ export class ObjectId extends BSONValue { | |
*/ | ||
static isValid(id: string | number | ObjectId | ObjectIdLike | Uint8Array): boolean { | ||
if (id == null) return false; | ||
if (typeof id === 'string') return !!ObjectId.validateHexString(id); | ||
|
||
try { | ||
new ObjectId(id); | ||
|
@@ -340,13 +380,12 @@ export class ObjectId extends BSONValue { | |
|
||
/** @internal */ | ||
toExtendedJSON(): ObjectIdExtended { | ||
if (this.toHexString) return { $oid: this.toHexString() }; | ||
return { $oid: this.toString('hex') }; | ||
return { $oid: this.__id }; | ||
} | ||
|
||
/** @internal */ | ||
static fromExtendedJSON(doc: ObjectIdExtended): ObjectId { | ||
return new ObjectId(doc.$oid); | ||
return new ObjectId(doc.$oid, OID_SKIP_VALIDATE); | ||
} | ||
|
||
/** | ||
|
@@ -356,6 +395,6 @@ export class ObjectId extends BSONValue { | |
*/ | ||
inspect(depth?: number, options?: unknown, inspect?: InspectFn): string { | ||
inspect ??= defaultInspect; | ||
return `new ObjectId(${inspect(this.toHexString(), options)})`; | ||
return `new ObjectId(${inspect(this.__id, options)})`; | ||
} | ||
} |
Uh oh!
There was an error while loading. Please reload this page.