Skip to content

perf(NODE-6246): Significantly improve memory usage and performance of ObjectId #703

New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Closed
217 changes: 128 additions & 89 deletions src/objectid.ts
Original file line number Diff line number Diff line change
Expand Up @@ -3,12 +3,15 @@ import { BSONError } from './error';
import { type InspectFn, defaultInspect } from './parser/utils';
import { ByteUtils } from './utils/byte_utils';
import { NumberUtils } from './utils/number_utils';
import { flattenString } from './utils/string_utils';

// Regular expression that checks for hex value
const checkForHexRegExp = new RegExp('^[0-9a-fA-F]{24}$');
const checkForHexRegExp = new RegExp('^[0-9a-f]{24}$');

// Unique sequence for the current process (initialized on first use)
let PROCESS_UNIQUE: Uint8Array | null = null;
let PROCESS_UNIQUE: string | null = null;

const OID_SKIP_VALIDATE = Symbol();

/** @public */
export interface ObjectIdLike {
Expand All @@ -34,13 +37,24 @@ export class ObjectId extends BSONValue {

/** @internal */
private static index = Math.floor(Math.random() * 0xffffff);
/** @internal */
private static lastTimeGenerate?: number;
/** @internal */
private static timeHexCache?: string;

/** @deprecated Hex string is always cached */
static cacheHexString: boolean;

/**
* Cache buffer internally
* Uses much more memory but can speed up performance if performing lots of buffer specific tasks
*/
static cacheBuffer: boolean;

/** ObjectId Bytes @internal */
private buffer!: Uint8Array;
private buffer?: Uint8Array;
/** ObjectId hexString cache @internal */
private __id?: string;
private __id!: string;

/**
* Create ObjectId from a number.
Expand All @@ -55,6 +69,8 @@ export class ObjectId extends BSONValue {
* @param inputId - A 24 character hex string.
*/
constructor(inputId: string);
/** @internal */
constructor(inputId: string, _internalFlag?: symbol);
/**
* Create ObjectId from the BSON ObjectId type.
*
Expand All @@ -72,7 +88,7 @@ export class ObjectId extends BSONValue {
*
* @param inputId - A 12 byte binary Buffer.
*/
constructor(inputId: Uint8Array);
constructor(inputId: Uint8Array, offset?: number);
/** To generate a new ObjectId, use ObjectId() with no argument. */
constructor();
/**
Expand All @@ -86,7 +102,11 @@ export class ObjectId extends BSONValue {
*
* @param inputId - An input value to create a new ObjectId from.
*/
constructor(inputId?: string | number | ObjectId | ObjectIdLike | Uint8Array) {
constructor(
inputId?: string | number | ObjectId | ObjectIdLike | Uint8Array,
option?: symbol | number
) {
let bufferCache: Uint8Array | undefined;
super();
// workingId is set based on type of input and whether valid id exists for the input
let workingId;
Expand All @@ -95,7 +115,8 @@ export class ObjectId extends BSONValue {
throw new BSONError('Argument passed in must have an id that is of type string or Buffer');
}
if ('toHexString' in inputId && typeof inputId.toHexString === 'function') {
workingId = ByteUtils.fromHex(inputId.toHexString());
workingId = inputId.toHexString();
option = OID_SKIP_VALIDATE;
} else {
workingId = inputId.id;
}
Expand All @@ -104,27 +125,34 @@ export class ObjectId extends BSONValue {
}

// The following cases use workingId to construct an ObjectId
if (workingId == null || typeof workingId === 'number') {
if (typeof workingId === 'string') {
if (option === OID_SKIP_VALIDATE) {
this.__id = workingId;
} else {
const validString = ObjectId.validateHexString(workingId);
if (validString) {
this.__id = validString;
} else {
throw new BSONError(
'input must be a 24 character hex string, 12 byte Uint8Array, or an integer'
);
}
}
} else if (workingId == null || typeof workingId === 'number') {
// The most common use case (blank id, new objectId instance)
// Generate a new id
this.buffer = ObjectId.generate(typeof workingId === 'number' ? workingId : undefined);
} else if (ArrayBuffer.isView(workingId) && workingId.byteLength === 12) {
this.__id = ObjectId.generate(typeof workingId === 'number' ? workingId : undefined);
} else if (ArrayBuffer.isView(workingId)) {
Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Shouldn't we keep the `&& workingId.byteLength === 12` validation here? Otherwise buffers could be of any length, right?

Copy link
Contributor Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

This change goes along with the change to deserialization code here

Instead of allocating a new Uint8Array(12), copying over the 12 bytes from the parent buffer, and passing in that new buffer only to convert it to a string and throw the temporary buffer away, you can pass a buffer and an offset to new ObjectId(buffer, offset), and it will just grab the 12 bytes starting at that offset and encode them to hex.

It's probably worth enforcing the buffer size === 12 if no offset was passed in, also if offset + 12 > byteLength. WDYT?

Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

It's probably worth enforcing the buffer size === 12 if no offset was passed in, also if offset + 12 > byteLength. WDYT?

Yeah, those sound like the correct assertions. It's important that when inspecting the ObjectId you get the same visual results as you do when serializing. So whether it is a view on a larger ArrayBuffer or exactly 12 bytes, we do not want to permit an OID to carry around a buffer that implies more or less than exactly that amount of data.

// If instanceof matches we can escape calling ensure buffer in Node.js environments
this.buffer = ByteUtils.toLocalBufferType(workingId);
} else if (typeof workingId === 'string') {
if (workingId.length === 24 && checkForHexRegExp.test(workingId)) {
this.buffer = ByteUtils.fromHex(workingId);
} else {
throw new BSONError(
'input must be a 24 character hex string, 12 byte Uint8Array, or an integer'
);
}
bufferCache = ByteUtils.toLocalBufferType(workingId);
const offset = (option as number) || 0;
this.__id = ByteUtils.toHex(bufferCache, offset, offset + 12);
} else {
throw new BSONError('Argument passed in does not match the accepted types');
}
// If we are caching the hex string
if (ObjectId.cacheHexString) {
this.__id = ByteUtils.toHex(this.id);
// If we are caching the buffer
if (ObjectId.cacheBuffer) {
this.buffer = bufferCache || ByteUtils.fromHex(this.__id);
}
}

Expand All @@ -133,29 +161,31 @@ export class ObjectId extends BSONValue {
* @readonly
*/
get id(): Uint8Array {
return this.buffer;
return this.buffer || ByteUtils.fromHex(this.__id);
}

set id(value: Uint8Array) {
this.buffer = value;
if (ObjectId.cacheHexString) {
this.__id = ByteUtils.toHex(value);
}
this.__id = ByteUtils.toHex(value);
}

/** Returns the ObjectId id as a 24 lowercase character hex string representation */
toHexString(): string {
if (ObjectId.cacheHexString && this.__id) {
return this.__id;
}

const hexString = ByteUtils.toHex(this.id);

if (ObjectId.cacheHexString && !this.__id) {
this.__id = hexString;
}
return this.__id;
}

return hexString;
/**
* @internal
* Validates the input string is a valid hex representation of an ObjectId.
* If valid, returns the input string. Otherwise, returns false.
* Returned string is lowercase.
*/
private static validateHexString(input: string): false | string {
if (input == null) return false;
if (input.length !== 24) return false;
if (checkForHexRegExp.test(input)) return input;
const inputLower = input.toLowerCase();
if (checkForHexRegExp.test(inputLower)) return inputLower;
return false;
}

/**
Expand All @@ -167,39 +197,49 @@ export class ObjectId extends BSONValue {
}

/**
* Generate a 12 byte id buffer used in ObjectId's
*
* @param time - pass in a second based timestamp.
* Generates the hex timestamp from a second based number or the current time.
* @internal
*/
static generate(time?: number): Uint8Array {
private static getTimeHex(time?: number): string {
if ('number' !== typeof time) {
time = Math.floor(Date.now() / 1000);
} else {
time = time % 0xffffffff;
}

if (!ObjectId.timeHexCache || time !== ObjectId.lastTimeGenerate) {
ObjectId.lastTimeGenerate = time;
// This is moderately expensive so we can cache this for repetitive calls
ObjectId.timeHexCache = time.toString(16);
// Dates before 1978-07-05T00:00:00.000Z can be represented in less than 8 hex digits so we need to padStart
if (ObjectId.timeHexCache.length < 8) {
ObjectId.timeHexCache = ObjectId.timeHexCache.padStart(8, '0');
}
}
return ObjectId.timeHexCache;
}

/**
* Generate the 24 character hex string (12 bytes: timestamp, process unique, counter) used in ObjectId's
*
* @param time - pass in a second based timestamp.
*/
static generate(time?: number): string {
const inc = ObjectId.getInc();
const buffer = ByteUtils.allocateUnsafe(12);

// 4-byte timestamp
NumberUtils.setInt32BE(buffer, 0, time);
const timeString = ObjectId.getTimeHex(time);

// set PROCESS_UNIQUE if yet not initialized
if (PROCESS_UNIQUE === null) {
PROCESS_UNIQUE = ByteUtils.randomBytes(5);
PROCESS_UNIQUE = ByteUtils.toHex(ByteUtils.randomBytes(5));
}

// 5-byte process unique
buffer[4] = PROCESS_UNIQUE[0];
buffer[5] = PROCESS_UNIQUE[1];
buffer[6] = PROCESS_UNIQUE[2];
buffer[7] = PROCESS_UNIQUE[3];
buffer[8] = PROCESS_UNIQUE[4];

// 3-byte counter
buffer[11] = inc & 0xff;
buffer[10] = (inc >> 8) & 0xff;
buffer[9] = (inc >> 16) & 0xff;
const incString = inc.toString(16).padStart(6, '0');

return buffer;
// Flatten concatenated string to save memory
return flattenString(timeString + PROCESS_UNIQUE + incString);
}

/**
Expand All @@ -209,13 +249,13 @@ export class ObjectId extends BSONValue {
toString(encoding?: 'hex' | 'base64'): string {
// Is the id a buffer then use the buffer toString method to return the format
if (encoding === 'base64') return ByteUtils.toBase64(this.id);
if (encoding === 'hex') return this.toHexString();
return this.toHexString();
if (encoding === 'hex') return this.__id;
return this.__id;
}

/** Converts to its JSON the 24 character hex string representation. */
toJSON(): string {
return this.toHexString();
return this.__id;
}

/** @internal */
Expand All @@ -239,18 +279,16 @@ export class ObjectId extends BSONValue {
}

if (ObjectId.is(otherId)) {
return (
this.buffer[11] === otherId.buffer[11] && ByteUtils.equals(this.buffer, otherId.buffer)
);
return this.__id === otherId.__id;
Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

The tests "should use otherId[kId] Buffer for equality when otherId has _bsontype === ObjectId" and "should not rely on toString for otherIds that are instanceof ObjectId" need updating. Now we want to make sure that if we're checking another ObjectId instance we use the efficient string comparison. Previously the goal was to make sure we were using buffer comparison, with an optimization of checking the LSB.

Should we continue to check the LSB? Or maybe just the least significant hex character? Unsure if the same optimization applies to strings.

Copy link
Contributor Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

It looks like performing a character comparison with strings is a little slower, so I think we can update/remove these tests.

const a = crypto.randomBytes(12).toString("hex");
const b = crypto.randomBytes(12).toString("hex");

console.log(a, b);

suite
  .add("string compare", function () {
    return a === b;
  })
  .add("String char compare", function () {
    return a[0] === b[0] && a === b;
  })
image

}

if (typeof otherId === 'string') {
return otherId.toLowerCase() === this.toHexString();
return otherId === this.__id || otherId.toLowerCase() === this.__id;
}

if (typeof otherId === 'object' && typeof otherId.toHexString === 'function') {
const otherIdString = otherId.toHexString();
const thisIdString = this.toHexString();
const thisIdString = this.__id;
return typeof otherIdString === 'string' && otherIdString.toLowerCase() === thisIdString;
}

Expand All @@ -259,10 +297,7 @@ export class ObjectId extends BSONValue {

/** Returns the generation date (accurate up to the second) that this ID was generated. */
getTimestamp(): Date {
const timestamp = new Date();
const time = NumberUtils.getUint32BE(this.buffer, 0);
timestamp.setTime(Math.floor(time) * 1000);
return timestamp;
return new Date(parseInt(this.__id.substring(0, 8), 16) * 1000);
}

/** @internal */
Expand All @@ -272,18 +307,26 @@ export class ObjectId extends BSONValue {

/** @internal */
serializeInto(uint8array: Uint8Array, index: number): 12 {
uint8array[index] = this.buffer[0];
uint8array[index + 1] = this.buffer[1];
uint8array[index + 2] = this.buffer[2];
uint8array[index + 3] = this.buffer[3];
uint8array[index + 4] = this.buffer[4];
uint8array[index + 5] = this.buffer[5];
uint8array[index + 6] = this.buffer[6];
uint8array[index + 7] = this.buffer[7];
uint8array[index + 8] = this.buffer[8];
uint8array[index + 9] = this.buffer[9];
uint8array[index + 10] = this.buffer[10];
uint8array[index + 11] = this.buffer[11];
let temp = parseInt(this.__id.substring(0, 8), 16);

uint8array[index + 3] = temp & 0xff;
uint8array[index + 2] = (temp >> 8) & 0xff;
uint8array[index + 1] = (temp >> 16) & 0xff;
uint8array[index + 0] = (temp >> 24) & 0xff;

temp = parseInt(this.__id.substring(8, 16), 16);

uint8array[index + 7] = temp & 0xff;
uint8array[index + 6] = (temp >> 8) & 0xff;
uint8array[index + 5] = (temp >> 16) & 0xff;
uint8array[index + 4] = (temp >> 24) & 0xff;

temp = parseInt(this.__id.substring(16, 24), 16);

uint8array[index + 11] = temp & 0xff;
uint8array[index + 10] = (temp >> 8) & 0xff;
uint8array[index + 9] = (temp >> 16) & 0xff;
uint8array[index + 8] = (temp >> 24) & 0xff;
return 12;
}

Expand All @@ -293,12 +336,8 @@ export class ObjectId extends BSONValue {
* @param time - an integer number representing a number of seconds.
*/
static createFromTime(time: number): ObjectId {
const buffer = ByteUtils.allocate(12);
for (let i = 11; i >= 4; i--) buffer[i] = 0;
// Encode time into first 4 bytes
NumberUtils.setInt32BE(buffer, 0, time);
// Return the new objectId
return new ObjectId(buffer);
return new ObjectId(time);
}

/**
Expand All @@ -311,7 +350,7 @@ export class ObjectId extends BSONValue {
throw new BSONError('hex string must be 24 characters');
}

return new ObjectId(ByteUtils.fromHex(hexString));
return new ObjectId(hexString);
}

/** Creates an ObjectId instance from a base64 string */
Expand All @@ -329,6 +368,7 @@ export class ObjectId extends BSONValue {
*/
static isValid(id: string | number | ObjectId | ObjectIdLike | Uint8Array): boolean {
if (id == null) return false;
if (typeof id === 'string') return !!ObjectId.validateHexString(id);

try {
new ObjectId(id);
Expand All @@ -340,13 +380,12 @@ export class ObjectId extends BSONValue {

/** @internal */
toExtendedJSON(): ObjectIdExtended {
if (this.toHexString) return { $oid: this.toHexString() };
return { $oid: this.toString('hex') };
return { $oid: this.__id };
}

/** @internal */
static fromExtendedJSON(doc: ObjectIdExtended): ObjectId {
return new ObjectId(doc.$oid);
return new ObjectId(doc.$oid, OID_SKIP_VALIDATE);
}

/**
Expand All @@ -356,6 +395,6 @@ export class ObjectId extends BSONValue {
*/
inspect(depth?: number, options?: unknown, inspect?: InspectFn): string {
inspect ??= defaultInspect;
return `new ObjectId(${inspect(this.toHexString(), options)})`;
return `new ObjectId(${inspect(this.__id, options)})`;
}
}
Loading