Skip to content

Commit 4acef65

Browse files
authored
Merge pull request #3193 from dmichon-msft/optimize-tar
[rush-lib] Optimizations for interacting with `tar` utility
2 parents f141994 + ed487cc commit 4acef65

File tree

6 files changed

+127
-71
lines changed

6 files changed

+127
-71
lines changed

apps/rush-lib/src/logic/buildCache/ProjectBuildCache.ts

Lines changed: 70 additions & 59 deletions
Original file line numberDiff line numberDiff line change
@@ -6,7 +6,7 @@ import events from 'events';
66
import * as crypto from 'crypto';
77
import type * as stream from 'stream';
88
import * as tar from 'tar';
9-
import { FileSystem, Path, ITerminal, FolderItem } from '@rushstack/node-core-library';
9+
import { Async, FileSystem, Path, ITerminal, FolderItem } from '@rushstack/node-core-library';
1010

1111
import { RushConfigurationProject } from '../../api/RushConfigurationProject';
1212
import { ProjectChangeAnalyzer } from '../ProjectChangeAnalyzer';
@@ -268,16 +268,25 @@ export class ProjectBuildCache {
268268

269269
const tarUtility: TarExecutable | undefined = await ProjectBuildCache._tryGetTarUtility(terminal);
270270
if (tarUtility) {
271-
const tempLocalCacheEntryPath: string = this._localBuildCacheProvider.getCacheEntryPath(cacheId);
271+
const finalLocalCacheEntryPath: string = this._localBuildCacheProvider.getCacheEntryPath(cacheId);
272+
// Derive the temp file from the destination path to ensure they are on the same volume
273+
const tempLocalCacheEntryPath: string = `${finalLocalCacheEntryPath}.temp`;
272274
const logFilePath: string = this._getTarLogFilePath();
273275
const tarExitCode: number = await tarUtility.tryCreateArchiveFromProjectPathsAsync({
274276
archivePath: tempLocalCacheEntryPath,
275277
paths: filesToCache.outputFilePaths,
276278
project: this._project,
277279
logFilePath
278280
});
281+
279282
if (tarExitCode === 0) {
280-
localCacheEntryPath = tempLocalCacheEntryPath;
283+
// Move after the archive is finished so that if the process is interrupted we aren't left with an invalid file
284+
await FileSystem.moveAsync({
285+
sourcePath: tempLocalCacheEntryPath,
286+
destinationPath: finalLocalCacheEntryPath,
287+
overwrite: true
288+
});
289+
localCacheEntryPath = finalLocalCacheEntryPath;
281290
} else {
282291
terminal.writeWarningLine(
283292
`"tar" exited with code ${tarExitCode} while attempting to create the cache entry. ` +
@@ -358,80 +367,82 @@ export class ProjectBuildCache {
358367
return success;
359368
}
360369

370+
/**
371+
* Walks the declared output folders of the project and collects a list of files.
372+
* @returns The list of output files as project-relative paths, or `undefined` if a
373+
* symbolic link was encountered.
374+
*/
361375
private async _tryCollectPathsToCacheAsync(terminal: ITerminal): Promise<IPathsToCache | undefined> {
362-
const projectFolderPath: string = this._project.projectFolder;
363-
const outputFolderNamesThatExist: boolean[] = await Promise.all(
364-
this._projectOutputFolderNames.map((outputFolderName) =>
365-
FileSystem.existsAsync(`${projectFolderPath}/${outputFolderName}`)
366-
)
367-
);
376+
const posixPrefix: string = this._project.projectFolder;
377+
const outputFilePaths: string[] = [];
378+
const queue: [string, string][] = [];
379+
368380
const filteredOutputFolderNames: string[] = [];
369-
for (let i: number = 0; i < outputFolderNamesThatExist.length; i++) {
370-
if (outputFolderNamesThatExist[i]) {
371-
filteredOutputFolderNames.push(this._projectOutputFolderNames[i]);
372-
}
373-
}
374381

375-
let encounteredEnumerationIssue: boolean = false;
376-
function symbolicLinkPathCallback(entryPath: string): void {
377-
terminal.writeError(`Unable to include "${entryPath}" in build cache. It is a symbolic link.`);
378-
encounteredEnumerationIssue = true;
379-
}
382+
let hasSymbolicLinks: boolean = false;
380383

381-
const outputFilePaths: string[] = [];
382-
for (const filteredOutputFolderName of filteredOutputFolderNames) {
383-
if (encounteredEnumerationIssue) {
384-
return undefined;
384+
// Adds child directories to the queue, files to the path list, and bails on symlinks
385+
function processChildren(relativePath: string, diskPath: string, children: FolderItem[]): void {
386+
for (const child of children) {
387+
const childRelativePath: string = `${relativePath}/${child.name}`;
388+
if (child.isSymbolicLink()) {
389+
terminal.writeError(
390+
`Unable to include "${childRelativePath}" in build cache. It is a symbolic link.`
391+
);
392+
hasSymbolicLinks = true;
393+
} else if (child.isDirectory()) {
394+
queue.push([childRelativePath, `${diskPath}/${child.name}`]);
395+
} else {
396+
outputFilePaths.push(childRelativePath);
397+
}
385398
}
399+
}
386400

387-
const outputFilePathsForFolder: AsyncIterableIterator<string> = this._getPathsInFolder(
388-
terminal,
389-
symbolicLinkPathCallback,
390-
filteredOutputFolderName,
391-
`${projectFolderPath}/${filteredOutputFolderName}`
392-
);
401+
// Handle declared output folders.
402+
for (const outputFolder of this._projectOutputFolderNames) {
403+
const diskPath: string = `${posixPrefix}/${outputFolder}`;
404+
try {
405+
const children: FolderItem[] = await FileSystem.readFolderItemsAsync(diskPath);
406+
processChildren(outputFolder, diskPath, children);
407+
// The folder exists, record it
408+
filteredOutputFolderNames.push(outputFolder);
409+
} catch (error) {
410+
if (!FileSystem.isNotExistError(error as Error)) {
411+
throw error;
412+
}
393413

394-
for await (const outputFilePath of outputFilePathsForFolder) {
395-
outputFilePaths.push(outputFilePath);
414+
// If the folder does not exist, ignore it.
396415
}
397416
}
398417

399-
if (encounteredEnumerationIssue) {
418+
// Walk the tree in parallel
419+
await Async.forEachAsync(
420+
queue,
421+
async ([relativePath, diskPath]: [string, string]) => {
422+
const children: FolderItem[] = await FileSystem.readFolderItemsAsync(diskPath);
423+
processChildren(relativePath, diskPath, children);
424+
},
425+
{
426+
concurrency: 10
427+
}
428+
);
429+
430+
if (hasSymbolicLinks) {
431+
// Symbolic links do not round-trip safely.
400432
return undefined;
401433
}
402434

435+
// Ensure stable output path order.
436+
outputFilePaths.sort();
437+
403438
return {
404-
filteredOutputFolderNames,
405-
outputFilePaths
439+
outputFilePaths,
440+
filteredOutputFolderNames
406441
};
407442
}
408443

409-
private async *_getPathsInFolder(
410-
terminal: ITerminal,
411-
symbolicLinkPathCallback: (path: string) => void,
412-
posixPrefix: string,
413-
folderPath: string
414-
): AsyncIterableIterator<string> {
415-
const folderEntries: FolderItem[] = await FileSystem.readFolderItemsAsync(folderPath);
416-
for (const folderEntry of folderEntries) {
417-
const entryPath: string = `${posixPrefix}/${folderEntry.name}`;
418-
if (folderEntry.isSymbolicLink()) {
419-
symbolicLinkPathCallback(entryPath);
420-
} else if (folderEntry.isDirectory()) {
421-
yield* this._getPathsInFolder(
422-
terminal,
423-
symbolicLinkPathCallback,
424-
entryPath,
425-
`${folderPath}/${folderEntry.name}`
426-
);
427-
} else {
428-
yield entryPath;
429-
}
430-
}
431-
}
432-
433444
private _getTarLogFilePath(): string {
434-
return path.join(this._project.projectRushTempFolder, 'build-cache-tar.log');
445+
return path.join(this._project.projectRushTempFolder, `${this._cacheId}.log`);
435446
}
436447

437448
private static async _getCacheId(options: IProjectBuildCacheOptions): Promise<string | undefined> {

apps/rush-lib/src/utilities/TarExecutable.ts

Lines changed: 17 additions & 12 deletions
Original file line numberDiff line numberDiff line change
@@ -71,8 +71,8 @@ export class TarExecutable {
7171
*/
7272
public async tryCreateArchiveFromProjectPathsAsync(options: ICreateArchiveOptions): Promise<number> {
7373
const { project, archivePath, paths, logFilePath } = options;
74-
const pathsListFilePath: string = `${project.projectRushTempFolder}/tarPaths_${Date.now()}`;
75-
await FileSystem.writeFileAsync(pathsListFilePath, paths.join('\n'));
74+
75+
const tarInput: string = paths.join('\n');
7676

7777
// On Windows, tar.exe will report a "Failed to clean up compressor" error if the target folder
7878
// does not exist (GitHub #2622)
@@ -89,27 +89,24 @@ export class TarExecutable {
8989
archivePath,
9090
// [Windows bsdtar 3.3.2] -z, -j, -J, --lzma Compress archive with gzip/bzip2/xz/lzma
9191
'-z',
92-
// [Windows bsdtar 3.3.2] -C <dir> Change to <dir> before processing remaining files
93-
'-C',
94-
projectFolderPath,
9592
// [GNU tar 1.33] -T, --files-from=FILE get names to extract or create from FILE
9693
//
9794
// Windows bsdtar does not document this parameter, but seems to accept it.
98-
'--files-from',
99-
pathsListFilePath
95+
'--files-from=-'
10096
],
10197
projectFolderPath,
102-
logFilePath
98+
logFilePath,
99+
tarInput
103100
);
104-
await FileSystem.deleteFileAsync(pathsListFilePath);
105101

106102
return tarExitCode;
107103
}
108104

109105
private async _spawnTarWithLoggingAsync(
110106
args: string[],
111107
currentWorkingDirectory: string,
112-
logFilePath: string
108+
logFilePath: string,
109+
input?: string
113110
): Promise<number> {
114111
// Runs "tar" with the specified args and logs its output to the specified location.
115112
// The log file looks like this:
@@ -143,6 +140,9 @@ export class TarExecutable {
143140
`Start time: ${new Date().toString()}`,
144141
`Invoking "${this._tarExecutablePath} ${args.join(' ')}"`,
145142
'',
143+
`======= BEGIN PROCESS INPUT ======`,
144+
input || '',
145+
'======== END PROCESS INPUT =======',
146146
'======= BEGIN PROCESS OUTPUT =======',
147147
''
148148
].join('\n')
@@ -152,8 +152,13 @@ export class TarExecutable {
152152
currentWorkingDirectory: currentWorkingDirectory
153153
});
154154

155-
childProcess.stdout?.on('data', (chunk) => fileWriter.write(`[stdout] ${chunk}`));
156-
childProcess.stderr?.on('data', (chunk) => fileWriter.write(`[stderr] ${chunk}`));
155+
childProcess.stdout!.on('data', (chunk) => fileWriter.write(`[stdout] ${chunk}`));
156+
childProcess.stderr!.on('data', (chunk) => fileWriter.write(`[stderr] ${chunk}`));
157+
158+
if (input !== undefined) {
159+
childProcess.stdin!.write(input, 'utf-8');
160+
childProcess.stdin!.end();
161+
}
157162

158163
const [tarExitCode] = await events.once(childProcess, 'exit');
159164

Lines changed: 10 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,10 @@
1+
{
2+
"changes": [
3+
{
4+
"packageName": "@microsoft/rush",
5+
"comment": "Optimize invocation of tar to use stdin instead of a temporary file.",
6+
"type": "none"
7+
}
8+
],
9+
"packageName": "@microsoft/rush"
10+
}
Lines changed: 10 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,10 @@
1+
{
2+
"changes": [
3+
{
4+
"packageName": "@microsoft/rush",
5+
"comment": "Revise architecture of symbolic link scan to use a queue and parallel file system calls.",
6+
"type": "none"
7+
}
8+
],
9+
"packageName": "@microsoft/rush"
10+
}
Lines changed: 10 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,10 @@
1+
{
2+
"changes": [
3+
{
4+
"packageName": "@microsoft/rush",
5+
"comment": "Create separate tar logs per phase based on cache id.",
6+
"type": "none"
7+
}
8+
],
9+
"packageName": "@microsoft/rush"
10+
}
Lines changed: 10 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,10 @@
1+
{
2+
"changes": [
3+
{
4+
"packageName": "@microsoft/rush",
5+
"comment": "Pack tar to a temp file, then move into the cache to ensure cache integrity.",
6+
"type": "none"
7+
}
8+
],
9+
"packageName": "@microsoft/rush"
10+
}

0 commit comments

Comments
 (0)