Skip to content

Commit 5f8e155

Browse files
authored
[BUGFIX] EEXIST: file already exists, symlink when file already downloaded (#1253)
When using the library in Linux environments, I get the following error if the requested file is already cached:

```bash
Error: EEXIST: file already exists, symlink '/home/node/.cache/huggingface/hub/datasets--facebook--natural_reasoning/blobs/704a73a635633c697507c74576e30798d42a3455967ef4d5bd0865b5242ee7fe' -> '/home/node/.cache/huggingface/hub/datasets--facebook--natural_reasoning/snapshots/bf8da7626caecdea87df7b5a8d97bf9650d909a3/full.jsonl'
```

This happens because the library tries to recreate a symbolic link that already exists. This PR checks whether the pointer file (the symlink) already exists and, if it does, returns its path instead of creating the link again.
1 parent 2286e04 commit 5f8e155

File tree

2 files changed

+44
-1
lines changed

2 files changed

+44
-1
lines changed

packages/hub/src/lib/download-file-to-cache-dir.spec.ts

Lines changed: 41 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -114,6 +114,47 @@ describe("downloadFileToCacheDir", () => {
114114
expect(output).toBe(expectPointer);
115115
});
116116

117+
test("existing symlinked and blob with default revision should not re-download it", async () => {
118+
// <cache>/<repo>/<revision>/snapshots/README.md
119+
const expectPointer = _getSnapshotFile({
120+
repo: DUMMY_REPO,
121+
path: "/README.md",
122+
revision: "main",
123+
});
124+
// stat ensure a symlink and the pointed file exists
125+
vi.mocked(stat).mockResolvedValue({} as Stats); // prevent default mocked reject
126+
vi.mocked(lstat).mockResolvedValue({} as Stats);
127+
vi.mocked(pathsInfo).mockResolvedValue([
128+
{
129+
oid: DUMMY_ETAG,
130+
size: 55,
131+
path: "README.md",
132+
type: "file",
133+
lastCommit: {
134+
date: new Date(),
135+
id: "main",
136+
title: "Commit msg",
137+
},
138+
},
139+
]);
140+
141+
const output = await downloadFileToCacheDir({
142+
repo: DUMMY_REPO,
143+
path: "/README.md",
144+
fetch: fetchMock,
145+
});
146+
147+
expect(stat).toHaveBeenCalledOnce();
148+
expect(symlink).not.toHaveBeenCalledOnce();
149+
// Get call argument for stat
150+
const starArg = vi.mocked(stat).mock.calls[0][0];
151+
152+
expect(starArg).toBe(expectPointer);
153+
expect(fetchMock).not.toHaveBeenCalledWith();
154+
155+
expect(output).toBe(expectPointer);
156+
});
157+
117158
test("existing blob should only create the symlink", async () => {
118159
// <cache>/<repo>/<revision>/snapshots/README.md
119160
const expectPointer = _getSnapshotFile({
@@ -150,7 +191,6 @@ describe("downloadFileToCacheDir", () => {
150191
fetch: fetchMock,
151192
});
152193

153-
expect(stat).not.toHaveBeenCalled();
154194
// should have check for the blob
155195
expect(lstat).toHaveBeenCalled();
156196
expect(vi.mocked(lstat).mock.calls[0][0]).toBe(expectedBlob);

packages/hub/src/lib/download-file-to-cache-dir.ts

Lines changed: 3 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -96,6 +96,9 @@ export async function downloadFileToCacheDir(
9696
const pointerPath = getFilePointer(storageFolder, commitHash ?? pathsInformation[0].lastCommit.id, params.path);
9797
const blobPath = join(storageFolder, "blobs", etag);
9898

99+
// if we have the pointer file, we can shortcut the download
100+
if (await exists(pointerPath, true)) return pointerPath;
101+
99102
// mkdir blob and pointer path parent directory
100103
await mkdir(dirname(blobPath), { recursive: true });
101104
await mkdir(dirname(pointerPath), { recursive: true });

0 commit comments

Comments
 (0)