Skip to content

new filesystem architecture: btrfs + nfs #8385

New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Merged
merged 58 commits into from
Jul 16, 2025
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
Show all changes
58 commits
Select commit Hold shift + click to select a range
f4b6852
file-server: refactor code to use image file...
williamstein May 2, 2025
d1b7929
file server: start a new cleaner compositional approach
williamstein May 2, 2025
73cc777
fileserver storage -- add filesystem
williamstein May 2, 2025
117328b
more making file storage nice
williamstein May 2, 2025
2c16779
zfs filesystems: more support -- get, set, cloning
williamstein May 2, 2025
1dbe50b
fileserver -- rolling snapshots
williamstein May 2, 2025
d25cde4
fileserver -- expand pool
williamstein May 2, 2025
3035217
filesystem: implemented shrink
williamstein May 2, 2025
08544bc
fs: implement rsync with automounting
williamstein May 2, 2025
a0fc4e1
fs: clone function
williamstein May 2, 2025
6f3b664
Merge branch 'master' into fs2
williamstein May 4, 2025
4815c98
start btrfs version of storage
williamstein May 4, 2025
1893f87
btrfs: working on quotas
williamstein May 4, 2025
591295f
btrfs: quota
williamstein May 4, 2025
5d1ff98
btrfs: rolling snapshots
williamstein May 4, 2025
cd087ba
btrfs: cloning subvolumes
williamstein May 4, 2025
92b9821
btrfs: bup snapshots
williamstein May 4, 2025
a61b545
btrfs -- bup prune
williamstein May 5, 2025
706f4d2
btrfs: implement send
williamstein May 5, 2025
3aa6661
Merge branch 'master' into fs2
williamstein May 5, 2025
9a80b44
btrfs: send/recv - thinking about it
williamstein May 5, 2025
0fa4dae
btrfs -- fix an rsync issue
williamstein May 5, 2025
093e807
Merge branch 'master' into fs2
williamstein May 7, 2025
b309dd6
improve bup support in new btrfs filesystem
williamstein May 7, 2025
f7edb90
Merge branch 'master' into fs2
williamstein Jun 23, 2025
eee6793
fixes for things noticed when building
williamstein Jun 23, 2025
b0747be
Merge branch 'master' into fs2
williamstein Jul 13, 2025
101e86e
basic btrfs testing started
williamstein Jul 13, 2025
74a0994
Merge branch 'master' into fs2
williamstein Jul 14, 2025
38eec7c
adding some btrfs unit tests
williamstein Jul 14, 2025
2a3ac15
Merge branch 'master' into fs2
williamstein Jul 14, 2025
b99c909
add missing package
williamstein Jul 14, 2025
01770b6
Merge branch 'master' into fs2
williamstein Jul 14, 2025
9ece08a
Merge branch 'master' into fs2
williamstein Jul 14, 2025
20a2444
more subvolume tests
williamstein Jul 14, 2025
92c74be
delete all the zfs code
williamstein Jul 14, 2025
d087c8e
add more btrfs tests
williamstein Jul 15, 2025
1a3019b
more btrfs unit tests
williamstein Jul 15, 2025
d6a6ea9
Merge branch 'master' into fs2
williamstein Jul 15, 2025
1dc6687
add snapshot stress test
williamstein Jul 15, 2025
1afa4ce
ts
williamstein Jul 15, 2025
d663508
btrfs: unit testing bup integration
williamstein Jul 15, 2025
346a484
Merge branch 'master' into fs2
williamstein Jul 15, 2025
91da892
btrfs tests: create more loopback devices automatically so we can run…
williamstein Jul 15, 2025
caac6dd
btrfs: adding more testing and fs operations support
williamstein Jul 15, 2025
4e2563a
btrfs: add more filesystem support
williamstein Jul 15, 2025
0268a40
btrfs: refactor fs code
williamstein Jul 15, 2025
636d589
btrfs: refactor bup backup code
williamstein Jul 15, 2025
5e5e772
btrfs: refactor snapshot code
williamstein Jul 15, 2025
8458477
btrfs: snapshot --> snapshots
williamstein Jul 15, 2025
2fe725a
btrfs: refactor quota
williamstein Jul 15, 2025
f22c75f
btrfs: subvolumes refactor
williamstein Jul 15, 2025
46ad996
btrfs cleanup code
williamstein Jul 15, 2025
832e124
btrfs: cloning with subvolumes (fix permission issues)
williamstein Jul 15, 2025
f05f511
btrfs: cleanup util
williamstein Jul 15, 2025
a03d97e
btrfs testing: fix initializing many block devices at once
williamstein Jul 16, 2025
d4e03b1
delete the support code for using the socketio cluster module
williamstein Jul 16, 2025
8a82608
use separate process for each persist server; also include pid in log…
williamstein Jul 16, 2025
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
3 changes: 1 addition & 2 deletions src/package.json
Original file line number Diff line number Diff line change
Expand Up @@ -18,11 +18,10 @@
"version-check": "pip3 install typing_extensions mypy || pip3 install --break-system-packages typing_extensions mypy && ./workspaces.py version-check && mypy scripts/check_npm_packages.py",
"test-parallel": "unset DEBUG && pnpm run version-check && cd packages && pnpm run -r --parallel test",
"test": "unset DEBUG && pnpm run depcheck && pnpm run version-check && ./workspaces.py test",
"test-github-ci": "unset DEBUG && pnpm run depcheck && pnpm run version-check && ./workspaces.py test --exclude=jupyter --retries=1",
"test-github-ci": "unset DEBUG && pnpm run depcheck && pnpm run version-check && ./workspaces.py test --exclude=jupyter,file-server --retries=1",
"depcheck": "cd packages && pnpm run -r --parallel depcheck",
"prettier-all": "cd packages/",
"local-ci": "./scripts/ci.sh",
"conat-server": "cd packages/server && pnpm conat-server",
"conat-connections": "cd packages/backend && pnpm conat-connections",
"conat-watch": "cd packages/backend && pnpm conat-watch",
"conat-inventory": "cd packages/backend && pnpm conat-inventory"
Expand Down
2 changes: 1 addition & 1 deletion src/packages/backend/get-listing.ts
Original file line number Diff line number Diff line change
Expand Up @@ -4,7 +4,7 @@
*/

/*
Server directory listing through the HTTP server and Websocket API.
This is used by backends to serve directory listings to clients:

{files:[..., {size:?,name:?,mtime:?,isdir:?}]}

Expand Down
2 changes: 1 addition & 1 deletion src/packages/backend/logger.ts
Original file line number Diff line number Diff line change
Expand Up @@ -128,7 +128,7 @@ function initTransports() {
// Similar as in debug source code, except I stuck a timestamp
// at the beginning, which I like... except also aware of
// non-printf formatting.
const line = `${new Date().toISOString()}: ${myFormat(...args)}\n`;
const line = `${new Date().toISOString()} (${process.pid}): ${myFormat(...args)}\n`;

if (transports.console) {
// the console transport:
Expand Down
207 changes: 207 additions & 0 deletions src/packages/file-server/btrfs/filesystem.ts
Original file line number Diff line number Diff line change
@@ -0,0 +1,207 @@
/*
BTRFS Filesystem

DEVELOPMENT:

Start node, then:

DEBUG="cocalc:*file-server*" DEBUG_CONSOLE=yes node

a = require('@cocalc/file-server/btrfs'); fs = await a.filesystem({device:'/tmp/btrfs.img', formatIfNeeded:true, mount:'/mnt/btrfs', uid:293597964})

*/

import refCache from "@cocalc/util/refcache";
import { mkdirp, btrfs, sudo } from "./util";
import { join } from "path";
import { Subvolumes } from "./subvolumes";
import { mkdir } from "fs/promises";
import { exists } from "@cocalc/backend/misc/async-utils-node";
import { executeCode } from "@cocalc/backend/execute-code";

// default size of btrfs filesystem if creating an image file.
const DEFAULT_FILESYSTEM_SIZE = "10G";

// default for newly created subvolumes
export const DEFAULT_SUBVOLUME_SIZE = "1G";

// Substring of mount(8)'s stderr that indicates the device is not (yet)
// formatted as btrfs; used to decide whether to run mkfs and retry.
const MOUNT_ERROR = "wrong fs type, bad option, bad superblock";

export interface Options {
  // the underlying block device.
  // If this is a file (or filename) ending in .img, then it's a sparse file mounted as a loopback device.
  // If this starts with "/dev" then it is a raw block device.
  device: string;
  // formatIfNeeded -- DANGEROUS! if true, format the device or image,
  // if it doesn't mount with an error containing "wrong fs type,
  // bad option, bad superblock". Never use this in production. Useful
  // for testing and dev.
  formatIfNeeded?: boolean;
  // where the btrfs filesystem is mounted
  mount: string;

  // default size of newly created subvolumes
  defaultSize?: string | number;
  // size of the sparse image file to create when device is a .img path
  // that does not exist yet; ignored otherwise.
  defaultFilesystemSize?: string | number;
}

// Manages a single btrfs filesystem: the backing device (raw block device
// or loopback image file), its mount point, quotas, its subvolumes, and
// the bup backup repository stored inside it.
export class Filesystem {
  public readonly opts: Options;
  // Path to the bup repository, which lives inside the mounted
  // filesystem at <mount>/bup.
  public readonly bup: string;
  public readonly subvolumes: Subvolumes;

  constructor(opts: Options) {
    this.opts = {
      defaultSize: DEFAULT_SUBVOLUME_SIZE,
      defaultFilesystemSize: DEFAULT_FILESYSTEM_SIZE,
      ...opts,
    };
    this.bup = join(this.opts.mount, "bup");
    this.subvolumes = new Subvolumes(this);
  }

  // Create (if necessary) the backing device, mount the filesystem,
  // enable quotas, and initialize the bup repository.
  init = async () => {
    await mkdirp([this.opts.mount]);
    await this.initDevice();
    await this.mountFilesystem();
    // "simple" quotas have less overhead than full qgroup accounting
    await btrfs({
      args: ["quota", "enable", "--simple", this.opts.mount],
    });
    await this.initBup();
  };

  // Unmount the filesystem; throws if umount fails.
  unmount = async () => {
    await sudo({
      command: "umount",
      args: [this.opts.mount],
      err_on_exit: true,
    });
  };

  close = () => {};

  // If the device is an image file that doesn't exist yet, create it as a
  // sparse file of the configured size.  Raw block devices need no setup.
  private initDevice = async () => {
    if (!isImageFile(this.opts.device)) {
      // raw block device -- nothing to do
      return;
    }
    if (!(await exists(this.opts.device))) {
      // truncate -s creates a sparse file, so no disk space is
      // actually allocated until data is written.
      await sudo({
        command: "truncate",
        args: ["-s", `${this.opts.defaultFilesystemSize}`, this.opts.device],
      });
    }
  };

  // Parse "btrfs subvolume show <mount>" output into a {field: value} map.
  // Throws if the mount point is not a mounted btrfs filesystem, which
  // mountFilesystem uses as a cheap "already mounted?" probe.
  info = async (): Promise<{ [field: string]: string }> => {
    const { stdout } = await btrfs({
      args: ["subvolume", "show", this.opts.mount],
    });
    const obj: { [field: string]: string } = {};
    for (const x of stdout.split("\n")) {
      const i = x.indexOf(":");
      if (i == -1) continue;
      obj[x.slice(0, i).trim()] = x.slice(i + 1).trim();
    }
    return obj;
  };

  // Mount the filesystem if it isn't already mounted.  If the mount fails
  // because the device isn't formatted and opts.formatIfNeeded is set,
  // format it and retry once.
  private mountFilesystem = async () => {
    try {
      await this.info();
      // info succeeded, so the filesystem is already mounted
      return;
    } catch {}
    const { stderr, exit_code } = await this.doMount();
    if (exit_code) {
      if (stderr.includes(MOUNT_ERROR) && this.opts.formatIfNeeded) {
        // Device not formatted as btrfs -- format (DANGEROUS; dev/test
        // only, see Options.formatIfNeeded) and retry exactly once.
        await this.formatDevice();
        const retry = await this.doMount();
        if (!retry.exit_code) {
          return;
        }
        throw Error(retry.stderr);
      }
      throw Error(stderr);
    }
  };

  private formatDevice = async () => {
    await sudo({ command: "mkfs.btrfs", args: [this.opts.device] });
  };

  // Perform a single mount attempt.  Returns {stderr, exit_code} instead of
  // throwing so mountFilesystem can inspect the error and decide whether to
  // format the device and retry.
  private doMount = async () => {
    // image files need the loop option; raw block devices do not
    const args: string[] = isImageFile(this.opts.device) ? ["-o", "loop"] : [];
    args.push(
      "-o",
      "compress=zstd",
      "-o",
      "noatime",
      "-o",
      "space_cache=v2",
      "-o",
      "autodefrag",
      this.opts.device,
      "-t",
      "btrfs",
      this.opts.mount,
    );
    {
      const { stderr, exit_code } = await sudo({
        command: "mount",
        args,
        err_on_exit: false,
      });
      if (exit_code) {
        return { stderr, exit_code };
      }
    }
    // make the mount point owned by the current (non-root) user
    const { stderr, exit_code } = await sudo({
      command: "chown",
      args: [
        `${process.getuid?.() ?? 0}:${process.getgid?.() ?? 0}`,
        this.opts.mount,
      ],
      err_on_exit: false,
    });
    return { stderr, exit_code };
  };

  // Create the bup repository directory if necessary and initialize it.
  // ("bup init" is safe to run on an already-initialized repository.)
  private initBup = async () => {
    if (!(await exists(this.bup))) {
      await mkdir(this.bup);
    }
    await executeCode({
      command: "bup",
      args: ["init"],
      env: { BUP_DIR: this.bup },
    });
  };
}

// Decide from the path whether the device is a loopback image file (true)
// or a raw block device (false).
function isImageFile(name: string): boolean {
  // Match the /dev directory itself or anything under /dev/.  The original
  // check used startsWith("/dev"), which misclassified unrelated paths such
  // as "/development/disk.img" as raw block devices.
  if (name === "/dev" || name.startsWith("/dev/")) {
    return false;
  }
  // TODO: could probably check os for a device with given name?
  return name.endsWith(".img");
}

// Cache of mounted filesystems keyed by options, so repeated calls with the
// same options share one Filesystem instance.
const cache = refCache<Options & { noCache?: boolean }, Filesystem>({
  name: "btrfs-filesystems",
  createObject: async (options: Options) => {
    const fs = new Filesystem(options);
    await fs.init();
    return fs;
  },
});

// Get the (cached) Filesystem for the given options, creating, mounting and
// initializing it on first use.
export async function filesystem(
  options: Options & { noCache?: boolean },
): Promise<Filesystem> {
  return await cache(options);
}
1 change: 1 addition & 0 deletions src/packages/file-server/btrfs/index.ts
Original file line number Diff line number Diff line change
@@ -0,0 +1 @@
export { filesystem } from "./filesystem";
119 changes: 119 additions & 0 deletions src/packages/file-server/btrfs/snapshots.ts
Original file line number Diff line number Diff line change
@@ -0,0 +1,119 @@
import { type SubvolumeSnapshots } from "./subvolume-snapshots";
import getLogger from "@cocalc/backend/logger";

const logger = getLogger("file-server:btrfs:snapshots");

// Matches exactly the output of Date.prototype.toISOString(),
// e.g. "2025-07-16T12:34:56.789Z"; used to recognize our snapshot names.
const DATE_REGEXP = /^\d{4}-\d{2}-\d{2}T\d{2}:\d{2}:\d{2}\.\d{3}Z$/;

// Snapshot intervals in MILLISECONDS (each value is minutes * 1000 * 60).
// The code below assumes these are listed in ORDER from shortest to longest.
export const SNAPSHOT_INTERVALS_MS = {
  frequent: 15 * 1000 * 60, // 15 minutes
  daily: 60 * 24 * 1000 * 60, // 1 day
  weekly: 60 * 24 * 7 * 1000 * 60, // 1 week
  monthly: 60 * 24 * 7 * 4 * 1000 * 60, // 4 weeks
};

// How many of each type of snapshot to retain by default.
// (Typed declaration instead of an "as" assertion so a missing or
// misspelled field is a compile error.)
export const DEFAULT_SNAPSHOT_COUNTS: SnapshotCounts = {
  frequent: 24,
  daily: 14,
  weekly: 7,
  monthly: 4,
};

// How many snapshots of each type to retain.
export interface SnapshotCounts {
  frequent: number;
  daily: number;
  weekly: number;
  monthly: number;
}

// Make a new rolling snapshot if data changed and the newest snapshot is old
// enough, then prune snapshots beyond the configured retention counts.
export async function updateRollingSnapshots({
  snapshots,
  counts,
}: {
  snapshots: SubvolumeSnapshots;
  counts?: Partial<SnapshotCounts>;
}) {
  const effectiveCounts = { ...DEFAULT_SNAPSHOT_COUNTS, ...counts };

  const changed = await snapshots.hasUnsavedChanges();
  logger.debug("updateRollingSnapshots", {
    name: snapshots.subvolume.name,
    counts: effectiveCounts,
    changed,
  });
  if (!changed) {
    // no data written since the most recent snapshot, so nothing to do
    return;
  }

  // restrict to snapshot names that are exactly ISO timestamps, ascending
  const existing = (await snapshots.ls())
    .map(({ name }) => name)
    .filter((name) => DATE_REGEXP.test(name));
  existing.sort();

  if (existing.length > 0) {
    const newest = existing[existing.length - 1];
    const age = Date.now() - new Date(newest).valueOf();
    // intervals are ordered shortest to longest; the first type with a
    // nonzero count decides whether a new snapshot is needed at all
    for (const key in SNAPSHOT_INTERVALS_MS) {
      if (!effectiveCounts[key]) {
        continue;
      }
      if (age < SNAPSHOT_INTERVALS_MS[key]) {
        // a sufficiently recent snapshot already exists
        logger.debug("updateRollingSnapshots: no need to snapshot", {
          name: snapshots.subvolume.name,
        });
        return;
      }
      break;
    }
  }

  // take a new snapshot named by the current time
  const name = new Date().toISOString();
  await snapshots.create(name);
  // prune snapshots that fall outside the retention policy
  existing.push(name);
  const toDelete = snapshotsToDelete({
    counts: effectiveCounts,
    snapshots: existing,
  });
  for (const expired of toDelete) {
    try {
      await snapshots.delete(expired);
    } catch {
      // some snapshots can't be deleted, e.g., they were used for the last send.
    }
  }
}

// Given retention counts and a list of ISO-timestamp snapshot names
// (assumed sorted ascending -- the caller sorts them), return the names
// that should be deleted.
//
// For each snapshot type we walk snapshots from newest to oldest, keeping
// up to counts[type] of them spaced at least SNAPSHOT_INTERVALS_MS[type]
// apart.  Anything not kept by some type is returned for deletion.
function snapshotsToDelete({
  counts,
  snapshots,
}: {
  // how many of each snapshot type to retain (missing/0 means keep none)
  counts: { frequent?: number; daily?: number; weekly?: number; monthly?: number };
  snapshots: string[];
}): string[] {
  if (snapshots.length == 0) {
    // nothing to do
    return [];
  }

  // epoch times sorted from BIGGEST (newest) to smallest (oldest)
  const times = snapshots.map((x) => new Date(x).valueOf());
  times.reverse();
  const save = new Set<number>();
  for (const type in counts) {
    const count = counts[type];
    const length_ms = SNAPSHOT_INTERVALS_MS[type];

    // Pick the first count newest snapshots at intervals of length
    // length_ms milliseconds.
    let n = 0,
      i = 0,
      last_tm = 0;
    while (n < count && i < times.length) {
      const tm = times[i];
      if (!last_tm || tm <= last_tm - length_ms) {
        save.add(tm);
        last_tm = tm;
        n += 1; // found one more
      }
      i += 1; // move to next snapshot
    }
  }
  return snapshots.filter((x) => !save.has(new Date(x).valueOf()));
}
Loading
Loading