diff --git a/commands/bear.js b/commands/bear.js
new file mode 100644
index 000000000..8514668e1
--- /dev/null
+++ b/commands/bear.js
@@ -0,0 +1,34 @@
+import {Command} from 'commander';
+import source from '@tryghost/mg-bear-export';
+import {getProcessOptions} from '../lib/process-options.js';
+import logging from '@tryghost/logging';
+
+const command = new Command('bear')
+    .description('Migrate from Bear Blog')
+    .requiredOption('--pathToFile <pathToFile>', 'Path to Bear Blog CSV export file')
+    .option('-V, --verbose', 'Show verbose output', false)
+    .option('--zip', 'Create a zip file', true)
+    .option('-s, --scrape <scrape>', 'Configure scraping tasks', 'all')
+    .option('--sizeLimit <sizeLimit>', 'Max size (in MB) for media files', false)
+    .option('--addTags <addTags>', 'Additional tags to add to all posts')
+    .option('--fallBackHTMLCard', 'Fall back to HTML card if Lexical conversion fails', true)
+    .option('--cache', 'Persist local cache after migration', true)
+    .action(async (options) => {
+        const processOptions = getProcessOptions(options);
+
+        processOptions.options = {
+            ...processOptions.options,
+            pathToFile: options.pathToFile,
+            fallBackHTMLCard: options.fallBackHTMLCard,
+            addTags: options.addTags ? options.addTags.split(',').map(tag => tag.trim()) : []
+        };
+
+        try {
+            await source(processOptions);
+        } catch (error) {
+            logging.error(`Failed to migrate from Bear Blog: ${error.message}`);
+            process.exit(1);
+        }
+    });
+
+export default command;
\ No newline at end of file
diff --git a/packages/mg-bear-export/README.md b/packages/mg-bear-export/README.md
new file mode 100644
index 000000000..45e494d6a
--- /dev/null
+++ b/packages/mg-bear-export/README.md
@@ -0,0 +1,103 @@
+# Migrate Bear Blog Export
+
+Migrate content from [Bear Blog](http://bearblog.dev/) using the supplied CSV file, and generate a `zip` file you can import into a Ghost installation.
+ +## Install + +To install the CLI, which is required for the Usage commands below: + +```sh +npm install --global @tryghost/migrate +``` + +To use this package in your own project: + +`npm install @tryghost/mg-bear-export --save` + +or + +`yarn add @tryghost/mg-bear-export` + +## Usage + +To run a Bear Blog migration, the required command is: + +```sh +migrate bear --pathToFile /path/to/export.csv +``` + +The CSV file should contain the following required columns: +- `title` - Post title +- `slug` - URL slug +- `published date` - Publication date (ISO 8601 format preferred) +- `content` - Post content in Markdown format + +Optional columns include: +- `first published at` - First publication date (falls back to `published date`) +- `all tags` - Tags in format `[tag1, tag2, tag3]` +- `publish` - Publication status (`True` for published, `False` for draft) +- `is page` - Content type (`True` for page, `False` for post) +- `meta description` - SEO description +- `meta image` - Featured image URL + +It's possible to pass more options, in order to achieve a better migration file for Ghost: + +- **`--pathToFile`** (required) + - Path to a Bear Blog CSV export + - string - default: `null` +- **`-V` `--verbose`** + - bool - default: `false` + - Show verbose output +- **`--zip`** + - bool - default: `true` + - Create a zip file +- **`-s` `--scrape`** + - Configure scraping tasks + - string - default: `all` + - Choices: `all`, `img`, `web`, `media`, `files`, `none` +- **`--sizeLimit`** + - number - default: `false` + - Media files larger than this size (defined in MB [i.e. `5`]) will be flagged as oversize +- **`--addTags`** + - string - default: `null` + - Provide one or more tag names which should be added to every post in this migration. + This is addition to a '#bearblog' tag, which is always added. 
+- **`--fallBackHTMLCard`** + - bool - default: `true` + - Fall back to convert to HTMLCard, if standard Lexical convert fails +- **`--cache`** + - Persist local cache after migration is complete (Only if `--zip` is `true`) + - bool - default: `true` + +A more complex migration command could look like this: + +```sh +migrate bear --pathToFile /path/to/export.csv --addTags "imported,migration" +``` + +This will process all posts from the CSV file and add the tags "imported" and "migration" to each post. + +## Develop + +This is a mono repository, managed with [lerna](https://lerna.js.org). + +Follow the instructions for the top-level repo. +1. `git clone` this repo & `cd` into it as usual +2. Run `yarn` to install top-level dependencies. + +## Run + +To run a local development copy, `cd` into this directory, and use `yarn dev` instead of `migrate` like so: + +```sh +yarn dev bear --pathToFile /path/to/export.csv +``` + +## Test + +- `yarn lint` run just eslint +- `yarn test` run lint and tests + +# Copyright & License + +Copyright (c) 2013-2025 Ghost Foundation - Released under the [MIT license](LICENSE). 
\ No newline at end of file diff --git a/packages/mg-bear-export/index.js b/packages/mg-bear-export/index.js new file mode 100644 index 000000000..1102591cd --- /dev/null +++ b/packages/mg-bear-export/index.js @@ -0,0 +1,15 @@ +import {promises as fs} from 'node:fs'; +import process from './lib/process.js'; + +/** + * Process a Bear blog export CSV file + * @param {Object} params - Migration parameters + * @param {Object} params.options - Migration options + * @returns {Promise} - Ghost JSON format data + */ +export default async ({options}) => { + const input = await fs.readFile(options.pathToFile, 'utf-8'); + const processed = await process.all(input, {options}); + + return processed; +}; \ No newline at end of file diff --git a/packages/mg-bear-export/jest.config.js b/packages/mg-bear-export/jest.config.js new file mode 100644 index 000000000..5f2962656 --- /dev/null +++ b/packages/mg-bear-export/jest.config.js @@ -0,0 +1,6 @@ +export default { + testMatch: ['**/test/**/*.test.js'], + testEnvironment: 'node', + transform: {}, + setupFilesAfterEnv: ['jest-extended/all'] +}; \ No newline at end of file diff --git a/packages/mg-bear-export/lib/process.js b/packages/mg-bear-export/lib/process.js new file mode 100644 index 000000000..7c6a31f49 --- /dev/null +++ b/packages/mg-bear-export/lib/process.js @@ -0,0 +1,197 @@ +import {parse} from 'csv-parse/sync'; +import {decode} from 'html-entities'; +import MarkdownIt from 'markdown-it'; +import mgHtmlLexical from '@tryghost/mg-html-lexical'; +import {makeTaskRunner} from '@tryghost/listr-smart-renderer'; +import errors from '@tryghost/errors'; +import fsUtils from '@tryghost/mg-fs-utils'; + +const md = new MarkdownIt({ + html: true +}); + +// Required fields in the CSV export +const REQUIRED_FIELDS = ['title', 'slug', 'published date', 'content']; + +/** + * Parse tags from Bear Blog format + * @param {string} tagString - Tag string in format "[tag1, tag2, tag3]" + * @returns {Array} Array of tag objects + */ +const 
parseTags = (tagString) => { + if (!tagString) { + return []; + } + + // Remove brackets and split by comma, which Bear Blog adds + const tags = tagString + .slice(1, -1) // Remove [ and ] + .split(',') + .map(tag => tag.trim()) + .filter(Boolean) // Remove empty tags + .map(tag => ({ + url: tag.trim(), + data: { + name: tag.trim(), + slug: tag.trim().toLowerCase().replace(/\s+/g, '-').replace(/[^a-z0-9-_#]/g, '') + } + })); + + return tags; +}; + +/** + * Validate CSV data has required fields + * @param {Array} posts - Array of post objects from CSV + * @throws {errors.ValidationError} If required fields are missing + */ +const validatePosts = (posts) => { + if (!Array.isArray(posts) || posts.length === 0) { + throw new errors.ValidationError({ + message: 'Invalid CSV format: No posts found' + }); + } + + // Check if we have any object with properties + if (!posts.some(post => Object.keys(post).length > 0)) { + throw new errors.ValidationError({ + message: 'Invalid CSV format: No valid columns found' + }); + } + + const firstPost = posts[0]; + const missingFields = REQUIRED_FIELDS.filter(field => !(field in firstPost)); + + if (missingFields.length > 0) { + throw new errors.ValidationError({ + message: `Missing required fields: ${missingFields.join(', ')}` + }); + } +}; + +/** + * Validate basic CSV structure + * @param {string} input - CSV content + * @throws {errors.ValidationError} If CSV structure is invalid + */ +const validateCsvStructure = (input) => { + try { + const parsed = parse(input, { + columns: true, + skip_empty_lines: true, + trim: true, + relax_column_count: false, + relax_quotes: true, + relax: false + }); + + if (parsed.length === 0) { + throw new errors.ValidationError({ + message: 'Invalid CSV format: File must have a header row and at least one data row' + }); + } + + const foundFields = new Set(Object.keys(parsed[0])); + + // Check if all required fields are present in headers + const hasAllRequiredFields = REQUIRED_FIELDS.every(field => 
foundFields.has(field));
+        if (!hasAllRequiredFields) {
+            throw new errors.ValidationError({
+                message: 'Invalid CSV format: Missing required columns'
+            });
+        }
+    } catch (error) {
+        if (error instanceof errors.ValidationError) {
+            throw error;
+        }
+        throw new errors.ValidationError({
+            message: `Invalid CSV format: ${error.message}`
+        });
+    }
+};
+
+/**
+ * Convert Bear blog post to Ghost format
+ * @param {Object} post - Bear blog post data
+ * @returns {Promise<Object>} - Ghost post format
+ */
+const processPost = async (post) => {
+    try {
+        // Convert markdown to HTML
+        const html = md.render(post.content || '');
+
+        // Convert HTML to Lexical
+        const ctx = {
+            logger: console,
+            result: {
+                posts: [{
+                    title: post.title,
+                    slug: post.slug,
+                    html
+                }]
+            }
+        };
+
+        const tasks = mgHtmlLexical.convert(ctx, false);
+        const taskRunner = makeTaskRunner(tasks, {
+            renderer: 'silent'
+        });
+        await taskRunner.run();
+        const lexical = ctx.result.posts[0].lexical;
+
+        return {
+            url: post.slug,
+            data: {
+                title: decode(post.title),
+                slug: post.slug,
+                status: post.publish === 'True' ? 'published' : 'draft',
+                created_at: post['first published at'] || post['published date'],
+                published_at: post['published date'],
+                custom_excerpt: post['meta description'] || '',
+                feature_image: post['meta image'] || '',
+                type: post['is page'] === 'True' ?
'page' : 'post', + tags: parseTags(post['all tags']), + lexical + } + }; + } catch (error) { + throw new errors.InternalServerError({ + message: `Error processing post "${post.title}": ${error.message}`, + context: error + }); + } +}; + +/** + * Process all posts from Bear export + * @param {string} input - CSV content + * @returns {Promise} - Ghost data + */ +const all = async (input) => { + try { + const posts = fsUtils.csv.parseString(input); + + validatePosts(posts); + + const processedPosts = await Promise.all(posts.map(post => processPost(post))); + + return {posts: processedPosts}; + } catch (error) { + if (error instanceof errors.ValidationError || error instanceof errors.InternalServerError) { + throw error; + } + + throw new errors.InternalServerError({ + message: `Error processing CSV: ${error.message}`, + context: error + }); + } +}; + +export default { + processPost, + all, + parseTags, + validatePosts, + validateCsvStructure +}; \ No newline at end of file diff --git a/packages/mg-bear-export/package.json b/packages/mg-bear-export/package.json new file mode 100644 index 000000000..2691435a1 --- /dev/null +++ b/packages/mg-bear-export/package.json @@ -0,0 +1,35 @@ +{ + "name": "@tryghost/mg-bear-export", + "version": "0.1.0", + "repository": "https://github.com/TryGhost/migrate/tree/main/packages/mg-bear-export", + "author": "Ghost Foundation", + "license": "MIT", + "main": "index.js", + "type": "module", + "scripts": { + "dev": "node ../../migrate bear", + "test": "NODE_OPTIONS=--experimental-vm-modules jest", + "lint": "eslint . 
--ext .js --cache",
+    "posttest": "yarn lint"
+  },
+  "files": [
+    "index.js",
+    "lib"
+  ],
+  "publishConfig": {
+    "access": "public"
+  },
+  "devDependencies": {
+    "eslint": "8.57.0",
+    "jest": "29.7.0",
+    "jest-extended": "4.0.2"
+  },
+  "dependencies": {
+    "@tryghost/errors": "1.3.6",
+    "@tryghost/kg-default-cards": "10.1.1",
+    "@tryghost/listr-smart-renderer": "0.5.16",
+    "@tryghost/mg-fs-utils": "0.12.0",
+    "@tryghost/mg-html-lexical": "0.0.23",
+    "csv-parse": "5.6.0",
+    "html-entities": "2.6.0",
+    "markdown-it": "14.1.0"
+  }
+}
\ No newline at end of file
diff --git a/packages/mg-bear-export/test/process.test.js b/packages/mg-bear-export/test/process.test.js
new file mode 100644
index 000000000..41ffb5ba5
--- /dev/null
+++ b/packages/mg-bear-export/test/process.test.js
@@ -0,0 +1,162 @@
+import process from '../lib/process.js';
+
+describe('Process', () => {
+    test('can process a post', async () => {
+        const testPost = {
+            uid: 'test123',
+            title: 'Test Post',
+            slug: 'test-post',
+            'published date': '2025-01-01T00:00:00Z',
+            'first published at': '2025-01-01T00:00:00Z',
+            'all tags': '[tag1, tag2]',
+            publish: 'True',
+            'is page': 'False',
+            content: '# Test Content',
+            'meta description': 'Test description',
+            'meta image': 'https://example.com/image.jpg'
+        };
+
+        const result = await process.processPost(testPost);
+
+        expect(result.data.title).toBe('Test Post');
+        expect(result.data.slug).toBe('test-post');
+        expect(result.data.status).toBe('published');
+        expect(result.data.created_at).toBe('2025-01-01T00:00:00Z');
+        expect(result.data.published_at).toBe('2025-01-01T00:00:00Z');
+        expect(result.data.custom_excerpt).toBe('Test description');
+        expect(result.data.feature_image).toBe('https://example.com/image.jpg');
+        expect(result.data.type).toBe('post');
+        expect(result.data.tags).toHaveLength(2);
+        expect(result.data.tags[0].data.name).toBe('tag1');
+        expect(result.data.tags[1].data.name).toBe('tag2');
+
+        // Verify Lexical format
+        const lexical = JSON.parse(result.data.lexical);
+        
expect(lexical.root.children[0].type).toBe('extended-heading'); + expect(lexical.root.children[0].tag).toBe('h1'); + expect(lexical.root.children[0].children[0].text).toBe('Test Content'); + }); + + test('can handle HTML in markdown content', async () => { + const testPost = { + title: 'HTML Test', + slug: 'html-test', + 'published date': '2025-01-01T00:00:00Z', + content: '# Heading\n\n
<div>Custom HTML</div>
\n\n**Bold**', + publish: 'True' + }; + + const result = await process.processPost(testPost); + const lexical = JSON.parse(result.data.lexical); + + expect(lexical.root.children).toHaveLength(3); + expect(lexical.root.children[0].type).toBe('extended-heading'); + expect(lexical.root.children[1].type).toBe('paragraph'); + expect(lexical.root.children[2].type).toBe('paragraph'); + }); + + test('handles special characters in title and content', async () => { + const testPost = { + title: 'Special & Characters © ®', + slug: 'special-chars', + 'published date': '2025-01-01T00:00:00Z', + content: '# Special & Heading ©\n\n**Bold & Beautiful**', + publish: 'True' + }; + + const result = await process.processPost(testPost); + expect(result.data.title).toBe('Special & Characters © ®'); + + const lexical = JSON.parse(result.data.lexical); + expect(lexical.root.children[0].children[0].text).toBe('Special & Heading ©'); + }); + + test('handles empty content gracefully', async () => { + const testPost = { + title: 'Empty Post', + slug: 'empty', + 'published date': '2025-01-01T00:00:00Z', + content: '', + publish: 'True' + }; + + const result = await process.processPost(testPost); + const lexical = JSON.parse(result.data.lexical); + expect(lexical.root.children).toHaveLength(1); + expect(lexical.root.children[0].type).toBe('paragraph'); + expect(lexical.root.children[0].children).toHaveLength(0); + }); + + test('handles different date formats', async () => { + const testPost = { + title: 'Date Test', + slug: 'date-test', + 'published date': '2025-01-01', + 'first published at': '2025/01/01 12:00:00', + content: 'Test', + publish: 'True' + }; + + const result = await process.processPost(testPost); + expect(result.data.published_at).toBe('2025-01-01'); + expect(result.data.created_at).toBe('2025/01/01 12:00:00'); + }); + + test('handles tag parsing edge cases', async () => { + const cases = [ + { + input: '[tag1, tag2 , tag3]', + expected: ['tag1', 'tag2', 'tag3'] + }, + { + 
input: '[]', + expected: [] + }, + { + input: '[Single Tag]', + expected: ['Single Tag'] + }, + { + input: undefined, + expected: [] + } + ]; + + for (const testCase of cases) { + const testPost = { + title: 'Tag Test', + slug: 'tag-test', + 'published date': '2025-01-01T00:00:00Z', + 'all tags': testCase.input, + content: 'Test', + publish: 'True' + }; + + const result = await process.processPost(testPost); + expect(result.data.tags.map(t => t.data.name)).toEqual(testCase.expected); + } + }); + + test('can process CSV input', async () => { + const csvInput = `uid,title,slug,published date,first published at,all tags,publish,is page,content,meta description,meta image +test123,Test Post,test-post,2025-01-01T00:00:00Z,2025-01-01T00:00:00Z,[tag1],True,False,Test content,Test description,https://example.com/image.jpg`; + + const result = await process.all(csvInput); + + expect(result.posts).toHaveLength(1); + expect(result.posts[0].data.title).toBe('Test Post'); + expect(result.posts[0].data.slug).toBe('test-post'); + }); + + test('handles invalid CSV format', async () => { + const invalidCsv = 'invalid,csv\nformat,data'; + + await expect(process.all(invalidCsv)).rejects.toThrow('Missing required fields'); + }); + + test('handles missing required fields', async () => { + const csvInput = `uid,wrong_field\ntest123,value`; + + await expect(process.all(csvInput)).rejects.toThrow('Missing required fields'); + }); +}); \ No newline at end of file diff --git a/packages/migrate/bin/cli.js b/packages/migrate/bin/cli.js index f0d524697..0cac1c912 100755 --- a/packages/migrate/bin/cli.js +++ b/packages/migrate/bin/cli.js @@ -10,6 +10,7 @@ const packageJSON = JSON.parse(readFileSync(join(__dirname, '../package.json'), import beehiivCommands from '../commands/beehiiv.js'; import beehiivMembersCommands from '../commands/beehiiv-members.js'; +import bearCommands from '../commands/bear.js'; import bloggerCommands from '../commands/blogger.js'; import cacheCommands from 
'../commands/cache.js'; import chorusCommands from '../commands/chorus.js'; @@ -49,6 +50,7 @@ prettyCLI.preface('Command line utilities for migrating content to Ghost.'); prettyCLI.command(beehiivCommands); prettyCLI.command(beehiivMembersCommands); +prettyCLI.command(bearCommands); prettyCLI.command(bloggerCommands); prettyCLI.command(cacheCommands); prettyCLI.command(chorusCommands); diff --git a/packages/migrate/commands/bear.js b/packages/migrate/commands/bear.js new file mode 100644 index 000000000..8369bf60e --- /dev/null +++ b/packages/migrate/commands/bear.js @@ -0,0 +1,97 @@ +import {inspect} from 'node:util'; +import {ui} from '@tryghost/pretty-cli'; +import bear from '../sources/bear.js'; +import {convertOptionsToSywac, convertOptionsToDefaults} from '../lib/utilties/options-to-sywac.js'; + +// Internal ID in case we need one. +const id = 'bear'; + +const group = 'Sources:'; + +// The command to run and any params +const flags = 'bear'; + +// Description for the top level command +const desc = 'Migrate from Bear Blog using a CSV export'; + +// Configure all the options +const options = [ + { + type: 'string', + flags: '--pathToFile', + defaultValue: null, + desc: 'Path to Bear Blog export CSV file', + required: true + }, + { + type: 'boolean', + flags: '-V --verbose', + defaultValue: Boolean(process?.env?.DEBUG), + desc: 'Show verbose output' + }, + { + type: 'boolean', + flags: '--zip', + defaultValue: true, + desc: 'Create a zip file (set to false to skip)' + }, + { + type: 'string', + flags: '--addTag', + defaultValue: null, + desc: 'Provide a tag name which should be added to every post in this migration' + }, + { + type: 'boolean', + flags: '--cache', + defaultValue: true, + desc: 'Persist local cache after migration is complete (Only if `--zip` is `true`)' + } +]; + +// Build an object of defaults to be exported +const defaults = convertOptionsToDefaults(options); + +// Convert `options` into a list of Sywac types +const setup = sywac => 
convertOptionsToSywac(options, sywac); + +// What to do when this command is executed +const run = async (argv) => { + let context = { + errors: [], + warnings: [] + }; + + if (argv.verbose) { + ui.log.info(`Migrating from export at ${argv.pathToFile}`); + } + + try { + // Fetch the tasks, configured correctly according to the options passed in + let migrate = bear.getTaskRunner(argv); + + // Run the migration + await migrate.run(context); + + if (argv.verbose) { + ui.log.info('Done', inspect(context.result.data, false, 2)); + } + } catch (error) { + ui.log.info('Done with errors', context.errors); + } + + if (context.warnings.length > 0) { + ui.log.warn(context.warnings); + } +}; + +export default { + id, + group, + flags, + desc, + options, + defaults, + setup, + run +}; \ No newline at end of file diff --git a/packages/migrate/sources/bear.js b/packages/migrate/sources/bear.js new file mode 100644 index 000000000..8c3109d11 --- /dev/null +++ b/packages/migrate/sources/bear.js @@ -0,0 +1,79 @@ +import fsUtils from '@tryghost/mg-fs-utils'; +import mgBearExport from '@tryghost/mg-bear-export'; +import {toGhostJSON} from '@tryghost/mg-json'; +import {makeTaskRunner} from '@tryghost/listr-smart-renderer'; +import prettyMilliseconds from 'pretty-ms'; + +const getTaskRunner = (options) => { + let runnerTasks = [ + { + title: 'Initialising Workspace', + task: (ctx, task) => { + ctx.options = options; + + // Prep a file cache + ctx.fileCache = new fsUtils.FileCache(options.pathToFile); + + task.output = `Workspace initialised at ${ctx.fileCache.cacheDir}`; + } + }, + { + title: 'Read Bear Blog export CSV', + task: async (ctx) => { + try { + const rawResult = await mgBearExport({options: ctx.options}); + ctx.result = await toGhostJSON(rawResult, ctx.options, ctx); + await ctx.fileCache.writeTmpFile(ctx.result, 'bear-export-data.json'); + } catch (error) { + ctx.errors.push(error); + throw error; + } + } + }, + { + title: 'Write Ghost import JSON File', + task: async (ctx) 
=> { + try { + await ctx.fileCache.writeGhostImportFile(ctx.result); + await ctx.fileCache.writeErrorJSONFile(ctx.errors); + } catch (error) { + ctx.errors.push(error); + throw error; + } + } + }, + { + title: 'Write Ghost import zip', + skip: () => !options.zip, + task: async (ctx, task) => { + try { + let timer = Date.now(); + ctx.outputFile = await fsUtils.zip.write(process.cwd(), ctx.fileCache.zipDir, ctx.fileCache.defaultZipFileName); + task.output = `Successfully written zip to ${ctx.outputFile.path} in ${prettyMilliseconds(Date.now() - timer)}`; + } catch (error) { + ctx.errors.push(error); + throw error; + } + } + }, + { + title: 'Clearing cached files', + enabled: () => !options.cache && options.zip, + task: async (ctx) => { + try { + await ctx.fileCache.emptyCurrentCacheDir(); + } catch (error) { + ctx.errors.push(error); + throw error; + } + } + } + ]; + + // Configure a new Listr task manager, we can use different renderers for different configs + return makeTaskRunner(runnerTasks, Object.assign({topLevel: true}, options)); +}; + +export default { + getTaskRunner +}; \ No newline at end of file