Skip to content

Commit ba75438

Browse files
committed
Added script to fix linked markdown images mangled by URL transforms
refs TryGhost/Product#596
- fetch all posts and pages from the API
- check mobiledoc for existence of linked images
- fix any linked images that have mangled markup
1 parent 56b486e commit ba75438

File tree

1 file changed

+175
-0
lines changed

1 file changed

+175
-0
lines changed

fix-markdown-linked-images.js

Lines changed: 175 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,175 @@
1+
/**
2+
* Finds and fixes any linked markdown images whose markup was mangled by URL transforms
3+
*
4+
* Usage:
5+
*
6+
* node fix-markdown-linked-images.js https://blah.ghost.io ADMIN_API_KEY - dry run
7+
* node fix-markdown-linked-images.js https://blah.ghost.io ADMIN_API_KEY true - live run
8+
*/
9+
10+
// The site URL (argv[2]) and the Admin API key (argv[3]) are both required;
// bail out before anything else runs when either is missing.
if (process.argv.length < 4) {
    // usage errors go to stderr, like the script's other failure path
    console.error('not enough arguments, provide an API url and admin key');
    process.exit(1);
}
14+
15+
const Promise = require('bluebird');
16+
const GhostAdminAPI = require('@tryghost/admin-api');
17+
18+
// argv[0] is the node binary and argv[1] the script path; the API url and
// admin key are the first two user-supplied arguments.
const [, , url, key] = process.argv;
20+
21+
/**
 * Repairs mangled linked-image markdown (`[![alt](src)](href)`) in every
 * markdown card of a mobiledoc document.
 *
 * The passed-in mobiledoc is updated by reference: each markdown card's
 * `markdown` payload is overwritten with the repaired text.
 *
 * @param {Object} mobiledoc - parsed mobiledoc; each entry of `cards` is read as `[name, payload]`
 * @param {Object} [post] - resource the mobiledoc belongs to; only `slug` and `id` are read, for logging
 * @returns {boolean} true when at least one linked image was rewritten
 */
function fixMobiledoc(mobiledoc, post) {
    let edited = false;

    // callers may omit the post; never let the "unfixable" log line throw
    const {slug, id} = post || {};

    const cards = mobiledoc && Array.isArray(mobiledoc.cards) ? mobiledoc.cards : [];

    // only markdown cards that actually carry a string payload can be repaired
    const markdownCards = cards.filter((card) => {
        return Array.isArray(card)
            && card[0] === 'markdown'
            && Boolean(card[1])
            && typeof card[1].markdown === 'string';
    });

    // loop over cards replacing the markdown payload by reference
    markdownCards.forEach((card) => {
        let markdown = card[1].markdown;

        // Pass 1: structurally intact linked images whose parts contain repeated garbage
        // p1 = img alt, often contains repetition
        // p2 = img src + "title", often contains repetition
        // p3 = link target, rarely contains repetition
        markdown = markdown.replace(/\[!\[(.*?)\]\((.*?)\)\]\((.*?)\)/gm, (match, p1, p2, p3) => {
            let alt = p1;
            let src = p2;
            let href = p3;

            // alt can have repetition, last occurrence is always full contents
            if (alt.match(/!\[/)) {
                const lastAltMatch = alt.match(/!\[((.(?!!\[))+)$/);
                if (lastAltMatch) {
                    alt = lastAltMatch[1];
                }
            }

            // mangled src typically has garbage repetition at the beginning
            // but ends with a valid url - grab that url and replace
            if (src.match(/https?:\/\//)) {
                // src is absolute
                const lastUrlMatch = src.match(/https?:\/\/((.(?!http))+)$/);
                if (lastUrlMatch) {
                    src = lastUrlMatch[0];
                }
            }

            // very occasionally the link target can contain repetition
            if (href.match(/!\[/)) {
                const lastUrlMatch = href.match(/https?:\/\/((.(?!http))+)$/);
                if (lastUrlMatch) {
                    href = lastUrlMatch[0];
                }
            }

            const replacement = `[![${alt}](${src})](${href})`;

            if (match !== replacement) {
                edited = true;
                return replacement;
            }

            return match;
        });

        // Pass 2: images whose closing `)](` structure was broken; the text
        // trailing the src is only recoverable when it contains a
        // "tent/images/" fragment that can be rebuilt into a /content/images/ path
        markdown = markdown.replace(/\[!\[(.*?)\]\((?:[^)]*?)\]\((.*?)\)([^\s]+?)\)/gm, (match, p1, p2, p3) => {
            if (p3.match(/tent\/images\//)) {
                const href = p3.replace(/.*tent\/images\/(.*)$/, '/content/images/$1');
                const replacement = `[![${p1}](${p2})](${href})`;

                if (match !== replacement) {
                    edited = true;
                    return replacement;
                }
            }

            console.log('Unfixable post', {slug, id});

            return match;
        });

        card[1].markdown = markdown;
    });

    return edited;
}
102+
103+
/**
 * Script entry point.
 *
 * Fetches every post and page through the Admin API, runs fixMobiledoc over
 * each one and - only when the third CLI argument is the literal string
 * 'true' - saves the repaired mobiledoc back. Any other value is a dry run
 * that just reports what would be fixed.
 */
(async function main() {
    const doEdit = process.argv[4] === 'true';

    console.log(doEdit ? 'REAL Run' : 'Dry Run - nothing will be edited');

    // Give the user time to read...
    await Promise.delay(1000);

    const api = new GhostAdminAPI({
        url,
        key,
        version: 'canary'
    });

    const browseOptions = {fields: 'id,slug,mobiledoc,updated_at', limit: 'all'};

    // Checks the given resources one at a time and resolves with an array of
    // booleans (true = the resource's mobiledoc was changed).
    // `label` is 'post' or 'page', `endpoint` is the matching api.posts / api.pages.
    const fixAll = (resources, label, endpoint) => Promise.mapSeries(resources, async (resource) => {
        const mobiledoc = JSON.parse(resource.mobiledoc);

        // always pass the resource so unfixable images can be reported by slug/id
        const edited = fixMobiledoc(mobiledoc, resource);

        if (edited) {
            console.log(`Fixing ${label} ${resource.slug} (${resource.id})`);
        }

        if (doEdit && edited) {
            // missing data attributes won't be changed
            // updated_at is required to pass collision detection
            await endpoint.edit({
                id: resource.id,
                updated_at: resource.updated_at,
                mobiledoc: JSON.stringify(mobiledoc)
            });
        }

        // short pause between resources
        await Promise.delay(50);

        return edited;
    });

    try {
        const allPosts = await api.posts.browse(browseOptions);
        const allPages = await api.pages.browse(browseOptions);

        console.log(`${allPosts.length} Posts and ${allPages.length} Pages will be checked for mangled markdown and edited if needed\n`);

        // give time to cancel if needed
        await Promise.delay(2000);

        const postsResult = await fixAll(allPosts, 'post', api.posts);

        console.log(`\nChecked ${postsResult.length} posts and fixed ${postsResult.filter(edited => edited).length}\n`);

        await Promise.delay(1000);

        const pagesResult = await fixAll(allPages, 'page', api.pages);

        console.log(`\nChecked ${pagesResult.length} pages and fixed ${pagesResult.filter(edited => edited).length}\n`);
    } catch (err) {
        console.error('There was an error', require('util').inspect(err, false, null));
        process.exit(1);
    }
})();

0 commit comments

Comments
 (0)