Skip to content

Commit e5b0b1d

Browse files
committed
feat: initial site-wide broken link checker script
1 parent b21e086 commit e5b0b1d

File tree

4 files changed

+128
-20
lines changed

4 files changed

+128
-20
lines changed

scripts/changedFilesBrokenLinkChecker.js

Lines changed: 2 additions & 20 deletions
Original file line numberDiff line numberDiff line change
@@ -2,18 +2,8 @@
22
const { execSync, fork } = require('child_process')
33
const { HtmlUrlChecker } = require('broken-link-checker')
44
const chalk = require('chalk')
5-
6-
// Inline timestamped console logger. Every line of this object is marked as
// removed ("-") in this diff; the same hunk adds `require('./utils/logger')`.
// `log` writes via console.log with a yellow ISO-timestamp prefix; `error`
// writes via console.error with a red-background timestamp + "ERROR" prefix.
const logger = {
7-
log: (...args) => {
8-
console.log(`${chalk.yellow(`[${new Date().toISOString()}]:`)} `, ...args)
9-
},
10-
error: (...args) => {
11-
console.error(
12-
`${chalk.bgRed(`[${new Date().toISOString()}]: ⛔️ ERROR:`)} `,
13-
...args
14-
)
15-
},
16-
}
5+
const { logger } = require('./utils/logger')
6+
const { prettyPrintStatusCode } = require('./utils/prettyPrintStatusCode')
177

188
// Git command string: lists changed files with their status letter, compared
// against the parent of HEAD. Where it is executed is outside this hunk.
const GIT_DIFF_NAME_STATUS_LAST_COMMIT = 'git diff --name-status HEAD~1'
199
// Markdown file extension — presumably used to filter the diff list; the
// usage is not visible in this hunk (TODO confirm).
const MARKDOWN_EXTENSION = '.md'
@@ -138,14 +128,6 @@ const getGitDiffList = () => {
138128
return []
139129
}
140130

141-
// Inline status formatter. Every line of this function is marked as removed
// ("-") in this diff; the same commit requires it from
// './utils/prettyPrintStatusCode' instead.
// Returns a red "ERROR" label when statusCode >= 400, otherwise a green "OK"
// label (this includes any value for which the comparison is false).
const prettyPrintStatusCode = (statusCode) => {
142-
if (statusCode >= 400) {
143-
return chalk.bgRed(`ERROR ⛔️`)
144-
}
145-
146-
return chalk.green(`OK ✅`)
147-
}
148-
149131
const makeSiteCheckerForUrl = (url) => {
150132
return async () => {
151133
return new Promise((resolve, reject) => {

scripts/recursiveBrokenLinkChecker.js

Lines changed: 103 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,103 @@
1+
/* eslint-disable no-console */
2+
const { SiteChecker } = require('broken-link-checker')
3+
const chalk = require('chalk')
4+
const { logger } = require('./utils/logger')
5+
const { prettyPrintStatusCode } = require('./utils/prettyPrintStatusCode')
6+
7+
const DOCS_SITE_URL = 'https://docs.cypress.io/'

// Text looked for in a page's <title> to decide that the page is the site's
// "not found" page. NOTE(review): detection relies on the title rather than
// the HTTP status — presumably because missing routes still render a page.
const NOT_FOUND_PAGE_TITLE = '404 | Cypress Documentation'

/**
 * Returns the first direct child of `parent` matching `predicate`, or
 * `undefined` when `parent` is missing or has no `childNodes` array.
 * Lets callers walk a parsed HTML tree without throwing on absent nodes.
 */
const findChildNode = (parent, predicate) => {
  if (!parent || !Array.isArray(parent.childNodes)) {
    return undefined
  }

  return parent.childNodes.find(predicate)
}

/**
 * Extracts the text of html > head > title from the parsed document tree.
 * Returns an empty string when any node on that path is missing (for example
 * a page without a <title>, or with an empty one).
 */
const getDocumentTitle = (tree) => {
  const htmlNode = findChildNode(tree, (node) => node.tagName === 'html')
  const headNode = findChildNode(htmlNode, (node) => node.tagName === 'head')
  const titleNode = findChildNode(headNode, (node) => node.tagName === 'title')
  const titleTextNode = findChildNode(
    titleNode,
    (node) => node.nodeName === '#text'
  )

  if (!titleTextNode || typeof titleTextNode.value !== 'string') {
    return ''
  }

  return titleTextNode.value
}

/**
 * Reads the HTTP status code of a checked link. The status is taken from
 * `link.http.response.statusCode` when a response is present; otherwise it
 * falls back to `link.http.statusCode`, the property read previously.
 * Returns `undefined` when neither is available.
 */
const getLinkStatusCode = (link) => {
  const http = (link && link.http) || {}

  if (http.response && http.response.statusCode !== undefined) {
    return http.response.statusCode
  }

  return http.statusCode
}

/**
 * Crawls a site with `SiteChecker` (external links excluded, robots
 * exclusions not honored) and collects every visited page whose <title>
 * contains the "not found" marker.
 *
 * @param {string} [siteUrl=DOCS_SITE_URL] URL to enqueue for crawling.
 * @returns {Promise<Array<{ originUrl: string, brokenUrl: string }>>}
 *   Resolves when the checker emits `end`. `originUrl` is the `response.url`
 *   and `brokenUrl` the `pageUrl` passed to the `html` handler. The promise
 *   is never rejected; errors reported by the checker are only logged.
 */
const makeSiteChecker = (siteUrl = DOCS_SITE_URL) => {
  return new Promise((resolve) => {
    const brokenLinkRecords = []
    let numLinksChecked = 0

    const siteChecker = new SiteChecker(
      {
        excludeExternalLinks: true,
        honorRobotExclusions: false,
      },
      {
        error: (error) => {
          logger.error('An error occurred', error)
        },
        html: (tree, robots, response, pageUrl) => {
          const currentUrl = response.url

          // A page with no (or an empty) <title> is simply "not a 404 page";
          // it must not throw and abort the handler.
          if (!getDocumentTitle(tree).includes(NOT_FOUND_PAGE_TITLE)) {
            return
          }

          logger.error(
            `Broken link found on page ${currentUrl}: ${chalk.bgRed(pageUrl)}`
          )

          brokenLinkRecords.push({
            originUrl: currentUrl,
            brokenUrl: pageUrl,
          })
        },
        link: (link) => {
          const statusLabel = prettyPrintStatusCode(getLinkStatusCode(link))

          logger.log(`${statusLabel} ${link.url.resolved}`)

          numLinksChecked++
        },
        end: () => {
          logger.log(`Finished scanning url ${siteUrl}`)
          logger.log(`Number of links checked: ${numLinksChecked}`)
          resolve(brokenLinkRecords)
        },
      }
    )

    logger.log(`🔗 Starting link checker for url: ${siteUrl}`)
    siteChecker.enqueue(siteUrl)
  })
}
77+
78+
/**
 * Script entry point: runs the site checker, prints a summary of the broken
 * URLs it collected, and terminates the process with exit code 1 when at
 * least one was found. Total run time is reported via console.time/timeEnd.
 */
const main = async () => {
  console.time('recursiveBrokenLinkChecker')

  const brokenLinkRecords = await makeSiteChecker()

  logger.log(
    `Number of broken URLs found: ${
      brokenLinkRecords.length
        ? `${chalk.bgRed(brokenLinkRecords.length)}`
        : `${chalk.green(brokenLinkRecords.length)} ✅`
    }`
  )

  brokenLinkRecords.forEach(({ originUrl, brokenUrl }) => {
    logger.error(`************************`)
    logger.error(`Broken URL on page: ${originUrl}`)
    logger.error(`Broken URL: ${brokenUrl}`)
  })

  console.timeEnd('recursiveBrokenLinkChecker')

  if (brokenLinkRecords.length) {
    process.exit(1)
  }
}

// Do not leave the promise floating: an unexpected failure must be reported
// through the script's logger and must fail the run with a non-zero exit
// code instead of depending on the runtime's unhandled-rejection behavior.
main().catch((error) => {
  logger.error('Broken link checker failed unexpectedly', error)
  process.exit(1)
})

scripts/utils/logger.js

Lines changed: 14 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,14 @@
1+
/* eslint-disable no-console */
2+
const chalk = require('chalk')
3+
4+
// Builds the "[<ISO-8601 timestamp>]:" prefix shared by both log levels.
const isoStamp = () => `[${new Date().toISOString()}]:`

/**
 * Console logger that prefixes every message with the current timestamp.
 *  - log:   console.log, prefix rendered in yellow
 *  - error: console.error, prefix plus an ERROR marker on a red background
 * All arguments are forwarded to the console method after the prefix.
 */
module.exports.logger = {
  log(...args) {
    const prefix = chalk.yellow(isoStamp())

    console.log(`${prefix} `, ...args)
  },
  error(...args) {
    const prefix = chalk.bgRed(`${isoStamp()} ⛔️ ERROR:`)

    console.error(`${prefix} `, ...args)
  },
}
scripts/utils/prettyPrintStatusCode.js

Lines changed: 9 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,9 @@
1+
const chalk = require('chalk')
2+
3+
/**
 * Maps an HTTP status code to a colored console label.
 *
 * @param {number} statusCode status code to classify
 * @returns {string} a red-background "ERROR" label when `statusCode >= 400`,
 *   otherwise a green "OK" label (also returned for any value for which the
 *   comparison is false, such as `undefined`).
 */
module.exports.prettyPrintStatusCode = (statusCode) => {
  const isErrorStatus = statusCode >= 400

  return isErrorStatus ? chalk.bgRed(`ERROR ⛔️`) : chalk.green(`OK ✅`)
}

0 commit comments

Comments
 (0)