forked from github/docs
-
Notifications
You must be signed in to change notification settings - Fork 0
/
Copy pathcheck-developer-links.js
137 lines (111 loc) · 5.45 KB
/
check-developer-links.js
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
const cheerio = require('cheerio')
const findPage = require('./find-page')
const renderContent = require('./render-content')
const rewriteLocalLinks = require('./rewrite-local-links')
const getApplicableVersions = require('./get-applicable-versions')
const { getPathWithoutLanguage } = require('./path-utils')
const { getEnterpriseVersionNumber } = require('./patterns')
const { deprecated } = require('./enterprise-server-releases')
// internal links will have a language code by the time we're testing them
// we also want to capture same-page anchors (#foo)
// language code handed to rewriteLocalLinks and findPage
const languageCode = 'en'
// href prefixes used in the a[href^="..."] selector that picks the links to check:
// language-prefixed paths and same-page anchors
const internalHrefs = ['/en', '#']
// cheerio-loaded renderings of linked pages, keyed by version + relativePath,
// kept at module scope so a linked page is rendered only once per version
const renderedPageCache = {}
// record of same-page anchor hrefs (e.g. #foo), keyed by href
// NOTE(review): module scope means entries persist across checkLinks calls, although
// same-page anchors are specific to one page -- confirm whether sharing is intended
const checkedAnchorCache = {}
module.exports = async function checkLinks ($, page, context, version, checkedLinkCache = {}) {
// run rewriteLocalLinks to version links and add language codes
rewriteLocalLinks($, version, languageCode)
const brokenLinks = {
anchors: [],
links: []
}
// internal link check
for (const href of internalHrefs) {
const internalLinks = $(`a[href^="${href}"]`).get()
for (const internalLink of internalLinks) {
const href = $(internalLink).attr('href')
// enable caching so we don't check links more than once
// anchor links are cached locally (within this run) since they are specific to the page
if (checkedLinkCache[href] || checkedAnchorCache[href]) continue
const [link, anchor] = href.split('#')
// if anchor only (e.g., #foo), look for heading on same page
if (anchor && !link) {
// ignore anchors that are autogenerated from headings
if (anchor === $(internalLink).parent().attr('id')) continue
const matchingHeadings = getMatchingHeadings($, anchor)
if (matchingHeadings.length === 0) {
brokenLinks.anchors.push({ 'broken same-page anchor': `#${anchor}`, reason: 'heading not found on page' })
}
checkedAnchorCache[href] = true
continue
}
checkedLinkCache[href] = true
// skip rare hardcoded links to old GHE versions
// these paths will always be in the old versioned form
// example: /enterprise/11.10.340/admin/articles/upgrading-to-the-latest-release
const gheVersionInLink = link.match(getEnterpriseVersionNumber)
if (gheVersionInLink && deprecated.includes(gheVersionInLink[1])) continue
// look for linked page
const linkedPage = findPage(link, context.pages, context.redirects, languageCode)
if (!linkedPage) {
brokenLinks.links.push({ 'broken link': link, reason: 'linked page not found' })
continue
}
if (linkedPage.relativePath.includes('rest/reference/') && linkedPage.relativePath !== 'rest/reference/index.md') {
const linkedPageRelevantPermalink = linkedPage.permalinks.find(permalink => permalink.pageVersion === version)
const docsPath = linkedPageRelevantPermalink.href
.split('rest/reference/')[1]
.split('#')[0] // do not include #fragments
// find all operations that with an operationID that matches the requested docs path
context.currentRestOperations = context.operationsForCurrentProduct
.filter(operation => operation.operationId.startsWith(docsPath))
}
// finding the linked page isn't enough if it's a github.com page; also need to check versions
if (linkedPage.relativePath.startsWith('github')) {
const linkedPageVersions = getApplicableVersions(linkedPage.versions, linkedPage.relativePath)
if (!linkedPageVersions.includes(version) && $(internalLink).attr('class') !== 'dotcom-only') {
brokenLinks.links.push({ 'broken link': link, reason: `${version} not found in linked page versions`, 'linked page': linkedPage.fullPath })
continue
}
}
// collect elements of the page that may contain links
const linkedPageContent = linkedPage.relativePath.includes('graphql/reference/objects')
? linkedPage.markdown + context.graphql.prerenderedObjectsForCurrentVersion.html
: linkedPage.markdown
// create a unique string for caching purposes
const pathToCache = version + linkedPage.relativePath
const anchorToCheck = anchor
// if link with anchor (e.g., /some/path#foo), look for heading on linked page
if (anchorToCheck) {
// either render page or fetch it from cache if we've already rendered it
let linkedPageObject
if (!renderedPageCache[pathToCache]) {
const linkedPageHtml = await renderContent(linkedPageContent, context)
linkedPageObject = cheerio.load(linkedPageHtml, { xmlMode: true })
renderedPageCache[pathToCache] = linkedPageObject
} else {
linkedPageObject = renderedPageCache[pathToCache]
}
const matchingHeadings = getMatchingHeadings(linkedPageObject, anchorToCheck)
if (matchingHeadings.length === 0) {
if (anchor) {
brokenLinks.anchors.push({ 'broken anchor': `#${anchor}`, 'full link': `${getPathWithoutLanguage(link)}#${anchor}`, reason: 'heading not found on linked page', 'linked page': linkedPage.fullPath })
}
continue
}
}
}
}
return { brokenLinks, checkedLinkCache }
}
// article titles are h1s; headings can be any subsequent level
/**
 * Find the elements an `#anchor` fragment can point at: h2-h6 headings whose `id`
 * equals the anchor, or `a` elements whose `name` equals it (h1 is excluded on purpose).
 *
 * @param {Function} $ - cheerio-style query function; called once with the selector
 * @param {string} anchor - fragment text without the leading `#`
 * @returns {Array} whatever `$(selector).get()` returns (empty when nothing matches)
 */
function getMatchingHeadings ($, anchor) {
  // Escape backslashes and double quotes so the anchor cannot end the quoted
  // attribute value early and produce a malformed (or different) selector.
  const escapedAnchor = String(anchor).replace(/["\\]/g, '\\$&')

  const selector = [
    ...['h2', 'h3', 'h4', 'h5', 'h6'].map(tag => `${tag}[id="${escapedAnchor}"]`),
    `a[name="${escapedAnchor}"]`
  ].join(', ')

  return $(selector).get()
}