-
Notifications
You must be signed in to change notification settings - Fork 0
/
Copy pathtransform-html.js
157 lines (125 loc) · 4.11 KB
/
transform-html.js
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
const jsdom = require('jsdom')
/**
 * Render a markdown document into a complete HTML page.
 *
 * Steps, in order:
 *  1. Convert `src` to an HTML fragment with the supplied unified pipeline
 *     (`remarkParse`, `remarkGfm` with `singleTilde: false`, `remarkRehype`,
 *     `rehypeStringify`).
 *  2. Substitute `{{ token }}` placeholders in `template`:
 *     - `content`     -> the rendered fragment wrapped in `<div id="_content">`
 *     - `url_path`    -> `encodeURI(path)`
 *     - `toc`         -> an empty `<div id="_table_of_contents">` placeholder
 *     - `title`, `section`, `description` -> `data[token]`
 *     - `config.github_repo|github_branch|github_path` -> `data[...]` with the
 *       `config.` prefix removed from the key
 *  3. Rewrite root-relative `<a href>` / `<img src>` URLs so they are relative
 *     to this page (for `file://` browsing); page links also get `.html`.
 *  4. Give every h1-h6 a unique `id` derived from its text.
 *  5. Fill the table-of-contents placeholder, if the template had one.
 *
 * @param {string} src - markdown source.
 * @param {object} options
 * @param {string} options.path - path of the page being rendered; the number
 *   of `/` separators in it decides how many `../` segments are needed to get
 *   back to the root. NOTE(review): presumably the site-relative path without
 *   a leading slash - confirm against the caller.
 * @param {string} options.template - HTML template containing `{{ token }}`s.
 * @param {object} options.data - values for the data-backed tokens.
 * @param {Function} options.unified - unified processor factory.
 * @param {Function} options.remarkParse - plugin passed to `.use()`.
 * @param {Function} options.remarkGfm - plugin passed to `.use()`.
 * @param {Function} options.remarkRehype - plugin passed to `.use()`.
 * @param {Function} options.rehypeStringify - plugin passed to `.use()`.
 * @returns {string} the serialized HTML document.
 * @throws {Error} if the template contains a token that is not listed above.
 */
function transformHTML (
  src,
  { path, template, data, unified, remarkParse, remarkGfm, remarkRehype, rehypeStringify }
) {
  const content = unified()
    .use(remarkParse)
    .use(remarkGfm, {
      singleTilde: false,
    })
    .use(remarkRehype)
    .use(rehypeStringify)
    .processSync(src)
    .toString()

  // Inject this data into the template, using a mustache-like
  // replacement scheme.
  const html = template.replace(/{{\s*([\w.]+)\s*}}/g, (token, key) => {
    switch (key) {
      case 'content':
        return `<div id="_content">${content}</div>`
      case 'url_path':
        return encodeURI(path)
      case 'toc':
        return '<div id="_table_of_contents"></div>'
      case 'title':
      case 'section':
      case 'description':
        return data[key]
      case 'config.github_repo':
      case 'config.github_branch':
      case 'config.github_path':
        return data[key.replace(/^config\./, '')]
      default:
        throw new Error(`warning: unknown token '${token}' in ${path}`)
    }
  })

  const dom = new jsdom.JSDOM(html)
  const document = dom.window.document

  // Rewrite relative URLs in links and image sources to be relative to
  // this file; this is for supporting `file://` links. HTML pages need
  // suffix appended.
  const links = [
    { tag: 'a', attr: 'href', suffix: '.html' },
    { tag: 'img', attr: 'src' },
  ]

  for (const { tag, attr, suffix } of links) {
    for (const el of document.querySelectorAll(tag)) {
      const url = el.getAttribute(attr)

      // getAttribute() yields null when the attribute is absent (e.g. a bare
      // `<a name="x">`), and `//host/...` is protocol-relative rather than
      // root-relative; neither must be rewritten.
      if (url === null || !url.startsWith('/') || url.startsWith('//')) {
        continue
      }

      el.setAttribute(attr, relativizeRootUrl(url, path, suffix))
    }
  }

  // Give headers a unique id so that they can be linked within the doc
  for (const header of document.querySelectorAll('h1, h2, h3, h4, h5, h6')) {
    // Slug: lowercase ASCII letters, spaces to dashes, drop everything else.
    const headerText = header.textContent
      .replace(/[A-Z]/g, x => x.toLowerCase())
      .replace(/ /g, '-')
      .replace(/[^a-z0-9-]/g, '')

    // On collision append 2, 3, ... until the id is free in the document.
    let headerId = headerText
    let headerIncrement = 1
    while (document.getElementById(headerId) !== null) {
      headerId = headerText + ++headerIncrement
    }

    header.setAttribute('id', headerId)
  }

  // Walk the dom and build a table of contents
  const tocEl = document.getElementById('_table_of_contents')
  if (tocEl) {
    const toc = generateTableOfContents(document)
    if (toc) {
      tocEl.appendChild(toc)
    }
  }

  return dom.serialize()
}

// Turn a root-relative URL ("/a/b") into one relative to the page at `path`,
// inserting `suffix` (if any) after the path part of the URL.
function relativizeRootUrl (url, path, suffix = '') {
  const childDepth = path.split('/').length - 1
  const prefix = childDepth > 0 ? '../'.repeat(childDepth) : './'

  // Keep any query string / fragment at the very end so the suffix lands on
  // the file name: "/a#b" -> "../a.html#b", not "../a#b.html".
  const tailStart = url.search(/[?#]/)
  const target = tailStart === -1 ? url : url.slice(0, tailStart)
  const tail = tailStart === -1 ? '' : url.slice(tailStart)

  return `${target.replace(/^\//, prefix)}${suffix}${tail}`
}
/**
 * Build a nested `<ul>` table of contents from the headings found inside the
 * element whose id is `_content`.
 *
 * Nesting follows the relative order of heading levels, not their absolute
 * value (h1 > h3 > h5 is three lists deep even though there is an h5). Every
 * entry is an `<li>` holding an `<a>` whose `href` is `#` plus the heading's
 * `id` attribute and whose markup is copied from the heading's `innerHTML`.
 * Nested lists are appended directly to their parent list.
 *
 * If a heading is shallower than the first heading of the document, it is
 * added to the outermost list (which is then treated as being at that
 * shallower level) instead of starting a new, detached list; this keeps the
 * entries collected so far in the result.
 *
 * @param {Document} document - document to read headings from and to create
 *   the list elements with.
 * @returns {Element|undefined} the outermost `<ul>`, or `undefined` when the
 *   document has no `_content` element or that element contains no headings.
 */
function generateTableOfContents (document) {
  const contentEl = document.getElementById('_content')
  if (!contentEl) {
    return undefined
  }

  // Stack of the lists that are currently "open", outermost first. Each entry
  // remembers the heading level its list was opened for.
  const hierarchy = []

  for (const header of walkHeaders(contentEl)) {
    // Normalize to a number so the comparisons below are numeric.
    const level = Number(headerLevel(header))

    // Close every list that is deeper than this heading - but never the
    // outermost one, since that is the list handed back to the caller.
    while (hierarchy.length > 1 && hierarchy[hierarchy.length - 1].level > level) {
      hierarchy.pop()
    }

    // Only the outermost list is left and it is still deeper than this
    // heading: re-level it so later, deeper headings nest beneath this one.
    if (hierarchy.length === 1 && hierarchy[0].level > level) {
      hierarchy[0].level = level
    }

    const innermost = hierarchy[hierarchy.length - 1]
    if (!innermost || innermost.level < level) {
      const list = document.createElement('ul')
      if (innermost) {
        innermost.list.appendChild(list)
      }
      hierarchy.push({ list, level })
    }

    const element = document.createElement('li')
    const link = document.createElement('a')
    link.setAttribute('href', `#${header.getAttribute('id')}`)
    link.innerHTML = header.innerHTML
    element.appendChild(link)
    hierarchy[hierarchy.length - 1].list.appendChild(element)
  }

  return hierarchy.length > 0 ? hierarchy[0].list : undefined
}
/**
 * Collect every heading node found beneath `element`, in document order.
 *
 * `element` itself is never tested, only its descendants. A node counts as a
 * heading when `headerLevel()` returns a truthy value for it.
 *
 * @param {Node} element - root of the subtree to search.
 * @param {Node[]} [headers] - array to append matches to; a new one is
 *   created when omitted.
 * @returns {Node[]} the `headers` array.
 */
function walkHeaders (element, headers = []) {
  // Depth-first, pre-order walk driven by an explicit stack. Children are
  // pushed last-to-first so that popping yields them first-to-last.
  const pending = [...element.childNodes].reverse()

  while (pending.length > 0) {
    const node = pending.pop()

    if (headerLevel(node)) {
      headers.push(node)
    }

    const children = [...node.childNodes]
    for (let i = children.length - 1; i >= 0; i--) {
      pending.push(children[i])
    }
  }

  return headers
}
/**
 * Report which heading level a node represents.
 *
 * @param {object} node - node to inspect; nodes without a `tagName` (text
 *   nodes, for instance) are accepted.
 * @returns {string|number} the level digit as a string (`'1'` to `'6'`) when
 *   the tag name is `h1`-`h6` in either case, otherwise the number `0`.
 */
function headerLevel (node) {
  if (!node.tagName) {
    return 0
  }

  // A successful match always captures exactly one non-zero digit.
  const [, digit] = node.tagName.match(/^[Hh]([123456])$/) || []
  return digit || 0
}
module.exports = transformHTML