-
Notifications
You must be signed in to change notification settings - Fork 61.4k
/
Copy pathget-remote-json.js
91 lines (79 loc) · 3.22 KB
/
get-remote-json.js
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
import path from 'path'
import fs from 'fs'
import crypto from 'crypto'
import got from 'got'
import statsd from '#src/observability/lib/statsd.js'
// Whether we are running with NODE_ENV=production. Evaluated once, at
// module load time.
const inProd = process.env.NODE_ENV === 'production'
// In-memory cache of already-parsed JSON payloads.
// It is exported purely for the benefit of the unit tests, which mutate
// (purge) it to exercise the in-memory cache-miss path.
export const cache = new Map()
/**
 * Wrapper on `got()` that caches the parsed JSON both in memory and on disk.
 *
 * The on-disk cache lives in the directory named by the
 * `GET_REMOTE_JSON_DISK_CACHE_ROOT` environment variable, falling back to
 * `.remotejson-cache` when that variable is not set.
 *
 * We use this for downloading `redirects.json` files from one of the
 * docs-ghes-<release number> repos as a proxy. A lot of those
 * .json files are large and they're also static which makes them
 * ideal for caching.
 *
 * Lookup order:
 *   1. Is it in the in-memory cache?
 *   2. No, is it on disk (and is it non-empty, valid JSON)?
 *   3. No, download it, then store the parsed result in memory and,
 *      when not running in production, the raw body on disk.
 *
 * Every call increments the `middleware.get_remote_json` statsd counter,
 * tagged with `from_cache:memory`, `from_cache:disk` or `from_cache:not`.
 *
 * @param {string} url - The URL to fetch. Also used, as-is, as the cache key.
 * @param {object} [config] - Passed straight through to `got()`. Only
 *   relevant on a cache miss.
 * @returns {Promise<*>} The parsed JSON payload.
 * @throws {Error} If the response's Content-Type header is missing or does
 *   not start with `application/json`.
 * @throws {SyntaxError} If the downloaded body is not valid JSON.
 * @throws Any error from `got()` itself, and any file system error other
 *   than `ENOENT` when reading the disk cache.
 */
export default async function getRemoteJSON(url, config) {
  // We could get fancy and make the cache key depend on the `config` too
  // given that this is A) only used for archived enterprise stuff,
  // and B) the config is only applicable on cache miss when doing the `got()`.
  const cacheKey = url

  // Assume it's in the in-memory cache first.
  // Later we'll update this if we find we need to.
  let fromCache = 'memory'

  if (!cache.has(cacheKey)) {
    fromCache = 'not'

    const tempFilename = crypto.createHash('md5').update(url).digest('hex')
    // Do this here instead of at the top of the file so that it becomes
    // possible to override this in unit tests.
    const ROOT = process.env.GET_REMOTE_JSON_DISK_CACHE_ROOT || '.remotejson-cache'
    const onDisk = path.join(ROOT, `${tempFilename}.json`)

    const fromDisk = readJSONFromDisk(onDisk)
    if (fromDisk.found) {
      cache.set(cacheKey, fromDisk.value)
      fromCache = 'disk'
    } else {
      // got will, by default, follow redirects and it will throw if the ultimate
      // response is not a 2xx.
      // But it's possible that the page is a 200 OK but it's just not a JSON
      // page at all. Then we can't assume we can deserialize it.
      const res = await got(url, config)
      // A response is not required to carry a Content-Type header at all.
      // Treat a missing header as "not JSON" rather than crashing with a
      // TypeError on `undefined.startsWith`.
      const contentType = res.headers['content-type'] || ''
      if (!contentType.startsWith('application/json')) {
        throw new Error(
          `Fetching '${url}' resulted in a non-JSON response (${res.headers['content-type']})`,
        )
      }
      cache.set(cacheKey, JSON.parse(res.body))

      // Only write to disk for testing and local review.
      // In production, we never write to disk. Only in-memory.
      if (!inProd) {
        fs.mkdirSync(path.dirname(onDisk), { recursive: true })
        fs.writeFileSync(onDisk, res.body, 'utf-8')
      }
    }
  }

  const tags = [`from_cache:${fromCache}`]
  statsd.increment('middleware.get_remote_json', 1, tags)
  return cache.get(cacheKey)
}

// Try to load and parse a JSON file from the disk cache.
// Returns `{ found: true, value }` on success and `{ found: false }` when
// the file is missing, empty, or contains corrupted JSON. A result object
// is used (rather than returning the value directly) so that a cached
// JSON `null` is still distinguishable from a miss.
// Any file system error other than ENOENT is rethrown.
function readJSONFromDisk(filePath) {
  let body
  try {
    body = fs.readFileSync(filePath, 'utf-8')
  } catch (error) {
    if (error.code === 'ENOENT') {
      return { found: false }
    }
    throw error
  }

  // It might exist on disk, but it could be empty
  if (!body) {
    return { found: false }
  }

  try {
    return { found: true, value: JSON.parse(body) }
  } catch (error) {
    // It might be corrupted JSON. Treat that as a miss so it gets
    // downloaded again.
    if (error instanceof SyntaxError) {
      return { found: false }
    }
    throw error
  }
}