-
Notifications
You must be signed in to change notification settings - Fork 31.2k
/
Copy pathcompile_cache.cc
555 lines (495 loc) Β· 19.2 KB
/
compile_cache.cc
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
253
254
255
256
257
258
259
260
261
262
263
264
265
266
267
268
269
270
271
272
273
274
275
276
277
278
279
280
281
282
283
284
285
286
287
288
289
290
291
292
293
294
295
296
297
298
299
300
301
302
303
304
305
306
307
308
309
310
311
312
313
314
315
316
317
318
319
320
321
322
323
324
325
326
327
328
329
330
331
332
333
334
335
336
337
338
339
340
341
342
343
344
345
346
347
348
349
350
351
352
353
354
355
356
357
358
359
360
361
362
363
364
365
366
367
368
369
370
371
372
373
374
375
376
377
378
379
380
381
382
383
384
385
386
387
388
389
390
391
392
393
394
395
396
397
398
399
400
401
402
403
404
405
406
407
408
409
410
411
412
413
414
415
416
417
418
419
420
421
422
423
424
425
426
427
428
429
430
431
432
433
434
435
436
437
438
439
440
441
442
443
444
445
446
447
448
449
450
451
452
453
454
455
456
457
458
459
460
461
462
463
464
465
466
467
468
469
470
471
472
473
474
475
476
477
478
479
480
481
482
483
484
485
486
487
488
489
490
491
492
493
494
495
496
497
498
499
500
501
502
503
504
505
506
507
508
509
510
511
512
513
514
515
516
517
518
519
520
521
522
523
524
525
526
527
528
529
530
531
532
533
534
535
536
537
538
539
540
541
542
543
544
545
546
547
548
549
550
551
552
553
554
555
#include "compile_cache.h"
#include <string>
#include "debug_utils-inl.h"
#include "env-inl.h"
#include "node_file.h"
#include "node_internals.h"
#include "node_version.h"
#include "path.h"
#include "util.h"
#include "zlib.h"
#ifdef NODE_IMPLEMENTS_POSIX_CREDENTIALS
#include <unistd.h> // getuid
#endif
namespace node {
using v8::Function;
using v8::Local;
using v8::Module;
using v8::ScriptCompiler;
using v8::String;
namespace {
std::string Uint32ToHex(uint32_t crc) {
std::string str;
str.reserve(8);
for (int i = 28; i >= 0; i -= 4) {
char digit = (crc >> i) & 0xF;
digit += digit < 10 ? '0' : 'a' - 10;
str.push_back(digit);
}
return str;
}
// TODO(joyeecheung): use other hashes?
uint32_t GetHash(const char* data, size_t size) {
uLong crc = crc32(0L, Z_NULL, 0);
return crc32(crc, reinterpret_cast<const Bytef*>(data), size);
}
std::string GetCacheVersionTag() {
// On platforms where uids are available, use different folders for
// different users to avoid cache miss due to permission incompatibility.
// On platforms where uids are not available, bare with the cache miss.
// This should be fine on Windows, as there local directories tend to be
// user-specific.
std::string tag = std::string(NODE_VERSION) + '-' + std::string(NODE_ARCH) +
'-' + Uint32ToHex(ScriptCompiler::CachedDataVersionTag());
#ifdef NODE_IMPLEMENTS_POSIX_CREDENTIALS
tag += '-' + std::to_string(getuid());
#endif
return tag;
}
uint32_t GetCacheKey(std::string_view filename, CachedCodeType type) {
uLong crc = crc32(0L, Z_NULL, 0);
crc = crc32(crc, reinterpret_cast<const Bytef*>(&type), sizeof(type));
crc = crc32(
crc, reinterpret_cast<const Bytef*>(filename.data()), filename.length());
return crc;
}
} // namespace
template <typename... Args>
inline void CompileCacheHandler::Debug(const char* format,
Args&&... args) const {
if (is_debug_) [[unlikely]] {
FPrintF(stderr, format, std::forward<Args>(args)...);
}
}
ScriptCompiler::CachedData* CompileCacheEntry::CopyCache() const {
DCHECK_NOT_NULL(cache);
int cache_size = cache->length;
uint8_t* data = new uint8_t[cache_size];
memcpy(data, cache->data, cache_size);
return new ScriptCompiler::CachedData(
data, cache_size, ScriptCompiler::CachedData::BufferOwned);
}
// Used for identifying and verifying a file is a compile cache file.
// See comments in CompileCacheHandler::Persist().
constexpr uint32_t kCacheMagicNumber = 0x8adfdbb2;
const char* CompileCacheEntry::type_name() const {
switch (type) {
case CachedCodeType::kCommonJS:
return "CommonJS";
case CachedCodeType::kESM:
return "ESM";
case CachedCodeType::kStrippedTypeScript:
return "StrippedTypeScript";
case CachedCodeType::kTransformedTypeScript:
return "TransformedTypeScript";
case CachedCodeType::kTransformedTypeScriptWithSourceMaps:
return "TransformedTypeScriptWithSourceMaps";
default:
UNREACHABLE();
}
}
void CompileCacheHandler::ReadCacheFile(CompileCacheEntry* entry) {
Debug("[compile cache] reading cache from %s for %s %s...",
entry->cache_filename,
entry->type_name(),
entry->source_filename);
uv_fs_t req;
auto defer_req_cleanup = OnScopeLeave([&req]() { uv_fs_req_cleanup(&req); });
const char* path = entry->cache_filename.c_str();
uv_file file = uv_fs_open(nullptr, &req, path, O_RDONLY, 0, nullptr);
if (req.result < 0) {
// req will be cleaned up by scope leave.
Debug(" %s\n", uv_strerror(req.result));
return;
}
uv_fs_req_cleanup(&req);
auto defer_close = OnScopeLeave([file]() {
uv_fs_t close_req;
CHECK_EQ(0, uv_fs_close(nullptr, &close_req, file, nullptr));
uv_fs_req_cleanup(&close_req);
});
// Read the headers.
std::vector<uint32_t> headers(kHeaderCount);
uv_buf_t headers_buf = uv_buf_init(reinterpret_cast<char*>(headers.data()),
kHeaderCount * sizeof(uint32_t));
const int r = uv_fs_read(nullptr, &req, file, &headers_buf, 1, 0, nullptr);
if (r != static_cast<int>(headers_buf.len)) {
Debug("reading header failed, bytes read %d", r);
if (req.result < 0 && is_debug_) {
Debug(", %s", uv_strerror(req.result));
}
Debug("\n");
return;
}
Debug("[%d %d %d %d %d]...",
headers[kMagicNumberOffset],
headers[kCodeSizeOffset],
headers[kCacheSizeOffset],
headers[kCodeHashOffset],
headers[kCacheHashOffset]);
if (headers[kMagicNumberOffset] != kCacheMagicNumber) {
Debug("magic number mismatch: expected %d, actual %d\n",
kCacheMagicNumber,
headers[kMagicNumberOffset]);
return;
}
// Check the code size and hash which are already computed.
if (headers[kCodeSizeOffset] != entry->code_size) {
Debug("code size mismatch: expected %d, actual %d\n",
entry->code_size,
headers[kCodeSizeOffset]);
return;
}
if (headers[kCodeHashOffset] != entry->code_hash) {
Debug("code hash mismatch: expected %d, actual %d\n",
entry->code_hash,
headers[kCodeHashOffset]);
return;
}
// Read the cache, grow the buffer exponentially whenever it fills up.
size_t offset = headers_buf.len;
size_t capacity = 4096; // Initial buffer capacity
size_t total_read = 0;
uint8_t* buffer = new uint8_t[capacity];
while (true) {
// If there is not enough space to read more data, do a simple
// realloc here (we don't actually realloc because V8 requires
// the underlying buffer to be delete[]-able).
if (total_read == capacity) {
size_t new_capacity = capacity * 2;
auto* new_buffer = new uint8_t[new_capacity];
memcpy(new_buffer, buffer, capacity);
delete[] buffer;
buffer = new_buffer;
capacity = new_capacity;
}
uv_buf_t iov = uv_buf_init(reinterpret_cast<char*>(buffer + total_read),
capacity - total_read);
int bytes_read =
uv_fs_read(nullptr, &req, file, &iov, 1, offset + total_read, nullptr);
if (req.result < 0) { // Error.
// req will be cleaned up by scope leave.
delete[] buffer;
Debug(" %s\n", uv_strerror(req.result));
return;
}
uv_fs_req_cleanup(&req);
if (bytes_read <= 0) {
break;
}
total_read += bytes_read;
}
// Check the cache size and hash.
if (headers[kCacheSizeOffset] != total_read) {
Debug("cache size mismatch: expected %d, actual %d\n",
headers[kCacheSizeOffset],
total_read);
return;
}
uint32_t cache_hash = GetHash(reinterpret_cast<char*>(buffer), total_read);
if (headers[kCacheHashOffset] != cache_hash) {
Debug("cache hash mismatch: expected %d, actual %d\n",
headers[kCacheHashOffset],
cache_hash);
return;
}
entry->cache.reset(new ScriptCompiler::CachedData(
buffer, total_read, ScriptCompiler::CachedData::BufferOwned));
Debug(" success, size=%d\n", total_read);
}
CompileCacheEntry* CompileCacheHandler::GetOrInsert(Local<String> code,
Local<String> filename,
CachedCodeType type) {
DCHECK(!compile_cache_dir_.empty());
Utf8Value filename_utf8(isolate_, filename);
uint32_t key = GetCacheKey(filename_utf8.ToStringView(), type);
// TODO(joyeecheung): don't encode this again into UTF8. If we read the
// UTF8 content on disk as raw buffer (from the JS layer, while watching out
// for monkey patching), we can just hash it directly.
Utf8Value code_utf8(isolate_, code);
uint32_t code_hash = GetHash(code_utf8.out(), code_utf8.length());
auto loaded = compiler_cache_store_.find(key);
// TODO(joyeecheung): let V8's in-isolate compilation cache take precedence.
if (loaded != compiler_cache_store_.end() &&
loaded->second->code_hash == code_hash) {
return loaded->second.get();
}
// If the code hash mismatches, the code has changed, discard the stale entry
// and create a new one.
auto emplaced =
compiler_cache_store_.emplace(key, std::make_unique<CompileCacheEntry>());
auto* result = emplaced.first->second.get();
result->code_hash = code_hash;
result->code_size = code_utf8.length();
result->cache_key = key;
result->cache_filename =
compile_cache_dir_ + kPathSeparator + Uint32ToHex(key);
result->source_filename = filename_utf8.ToString();
result->cache = nullptr;
result->type = type;
// TODO(joyeecheung): if we fail enough times, stop trying for any future
// files.
ReadCacheFile(result);
return result;
}
ScriptCompiler::CachedData* SerializeCodeCache(Local<Function> func) {
return ScriptCompiler::CreateCodeCacheForFunction(func);
}
ScriptCompiler::CachedData* SerializeCodeCache(Local<Module> mod) {
return ScriptCompiler::CreateCodeCache(mod->GetUnboundModuleScript());
}
template <typename T>
void CompileCacheHandler::MaybeSaveImpl(CompileCacheEntry* entry,
Local<T> func_or_mod,
bool rejected) {
DCHECK_NOT_NULL(entry);
Debug("[compile cache] V8 code cache for %s %s was %s, ",
entry->type_name(),
entry->source_filename,
rejected ? "rejected"
: (entry->cache == nullptr) ? "not initialized"
: "accepted");
if (entry->cache != nullptr && !rejected) { // accepted
Debug("keeping the in-memory entry\n");
return;
}
Debug("%s the in-memory entry\n",
entry->cache == nullptr ? "initializing" : "refreshing");
ScriptCompiler::CachedData* data = SerializeCodeCache(func_or_mod);
DCHECK_EQ(data->buffer_policy, ScriptCompiler::CachedData::BufferOwned);
entry->refreshed = true;
entry->cache.reset(data);
}
void CompileCacheHandler::MaybeSave(CompileCacheEntry* entry,
Local<Module> mod,
bool rejected) {
DCHECK(mod->IsSourceTextModule());
MaybeSaveImpl(entry, mod, rejected);
}
void CompileCacheHandler::MaybeSave(CompileCacheEntry* entry,
Local<Function> func,
bool rejected) {
MaybeSaveImpl(entry, func, rejected);
}
void CompileCacheHandler::MaybeSave(CompileCacheEntry* entry,
std::string_view transpiled) {
CHECK(entry->type == CachedCodeType::kStrippedTypeScript ||
entry->type == CachedCodeType::kTransformedTypeScript ||
entry->type == CachedCodeType::kTransformedTypeScriptWithSourceMaps);
Debug("[compile cache] saving transpilation cache for %s %s\n",
entry->type_name(),
entry->source_filename);
// TODO(joyeecheung): it's weird to copy it again here. Convert the v8::String
// directly into buffer held by v8::ScriptCompiler::CachedData here.
int cache_size = static_cast<int>(transpiled.size());
uint8_t* data = new uint8_t[cache_size];
memcpy(data, transpiled.data(), cache_size);
entry->cache.reset(new ScriptCompiler::CachedData(
data, cache_size, ScriptCompiler::CachedData::BufferOwned));
entry->refreshed = true;
}
/**
* Persist the compile cache accumulated in memory to disk.
*
* To avoid race conditions, the cache file includes hashes of the original
* source code and the cache content. It's first written to a temporary file
* before being renamed to the target name.
*
* Layout of a cache file:
* [uint32_t] magic number
* [uint32_t] code size
* [uint32_t] code hash
* [uint32_t] cache size
* [uint32_t] cache hash
* .... compile cache content ....
*/
void CompileCacheHandler::Persist() {
DCHECK(!compile_cache_dir_.empty());
// TODO(joyeecheung): do this using a separate event loop to utilize the
// libuv thread pool and do the file system operations concurrently.
// TODO(joyeecheung): Currently flushing is triggered by either process
// shutdown or user requests. In the future we should simply start the
// writes right after module loading on a separate thread, and this method
// only blocks until all the pending writes (if any) on the other thread are
// finished. In that case, the off-thread writes should finish long
// before any attempt of flushing is made so the method would then only
// incur a negligible overhead from thread synchronization.
for (auto& pair : compiler_cache_store_) {
auto* entry = pair.second.get();
const char* type_name = entry->type_name();
if (entry->cache == nullptr) {
Debug("[compile cache] skip persisting %s %s because the cache was not "
"initialized\n",
type_name,
entry->source_filename);
continue;
}
if (entry->refreshed == false) {
Debug(
"[compile cache] skip persisting %s %s because cache was the same\n",
type_name,
entry->source_filename);
continue;
}
if (entry->persisted == true) {
Debug("[compile cache] skip persisting %s %s because cache was already "
"persisted\n",
type_name,
entry->source_filename);
continue;
}
DCHECK_EQ(entry->cache->buffer_policy,
ScriptCompiler::CachedData::BufferOwned);
char* cache_ptr =
reinterpret_cast<char*>(const_cast<uint8_t*>(entry->cache->data));
uint32_t cache_size = static_cast<uint32_t>(entry->cache->length);
uint32_t cache_hash = GetHash(cache_ptr, cache_size);
// Generating headers.
std::vector<uint32_t> headers(kHeaderCount);
headers[kMagicNumberOffset] = kCacheMagicNumber;
headers[kCodeSizeOffset] = entry->code_size;
headers[kCacheSizeOffset] = cache_size;
headers[kCodeHashOffset] = entry->code_hash;
headers[kCacheHashOffset] = cache_hash;
// Generate the temporary filename.
// The temporary file should be placed in a location like:
//
// $NODE_COMPILE_CACHE_DIR/v23.0.0-pre-arm64-5fad6d45-501/e7f8ef7f.cache.tcqrsK
//
// 1. $NODE_COMPILE_CACHE_DIR either comes from the $NODE_COMPILE_CACHE
// environment
// variable or `module.enableCompileCache()`.
// 2. v23.0.0-pre-arm64-5fad6d45-501 is the sub cache directory and
// e7f8ef7f is the hash for the cache (see
// CompileCacheHandler::Enable()),
// 3. tcqrsK is generated by uv_fs_mkstemp() as a temporary identifier.
uv_fs_t mkstemp_req;
auto cleanup_mkstemp =
OnScopeLeave([&mkstemp_req]() { uv_fs_req_cleanup(&mkstemp_req); });
std::string cache_filename_tmp = entry->cache_filename + ".XXXXXX";
Debug("[compile cache] Creating temporary file for cache of %s (%s)...",
entry->source_filename,
type_name);
int err = uv_fs_mkstemp(
nullptr, &mkstemp_req, cache_filename_tmp.c_str(), nullptr);
if (err < 0) {
Debug("failed. %s\n", uv_strerror(err));
continue;
}
Debug(" -> %s\n", mkstemp_req.path);
Debug("[compile cache] writing cache for %s %s to temporary file %s [%d "
"%d %d "
"%d %d]...",
type_name,
entry->source_filename,
mkstemp_req.path,
headers[kMagicNumberOffset],
headers[kCodeSizeOffset],
headers[kCacheSizeOffset],
headers[kCodeHashOffset],
headers[kCacheHashOffset]);
// Write to the temporary file.
uv_buf_t headers_buf = uv_buf_init(reinterpret_cast<char*>(headers.data()),
headers.size() * sizeof(uint32_t));
uv_buf_t data_buf = uv_buf_init(cache_ptr, entry->cache->length);
uv_buf_t bufs[] = {headers_buf, data_buf};
uv_fs_t write_req;
auto cleanup_write =
OnScopeLeave([&write_req]() { uv_fs_req_cleanup(&write_req); });
err = uv_fs_write(
nullptr, &write_req, mkstemp_req.result, bufs, 2, 0, nullptr);
if (err < 0) {
Debug("failed: %s\n", uv_strerror(err));
continue;
}
uv_fs_t close_req;
auto cleanup_close =
OnScopeLeave([&close_req]() { uv_fs_req_cleanup(&close_req); });
err = uv_fs_close(nullptr, &close_req, mkstemp_req.result, nullptr);
if (err < 0) {
Debug("failed: %s\n", uv_strerror(err));
continue;
}
Debug("success\n");
// Rename the temporary file to the actual cache file.
uv_fs_t rename_req;
auto cleanup_rename =
OnScopeLeave([&rename_req]() { uv_fs_req_cleanup(&rename_req); });
std::string cache_filename_final = entry->cache_filename;
Debug("[compile cache] Renaming %s to %s...",
mkstemp_req.path,
cache_filename_final);
err = uv_fs_rename(nullptr,
&rename_req,
mkstemp_req.path,
cache_filename_final.c_str(),
nullptr);
if (err < 0) {
Debug("failed: %s\n", uv_strerror(err));
continue;
}
Debug("success\n");
entry->persisted = true;
}
// Clear the map at the end in one go instead of during the iteration to
// avoid rehashing costs.
Debug("[compile cache] Clear deserialized cache.\n");
compiler_cache_store_.clear();
}
CompileCacheHandler::CompileCacheHandler(Environment* env)
: isolate_(env->isolate()),
is_debug_(
env->enabled_debug_list()->enabled(DebugCategory::COMPILE_CACHE)) {}
// Directory structure:
// - Compile cache directory (from NODE_COMPILE_CACHE)
// - $NODE_VERSION-$ARCH-$CACHE_DATA_VERSION_TAG-$UID
// - $FILENAME_AND_MODULE_TYPE_HASH.cache: a hash of filename + module type
CompileCacheEnableResult CompileCacheHandler::Enable(Environment* env,
const std::string& dir) {
std::string cache_tag = GetCacheVersionTag();
std::string absolute_cache_dir_base = PathResolve(env, {dir});
std::string cache_dir_with_tag =
absolute_cache_dir_base + kPathSeparator + cache_tag;
CompileCacheEnableResult result;
Debug("[compile cache] resolved path %s + %s -> %s\n",
dir,
cache_tag,
cache_dir_with_tag);
if (!env->permission()->is_granted(
env,
permission::PermissionScope::kFileSystemWrite,
cache_dir_with_tag)) [[unlikely]] {
result.message = "Skipping compile cache because write permission for " +
cache_dir_with_tag + " is not granted";
result.status = CompileCacheEnableStatus::FAILED;
return result;
}
if (!env->permission()->is_granted(
env,
permission::PermissionScope::kFileSystemRead,
cache_dir_with_tag)) [[unlikely]] {
result.message = "Skipping compile cache because read permission for " +
cache_dir_with_tag + " is not granted";
result.status = CompileCacheEnableStatus::FAILED;
return result;
}
fs::FSReqWrapSync req_wrap;
int err = fs::MKDirpSync(
nullptr, &(req_wrap.req), cache_dir_with_tag, 0777, nullptr);
if (is_debug_) {
Debug("[compile cache] creating cache directory %s...%s\n",
cache_dir_with_tag,
err < 0 ? uv_strerror(err) : "success");
}
if (err != 0 && err != UV_EEXIST) {
result.message =
"Cannot create cache directory: " + std::string(uv_strerror(err));
result.status = CompileCacheEnableStatus::FAILED;
return result;
}
result.cache_directory = absolute_cache_dir_base;
compile_cache_dir_ = cache_dir_with_tag;
result.status = CompileCacheEnableStatus::ENABLED;
return result;
}
} // namespace node