Skip to content

Commit

Permalink
Store a hash of generated bytecode
Browse files Browse the repository at this point in the history
Summary:
Extend the bytecode file format by adding a footer containing the SHA1 hash of everything above the footer. In slow debug mode, verify that the stored hash matches the actual contents at runtime.

Update all code paths that construct bytecode files (or that modify bytecode files in place).

Reviewed By: avp

Differential Revision: D22618554

fbshipit-source-id: 28919fca092942db3da2ba6a720c8304084f4a3a
  • Loading branch information
kodafb authored and facebook-github-bot committed Aug 3, 2020
1 parent ef9cfa1 commit 6871396
Show file tree
Hide file tree
Showing 8 changed files with 98 additions and 5 deletions.
8 changes: 8 additions & 0 deletions include/hermes/BCGen/HBC/BytecodeDataProvider.h
Original file line number Diff line number Diff line change
Expand Up @@ -418,6 +418,14 @@ class BCProviderFromBuffer final : public BCProviderBase {
llvh::ArrayRef<uint8_t> aref,
std::string *errorMessage = nullptr);

/// Given a valid bytecode buffer aref, returns whether its stored fileHash
/// matches the actual hash of the buffer.
/// The hash is a SHA1 over the bytes that precede the file footer; the footer
/// is located from the header's fileLength, so any trailing bytes past
/// fileLength (an epilogue) are not part of the hash.
/// The caller must have performed the basic sanity checks on aref first.
static bool bytecodeHashIsValid(llvh::ArrayRef<uint8_t> aref);

/// Given a writable valid bytecode buffer aref, update its fileHash field
/// with the actual hash of the buffer.
/// The hash is a SHA1 over the bytes that precede the file footer; the footer
/// is located from the header's fileLength, so any trailing bytes past
/// fileLength (an epilogue) are neither hashed nor modified.
/// The caller must have performed the basic sanity checks on aref first.
static void updateBytecodeHash(llvh::MutableArrayRef<uint8_t> aref);

/// Returns the arrayref to small function headers;
/// this is also the start of the function header section.
const llvh::ArrayRef<hbc::SmallFuncHeader> getSmallFunctionHeaders() const {
Expand Down
21 changes: 18 additions & 3 deletions include/hermes/BCGen/HBC/BytecodeFileFormat.h
Original file line number Diff line number Diff line change
Expand Up @@ -29,8 +29,8 @@ const static uint64_t MAGIC = 0x1F1903C103BC1FC6;
const static uint64_t DELTA_MAGIC = ~MAGIC;

// Bytecode version generated by this version of the compiler.
// Updated: Jun 15, 2020
const static uint32_t BYTECODE_VERSION = 74;
// Updated: Jul 20, 2020
const static uint32_t BYTECODE_VERSION = 75;

/// Property cache index which indicates no caching.
static constexpr uint8_t PROPERTY_CACHING_DISABLED = 0;
Expand Down Expand Up @@ -72,7 +72,7 @@ struct BytecodeFileHeader {
uint64_t magic;
uint32_t version;
uint8_t sourceHash[SHA1_NUM_BYTES];
uint32_t fileLength;
uint32_t fileLength; // Until the end of the BytecodeFileFooter.
uint32_t globalCodeIndex;
uint32_t functionCount;
uint32_t stringKindCount; // Number of string kind entries.
Expand Down Expand Up @@ -140,6 +140,21 @@ struct BytecodeFileHeader {
}
};

/**
* Footer of binary file. Used for summary information that is *not*
* read during normal execution (since that would hurt locality).
*/
struct BytecodeFileFooter {
uint8_t fileHash[SHA1_NUM_BYTES]; // Hash of everything above the footer.

// NOTE: If we ever add any non-byte fields, we need to ensure alignment
// everywhere this struct is written.

BytecodeFileFooter(const SHA1 &fileHash) {
std::copy(fileHash.begin(), fileHash.end(), this->fileHash);
}
};

/// The string table is an array of these entries, followed by an array of
/// OverflowStringTableEntry for the entries whose length or offset doesn't fit
/// into the bitfields.
Expand Down
5 changes: 5 additions & 0 deletions include/hermes/BCGen/HBC/BytecodeStream.h
Original file line number Diff line number Diff line change
Expand Up @@ -8,6 +8,7 @@
#ifndef HERMES_BCGEN_HBC_BYTECODESTREAM_H
#define HERMES_BCGEN_HBC_BYTECODESTREAM_H

#include "llvh/Support/SHA1.h"
#include "llvh/Support/raw_ostream.h"

#include "hermes/BCGen/Exceptions.h"
Expand Down Expand Up @@ -52,6 +53,8 @@ class BytecodeSerializer {
uint32_t debugInfoOffset_{0};
/// Count of overflow string entries, computed during layout phase.
uint32_t overflowStringEntryCount_{0};
/// Hash of everything written in non-layout mode so far.
llvh::SHA1 outputHasher_;

/// Each subsection of a function's `info' section is aligned thusly.
static constexpr uint32_t INFO_ALIGNMENT = 4;
Expand All @@ -60,6 +63,8 @@ class BytecodeSerializer {
void writeBinaryArray(const ArrayRef<T> array) {
size_t size = sizeof(T) * array.size();
if (!isLayout_) {
outputHasher_.update(llvh::ArrayRef<uint8_t>(
reinterpret_cast<const uint8_t *>(array.data()), size));
os_.write(reinterpret_cast<const char *>(array.data()), size);
}
loc_ += size;
Expand Down
49 changes: 49 additions & 0 deletions lib/BCGen/HBC/BytecodeDataProvider.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -13,6 +13,7 @@
#include "hermes/VM/Serializer.h"

#include "llvh/Support/MathExtras.h"
#include "llvh/Support/SHA1.h"

#ifdef HERMESVM_SERIALIZE
using hermes::vm::Deserializer;
Expand All @@ -23,6 +24,37 @@ namespace hermes {
namespace hbc {

namespace {

/// Given a bytecode buffer \p aref that has already passed the basic header
/// checks, returns whether the fileHash stored in its footer matches the SHA1
/// of every byte that precedes the footer.
///
/// Only the first header->fileLength bytes are considered, because the buffer
/// may carry an epilogue after the bytecode file proper. A buffer that is too
/// small to hold a header, or whose recorded fileLength cannot hold a header
/// plus a footer or runs past the end of the buffer, is reported as a
/// mismatch instead of being read out of bounds.
static bool hashIsValid(llvh::ArrayRef<uint8_t> aref) {
  // The header must be fully present before any of its fields are read.
  if (aref.size() < sizeof(hbc::BytecodeFileHeader)) {
    return false;
  }
  const auto *header =
      reinterpret_cast<const hbc::BytecodeFileHeader *>(aref.data());
  assert(
      header->version == hbc::BYTECODE_VERSION &&
      "must perform basic checks first");
  // The footer offset is derived from fileLength, so fileLength has to stay
  // inside the buffer and leave room for both the header and the footer.
  if (header->fileLength > aref.size() ||
      header->fileLength <
          sizeof(hbc::BytecodeFileHeader) + sizeof(BytecodeFileFooter)) {
    return false;
  }
  // Use fileLength rather than aref.end() since there may be an epilogue.
  const uint8_t *footerStart =
      aref.data() + header->fileLength - sizeof(BytecodeFileFooter);
  const auto *footer =
      reinterpret_cast<const hbc::BytecodeFileFooter *>(footerStart);
  SHA1 actual =
      llvh::SHA1::hash(llvh::ArrayRef<uint8_t>(aref.begin(), footerStart));
  return std::equal(actual.begin(), actual.end(), footer->fileHash);
}

/// Given a writable bytecode buffer \p aref that has already passed the basic
/// header checks, recomputes the SHA1 of every byte that precedes the footer
/// and stores it in the footer's fileHash field.
///
/// Only the first header->fileLength bytes are considered, because the buffer
/// may carry an epilogue after the bytecode file proper. If the buffer cannot
/// hold a header, or its recorded fileLength is inconsistent with the buffer,
/// this asserts in debug builds and leaves the buffer untouched otherwise,
/// rather than writing outside of it.
static void updateHash(llvh::MutableArrayRef<uint8_t> aref) {
  // The header must be fully present before any of its fields are read.
  const bool hasHeader = aref.size() >= sizeof(hbc::BytecodeFileHeader);
  assert(hasHeader && "buffer is too small to hold a bytecode header");
  if (!hasHeader) {
    return;
  }
  const auto *header =
      reinterpret_cast<const hbc::BytecodeFileHeader *>(aref.data());
  assert(
      header->version == hbc::BYTECODE_VERSION &&
      "must perform basic checks first");
  // The footer offset is derived from fileLength, so fileLength has to stay
  // inside the buffer and leave room for both the header and the footer.
  const bool lengthFits = header->fileLength <= aref.size() &&
      header->fileLength >=
          sizeof(hbc::BytecodeFileHeader) + sizeof(BytecodeFileFooter);
  assert(lengthFits && "fileLength is inconsistent with the buffer");
  if (!lengthFits) {
    return;
  }
  // Use fileLength rather than aref.end() since there may be an epilogue.
  auto *footer = reinterpret_cast<hbc::BytecodeFileFooter *>(
      aref.data() + header->fileLength - sizeof(BytecodeFileFooter));
  SHA1 actual = llvh::SHA1::hash(llvh::ArrayRef<uint8_t>(
      aref.begin(), reinterpret_cast<const uint8_t *>(footer)));
  std::copy(actual.begin(), actual.end(), footer->fileHash);
}

/// Returns if aref points to valid bytecode and specifies why it may not
/// in errorMessage (if supplied).
static bool sanityCheck(
Expand Down Expand Up @@ -81,6 +113,14 @@ static bool sanityCheck(
}
return false;
}
#ifdef HERMES_SLOW_DEBUG
if (!hashIsValid(aref)) {
if (errorMessage) {
*errorMessage = "Bytecode hash mismatch";
}
return false;
}
#endif
return true;
}

Expand Down Expand Up @@ -688,6 +728,15 @@ bool BCProviderFromBuffer::bytecodeStreamSanityCheck(
return sanityCheck(aref, BytecodeForm::Execution, errorMessage);
}

/// Public entry point for the footer hash check: forwards \p aref to the
/// file-local hashIsValid() helper and returns its result unchanged.
bool BCProviderFromBuffer::bytecodeHashIsValid(llvh::ArrayRef<uint8_t> aref) {
return hashIsValid(aref);
}

/// Public entry point for refreshing the footer hash: forwards the writable
/// buffer \p aref to the file-local updateHash() helper, which rewrites the
/// footer's fileHash in place.
void BCProviderFromBuffer::updateBytecodeHash(
llvh::MutableArrayRef<uint8_t> aref) {
updateHash(aref);
}

#ifdef HERMESVM_SERIALIZE
void BCProviderFromBuffer::serialize(Serializer &s) const {
// For BCProviderFromBuffer, serialize the buffer directly.
Expand Down
1 change: 1 addition & 0 deletions lib/BCGen/HBC/BytecodeFormConverter.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -194,6 +194,7 @@ class BytecodeFormConverter {
for (auto step : steps) {
(this->*step)();
}
hbc::BCProviderFromBuffer::updateBytecodeHash(bytes_);
}
};

Expand Down
10 changes: 10 additions & 0 deletions lib/BCGen/HBC/BytecodeStream.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -48,6 +48,16 @@ void BytecodeSerializer::serialize(BytecodeModule &BM, const SHA1 &sourceHash) {

serializeDebugInfo(BM);

SHA1 fileHash{};
if (!isLayout_) {
auto hash = outputHasher_.result();
assert(hash.size() == sizeof(fileHash) && "Incorrect length of SHA1 hash");
std::copy(hash.begin(), hash.end(), fileHash.begin());
}
// Even in layout mode, we "write" a footer (with an ignored zero hash),
// so that fileLength_ is set correctly.
writeBinary(BytecodeFileFooter{fileHash});

if (isLayout_) {
finishLayout(BM);
serialize(BM, sourceHash);
Expand Down
7 changes: 6 additions & 1 deletion lib/BCGen/HBC/SimpleBytecodeBuilder.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -8,6 +8,7 @@
#include "hermes/BCGen/HBC/SimpleBytecodeBuilder.h"

#include "llvh/Support/MathExtras.h"
#include "llvh/Support/SHA1.h"

using namespace hermes;
using namespace hbc;
Expand Down Expand Up @@ -48,7 +49,8 @@ std::unique_ptr<Buffer> SimpleBytecodeBuilder::generateBytecodeBuffer() {
}
// DebugInfo comes after the bytecodes, aligned to a 4-byte boundary.
uint32_t debugOffset = llvh::alignTo(currentSize, 4);
uint32_t totalSize = debugOffset + sizeof(DebugInfoHeader);
uint32_t totalSize =
debugOffset + sizeof(DebugInfoHeader) + sizeof(BytecodeFileFooter);
BytecodeOptions options;
BytecodeFileHeader header{MAGIC,
BYTECODE_VERSION,
Expand Down Expand Up @@ -102,6 +104,9 @@ std::unique_ptr<Buffer> SimpleBytecodeBuilder::generateBytecodeBuffer() {
// Write an empty debug info header.
DebugInfoHeader debugInfoHeader{0, 0, 0, 0, 0};
appendStructToBytecode(bytecode, debugInfoHeader);
// Add the bytecode hash.
appendStructToBytecode(
bytecode, BytecodeFileFooter{llvh::SHA1::hash(bytecode)});
// Generate the buffer.
return std::unique_ptr<Buffer>(new VectorBuffer(std::move(bytecode)));
}
2 changes: 1 addition & 1 deletion tools/hbc-attribute/hbc-attribute.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -68,7 +68,7 @@ using SLG = hermes::hbc::SerializedLiteralGenerator;
* If you have added or modified sections, make sure they're counted properly.
*/
static_assert(
BYTECODE_VERSION == 74,
BYTECODE_VERSION == 75,
"Bytecode version changed. Please verify that hbc-attribute counts correctly..");

static llvh::cl::opt<std::string> InputFilename(
Expand Down

0 comments on commit 6871396

Please sign in to comment.