Skip to content

Commit

Permalink
Speed up location finding by caching cur line
Browse files Browse the repository at this point in the history
Summary:
If locations are accessed in a natural left to right order, they will
always be either in the current or the next line, which we can cache to
avoid almost 100% of the lookups.

That is more the case for Juno than for Hermes, but it should benefit
Hermes too.

Juno needs access to the whole line anyway, because it needs to operate
with Unicode character indices, not byte offsets, so this kind of
caching integrates naturally into that model.

Reviewed By: avp

Differential Revision: D30720707

fbshipit-source-id: 5fc68250fdf7d758d5a79ac0a012734a0bed8d1c
  • Loading branch information
tmikov authored and facebook-github-bot committed Sep 23, 2021
1 parent b2da448 commit 1116dcf
Show file tree
Hide file tree
Showing 5 changed files with 215 additions and 40 deletions.
24 changes: 22 additions & 2 deletions external/llvh/include/llvh/Support/SourceMgr.h
Original file line number Diff line number Diff line change
Expand Up @@ -76,13 +76,23 @@ class SourceMgr {
/// offset corresponding to a particular SMLoc).
mutable VariableSizeOffsets OffsetCache;

/// Lazily populate \c OffsetCache and return it.
/// The cache holds the byte offset of every '\n' in \c Buffer, in buffer
/// order. The static type parameter \p T must be an unsigned integer type
/// wide enough to hold any offset inside \c Buffer.
/// \return a pointer to the cached vector of newline offsets.
template<typename T>
std::vector<T> *getOffsets() const;

/// Populate \c OffsetCache and look up a given \p Ptr in it, assuming
/// it points somewhere into \c Buffer. The static type parameter \p T
/// must be an unsigned integer type from uint{8,16,32,64}_t large
/// enough to store offsets inside \c Buffer.
/// \return a pointer to the start of the line and the line number.
/// \return the line and the line number.
template<typename T>
std::pair<StringRef, unsigned> getLineNumber(const char *Ptr) const;

/// Return a reference to the line with the specified 1-based line number.
/// If the line is greater than the last line in the buffer, an empty
/// reference is returned.
template<typename T>
std::pair<const char *, unsigned> getLineNumber(const char *Ptr) const;
StringRef getLineRef(unsigned line) const;

/// This is the location of the parent include, or null if at the top level.
SMLoc IncludeLoc;
Expand Down Expand Up @@ -189,6 +199,16 @@ class SourceMgr {
return getLineAndColumn(Loc, BufferID).first;
}

/// Find the line containing the specified location in the specified file.
/// If \p BufferID is 0, the buffer containing \p Loc is looked up first.
/// \return a pair of (reference to the line itself, 1-based line number).
/// The line reference includes the terminating '\n' when the line has one.
/// This is not a fast method.
std::pair<StringRef, unsigned> FindLine(SMLoc Loc, unsigned BufferID = 0) const;

/// Return a reference to the specified (1-based) line.
/// \p BufferID must be a real (non-zero) buffer id; unlike FindLine() there
/// is no location from which the buffer could be deduced.
/// If the line is greater than the last line in the buffer, an empty
/// reference is returned.
StringRef getLineRef(unsigned line, unsigned BufferID) const;

/// Find the line and column number for the specified location in the
/// specified file. This is not a fast method.
std::pair<unsigned, unsigned> getLineAndColumn(SMLoc Loc,
Expand Down
106 changes: 82 additions & 24 deletions external/llvh/lib/Support/SourceMgr.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -80,26 +80,31 @@ unsigned SourceMgr::FindBufferContainingLoc(SMLoc Loc) const {
}

template <typename T>
std::pair<const char *, unsigned> SourceMgr::SrcBuffer::getLineNumber(
const char *Ptr) const {
std::vector<T> *SourceMgr::SrcBuffer::getOffsets() const {
// Ensure OffsetCache is allocated and populated with offsets of all the
// '\n' bytes.
std::vector<T> *Offsets = nullptr;
if (OffsetCache.isNull()) {
Offsets = new std::vector<T>();
OffsetCache = Offsets;
size_t Sz = Buffer->getBufferSize();
assert(Sz <= std::numeric_limits<T>::max());
StringRef S = Buffer->getBuffer();
for (size_t N = 0; N < Sz; ++N) {
if (S[N] == '\n') {
Offsets->push_back(static_cast<T>(N));
}
if (!OffsetCache.isNull())
return OffsetCache.get<std::vector<T> *>();

std::vector<T> *Offsets = new std::vector<T>();
OffsetCache = Offsets;
size_t Sz = Buffer->getBufferSize();
assert(Sz <= std::numeric_limits<T>::max());
StringRef S = Buffer->getBuffer();
for (size_t N = 0; N < Sz; ++N) {
if (S[N] == '\n') {
Offsets->push_back(static_cast<T>(N));
}
} else {
Offsets = OffsetCache.get<std::vector<T> *>();
}

return Offsets;
}

template <typename T>
std::pair<StringRef, unsigned> SourceMgr::SrcBuffer::getLineNumber(
const char *Ptr) const {
std::vector<T> *Offsets = getOffsets<T>();

const char *BufStart = Buffer->getBufferStart();
assert(Ptr >= BufStart && Ptr <= Buffer->getBufferEnd());
ptrdiff_t PtrDiff = Ptr - BufStart;
Expand All @@ -116,8 +121,44 @@ std::pair<const char *, unsigned> SourceMgr::SrcBuffer::getLineNumber(
const char *LineStart =
EOL != Offsets->begin() ? BufStart + EOL[-1] + 1 : BufStart;

// The end of the line is the EOL inclusive or the end of the buffer exclusive.
const char *LineEnd =
EOL != Offsets->end() ? BufStart + *EOL + 1 : Buffer->getBufferEnd();

// Lines count from 1, so add 1 to the distance from the 0th line.
return {LineStart, (1 + (EOL - Offsets->begin()))};
return {StringRef(LineStart, LineEnd - LineStart), (1 + (EOL - Offsets->begin()))};
}

/// Fetch the text of the line with the given 1-based number \p line.
/// The returned reference includes the terminating '\n' when the line has
/// one; the final line runs to the end of the buffer. A line number past the
/// final line yields an empty reference positioned at the end of the buffer.
template <typename T>
StringRef SourceMgr::SrcBuffer::getLineRef(unsigned line) const {
  assert(line >= 1 && "line number must be 1-based");
  // Zero-based index of the requested line; this is also the index of the
  // line's own '\n' in the offset table, when it has one.
  const unsigned index = line - 1;

  const char *const bufStart = Buffer->getBufferStart();
  const std::vector<T> &eols = *getOffsets<T>();
  const size_t numEols = eols.size();
  const char *const bufEnd = Buffer->getBufferEnd();

  // The buffer holds numEols + 1 lines; anything beyond that does not exist.
  if (index > numEols)
    return StringRef(bufEnd, 0);

  // A line begins right after the previous line's '\n', or at the start of
  // the buffer for the very first line.
  const char *const lineStart =
      index == 0 ? bufStart : bufStart + eols[index - 1] + 1;

  // It ends just past its own '\n', or at the end of the buffer when it is
  // the final (unterminated) line.
  const char *const lineEnd =
      index < numEols ? bufStart + eols[index] + 1 : bufEnd;

  return StringRef(lineStart, lineEnd - lineStart);
}

SourceMgr::SrcBuffer::SrcBuffer(SourceMgr::SrcBuffer &&Other)
Expand All @@ -141,8 +182,7 @@ SourceMgr::SrcBuffer::~SrcBuffer() {
}
}

std::pair<unsigned, unsigned>
SourceMgr::getLineAndColumn(SMLoc Loc, unsigned BufferID) const {
std::pair<StringRef, unsigned> SourceMgr::FindLine(SMLoc Loc, unsigned int BufferID) const {
if (!BufferID)
BufferID = FindBufferContainingLoc(Loc);
assert(BufferID && "Invalid Location!");
Expand All @@ -151,17 +191,35 @@ SourceMgr::getLineAndColumn(SMLoc Loc, unsigned BufferID) const {
const char *Ptr = Loc.getPointer();

size_t Sz = SB.Buffer->getBufferSize();
std::pair<const char *, unsigned> StartAndLineNo;
if (Sz <= std::numeric_limits<uint8_t>::max())
StartAndLineNo = SB.getLineNumber<uint8_t>(Ptr);
return SB.getLineNumber<uint8_t>(Ptr);
else if (Sz <= std::numeric_limits<uint16_t>::max())
StartAndLineNo = SB.getLineNumber<uint16_t>(Ptr);
return SB.getLineNumber<uint16_t>(Ptr);
else if (Sz <= std::numeric_limits<uint32_t>::max())
StartAndLineNo = SB.getLineNumber<uint32_t>(Ptr);
return SB.getLineNumber<uint32_t>(Ptr);
else
StartAndLineNo = SB.getLineNumber<uint64_t>(Ptr);
return SB.getLineNumber<uint64_t>(Ptr);
}

return std::make_pair(StartAndLineNo.second, Ptr - StartAndLineNo.first + 1);
/// Return a reference to the 1-based line \p line of buffer \p BufferID.
/// The buffer id must be non-zero. The lookup is dispatched to the
/// SrcBuffer::getLineRef instantiation whose offset type is the narrowest
/// unsigned integer able to represent the buffer size.
StringRef SourceMgr::getLineRef(unsigned line, unsigned BufferID) const {
  assert(BufferID != 0 && "BufferID must be specified");

  const auto &SrcBuf = getBufferInfo(BufferID);
  const size_t BufSize = SrcBuf.Buffer->getBufferSize();

  // Pick the smallest offset width that can hold every offset in the buffer.
  if (BufSize <= std::numeric_limits<uint8_t>::max())
    return SrcBuf.getLineRef<uint8_t>(line);
  if (BufSize <= std::numeric_limits<uint16_t>::max())
    return SrcBuf.getLineRef<uint16_t>(line);
  if (BufSize <= std::numeric_limits<uint32_t>::max())
    return SrcBuf.getLineRef<uint32_t>(line);
  return SrcBuf.getLineRef<uint64_t>(line);
}

/// Find the 1-based line and column numbers of \p Loc.
/// The line is located with FindLine(); the column is the byte distance of
/// \p Loc from the start of that line, plus one.
std::pair<unsigned, unsigned>
SourceMgr::getLineAndColumn(SMLoc Loc, unsigned BufferID) const {
  const std::pair<StringRef, unsigned> Found = FindLine(Loc, BufferID);
  const char *const LineStart = Found.first.data();
  const unsigned LineNo = Found.second;

  // Columns count from 1, so add 1 to the offset within the line.
  return std::make_pair(LineNo, Loc.getPointer() - LineStart + 1);
}

void SourceMgr::PrintIncludeStack(SMLoc IncludeLoc, raw_ostream &OS) const {
Expand Down
4 changes: 4 additions & 0 deletions include/hermes/AST/Context.h
Original file line number Diff line number Diff line change
Expand Up @@ -267,6 +267,10 @@ class Context {
return sm_;
}

/// \return a read-only reference to the source error manager (\c sm_), for
/// callers that only hold a const Context.
const SourceErrorManager &getSourceErrorManager() const {
return sm_;
}

/// \return the table for static require resolution, nullptr if not supplied.
const std::vector<uint32_t> &getSegments() const {
return segments_;
Expand Down
45 changes: 45 additions & 0 deletions include/hermes/Support/SourceErrorManager.h
Original file line number Diff line number Diff line change
Expand Up @@ -101,6 +101,17 @@ class SourceErrorManager {
}
};

/// Result from looking for a line in an input buffer. Contains the buffer
/// id, 1-based line number and a reference to the line itself in the buffer.
/// Both numeric fields default to 0, a value no valid 1-based id or line
/// number can take.
struct LineCoord {
/// 1-based buffer id.
unsigned bufId = 0;
/// 1-based line number.
unsigned lineNo = 0;
/// A reference to the line itself, including the EOL, if present.
/// This points into the source buffer; it is not a copy of the text.
llvh::StringRef lineRef;
};

struct ICoordTranslator {
virtual ~ICoordTranslator() = 0;
virtual void translate(SourceCoords &coords) = 0;
Expand All @@ -113,6 +124,29 @@ class SourceErrorManager {
SourceErrorOutputOptions outputOptions_;
std::shared_ptr<ICoordTranslator> translator_{};

/// A cache to speed up finding locations. The assumption is that most lookups
/// happen either in the current or the next source line, which would happen
/// naturally if we are scanning the source left to right.
/// If there is a cache hit in the current line, there is no lookup at all -
/// just quick arithmetic to calculate the column offset. If the hit is in
/// the next line, we "slide" the cache - the next line becomes the current
/// one, and we fetch a reference to the next line, which is also an O(1)
/// operation.
struct FindLineCache {
/// 1-based buffer ID. 0 means cache is invalid.
unsigned bufferId = 0;
/// 1-based line number of \c lineRef.
unsigned lineNo = 0;
/// The last found line.
llvh::StringRef lineRef{};
/// The line following \c lineRef (i.e. line \c lineNo + 1). It is an empty
/// reference when \c lineRef is the last line of the buffer.
llvh::StringRef nextLineRef{};

/// Fill a SourceCoords instance under the assumption that it is a verified
/// cache hit, i.e. \p loc lies inside \c lineRef.
/// Writes the cached buffer ID and line number into \p result, and sets the
/// column to the 1-based byte offset of \p loc from the start of \c lineRef.
/// Before the column is computed, \p loc is moved backwards off a '\r' or a
/// UTF-8 continuation byte, never past the start of the line.
void fillCoords(SMLoc loc, SourceCoords &result);
} findLineCache_;

/// Virtual buffers are tagged with the highest bit.
static constexpr unsigned kVirtualBufIdTag = 1u
<< (sizeof(unsigned) * CHAR_BIT - 1);
Expand Down Expand Up @@ -363,6 +397,17 @@ class SourceErrorManager {
/// Find the bufferId of the specified location \p loc.
uint32_t findBufferIdForLoc(SMLoc loc) const;

/// Find the buffer ID and line of the specified location \p loc.
/// \return the buffer ID and line of the location, or None on error.
llvh::Optional<LineCoord> findBufferAndLine(SMLoc loc) const;

/// Return a reference to the specified (1-based) line.
/// If the line is greater than the last line in the buffer, an empty
/// reference is returned.
/// \p bufId must be non-zero; the underlying SourceMgr::getLineRef asserts it.
llvh::StringRef getLineRef(unsigned bufId, unsigned line) const {
// Note the argument order: SourceMgr takes the line first, then the buffer.
return sm_.getLineRef(line, bufId);
}

/// Find the bufferId, line and column of the specified location \p loc.
/// \return true on success, false if could not be found, in which case
/// result.isValid() would also return false.
Expand Down
76 changes: 62 additions & 14 deletions lib/Support/SourceErrorManager.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -236,18 +236,29 @@ void SourceErrorManager::message(
message(dk, loc, SMRange{}, msg, subsystem);
}

/// Make sure the location doesn't point to \r or in the middle of a utf-8
/// sequence.
static inline SMLoc adjustSourceLocation(
const llvh::MemoryBuffer *buf,
SMLoc loc) {
/// Locate the buffer and the line that contain \p loc.
/// \return the buffer id, 1-based line number and a reference to the line, or
/// None when \p loc is invalid or does not belong to any known buffer.
auto SourceErrorManager::findBufferAndLine(SMLoc loc) const
    -> llvh::Optional<LineCoord> {
  // A buffer id of 0 means "no buffer"; an invalid location never has one,
  // so the buffer search is skipped for it entirely.
  const unsigned bufId = loc.isValid() ? sm_.FindBufferContainingLoc(loc) : 0;
  if (bufId == 0)
    return llvh::None;

  // FindLine yields (line reference, line number).
  const auto found = sm_.FindLine(loc, bufId);

  LineCoord coord;
  coord.bufId = bufId;
  coord.lineNo = found.second;
  coord.lineRef = found.first;
  return coord;
}

/// Adjust the source location backwards making sure it doesn't point to \r or
/// in the middle of a utf-8 sequence.
static inline SMLoc adjustSourceLocation(const char *bufStart, SMLoc loc) {
const char *ptr = loc.getPointer();
// In the very unlikely case that `loc` points to a '\r', we skip backwards
// until we find another character, while being careful not to fall off the
// beginning of the buffer.
if (LLVM_UNLIKELY(*ptr == '\r') ||
LLVM_UNLIKELY(isUTF8ContinuationByte(*ptr))) {
const char *bufStart = buf->getBufferStart();
do {
if (LLVM_UNLIKELY(ptr == bufStart)) {
// This is highly unlikely but theoretically possible. There were only
Expand All @@ -260,23 +271,60 @@ static inline SMLoc adjustSourceLocation(
return SMLoc::getFromPointer(ptr);
}

/// \return true if \p loc points at one of the bytes of \p str, i.e. it lies
/// in the half-open range [str.begin(), str.end()).
static bool locInside(llvh::StringRef str, SMLoc loc) {
  const char *const target = loc.getPointer();
  if (target < str.begin())
    return false;
  return target < str.end();
}

/// Populate \p result from the cached line, assuming \p loc has already been
/// verified to lie inside \c lineRef.
/// The location is first moved backwards off a '\r' or a UTF-8 continuation
/// byte (never past the start of the cached line); the column is then the
/// 1-based byte offset of the adjusted location within the line.
inline void SourceErrorManager::FindLineCache::fillCoords(
    SMLoc loc,
    SourceCoords &result) {
  const char *const lineStart = lineRef.data();
  const SMLoc adjusted = adjustSourceLocation(lineStart, loc);

  result.bufId = bufferId;
  result.line = lineNo;
  result.col = adjusted.getPointer() - lineStart + 1;
}

bool SourceErrorManager::findBufferLineAndLoc(SMLoc loc, SourceCoords &result) {
if (!loc.isValid()) {
result.bufId = 0;
return false;
}

result.bufId = sm_.FindBufferContainingLoc(loc);
if (!result.bufId)
return false;
if (findLineCache_.bufferId) {
// Check the cache with the hope that the lookup is within the last line or
// the next line.
if (locInside(findLineCache_.lineRef, loc)) {
findLineCache_.fillCoords(loc, result);
return true;
}
if (locInside(findLineCache_.nextLineRef, loc)) {
++findLineCache_.lineNo;
findLineCache_.lineRef = findLineCache_.nextLineRef;
findLineCache_.nextLineRef =
sm_.getLineRef(findLineCache_.lineNo + 1, findLineCache_.bufferId);

findLineCache_.fillCoords(loc, result);
return true;
}

// Adjust the source location if necessary.
loc = adjustSourceLocation(sm_.getMemoryBuffer(result.bufId), loc);
findLineCache_.bufferId = 0;
}

auto lineCoord = findBufferAndLine(loc);
if (!lineCoord) {
result.bufId = 0;
return false;
}

auto lineCol = sm_.getLineAndColumn(loc, result.bufId);
result.line = lineCol.first;
result.col = lineCol.second;
// Populate the cache.
findLineCache_.bufferId = lineCoord->bufId;
findLineCache_.lineNo = lineCoord->lineNo;
findLineCache_.lineRef = lineCoord->lineRef;
findLineCache_.nextLineRef =
sm_.getLineRef(findLineCache_.lineNo + 1, lineCoord->bufId);

findLineCache_.fillCoords(loc, result);
return true;
}

Expand Down

0 comments on commit 1116dcf

Please sign in to comment.