Skip to content

Commit

Permalink
watchman: restructure scm/Mercurial mergebase cache
Browse files Browse the repository at this point in the history
Summary:
I want to use LRUCache in more places in the Mercurial
class as it can help to avoid thundering herd issues when multiple
subscriptions need the same data.

This commit is a first pass that switches from the simple mergeBase
cache to a version that uses LRUCache, which simplifies some
of the logic in here.

One thing worth noting about this change is in how we deal with the possible
race condition of someone mutating the repo while a query is being run.

The behavior change is this:

Previously, if we detected a race, we'd return the potentially
bogus result to the caller but not update the value in the cache,
so a subsequent call would compute a new value.

With the new code we have to return either a value or an error
to the cache, so what we do now is include the mtime from the
dirstate file in the cache key.  If the dirstate changes after
we've started our query, other callers will use the updated timestamp
as a component in their cache key and won't reuse our result.

In both the old and new implementations we'll continue to return
the potentially fishy result we computed in the racey scenario.

Reviewed By: chadaustin

Differential Revision: D22582274

fbshipit-source-id: 5a8587403378f5f8a7672fef09c57e1ddd424a10
  • Loading branch information
wez authored and facebook-github-bot committed Aug 28, 2020
1 parent a944041 commit e2e877c
Show file tree
Hide file tree
Showing 2 changed files with 45 additions and 107 deletions.
135 changes: 40 additions & 95 deletions scm/Mercurial.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -112,109 +112,54 @@ ChildProcess::Options Mercurial::makeHgOptions(w_string requestId) const {
return opt;
}

Mercurial::infoCache::infoCache(std::string path) : dirStatePath(path) {
dirstate = FileInformation();
}

w_string Mercurial::infoCache::lookupMergeBase(const std::string& commitId) {
if (dotChanged()) {
watchman::log(
watchman::DBG, "Blowing mergeBases cache because dirstate changed\n");

mergeBases.clear();
return nullptr;
}

auto it = mergeBases.find(commitId);
if (it != mergeBases.end()) {
watchman::log(watchman::DBG, "mergeBases cache hit for ", commitId, "\n");
return it->second;
}
watchman::log(watchman::DBG, "mergeBases cache miss for ", commitId, "\n");

return nullptr;
}

bool fileTimeEqual(const FileInformation& info1, const FileInformation& info2) {
return (
info1.size == info2.size && info1.mtime.tv_sec == info2.mtime.tv_sec &&
info1.mtime.tv_nsec == info2.mtime.tv_nsec);
}

bool Mercurial::infoCache::dotChanged() {
bool result;
Mercurial::Mercurial(w_string_piece rootPath, w_string_piece scmRoot)
: SCM(rootPath, scmRoot),
dirStatePath_(to<std::string>(getSCMRoot(), "/.hg/dirstate")),
mergeBases_(Configuration(), "scm_hg_mergebase", 32, 10) {}

struct timespec Mercurial::getDirStateMtime() const {
try {
auto info = getFileInformation(
dirStatePath.c_str(), CaseSensitivity::CaseSensitive);

if (!fileTimeEqual(info, dirstate)) {
log(DBG, "mergeBases stat(", dirStatePath, ") info differs\n");
result = true;
} else {
result = false;
log(DBG, "mergeBases stat(", dirStatePath, ") info same\n");
}

dirstate = info;

} catch (const std::system_error& exc) {
// Failed to stat, so assume that it changed
log(DBG, "mergeBases stat(", dirStatePath, ") failed: ", exc.what(), "\n");
result = true;
dirStatePath_.c_str(), CaseSensitivity::CaseSensitive);
return info.mtime;
} catch (const std::system_error&) {
// Failed to stat, so assume the current time
struct timeval now;
gettimeofday(&now, nullptr);
struct timespec ts;
ts.tv_sec = now.tv_sec;
ts.tv_nsec = now.tv_usec * 1000;
return ts;
}
return result;
}

Mercurial::Mercurial(w_string_piece rootPath, w_string_piece scmRoot)
: SCM(rootPath, scmRoot),
cache_(infoCache(to<std::string>(getSCMRoot(), "/.hg/dirstate"))) {}

w_string Mercurial::mergeBaseWith(w_string_piece commitId, w_string requestId)
const {
std::string idString(commitId.data(), commitId.size());

FileInformation startDirState;
{
auto cache = cache_.wlock();
auto result = cache->lookupMergeBase(idString);
if (result) {
watchman::log(
watchman::DBG,
"Using cached mergeBase value of ",
result,
" for commitId ",
commitId,
" because dirstate file is unchanged\n");
return result;
}
startDirState = cache->dirstate;
}

auto revset = to<std::string>("ancestor(.,", commitId, ")");
auto result = runMercurial(
{hgExecutablePath(), "log", "-T", "{node}", "-r", revset},
makeHgOptions(requestId),
"query for the merge base");

if (result.output.size() != 40) {
throw SCMError(
"expected merge base to be a 40 character string, got ", result.output);
}

{
// Check that the dirState did not change, if it did it means that we raced
// with another actor and that it is unsafe to blindly replace the cached
// value with what we just computed, which we know would be stale
// information for later consumers. It is fine to continue using the data
// we collected because we know that a pending change will advise clients of
// the new state
auto cache = cache_.wlock();
if (fileTimeEqual(startDirState, cache->dirstate)) {
cache->mergeBases[idString] = result.output;
}
}
return result.output;
auto mtime = getDirStateMtime();
auto key =
folly::to<std::string>(commitId, ":", mtime.tv_sec, ":", mtime.tv_nsec);
auto commit = folly::to<std::string>(commitId);

return mergeBases_
.get(
key,
[this, commit, requestId](const std::string&) {
auto revset = to<std::string>("ancestor(.,", commit, ")");
auto result = runMercurial(
{hgExecutablePath(), "log", "-T", "{node}", "-r", revset},
makeHgOptions(requestId),
"query for the merge base");

if (result.output.size() != 40) {
throw SCMError(
"expected merge base to be a 40 character string, got ",
result.output);
}

return folly::makeFuture(result.output);
})
.get()
->value();
}

std::vector<w_string> Mercurial::getFilesChangedSinceMergeBaseWith(
Expand Down
17 changes: 5 additions & 12 deletions scm/Mercurial.h
Original file line number Diff line number Diff line change
Expand Up @@ -8,6 +8,7 @@
#include <unordered_map>
#include "ChildProcess.h"
#include "FileInformation.h"
#include "LRUCache.h"
#include "SCM.h"

namespace watchman {
Expand Down Expand Up @@ -36,19 +37,11 @@ class Mercurial : public SCM {
w_string requestId = nullptr) const override;

private:
std::string dirStatePath_;
mutable LRUCache<std::string, w_string> mergeBases_;

// Returns options for invoking hg
ChildProcess::Options makeHgOptions(w_string requestId) const;

struct infoCache {
std::string dirStatePath;
FileInformation dirstate;
std::unordered_map<std::string, w_string> mergeBases;

explicit infoCache(std::string path);
bool dotChanged();

w_string lookupMergeBase(const std::string& commitId);
};
mutable folly::Synchronized<infoCache> cache_;
struct timespec getDirStateMtime() const;
};
} // namespace watchman

0 comments on commit e2e877c

Please sign in to comment.