Skip to content

Commit

Permalink
Fix race condition caused by concurrent accesses to forceMmapOff_ whe…
Browse files Browse the repository at this point in the history
…n opening Posix WritableFile (facebook#9685)

Summary:
Pull Request resolved: facebook#9685

Our TSAN reports a race condition as follows when running test
```
gtest-parallel -r 100 ./external_sst_file_test --gtest_filter=ExternalSSTFileTest.MultiThreaded
```
leads to the following

```
WARNING: ThreadSanitizer: data race (pid=2683148)
  Write of size 1 at 0x556fede63340 by thread T7:
    #0 rocksdb::(anonymous namespace)::PosixFileSystem::OpenWritableFile(std::__cxx11::basic_string<char, std::char_traits<char>, std::allocator<char> > const&, rocksdb::FileOptions const&, bool, std::unique_ptr<rocksdb::FSWritableFile, std::default_delete<rocksdb::FSWritableFile> >*, rocksdb::IODebugContext*) internal_repo_rocksdb/repo/env/fs_posix.cc:334 (external_sst_file_test+0xb61ac4)
    facebook#1 rocksdb::(anonymous namespace)::PosixFileSystem::ReopenWritableFile(std::__cxx11::basic_string<char, std::char_traits<char>, std::allocator<char> > const&, rocksdb::FileOptions const&, std::unique_ptr<rocksdb::FSWritableFile, std::default_delete<rocksdb::FSWritableFile> >*, rocksdb::IODebugContext*) internal_repo_rocksdb/repo/env/fs_posix.cc:382 (external_sst_file_test+0xb5ba96)
    facebook#2 rocksdb::CompositeEnv::ReopenWritableFile(std::__cxx11::basic_string<char, std::char_traits<char>, std::allocator<char> > const&, std::unique_ptr<rocksdb::WritableFile, std::default_delete<rocksdb::WritableFile> >*, rocksdb::EnvOptions const&) internal_repo_rocksdb/repo/env/composite_env.cc:334 (external_sst_file_test+0xa6ab7f)
    facebook#3 rocksdb::EnvWrapper::ReopenWritableFile(std::__cxx11::basic_string<char, std::char_traits<char>, std::allocator<char> > const&, std::unique_ptr<rocksdb::WritableFile, std::default_delete<rocksdb::WritableFile> >*, rocksdb::EnvOptions const&) internal_repo_rocksdb/repo/include/rocksdb/env.h:1428 (external_sst_file_test+0x561f3e)
Previous read of size 1 at 0x556fede63340 by thread T4:
    #0 rocksdb::(anonymous namespace)::PosixFileSystem::OpenWritableFile(std::__cxx11::basic_string<char, std::char_traits<char>, std::allocator<char> > const&, rocksdb::FileOptions const&, bool, std::unique_ptr<rocksdb::FSWritableFile, std::default_delete<rocksdb::FSWritableFile> >*, rocksdb::IODebugContext*) internal_repo_rocksdb/repo/env/fs_posix.cc:328 (external_sst_file_test+0xb61a70)
    facebook#1 rocksdb::(anonymous namespace)::PosixFileSystem::ReopenWritableFile(std::__cxx11::basic_string<char, std::char_traits<char>, std::allocator
...
```

Fix by making sure the following block gets executed only once:
```
      if (!checkedDiskForMmap_) {
        // this will be executed once in the program's lifetime.
        // do not use mmapWrite on non ext-3/xfs/tmpfs systems.
        if (!SupportsFastAllocate(fname)) {
          forceMmapOff_ = true;
        }
        checkedDiskForMmap_ = true;
      }
```

Reviewed By: pdillinger

Differential Revision: D34780308

fbshipit-source-id: b761f66b24c8b5b8389d86ea371c8542b8d869d5
  • Loading branch information
riversand963 authored and facebook-github-bot committed Mar 18, 2022
1 parent f0fca81 commit 3bdbf67
Show file tree
Hide file tree
Showing 2 changed files with 23 additions and 23 deletions.
1 change: 1 addition & 0 deletions HISTORY.md
Original file line number Diff line number Diff line change
Expand Up @@ -20,6 +20,7 @@
* Fixed a bug that `Iterator::Refresh()` reads stale keys after DeleteRange() performed.
* Fixed a race condition when disable and re-enable manual compaction.
* Fixed automatic error recovery failure in atomic flush.
* Fixed a race condition when mmaping a WritableFile on POSIX.

### Public API changes
* Remove BlockBasedTableOptions.hash_index_allow_collision which already takes no effect.
Expand Down
45 changes: 22 additions & 23 deletions env/fs_posix.cc
Original file line number Diff line number Diff line change
Expand Up @@ -325,14 +325,7 @@ class PosixFileSystem : public FileSystem {
SetFD_CLOEXEC(fd, &options);

if (options.use_mmap_writes) {
if (!checkedDiskForMmap_) {
// this will be executed once in the program's lifetime.
// do not use mmapWrite on non ext-3/xfs/tmpfs systems.
if (!SupportsFastAllocate(fname)) {
forceMmapOff_ = true;
}
checkedDiskForMmap_ = true;
}
MaybeForceDisableMmap(fd);
}
if (options.use_mmap_writes && !forceMmapOff_) {
result->reset(new PosixMmapFile(fname, fd, page_size_, options));
Expand Down Expand Up @@ -431,14 +424,7 @@ class PosixFileSystem : public FileSystem {
}

if (options.use_mmap_writes) {
if (!checkedDiskForMmap_) {
// this will be executed once in the program's lifetime.
// do not use mmapWrite on non ext-3/xfs/tmpfs systems.
if (!SupportsFastAllocate(fname)) {
forceMmapOff_ = true;
}
checkedDiskForMmap_ = true;
}
MaybeForceDisableMmap(fd);
}
if (options.use_mmap_writes && !forceMmapOff_) {
result->reset(new PosixMmapFile(fname, fd, page_size_, options));
Expand Down Expand Up @@ -999,8 +985,7 @@ class PosixFileSystem : public FileSystem {
}
#endif
private:
bool checkedDiskForMmap_;
bool forceMmapOff_; // do we override Env options?
bool forceMmapOff_ = false; // do we override Env options?

// Returns true iff the named directory exists and is a directory.
virtual bool DirExists(const std::string& dname) {
Expand All @@ -1011,10 +996,10 @@ class PosixFileSystem : public FileSystem {
return false; // stat() failed return false
}

bool SupportsFastAllocate(const std::string& path) {
bool SupportsFastAllocate(int fd) {
#ifdef ROCKSDB_FALLOCATE_PRESENT
struct statfs s;
if (statfs(path.c_str(), &s)) {
if (fstatfs(fd, &s)) {
return false;
}
switch (s.f_type) {
Expand All @@ -1028,11 +1013,26 @@ class PosixFileSystem : public FileSystem {
return false;
}
#else
(void)path;
(void)fd;
return false;
#endif
}

void MaybeForceDisableMmap(int fd) {
static std::once_flag s_check_disk_for_mmap_once;
assert(this == FileSystem::Default().get());
std::call_once(
s_check_disk_for_mmap_once,
[this](int fdesc) {
// this will be executed once in the program's lifetime.
// do not use mmapWrite on non ext-3/xfs/tmpfs systems.
if (!SupportsFastAllocate(fdesc)) {
forceMmapOff_ = true;
}
},
fd);
}

#ifdef ROCKSDB_IOURING_PRESENT
bool IsIOUringEnabled() {
if (RocksDbIOUringEnable && RocksDbIOUringEnable()) {
Expand Down Expand Up @@ -1166,8 +1166,7 @@ size_t PosixFileSystem::GetLogicalBlockSizeForWriteIfNeeded(
}

PosixFileSystem::PosixFileSystem()
: checkedDiskForMmap_(false),
forceMmapOff_(false),
: forceMmapOff_(false),
page_size_(getpagesize()),
allow_non_owner_access_(true) {
#if defined(ROCKSDB_IOURING_PRESENT)
Expand Down

0 comments on commit 3bdbf67

Please sign in to comment.