Skip to content

Commit

Permalink
Fix racy assignment of rowset indexes
Browse files Browse the repository at this point in the history
Fixes a Jenkins failure seen in build #749:

I0617 22:40:14.869637 25346 tablet.cc:416] Compaction: entering phase 1 (flushing snapshot)
I0617 22:40:14.870381 25345 tablet.cc:416] Compaction: entering phase 1 (flushing snapshot)
I0617 22:40:14.870650 25345 diskrowset.cc:110] Opened CFile writer for column key[type='uint32'] at path /tmp/kudutest-1106/MultiThreadedTabletTest_1.DeleteAndReinsert.1371534007/tablet/rowset_0000000954.compact-tmp/col_0
I0617 22:40:14.870782 25345 diskrowset.cc:110] Opened CFile writer for column val[type='uint32'] at path /tmp/kudutest-1106/MultiThreadedTabletTest_1.DeleteAndReinsert.1371534007/tablet/rowset_0000000954.compact-tmp/col_1
I0617 22:40:14.870944 25345 diskrowset.cc:110] Opened CFile writer for column update_count[type='uint32'] at path /tmp/kudutest-1106/MultiThreadedTabletTest_1.DeleteAndReinsert.1371534007/tablet/rowset_0000000954.compact-tmp/col_2
F0617 22:40:14.870992 25346 mt-tablet-test.cc:242] Check failed: _s.ok() Bad status: IO error: /tmp/kudutest-1106/MultiThreadedTabletTest_1.DeleteAndReinsert.1371534007/tablet/rowset_0000000954.compact-tmp: File exists (error 17)

In this case, a concurrent Flush and Compact ended up getting assigned rowset #954 due to
a non-atomic and non-locked increment of next_rowset_idx. This commit changes it to be
an Atomic variable using atomic increment.

No new unit tests since this race is hard to trigger and an existing test caught it.

Review: http://review.sf.cloudera.com/r/29324/
  • Loading branch information
toddlipcon committed Jun 18, 2013
1 parent bb5e454 commit 727dd77
Show file tree
Hide file tree
Showing 2 changed files with 9 additions and 7 deletions.
13 changes: 7 additions & 6 deletions src/tablet/tablet.cc
Original file line number Diff line number Diff line change
Expand Up @@ -12,6 +12,7 @@
#include "common/iterator.h"
#include "common/scan_spec.h"
#include "common/schema.h"
#include "gutil/atomicops.h"
#include "gutil/strings/numbers.h"
#include "gutil/strings/strip.h"
#include "tablet/compaction.h"
Expand All @@ -30,7 +31,7 @@ namespace kudu { namespace tablet {
using std::string;
using std::vector;
using std::tr1::shared_ptr;

using base::subtle::Barrier_AtomicIncrement;

const char *kRowSetPrefix = "rowset_";

Expand Down Expand Up @@ -78,8 +79,8 @@ Status Tablet::Open() {
if (TryStripPrefixString(child, kRowSetPrefix, &suffix)) {
// The file should be named 'rowset_<N>'. N here is the index
// of the rowset (indicating the order in which it was flushed).
uint32_t rowset_idx;
if (!safe_strtou32(suffix.c_str(), &rowset_idx)) {
int32_t rowset_idx;
if (!safe_strto32(suffix.c_str(), &rowset_idx)) {
return Status::IOError(string("Bad rowset file: ") + absolute_path);
}

Expand All @@ -92,8 +93,7 @@ Status Tablet::Open() {
}
rowsets_.push_back(rowset);

next_rowset_idx_ = std::max(next_rowset_idx_,
(size_t)rowset_idx + 1);
next_rowset_idx_ = std::max(next_rowset_idx_, rowset_idx + 1);
} else {
LOG(WARNING) << "ignoring unknown file: " << absolute_path;
}
Expand Down Expand Up @@ -410,7 +410,8 @@ Status Tablet::PickRowSetsToCompact(RowSetsInCompaction *picked) const
}

Status Tablet::DoCompactionOrFlush(const RowSetsInCompaction &input) {
string new_rowset_dir = GetRowSetPath(dir_, next_rowset_idx_++);
AtomicWord my_index = Barrier_AtomicIncrement(&next_rowset_idx_, 1) - 1;
string new_rowset_dir = GetRowSetPath(dir_, my_index);
string tmp_rowset_dir = new_rowset_dir + ".compact-tmp";

LOG(INFO) << "Compaction: entering phase 1 (flushing snapshot)";
Expand Down
3 changes: 2 additions & 1 deletion src/tablet/tablet.h
Original file line number Diff line number Diff line change
Expand Up @@ -8,6 +8,7 @@
#include "common/generic_iterators.h"
#include "common/iterator.h"
#include "common/schema.h"
#include "gutil/atomicops.h"
#include "gutil/gscoped_ptr.h"
#include "tablet/diskrowset.h"
#include "tablet/memrowset.h"
Expand Down Expand Up @@ -155,7 +156,7 @@ class Tablet {
// and an RCU-style quiesce phase, but not worth it for now.
mutable percpu_rwlock component_lock_;

size_t next_rowset_idx_;
Atomic32 next_rowset_idx_;

Env *env_;

Expand Down

0 comments on commit 727dd77

Please sign in to comment.