Skip to content

Commit

Permalink
[tablet] Optimize TabletMetadata::CollectBlockIds efficiency
Browse files Browse the repository at this point in the history
Container size may reallocate and elements will be moved or copied
several times if using std::vector and inserting elements at the
beginning in TabletMetadata::CollectBlockIds, we'd better to insert
elements at the end.

We did some simple benchmarks for TabletMetadata::CollectBlockIds()
operation in TestTabletMetadata.BenchmarkCollectBlockIds, compared
with old version, result shows that inserting at the end provides a
linear time cost when block count per RowSet or RowSet count
increasing.
And also, we ran benchmarks to compare std::vector with std::list
and std::deque, both of them have worse efficiency.
Result details as follow:
(The first column is FLAGS_test_row_set_count/FLAGS_test_block_count_per_rs,
the other columns show 10 times total TabletMetadata::CollectBlockIds()
time cost, in millisecond.)
           vector          list    deque   vector
           (head insert)                   (rear insert)
1000/1000  3464            687     334     321
2000/1000  15238           1390    647     613
4000/1000  75139           3057    1392    1212
8000/1000  328808          6593    2805    2736
4000/2000  159517          6488    2965    2462
4000/4000  348471          11967   5513    5141
4000/8000  -(too long)     23706   11704   10806

Change-Id: I7ce853e35eb7dfa9f9a099e465ea8edfaa7c4aa9
Reviewed-on: http://gerrit.cloudera.org:8080/14445
Tested-by: Kudu Jenkins
Reviewed-by: Adar Dembo <[email protected]>
  • Loading branch information
acelyc111 authored and adembo committed Nov 15, 2019
1 parent 9a418e8 commit 3ca7db0
Show file tree
Hide file tree
Showing 11 changed files with 80 additions and 38 deletions.
2 changes: 1 addition & 1 deletion src/kudu/fs/block_id.h
Original file line number Diff line number Diff line change
Expand Up @@ -101,6 +101,6 @@ struct BlockIdEqual {
};

typedef std::unordered_set<BlockId, BlockIdHash, BlockIdEqual> BlockIdSet;

typedef std::vector<BlockId> BlockIdContainer;
} // namespace kudu
#endif /* KUDU_FS_BLOCK_ID_H */
2 changes: 1 addition & 1 deletion src/kudu/tablet/delta_tracker.cc
Original file line number Diff line number Diff line change
Expand Up @@ -367,7 +367,7 @@ Status DeltaTracker::CommitDeltaStoreMetadataUpdate(const RowSetMetadataUpdate&
&new_stores, type),
"Unable to open delta blocks");

vector<BlockId> removed_blocks;
BlockIdContainer removed_blocks;
rowset_metadata_->CommitUpdate(update, &removed_blocks);
rowset_metadata_->AddOrphanedBlocks(removed_blocks);
// Once we successfully commit to the rowset metadata, let's ensure we update
Expand Down
2 changes: 1 addition & 1 deletion src/kudu/tablet/diskrowset.cc
Original file line number Diff line number Diff line change
Expand Up @@ -601,7 +601,7 @@ Status DiskRowSet::MajorCompactDeltaStoresWithColumnIds(const vector<ColumnId>&
// Prepare the changes to the metadata.
RowSetMetadataUpdate update;
compaction->CreateMetadataUpdate(&update);
vector<BlockId> removed_blocks;
BlockIdContainer removed_blocks;
rowset_metadata_->CommitUpdate(update, &removed_blocks);

// Now that the metadata has been updated, open a new cfile set with the
Expand Down
16 changes: 8 additions & 8 deletions src/kudu/tablet/metadata-test.cc
Original file line number Diff line number Diff line change
Expand Up @@ -63,13 +63,13 @@ TEST_F(MetadataTest, RSMD_TestReplaceDeltas_1) {
to_replace.emplace_back(2);
to_replace.emplace_back(3);

vector<BlockId> removed;
BlockIdContainer removed;
meta_->CommitUpdate(
RowSetMetadataUpdate()
.ReplaceRedoDeltaBlocks(to_replace, { BlockId(123) }), &removed);
ASSERT_EQ(vector<BlockId>({ BlockId(1), BlockId(123), BlockId(4) }),
meta_->redo_delta_blocks());
ASSERT_EQ(vector<BlockId>({ BlockId(2), BlockId(3) }),
ASSERT_EQ(BlockIdContainer({ BlockId(2), BlockId(3) }),
removed);
}

Expand All @@ -79,13 +79,13 @@ TEST_F(MetadataTest, RSMD_TestReplaceDeltas_2) {
to_replace.emplace_back(1);
to_replace.emplace_back(2);

vector<BlockId> removed;
BlockIdContainer removed;
meta_->CommitUpdate(
RowSetMetadataUpdate()
.ReplaceRedoDeltaBlocks(to_replace, { BlockId(123) }), &removed);
ASSERT_EQ(vector<BlockId>({ BlockId(123), BlockId(3), BlockId(4) }),
meta_->redo_delta_blocks());
ASSERT_EQ(vector<BlockId>({ BlockId(1), BlockId(2) }),
ASSERT_EQ(BlockIdContainer({ BlockId(1), BlockId(2) }),
removed);
}

Expand All @@ -95,13 +95,13 @@ TEST_F(MetadataTest, RSMD_TestReplaceDeltas_3) {
to_replace.emplace_back(3);
to_replace.emplace_back(4);

vector<BlockId> removed;
BlockIdContainer removed;
meta_->CommitUpdate(
RowSetMetadataUpdate()
.ReplaceRedoDeltaBlocks(to_replace, { BlockId(123) }), &removed);
ASSERT_EQ(vector<BlockId>({ BlockId(1), BlockId(2), BlockId(123) }),
meta_->redo_delta_blocks());
ASSERT_EQ(vector<BlockId>({ BlockId(3), BlockId(4) }),
ASSERT_EQ(BlockIdContainer({ BlockId(3), BlockId(4) }),
removed);
}

Expand All @@ -112,7 +112,7 @@ TEST_F(MetadataTest, RSMD_TestReplaceDeltas_Bad_NonContiguous) {
to_replace.emplace_back(4);

EXPECT_DEATH({
vector<BlockId> removed;
BlockIdContainer removed;
meta_->CommitUpdate(
RowSetMetadataUpdate().ReplaceRedoDeltaBlocks(to_replace, { BlockId(123) }),
&removed);
Expand All @@ -127,7 +127,7 @@ TEST_F(MetadataTest, RSMD_TestReplaceDeltas_Bad_DoesntExist) {
to_replace.emplace_back(555);

EXPECT_DEATH({
vector<BlockId> removed;
BlockIdContainer removed;
meta_->CommitUpdate(
RowSetMetadataUpdate().ReplaceRedoDeltaBlocks(to_replace, { BlockId(123) }),
&removed);
Expand Down
8 changes: 4 additions & 4 deletions src/kudu/tablet/rowset_metadata.cc
Original file line number Diff line number Diff line change
Expand Up @@ -56,7 +56,7 @@ Status RowSetMetadata::CreateNew(TabletMetadata* tablet_metadata,
return Status::OK();
}

void RowSetMetadata::AddOrphanedBlocks(const vector<BlockId>& blocks) {
void RowSetMetadata::AddOrphanedBlocks(const BlockIdContainer& blocks) {
tablet_metadata_->AddOrphanedBlocks(blocks);
}

Expand Down Expand Up @@ -203,7 +203,7 @@ Status RowSetMetadata::CommitUndoDeltaDataBlock(const BlockId& block_id) {
}

void RowSetMetadata::CommitUpdate(const RowSetMetadataUpdate& update,
vector<BlockId>* removed) {
BlockIdContainer* removed) {
removed->clear();
{
std::lock_guard<LockType> l(lock_);
Expand Down Expand Up @@ -298,8 +298,8 @@ int64_t RowSetMetadata::live_row_count() const {
return live_row_count_;
}

vector<BlockId> RowSetMetadata::GetAllBlocks() {
vector<BlockId> blocks;
BlockIdContainer RowSetMetadata::GetAllBlocks() {
BlockIdContainer blocks;
std::lock_guard<LockType> l(lock_);
if (!adhoc_index_block_.IsNull()) {
blocks.push_back(adhoc_index_block_);
Expand Down
6 changes: 3 additions & 3 deletions src/kudu/tablet/rowset_metadata.h
Original file line number Diff line number Diff line change
Expand Up @@ -93,7 +93,7 @@ class RowSetMetadata {

Status Flush();

void AddOrphanedBlocks(const std::vector<BlockId>& blocks);
void AddOrphanedBlocks(const BlockIdContainer& blocks);

const std::string ToString() const;

Expand Down Expand Up @@ -222,11 +222,11 @@ class RowSetMetadata {
// Returns the blocks removed from the rowset metadata during the update.
// These blocks must be added to the TabletMetadata's orphaned blocks list.
void CommitUpdate(const RowSetMetadataUpdate& update,
std::vector<BlockId>* removed);
BlockIdContainer* removed);

void ToProtobuf(RowSetDataPB *pb);

std::vector<BlockId> GetAllBlocks();
BlockIdContainer GetAllBlocks();

// Increase the row count.
// Note:
Expand Down
38 changes: 38 additions & 0 deletions src/kudu/tablet/tablet_metadata-test.cc
Original file line number Diff line number Diff line change
Expand Up @@ -18,29 +18,42 @@
#include "kudu/tablet/tablet_metadata.h"

#include <cstdint>
#include <map>
#include <memory>
#include <ostream>
#include <string>

#include <boost/optional/optional.hpp>
#include <gflags/gflags.h>
#include <glog/logging.h>
#include <gtest/gtest.h>

#include "kudu/common/common.pb.h"
#include "kudu/common/partial_row.h"
#include "kudu/common/schema.h"
#include "kudu/common/wire_protocol-test-util.h"
#include "kudu/fs/block_id.h"
#include "kudu/gutil/gscoped_ptr.h"
#include "kudu/gutil/port.h"
#include "kudu/gutil/ref_counted.h"
#include "kudu/tablet/local_tablet_writer.h"
#include "kudu/tablet/metadata.pb.h"
#include "kudu/tablet/rowset_metadata.h"
#include "kudu/tablet/tablet-harness.h"
#include "kudu/tablet/tablet-test-util.h"
#include "kudu/tablet/tablet.h"
#include "kudu/util/pb_util.h"
#include "kudu/util/status.h"
#include "kudu/util/stopwatch.h"
#include "kudu/util/test_macros.h"

DEFINE_int64(test_row_set_count, 1000, "");
DEFINE_int64(test_block_count_per_rs, 1000, "");

using std::map;
using std::shared_ptr;
using std::unique_ptr;

namespace kudu {
namespace tablet {

Expand Down Expand Up @@ -183,5 +196,30 @@ TEST_F(TestTabletMetadata, TestOnDiskSize) {
ASSERT_GE(final_size, superblock_pb.ByteSize());
}

TEST_F(TestTabletMetadata, BenchmarkCollectBlockIds) {
auto tablet_meta = harness_->tablet()->metadata();
RowSetMetadataVector rs_metas;
for (int i = 0; i < FLAGS_test_row_set_count; ++i) {
unique_ptr<RowSetMetadata> meta;
ASSERT_OK(RowSetMetadata::CreateNew(tablet_meta, i, &meta));

map<ColumnId, BlockId> block_by_column;
for (int j = 0; j < FLAGS_test_block_count_per_rs; ++j) {
block_by_column[ColumnId(j)] = BlockId(j);
}
meta->SetColumnDataBlocks(block_by_column);
rs_metas.emplace_back(shared_ptr<RowSetMetadata>(meta.release()));
}
tablet_meta->UpdateAndFlush(RowSetMetadataIds(), rs_metas, TabletMetadata::kNoMrsFlushed);

for (int i = 0; i < 10; ++i) {
BlockIdContainer block_ids;
LOG_TIMING(INFO, "collecting BlockIds") {
block_ids = tablet_meta->CollectBlockIds();
}
LOG(INFO) << "block_ids size: " << block_ids.size();
}
}

} // namespace tablet
} // namespace kudu
22 changes: 11 additions & 11 deletions src/kudu/tablet/tablet_metadata.cc
Original file line number Diff line number Diff line change
Expand Up @@ -189,11 +189,11 @@ vector<BlockIdPB> TabletMetadata::CollectBlockIdPBs(const TabletSuperBlockPB& su
return block_ids;
}

vector<BlockId> TabletMetadata::CollectBlockIds() {
vector<BlockId> block_ids;
BlockIdContainer TabletMetadata::CollectBlockIds() {
BlockIdContainer block_ids;
for (const auto& r : rowsets_) {
vector<BlockId> rowset_block_ids = r->GetAllBlocks();
block_ids.insert(block_ids.begin(),
BlockIdContainer rowset_block_ids = r->GetAllBlocks();
block_ids.insert(block_ids.end(),
rowset_block_ids.begin(),
rowset_block_ids.end());
}
Expand Down Expand Up @@ -348,7 +348,7 @@ Status TabletMetadata::UpdateOnDiskSize() {
}

Status TabletMetadata::LoadFromSuperBlock(const TabletSuperBlockPB& superblock) {
vector<BlockId> orphaned_blocks;
BlockIdContainer orphaned_blocks;

VLOG(2) << "Loading TabletMetadata from SuperBlockPB:" << std::endl
<< SecureDebugString(superblock);
Expand Down Expand Up @@ -497,17 +497,17 @@ Status TabletMetadata::UpdateAndFlush(const RowSetMetadataIds& to_remove,
return Flush();
}

void TabletMetadata::AddOrphanedBlocks(const vector<BlockId>& blocks) {
void TabletMetadata::AddOrphanedBlocks(const BlockIdContainer& block_ids) {
std::lock_guard<LockType> l(data_lock_);
AddOrphanedBlocksUnlocked(blocks);
AddOrphanedBlocksUnlocked(block_ids);
}

void TabletMetadata::AddOrphanedBlocksUnlocked(const vector<BlockId>& blocks) {
void TabletMetadata::AddOrphanedBlocksUnlocked(const BlockIdContainer& block_ids) {
DCHECK(data_lock_.is_locked());
orphaned_blocks_.insert(blocks.begin(), blocks.end());
orphaned_blocks_.insert(block_ids.begin(), block_ids.end());
}

void TabletMetadata::DeleteOrphanedBlocks(const vector<BlockId>& blocks) {
void TabletMetadata::DeleteOrphanedBlocks(const BlockIdContainer& blocks) {
if (PREDICT_FALSE(!FLAGS_enable_tablet_orphaned_block_deletion)) {
LOG_WITH_PREFIX(WARNING) << "Not deleting " << blocks.size()
<< " block(s) from disk. Block deletion disabled via "
Expand Down Expand Up @@ -559,7 +559,7 @@ Status TabletMetadata::Flush() {
"tablet_id", tablet_id_);

MutexLock l_flush(flush_lock_);
vector<BlockId> orphaned;
BlockIdContainer orphaned;
TabletSuperBlockPB pb;
{
std::lock_guard<LockType> l(data_lock_);
Expand Down
9 changes: 5 additions & 4 deletions src/kudu/tablet/tablet_metadata.h
Original file line number Diff line number Diff line change
Expand Up @@ -113,7 +113,7 @@ class TabletMetadata : public RefCountedThreadSafe<TabletMetadata> {
static std::vector<BlockIdPB> CollectBlockIdPBs(
const TabletSuperBlockPB& superblock);

std::vector<BlockId> CollectBlockIds();
BlockIdContainer CollectBlockIds();

const std::string& tablet_id() const {
DCHECK_NE(state_, kNotLoadedYet);
Expand Down Expand Up @@ -190,7 +190,7 @@ class TabletMetadata : public RefCountedThreadSafe<TabletMetadata> {
//
// Blocks are removed from this set after they are successfully deleted
// in a call to DeleteOrphanedBlocks().
void AddOrphanedBlocks(const std::vector<BlockId>& block_ids);
void AddOrphanedBlocks(const BlockIdContainer& block_ids);

// Mark the superblock to be in state 'delete_type', sync it to disk, and
// then delete all of the rowsets in this tablet.
Expand Down Expand Up @@ -289,6 +289,7 @@ class TabletMetadata : public RefCountedThreadSafe<TabletMetadata> {
private:
friend class RefCountedThreadSafe<TabletMetadata>;
friend class MetadataTest;
friend class TestTabletMetadataBenchmark;

// Compile time assert that no one deletes TabletMetadata objects.
~TabletMetadata();
Expand Down Expand Up @@ -331,15 +332,15 @@ class TabletMetadata : public RefCountedThreadSafe<TabletMetadata> {
const RowSetMetadataVector& rowsets) const;

// Requires 'data_lock_'.
void AddOrphanedBlocksUnlocked(const std::vector<BlockId>& block_ids);
void AddOrphanedBlocksUnlocked(const BlockIdContainer& block_ids);

// Deletes the provided 'blocks' on disk.
//
// All blocks that are successfully deleted are removed from the
// 'orphaned_blocks_' set.
//
// Failures are logged, but are not fatal.
void DeleteOrphanedBlocks(const std::vector<BlockId>& blocks);
void DeleteOrphanedBlocks(const BlockIdContainer& blocks);

enum State {
kNotLoadedYet,
Expand Down
9 changes: 6 additions & 3 deletions src/kudu/tools/kudu-tool-test.cc
Original file line number Diff line number Diff line change
Expand Up @@ -1198,7 +1198,7 @@ TEST_F(ToolTest, TestFsCheck) {

// Create a local replica, flush some rows a few times, and collect all
// of the created block IDs.
vector<BlockId> block_ids;
BlockIdContainer block_ids;
{
TabletHarness::Options opts(kTestDir);
opts.tablet_id = kTabletId;
Expand Down Expand Up @@ -1235,8 +1235,11 @@ TEST_F(ToolTest, TestFsCheck) {
ASSERT_OK(fs.Open(&report));
std::shared_ptr<BlockDeletionTransaction> deletion_transaction =
fs.block_manager()->NewDeletionTransaction();
for (int i = 0; i < block_ids.size(); i += 2) {
deletion_transaction->AddDeletedBlock(block_ids[i]);
int index = 0;
for (const auto& block_id : block_ids) {
if (index++ % 2 == 0) {
deletion_transaction->AddDeletedBlock(block_id);
}
}
deletion_transaction->CommitDeletedBlocks(&missing_ids);
}
Expand Down
4 changes: 2 additions & 2 deletions src/kudu/tools/tool_action_fs.cc
Original file line number Diff line number Diff line change
Expand Up @@ -140,14 +140,14 @@ Status Check(const RunnerContext& /*context*/) {
}

// Get the "live" block IDs (i.e. those referenced by a tablet).
vector<BlockId> live_block_ids;
BlockIdContainer live_block_ids;
unordered_map<BlockId, string, BlockIdHash, BlockIdEqual> live_block_id_to_tablet;
vector<string> tablet_ids;
RETURN_NOT_OK(fs_manager.ListTabletIds(&tablet_ids));
for (const auto& t : tablet_ids) {
scoped_refptr<TabletMetadata> meta;
RETURN_NOT_OK(TabletMetadata::Load(&fs_manager, t, &meta));
vector<BlockId> tablet_live_block_ids = meta->CollectBlockIds();
BlockIdContainer tablet_live_block_ids = meta->CollectBlockIds();
live_block_ids.insert(live_block_ids.end(),
tablet_live_block_ids.begin(),
tablet_live_block_ids.end());
Expand Down

0 comments on commit 3ca7db0

Please sign in to comment.