Skip to content

Commit

Permalink
key_encoder: avoid unordered_map for looking up encoders
Browse files Browse the repository at this point in the history
I saw EncoderResolver::GetKeyEncoder() taking up ~3% of CPU in some
workloads, mostly caused by a 'div' instruction in unordered_map. Since
we know that the type enums are low integers, we can just use a vector
for lookup as well.

This also switches from shared_ptr to unique_ptr for holding the
resolvers: the shared_ptr is a holdover from pre-C++11 where putting
single-ownership smart pointers into a container was impossible.

Change-Id: I5a9bf3a28e451562c3347fe9650db3874d62a368
Reviewed-on: http://gerrit.cloudera.org:8080/6431
Reviewed-by: David Ribeiro Alves <[email protected]>
Tested-by: Kudu Jenkins
Reviewed-by: Andrew Wong <[email protected]>
  • Loading branch information
toddlipcon committed Mar 22, 2017
1 parent 4f51c66 commit 3679ead
Show file tree
Hide file tree
Showing 2 changed files with 17 additions and 10 deletions.
4 changes: 2 additions & 2 deletions src/kudu/cfile/cfile-test.cc
Original file line number Diff line number Diff line change
Expand Up @@ -21,18 +21,18 @@
#include <list>

#include "kudu/cfile/cfile-test-base.h"
#include "kudu/cfile/cfile.pb.h"
#include "kudu/cfile/cfile_reader.h"
#include "kudu/cfile/cfile_writer.h"
#include "kudu/cfile/cfile.pb.h"
#include "kudu/cfile/index_block.h"
#include "kudu/cfile/index_btree.h"
#include "kudu/common/columnblock.h"
#include "kudu/fs/fs-test-util.h"
#include "kudu/gutil/gscoped_ptr.h"
#include "kudu/gutil/stringprintf.h"
#include "kudu/util/metrics.h"
#include "kudu/util/test_macros.h"
#include "kudu/util/stopwatch.h"
#include "kudu/util/test_macros.h"

DECLARE_string(block_cache_type);
DECLARE_string(cfile_do_on_finish);
Expand Down
23 changes: 15 additions & 8 deletions src/kudu/common/key_encoder.cc
Original file line number Diff line number Diff line change
Expand Up @@ -16,8 +16,8 @@
// under the License.

#include <functional>
#include <memory>
#include <string>
#include <unordered_map>
#include <vector>

#include "kudu/common/common.pb.h"
Expand All @@ -26,22 +26,22 @@
#include "kudu/gutil/singleton.h"
#include "kudu/util/faststring.h"

using std::shared_ptr;
using std::unordered_map;
using std::unique_ptr;
using std::vector;

namespace kudu {


// A resolver for Encoders
template <typename Buffer>
class EncoderResolver {
public:
const KeyEncoder<Buffer>& GetKeyEncoder(DataType t) {
return *FindOrDie(encoders_, t);
DCHECK(HasKeyEncoderForType(t));
return *encoders_[t];
}

const bool HasKeyEncoderForType(DataType t) {
return ContainsKey(encoders_, t);
return t < encoders_.size() && encoders_[t];
}

private:
Expand All @@ -59,11 +59,18 @@ class EncoderResolver {

template<DataType Type> void AddMapping() {
KeyEncoderTraits<Type, Buffer> traits;
InsertOrDie(&encoders_, Type, shared_ptr<KeyEncoder<Buffer> >(new KeyEncoder<Buffer>(traits)));
if (encoders_.size() <= Type) {
encoders_.resize(static_cast<size_t>(Type) + 1);
}
CHECK(!encoders_[Type]) << "already have mapping for " << DataType_Name(Type);
encoders_[Type].reset(new KeyEncoder<Buffer>(traits));
}

friend class Singleton<EncoderResolver<Buffer> >;
unordered_map<DataType, shared_ptr<KeyEncoder<Buffer> >, std::hash<size_t> > encoders_;
// We use a vector instead of a map here since this shows up in some hot paths
// and we know that the valid data types all have low enough IDs that the
// vector will be small.
vector<unique_ptr<KeyEncoder<Buffer>>> encoders_;
};

template <typename Buffer>
Expand Down

0 comments on commit 3679ead

Please sign in to comment.