Merge pull request ClickHouse#56391 from ClickHouse/rocks-db-analyzer
Fix rocksdb with analyzer.
KochetovNicolai authored Nov 9, 2023
2 parents 9498638 + c58c583 commit f7e7089
Showing 5 changed files with 202 additions and 15 deletions.
117 changes: 117 additions & 0 deletions src/Storages/KVStorageUtils.cpp
@@ -1,5 +1,7 @@
#include <Storages/KVStorageUtils.h>

#include <Columns/ColumnSet.h>

#include <Parsers/ASTFunction.h>
#include <Parsers/ASTIdentifier.h>
#include <Parsers/ASTSubquery.h>
@@ -121,6 +123,121 @@ bool traverseASTFilter(
}
return false;
}

bool traverseDAGFilter(
const std::string & primary_key, const DataTypePtr & primary_key_type, const ActionsDAG::Node * elem, const ContextPtr & context, FieldVectorPtr & res)
{
if (elem->type == ActionsDAG::ActionType::ALIAS)
return traverseDAGFilter(primary_key, primary_key_type, elem->children.at(0), context, res);

if (elem->type != ActionsDAG::ActionType::FUNCTION)
return false;

auto func_name = elem->function_base->getName();

if (func_name == "and")
{
// it is enough that one child has the key filter condition
for (const auto * child : elem->children)
if (traverseDAGFilter(primary_key, primary_key_type, child, context, res))
return true;
return false;
}
else if (func_name == "or")
{
// make sure every child has the key filter condition
for (const auto * child : elem->children)
if (!traverseDAGFilter(primary_key, primary_key_type, child, context, res))
return false;
return true;
}
else if (func_name == "equals" || func_name == "in")
{
if (elem->children.size() != 2)
return false;

if (func_name == "in")
{
const auto * key = elem->children.at(0);
while (key->type == ActionsDAG::ActionType::ALIAS)
key = key->children.at(0);

if (key->type != ActionsDAG::ActionType::INPUT)
return false;

if (key->result_name != primary_key)
return false;

const auto * value = elem->children.at(1);
if (value->type != ActionsDAG::ActionType::COLUMN)
return false;

const IColumn * value_col = value->column.get();
if (const auto * col_const = typeid_cast<const ColumnConst *>(value_col))
value_col = &col_const->getDataColumn();

const auto * col_set = typeid_cast<const ColumnSet *>(value_col);
if (!col_set)
return false;

auto future_set = col_set->getData();
future_set->buildOrderedSetInplace(context);

auto set = future_set->get();
if (!set)
return false;

if (!set->hasExplicitSetElements())
return false;

set->checkColumnsNumber(1);
const auto & set_column = *set->getSetElements()[0];

if (set_column.getDataType() != primary_key_type->getTypeId())
return false;

for (size_t row = 0; row < set_column.size(); ++row)
res->push_back(set_column[row]);
return true;
}
else
{
const auto * key = elem->children.at(0);
while (key->type == ActionsDAG::ActionType::ALIAS)
key = key->children.at(0);

if (key->type != ActionsDAG::ActionType::INPUT)
return false;

if (key->result_name != primary_key)
return false;

const auto * value = elem->children.at(1);
if (value->type != ActionsDAG::ActionType::COLUMN)
return false;

auto converted_field = convertFieldToType((*value->column)[0], *primary_key_type);
if (!converted_field.isNull())
res->push_back(converted_field);
return true;
}
}
return false;
}
}

std::pair<FieldVectorPtr, bool> getFilterKeys(
const String & primary_key, const DataTypePtr & primary_key_type, const ActionDAGNodes & filter_nodes, const ContextPtr & context)
{
if (filter_nodes.nodes.empty())
return {{}, true};

auto filter_actions_dag = ActionsDAG::buildFilterActionsDAG(filter_nodes.nodes, {}, context);
const auto * predicate = filter_actions_dag->getOutputs().at(0);

FieldVectorPtr res = std::make_shared<FieldVector>();
auto matched_keys = traverseDAGFilter(primary_key, primary_key_type, predicate, context, res);
return std::make_pair(res, !matched_keys);
}

std::pair<FieldVectorPtr, bool> getFilterKeys(
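The asymmetry between "and" and "or" above is the heart of the key extraction: a conjunction can be reduced to key lookups as soon as one conjunct pins the primary key, while a disjunction needs every disjunct to pin it, otherwise rows outside the collected key set could still match. A minimal, self-contained sketch of that rule follows (the toy Node type and integer keys are illustrative only, not ClickHouse's ActionsDAG; only an "equals" leaf is modelled):

#include <iostream>
#include <memory>
#include <string>
#include <vector>

// Toy predicate node standing in for an ActionsDAG function node.
struct Node
{
    std::string func;                              // "and", "or" or "equals"
    std::vector<std::shared_ptr<Node>> children;   // for "and"/"or"
    std::string column;                            // for "equals"
    int value = 0;                                 // for "equals"
};

// Returns true if the predicate can be reduced to lookups of the collected keys.
bool collectKeys(const Node & node, const std::string & primary_key, std::vector<int> & keys)
{
    if (node.func == "and")
    {
        for (const auto & child : node.children)
            if (collectKeys(*child, primary_key, keys))
                return true;                       // one conjunct restricting the key is enough
        return false;
    }
    if (node.func == "or")
    {
        for (const auto & child : node.children)
            if (!collectKeys(*child, primary_key, keys))
                return false;                      // every disjunct must restrict the key
        return true;
    }
    if (node.func == "equals" && node.column == primary_key)
    {
        keys.push_back(node.value);
        return true;
    }
    return false;
}

int main()
{
    auto eq = [](std::string col, int v)
    {
        auto n = std::make_shared<Node>();
        n->func = "equals";
        n->column = std::move(col);
        n->value = v;
        return n;
    };
    auto mk = [](std::string func, std::vector<std::shared_ptr<Node>> children)
    {
        auto n = std::make_shared<Node>();
        n->func = std::move(func);
        n->children = std::move(children);
        return n;
    };

    // (key = 1) OR (key = 2 AND other = 3): both disjuncts pin the key,
    // so the whole predicate reduces to the key set {1, 2}.
    auto root = mk("or", {eq("key", 1), mk("and", {eq("key", 2), eq("other", 3)})});

    std::vector<int> keys;
    bool matched = collectKeys(*root, "key", keys);
    std::cout << "matched=" << matched << " collected " << keys.size() << " key(s)\n";
}

Under this rule a predicate such as key = 1 OR other = 3 fails to pin the key in its second disjunct, so getFilterKeys() reports all_scan = true (the second element of the returned pair) and the storage falls back to a full scan.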
3 changes: 3 additions & 0 deletions src/Storages/KVStorageUtils.h
@@ -21,6 +21,9 @@ using DataTypePtr = std::shared_ptr<const IDataType>;
std::pair<FieldVectorPtr, bool> getFilterKeys(
const std::string & primary_key, const DataTypePtr & primary_key_type, const SelectQueryInfo & query_info, const ContextPtr & context);

std::pair<FieldVectorPtr, bool> getFilterKeys(
const String & primary_key, const DataTypePtr & primary_key_type, const ActionDAGNodes & filter_nodes, const ContextPtr & context);

template <typename K, typename V>
void fillColumns(const K & key, const V & value, size_t key_pos, const Block & header, MutableColumns & columns)
{
91 changes: 79 additions & 12 deletions src/Storages/RocksDB/StorageEmbeddedRocksDB.cpp
@@ -20,6 +20,9 @@
#include <Interpreters/MutationsInterpreter.h>

#include <Processors/Executors/PullingPipelineExecutor.h>
#include <Processors/QueryPlan/SourceStepWithFilter.h>
#include <Processors/QueryPlan/QueryPlan.h>
#include <Processors/Sources/NullSource.h>

#include <Poco/Logger.h>
#include <Poco/Util/AbstractConfiguration.h>
@@ -440,7 +443,46 @@ void StorageEmbeddedRocksDB::initDB()
}
}

Pipe StorageEmbeddedRocksDB::read(
class ReadFromEmbeddedRocksDB : public SourceStepWithFilter
{
public:
std::string getName() const override { return "ReadFromEmbeddedRocksDB"; }
void initializePipeline(QueryPipelineBuilder & pipeline, const BuildQueryPipelineSettings &) override;
void applyFilters() override;

ReadFromEmbeddedRocksDB(
Block sample_block,
StorageSnapshotPtr storage_snapshot_,
const StorageEmbeddedRocksDB & storage_,
SelectQueryInfo query_info_,
ContextPtr context_,
size_t max_block_size_,
size_t num_streams_)
: SourceStepWithFilter(DataStream{.header = std::move(sample_block)})
, storage_snapshot(std::move(storage_snapshot_))
, storage(storage_)
, query_info(std::move(query_info_))
, context(std::move(context_))
, max_block_size(max_block_size_)
, num_streams(num_streams_)
{
}

private:
StorageSnapshotPtr storage_snapshot;
const StorageEmbeddedRocksDB & storage;
SelectQueryInfo query_info;
ContextPtr context;

size_t max_block_size;
size_t num_streams;

FieldVectorPtr keys;
bool all_scan = false;
};

void StorageEmbeddedRocksDB::read(
QueryPlan & query_plan,
const Names & column_names,
const StorageSnapshotPtr & storage_snapshot,
SelectQueryInfo & query_info,
@@ -450,23 +492,39 @@ Pipe StorageEmbeddedRocksDB::read(
size_t num_streams)
{
storage_snapshot->check(column_names);
Block sample_block = storage_snapshot->metadata->getSampleBlock();

FieldVectorPtr keys;
bool all_scan = false;
auto reading = std::make_unique<ReadFromEmbeddedRocksDB>(
std::move(sample_block),
storage_snapshot,
*this,
query_info,
context_,
max_block_size,
num_streams);

query_plan.addStep(std::move(reading));
}

void ReadFromEmbeddedRocksDB::initializePipeline(QueryPipelineBuilder & pipeline, const BuildQueryPipelineSettings &)
{
const auto & sample_block = getOutputStream().header;

Block sample_block = storage_snapshot->metadata->getSampleBlock();
auto primary_key_data_type = sample_block.getByName(primary_key).type;
std::tie(keys, all_scan) = getFilterKeys(primary_key, primary_key_data_type, query_info, context_);
if (all_scan)
{
auto iterator = std::unique_ptr<rocksdb::Iterator>(rocksdb_ptr->NewIterator(rocksdb::ReadOptions()));
auto iterator = std::unique_ptr<rocksdb::Iterator>(storage.rocksdb_ptr->NewIterator(rocksdb::ReadOptions()));
iterator->SeekToFirst();
return Pipe(std::make_shared<EmbeddedRocksDBSource>(*this, sample_block, std::move(iterator), max_block_size));
auto source = std::make_shared<EmbeddedRocksDBSource>(storage, sample_block, std::move(iterator), max_block_size);
source->setStorageLimits(query_info.storage_limits);
pipeline.init(Pipe(std::move(source)));
}
else
{
if (keys->empty())
return {};
{
pipeline.init(Pipe(std::make_shared<NullSource>(sample_block)));
return;
}

::sort(keys->begin(), keys->end());
keys->erase(std::unique(keys->begin(), keys->end()), keys->end());
@@ -484,13 +542,22 @@ Pipe StorageEmbeddedRocksDB::read(
size_t begin = num_keys * thread_idx / num_threads;
size_t end = num_keys * (thread_idx + 1) / num_threads;

pipes.emplace_back(std::make_shared<EmbeddedRocksDBSource>(
*this, sample_block, keys, keys->begin() + begin, keys->begin() + end, max_block_size));
auto source = std::make_shared<EmbeddedRocksDBSource>(
storage, sample_block, keys, keys->begin() + begin, keys->begin() + end, max_block_size);
source->setStorageLimits(query_info.storage_limits);
pipes.emplace_back(std::move(source));
}
return Pipe::unitePipes(std::move(pipes));
pipeline.init(Pipe::unitePipes(std::move(pipes)));
}
}

void ReadFromEmbeddedRocksDB::applyFilters()
{
const auto & sample_block = getOutputStream().header;
auto primary_key_data_type = sample_block.getByName(storage.primary_key).type;
std::tie(keys, all_scan) = getFilterKeys(storage.primary_key, primary_key_data_type, filter_nodes, context);
}

SinkToStoragePtr StorageEmbeddedRocksDB::write(
const ASTPtr & /*query*/, const StorageMetadataPtr & metadata_snapshot, ContextPtr /*context*/, bool /*async_insert*/)
{
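The structural change in this file: read() no longer returns a Pipe; it only registers a ReadFromEmbeddedRocksDB step on the query plan. The planner later pushes filter conditions into the step, applyFilters() converts them into a key set via the DAG-based getFilterKeys(), and only then initializePipeline() builds either a full-scan source or per-key sources. A rough sketch of that ordering, with toy stand-in types rather than the real IStorage/QueryPlan interfaces:

#include <iostream>
#include <vector>

// Toy stand-ins: filters are plain ints, and the extracted keys are the same ints.
struct ReadStep
{
    std::vector<int> filter_nodes;   // filled by the planner (stand-in for ActionDAGNodes)
    std::vector<int> keys;           // stand-in for FieldVectorPtr
    bool all_scan = false;

    void applyFilters()
    {
        // Mirrors the role of ReadFromEmbeddedRocksDB::applyFilters():
        // turn pushed-down filters into a key set before the pipeline is built.
        keys = filter_nodes;
        all_scan = keys.empty();
    }

    void initializePipeline() const
    {
        if (all_scan)
            std::cout << "full scan over RocksDB\n";
        else
            std::cout << "point lookups for " << keys.size() << " key(s)\n";
    }
};

int main()
{
    ReadStep step;                   // what read() would add to the query plan
    step.filter_nodes = {10, 20};    // filters pushed down after planning
    step.applyFilters();             // key extraction happens here, not in read()
    step.initializePipeline();       // pipeline built from the precomputed keys
}

Splitting key extraction out of read() is what allows the analyzer path to supply filters: the filter ActionsDAG only exists after planning, whereas the removed code derived keys from the AST-based query_info inside read() itself.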
4 changes: 3 additions & 1 deletion src/Storages/RocksDB/StorageEmbeddedRocksDB.h
@@ -26,6 +26,7 @@ class Context;
class StorageEmbeddedRocksDB final : public IStorage, public IKeyValueEntity, WithContext
{
friend class EmbeddedRocksDBSink;
friend class ReadFromEmbeddedRocksDB;
public:
StorageEmbeddedRocksDB(const StorageID & table_id_,
const String & relative_data_path_,
@@ -39,7 +40,8 @@ class StorageEmbeddedRocksDB final : public IStorage, public IKeyValueEntity, WithContext

std::string getName() const override { return "EmbeddedRocksDB"; }

Pipe read(
void read(
QueryPlan & query_plan,
const Names & column_names,
const StorageSnapshotPtr & storage_snapshot,
SelectQueryInfo & query_info,
2 changes: 0 additions & 2 deletions tests/analyzer_tech_debt.txt
@@ -14,7 +14,6 @@
01268_shard_avgweighted
01455_shard_leaf_max_rows_bytes_to_read
01495_subqueries_in_with_statement
01504_rocksdb
01560_merge_distributed_join
01584_distributed_buffer_cannot_find_column
01586_columns_pruning
@@ -37,7 +36,6 @@
02345_implicit_transaction
02352_grouby_shadows_arg
02354_annoy
02375_rocksdb_with_filters
02402_merge_engine_with_view
02404_memory_bound_merging
02426_orc_bug
