Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Fix renaming property with indexes #4763

Open
wants to merge 7 commits into
base: master
Choose a base branch
from
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
13 changes: 7 additions & 6 deletions extension/fts/src/catalog/fts_index_catalog_entry.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -33,14 +33,15 @@ std::string FTSIndexAuxInfo::toCypher(const catalog::IndexCatalogEntry& indexEnt
const main::ClientContext* context) const {
std::string cypher;
auto catalog = context->getCatalog();
auto tableName =
catalog->getTableCatalogEntry(context->getTransaction(), indexEntry.getTableID())
->getName();
auto tableCatalogEntry =
catalog->getTableCatalogEntry(context->getTransaction(), indexEntry.getTableID());
auto tableName = tableCatalogEntry->getName();
std::string propertyStr;
auto properties = indexEntry.getProperties();
for (auto i = 0u; i < properties.size(); i++) {
auto propertyIDs = indexEntry.getPropertyIDs();
for (auto i = 0u; i < propertyIDs.size(); i++) {
propertyStr +=
common::stringFormat("'{}'{} ", properties[i], i == properties.size() - 1 ? "" : ",");
common::stringFormat("'{}'{}", tableCatalogEntry->getProperty(propertyIDs[i]).getName(),
i == propertyIDs.size() - 1 ? "" : ", ");
}
cypher += common::stringFormat("CALL CREATE_FTS_INDEX('{}', '{}', [{}], stemmer := '{}');",
tableName, indexEntry.getIndexName(), std::move(propertyStr), config.stemmer);
Expand Down
27 changes: 16 additions & 11 deletions extension/fts/src/function/create_fts_index.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -27,24 +27,25 @@ using namespace kuzu::main;
using namespace kuzu::function;

struct CreateFTSBindData final : FTSBindData {
std::vector<std::string> properties;
std::vector<common::property_id_t> propertyIDs;
FTSConfig ftsConfig;

CreateFTSBindData(std::string tableName, table_id_t tableID, std::string indexName,
std::vector<std::string> properties, FTSConfig createFTSConfig)
std::vector<common::property_id_t> propertyIDs, FTSConfig createFTSConfig)
: FTSBindData{std::move(tableName), tableID, std::move(indexName),
binder::expression_vector{}},
properties{std::move(properties)}, ftsConfig{std::move(createFTSConfig)} {}
propertyIDs{std::move(propertyIDs)}, ftsConfig{std::move(createFTSConfig)} {}

std::unique_ptr<TableFuncBindData> copy() const override {
return std::make_unique<CreateFTSBindData>(*this);
}
};

static std::vector<std::string> bindProperties(const catalog::NodeTableCatalogEntry& entry,
static std::vector<common::property_id_t> bindProperties(
const catalog::NodeTableCatalogEntry& entry,
const std::shared_ptr<binder::Expression>& properties) {
auto propertyValue = properties->constPtrCast<binder::LiteralExpression>()->getValue();
std::vector<std::string> result;
std::vector<common::property_id_t> result;
for (auto i = 0u; i < propertyValue.getChildrenSize(); i++) {
auto propertyName = NestedVal::getChildVal(&propertyValue, i)->toString();
if (!entry.containsProperty(propertyName)) {
Expand All @@ -55,7 +56,7 @@ static std::vector<std::string> bindProperties(const catalog::NodeTableCatalogEn
LogicalType::STRING()) {
throw BinderException{"Full text search index can only be built on string properties."};
}
result.push_back(std::move(propertyName));
result.push_back(entry.getPropertyID(propertyName));
}
return result;
}
Expand Down Expand Up @@ -88,10 +89,11 @@ static std::unique_ptr<TableFuncBindData> bindFunc(ClientContext* context,
auto indexName = input->getLiteralVal<std::string>(1);
auto nodeTableEntry = storage::IndexUtils::bindTable(*context,
input->getLiteralVal<std::string>(0), indexName, storage::IndexOperation::CREATE);
auto properties = bindProperties(*nodeTableEntry, input->getParam(2));
auto propertyIDs = bindProperties(*nodeTableEntry, input->getParam(2));
auto createFTSConfig = FTSConfig{input->optionalParams};
return std::make_unique<CreateFTSBindData>(nodeTableEntry->getName(),
nodeTableEntry->getTableID(), indexName, std::move(properties), std::move(createFTSConfig));
nodeTableEntry->getTableID(), indexName, std::move(propertyIDs),
std::move(createFTSConfig));
}

static std::string createStopWordsTableIfNotExists(const ClientContext& context,
Expand Down Expand Up @@ -138,7 +140,10 @@ std::string createFTSIndexQuery(ClientContext& context, const TableFuncBindData&
"key(ID));",
appearsInfoTableName);
auto tableName = ftsBindData->tableName;
for (auto& property : ftsBindData->properties) {
auto tableEntry =
context.getCatalog()->getTableCatalogEntry(context.getTransaction(), tableName);
for (auto& property : ftsBindData->propertyIDs) {
auto propertyName = tableEntry->getProperty(property).getName();
query += stringFormat("COPY `{}` FROM "
"(MATCH (b:`{}`) "
"WITH tokenize(b.{}) AS tk, OFFSET(ID(b)) AS id "
Expand All @@ -147,7 +152,7 @@ std::string createFTSIndexQuery(ClientContext& context, const TableFuncBindData&
"WHERE t1 is NOT NULL AND SIZE(t1) > 0 AND "
"NOT EXISTS {MATCH (s:`{}` {sw: t1})} "
"RETURN STEM(t1, '{}'), id1);",
appearsInfoTableName, tableName, property, stopWordsTableName,
appearsInfoTableName, tableName, propertyName, stopWordsTableName,
ftsBindData->ftsConfig.stemmer);
}

Expand Down Expand Up @@ -234,7 +239,7 @@ static offset_t tableFunc(const TableFuncInput& input, TableFuncOutput& /*output
auto avgDocLen = numDocs == 0 ? 0 : static_cast<double>(sharedState.totalLen.load()) / numDocs;
context.clientContext->getCatalog()->createIndex(context.clientContext->getTransaction(),
std::make_unique<catalog::IndexCatalogEntry>(FTSIndexCatalogEntry::TYPE_NAME,
bindData.tableID, bindData.indexName, bindData.properties,
bindData.tableID, bindData.indexName, bindData.propertyIDs,
std::make_unique<FTSIndexAuxInfo>(numDocs, avgDocLen, bindData.ftsConfig)));
return 0;
}
Expand Down
40 changes: 34 additions & 6 deletions extension/fts/test/test_files/fts_small.test
Original file line number Diff line number Diff line change
Expand Up @@ -296,16 +296,16 @@ Binder exception: Table: 0_docIdx4_appears_in already exists. Please drop or ren
---- ok
-STATEMENT CALL SHOW_INDEXES() RETURN *
---- 2
doc|docIdx1|FTS|[content]|True|CALL CREATE_FTS_INDEX('doc', 'docIdx1', ['content' ], stemmer := 'english');
doc|docIdx|FTS|[content,author,name]|True|CALL CREATE_FTS_INDEX('doc', 'docIdx', ['content', 'author', 'name' ], stemmer := 'english');
doc|docIdx1|FTS|[content]|True|CALL CREATE_FTS_INDEX('doc', 'docIdx1', ['content'], stemmer := 'english');
doc|docIdx|FTS|[content,author,name]|True|CALL CREATE_FTS_INDEX('doc', 'docIdx', ['content', 'author', 'name'], stemmer := 'english');
-STATEMENT CREATE NODE TABLE embeddings (id int64, vec FLOAT[8], PRIMARY KEY (id));
---- ok
-STATEMENT CALL CREATE_HNSW_INDEX('e_hnsw_index', 'embeddings', 'vec', distFunc := 'l2');
---- ok
-STATEMENT CALL SHOW_INDEXES() RETURN *
---- 3
doc|docIdx1|FTS|[content]|True|CALL CREATE_FTS_INDEX('doc', 'docIdx1', ['content' ], stemmer := 'english');
doc|docIdx|FTS|[content,author,name]|True|CALL CREATE_FTS_INDEX('doc', 'docIdx', ['content', 'author', 'name' ], stemmer := 'english');
doc|docIdx1|FTS|[content]|True|CALL CREATE_FTS_INDEX('doc', 'docIdx1', ['content'], stemmer := 'english');
doc|docIdx|FTS|[content,author,name]|True|CALL CREATE_FTS_INDEX('doc', 'docIdx', ['content', 'author', 'name'], stemmer := 'english');
embeddings|e_hnsw_index|HNSW|[vec]|True|CALL CREATE_HNSW_INDEX('e_hnsw_index', 'embeddings', 'vec', mu := 30, ml := 60, pu := 0.050000, distFunc := 'l2', alpha := 1.100000, efc := 200);
-RELOADDB
-STATEMENT CALL SHOW_INDEXES() RETURN *
Expand All @@ -317,6 +317,34 @@ embeddings|e_hnsw_index|HNSW|[vec]|True|CALL CREATE_HNSW_INDEX('e_hnsw_index', '
---- ok
-STATEMENT CALL SHOW_INDEXES() RETURN *
---- 3
doc|docIdx1|FTS|[content]|True|CALL CREATE_FTS_INDEX('doc', 'docIdx1', ['content' ], stemmer := 'english');
doc|docIdx|FTS|[content,author,name]|True|CALL CREATE_FTS_INDEX('doc', 'docIdx', ['content', 'author', 'name' ], stemmer := 'english');
doc|docIdx1|FTS|[content]|True|CALL CREATE_FTS_INDEX('doc', 'docIdx1', ['content'], stemmer := 'english');
doc|docIdx|FTS|[content,author,name]|True|CALL CREATE_FTS_INDEX('doc', 'docIdx', ['content', 'author', 'name'], stemmer := 'english');
embeddings|e_hnsw_index|HNSW|[vec]|True|CALL CREATE_HNSW_INDEX('e_hnsw_index', 'embeddings', 'vec', mu := 30, ml := 60, pu := 0.050000, distFunc := 'l2', alpha := 1.100000, efc := 200);
-STATEMENT ALTER TABLE doc RENAME content to detail
---- ok
-STATEMENT CALL SHOW_INDEXES() RETURN *
---- 3
doc|docIdx1|FTS|[detail]|True|CALL CREATE_FTS_INDEX('doc', 'docIdx1', ['detail'], stemmer := 'english');
doc|docIdx|FTS|[detail,author,name]|True|CALL CREATE_FTS_INDEX('doc', 'docIdx', ['detail', 'author', 'name'], stemmer := 'english');
embeddings|e_hnsw_index|HNSW|[vec]|True|CALL CREATE_HNSW_INDEX('e_hnsw_index', 'embeddings', 'vec', mu := 30, ml := 60, pu := 0.050000, distFunc := 'l2', alpha := 1.100000, efc := 200);
-STATEMENT CALL QUERY_FTS_INDEX('doc', 'docIdx', 'Alice') RETURN _node.ID, score
---- 2
0|0.271133
3|0.209476
-STATEMENT CREATE NODE TABLE student(ID INT64, age INT64, name STRING, primary key(ID))
---- ok
-STATEMENT CALL CREATE_FTS_INDEX('student', 'studentIdx', ['name'])
---- ok
-STATEMENT CALL SHOW_INDEXES() WHERE `table name` = 'student' RETURN *
---- 1
student|studentIdx|FTS|[name]|True|CALL CREATE_FTS_INDEX('student', 'studentIdx', ['name'], stemmer := 'english');
-STATEMENT ALTER TABLE student drop age
---- ok
-STATEMENT CALL SHOW_INDEXES() WHERE `table name` = 'student' RETURN *
---- 1
student|studentIdx|FTS|[name]|True|CALL CREATE_FTS_INDEX('student', 'studentIdx', ['name'], stemmer := 'english');
-STATEMENT ALTER TABLE student rename name to firstName
---- ok
-STATEMENT CALL SHOW_INDEXES() WHERE `table name` = 'student' RETURN *
---- 1
student|studentIdx|FTS|[firstName]|True|CALL CREATE_FTS_INDEX('student', 'studentIdx', ['firstName'], stemmer := 'english');
9 changes: 8 additions & 1 deletion src/binder/bind/bind_ddl.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -157,8 +157,15 @@ void Binder::validateNoIndexOnProperty(const std::string& tableName,
return;
}
auto tableEntry = catalog->getTableCatalogEntry(transaction, tableName);
if (!tableEntry->containsProperty(propertyName)) {
return;
}
auto propertyID = tableEntry->getPropertyID(propertyName);
for (auto indexCatalogEntry : catalog->getIndexEntries(transaction)) {
if (indexCatalogEntry->getTableID() == tableEntry->getTableID()) {
auto propertiesWithIndex = indexCatalogEntry->getPropertyIDs();
if (indexCatalogEntry->getTableID() == tableEntry->getTableID() &&
std::find(propertiesWithIndex.begin(), propertiesWithIndex.end(), propertyID) !=
propertiesWithIndex.end()) {
throw BinderException{stringFormat(
"Cannot drop property {} in table {} because it is used in one or more indexes. "
"Please remove the associated indexes before attempting to drop this property.",
Expand Down
11 changes: 6 additions & 5 deletions src/catalog/catalog_entry/hnsw_index_catalog_entry.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -41,14 +41,15 @@ std::string HNSWIndexAuxInfo::toCypher(const IndexCatalogEntry& indexEntry,
const main::ClientContext* context) const {
std::string cypher;
auto catalog = context->getCatalog();
auto tableName =
catalog->getTableCatalogEntry(context->getTransaction(), indexEntry.getTableID())
->getName();
auto tableEntry =
catalog->getTableCatalogEntry(context->getTransaction(), indexEntry.getTableID());
auto tableName = tableEntry->getName();
auto propertyName = tableEntry->getProperty(indexEntry.getPropertyIDs()[0]).getName();
auto distFuncName = storage::HNSWIndexConfig::distFuncToString(config.distFunc);
cypher += common::stringFormat("CALL CREATE_HNSW_INDEX('{}', '{}', '{}', mu := {}, ml := {}, "
"pu := {}, distFunc := '{}', alpha := {}, efc := {});",
indexEntry.getIndexName(), tableName, indexEntry.getProperties()[0], config.mu, config.ml,
config.pu, distFuncName, config.alpha, config.efc);
indexEntry.getIndexName(), tableName, propertyName, config.mu, config.ml, config.pu,
distFuncName, config.alpha, config.efc);
return cypher;
}

Expand Down
8 changes: 4 additions & 4 deletions src/catalog/catalog_entry/index_catalog_entry.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -23,7 +23,7 @@
serializer.write(type);
serializer.write(tableID);
serializer.write(indexName);
serializer.serializeVector(properties);
serializer.serializeVector(propertyIDs);
const auto bufferedWriter = auxInfo->serialize();
serializer.write<uint64_t>(bufferedWriter->getSize());
serializer.write(bufferedWriter->getData().data.get(), bufferedWriter->getSize());
Expand All @@ -34,13 +34,13 @@
std::string type;
common::table_id_t tableID = common::INVALID_TABLE_ID;
std::string indexName;
std::vector<std::string> properties;
std::vector<common::property_id_t> propertyIDs;

Check warning on line 37 in src/catalog/catalog_entry/index_catalog_entry.cpp

View check run for this annotation

Codecov / codecov/patch

src/catalog/catalog_entry/index_catalog_entry.cpp#L37

Added line #L37 was not covered by tests
deserializer.deserializeValue(type);
deserializer.deserializeValue(tableID);
deserializer.deserializeValue(indexName);
deserializer.deserializeVector(properties);
deserializer.deserializeVector(propertyIDs);
auto indexEntry = std::make_unique<IndexCatalogEntry>(type, tableID, std::move(indexName),
std::move(properties), nullptr /* auxInfo */);
std::move(propertyIDs), nullptr /* auxInfo */);

Check warning on line 43 in src/catalog/catalog_entry/index_catalog_entry.cpp

View check run for this annotation

Codecov / codecov/patch

src/catalog/catalog_entry/index_catalog_entry.cpp#L43

Added line #L43 was not covered by tests
uint64_t auxBufferSize = 0;
deserializer.deserializeValue(auxBufferSize);
indexEntry->auxBuffer = std::make_unique<uint8_t[]>(auxBufferSize);
Expand Down
10 changes: 4 additions & 6 deletions src/function/table/call/hnsw/create_hnsw_index.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -19,7 +19,7 @@ CreateHNSWSharedState::CreateHNSWSharedState(const CreateHNSWIndexBindData& bind
->cast<storage::NodeTable>()},
numNodes{bindData.numNodes}, bindData{&bindData} {
hnswIndex = std::make_unique<storage::InMemHNSWIndex>(bindData.context, nodeTable,
bindData.columnID, bindData.config.copy());
bindData.tableEntry->getColumnID(bindData.propertyID), bindData.config.copy());
partitionerSharedState = std::make_shared<storage::HNSWIndexPartitionerSharedState>(
*bindData.context->getMemoryManager());
}
Expand All @@ -35,11 +35,11 @@ static std::unique_ptr<TableFuncBindData> bindFunc(main::ClientContext* context,
const auto tableID = tableEntry->getTableID();
storage::HNSWIndexUtils::validateColumnType(*tableEntry, columnName);
const auto& table = context->getStorageManager()->getTable(tableID)->cast<storage::NodeTable>();
auto columnID = tableEntry->getColumnID(columnName);
auto propertyID = tableEntry->getPropertyID(columnName);
auto config = storage::HNSWIndexConfig{input->optionalParams};
auto numNodes = table.getStats(context->getTransaction()).getTableCard();
auto maxOffset = numNodes > 0 ? numNodes - 1 : 0;
return std::make_unique<CreateHNSWIndexBindData>(context, indexName, tableEntry, columnID,
return std::make_unique<CreateHNSWIndexBindData>(context, indexName, tableEntry, propertyID,
numNodes, maxOffset, std::move(config));
}

Expand Down Expand Up @@ -99,9 +99,7 @@ static void finalizeFunc(const processor::ExecutionContext* context,
auto indexEntry =
std::make_unique<catalog::IndexCatalogEntry>(catalog::HNSWIndexCatalogEntry::TYPE_NAME,
bindData->tableEntry->getTableID(), bindData->indexName,
std::vector<std::string>{
hnswSharedState->nodeTable.getColumn(bindData->columnID).getName()},
std::move(auxInfo));
std::vector<common::property_id_t>{bindData->propertyID}, std::move(auxInfo));
catalog->createIndex(transaction, std::move(indexEntry));
}

Expand Down
6 changes: 3 additions & 3 deletions src/function/table/call/hnsw/query_hnsw_index.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -22,7 +22,7 @@ QueryHNSWIndexBindData::QueryHNSWIndexBindData(main::ClientContext* context,
boundInput{std::move(boundInput)}, config{config}, outputNode{std::move(outputNode)} {
auto indexEntry = this->boundInput.indexEntry;
auto& indexAuxInfo = indexEntry->getAuxInfo().cast<catalog::HNSWIndexAuxInfo>();
indexColumnID = this->boundInput.nodeTableEntry->getColumnID(indexEntry->getProperties()[0]);
indexColumnID = this->boundInput.nodeTableEntry->getColumnID(indexEntry->getPropertyIDs()[0]);
auto catalog = context->getCatalog();
upperHNSWRelTableEntry =
catalog->getTableCatalogEntry(context->getTransaction(), indexAuxInfo.upperRelTableID)
Expand All @@ -46,7 +46,7 @@ static std::unique_ptr<TableFuncBindData> bindFunc(main::ClientContext* context,
nodeTableEntry->getTableID(), indexName);
const auto& auxInfo = indexEntry->getAuxInfo().cast<catalog::HNSWIndexAuxInfo>();
KU_ASSERT(
nodeTableEntry->getProperty(indexEntry->getProperties()[0]).getType().getLogicalTypeID() ==
nodeTableEntry->getProperty(indexEntry->getPropertyIDs()[0]).getType().getLogicalTypeID() ==
common::LogicalTypeID::ARRAY);
KU_UNUSED(auxInfo);

Expand Down Expand Up @@ -140,7 +140,7 @@ static std::vector<float> getQueryVector(const binder::Expression& queryExpressi
}

static const common::LogicalType& getIndexColumnType(const BoundQueryHNSWIndexInput& boundInput) {
auto columnName = boundInput.indexEntry->getProperties()[0];
auto columnName = boundInput.indexEntry->getPropertyIDs()[0];
return boundInput.nodeTableEntry->getProperty(columnName).getType();
}

Expand Down
14 changes: 9 additions & 5 deletions src/function/table/call/show_indexes.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -89,20 +89,24 @@ static std::unique_ptr<TableFuncBindData> bindFunc(const main::ClientContext* co
auto catalog = context->getCatalog();
auto indexEntries = catalog->getIndexEntries(context->getTransaction());
for (auto indexEntry : indexEntries) {
auto tableName =
catalog->getTableCatalogEntry(context->getTransaction(), indexEntry->getTableID())
->getName();
auto tableEntry =
catalog->getTableCatalogEntry(context->getTransaction(), indexEntry->getTableID());
auto tableName = tableEntry->getName();
auto indexName = indexEntry->getIndexName();
auto indexType = indexEntry->getIndexType();
auto properties = indexEntry->getProperties();
auto properties = indexEntry->getPropertyIDs();
std::vector<std::string> propertyNames;
for (auto& property : properties) {
propertyNames.push_back(tableEntry->getProperty(property).getName());
}
auto dependencyLoaded = indexEntry->isLoaded();
std::string indexDefinition;
if (dependencyLoaded) {
auto& auxInfo = indexEntry->getAuxInfo();
indexDefinition = auxInfo.toCypher(*indexEntry, context);
}
indexesInfo.emplace_back(std::move(tableName), std::move(indexName), std::move(indexType),
std::move(properties), dependencyLoaded, std::move(indexDefinition));
std::move(propertyNames), dependencyLoaded, std::move(indexDefinition));
}
return std::make_unique<ShowIndexesBindData>(indexesInfo, bindColumns(*input->binder),
indexesInfo.size());
Expand Down
Loading
Loading