Skip to content

Commit

Permalink
merge: Allow accessing Scylla system tables from alternator
Browse files Browse the repository at this point in the history
Merged patch series from Piotr Sarna:

This series allows reading rows from Scylla's system tables
via alternator by using a virtual interface.
If a Query or Scan request specifies a table name matching the following
pattern: .scylla.alternator.KEYSPACE_NAME.TABLE_NAME, it will read
the data from Scylla's KEYSPACE_NAME.TABLE_NAME table.
The interface is expected to only return data for Scylla system tables
and trying to access regular tables via this interface is expected
to return an error.
This series comes with tests (alternator-test, scylla_only).

Fixes scylladb#6122
Tests: alternator-test (local, remote (to verify that scylla_only works))

Piotr Sarna (5):
  alternator: add fallback serialization for all types
  alternator: add fetching static columns if they exist
  alternator: add a way of accessing system tables from alternator
  alternator-test: add scylla-only test for querying system tables
  docs: add an entry about accessing Scylla system tables

 alternator-test/test_system_tables.py | 61 +++++++++++++++++++++++++++
 alternator/executor.cc                | 38 ++++++++++++++++-
 alternator/executor.hh                |  1 +
 alternator/serialization.cc           | 11 +++--
 docs/alternator/alternator.md         | 15 +++++++
 5 files changed, 122 insertions(+), 4 deletions(-)
 create mode 100644 alternator-test/test_system_tables.py
  • Loading branch information
nyh committed Apr 20, 2020
2 parents 03f41b9 + dea5bc4 commit 44a1daf
Show file tree
Hide file tree
Showing 5 changed files with 122 additions and 4 deletions.
38 changes: 37 additions & 1 deletion alternator/executor.cc
Original file line number Diff line number Diff line change
Expand Up @@ -187,6 +187,25 @@ static schema_ptr get_table(service::storage_proxy& proxy, const rjson::value& r
}
}

static std::tuple<bool, std::string_view, std::string_view> try_get_internal_table(std::string_view table_name) {
size_t it = table_name.find(executor::INTERNAL_TABLE_PREFIX);
if (it != 0) {
return {false, "", ""};
}
table_name.remove_prefix(executor::INTERNAL_TABLE_PREFIX.size());
size_t delim = table_name.find_first_of('.');
if (delim == std::string_view::npos) {
return {false, "", ""};
}
std::string_view ks_name = table_name.substr(0, delim);
table_name.remove_prefix(ks_name.size() + 1);
// Only internal keyspaces can be accessed to avoid leakage
if (!is_internal_keyspace(sstring(ks_name))) {
return {false, "", ""};
}
return {true, ks_name, table_name};
}

// get_table_or_view() is similar to get_table(), except it returns either
// a table or a materialized view from which to read, based on the TableName
// and optional IndexName in the request. Only requests like Query and Scan
Expand All @@ -196,6 +215,17 @@ static std::pair<schema_ptr, table_or_view_type>
get_table_or_view(service::storage_proxy& proxy, const rjson::value& request) {
table_or_view_type type = table_or_view_type::base;
std::string table_name = get_table_name(request);

auto [is_internal_table, internal_ks_name, internal_table_name] = try_get_internal_table(table_name);
if (is_internal_table) {
try {
return { proxy.get_db().local().find_schema(sstring(internal_ks_name), sstring(internal_table_name)), type };
} catch (no_such_column_family&) {
throw api_error("ResourceNotFoundException",
format("Requested resource not found: Internal table: {}.{} not found", internal_ks_name, internal_table_name));
}
}

std::string keyspace_name = executor::KEYSPACE_NAME_PREFIX + table_name;
const rjson::value* index_name = rjson::find(request, "IndexName");
std::string orig_table_name;
Expand Down Expand Up @@ -684,6 +714,10 @@ future<executor::request_return_type> executor::create_table(client_state& clien
_stats.api_operations.create_table++;
elogger.trace("Creating table {}", request);
std::string table_name = get_table_name(request);
if (table_name.find(INTERNAL_TABLE_PREFIX) == 0) {
return make_ready_future<request_return_type>(api_error("ValidationException",
format("Prefix {} is reserved for accessing internal tables", INTERNAL_TABLE_PREFIX)));
}
std::string keyspace_name = executor::KEYSPACE_NAME_PREFIX + table_name;
const rjson::value& attribute_definitions = request["AttributeDefinitions"];

Expand Down Expand Up @@ -2920,8 +2954,10 @@ static future<executor::request_return_type> do_query(schema_ptr schema,

auto regular_columns = boost::copy_range<query::column_id_vector>(
schema->regular_columns() | boost::adaptors::transformed([] (const column_definition& cdef) { return cdef.id; }));
auto static_columns = boost::copy_range<query::column_id_vector>(
schema->static_columns() | boost::adaptors::transformed([] (const column_definition& cdef) { return cdef.id; }));
auto selection = cql3::selection::selection::wildcard(schema);
auto partition_slice = query::partition_slice(std::move(ck_bounds), {}, std::move(regular_columns), selection->get_query_options());
auto partition_slice = query::partition_slice(std::move(ck_bounds), std::move(static_columns), std::move(regular_columns), selection->get_query_options());
auto command = ::make_lw_shared<query::read_command>(schema->id(), schema->version(), partition_slice, query::max_partitions);

auto query_state_ptr = std::make_unique<service::query_state>(client_state, trace_state, std::move(permit));
Expand Down
1 change: 1 addition & 0 deletions alternator/executor.hh
Original file line number Diff line number Diff line change
Expand Up @@ -50,6 +50,7 @@ public:
stats _stats;
static constexpr auto ATTRS_COLUMN_NAME = ":attrs";
static constexpr auto KEYSPACE_NAME_PREFIX = "alternator_";
static constexpr std::string_view INTERNAL_TABLE_PREFIX = ".scylla.alternator.";

executor(service::storage_proxy& proxy, service::migration_manager& mm, smp_service_group ssg)
: _proxy(proxy), _mm(mm), _ssg(ssg) {}
Expand Down
11 changes: 8 additions & 3 deletions alternator/serialization.cc
Original file line number Diff line number Diff line change
Expand Up @@ -153,7 +153,9 @@ std::string type_to_string(data_type type) {
};
auto it = types.find(type);
if (it == types.end()) {
throw std::runtime_error(format("Unknown type {}", type->name()));
// fall back to string, in order to be able to present
// internal Scylla types in a human-readable way
return "S";
}
return it->second;
}
Expand Down Expand Up @@ -205,8 +207,11 @@ rjson::value json_key_column_value(bytes_view cell, const column_definition& col
auto s = to_json_string(*decimal_type, bytes(cell));
return rjson::from_string(s);
} else {
// We shouldn't get here, we shouldn't see such key columns.
throw std::runtime_error(format("Unexpected key type: {}", column.type->name()));
// Support for arbitrary key types is useful for parsing values of virtual tables,
// which can involve any type supported by Scylla.
// In order to guarantee that the returned type is parsable by alternator clients,
// they are represented simply as strings.
return rjson::from_string(column.type->to_string(bytes(cell)));
}
}

Expand Down
15 changes: 15 additions & 0 deletions docs/alternator/alternator.md
Original file line number Diff line number Diff line change
Expand Up @@ -168,6 +168,21 @@ implemented, with the following limitations:
Those are different from the current DynamoDB metrics, but Scylla's
monitoring is rather advanced and provides more insight into the internals.

## Alternator-specific API

### Accessing system tables from Scylla
* Scylla exposes lots of useful information via its internal system tables,
which can be found in system keyspaces: 'system', 'system\_auth', etc.
In order to access these tables via the alternator interface,
Scan and Query requests can use a special table name:
.scylla.alternator.KEYSPACE\_NAME.TABLE\_NAME
which will return results fetched from the corresponding Scylla table.
This interface can be used only to fetch data from system tables.
Attempts to read regular tables via the virtual interface will result
in an error.
Example: in order to query the contents of Scylla's system.large_rows,
pass TableName='.scylla.alternator.system.large_rows' to a Query/Scan request.

## Alternator design and implementation

This section provides only a very brief introduction to Alternator's
Expand Down
61 changes: 61 additions & 0 deletions test/alternator/test_system_tables.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,61 @@
# Copyright 2020 ScyllaDB
#
# This file is part of Scylla.
#
# Scylla is free software: you can redistribute it and/or modify
# it under the terms of the GNU Affero General Public License as published by
# the Free Software Foundation, either version 3 of the License, or
# (at your option) any later version.
#
# Scylla is distributed in the hope that it will be useful,
# but WITHOUT ANY WARRANTY; without even the implied warranty of
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
# GNU General Public License for more details.
#
# You should have received a copy of the GNU Affero General Public License
# along with Scylla. If not, see <http://www.gnu.org/licenses/>.

# Tests for accessing alternator-only system tables (from Scylla).

import pytest
from botocore.exceptions import ClientError
from boto3.dynamodb.conditions import Key

internal_prefix = '.scylla.alternator.'

# Test that fetching key columns from system tables works
def test_fetch_from_system_tables(scylla_only, dynamodb):
    # For every table listed in system_schema.tables whose keyspace name
    # contains 'system', look up its key columns in system_schema.columns and
    # scan only those columns through the internal-table prefix. The test
    # makes no assertions on the contents; it passes if no request raises.
    client = dynamodb.meta.client
    tables_response = client.scan(TableName=internal_prefix+'system_schema.tables',
            AttributesToGet=['keyspace_name','table_name'])

    for item in tables_response['Items']:
        ks_name = item['keyspace_name']
        table_name = item['table_name']

        if 'system' not in ks_name:
            continue

        col_response = client.query(TableName=internal_prefix+'system_schema.columns',
                KeyConditionExpression=Key('keyspace_name').eq(ks_name) & Key('table_name').eq(table_name))

        # A separate loop name keeps the outer `item` from being shadowed.
        key_columns = [col['column_name'] for col in col_response['Items']
                if col['kind'] in ('clustering', 'partition_key')]
        qualified_name = "{}{}.{}".format(internal_prefix, ks_name, table_name)
        response = client.scan(TableName=qualified_name, AttributesToGet=key_columns)
        print(ks_name, table_name, response)

def test_block_access_to_non_system_tables_with_virtual_interface(scylla_only, test_table_s, dynamodb):
    # Scanning a regular (non-system) table through the internal-table prefix
    # is expected to fail with ResourceNotFoundException, and the error text
    # is expected to mention the prefix.
    import re
    client = dynamodb.meta.client
    qualified_name = "{}alternator_{}.{}".format(internal_prefix, test_table_s.name, test_table_s.name)
    # pytest.raises(match=...) treats its argument as a regular expression, so
    # the prefix is escaped: otherwise its dots would match any character.
    expected_error = 'ResourceNotFoundException.*{}'.format(re.escape(internal_prefix))
    with pytest.raises(ClientError, match=expected_error):
        client.scan(TableName=qualified_name)

def test_block_creating_tables_with_reserved_prefix(scylla_only, dynamodb):
    # Creating a table whose name starts with the internal-table prefix is
    # expected to be rejected, whatever follows the prefix, with an error
    # that mentions the prefix.
    import re
    # pytest.raises(match=...) treats its argument as a regular expression, so
    # the prefix is escaped: otherwise its dots would match any character.
    expected_error = re.escape(internal_prefix)
    for wrong_name_postfix in ['', 'a', 'xxx', 'system_auth.roles', 'table_name']:
        with pytest.raises(ClientError, match=expected_error):
            dynamodb.create_table(TableName=internal_prefix+wrong_name_postfix,
                BillingMode='PAY_PER_REQUEST',
                KeySchema=[{'AttributeName':'p', 'KeyType':'HASH'}],
                AttributeDefinitions=[{'AttributeName':'p', 'AttributeType': 'S'}]
            )

0 comments on commit 44a1daf

Please sign in to comment.