Skip to content

Commit

Permalink
Add dialog UI to get Cassandra's connection info (vmware-archive#643)
Browse files Browse the repository at this point in the history
Data loading from Cassandra SS tables
  • Loading branch information
daniarherikurniawan authored Aug 5, 2020
1 parent d2a0f08 commit e95bb4b
Show file tree
Hide file tree
Showing 18 changed files with 922 additions and 244 deletions.
44 changes: 44 additions & 0 deletions data/sstable/cassdb.cql
Original file line number Diff line number Diff line change
Expand Up @@ -71,6 +71,50 @@ INSERT INTO users (id, name, address,
-- Row whose text fields are empty strings and whose salary is 0
-- (presumably an edge case for the loader; confirm against the tests).
INSERT INTO users (id, name, address,
salary, phone) VALUES(now(),'', '',0, '');

-- Fixture table: besides id, name and salary, every column is named
-- after the CQL native type it stores.
CREATE TABLE test (
    id        UUID,
    name      text,
    salary    int,
    text      text,
    int       int,
    boolean   boolean,
    ascii     ascii,
    inet      inet,
    timeuuid  timeuuid,
    smallint  smallint,
    tinyint   tinyint,
    varint    varint,
    bigint    bigint,
    decimal   decimal,
    double    double,
    float     float,
    time      time,
    timestamp timestamp,
    date      date,
    duration  duration,
    blob      blob,
    PRIMARY KEY (id)
);

-- Row 1: only id and name are set; every other column is explicitly null.
INSERT INTO test (id, name, salary, text, int, boolean, ascii, inet, timeuuid, smallint,
tinyint, varint, bigint, decimal, double, float, time, timestamp, date, duration, blob)
VALUES(now(),'Mr. NULL',null, null, null, null, null, null, null, null, null, null, null, null, null,
null, null, null, null, null, null
);

-- Row 2: a value for each column except text and int, which stay null.
-- Values are positional and must follow the column list order exactly.
-- NOTE(review): the bigint column receives toTimestamp(now()); this presumably
-- relies on timestamp values being accepted for bigint columns -- confirm.
INSERT INTO test (id, name, salary, text, int, boolean, ascii, inet, timeuuid, smallint,
tinyint, varint, bigint, decimal, double, float, time, timestamp, date, duration, blob)
VALUES(now(),'Mr. Test', 45000, null, null, true, '35', '127.0.0.1', 50554d6e-29bb-11e5-b345-feff819cdc9f,
1, 2, 10, toTimestamp(now()), 3.7875, 6714592679.3400884579357, 3.14753, '13:30:23.123',
'2017-05-05 15:00:00.000', '2020-07-14', P1DT12H30M5S, textAsBlob('adb14fbe076f6b94444c660e36a400151f26fc6f')
);


-- Fixture table holding a single counter column, keyed by an integer id.
CREATE TABLE counter (
    id      int,
    counter counter,
    PRIMARY KEY (id)
);

-- Increment the counter of the row with id 2 by one.
UPDATE counter
SET counter = counter + 1
WHERE id = 2;

CREATE TABLE flights(
ID int PRIMARY KEY,
Expand Down
1 change: 1 addition & 0 deletions data/sstable/loadCassandraDB.sh
Original file line number Diff line number Diff line change
Expand Up @@ -15,4 +15,5 @@ ${CASSANDRA_INSTALLATION_DIR}/bin/cqlsh --file ${SAVEDIR}/cassdb.cql
# Flush and force a compaction so that the loaded data is put into SSTables.
# The installation path is quoted so that a directory containing spaces or
# glob characters does not get word-split by the shell.
"${CASSANDRA_INSTALLATION_DIR}/bin/nodetool" flush
"${CASSANDRA_INSTALLATION_DIR}/bin/nodetool" compact cassdb
# Clear snapshots on the local node (nodetool connects to localhost, port 7199).
"${CASSANDRA_INSTALLATION_DIR}/bin/nodetool" -h localhost -p 7199 clearsnapshot
# Remove the temporary CSV file from the current directory.
rm -f flights_data.csv
Binary file added docs/db-menu-cassandra.png
Loading
Sorry, something went wrong. Reload?
Sorry, we cannot display this file.
Sorry, this file is invalid so it cannot be displayed.
File renamed without changes
45 changes: 40 additions & 5 deletions docs/userManual.md
Original file line number Diff line number Diff line change
Expand Up @@ -26,7 +26,7 @@ one row for an airline flight. Columns in this dataset include: the date of the
the origin and destination cities, the origin and destination states,
the origin airport code, the distance flown, the departure and arrival delay.

Updated on 2020 Jul 30.
Updated on 2020 Aug 05.

# Contents
|Section|Reference|
Expand All @@ -50,7 +50,8 @@ Updated on 2020 Jul 30.
|2.3.6.|[Reading JSON files](#236-reading-json-files)|
|2.3.7.|[Reading ORC files](#237-reading-orc-files)|
|2.3.8.|[Reading data from SQL databases](#238-reading-data-from-sql-databases)|
|2.3.9.|[Reading Parquet files](#239-reading-parquet-files)|
|2.3.9.|[Reading data from Cassandra databases](#239-reading-data-from-cassandra-databases)|
|2.3.10.|[Reading Parquet files](#2310-reading-parquet-files)|
|2.4.|[Navigating multiple datasets](#24-navigating-multiple-datasets)|
|3.|[Data views](#3-data-views)|
|3.1.|[The heading of a view](#31-the-heading-of-a-view)|
Expand Down Expand Up @@ -214,7 +215,7 @@ storage.
files](#236-reading-json-files).

* Parquet files: allows the user to [read the data from a set of
Parquet files](#239-reading-parquet-files).
Parquet files](#2310-reading-parquet-files).

* ORC files: allows the user to [read the data from a set of ORC
files](#237-reading-orc-files).
Expand Down Expand Up @@ -425,7 +426,7 @@ on the local user machine.

The following menu allows the user to specify the data to load.

![Specifying database connections](db-menu.png)
![Specifying database connections](db-menu-mysql.png)

* database kind: A drop-down menu indicating the kind of database to
load data from. Currently we support 'mysql' and 'impala'.
Expand All @@ -448,7 +449,41 @@ Numeric values are converted either to integers (if they fit into
32-bits) or to doubles. Boolean values are read as strings
containing two values, "true" and "false".

#### 2.3.9. Reading Parquet files
#### 2.3.9. Reading data from Cassandra databases

A Hillview instance must be running on each Cassandra node.
Thus, Hillview must be deployed in cluster mode, with all Cassandra
nodes listed as workers in config.json. Moreover, Hillview
must have access to Cassandra's directories, because each Hillview
worker reads the local Cassandra SSTables directly.

The following menu allows the user to specify the data to load.

![Specifying Cassandra database connections](db-menu-cassandra.png)

* database kind: Indicates the kind of database to connect to, in
this case Cassandra. Hillview also supports other databases:
'mysql' and 'impala'.

* host: The network name or IP address of one of the machines hosting
the Cassandra cluster. All nodes in a Cassandra cluster are peers
(there is no master-slave concept), so Hillview can connect to any node.

* db directory: The directory where Cassandra is located/installed.

* port: The network port where Cassandra service is listening.

* jmx port: The JMX (management) port of the Cassandra node, which Hillview uses to establish a probe connection.

* database: The database/keyspace to load data from.

* table: The table to load data from.

* user: The name of the user connecting to the database.

* password: Credentials of the user connecting to the database.

#### 2.3.10. Reading Parquet files

Hillview can read data from [Apache Parquet
files](http://parquet.apache.org), a columnar storage format. The
Expand Down
36 changes: 35 additions & 1 deletion docs/userManual.src
Original file line number Diff line number Diff line change
Expand Up @@ -357,7 +357,7 @@ on the local user machine.

The following menu allows the user to specify the data to load.

![Specifying database connections](db-menu.png)
![Specifying database connections](db-menu-mysql.png)

* database kind: A drop-down menu indicating the kind of database to
load data from. Currently we support 'mysql' and 'impala'.
Expand All @@ -380,6 +380,40 @@ Numeric values are converted either to integers (if they fit into
32-bits) or to doubles. Boolean values are read as strings
containing two values, "true" and "false".

#### Reading data from Cassandra databases

A Hillview instance must be running on each Cassandra node.
Thus, Hillview must be deployed in cluster mode, with all Cassandra
nodes listed as workers in config.json. Moreover, Hillview
must have access to Cassandra's directories, because each Hillview
worker reads the local Cassandra SSTables directly.

The following menu allows the user to specify the data to load.

![Specifying Cassandra database connections](db-menu-cassandra.png)

* database kind: Indicates the kind of database to connect to, in
this case Cassandra. Hillview also supports other databases:
'mysql' and 'impala'.

* host: The network name or IP address of one of the machines hosting
the Cassandra cluster. All nodes in a Cassandra cluster are peers
(there is no master-slave concept), so Hillview can connect to any node.

* db directory: The directory where Cassandra is located/installed.

* port: The network port where Cassandra service is listening.

* jmx port: The JMX (management) port of the Cassandra node, which Hillview uses to establish a probe connection.

* database: The database/keyspace to load data from.

* table: The table to load data from.

* user: The name of the user connecting to the database.

* password: Credentials of the user connecting to the database.

#### Reading Parquet files

Hillview can read data from [Apache Parquet
Expand Down
Original file line number Diff line number Diff line change
@@ -0,0 +1,53 @@
/*
* Copyright (c) 2020 VMware Inc. All Rights Reserved.
* SPDX-License-Identifier: Apache-2.0
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
* You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/

package org.hillview.maps;

import org.hillview.dataset.api.Empty;
import org.hillview.dataset.api.IMap;
import org.hillview.storage.CassandraConnectionInfo;
import org.hillview.storage.CassandraDatabase;
import org.hillview.storage.CassandraFileReference;
import org.hillview.storage.IFileReference;
import org.hillview.storage.CassandraDatabase.CassandraTokenRange;

import javax.annotation.Nullable;

import java.util.ArrayList;
import java.util.List;

/**
 * Map that opens a Cassandra database using the supplied connection
 * information and produces one file reference per SSTable path reported
 * by that database.
 */
public class FindCassandraFilesMap implements IMap<Empty, List<IFileReference>> {
    static final long serialVersionUID = 1;
    /** Connection information used to open the Cassandra database. */
    private final CassandraConnectionInfo conn;

    /**
     * @param conn  Connection information for the Cassandra database to scan.
     */
    public FindCassandraFilesMap(CassandraConnectionInfo conn) {
        this.conn = conn;
    }

    /**
     * Queries the database for its SSTable paths, token ranges and local
     * endpoint, then wraps each SSTable path in a CassandraFileReference.
     *
     * @param empty  Unused.
     * @return       One file reference per SSTable path.
     */
    @Override
    public List<IFileReference> apply(@Nullable Empty empty) {
        CassandraDatabase db = new CassandraDatabase(this.conn);
        List<String> ssTables;
        List<CassandraTokenRange> tokenRanges;
        String localEndpoint;
        try {
            ssTables = db.getSSTablePath();
            tokenRanges = db.getTokenRanges();
            localEndpoint = db.getLocalEndpoint();
        } finally {
            // Release the cluster connection even when one of the
            // metadata queries above throws.
            db.closeClusterConnection();
        }

        List<IFileReference> result = new ArrayList<IFileReference>(ssTables.size());
        for (String ssTable : ssTables)
            result.add(new CassandraFileReference(ssTable, tokenRanges, localEndpoint));
        return result;
    }
}
Original file line number Diff line number Diff line change
Expand Up @@ -17,16 +17,25 @@

package org.hillview.storage;

/**
 * This information is required to open a Cassandra database connection.
 */
public class CassandraConnectionInfo extends JdbcConnectionInformation {
    static final long serialVersionUID = 1;

    /** Port for establishing probe connection */
    public int jmxPort;
    /**
     * Local Cassandra installation directory (can be found at
     * bin/install-cassandra.sh)
     */
    public String cassandraRootDir;

    /**
     * Returns the superclass description followed by the Cassandra-specific
     * fields (jmxPort and cassandraRootDir), each terminated by a newline.
     */
    @Override
    public String toString() {
        StringBuilder sb = new StringBuilder(super.toString());
        sb.append(" jmxPort : ").append(this.jmxPort).append("\n");
        sb.append(" cassandraRootDir : ").append(this.cassandraRootDir).append("\n");
        return sb.toString();
    }
}
Loading

0 comments on commit e95bb4b

Please sign in to comment.