tooling: support for cassandra update-schema (cadence-workflow#177)

avmi · May 15, 2017 · ef260bf · ef260bf
1 parent 437a8a9
commit ef260bf
Show file tree

Hide file tree

Showing 21 changed files with 1,530 additions and 110 deletions.
diff --git a/.gitignore b/.gitignore
@@ -14,6 +14,4 @@ test
 test.log
 
 # Executables produced by cadence repo
-cadence
-cadence-cassandra-tool
-
+/cadence*
diff --git a/common/persistence/persistenceTestBase.go b/common/persistence/persistenceTestBase.go
@@ -719,7 +719,7 @@ func (s *CassandraTestCluster) setupTestCluster(keySpace string, dropKeySpace bo
 	}
 	s.createCluster(testWorkflowClusterHosts, testDatacenter, gocql.Consistency(1), keySpace)
 	s.createKeyspace(1, dropKeySpace)
-	s.loadSchema([]string{"workflow_test.cql", "visibility_test.cql"}, schemaDir)
+	s.loadSchema([]string{"schema.cql"}, schemaDir)
 }
 
 func (s *CassandraTestCluster) tearDownTestCluster() {
@@ -757,9 +757,9 @@ func (s *CassandraTestCluster) dropKeyspace() {
 }
 
 func (s *CassandraTestCluster) loadSchema(fileNames []string, schemaDir string) {
-	workflowSchemaDir := "./schema/"
+	workflowSchemaDir := "./schema/cadence"
 	if schemaDir != "" {
-		workflowSchemaDir = schemaDir + "/schema/"
+		workflowSchemaDir = schemaDir + "/schema/cadence"
 	}
 
 	err := common.LoadCassandraSchema(workflowSchemaDir, fileNames, s.keyspace)

diff --git a/schema/README.md b/schema/README.md
@@ -0,0 +1,46 @@
+What
+----
+This directory contains the cassandra schema for every keyspace that cadence owns. The directory structure is as follows
+
+```
+./schema
+   - keyspace1/
+   - keyspace2/
+        - keyspace.cql     -- Contains the keyspace definition
+        - schema.cql       -- Contains the latest & greatest snapshot of the schema for the keyspace
+        - versioned
+             - v0.1/
+             - v0.2/       -- One directory per schema version change
+             - v1.0/
+                - manifest.json   -- json file describing the change
+                - changes.cql     -- changes in this version, only [CREATE, ALTER] commands are allowed
+```
+
+How
+---
+
+Q: How do I update existing schema ?
+* Add your changes to schema.cql
+* Create a new schema version directory under ./schema/keyspace/versioned/vx.x
+** Add a manifest.json
+** Add your changes in a cql file
+* Update the unit test within ./tools/cassandra/updateTask_test.go `TestDryrun` with your version x.x
+* Once you are done with these use the ./cadence-cassandra-tool to update the schema
+
+Q: What's the format of manifest.json
+
+Example below:
+* MinCompatibleVersion is the minimum schema version that your code can handle
+* SchemaUpdateCqlFiles are list of .cql files containg your create/alter commands
+
+
+```
+{
+    "CurrVersion": "0.1",
+    "MinCompatibleVersion": "0.1",
+    "Description": "base version of schema",
+    "SchemaUpdateCqlFiles": [
+        "base.cql"
+    ]
+}
+```
diff --git a/schema/keyspace_test.cql → schema/cadence/keyspace.cql b/schema/keyspace_test.cql → schema/cadence/keyspace.cql
diff --git a/schema/cadence/schema.cql b/schema/cadence/schema.cql
@@ -0,0 +1,210 @@
+CREATE TABLE shards (
+  shard_id           int,
+  PRIMARY KEY (shard_id)
+);
+
+CREATE TYPE shard (
+  shard_id            int,
+  owner               text, -- Host identifier processing the shard
+  -- Range identifier used for generating ack ids for tasks within shard.
+  -- Also used for optimistic concurrency and all writes to a shard are conditional on this value.
+  range_id            bigint,
+  -- This field keeps track of number of times owner for a shard changes before updating range_id or ack_levels
+  stolen_since_renew  int,
+  updated_at          timestamp,
+  transfer_ack_level  bigint,
+);
+
+--- Workflow execution and mutable state ---
+CREATE TYPE workflow_execution (
+  domain_id              uuid,
+  workflow_id            text,
+  run_id                 uuid,
+  task_list              text,
+  workflow_type_name     text,
+  decision_task_timeout  int,
+  execution_context      blob,
+  state                  int,  -- enum WorkflowState {Created, Running, Completed}
+  close_status           int,  -- enum WorkflowCloseStatus {None, Completed, Failed, Canceled, Terminated, ContinuedAsNew, TimedOut}
+  next_event_id          bigint,
+  last_processed_event   bigint,
+  start_time             timestamp,
+  last_updated_time      timestamp,
+  create_request_id      uuid,
+  decision_schedule_id   bigint,
+  decision_started_id    bigint,
+  decision_request_id    text,    -- Identifier used by matching engine for retrying history service calls for recording task is started
+  decision_timeout       int,
+);
+
+-- TODO: Remove fields that are left over from activity and workflow tasks.
+CREATE TYPE transfer_task (
+  domain_id           uuid,   -- The domain ID that this transfer task belongs to
+  workflow_id         text,   -- The workflow ID that this transfer task belongs to
+  run_id              uuid,   -- The run ID that this transfer task belongs to
+  task_id             bigint,
+  target_domain_id    uuid,   -- The external domain ID that this transfer task is doing work for.
+  target_workflow_id  text,   -- The external workflow ID that this transfer task is doing work for.
+  target_run_id       uuid,   -- The external run ID that this transfer task is doing work for.
+  task_list           text,
+  type                int,    -- enum TaskType {ActivityTask, DecisionTask, DeleteExecution, CancelExecution}
+  schedule_id         bigint,
+);
+
+CREATE TYPE timer_task (
+  domain_id        uuid,
+  workflow_id      text,
+  run_id           uuid,
+  task_id          bigint,
+  type             int,  -- enum TaskType {DecisionTaskTimeout, ActivityTaskTimeout, UserTimer}
+  timeout_type     int, -- enum TimeoutType in IDL {START_TO_CLOSE, SCHEDULE_TO_START, SCHEDULE_TO_CLOSE, HEARTBEAT}
+  event_id         bigint, -- Corresponds to event ID in history that is responsible for this timer.
+);
+
+-- Workflow activity in progress mutable state
+CREATE TYPE activity_info (
+  schedule_id               bigint,
+  scheduled_event           blob,
+  started_id                bigint,
+  started_event             blob,
+  activity_id               text,    -- Client generated unique ID for the activity.
+  request_id                text,    -- Identifier used by matching engine for retrying history service calls for recording task is started
+  details                   blob,
+  schedule_to_start_timeout int,
+  schedule_to_close_timeout int,
+  start_to_close_timeout    int,
+  heart_beat_timeout        int,
+  cancel_requested          boolean, -- If a cancel request is made to cancel the activity in progress.
+  cancel_request_id         bigint,  -- Event ID that identifies the cancel request.
+  last_hb_updated_time      timestamp, -- Last time the heartbeat is received.
+);
+
+-- User timer details
+CREATE TYPE timer_info (
+  timer_id      text,      -- User defined timer ID
+  started_id    bigint,    -- The event ID corresponding to timer started.
+  expiry_time   timestamp, -- Timestamp at which this timer expires or fires
+  task_id       bigint,    -- The task ID if we have one created for this timer
+);
+
+-- Activity or workflow task in a task list
+CREATE TYPE task (
+  domain_id        uuid,
+  workflow_id      text,
+  run_id           uuid,
+  schedule_id      bigint,
+);
+
+CREATE TYPE task_list (
+  domain_id        uuid,
+  name             text,
+  type             int, -- enum TaskRowType {ActivityTask, DecisionTask}
+  ack_level        bigint, -- task_id of the last acknowledged message
+);
+
+CREATE TYPE domain (
+  id          uuid,
+  name        text,
+  status      int, -- enum DomainStatus {Registered, Deprecated, Deleted}
+  description text,
+  owner_email text,
+);
+
+CREATE TYPE domain_config (
+  retention int,
+  emit_metric boolean
+);
+
+CREATE TABLE executions (
+  shard_id           int,
+  type               int, -- enum RowType { Shard, Execution, TransferTask, TimerTask}
+  domain_id          uuid,
+  workflow_id        text,
+  run_id             uuid,
+  current_run_id     uuid,
+  task_id            bigint, -- unique identifier for transfer and timer tasks for an execution
+  shard              frozen<shard>,
+  execution          frozen<workflow_execution>,
+  transfer           frozen<transfer_task>,
+  timer              frozen<timer_task>,
+  next_event_id      bigint,  -- This is needed to make conditional updates on session history
+  range_id           bigint static, -- Increasing sequence identifier for transfer queue, checkpointed into shard info
+  activity_map       map<bigint, frozen<activity_info>>,
+  timer_map          map<text, frozen<timer_info>>,
+  PRIMARY KEY  (shard_id, type, domain_id, workflow_id, run_id, task_id)
+);
+
+CREATE TABLE events (
+  domain_id      uuid,
+  workflow_id    text,
+  run_id         uuid,
+  -- We insert a batch of events with each append transaction.
+  -- This field stores the event id of first event in the batch.
+  first_event_id bigint,
+  range_id       bigint,
+  tx_id          bigint,
+  data           blob, -- Batch of workflow execution history events as a blob
+  data_encoding  text, -- Protocol used for history serialization
+  data_version   int,  -- history blob version
+  PRIMARY KEY ((domain_id, workflow_id, run_id), first_event_id)
+);
+
+-- Stores activity or workflow tasks
+CREATE TABLE tasks (
+  domain_id        uuid,
+  task_list_name   text,
+  task_list_type   int, -- enum TaskListType {ActivityTask, DecisionTask}
+  type             int, -- enum rowType {Task, TaskList}
+  task_id          bigint,  -- unique identifier for tasks, monotonically increasing
+  range_id         bigint static, -- Used to ensure that only one process can write to the table
+  task             frozen<task>,
+  task_list        frozen<task_list>,
+  PRIMARY KEY ((domain_id, task_list_name, task_list_type), type, task_id)
+);
+
+CREATE TABLE domains (
+  id     uuid,
+  domain frozen<domain>,
+  config frozen<domain_config>,
+  PRIMARY KEY (id)
+);
+
+CREATE TABLE domains_by_name (
+  name   text,
+  domain frozen<domain>,
+  config frozen<domain_config>,
+  PRIMARY KEY (name)
+);
+
+-- Visiblity schema goes below
+
+CREATE TABLE open_executions (
+  domain_id            uuid,
+  domain_partition     int,
+  workflow_id          text,
+  run_id               uuid,
+  start_time           timestamp,
+  workflow_type_name   text,
+  PRIMARY KEY  ((domain_id, domain_partition), start_time, run_id)
+) WITH CLUSTERING ORDER BY (start_time DESC);
+
+
+CREATE INDEX open_by_workflow_id ON open_executions (workflow_id);
+CREATE INDEX open_by_type ON open_executions (workflow_type_name);
+
+CREATE TABLE closed_executions (
+  domain_id            uuid,
+  domain_partition     int,
+  workflow_id          text,
+  run_id               uuid,
+  start_time           timestamp,
+  close_time           timestamp,
+  status               int,  -- enum WorkflowExecutionCloseStatus {COMPLETED, FAILED, CANCELED, TERMINATED, CONTINUED_AS_NEW, TIMED_OUT}
+  workflow_type_name   text,
+  PRIMARY KEY  ((domain_id, domain_partition), start_time, run_id)
+) WITH CLUSTERING ORDER BY (start_time DESC);
+
+CREATE INDEX closed_by_workflow_id ON closed_executions (workflow_id);
+CREATE INDEX closed_by_close_time ON closed_executions (close_time);
+CREATE INDEX closed_by_type ON closed_executions (workflow_type_name);
+CREATE INDEX closed_by_status ON closed_executions (status);
diff --git a/schema/workflow_test.cql → schema/cadence/versioned/v0.1/base.cql b/schema/workflow_test.cql → schema/cadence/versioned/v0.1/base.cql
@@ -174,4 +174,35 @@ CREATE TABLE domains_by_name (
   domain frozen<domain>,
   config frozen<domain_config>,
   PRIMARY KEY (name)
-);
+);
+
+CREATE TABLE open_executions (
+  domain_id            uuid,
+  domain_partition     int,
+  workflow_id          text,
+  run_id               uuid,
+  start_time           timestamp,
+  workflow_type_name   text,
+  PRIMARY KEY  ((domain_id, domain_partition), start_time, run_id)
+) WITH CLUSTERING ORDER BY (start_time DESC);
+
+
+CREATE INDEX open_by_workflow_id ON open_executions (workflow_id);
+CREATE INDEX open_by_type ON open_executions (workflow_type_name);
+
+CREATE TABLE closed_executions (
+  domain_id            uuid,
+  domain_partition     int,
+  workflow_id          text,
+  run_id               uuid,
+  start_time           timestamp,
+  close_time           timestamp,
+  status               int,  -- enum WorkflowExecutionCloseStatus {COMPLETED, FAILED, CANCELED, TERMINATED, CONTINUED_AS_NEW, TIMED_OUT}
+  workflow_type_name   text,
+  PRIMARY KEY  ((domain_id, domain_partition), start_time, run_id)
+) WITH CLUSTERING ORDER BY (start_time DESC);
+
+CREATE INDEX closed_by_workflow_id ON closed_executions (workflow_id);
+CREATE INDEX closed_by_close_time ON closed_executions (close_time);
+CREATE INDEX closed_by_type ON closed_executions (workflow_type_name);
+CREATE INDEX closed_by_status ON closed_executions (status);
diff --git a/schema/cadence/versioned/v0.1/manifest.json b/schema/cadence/versioned/v0.1/manifest.json
@@ -0,0 +1,8 @@
+{
+    "CurrVersion": "0.1",
+    "MinCompatibleVersion": "0.1",
+    "Description": "base version of schema",
+    "SchemaUpdateCqlFiles": [
+        "base.cql"
+    ]
+}
diff --git a/schema/visibility_test.cql b/schema/visibility_test.cql
diff --git a/tools/cassandra/README.md b/tools/cassandra/README.md
@@ -0,0 +1,24 @@
+What
+----
+This package contains the tooling for cadence cassandra operations.
+
+How
+---
+- Run make bins
+- You should see an executable `cadence-cassandra-tool`
+
+Setting up initial cassandra schema on a new cluster
+----------------------------------------------------
+```
+./cadence-cassandra-tool -ep 127.0.0.1 -k cadence setup-schema -v 0.0 -- this sets up just the schema version tables with initial version of 0.0
+./cadence-cassandra-tool -ep 127.0.0.1 -k cadence update-schema -d ./schema/cadence -- upgrades your schema to the latest version
+```
+
+Updating schema on an existing cluster
+--------------------------------------
+You can only upgrade to a new version after the initial setup done above.
+
+```
+./cadence-cassandra-tool -ep 127.0.0.1 -k cadence update-schema -d ./schema/cadence -v x.x -y -- executes a dryrun of upgrade to version x.x
+./cadence-cassandra-tool -ep 127.0.0.1 -k cadence update-schema -d ./schema/cadence -v x.x    -- actually executes the upgrade to version x.x
+```