Skip to content

Commit

Permalink
shardController: revisit shard shutdown order, add latency metrics (c…
Browse files Browse the repository at this point in the history
  • Loading branch information
venkat1109 authored Aug 15, 2017
1 parent 85ba1df commit f246cb3
Show file tree
Hide file tree
Showing 4 changed files with 18 additions and 12 deletions.
6 changes: 6 additions & 0 deletions common/metrics/defs.go
Original file line number Diff line number Diff line change
Expand Up @@ -488,6 +488,9 @@ const (
ShardItemRemovedCounter
MembershipChangedCounter
NumShardsGauge
GetEngineForShardErrorCounter
GetEngineForShardLatency
RemoveEngineForShardLatency
)

// Matching metrics enum
Expand Down Expand Up @@ -558,6 +561,9 @@ var MetricDefs = map[ServiceIdx]map[int]metricDefinition{
ShardItemRemovedCounter: {metricName: "sharditem-removed-count", metricType: Counter},
MembershipChangedCounter: {metricName: "membership-changed-count", metricType: Counter},
NumShardsGauge: {metricName: "numshards-gauge", metricType: Gauge},
GetEngineForShardErrorCounter: {metricName: "get-engine-for-shard-errors", metricType: Counter},
GetEngineForShardLatency: {metricName: "get-engine-for-shard-latency", metricType: Timer},
RemoveEngineForShardLatency: {metricName: "remove-engine-for-shard-latency", metricType: Timer},
},
Matching: {
PollSuccessCounter: {metricName: "poll.success"},
Expand Down
4 changes: 0 additions & 4 deletions glide.lock

Some generated files are not rendered by default. Learn more about how customized files appear on GitHub.

3 changes: 0 additions & 3 deletions glide.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -20,9 +20,6 @@ import:
- package: github.com/dgryski/go-farm
- package: github.com/emirpasic/gods
- package: github.com/davecgh/go-spew
- package: github.com/uber-go/timer
subpackages:
- twheel
- package: github.com/urfave/cli
- package: gopkg.in/yaml.v2
- package: gopkg.in/validator.v2
Expand Down
17 changes: 12 additions & 5 deletions service/history/shardController.go
Original file line number Diff line number Diff line change
Expand Up @@ -167,15 +167,18 @@ func (c *shardController) GetEngine(workflowID string) (Engine, error) {
}

func (c *shardController) getEngineForShard(shardID int) (Engine, error) {
sw := c.metricsClient.StartTimer(metrics.HistoryShardControllerScope, metrics.GetEngineForShardLatency)
defer sw.Stop()
item, err := c.getOrCreateHistoryShardItem(shardID)
if err != nil {
return nil, err
}

return item.getOrCreateEngine(c.shardClosedCh)
}

func (c *shardController) removeEngineForShard(shardID int) {
sw := c.metricsClient.StartTimer(metrics.HistoryShardControllerScope, metrics.RemoveEngineForShardLatency)
defer sw.Stop()
item, _ := c.removeHistoryShardItem(shardID)
if item != nil {
item.stopEngine()
Expand Down Expand Up @@ -297,6 +300,7 @@ AcquireLoop:
if info.Identity() == c.host.Identity() {
_, err1 := c.getEngineForShard(shardID)
if err1 != nil {
c.metricsClient.IncCounter(metrics.HistoryShardControllerScope, metrics.GetEngineForShardErrorCounter)
logging.LogOperationFailedEvent(c.logger, fmt.Sprintf("Unable to create history shard engine: %v", shardID),
err1)
continue AcquireLoop
Expand Down Expand Up @@ -381,16 +385,19 @@ func (i *historyShardsItem) stopEngine() {
i.Lock()
defer i.Unlock()

if i.executionMgr != nil {
i.executionMgr.Close()
}

if i.engine != nil {
logging.LogShardEngineStoppingEvent(i.logger, i.host.Identity(), i.shardID)
i.engine.Stop()
i.engine = nil
logging.LogShardEngineStoppedEvent(i.logger, i.host.Identity(), i.shardID)
}

// Shutting down executionMgr will close all connections
// to cassandra for this engine. So, make sure to
// close executionMgr only after stopping the engine
if i.executionMgr != nil {
i.executionMgr.Close()
}
}

func isShardOwnershiptLostError(err error) bool {
Expand Down

0 comments on commit f246cb3

Please sign in to comment.