Skip to content

Commit

Permalink
server: Add sampling rate to distributed tracing
Browse files Browse the repository at this point in the history
ExperimentalDistributedTracingSamplingRatePerMillion is the
number of samples to collect per million spans.
Defaults to 0.
  • Loading branch information
lilic committed Aug 30, 2021
1 parent a1fd98c commit 810f489
Show file tree
Hide file tree
Showing 6 changed files with 212 additions and 57 deletions.
10 changes: 10 additions & 0 deletions server/embed/config.go
Original file line number Diff line number Diff line change
Expand Up @@ -348,6 +348,9 @@ type Config struct {
// that exist at the same time.
// Can only be used if ExperimentalEnableDistributedTracing is true.
ExperimentalDistributedTracingServiceInstanceID string `json:"experimental-distributed-tracing-instance-id"`
// ExperimentalDistributedTracingSamplingRatePerMillion is the number of samples to collect per million spans.
// Defaults to 0.
ExperimentalDistributedTracingSamplingRatePerMillion int `json:"experimental-distributed-tracing-sampling-rate"`

// Logger is logger options: currently only supports "zap".
// "capnslog" is removed in v3.5.
Expand Down Expand Up @@ -681,6 +684,13 @@ func (cfg *Config) Validate() error {
return fmt.Errorf("unknown auto-compaction-mode %q", cfg.AutoCompactionMode)
}

// Validate distributed tracing configuration but only if enabled.
if cfg.ExperimentalEnableDistributedTracing {
if err := validateTracingConfig(cfg.ExperimentalDistributedTracingSamplingRatePerMillion); err != nil {
return fmt.Errorf("distributed tracing configurition is not valid: (%v)", err)
}
}

return nil
}

Expand Down
111 changes: 111 additions & 0 deletions server/embed/config_tracing.go
Original file line number Diff line number Diff line change
@@ -0,0 +1,111 @@
// Copyright 2021 The etcd Authors
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.

package embed

import (
"context"
"fmt"

"go.opentelemetry.io/contrib/instrumentation/google.golang.org/grpc/otelgrpc"
"go.opentelemetry.io/otel/exporters/otlp"
"go.opentelemetry.io/otel/exporters/otlp/otlpgrpc"
"go.opentelemetry.io/otel/propagation"
"go.opentelemetry.io/otel/sdk/resource"
tracesdk "go.opentelemetry.io/otel/sdk/trace"
"go.opentelemetry.io/otel/semconv"
"go.uber.org/zap"
)

// maxSamplingRatePerMillion is the largest accepted sampling rate. It is also
// the divisor used to turn a per-million rate into a sampling ratio, so this
// value corresponds to a ratio of 1.0.
const maxSamplingRatePerMillion = 1000000

// validateTracingConfig checks that samplingRate, expressed as samples per
// million spans, lies within [0, maxSamplingRatePerMillion]. Both bounds are
// valid values. It returns a descriptive error when the rate is out of range
// and nil otherwise.
func validateTracingConfig(samplingRate int) error {
	// Zero is a legal value, so the lower bound is "not negative" rather
	// than "positive".
	if samplingRate < 0 {
		return fmt.Errorf("tracing sampling rate must not be negative, got %d", samplingRate)
	}
	// The maximum itself is accepted; only values above it are rejected.
	if samplingRate > maxSamplingRatePerMillion {
		return fmt.Errorf("tracing sampling rate must not exceed %d, got %d", maxSamplingRatePerMillion, samplingRate)
	}

	return nil
}

// setupTracingExporter creates the OTLP gRPC span exporter and the otelgrpc
// options used for distributed tracing.
//
// The exporter targets cfg.ExperimentalDistributedTracingAddress and is
// configured with otlpgrpc.WithInsecure(). The returned options carry a
// composite TraceContext/Baggage propagator and a tracer provider that batches
// spans to the exporter, attaches a resource holding the configured service
// name (plus the service instance ID when one is set), and samples through a
// parent-based sampler derived from
// cfg.ExperimentalDistributedTracingSamplingRatePerMillion.
//
// When the exporter cannot be created the error is returned and the other two
// results are nil.
func setupTracingExporter(ctx context.Context, cfg *Config) (exporter tracesdk.SpanExporter, options []otelgrpc.Option, err error) {
	driver := otlpgrpc.NewDriver(
		otlpgrpc.WithEndpoint(cfg.ExperimentalDistributedTracingAddress),
		otlpgrpc.WithInsecure(),
	)
	if exporter, err = otlp.NewExporter(ctx, driver); err != nil {
		return nil, nil, err
	}

	serviceResource := resource.NewWithAttributes(
		semconv.ServiceNameKey.String(cfg.ExperimentalDistributedTracingServiceName),
	)

	// The instance ID attribute is optional: the helper yields nil when no
	// ID is configured, in which case only the service name is reported.
	instanceResource := determineResourceWithIDKey(cfg.ExperimentalDistributedTracingServiceInstanceID)
	if instanceResource != nil {
		// Merge resources into a new resource in case of duplicates.
		serviceResource = resource.Merge(serviceResource, instanceResource)
	}

	propagatorsOpt := otelgrpc.WithPropagators(
		propagation.NewCompositeTextMapPropagator(
			propagation.TraceContext{},
			propagation.Baggage{},
		),
	)
	tracerProviderOpt := otelgrpc.WithTracerProvider(
		tracesdk.NewTracerProvider(
			tracesdk.WithBatcher(exporter),
			tracesdk.WithResource(serviceResource),
			tracesdk.WithSampler(
				tracesdk.ParentBased(determineSampler(cfg.ExperimentalDistributedTracingSamplingRatePerMillion)),
			),
		),
	)
	options = append(options, propagatorsOpt, tracerProviderOpt)

	cfg.logger.Debug(
		"distributed tracing enabled",
		zap.String("address", cfg.ExperimentalDistributedTracingAddress),
		zap.String("service-name", cfg.ExperimentalDistributedTracingServiceName),
		zap.String("service-instance-id", cfg.ExperimentalDistributedTracingServiceInstanceID),
		zap.Int("sampling-rate", cfg.ExperimentalDistributedTracingSamplingRatePerMillion),
	)

	// err is necessarily nil here: the only failure path returned above.
	return exporter, options, nil
}

// determineSampler picks the sampler for samplingRate, which is expressed in
// samples per million spans. A rate of zero yields tracesdk.NeverSample(); any
// other rate yields a trace-ID ratio sampler whose ratio is
// samplingRate / maxSamplingRatePerMillion.
func determineSampler(samplingRate int) tracesdk.Sampler {
	if samplingRate != 0 {
		ratio := float64(samplingRate) / float64(maxSamplingRatePerMillion)
		return tracesdk.TraceIDRatioBased(ratio)
	}
	return tracesdk.NeverSample()
}

// determineResourceWithIDKey builds a resource carrying the service instance
// ID attribute. The tracing service instance ID must be unique, so the empty
// default string is never used as a value: nil is returned when
// serviceInstanceID is empty.
func determineResourceWithIDKey(serviceInstanceID string) *resource.Resource {
	if serviceInstanceID == "" {
		return nil
	}
	instanceIDAttr := semconv.ServiceInstanceIDKey.String(serviceInstanceID)
	return resource.NewWithAttributes(instanceIDAttr)
}
83 changes: 83 additions & 0 deletions server/embed/config_tracing_test.go
Original file line number Diff line number Diff line change
@@ -0,0 +1,83 @@
// Copyright 2021 The etcd Authors
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.

package embed

import (
"testing"
)

// neverSampleDescription is the Description() string the tests expect from
// the sampler returned when the sampling rate is zero.
const neverSampleDescription = "AlwaysOffSampler"

// TestDetermineSampler verifies, by comparing sampler descriptions, that
// determineSampler returns the expected sampler for a given rate.
func TestDetermineSampler(t *testing.T) {
	type samplerCase struct {
		name                   string
		sampleRate             int
		wantSamplerDescription string
	}
	cases := []samplerCase{
		{
			name:                   "sample rate is disabled",
			sampleRate:             0,
			wantSamplerDescription: neverSampleDescription,
		},
		{
			name:                   "sample rate is 100",
			sampleRate:             100,
			wantSamplerDescription: "TraceIDRatioBased{0.0001}",
		},
	}
	for i := range cases {
		c := cases[i]
		t.Run(c.name, func(t *testing.T) {
			got := determineSampler(c.sampleRate).Description()
			if got != c.wantSamplerDescription {
				t.Errorf("tracing sampler was not as expected; expected sampler: %#+v, got sampler: %#+v", c.wantSamplerDescription, got)
			}
		})
	}
}

// TestTracingConfig verifies that validateTracingConfig rejects sampling rates
// outside [0, maxSamplingRatePerMillion] and accepts rates inside it,
// including both boundary values.
func TestTracingConfig(t *testing.T) {
	tests := []struct {
		name       string
		sampleRate int
		wantErr    bool
	}{
		{
			name:       "invalid - sample rate is less than 0",
			sampleRate: -1,
			wantErr:    true,
		},
		{
			name:       "invalid - sample rate is more than allowed value",
			sampleRate: maxSamplingRatePerMillion + 1,
			wantErr:    true,
		},
		{
			name:       "valid - sample rate is 0",
			sampleRate: 0,
			wantErr:    false,
		},
		{
			name:       "valid - sample rate is 100",
			sampleRate: 100,
			wantErr:    false,
		},
		{
			name:       "valid - sample rate is the maximum allowed value",
			sampleRate: maxSamplingRatePerMillion,
			wantErr:    false,
		},
	}
	for _, tc := range tests {
		t.Run(tc.name, func(t *testing.T) {
			err := validateTracingConfig(tc.sampleRate)
			if err == nil && tc.wantErr {
				// err is nil in this branch, so report the offending
				// input instead of formatting the nil error.
				t.Errorf("expected an error for sample rate %d, got nil", tc.sampleRate)
			}
			if err != nil && !tc.wantErr {
				t.Errorf("expected no errors, got error: (%v)", err)
			}
		})
	}
}
62 changes: 5 additions & 57 deletions server/embed/etcd.go
Original file line number Diff line number Diff line change
Expand Up @@ -46,13 +46,6 @@ import (

grpc_prometheus "github.com/grpc-ecosystem/go-grpc-prometheus"
"github.com/soheilhy/cmux"
"go.opentelemetry.io/contrib/instrumentation/google.golang.org/grpc/otelgrpc"
"go.opentelemetry.io/otel/exporters/otlp"
"go.opentelemetry.io/otel/exporters/otlp/otlpgrpc"
"go.opentelemetry.io/otel/propagation"
"go.opentelemetry.io/otel/sdk/resource"
tracesdk "go.opentelemetry.io/otel/sdk/trace"
"go.opentelemetry.io/otel/semconv"
"go.uber.org/zap"
"google.golang.org/grpc"
"google.golang.org/grpc/keepalive"
Expand Down Expand Up @@ -229,7 +222,7 @@ func StartEtcd(inCfg *Config) (e *Etcd, err error) {

if srvcfg.ExperimentalEnableDistributedTracing {
tctx := context.Background()
tracingExporter, opts, err := e.setupTracing(tctx)
tracingExporter, opts, err := setupTracingExporter(tctx, cfg)
if err != nil {
return e, err
}
Expand All @@ -238,6 +231,10 @@ func StartEtcd(inCfg *Config) (e *Etcd, err error) {
}
e.tracingExporterShutdown = func() { tracingExporter.Shutdown(tctx) }
srvcfg.ExperimentalTracerOptions = opts

e.cfg.logger.Info(
"distributed tracing setup enabled",
)
}

print(e.cfg.logger, *cfg, srvcfg, memberInitialized)
Expand Down Expand Up @@ -809,52 +806,3 @@ func parseCompactionRetention(mode, retention string) (ret time.Duration, err er
}
return ret, nil
}

// setupTracing creates the OTLP gRPC span exporter and the otelgrpc options
// for distributed tracing from the settings held in e.cfg.
//
// The exporter targets e.cfg.ExperimentalDistributedTracingAddress and is
// configured with otlpgrpc.WithInsecure(). The returned options carry a
// composite TraceContext/Baggage propagator and a tracer provider that batches
// spans to the exporter and attaches a resource holding the configured service
// name, plus the service instance ID when one is set.
//
// When the exporter cannot be created the error is returned and the other two
// results are nil.
func (e *Etcd) setupTracing(ctx context.Context) (exporter tracesdk.SpanExporter, options []otelgrpc.Option, err error) {
	driver := otlpgrpc.NewDriver(
		otlpgrpc.WithEndpoint(e.cfg.ExperimentalDistributedTracingAddress),
		otlpgrpc.WithInsecure(),
	)
	if exporter, err = otlp.NewExporter(ctx, driver); err != nil {
		return nil, nil, err
	}

	serviceResource := resource.NewWithAttributes(
		semconv.ServiceNameKey.String(e.cfg.ExperimentalDistributedTracingServiceName),
	)
	// The tracing service instance ID must be unique, so the empty default
	// string is never used as a value; the attribute is only added when a
	// non-empty ID is configured.
	if instanceID := e.cfg.ExperimentalDistributedTracingServiceInstanceID; instanceID != "" {
		instanceResource := resource.NewWithAttributes(
			semconv.ServiceInstanceIDKey.String(instanceID),
		)
		// Merge resources to combine into a new resource in case of
		// duplicates.
		serviceResource = resource.Merge(serviceResource, instanceResource)
	}

	propagatorsOpt := otelgrpc.WithPropagators(
		propagation.NewCompositeTextMapPropagator(
			propagation.TraceContext{},
			propagation.Baggage{},
		),
	)
	tracerProviderOpt := otelgrpc.WithTracerProvider(
		tracesdk.NewTracerProvider(
			tracesdk.WithBatcher(exporter),
			tracesdk.WithResource(serviceResource),
		),
	)
	options = append(options, propagatorsOpt, tracerProviderOpt)

	e.cfg.logger.Info(
		"distributed tracing enabled",
		zap.String("distributed-tracing-address", e.cfg.ExperimentalDistributedTracingAddress),
		zap.String("distributed-tracing-service-name", e.cfg.ExperimentalDistributedTracingServiceName),
		zap.String("distributed-tracing-service-instance-id", e.cfg.ExperimentalDistributedTracingServiceInstanceID),
	)

	// err is necessarily nil here: the only failure path returned above.
	return exporter, options, nil
}
1 change: 1 addition & 0 deletions server/etcdmain/config.go
Original file line number Diff line number Diff line change
Expand Up @@ -267,6 +267,7 @@ func newConfig() *config {
fs.StringVar(&cfg.ec.ExperimentalDistributedTracingAddress, "experimental-distributed-tracing-address", embed.ExperimentalDistributedTracingAddress, "Address for distributed tracing used for OpenTelemetry Tracing (if enabled with experimental-enable-distributed-tracing flag).")
fs.StringVar(&cfg.ec.ExperimentalDistributedTracingServiceName, "experimental-distributed-tracing-service-name", embed.ExperimentalDistributedTracingServiceName, "Configures service name for distributed tracing to be used to define service name for OpenTelemetry Tracing (if enabled with experimental-enable-distributed-tracing flag). 'etcd' is the default service name. Use the same service name for all instances of etcd.")
fs.StringVar(&cfg.ec.ExperimentalDistributedTracingServiceInstanceID, "experimental-distributed-tracing-instance-id", "", "Configures service instance ID for distributed tracing to be used to define service instance ID key for OpenTelemetry Tracing (if enabled with experimental-enable-distributed-tracing flag). There is no default value set. This ID must be unique per etcd instance.")
fs.IntVar(&cfg.ec.ExperimentalDistributedTracingSamplingRatePerMillion, "experimental-distributed-tracing-sampling-rate", 0, "Number of samples to collect per million spans for OpenTelemetry Tracing (if enabled with experimental-enable-distributed-tracing flag).")

// auth
fs.StringVar(&cfg.ec.AuthToken, "auth-token", cfg.ec.AuthToken, "Specify auth token specific options.")
Expand Down
2 changes: 2 additions & 0 deletions server/etcdmain/help.go
Original file line number Diff line number Diff line change
Expand Up @@ -210,6 +210,8 @@ Experimental distributed tracing:
Distributed tracing service name, must be same across all etcd instances.
--experimental-distributed-tracing-instance-id ''
Distributed tracing instance ID, must be unique per each etcd instance.
--experimental-distributed-tracing-sampling-rate '0'
Number of samples to collect per million spans for distributed tracing. Disabled by default.
v2 Proxy (to be deprecated in v3.6):
--proxy 'off'
Expand Down

0 comments on commit 810f489

Please sign in to comment.