Skip to content

Commit

Permalink
feat: add webhook metrics
Browse files Browse the repository at this point in the history
  • Loading branch information
UgOrange committed Nov 4, 2024
1 parent 3d32168 commit 3ff5da4
Show file tree
Hide file tree
Showing 8 changed files with 92 additions and 67 deletions.
12 changes: 9 additions & 3 deletions cmd/varmor/main.go
Original file line number Diff line number Diff line change
Expand Up @@ -65,6 +65,7 @@ var (
managerIP string
webhookMatchLabel string
bpfExclusiveMode bool
enableMetrics bool
statusUpdateCycle time.Duration
setupLog = log.Log.WithName("SETUP")
)
Expand All @@ -87,7 +88,7 @@ func main() {
flag.StringVar(&webhookMatchLabel, "webhookMatchLabel", "sandbox.varmor.org/enable=true", "Configure the matchLabel of webhook configuration, the valid format is key=value or nil")
flag.BoolVar(&bpfExclusiveMode, "bpfExclusiveMode", false, "Set this flag to enable exclusive mode for the BPF enforcer. It will disable the AppArmor confinement when using the BPF enforcer.")
flag.DurationVar(&statusUpdateCycle, "statusUpdateCycle", time.Hour*2, "Configure the status update cycle for VarmorPolicy and ArmorProfile")

flag.BoolVar(&enableMetrics, "enableMetrics", false, "Set this flag to enable metrics.")
if err := flag.Set("v", "2"); err != nil {
setupLog.Error(err, "flag.Set()")
os.Exit(1)
Expand Down Expand Up @@ -147,7 +148,11 @@ func main() {
} else {
gin.SetMode(gin.ReleaseMode)
}
metricsModule := metrics.NewMetricsModule(log.Log.WithName("METRICS"))
// metric init
var metricsModule *metrics.MetricsModule

metricsModule = metrics.NewMetricsModule(log.Log.WithName("METRICS"), enableMetrics)

if agent {
setupLog.Info("vArmor agent startup")

Expand All @@ -164,8 +169,8 @@ func main() {
config.StatusServicePort,
config.ClassifierServicePort,
stopCh,
log.Log.WithName("AGENT"),
metricsModule,
log.Log.WithName("AGENT"),
)
if err != nil {
setupLog.Error(err, "agent.NewAgent()")
Expand Down Expand Up @@ -274,6 +279,7 @@ func main() {
managerIP,
config.WebhookServicePort,
bpfExclusiveMode,
metricsModule,
log.Log.WithName("WEBHOOK-SERVER"))
if err != nil {
setupLog.Error(err, "Failed to create webhook webhookServer")
Expand Down
5 changes: 2 additions & 3 deletions go.mod
Original file line number Diff line number Diff line change
Expand Up @@ -19,11 +19,8 @@ require (
github.com/seccomp/libseccomp-golang v0.10.0
go.opentelemetry.io/otel v1.30.0
go.opentelemetry.io/otel/exporters/prometheus v0.52.0
go.opentelemetry.io/otel/exporters/stdout/stdouttrace v1.29.0
go.opentelemetry.io/otel/metric v1.30.0
go.opentelemetry.io/otel/sdk v1.30.0
go.opentelemetry.io/otel/sdk/metric v1.30.0
go.opentelemetry.io/otel/trace v1.30.0
golang.org/x/sys v0.25.0
google.golang.org/grpc v1.58.3
gopkg.in/yaml.v3 v3.0.1
Expand Down Expand Up @@ -98,6 +95,8 @@ require (
github.com/twitchyliquid64/golang-asm v0.15.1 // indirect
github.com/ugorji/go/codec v1.2.11 // indirect
go.opencensus.io v0.24.0 // indirect
go.opentelemetry.io/otel/sdk v1.30.0 // indirect
go.opentelemetry.io/otel/trace v1.30.0 // indirect
golang.org/x/arch v0.3.0 // indirect
golang.org/x/crypto v0.26.0 // indirect
golang.org/x/exp v0.0.0-20231214170342-aacd6d4b4611 // indirect
Expand Down
2 changes: 0 additions & 2 deletions go.sum
Original file line number Diff line number Diff line change
Expand Up @@ -260,8 +260,6 @@ go.opentelemetry.io/otel v1.30.0 h1:F2t8sK4qf1fAmY9ua4ohFS/K+FUuOPemHUIXHtktrts=
go.opentelemetry.io/otel v1.30.0/go.mod h1:tFw4Br9b7fOS+uEao81PJjVMjW/5fvNCbpsDIXqP0pc=
go.opentelemetry.io/otel/exporters/prometheus v0.52.0 h1:kmU3H0b9ufFSi8IQCcxack+sWUblKkFbqWYs6YiACGQ=
go.opentelemetry.io/otel/exporters/prometheus v0.52.0/go.mod h1:+wsAp2+JhuGXX7YRkjlkx6hyWY3ogFPfNA4x3nyiAh0=
go.opentelemetry.io/otel/exporters/stdout/stdouttrace v1.29.0 h1:X3ZjNp36/WlkSYx0ul2jw4PtbNEDDeLskw3VPsrpYM0=
go.opentelemetry.io/otel/exporters/stdout/stdouttrace v1.29.0/go.mod h1:2uL/xnOXh0CHOBFCWXz5u1A4GXLiW+0IQIzVbeOEQ0U=
go.opentelemetry.io/otel/metric v1.30.0 h1:4xNulvn9gjzo4hjg+wzIKG7iNFEaBMX00Qd4QIZs7+w=
go.opentelemetry.io/otel/metric v1.30.0/go.mod h1:aXTfST94tswhWEb+5QjlSqG+cZlmyXy/u8jFpor3WqQ=
go.opentelemetry.io/otel/sdk v1.30.0 h1:cHdik6irO49R5IysVhdn8oaiR9m8XluDaJAs4DfOrYE=
Expand Down
12 changes: 2 additions & 10 deletions internal/agent/agent.go
Original file line number Diff line number Diff line change
Expand Up @@ -19,7 +19,6 @@ import (
"encoding/json"
"fmt"
"github.com/bytedance/vArmor/pkg/metrics"
"go.opentelemetry.io/otel/metric"
"os/exec"
"path/filepath"
"reflect"
Expand Down Expand Up @@ -87,10 +86,6 @@ type Agent struct {
classifierPort int
stopCh <-chan struct{}
log logr.Logger
admissionRequests metric.Int64Counter
mutatedRequests metric.Int64Counter
nonMutatedRequests metric.Int64Counter
webhookLatency metric.Float64Histogram
}

func NewAgent(
Expand All @@ -106,8 +101,9 @@ func NewAgent(
managerPort int,
classifierPort int,
stopCh <-chan struct{},
metricsModule *metrics.MetricsModule,
log logr.Logger,
metricsModule *metrics.MetricsModule) (*Agent, error) {
) (*Agent, error) {

var err error

Expand All @@ -132,10 +128,6 @@ func NewAgent(
classifierPort: classifierPort,
stopCh: stopCh,
log: log,
admissionRequests: metricsModule.RegisterInt64Counter("admission_requests_total", "Total number of admission requests"),
mutatedRequests: metricsModule.RegisterInt64Counter("mutated_requests", "Number of requests that were mutated"),
nonMutatedRequests: metricsModule.RegisterInt64Counter("non_mutated_requests", "Number of requests that were not mutated"),
webhookLatency: metricsModule.RegisterHistogram("webhook_latency", "Latency of webhook processing", 0.1, 0.5, 1, 2, 5),
}

if !debug {
Expand Down
44 changes: 23 additions & 21 deletions internal/status/api/v1/manager.go
Original file line number Diff line number Diff line change
Expand Up @@ -62,35 +62,37 @@ type StatusManager struct {
statusUpdateCycle time.Duration
debug bool
log logr.Logger
metricsModule *metrics.MetricsModule
profileSuccess metric.Int64Counter
profileFailure metric.Int64Counter
profileChangeCount metric.Int64Counter
profileStatusPerNode metric.Int64Gauge
profileLatestStatus metric.Int64Gauge
}

func NewStatusManager(coreInterface corev1.CoreV1Interface, appsInterface appsv1.AppsV1Interface, varmorInterface varmorinterface.CrdV1beta1Interface, statusUpdateCycle time.Duration, debug bool, metricsModule *metrics.MetricsModule, log logr.Logger) *StatusManager {
m := StatusManager{
coreInterface: coreInterface,
appsInterface: appsInterface,
varmorInterface: varmorInterface,
desiredNumber: 0,
PolicyStatuses: make(map[string]varmortypes.PolicyStatus),
ModelingStatuses: make(map[string]varmortypes.ModelingStatus),
ResetCh: make(chan string, 50),
DeleteCh: make(chan string, 50),
UpdateStatusCh: make(chan string, 100),
UpdateModeCh: make(chan string, 50),
statusQueue: workqueue.NewNamedRateLimitingQueue(workqueue.DefaultControllerRateLimiter(), "status"),
dataQueue: workqueue.NewNamedRateLimitingQueue(workqueue.DefaultControllerRateLimiter(), "data"),
statusUpdateCycle: statusUpdateCycle,
profileSuccess: metricsModule.RegisterInt64Counter("profile_processing_success", "Number of successful profile processing"),
profileFailure: metricsModule.RegisterInt64Counter("profile_processing_failure", "Number of failed profile processing"),
profileChangeCount: metricsModule.RegisterInt64Counter("profile_change_count", "Number of profile change"),
profileStatusPerNode: metricsModule.RegisterInt64Gauge("profile_status_per_node", "Number of profile status per node"),
profileLatestStatus: metricsModule.RegisterInt64Gauge("profile_latest_status", "Latest profile status"),
debug: debug,
log: log,
coreInterface: coreInterface,
appsInterface: appsInterface,
varmorInterface: varmorInterface,
desiredNumber: 0,
PolicyStatuses: make(map[string]varmortypes.PolicyStatus),
ModelingStatuses: make(map[string]varmortypes.ModelingStatus),
ResetCh: make(chan string, 50),
DeleteCh: make(chan string, 50),
UpdateStatusCh: make(chan string, 100),
UpdateModeCh: make(chan string, 50),
statusQueue: workqueue.NewNamedRateLimitingQueue(workqueue.DefaultControllerRateLimiter(), "status"),
dataQueue: workqueue.NewNamedRateLimitingQueue(workqueue.DefaultControllerRateLimiter(), "data"),
statusUpdateCycle: statusUpdateCycle,
metricsModule: metricsModule,
debug: debug,
log: log,
}
if metricsModule.Enabled {
m.profileSuccess = metricsModule.RegisterInt64Counter("profile_processing_success", "Number of successful profile processing")
m.profileFailure = metricsModule.RegisterInt64Counter("profile_processing_failure", "Number of failed profile processing")
m.profileChangeCount = metricsModule.RegisterInt64Counter("profile_change_count", "Number of profile change")
m.profileStatusPerNode = metricsModule.RegisterInt64Gauge("profile_status_per_node", "Number of profile status per node")
}
return &m
}
Expand Down
15 changes: 6 additions & 9 deletions internal/status/api/v1/status.go
Original file line number Diff line number Diff line change
Expand Up @@ -63,11 +63,13 @@ func (m *StatusManager) Status(c *gin.Context) {

logger.V(3).Info("enqueue ProfileStatus from agent")
m.statusQueue.Add(profileStatus)
go m.HandleProfileStatusUpdate(profileStatus)
if m.metricsModule.Enabled == true {
go m.HandleProfileStatusUpdate(profileStatus)
}
}
func (m *StatusManager) HandleProfileStatusUpdate(status varmortypes.ProfileStatus) {
ctx := context.Background()
// 标签信息
// label info
labels := []attribute.KeyValue{
attribute.String("namespace", status.Namespace),
attribute.String("profile_name", status.ProfileName),
Expand All @@ -83,16 +85,11 @@ func (m *StatusManager) HandleProfileStatusUpdate(status varmortypes.ProfileStat
m.profileChangeCount.Add(ctx, 1, metric.WithAttributes(labels...))

if status.Status == "Success" {
m.profileStatusPerNode.Record(ctx, 1, metric.WithAttributes(labels...)) // 1 表示成功
m.profileStatusPerNode.Record(ctx, 1, metric.WithAttributes(labels...)) // 1 mean success
} else {
m.profileStatusPerNode.Record(ctx, 0, metric.WithAttributes(labels...)) // 0 表示失败
m.profileStatusPerNode.Record(ctx, 0, metric.WithAttributes(labels...)) // 0 mean failure
}

if status.Status == "Success" {
m.profileLatestStatus.Record(ctx, 1, metric.WithAttributes(labels...))
} else {
m.profileLatestStatus.Record(ctx, 0, metric.WithAttributes(labels...))
}
}

// updatePolicyStatus update StatusManager.PolicyStatuses[statusKey] with profileStatus which comes from agent.
Expand Down
44 changes: 37 additions & 7 deletions internal/webhooks/server.go
Original file line number Diff line number Diff line change
Expand Up @@ -18,6 +18,8 @@ import (
"context"
"crypto/tls"
"fmt"
"github.com/bytedance/vArmor/pkg/metrics"
"go.opentelemetry.io/otel/metric"
"net/http"
"time"

Expand All @@ -43,12 +45,17 @@ import (

// WebhookServer contains configured TLS server with MutationWebhook.
type WebhookServer struct {
server *http.Server
webhookRegister *webhookconfig.Register
policyCacher *policycacher.PolicyCacher
deserializer runtime.Decoder
bpfExclusiveMode bool
log logr.Logger
server *http.Server
webhookRegister *webhookconfig.Register
policyCacher *policycacher.PolicyCacher
deserializer runtime.Decoder
bpfExclusiveMode bool
metricsModule *metrics.MetricsModule
admissionRequests metric.Int64Counter
mutatedRequests metric.Int64Counter
nonMutatedRequests metric.Int64Counter
webhookLatency metric.Float64Histogram
log logr.Logger
}

func NewWebhookServer(
Expand All @@ -58,16 +65,23 @@ func NewWebhookServer(
addr string,
port int,
bpfExclusiveMode bool,
metricsModule *metrics.MetricsModule,
log logr.Logger,
) (*WebhookServer, error) {

ws := &WebhookServer{
webhookRegister: webhookRegister,
policyCacher: policyCacher,
bpfExclusiveMode: bpfExclusiveMode,
metricsModule: metricsModule,
log: log,
}

if metricsModule.Enabled {
ws.admissionRequests = metricsModule.RegisterInt64Counter("admission_requests_total", "Total number of admission requests")
ws.mutatedRequests = metricsModule.RegisterInt64Counter("mutated_requests", "Number of requests that were mutated")
ws.nonMutatedRequests = metricsModule.RegisterInt64Counter("non_mutated_requests", "Number of requests that were not mutated")
ws.webhookLatency = metricsModule.RegisterHistogram("webhook_latency", "Latency of webhook processing", 0.1, 0.5, 1, 2, 5)
}
scheme := runtime.NewScheme()
codecs := serializer.NewCodecFactory(scheme)
ws.deserializer = codecs.UniversalDeserializer()
Expand Down Expand Up @@ -114,8 +128,12 @@ func NewWebhookServer(

func (ws *WebhookServer) handlerFunc(handler func(request *admissionv1.AdmissionRequest) *admissionv1.AdmissionResponse) http.HandlerFunc {
return func(rw http.ResponseWriter, r *http.Request) {
ctx := context.Background()
startTime := time.Now()

if ws.admissionRequests != nil {
ws.admissionRequests.Add(ctx, 1)
}
admissionReview := bodyToAdmissionReview(r, rw, ws.log)
if admissionReview == nil {
ws.log.Error(fmt.Errorf("failed to parse admission review request"), "request", r)
Expand All @@ -136,8 +154,20 @@ func (ws *WebhookServer) handlerFunc(handler func(request *admissionv1.Admission
"operation", request.Operation)

admissionReview.Response = handler(request)
if admissionReview.Response.Patch != nil && len(admissionReview.Response.Patch) > 0 {
if ws.mutatedRequests != nil {
ws.mutatedRequests.Add(ctx, 1)
}
} else {
if ws.nonMutatedRequests != nil {
ws.nonMutatedRequests.Add(ctx, 1)
}
}
writeResponse(rw, admissionReview)

if ws.webhookLatency != nil {
ws.webhookLatency.Record(ctx, time.Since(startTime).Seconds())
}
logger.V(3).Info("AdmissionRequest processed", "time", time.Since(startTime).String())
}
}
Expand Down
25 changes: 13 additions & 12 deletions pkg/metrics/metrics.go
Original file line number Diff line number Diff line change
Expand Up @@ -16,30 +16,31 @@ const (

type MetricsModule struct {
meter metric.Meter
enabled bool
Enabled bool
log logr.Logger
}

func NewMetricsModule(log logr.Logger) *MetricsModule {
func NewMetricsModule(log logr.Logger, enabled bool) *MetricsModule {
exporter, err := prometheus.New()
if err != nil {
log.Error(err, "failed to create Prometheus exporter")
}
provider := sdkmetric.NewMeterProvider(sdkmetric.WithReader(exporter))
meter := provider.Meter(MeterName)

go func() {
log.Info("Serving metrics at :8822/metrics")
http.Handle("/metrics", promhttp.Handler())
err := http.ListenAndServe(":8822", nil)
if err != nil {
log.Error(err, "failed to start metrics server")
}
}()
if enabled == true {
go func() {
log.Info("Serving metrics at :8822/metrics")
http.Handle("/metrics", promhttp.Handler())
err := http.ListenAndServe(":8822", nil)
if err != nil {
log.Error(err, "failed to start metrics server")
}
}()
}

return &MetricsModule{
meter: meter,
enabled: true,
Enabled: enabled,
}
}
func (m *MetricsModule) RegisterCounter(name string, description string) metric.Float64Counter {
Expand Down

0 comments on commit 3ff5da4

Please sign in to comment.