Skip to content

Commit

Permalink
Add support for the Prometheus Operator (hashicorp#772)
Browse files Browse the repository at this point in the history
Support collecting Vault server metrics by deploying PrometheusOperator
CustomResources.

Co-authored-by: Sam Weston <[email protected]>
Co-authored-by: Theron Voran <[email protected]>
  • Loading branch information
3 people authored Sep 1, 2022
1 parent 8a6872e commit 0407431
Show file tree
Hide file tree
Showing 13 changed files with 498 additions and 4 deletions.
2 changes: 1 addition & 1 deletion .github/workflows/acceptance.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -21,6 +21,6 @@ jobs:
node_image: kindest/node:v${{ matrix.kind-k8s-version }}
version: v0.14.0

- run: bats ./test/acceptance -t
- run: bats --tap --timing ./test/acceptance
env:
VAULT_LICENSE_CI: ${{ secrets.VAULT_LICENSE_CI }}
4 changes: 2 additions & 2 deletions .github/workflows/tests.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -8,7 +8,7 @@ jobs:
steps:
- uses: actions/checkout@v2
- uses: ./.github/workflows/setup-test-tools
- run: bats ./test/unit -t
- run: bats --tap --timing ./test/unit

chart-verifier:
runs-on: ubuntu-latest
Expand All @@ -22,4 +22,4 @@ jobs:
with:
go-version: '1.17.4'
- run: go install github.com/redhat-certification/chart-verifier@${CHART_VERIFIER_VERSION}
- run: bats ./test/chart -t
- run: bats --tap --timing ./test/chart
2 changes: 2 additions & 0 deletions CHANGELOG.md
Original file line number Diff line number Diff line change
@@ -1,4 +1,6 @@
## Unreleased
Features:
* Add PrometheusOperator support for collecting Vault server metrics. [GH-772](https://github.com/hashicorp/vault-helm/pull/772)

## 0.21.0 (August 10th, 2022)

Expand Down
2 changes: 1 addition & 1 deletion Makefile
Original file line number Diff line number Diff line change
Expand Up @@ -71,7 +71,7 @@ acceptance:
ifneq ($(LOCAL_ACCEPTANCE_TESTS),true)
gcloud auth activate-service-account --key-file=${GOOGLE_CREDENTIALS}
endif
bats test/${ACCEPTANCE_TESTS}
bats --tap --timing test/${ACCEPTANCE_TESTS}

# this target is for provisioning the GKE cluster
# it is run in the docker container above when the test-provision target is invoked
Expand Down
26 changes: 26 additions & 0 deletions templates/prometheus-prometheusrules.yaml
Original file line number Diff line number Diff line change
@@ -0,0 +1,26 @@
{{- /*
PrometheusRule built from the rules supplied in values.
Rendered only when serverTelemetry.prometheusRules.rules is non-empty AND
either global.serverTelemetry.prometheusOperator or
serverTelemetry.prometheusRules.enabled is set.
*/ -}}
{{ if and .Values.serverTelemetry.prometheusRules.rules (or .Values.global.serverTelemetry.prometheusOperator .Values.serverTelemetry.prometheusRules.enabled) }}
---
apiVersion: monitoring.coreos.com/v1
kind: PrometheusRule
metadata:
  name: {{ include "vault.fullname" . }}
  labels:
    helm.sh/chart: {{ include "vault.chart" . }}
    app.kubernetes.io/name: {{ include "vault.name" . }}
    app.kubernetes.io/instance: {{ .Release.Name }}
    app.kubernetes.io/managed-by: {{ .Release.Service }}
    {{- /* Keep the selectors docs in values.yaml in sync whenever the fallback label below changes. */ -}}
    {{- with .Values.serverTelemetry.prometheusRules.selectors }}
    {{- toYaml . | nindent 4 }}
    {{- else }}
    release: prometheus
    {{- end }}
spec:
  groups:
  - name: {{ include "vault.fullname" . }}
    rules:
      {{- toYaml .Values.serverTelemetry.prometheusRules.rules | nindent 6 }}
{{- end }}
44 changes: 44 additions & 0 deletions templates/prometheus-servicemonitor.yaml
Original file line number Diff line number Diff line change
@@ -0,0 +1,44 @@
{{- /*
ServiceMonitor that scrapes Vault's /v1/sys/metrics endpoint in Prometheus
format. Rendered when either global.serverTelemetry.prometheusOperator or
serverTelemetry.serviceMonitor.enabled is set.
The target Service is picked by label: vault-active in "ha" mode, otherwise
vault-internal. Scraping is limited to the release namespace.
NOTE(review): TLS certificate verification is disabled for the scrape
(insecureSkipVerify: true).
*/ -}}
{{ template "vault.mode" . }}
{{ if or .Values.global.serverTelemetry.prometheusOperator .Values.serverTelemetry.serviceMonitor.enabled }}
---
apiVersion: monitoring.coreos.com/v1
kind: ServiceMonitor
metadata:
  name: {{ include "vault.fullname" . }}
  labels:
    helm.sh/chart: {{ include "vault.chart" . }}
    app.kubernetes.io/name: {{ include "vault.name" . }}
    app.kubernetes.io/instance: {{ .Release.Name }}
    app.kubernetes.io/managed-by: {{ .Release.Service }}
    {{- /* Keep the selectors docs in values.yaml in sync whenever the fallback label below changes. */ -}}
    {{- with .Values.serverTelemetry.serviceMonitor.selectors }}
    {{- toYaml . | nindent 4 }}
    {{- else }}
    release: prometheus
    {{- end }}
spec:
  selector:
    matchLabels:
      app.kubernetes.io/name: {{ include "vault.name" . }}
      app.kubernetes.io/instance: {{ .Release.Name }}
      {{- if ne .mode "ha" }}
      vault-internal: "true"
      {{- else }}
      vault-active: "true"
      {{- end }}
  endpoints:
  - port: {{ include "vault.scheme" . }}
    interval: {{ .Values.serverTelemetry.serviceMonitor.interval }}
    scrapeTimeout: {{ .Values.serverTelemetry.serviceMonitor.scrapeTimeout }}
    scheme: {{ include "vault.scheme" . | lower }}
    path: /v1/sys/metrics
    params:
      format:
      - prometheus
    tlsConfig:
      insecureSkipVerify: true
  namespaceSelector:
    matchNames:
    - {{ .Release.Namespace }}
{{ end }}
1 change: 1 addition & 0 deletions templates/server-ha-active-service.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -14,6 +14,7 @@ metadata:
app.kubernetes.io/name: {{ include "vault.name" . }}
app.kubernetes.io/instance: {{ .Release.Name }}
app.kubernetes.io/managed-by: {{ .Release.Service }}
vault-active: "true"
annotations:
{{ template "vault.service.annotations" .}}
spec:
Expand Down
1 change: 1 addition & 0 deletions templates/server-headless-service.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -13,6 +13,7 @@ metadata:
app.kubernetes.io/name: {{ include "vault.name" . }}
app.kubernetes.io/instance: {{ .Release.Name }}
app.kubernetes.io/managed-by: {{ .Release.Service }}
vault-internal: "true"
annotations:
{{ template "vault.service.annotations" .}}
spec:
Expand Down
90 changes: 90 additions & 0 deletions test/acceptance/server-telemetry.bats
Original file line number Diff line number Diff line change
@@ -0,0 +1,90 @@
#!/usr/bin/env bats

load _helpers

@test "server/telemetry: prometheusOperator" {
  # End-to-end check: install kube-prometheus-stack and the Vault chart with
  # the ServiceMonitor enabled, initialize and unseal Vault, then verify that
  # Prometheus discovers the "vault-internal" job and reports it as up.
  cd "$(chart_dir)"

  # Start from a clean namespace; leftovers from earlier runs are ignored.
  helm --namespace acceptance uninstall "$(name_prefix)" || :
  helm --namespace acceptance uninstall prometheus || :
  kubectl delete namespace acceptance --ignore-not-found=true
  kubectl create namespace acceptance
  kubectl config set-context --current --namespace=acceptance

  helm repo add prometheus-community https://prometheus-community.github.io/helm-charts
  helm repo update
  helm install \
    --wait \
    --version 39.6.0 \
    prometheus prometheus-community/kube-prometheus-stack

  helm install \
    --wait \
    --values ./test/acceptance/server-test/telemetry.yaml \
    "$(name_prefix)" .

  wait_for_running "$(name_prefix)-0"

  # Sealed, not initialized
  wait_for_sealed_vault "$(name_prefix)-0"

  # Vault Init
  # Declaration and assignment are split so a failing pipeline is not hidden
  # behind the exit status of `local`.
  local token
  token=$(kubectl exec -ti "$(name_prefix)-0" -- \
    vault operator init -format=json -n 1 -t 1 | \
    jq -r '.unseal_keys_b64[0]')
  [ "${token}" != "" ]

  # Vault Unseal
  local pods=($(kubectl get pods --selector='app.kubernetes.io/name=vault' -o json | jq -r '.items[].metadata.name'))
  for pod in "${pods[@]}"
  do
    kubectl exec -ti "${pod}" -- vault operator unseal "${token}"
  done

  wait_for_ready "$(name_prefix)-0"

  # Unsealed, initialized
  local sealed_status
  sealed_status=$(kubectl exec "$(name_prefix)-0" -- vault status -format=json |
    jq -r '.sealed')
  [ "${sealed_status}" == "false" ]

  local init_status
  init_status=$(kubectl exec "$(name_prefix)-0" -- vault status -format=json |
    jq -r '.initialized')
  [ "${init_status}" == "true" ]

  # Unfortunately it can take up to 2 minutes for the vault prometheus job to
  # appear, so poll for it: 240 tries with a 0.5s pause between them.
  # TODO: investigate how to reduce this.
  local job_labels
  local max_tries=240
  local tries=0
  until [ "${tries}" -ge "${max_tries}" ]
  do
    job_labels=$( (kubectl exec -n acceptance svc/prometheus-kube-prometheus-prometheus \
      -c prometheus \
      -- wget -q -O - http://127.0.0.1:9090/api/v1/label/job/values) | tee /dev/stderr )

    # Ensure the expected job label was picked up by Prometheus
    [ "$(echo "${job_labels}" | jq 'any(.data[]; . == "vault-internal")')" = "true" ] && break

    # Pre-increment so the arithmetic command never evaluates to 0 (which
    # would return a failing exit status).
    ((++tries))
    sleep .5
  done

  # Fail here, with an obvious cause, if the job never appeared in time.
  [ "${tries}" -lt "${max_tries}" ]

  # Ensure the expected job is "up"
  local job_up
  job_up=$( (kubectl exec -n acceptance svc/prometheus-kube-prometheus-prometheus \
    -c prometheus \
    -- wget -q -O - 'http://127.0.0.1:9090/api/v1/query?query=up{job="vault-internal"}') | \
    tee /dev/stderr )
  [ "$(echo "${job_up}" | jq '.data.result[0].value[1]')" = \"1\" ]
}

# Clean up: remove both Helm releases, all PVCs and the acceptance namespace.
# Skipped entirely when CLEANUP is set to anything other than "true".
teardown() {
  [[ "${CLEANUP:-true}" == "true" ]] || return 0

  echo "helm/pvc teardown"
  helm uninstall $(name_prefix)
  helm uninstall prometheus
  kubectl delete --all pvc
  kubectl delete namespace acceptance --ignore-not-found=true
}
28 changes: 28 additions & 0 deletions test/acceptance/server-test/telemetry.yaml
Original file line number Diff line number Diff line change
@@ -0,0 +1,28 @@
# Helm values used by the server-telemetry acceptance test.
server:
  standalone:
    # Vault server configuration (HCL), passed through as a literal block.
    config: |
      ui = true
      listener "tcp" {
        tls_disable = 1
        address = "[::]:8200"
        cluster_address = "[::]:8201"
        # Enable unauthenticated metrics access (necessary for Prometheus Operator)
        telemetry {
          unauthenticated_metrics_access = "true"
        }
      }
      storage "file" {
        path = "/vault/data"
      }
      telemetry {
        prometheus_retention_time = "30s",
        disable_hostname = true
      }
# Enable the ServiceMonitor and set its scrape interval to 15s.
serverTelemetry:
  serviceMonitor:
    enabled: true
    interval: 15s
68 changes: 68 additions & 0 deletions test/unit/prometheus-prometheusrules.bats
Original file line number Diff line number Diff line change
@@ -0,0 +1,68 @@
#!/usr/bin/env bats

load _helpers

@test "prometheus/PrometheusRules-server: assertDisabled by default" {
  # Supplying rules alone must not render the PrometheusRule; this relies on
  # the chart defaults leaving both enabling switches off.
  cd "$(chart_dir)"
  # If helm template exits non-zero, `echo "---"` supplies an empty document
  # so that yq still evaluates to "false".
  local actual=$( (helm template \
      --show-only templates/prometheus-prometheusrules.yaml \
      --set 'serverTelemetry.prometheusRules.rules.foo=bar' \
      . || echo "---") | tee /dev/stderr |
      yq 'length > 0' | tee /dev/stderr)
  [ "${actual}" = "false" ]
}

@test "prometheus/PrometheusRules-server: assertDisabled with rules-defined=false" {
  # Enabling the feature without supplying any rules must not render the
  # PrometheusRule.
  cd "$(chart_dir)"
  # If helm template exits non-zero, `echo "---"` supplies an empty document
  # so that yq still evaluates to "false".
  local actual=$( (helm template \
      --show-only templates/prometheus-prometheusrules.yaml \
      --set 'serverTelemetry.prometheusRules.enabled=true' \
      . || echo "---") | tee /dev/stderr | yq 'length > 0' | tee /dev/stderr)
  [ "${actual}" = "false" ]
}

@test "prometheus/PrometheusRules-server: assertEnabled with rules-defined=true" {
  # With the feature enabled and two rule entries supplied, the resource must
  # contain exactly one group, named after the release, holding both entries.
  cd "$(chart_dir)"
  local output=$( (helm template \
      --show-only templates/prometheus-prometheusrules.yaml \
      --set 'serverTelemetry.prometheusRules.enabled=true' \
      --set 'serverTelemetry.prometheusRules.rules.foo=bar' \
      --set 'serverTelemetry.prometheusRules.rules.baz=qux' \
      .) | tee /dev/stderr )

  [ "$(echo "$output" | yq -r '.spec.groups | length')" = "1" ]
  # The group has exactly two keys: name and rules.
  [ "$(echo "$output" | yq -r '.spec.groups[0] | length')" = "2" ]
  [ "$(echo "$output" | yq -r '.spec.groups[0].name')" = "release-name-vault" ]
  [ "$(echo "$output" | yq -r '.spec.groups[0].rules | length')" = "2" ]
  [ "$(echo "$output" | yq -r '.spec.groups[0].rules.foo')" = "bar" ]
  [ "$(echo "$output" | yq -r '.spec.groups[0].rules.baz')" = "qux" ]
}

@test "prometheus/PrometheusRules-server: assertSelectors default" {
  # Without custom selectors the template falls back to the
  # `release: prometheus` label on top of the four standard chart labels.
  cd "$(chart_dir)"
  local output=$( (helm template \
      --show-only templates/prometheus-prometheusrules.yaml \
      --set 'serverTelemetry.prometheusRules.enabled=true' \
      --set 'serverTelemetry.prometheusRules.rules.foo=bar' \
      . ) | tee /dev/stderr)

  [ "$(echo "$output" | yq -r '.metadata.labels | length')" = "5" ]
  [ "$(echo "$output" | yq -r '.metadata.labels.release')" = "prometheus" ]
}

@test "prometheus/PrometheusRules-server: assertSelectors overrides" {
  # Custom selectors replace the fallback label: the four standard chart
  # labels plus the two supplied selectors give six labels in total.
  cd "$(chart_dir)"
  local output=$( (helm template \
      --show-only templates/prometheus-prometheusrules.yaml \
      --set 'serverTelemetry.prometheusRules.enabled=true' \
      --set 'serverTelemetry.prometheusRules.rules.foo=bar' \
      --set 'serverTelemetry.prometheusRules.selectors.baz=qux' \
      --set 'serverTelemetry.prometheusRules.selectors.bar=foo' \
      . ) | tee /dev/stderr)

  [ "$(echo "$output" | yq -r '.metadata.labels | length')" = "6" ]
  [ "$(echo "$output" | yq -r '.metadata.labels | has("app")')" = "false" ]
  [ "$(echo "$output" | yq -r '.metadata.labels | has("kube-prometheus-stack")')" = "false" ]
  [ "$(echo "$output" | yq -r '.metadata.labels.baz')" = "qux" ]
  [ "$(echo "$output" | yq -r '.metadata.labels.bar')" = "foo" ]
}
Loading

0 comments on commit 0407431

Please sign in to comment.