Skip to content

Commit

Permalink
[tf][testnet] k8s metrics server install for autoscale
Browse files Browse the repository at this point in the history
  • Loading branch information
rustielin authored and aptos-bot committed Apr 13, 2022
1 parent f65ac5f commit 30f74b9
Show file tree
Hide file tree
Showing 11 changed files with 90 additions and 6 deletions.
23 changes: 23 additions & 0 deletions terraform/helm/k8s-metrics/.helmignore
Original file line number Diff line number Diff line change
@@ -0,0 +1,23 @@
# Patterns to ignore when building packages.
# This supports shell glob matching, relative path matching, and
# negation (prefixed with !). Only one pattern per line.
# macOS Finder metadata
.DS_Store
# Common VCS dirs
.git/
.gitignore
.bzr/
.bzrignore
.hg/
.hgignore
.svn/
# Common backup files
*.swp
*.bak
*.tmp
*.orig
*~
# Various IDEs
.project
.idea/
*.tmproj
.vscode/
6 changes: 6 additions & 0 deletions terraform/helm/k8s-metrics/Chart.lock
Original file line number Diff line number Diff line change
@@ -0,0 +1,6 @@
# Lock file pinning the resolved chart dependencies and their digest.
# NOTE(review): this looks tool-generated (see `generated:` below) - regenerate
# it with Helm rather than editing the values by hand.
dependencies:
- name: metrics-server
  repository: https://kubernetes-sigs.github.io/metrics-server/
  version: 3.8.2
digest: sha256:fa1a19fa0f1ff4bae7f9e397277af3a832718ba50351e6ddf3b72a398d17fd0a
generated: "2022-04-12T17:19:04.312907-07:00"
8 changes: 8 additions & 0 deletions terraform/helm/k8s-metrics/Chart.yaml
Original file line number Diff line number Diff line change
@@ -0,0 +1,8 @@
# Chart metadata for the Aptos K8s metrics add-on.
# The chart ships its own templates and pulls in the upstream metrics-server
# chart as a pinned dependency.
apiVersion: v2
name: aptos-k8s-metrics
version: 0.1.0

dependencies:
  # Upstream Kubernetes Metrics Server chart, pinned to an exact version.
  - name: metrics-server
    repository: "https://kubernetes-sigs.github.io/metrics-server/"
    version: 3.8.2
4 changes: 4 additions & 0 deletions terraform/helm/k8s-metrics/README.md
Original file line number Diff line number Diff line change
@@ -0,0 +1,4 @@
Aptos K8s Metrics Addon
================================

Includes add-ons for CoreDNS autoscaling (a HorizontalPodAutoscaler for the `coredns` deployment) and the Kubernetes Metrics Server.
Binary file not shown.
25 changes: 25 additions & 0 deletions terraform/helm/k8s-metrics/templates/dns.yaml
Original file line number Diff line number Diff line change
@@ -0,0 +1,25 @@
# HorizontalPodAutoscaler for the CoreDNS deployment in kube-system.
# Scales CoreDNS between a fixed floor of 2 replicas and
# .Values.coredns.maxReplicas, driven by average CPU and memory utilization.
# NOTE(review): autoscaling/v2beta2 is deprecated as of Kubernetes 1.23 and no
# longer served from 1.26; move to autoscaling/v2 once every target cluster
# serves it.
apiVersion: autoscaling/v2beta2
kind: HorizontalPodAutoscaler
metadata:
  name: hpa-coredns
  namespace: kube-system
spec:
  # maxReplicas must be >= minReplicas (2) or the API server rejects the HPA.
  maxReplicas: {{ .Values.coredns.maxReplicas }}
  minReplicas: 2
  scaleTargetRef:
    apiVersion: apps/v1
    kind: Deployment
    name: coredns
  metrics:
    # Scale out when average CPU usage exceeds 70% of the pods' CPU requests.
    - type: Resource
      resource:
        name: cpu
        target:
          type: Utilization
          averageUtilization: 70
    # Scale out when average memory usage exceeds 70% of the memory requests.
    - type: Resource
      resource:
        name: memory
        target:
          # averageUtilization pairs with type Utilization; type AverageValue
          # expects an averageValue quantity instead, so the two must agree.
          type: Utilization
          averageUtilization: 70
2 changes: 2 additions & 0 deletions terraform/helm/k8s-metrics/values.yaml
Original file line number Diff line number Diff line change
@@ -0,0 +1,2 @@
# Settings for the CoreDNS HorizontalPodAutoscaler (templates/dns.yaml).
coredns:
  # Upper bound on CoreDNS replicas; rendered into the HPA's maxReplicas.
  # The HPA's minReplicas is fixed at 2, so keep this value at 2 or higher.
  maxReplicas: 2
16 changes: 16 additions & 0 deletions terraform/testnet/addons.tf
Original file line number Diff line number Diff line change
@@ -0,0 +1,16 @@
# Installs the local k8s-metrics Helm chart into kube-system when
# var.enable_k8s_metrics_server is true (count is 0 otherwise, so nothing is
# created). The chart's coredns.maxReplicas value is derived from the number of
# validators.
resource "helm_release" "metrics-server" {
  count       = var.enable_k8s_metrics_server ? 1 : 0
  name        = "metrics-server"
  namespace   = "kube-system"
  chart       = "${path.module}/../helm/k8s-metrics"
  max_history = 10
  wait        = false

  values = [
    jsonencode({
      coredns = {
        # The chart's CoreDNS HPA hard-codes minReplicas = 2, and Kubernetes
        # rejects an HPA whose maxReplicas is below minReplicas, so never pass
        # a ceiling lower than 2 even for very small testnets.
        maxReplicas = max(var.num_validators, 2)
      }
    })
  ]
}
4 changes: 1 addition & 3 deletions terraform/testnet/forge/files/cleanup_forge.sh
Original file line number Diff line number Diff line change
Expand Up @@ -3,6 +3,4 @@
# Script to be run in a Forge pod after the test runs as cleanup
# This separates the pod lifecycle from aptos/aptos

# CoreDNS in EKS needs to scale down to avoid cluster resource contention
# after the cluster has been scaled down by forge test cleanup
kubectl scale deployment/coredns --namespace kube-system --replicas=2
echo "Forge clean"
3 changes: 0 additions & 3 deletions terraform/testnet/forge/files/init_forge.sh
Original file line number Diff line number Diff line change
Expand Up @@ -3,9 +3,6 @@
# Init script to be run in a Forge pod
# This separates the pod lifecycle from aptos/aptos

# CoreDNS in EKS needs to scale with the size of Forge clusters
kubectl scale deployment/coredns --namespace kube-system --replicas={{ .Values.forge.numValidators }}

# set up internal helm repo, naming it testnet-internal
helm plugin install https://github.com/hypnoglow/helm-s3.git
helm repo add testnet-internal s3://{{ .Values.forge.helmBucket }}/charts
Expand Down
5 changes: 5 additions & 0 deletions terraform/testnet/variables.tf
Original file line number Diff line number Diff line change
Expand Up @@ -164,3 +164,8 @@ variable "enable_dev_vault" {
description = "TEST ONLY: Enables Vault in Dev Mode for all validators"
default = false
}

# Opt-in flag (off by default) for installing the Kubernetes metrics server.
variable "enable_k8s_metrics_server" {
  description = "Installs kubernetes metrics server: https://github.com/kubernetes-sigs/metrics-server"
  default     = false
}

0 comments on commit 30f74b9

Please sign in to comment.