Skip to content

Commit

Permalink
[fgi] push helm charts before forge run
Browse files Browse the repository at this point in the history
  • Loading branch information
rustielin authored and aptos-bot committed Mar 31, 2022
1 parent 97facf4 commit 003902a
Show file tree
Hide file tree
Showing 4 changed files with 94 additions and 11 deletions.
14 changes: 13 additions & 1 deletion .circleci/config.yml
Original file line number Diff line number Diff line change
Expand Up @@ -149,7 +149,7 @@ jobs:
steps:
- checkout
- aws-setup
- kubernetes/install-kubectl
- deploy-setup
- run: echo "export IMAGE_TAG=dev_$(git rev-parse --short=8 HEAD)" >> $BASH_ENV
# since we're running with `--build-all`, assume that if it passes, we have all images required for Forge
- run: aws ecr describe-images --repository-name="aptos/validator" --image-ids=imageTag=$IMAGE_TAG
Expand Down Expand Up @@ -259,6 +259,18 @@ commands:
- run: sudo apt-get install build-essential ca-certificates clang curl git libssl-dev pkg-config --no-install-recommends --assume-yes
- run: curl https://sh.rustup.rs -sSf | sh -s -- -y --default-toolchain stable
- run: cat $HOME/.cargo/env >> $BASH_ENV
### Installs the CLI tools needed to deploy to the clusters: kubectl and helm
deploy-setup:
  steps:
    - kubernetes/install-kubectl
    - run:
        name: Install Helm
        # https://helm.sh/docs/intro/install/#from-apt-debianubuntu
        command: |
          # -f: fail on HTTP errors instead of piping an error page into apt-key
          curl -fsSL https://baltocdn.com/helm/signing.asc | sudo apt-key add -
          sudo apt-get install apt-transport-https --yes
          echo "deb https://baltocdn.com/helm/stable/debian/ all main" | sudo tee /etc/apt/sources.list.d/helm-stable-debian.list
          sudo apt-get update
          # --yes: never block on a confirmation prompt in non-interactive CI
          sudo apt-get install helm --yes
### Sets up the permissions required for accessing AWS resources
aws-setup:
steps:
Expand Down
6 changes: 4 additions & 2 deletions scripts/fgi/README.md
Original file line number Diff line number Diff line change
@@ -1,8 +1,10 @@
# fgi

`fgi` is the entrypoint to the Forge unified testing framework. It is a python script with minimal dependencies
outside of the python standard library, for portability.
`fgi` is the entrypoint to the Forge unified testing framework. It is a python script with minimal dependencies outside of the python standard library, for portability. `fgi` must be run from the Aptos project root.

To run, you must have `kubectl` and `helm` installed, and have the proper permissions to access the clusters specified in `kube.py`.

```
# fgi must be run from the Aptos project root
./scripts/fgi/run -h
```
81 changes: 74 additions & 7 deletions scripts/fgi/kube.py
Original file line number Diff line number Diff line change
@@ -1,13 +1,24 @@
# Copyright (c) Aptos
# SPDX-License-Identifier: Apache-2.0

import json, os, random, subprocess, time
import json
import os
import random
import subprocess
import tempfile
import time

FORGE_K8S_CLUSTERS = ["forge-1"]

WORKSPACE_CHART_BUCKETS = {
"forge-0": "aptos-testnet-forge-0-helm-312428ba",
"forge-1": "aptos-testnet-forge-1-helm-a2b65112",
}

AWS_ACCOUNT = (
subprocess.check_output(
["aws", "sts", "get-caller-identity", "--query", "Account", "--output", "text"],
["aws", "sts", "get-caller-identity",
"--query", "Account", "--output", "text"],
stderr=subprocess.DEVNULL,
encoding="UTF-8",
).strip()
Expand Down Expand Up @@ -109,9 +120,11 @@ def kube_ensure_cluster(clusters):
continue

if num_running_pods > 0:
print(f"{cluster} has {num_running_pods} running forge pods. Skipping.")
print(
f"{cluster} has {num_running_pods} running forge pods. Skipping.")
elif num_pending_pods > 0:
print(f"{cluster} has {num_pending_pods} pending forge pods. Skipping.")
print(
f"{cluster} has {num_pending_pods} pending forge pods. Skipping.")
else:
return cluster

Expand All @@ -127,7 +140,8 @@ def kube_ensure_cluster(clusters):
# - no other forge pods currently Running or Pending
# - all monitoring pods are ready
def kube_select_cluster():
shuffled_clusters = random.sample(FORGE_K8S_CLUSTERS, len(FORGE_K8S_CLUSTERS))
shuffled_clusters = random.sample(
FORGE_K8S_CLUSTERS, len(FORGE_K8S_CLUSTERS))
return kube_ensure_cluster(shuffled_clusters)


Expand Down Expand Up @@ -168,7 +182,8 @@ def kube_wait_job(job_name, context):
)
return 1

print(f"Waiting for {job_name} to be scheduled. Current phase: {phase}")
print(
f"Waiting for {job_name} to be scheduled. Current phase: {phase}")
time.sleep(1)

print(f"Failed to schedule job: {job_name}")
Expand All @@ -194,7 +209,8 @@ def kube_init_context(workspace=None):
print("Failed to access EKS, try awsmfa?")
raise
# preserve the kube context by updating kubeconfig for the specified workspace
clusters = FORGE_K8S_CLUSTERS + [workspace] if workspace else FORGE_K8S_CLUSTERS
clusters = FORGE_K8S_CLUSTERS + \
[workspace] if workspace else FORGE_K8S_CLUSTERS
for cluster in clusters:
subprocess.run(
[
Expand Down Expand Up @@ -273,3 +289,54 @@ def get_forge_job_jsonpath(job_name, context, jsonpath):
],
encoding="UTF-8",
)


def helm_s3_init(workspace):
    """Prepare the local helm client to use a workspace's S3 chart repository.

    Runs three helm commands in order: install the helm-s3 plugin, initialize
    the repository at ``s3://<bucket>/charts``, and register that repository
    locally under the name ``testnet-<workspace>``.

    Args:
        workspace: Key into ``WORKSPACE_CHART_BUCKETS`` (e.g. ``"forge-1"``).

    Raises:
        KeyError: If no chart bucket is configured for ``workspace``.
        subprocess.CalledProcessError: If ``helm s3 init`` or ``helm repo add``
            exits with a non-zero status.
    """
    try:
        bucket_name = WORKSPACE_CHART_BUCKETS[workspace]
    except KeyError:
        # Same exception type as the plain lookup, but with an actionable message.
        raise KeyError(
            f"no helm chart bucket configured for workspace {workspace!r}; "
            f"known workspaces: {sorted(WORKSPACE_CHART_BUCKETS)}"
        ) from None
    repo_url = f"s3://{bucket_name}/charts"

    # Best effort: the trailing `|| true` forces a zero exit status, so
    # check=True never fires here (presumably to tolerate the plugin already
    # being installed).
    subprocess.run(
        "helm plugin install https://github.com/hypnoglow/helm-s3.git || true",
        shell=True,
        check=True,
    )
    subprocess.run(["helm", "s3", "init", repo_url], check=True)
    subprocess.run(
        ["helm", "repo", "add", f"testnet-{workspace}", repo_url],
        check=True,
    )


def helm_package_push(chart_path, chart_name, workspace, dir):
    """Package a helm chart and push the archive to the workspace's S3 repo.

    The chart is packaged into ``dir`` with chart and app version pinned to
    ``1.0.0``, then every ``<dir>/<chart_name>-*.tgz`` archive is force-pushed
    to the helm repository named ``testnet-<workspace>``.

    Args:
        chart_path: Path to the chart directory to package.
        chart_name: File-name prefix of the packaged archive.
        workspace: Workspace name; selects the ``testnet-<workspace>`` repo.
        dir: Directory that receives the packaged ``.tgz`` archive.

    Raises:
        subprocess.CalledProcessError: If ``helm package`` or ``helm s3 push``
            exits with a non-zero status.
    """
    import glob  # local import: only needed here

    subprocess.run(
        [
            "helm",
            "package",
            chart_path,
            "-d",
            dir,
            "--app-version",
            "1.0.0",
            "--version",
            "1.0.0",
        ],
        check=True,
    )

    # Expand the archive glob in Python and pass an argument list, so paths
    # containing spaces or shell metacharacters cannot break the command.
    literal_pattern = f"{dir}/{chart_name}-*.tgz"
    archives = sorted(
        glob.glob(f"{glob.escape(dir)}/{glob.escape(chart_name)}-*.tgz")
    )
    if not archives:
        # Mirror the shell's behavior for an unmatched glob: hand helm the
        # literal pattern so it fails with CalledProcessError as before.
        archives = [literal_pattern]

    subprocess.run(
        ["helm", "s3", "push", "--force", *archives, f"testnet-{workspace}"],
        check=True,
    )


def push_helm_charts(workspace):
    """Package the testnet, validator and fullnode charts and push them to S3.

    Sets up the workspace's helm S3 repository, then packages each chart into
    a scratch directory and pushes it. Chart paths are relative, so this must
    be called with the project root as the current working directory.

    Args:
        workspace: Workspace name whose chart repository receives the charts.
    """
    # (chart directory, packaged archive name prefix)
    charts = (
        ("terraform/testnet/testnet", "testnet"),
        ("terraform/helm/validator", "aptos-validator"),
        ("terraform/helm/fullnode", "aptos-fullnode"),
    )

    helm_s3_init(workspace)
    # The context manager removes the scratch directory even when a helm
    # command fails; a bare mkdtemp() would leave it behind on every run.
    with tempfile.TemporaryDirectory() as tempdir:
        for chart_path, chart_name in charts:
            helm_package_push(chart_path, chart_name, workspace, tempdir)
4 changes: 3 additions & 1 deletion scripts/fgi/run
Original file line number Diff line number Diff line change
Expand Up @@ -18,6 +18,7 @@ from kube import (
get_cluster_context,
kube_wait_job,
create_forge_job,
push_helm_charts,
)

TAG = ""
Expand Down Expand Up @@ -162,6 +163,7 @@ else:
context = get_cluster_context(workspace)
print(f"Running experiments on cluster: {workspace}")
grafana_url = get_grafana_url(workspace)
push_helm_charts(workspace)
print()

job_name, template = create_forge_job(
Expand Down Expand Up @@ -200,7 +202,7 @@ print("**********")

print("==========begin-pod-logs==========")
subprocess.call(
f"kubectl --context={context} logs -f -l job-name={job_name} | tee {OUTPUT_TEE}",
f"kubectl --context={context} logs -f -l job-name={job_name} | tee -a {OUTPUT_TEE}",
shell=True,
)
print("==========end-pod-logs==========")
Expand Down

0 comments on commit 003902a

Please sign in to comment.