Add ssh key to launch template for Nodegroups to help debug (#475) #61
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
# End-to-end test for KIT. On every push to main, for each runner OS:
#   1. build kitctl and bootstrap a KIT environment,
#   2. for every Kubernetes version in K8S_VERSIONS, create a guest ControlPlane
#      and DataPlane, install the VPC CNI and smoke-test it with nginx + curl,
#   3. always attempt to delete the guest clusters and the KIT environment.
#
# Steps without an explicit `shell:` run under `bash -e {0}`: any command that
# fails outside of a condition aborts the script. The scripts below rely on
# that for fail-fast behaviour and capture exit codes explicitly where a
# failure must be handled rather than abort the step.
name: kit-e2e-test
on:
  push:
    branches: [main]
permissions:
  id-token: write
  pull-requests: write
  contents: read
jobs:
  kit:
    strategy:
      matrix:
        os: [ubuntu-latest, macos-latest]
        # Attach the kubectl/kitctl platform name to each runner OS so the
        # scripts do not have to re-derive it in every step.
        include:
          - os: ubuntu-latest
            os_type: linux
          - os: macos-latest
            os_type: darwin
    runs-on: ${{ matrix.os }}
    env:
      KIT_OS_TYPE: ${{ matrix.os_type }}
      # Name of the KIT environment; unique per OS, run and re-run attempt.
      KIT_ENV_NAME: kit-github-${{ matrix.os_type }}-${{ github.run_id }}-${{ github.run_attempt }}
      # Space-separated guest cluster versions ("1-21" means Kubernetes 1.21).
      # Shared by the provisioning and cleanup steps so the lists cannot drift.
      K8S_VERSIONS: "1-21 1-22 1-23 1-24 1-25 1-26 1-27 1-28 1-29"
    steps:
      - uses: actions/checkout@v3
      - uses: ./.github/actions/install-go-and-dependencies
      - uses: aws-actions/configure-aws-credentials@v1
        with:
          role-to-assume: arn:aws:iam::638256630554:role/kit-github-workflow
          role-session-name: githubworkflowsession
          aws-region: us-west-2
      - name: Install kubectl
        run: |
          # -f makes curl fail on HTTP errors instead of saving an error page
          # that would then be installed as the kubectl binary.
          stable_version=$(curl -fsSL https://dl.k8s.io/release/stable.txt)
          curl -fsSLO "https://dl.k8s.io/release/${stable_version}/bin/${KIT_OS_TYPE}/amd64/kubectl"
          chmod +x kubectl
          sudo mv kubectl /usr/local/bin/
      - name: Build kitctl, provision Kit Environment and KIT guest cluster.
        run: |
          # Number of data plane nodes requested and expected to become Ready.
          NODE_COUNT=2
          GUESTKUBECONFIG=/tmp/kubeconfig

          # Run kubectl against the guest cluster instead of the KIT environment.
          guest_kubectl() {
            kubectl --kubeconfig="$GUESTKUBECONFIG" "$@"
          }

          # Poll a KIT object every 5s (up to 10 minutes) until its Ready
          # condition is "True".
          # Arguments: $1 - resource kind, $2 - object name, $3 - label for logs
          # Returns:   0 when Ready; 1 on timeout, after dumping controller logs
          wait_until_ready() {
            local kind=$1 name=$2 label=$3
            local elapsed status
            for (( elapsed = 0; elapsed < 600; elapsed += 5 )); do
              # "|| true": a transient kubectl failure must not abort the
              # step under `bash -e`; just poll again.
              status=$(kubectl get "$kind" "$name" -o jsonpath='{.status.conditions[?(@.type=="Ready")].status}' 2>/dev/null) || true
              if [[ "$status" == "True" ]]; then
                echo "$label objects are ready"
                return 0
              fi
              echo "$label objects are not ready"
              sleep 5
            done
            echo "Timeout: $label objects are not ready after 10 minutes"
            kubectl logs deploy/kit-controller -n kit || true
            return 1
          }

          # Poll the guest cluster every 5s (up to 4 minutes) until exactly
          # $1 nodes report Ready.
          # Returns: 0 when the expected number of nodes is Ready; 1 on timeout
          wait_for_ready_nodes() {
            local expected=$1
            local elapsed ready_nodes
            for (( elapsed = 0; elapsed < 240; elapsed += 5 )); do
              # grep -c exits 1 on a zero count; "|| true" keeps `bash -e` quiet.
              ready_nodes=$(guest_kubectl get nodes 2>/dev/null | grep -cw Ready) || true
              if [[ "${ready_nodes:-0}" -eq "$expected" ]]; then
                echo "Dataplane nodes are ready"
                return 0
              fi
              echo "Dataplane nodes are not ready"
              sleep 5
            done
            echo "Timeout: Dataplane nodes are not ready after 4 minutes"
            return 1
          }

          cd "${GITHUB_WORKSPACE}/substrate/cmd/kitctl"
          go build .
          ./kitctl bootstrap "${KIT_ENV_NAME}"
          export KUBECONFIG="${HOME}/.kit/env/${KIT_ENV_NAME}/etc/kubernetes/admin.conf"
          kubectl get pods -A

          read -r -a all_versions <<< "${K8S_VERSIONS}"
          for version in "${all_versions[@]}"; do
            K8S_VERSION=${version//-/.}
            GUEST_CLUSTER_NAME="guest-${KIT_OS_TYPE}-${version}-${GITHUB_RUN_ID}-${GITHUB_RUN_ATTEMPT}"

            # Heredoc bodies and their EOF terminators must stay at script
            # column 0: the manifest's own indentation is significant and
            # bash only recognises an unindented terminator.
            # NOTE(review): manifest nesting was reconstructed; confirm it
            # against the ControlPlane / DataPlane CRDs.
            kubectl apply -f - <<EOF
          apiVersion: kit.k8s.sh/v1alpha1
          kind: ControlPlane
          metadata:
            name: ${GUEST_CLUSTER_NAME} # Desired Cluster name
          spec:
            kubernetesVersion: "${K8S_VERSION}"
            etcd:
              replicas: 3
            master:
              apiServer:
                replicas: 1
          EOF
            wait_until_ready controlplane "${GUEST_CLUSTER_NAME}" "Control plane" || exit 1
            kubectl wait --for=condition=Ready pods --all --timeout=5m
            kubectl get secret "${GUEST_CLUSTER_NAME}-kube-admin-config" -ojsonpath='{.data.config}' | base64 -d > "$GUESTKUBECONFIG"

            kubectl apply -f - <<EOF
          apiVersion: kit.k8s.sh/v1alpha1
          kind: DataPlane
          metadata:
            name: ${GUEST_CLUSTER_NAME}-nodes
          spec:
            clusterName: ${GUEST_CLUSTER_NAME} # Associated Cluster name
            nodeCount: ${NODE_COUNT}
          EOF
            wait_until_ready dataplane "${GUEST_CLUSTER_NAME}-nodes" "Dataplane" || exit 1

            guest_kubectl apply -f https://raw.githubusercontent.com/aws/amazon-vpc-cni-k8s/release-1.10/config/master/aws-k8s-cni.yaml
            guest_kubectl rollout status daemonset/aws-node -n kube-system --timeout 5m
            wait_for_ready_nodes "${NODE_COUNT}" || exit 1

            # Smoke test: serve nginx behind a ClusterIP service and curl it
            # from a throw-away pod inside the guest cluster.
            guest_kubectl create deployment nginx-deployment --image=nginx
            guest_kubectl rollout status deployment/nginx-deployment --timeout 5m
            guest_kubectl expose deployment nginx-deployment --name=nginx-service --port=80 --target-port=80 --type=ClusterIP

            # Approve pending CSRs, if any. `kubectl certificate approve`
            # errors out when given no names, so skip it on an empty list.
            pending_csrs=$(guest_kubectl get csr | awk '/Pending/ {print $1}')
            if [[ -n "$pending_csrs" ]]; then
              # shellcheck disable=SC2086 -- word splitting intended: one CSR name per word
              guest_kubectl certificate approve $pending_csrs
            fi

            guest_kubectl run -i --rm --restart=Never curly-pod --image=curlimages/curl --command -- curl -s 'http://nginx-service:80'
          done
        working-directory: substrate
      - name: Delete the Guest Control plane and Dataplane
        if: always()
        run: |
          export KUBECONFIG="${HOME}/.kit/env/${KIT_ENV_NAME}/etc/kubernetes/admin.conf"
          kubectl config current-context

          read -r -a all_versions <<< "${K8S_VERSIONS}"
          for version in "${all_versions[@]}"; do
            GUEST_CLUSTER_NAME="guest-${KIT_OS_TYPE}-${version}-${GITHUB_RUN_ID}-${GITHUB_RUN_ATTEMPT}"

            # Deletion is best-effort per object: a failed delete is reported
            # but must not stop the cleanup of the remaining guest clusters.
            if kubectl get dataplane "${GUEST_CLUSTER_NAME}-nodes" > /dev/null 2>&1; then
              if kubectl delete dataplane "${GUEST_CLUSTER_NAME}-nodes"; then
                echo "dataplane deleted."
              else
                echo "Failed to delete dataplane ${GUEST_CLUSTER_NAME}-nodes." >&2
              fi
            else
              echo "No dataplane found ${GUEST_CLUSTER_NAME}-nodes."
            fi

            if kubectl get controlplane "${GUEST_CLUSTER_NAME}" > /dev/null 2>&1; then
              if kubectl delete controlplane "${GUEST_CLUSTER_NAME}"; then
                echo "Controlplane deleted."
              else
                echo "Failed to delete controlplane ${GUEST_CLUSTER_NAME}." >&2
              fi
            else
              echo "No controlplane found ${GUEST_CLUSTER_NAME}."
            fi
          done

          # Give dependent resources (e.g. load balancers) time to be deleted.
          sleep 120
        continue-on-error: true
        working-directory: substrate
      - name: Delete the kit environment
        if: always()
        run: |
          # `kitctl delete` sporadically hangs or leaves a few resources
          # behind, so every attempt is bounded by a timeout and the delete is
          # run MAX_ATTEMPTS times, with a short pause between passes, even
          # when an earlier pass succeeded.
          # TODO: fix `kitctl delete` to make it more robust and not hang forever.
          cd "${GITHUB_WORKSPACE}/substrate/cmd/kitctl"

          timeout_cmd=timeout
          if [[ "${KIT_OS_TYPE}" == "darwin" ]]; then
            # macOS has no `timeout`; coreutils provides it as `gtimeout`.
            timeout_cmd=gtimeout
            if ! command -v gtimeout > /dev/null 2>&1; then
              if ! command -v brew > /dev/null 2>&1; then
                /bin/bash -c "$(curl -fsSL https://raw.githubusercontent.com/Homebrew/install/HEAD/install.sh)"
              fi
              brew install coreutils
            fi
          fi

          export KUBECONFIG="${HOME}/.kit/env/${KIT_ENV_NAME}/etc/kubernetes/admin.conf"

          # Maximum number of delete passes.
          MAX_ATTEMPTS=2
          # Pause between passes, in seconds.
          RETRY_DELAY=30
          # Timeout for a single `kitctl delete` pass, in seconds.
          MAX_DURATION=280
          # Becomes true as soon as one pass exits successfully.
          COMMAND_COMPLETED=false

          for (( attempt = 1; attempt <= MAX_ATTEMPTS; attempt++ )); do
            # Capture the exit code with "||": under `bash -e` a bare failing
            # command would abort the script before it could be inspected.
            status=0
            "$timeout_cmd" "$MAX_DURATION" ./kitctl delete "${KIT_ENV_NAME}" || status=$?

            if (( status == 0 )); then
              echo "Command executed successfully for $attempt iteration."
              COMMAND_COMPLETED=true
            elif (( status == 124 )); then
              echo "Command timed out."
            elif (( status == 143 )); then
              echo "Command terminated by SIGTERM signal."
            else
              echo "Command failed with exit code $status."
            fi

            # Pause only when another pass follows.
            if (( attempt < MAX_ATTEMPTS )); then
              echo "Next delete attempt in $RETRY_DELAY seconds."
              sleep "$RETRY_DELAY"
            fi
          done

          if [[ "$COMMAND_COMPLETED" == false ]]; then
            echo "Failed to delete resource after $MAX_ATTEMPTS attempts." >&2
            exit 1
          fi
        continue-on-error: true
        working-directory: substrate