diff --git a/.github/workflows/build.yaml b/.github/workflows/build.yaml index c34084ca..7545d305 100644 --- a/.github/workflows/build.yaml +++ b/.github/workflows/build.yaml @@ -15,7 +15,7 @@ jobs: - name: YAML Lint run: | yamllint -d "{extends: relaxed, rules: {line-length: {max: 250}}}" \ - --no-warnings rockcraft.yaml + --no-warnings images/charmed-spark/rockcraft.yaml build: runs-on: ubuntu-latest @@ -33,7 +33,6 @@ jobs: run: | sudo snap install yq sudo snap install rockcraft --classic --edge - sudo snap install --devmode --channel edge skopeo - name: Build image run: sudo make build @@ -41,17 +40,28 @@ jobs: - name: Build image (Jupyter) run: sudo make build FLAVOUR=jupyter + - name: Build image (Kyuubi) + run: sudo make build FLAVOUR=kyuubi + - name: Get Artifact Name id: artifact run: | - ARTIFACT=$(make help | grep 'Artifact: ') - echo "name=${ARTIFACT#'Artifact: '}" >> $GITHUB_OUTPUT + BASE_ARTIFACT=$(make help | grep 'Artifact: ') + echo "base_artifact_name=${BASE_ARTIFACT#'Artifact: '}" >> $GITHUB_OUTPUT + JUPYTER_ARTIFACT=$(make help FLAVOUR=jupyter | grep 'Artifact: ') + echo "jupyter_artifact_name=${JUPYTER_ARTIFACT#'Artifact: '}" >> $GITHUB_OUTPUT + KYUUBI_ARTIFACT=$(make help FLAVOUR=kyuubi | grep 'Artifact: ') + echo "kyuubi_artifact_name=${KYUUBI_ARTIFACT#'Artifact: '}" >> $GITHUB_OUTPUT - name: Change artifact permissions - run: sudo chmod a+r ${{ steps.artifact.outputs.name }} + run: sudo chmod a+r ${{ steps.artifact.outputs.base_artifact_name }} ${{ steps.artifact.outputs.jupyter_artifact_name }} ${{ steps.artifact.outputs.kyuubi_artifact_name }} - name: Upload locally built artifact uses: actions/upload-artifact@v4 with: name: charmed-spark - path: ${{ steps.artifact.outputs.name }} + path: | + ${{ steps.artifact.outputs.base_artifact_name }} + ${{ steps.artifact.outputs.jupyter_artifact_name }} + ${{ steps.artifact.outputs.kyuubi_artifact_name }} + diff --git a/.github/workflows/integration.yaml b/.github/workflows/integration.yaml index 049ff6b3..8798a1fd 100644 --- a/.github/workflows/integration.yaml +++ b/.github/workflows/integration.yaml @@ -27,12 +27,16 @@ jobs: - name: Get Artifact Name id: artifact run: | - ARTIFACT=$(make help | grep 'Artifact: ') - echo "name=${ARTIFACT#'Artifact: '}" >> $GITHUB_OUTPUT + BASE_ARTIFACT=$(make help | grep 'Artifact: ') + echo "base_artifact_name=${BASE_ARTIFACT#'Artifact: '}" >> $GITHUB_OUTPUT + JUPYTER_ARTIFACT=$(make help FLAVOUR=jupyter | grep 'Artifact: ') + echo "jupyter_artifact_name=${JUPYTER_ARTIFACT#'Artifact: '}" >> $GITHUB_OUTPUT + KYUUBI_ARTIFACT=$(make help FLAVOUR=kyuubi | grep 'Artifact: ') + echo "kyuubi_artifact_name=${KYUUBI_ARTIFACT#'Artifact: '}" >> $GITHUB_OUTPUT - name: Install and configure microk8s run: | - make microk8s MICROK8S=${{ matrix.k8s_version }} + make microk8s-setup MICROK8S_CHANNEL=${{ matrix.k8s_version }} - name: Download artifact uses: actions/download-artifact@v4 @@ -43,26 +47,44 @@ jobs: - name: Run tests run: | # Unpack Artifact - mv charmed-spark/${{ steps.artifact.outputs.name }} . - rmdir charmed-spark + mv charmed-spark/${{ steps.artifact.outputs.base_artifact_name }} . 
# Import artifact into docker with new tag - sudo make import TARGET=docker REPOSITORY=ghcr.io/canonical/ PREFIX=test- \ - -o ${{ steps.artifact.outputs.name }} + sudo make docker-import REPOSITORY=ghcr.io/canonical/ PREFIX=test- \ + -o ${{ steps.artifact.outputs.base_artifact_name }} # Import artifact into microk8s to be used in integration tests - sudo make import TARGET=microk8s PREFIX=test- REPOSITORY=ghcr.io/canonical/ \ - -o $(find .make_cache -name "*.tag") + sudo make microk8s-import PREFIX=test- REPOSITORY=ghcr.io/canonical/ \ + -o ${{ steps.artifact.outputs.base_artifact_name }} sg microk8s -c "make tests" - name: Run tests (Jupyter) run: | + # Unpack Artifact + mv charmed-spark/${{ steps.artifact.outputs.jupyter_artifact_name }} . + # Import artifact into docker with new tag - sudo make import \ - FLAVOUR=jupyter TARGET=microk8s \ - TAG=$(yq .version rockcraft.yaml) \ - REPOSITORY=ghcr.io/canonical/ PREFIX=test- \ - -o $(find .make_cache -name "*.tag") + sudo make microk8s-import \ + FLAVOUR=jupyter \ + TAG=$(yq .version images/charmed-spark/rockcraft.yaml) \ + REPOSITORY=ghcr.io/canonical/ \ + PREFIX=test- \ + -o ${{ steps.artifact.outputs.jupyter_artifact_name }} sg microk8s -c "make tests FLAVOUR=jupyter" + + - name: Run tests (Kyuubi) + run: | + # Unpack Artifact + mv charmed-spark/${{ steps.artifact.outputs.kyuubi_artifact_name }} . + rmdir charmed-spark + + # Import artifact into docker with new tag + sudo make microk8s-import \ + FLAVOUR=kyuubi \ + TAG=$(yq .version images/charmed-spark/rockcraft.yaml) \ + REPOSITORY=ghcr.io/canonical/ PREFIX=test- \ + -o ${{ steps.artifact.outputs.kyuubi_artifact_name }} + + sg microk8s -c "make tests FLAVOUR=kyuubi" diff --git a/.github/workflows/publish.yaml b/.github/workflows/publish.yaml index d70acb4a..143f318e 100644 --- a/.github/workflows/publish.yaml +++ b/.github/workflows/publish.yaml @@ -23,8 +23,8 @@ jobs: - name: Extract ROCK metadata shell: bash run: | - VERSION=$(yq '(.version|split("-"))[0]' rockcraft.yaml) - BASE=$(yq '(.base|split("@"))[1]' rockcraft.yaml) + VERSION=$(yq '(.version|split("-"))[0]' images/charmed-spark/rockcraft.yaml) + BASE=$(yq '(.base|split("@"))[1]' images/charmed-spark/rockcraft.yaml) echo "version=${VERSION}" >> $GITHUB_OUTPUT echo "base=${BASE}" >> $GITHUB_OUTPUT id: rock_metadata @@ -67,8 +67,12 @@ jobs: - name: Get Artifact Name id: artifact run: | - ARTIFACT=$(make help | grep 'Artifact: ') - echo "name=${ARTIFACT#'Artifact: '}" >> $GITHUB_OUTPUT + BASE_ARTIFACT=$(make help | grep 'Artifact: ') + echo "base_artifact_name=${BASE_ARTIFACT#'Artifact: '}" >> $GITHUB_OUTPUT + JUPYTER_ARTIFACT=$(make help FLAVOUR=jupyter | grep 'Artifact: ') + echo "jupyter_artifact_name=${JUPYTER_ARTIFACT#'Artifact: '}" >> $GITHUB_OUTPUT + KYUUBI_ARTIFACT=$(make help FLAVOUR=kyuubi | grep 'Artifact: ') + echo "kyuubi_artifact_name=${KYUUBI_ARTIFACT#'Artifact: '}" >> $GITHUB_OUTPUT - name: Download artifact uses: actions/download-artifact@v4 @@ -79,8 +83,7 @@ jobs: - name: Publish Image to Channel run: | # Unpack artifact - mv charmed-spark/${{ steps.artifact.outputs.name }} . - rmdir charmed-spark + mv charmed-spark/${{ steps.artifact.outputs.base_artifact_name }} . 
REPOSITORY="ghcr.io/canonical/" RISK=${{ needs.release_checks.outputs.risk }} @@ -90,12 +93,12 @@ jobs: IMAGE_NAME=$(make help REPOSITORY=${REPOSITORY} TAG=${TAG} help | grep "Image\:" | cut -d ":" -f2 | xargs) # Import artifact into docker with new tag - sudo make import TARGET=docker REPOSITORY=${REPOSITORY} TAG=${TAG}\ + sudo make docker-import REPOSITORY=${REPOSITORY} TAG=${TAG}\ -o ${{ steps.artifact.outputs.name }} # Add relevant labels COMMIT_ID=$(git log -1 --format=%H) - DESCRIPTION=$(yq .description rockcraft.yaml | xargs) + DESCRIPTION=$(yq .description images/charmed-spark/rockcraft.yaml | xargs) echo "FROM ${IMAGE_NAME}:${TAG}" | docker build --label org.opencontainers.image.description="${DESCRIPTION}" --label org.opencontainers.image.revision="${COMMIT_ID}" --label org.opencontainers.image.source="${{ github.repositoryUrl }}" -t "${IMAGE_NAME}:${TAG}" - @@ -113,20 +116,24 @@ jobs: - name: Publish JupyterLab Image to Channel run: | - + # Unpack artifact + mv charmed-spark/${{ steps.artifact.outputs.jupyter_artifact_name }} . + REPOSITORY="ghcr.io/canonical/" RISK=${{ needs.release_checks.outputs.risk }} TRACK=${{ needs.release_checks.outputs.track }} if [ ! -z "$RISK" ] && [ "${RISK}" != "no-risk" ]; then TAG=${TRACK}_${RISK}; else TAG=${TRACK}; fi # Import artifact into docker with new tag - sudo make import TARGET=docker FLAVOUR=jupyter \ - REPOSITORY=${REPOSITORY} TAG=${TAG}\ - -o $(find .make_cache -name "*.tag") + sudo make docker-import \ + FLAVOUR=jupyter \ + REPOSITORY=${REPOSITORY} \ + TAG=${TAG} \ + -o ${{ steps.artifact.outputs.jupyter_artifact_name }} IMAGE_NAME=$(make help FLAVOUR=jupyter REPOSITORY=${REPOSITORY} TAG=${TAG} help | grep "Image\:" | cut -d ":" -f2 | xargs) - DESCRIPTION=$(sed -n '/^#\ \-/,/^#\ \-/ p' build/Dockerfile.jupyter | sed 's/^\#\ //g' | sed '$ d' | tail -n+2 | xargs) + DESCRIPTION=$(yq .flavours.jupyter.image_description images/metadata.yaml | xargs) echo "FROM ${IMAGE_NAME}:${TAG}" | docker build --label org.opencontainers.image.description="${DESCRIPTION}" --label org.opencontainers.image.revision="${COMMIT_ID}" --label org.opencontainers.image.source="${{ github.repositoryUrl }}" -t "${IMAGE_NAME}:${TAG}" - @@ -142,3 +149,41 @@ jobs: echo "Publishing ${IMAGE_NAME}:${VERSION_TAG}" docker push ${IMAGE_NAME}:${VERSION_TAG} fi + + + - name: Publish Kyuubi Image to Channel + run: | + # Unpack artifact + mv charmed-spark/${{ steps.artifact.outputs.kyuubi_artifact_name }} . + rmdir charmed-spark + + REPOSITORY="ghcr.io/canonical/" + RISK=${{ needs.release_checks.outputs.risk }} + TRACK=${{ needs.release_checks.outputs.track }} + if [ ! 
-z "$RISK" ] && [ "${RISK}" != "no-risk" ]; then TAG=${TRACK}_${RISK}; else TAG=${TRACK}; fi + + # Import artifact into docker with new tag + sudo make docker-import \ + FLAVOUR=kyuubi \ + REPOSITORY=${REPOSITORY} \ + TAG=${TAG} \ + -o ${{ steps.artifact.outputs.kyuubi_artifact_name }} + + IMAGE_NAME=$(make help FLAVOUR=kyuubi REPOSITORY=${REPOSITORY} TAG=${TAG} help | grep "Image\:" | cut -d ":" -f2 | xargs) + + DESCRIPTION=$(yq .flavours.kyuubi.image_description images/metadata.yaml | xargs) + + echo "FROM ${IMAGE_NAME}:${TAG}" | docker build --label org.opencontainers.image.description="${DESCRIPTION}" --label org.opencontainers.image.revision="${COMMIT_ID}" --label org.opencontainers.image.source="${{ github.repositoryUrl }}" -t "${IMAGE_NAME}:${TAG}" - + + echo "Publishing ${IMAGE_NAME}:${TAG}" + docker push ${IMAGE_NAME}:${TAG} + + if [[ "$RISK" == "edge" ]]; then + VERSION_LONG=$(make help FLAVOUR=kyuubi | grep "Tag\:" | cut -d ":" -f2 | xargs) + VERSION_TAG="${VERSION_LONG}-${{ needs.release_checks.outputs.base }}_edge" + + docker tag ${IMAGE_NAME}:${TAG} ${IMAGE_NAME}:${VERSION_TAG} + + echo "Publishing ${IMAGE_NAME}:${VERSION_TAG}" + docker push ${IMAGE_NAME}:${VERSION_TAG} + fi \ No newline at end of file diff --git a/.github/workflows/trivy.yml b/.github/workflows/trivy.yml index b824eb1f..27666359 100644 --- a/.github/workflows/trivy.yml +++ b/.github/workflows/trivy.yml @@ -14,14 +14,14 @@ jobs: steps: - name: Checkout code uses: actions/checkout@v4 - - name: Install skopeo + - name: Install rockcraft (for skopeo) run: | - sudo snap install --devmode --channel edge skopeo + sudo snap install rockcraft --classic --edge - name: Get Artifact Name id: artifact run: | - ARTIFACT=$(make help | grep 'Artifact: ') - echo "name=${ARTIFACT#'Artifact: '}" >> $GITHUB_OUTPUT + BASE_ARTIFACT=$(make help | grep 'Artifact: ') + echo "base_artifact_name=${BASE_ARTIFACT#'Artifact: '}" >> $GITHUB_OUTPUT - name: Download artifact uses: actions/download-artifact@v4 with: @@ -30,10 +30,9 @@ jobs: - name: Import locally run: | # Unpack artifact - mv charmed-spark/${{ steps.artifact.outputs.name }} . - rmdir charmed-spark - sudo skopeo --insecure-policy copy \ - docker-archive:${{ steps.artifact.outputs.name }} \ + mv charmed-spark/${{ steps.artifact.outputs.base_artifact_name }} . + sudo rockcraft.skopeo --insecure-policy copy \ + docker-archive:${{ steps.artifact.outputs.base_artifact_name }} \ docker-daemon:trivy/charmed-spark:test - name: Run Trivy vulnerability scanner uses: aquasecurity/trivy-action@master diff --git a/.gitignore b/.gitignore index 748fefbc..5a5bbea0 100644 --- a/.gitignore +++ b/.gitignore @@ -5,4 +5,5 @@ env/ .make_cache/ derby.log metastore_db/ -spark-sql.out \ No newline at end of file +spark-sql.out +.vscode \ No newline at end of file diff --git a/CONTRIBUTING.md b/CONTRIBUTING.md index c27e3b68..f95a32e9 100644 --- a/CONTRIBUTING.md +++ b/CONTRIBUTING.md @@ -10,11 +10,10 @@ cd charmed-spark-rock ### Installing Prerequisites ```bash -sudo snap install rockcraft --edge +sudo snap install rockcraft --edge --classic sudo snap install docker sudo snap install lxd sudo snap install yq -sudo snap install skopeo --edge --devmode ``` ### Configuring Prerequisites diff --git a/Makefile b/Makefile index 9fd91e1e..4c98c656 100644 --- a/Makefile +++ b/Makefile @@ -11,146 +11,273 @@ # EXTERNAL VARIABLES # ====================== -REPOSITORY := -PREFIX := -TARGET := docker +# The repository where the image is going to be hosted. 
+# eg, ghcr.io/canonical [To be passed when you 'make' the recipe] +REPOSITORY := + +# The prefix to be pre-pended to image name +# eg, test- [To be passed when you 'make' the recipe] +PREFIX := + PLATFORM := amd64 + +# The flavor of the image, (one of spark, jupyter and kyuubi) FLAVOUR := "spark" -MICROK8S := "1.28/stable" + +# The channel of `microk8s` snap to be used for testing +MICROK8S_CHANNEL := "1.28/stable" # ====================== # INTERNAL VARIABLES # ====================== +# The directory to be used as cache, where intermediate tag files will be stored. _MAKE_DIR := .make_cache $(shell mkdir -p $(_MAKE_DIR)) -K8S_TAG := $(_MAKE_DIR)/.k8s_tag -AWS_TAG := $(_MAKE_DIR)/.aws_tag -IMAGE_NAME := $(shell yq .name rockcraft.yaml) +# eg, charmed-spark +ROCK_NAME := $(shell yq .name images/charmed-spark/rockcraft.yaml) + +# eg, 3.4.2 +SPARK_VERSION := $(shell yq .version images/charmed-spark/rockcraft.yaml) -VERSION := $(shell yq .version rockcraft.yaml) +# eg, 1.9.0 +KYUUBI_VERSION=$(shell yq .flavours.kyuubi.version images/metadata.yaml) -VERSION_FLAVOUR=$(shell grep "version:$(FLAVOUR)" rockcraft.yaml | sed "s/^#//" | cut -d ":" -f3) +# eg, 4.0.11 +JUPYTER_VERSION=$(shell yq .flavours.jupyter.version images/metadata.yaml) -_ROCK_OCI=$(IMAGE_NAME)_$(VERSION)_$(PLATFORM).rock +# The filename of the Rock file built during the build process. +# eg, charmed-spark_3.4.2_amd64.rock +ROCK_FILE=$(ROCK_NAME)_$(SPARK_VERSION)_$(PLATFORM).rock -CHARMED_OCI_FULL_NAME=$(REPOSITORY)$(PREFIX)$(IMAGE_NAME) -CHARMED_OCI_JUPYTER=$(CHARMED_OCI_FULL_NAME)-jupyterlab +# The filename of the final artifact built for Spark image +# eg, charmed-spark_3.4.2_amd64.tar +SPARK_ARTIFACT=$(ROCK_NAME)_$(SPARK_VERSION)_$(PLATFORM).tar +# The filename of the final artifact built for Jupyter image +# eg, charmed-spark-jupyterlab_3.4.2_amd64.tar +JUPYTER_ARTIFACT=$(ROCK_NAME)-jupyterlab_$(SPARK_VERSION)_$(PLATFORM).tar + +# The filename of the final artifact built for Kyuubi image +# eg, charmed-spark-kyuubi_3.4.2_amd64.tar +KYUUBI_ARTIFACT=$(ROCK_NAME)-kyuubi_$(SPARK_VERSION)_$(PLATFORM).tar + + +# Decide on what the name of artifact, display name and tag for the image will be. 
+# +# ARTIFACT: The name of the tarfile (artifact) that will be generated after building the image +# DISPLAY_NAME: The fully qualified name of the image without tags +# TAG: The tag for the image +# +# For eg, +# ARTIFACT = "charmed-spark_3.4.2_amd64.tar" TAG = "3.4.2" DISPLAY_NAME = "ghcr.io/canonical/charmed-spark" +# or, +# ARTIFACT = "charmed-spark-jupyterlab_3.4.2_amd64.tar" TAG = "3.4.2-4.0.11" DISPLAY_NAME = "ghcr.io/canonical/charmed-spark-jupyterlab" +# or, +# ARTIFACT = "charmed-spark-kyuubi_3.4.2_amd64.tar" TAG = "3.4.2-1.9.0" DISPLAY_NAME = "ghcr.io/canonical/charmed-spark-kyuubi" +# ifeq ($(FLAVOUR), jupyter) -NAME=$(CHARMED_OCI_JUPYTER) -TAG=$(VERSION)-$(VERSION_FLAVOUR) -BASE_NAME=$(IMAGE_NAME)-jupyterlab_$(VERSION)_$(PLATFORM).tar + DISPLAY_NAME=$(REPOSITORY)$(PREFIX)$(ROCK_NAME)-jupyterlab + TAG=$(SPARK_VERSION)-$(JUPYTER_VERSION) + ARTIFACT=$(JUPYTER_ARTIFACT) +else ifeq ($(FLAVOUR), kyuubi) + DISPLAY_NAME=$(REPOSITORY)$(PREFIX)$(ROCK_NAME)-kyuubi + TAG=$(SPARK_VERSION)-$(KYUUBI_VERSION) + ARTIFACT=$(KYUUBI_ARTIFACT) else -NAME=$(CHARMED_OCI_FULL_NAME) -TAG=$(VERSION) -BASE_NAME=$(IMAGE_NAME)_$(VERSION)_$(PLATFORM).tar + DISPLAY_NAME=$(REPOSITORY)$(PREFIX)$(ROCK_NAME) + TAG=$(SPARK_VERSION) + ARTIFACT=$(SPARK_ARTIFACT) endif -FTAG=$(_MAKE_DIR)/$(NAME)/$(TAG) -CHARMED_OCI_TAG := $(_MAKE_DIR)/$(CHARMED_OCI_FULL_NAME)/$(TAG) -CHARMED_OCI_JUPYTER_TAG := $(_MAKE_DIR)/$(CHARMED_OCI_JUPYTER)/$(TAG) +# Marker files that are used to specify certain make targets have been rebuilt. +# +# SPARK_MARKER: The Spark image has been built and has been registered with docker registry +# JUPYTER_MARKER: The Jupyter image has been built and has been registered with docker registry +# KYUUBI_MARKER: The Kyuubi image has been built and has been registered with docker registry +# K8S_MARKER: The MicroK8s cluster has been installed and configured successfully +# AWS_MARKER: The AWS CLI has been installed and configured with valid S3 credentials from MinIO +SPARK_MARKER=$(_MAKE_DIR)/spark-$(SPARK_VERSION).tag +JUPYTER_MARKER=$(_MAKE_DIR)/jupyter-$(JUPYTER_VERSION).tag +KYUUBI_MARKER=$(_MAKE_DIR)/kyuubi-$(KYUUBI_VERSION).tag +K8S_MARKER=$(_MAKE_DIR)/k8s.tag +AWS_MARKER=$(_MAKE_DIR)/aws.tag + -_TMP_OCI_NAME := stage-$(IMAGE_NAME) -_TMP_OCI_TAG := $(_MAKE_DIR)/$(_TMP_OCI_NAME)/$(TAG) +# The names of different flavours of the image in the docker container registry +STAGED_IMAGE_DOCKER_ALIAS=staged-charmed-spark:latest +SPARK_DOCKER_ALIAS=charmed-spark:$(SPARK_VERSION) +JUPYTER_DOCKER_ALIAS=charmed-spark-jupyter:$(SPARK_VERSION)-$(JUPYTER_VERSION) +KYUUBI_DOCKER_ALIAS=charmed-spark-kyuubi:$(SPARK_VERSION)-$(KYUUBI_VERSION) + + +# ====================== +# RECIPES +# ====================== + + +# Display the help message that includes the available recipes provided by this Makefile, +# the name of the artifacts, instructions, etc. 
help: - @echo "---------------HELP-----------------" - @echo "Name: $(IMAGE_NAME)" + @echo "-------------------------HELP---------------------------" + @echo "Name: $(ROCK_NAME)" @echo "Version: $(VERSION)" @echo "Platform: $(PLATFORM)" @echo " " @echo "Flavour: $(FLAVOUR)" @echo " " - @echo "Image: $(NAME)" + @echo "Image: $(DISPLAY_NAME)" @echo "Tag: $(TAG)" - @echo "Artifact: $(BASE_NAME)" + @echo "Artifact: $(ARTIFACT)" @echo " " @echo "Type 'make' followed by one of these keywords:" @echo " " - @echo " - build for creating the OCI Images" - @echo " - import for importing the images to a container registry" - @echo " - microk8s setup a local Microk8s cluster for running integration tests" - @echo " - tests for running integration tests" - @echo " - clean for removing cache file" - @echo "------------------------------------" - -$(_ROCK_OCI): rockcraft.yaml + @echo " - rock for building the rock image to a rock file" + @echo " - build FLAVOUR=xxxx for creating the OCI Images with flavour xxxx" + @echo " - docker-import for importing the images to Docker container registry" + @echo " - microk8s-import for importing the images to MicroK8s container registry" + @echo " - microk8s-setup to setup a local Microk8s cluster for running integration tests" + @echo " - aws-cli-setup to setup the AWS CLI and S3 credentials for running integration tests" + @echo " - tests FLAVOUR=xxxx for running integration tests for flavour xxxx" + @echo " - clean for removing cache files, artifact file and rock file" + @echo "--------------------------------------------------------" + + + +# Recipe for creating a rock image from the current repository. +# +# ROCK_FILE => charmed-spark_3.4.2_amd64.rock +# +$(ROCK_FILE): images/charmed-spark/rockcraft.yaml $(wildcard images/charmed-spark/*/*) @echo "=== Building Charmed Image ===" - rockcraft pack + (cd images/charmed-spark && rockcraft pack) + mv images/charmed-spark/$(ROCK_FILE) . + + +rock: $(ROCK_FILE) -$(_TMP_OCI_TAG).tag: $(_ROCK_OCI) - skopeo --insecure-policy \ + +# Recipe that builds Spark image and exports it to a tarfile in the current directory +$(SPARK_MARKER): $(ROCK_FILE) images/charmed-spark/Dockerfile + rockcraft.skopeo --insecure-policy \ copy \ - oci-archive:"$(_ROCK_OCI)" \ - docker-daemon:"$(_TMP_OCI_NAME):$(TAG)" - if [ ! 
-d "$(_MAKE_DIR)/$(_TMP_OCI_NAME)" ]; then mkdir -p "$(_MAKE_DIR)/$(_TMP_OCI_NAME)"; fi - touch $(_TMP_OCI_TAG).tag - -$(K8S_TAG): - @echo "=== Setting up and configure local Microk8s cluster ===" - /bin/bash ./tests/integration/setup-microk8s.sh $(MICROK8S) - sg microk8s ./tests/integration/config-microk8s.sh - @touch $(K8S_TAG) + oci-archive:"$(ROCK_FILE)" \ + docker-daemon:"$(STAGED_IMAGE_DOCKER_ALIAS)" + + docker build -t $(SPARK_DOCKER_ALIAS) \ + --build-arg BASE_IMAGE="$(STAGED_IMAGE_DOCKER_ALIAS)" \ + images/charmed-spark + + docker save $(SPARK_DOCKER_ALIAS) -o $(SPARK_ARTIFACT) + + touch $(SPARK_MARKER) + + +# Shorthand recipe for building Spark image +spark: $(SPARK_MARKER) + + +# Recipe that builds Jupyter image and exports it to a tarfile in the current directory +$(JUPYTER_MARKER): $(SPARK_MARKER) images/charmed-spark-jupyter/Dockerfile $(wildcard images/charmed-spark-jupyter/*/*) + docker build -t $(JUPYTER_DOCKER_ALIAS) \ + --build-arg BASE_IMAGE=$(SPARK_DOCKER_ALIAS) \ + --build-arg JUPYTERLAB_VERSION="$(JUPYTER_VERSION)" \ + images/charmed-spark-jupyter + + docker save $(JUPYTER_DOCKER_ALIAS) -o $(JUPYTER_ARTIFACT) + + touch $(JUPYTER_MARKER) -$(AWS_TAG): $(K8S_TAG) - @echo "=== Setting up and configure AWS CLI ===" - /bin/bash ./tests/integration/setup-aws-cli.sh - touch $(AWS_TAG) - -microk8s: $(K8S_TAG) - -$(CHARMED_OCI_TAG).tag: $(_TMP_OCI_TAG).tag build/Dockerfile - docker build -t "$(CHARMED_OCI_FULL_NAME):$(TAG)" \ - --build-arg BASE_IMAGE="$(_TMP_OCI_NAME):$(TAG)" \ - -f build/Dockerfile . - if [ ! -d "$(_MAKE_DIR)/$(CHARMED_OCI_FULL_NAME)" ]; then mkdir -p "$(_MAKE_DIR)/$(CHARMED_OCI_FULL_NAME)"; fi - touch $(CHARMED_OCI_TAG).tag - -$(CHARMED_OCI_JUPYTER_TAG).tag: $(CHARMED_OCI_TAG).tag build/Dockerfile.jupyter files/jupyter - docker build -t "$(CHARMED_OCI_JUPYTER):$(TAG)" \ - --build-arg BASE_IMAGE="$(CHARMED_OCI_FULL_NAME):$(TAG)" \ - --build-arg JUPYTERLAB_VERSION="$(VERSION_FLAVOUR)" \ - -f build/Dockerfile.jupyter . - if [ ! -d "$(_MAKE_DIR)/$(CHARMED_OCI_JUPYTER)" ]; then mkdir -p "$(_MAKE_DIR)/$(CHARMED_OCI_JUPYTER)"; fi - touch $(CHARMED_OCI_JUPYTER_TAG).tag - -$(_MAKE_DIR)/%/$(TAG).tar: $(_MAKE_DIR)/%/$(TAG).tag - docker save $*:$(TAG) -o $(_MAKE_DIR)/$*/$(TAG).tar - -$(BASE_NAME): $(FTAG).tar - @echo "=== Creating $(BASE_NAME) OCI archive (flavour: $(FLAVOUR)) ===" - cp $(FTAG).tar $(BASE_NAME) - -build: $(BASE_NAME) - -ifeq ($(TARGET), docker) -import: build - @echo "=== Importing image $(NAME):$(TAG) into docker ===" - $(eval IMAGE := $(shell docker load -i $(BASE_NAME))) - docker tag $(lastword $(IMAGE)) $(NAME):$(TAG) - if [ ! 
-d "$(_MAKE_DIR)/$(NAME)" ]; then mkdir -p "$(_MAKE_DIR)/$(NAME)"; fi - touch $(FTAG).tag -endif -ifeq ($(TARGET), microk8s) -import: $(K8S_TAG) build - @echo "=== Importing image $(NAME):$(TAG) into Microk8s container registry ===" - microk8s ctr images import --base-name $(NAME):$(TAG) $(BASE_NAME) +# Shorthand recipe for building Jupyter image +jupyter: $(JUPYTER_MARKER) + + +# Recipe that builds Kyuubi image and exports it to a tarfile in the current directory +$(KYUUBI_MARKER): $(SPARK_MARKER) images/charmed-spark-kyuubi/Dockerfile $(wildcard images/charmed-spark-kyuubi/*/*) + docker build -t $(KYUUBI_DOCKER_ALIAS) \ + --build-arg BASE_IMAGE=$(SPARK_DOCKER_ALIAS) \ + images/charmed-spark-kyuubi + + docker save $(KYUUBI_DOCKER_ALIAS) -o $(KYUUBI_ARTIFACT) + + touch $(KYUUBI_MARKER) + + +# Shorthand recipe for building Kyuubi image +kyuubi: $(KYUUBI_MARKER) + + +$(ARTIFACT): +ifeq ($(FLAVOUR), jupyter) + make jupyter +else ifeq ($(FLAVOUR), kyuubi) + make kyuubi +else + make spark endif -tests: $(K8S_TAG) $(AWS_TAG) + +# Shorthand recipe to build the image. The flavour is picked from FLAVOUR variable to `make`. +# +# eg, ARTIFACT => charmed-spark_3.4.2_amd64.tar +build: $(ARTIFACT) + + +# Recipe for cleaning up the build files and environment +# Cleans the make cache directory along with .rock and .tar files +clean: + @echo "=== Cleaning environment ===" + rm -rf $(_MAKE_DIR) *.rock *.tar + (cd images/charmed-spark && rockcraft clean) + + +# Recipe that imports the image into docker container registry +docker-import: $(ARTIFACT) + $(eval IMAGE := $(shell docker load -i $(ARTIFACT))) + docker tag $(lastword $(IMAGE)) $(DISPLAY_NAME):$(TAG) + + +# Recipe that imports the image into microk8s container registry +microk8s-import: $(ARTIFACT) $(K8S_MARKER) + $(eval IMAGE := $(shell microk8s ctr images import $(ARTIFACT) | cut -d' ' -f2)) + microk8s ctr images tag $(IMAGE) $(DISPLAY_NAME):$(TAG) + + +# Recipe that runs the integration tests +tests: $(K8S_MARKER) $(AWS_MARKER) @echo "=== Running Integration Tests ===" ifeq ($(FLAVOUR), jupyter) /bin/bash ./tests/integration/integration-tests-jupyter.sh +else ifeq ($(FLAVOUR), kyuubi) + /bin/bash ./tests/integration/integration-tests-kyuubi.sh else /bin/bash ./tests/integration/integration-tests.sh endif -clean: - @echo "=== Cleaning environment ===" - rockcraft clean - rm -rf $(_MAKE_DIR) *.rock *.tar + +# Shorthand recipe for setup and configuration of K8s cluster. +microk8s-setup: $(K8S_MARKER) + +# Shorthand recipe for setup and configuration of AWS CLI. +aws-cli-setup: $(AWS_MARKER) + + +# Recipe for setting up and configuring the K8s cluster. +$(K8S_MARKER): + @echo "=== Setting up and configuring local Microk8s cluster ===" + /bin/bash ./tests/integration/setup-microk8s.sh $(MICROK8S_CHANNEL) + sg microk8s ./tests/integration/config-microk8s.sh + touch $(K8S_MARKER) + + +# Recipe for setting up and configuring the AWS CLI and credentials. 
+# Depends upon K8S_MARKER because the S3 credentials to AWS CLI is provided by MinIO, which is a MicroK8s plugin +$(AWS_MARKER): $(K8S_MARKER) + @echo "=== Setting up and configure AWS CLI ===" + /bin/bash ./tests/integration/setup-aws-cli.sh + touch $(AWS_MARKER) diff --git a/build/Dockerfile.jupyter b/images/charmed-spark-jupyter/Dockerfile similarity index 84% rename from build/Dockerfile.jupyter rename to images/charmed-spark-jupyter/Dockerfile index c4ad3505..97940b0b 100644 --- a/build/Dockerfile.jupyter +++ b/images/charmed-spark-jupyter/Dockerfile @@ -17,8 +17,8 @@ USER root RUN rm /var/lib/pebble/default/layers/*.yaml /opt/pebble/*.sh RUN python3 -m pip install "jupyterlab==$JUPYTERLAB_VERSION" -COPY ./files/jupyter/pebble/layers.yaml /var/lib/pebble/default/layers/001-charmed-jupyter.yaml -COPY ./files/jupyter/bin/jupyterlab-server.sh /opt/pebble/jupyterlab-server.sh +COPY ./pebble/layers.yaml /var/lib/pebble/default/layers/001-charmed-jupyter.yaml +COPY ./bin/jupyterlab-server.sh /opt/pebble/jupyterlab-server.sh RUN chown _daemon_:_daemon_ /opt/pebble/jupyterlab-server.sh USER _daemon_ diff --git a/files/jupyter/bin/jupyterlab-server.sh b/images/charmed-spark-jupyter/bin/jupyterlab-server.sh similarity index 100% rename from files/jupyter/bin/jupyterlab-server.sh rename to images/charmed-spark-jupyter/bin/jupyterlab-server.sh diff --git a/files/jupyter/pebble/layers.yaml b/images/charmed-spark-jupyter/pebble/layers.yaml similarity index 100% rename from files/jupyter/pebble/layers.yaml rename to images/charmed-spark-jupyter/pebble/layers.yaml diff --git a/images/charmed-spark-kyuubi/Dockerfile b/images/charmed-spark-kyuubi/Dockerfile new file mode 100644 index 00000000..e3a13e89 --- /dev/null +++ b/images/charmed-spark-kyuubi/Dockerfile @@ -0,0 +1,76 @@ +# Copyright 2024 Canonical Ltd. +# See LICENSE file for licensing details. + + +# --------------------------------------------------------------------------- +# This is an OCI image for Kyuubi K8s charm. +# +# It is an open source, end-to-end, production ready data platform on top of +# cloud native technologies. 
+# ---------------------------------------------------------------------------
+
+ ARG KYUUBI_ARTIFACT="https://dlcdn.apache.org/kyuubi/kyuubi-1.9.0/apache-kyuubi-1.9.0-bin.tgz"
+ ARG CHECKSUM="54721812f35743aec60104466c3527e6d68f2a75afb3bdbd68d06c9bd7e09a5f35f71d9edb9f24cc2189d403ef5aa65ee54fe594ca1c78e9ab621177bab32054"
+ ARG TARFILE="kyuubi.tgz"
+ ARG BASE_IMAGE
+
+# --------------------------------------------------------------------------
+# Temporary stage to download and extract Kyuubi
+# --------------------------------------------------------------------------
+
+ FROM alpine:latest AS downloader
+
+ ARG KYUUBI_ARTIFACT
+ ARG CHECKSUM
+ ARG TARFILE
+
+ # Install wget and tar in the temporary image
+ RUN apk add --no-cache wget tar
+ RUN echo hello
+ RUN echo $TARFILE
+ RUN echo bye
+
+ # Download and verify Kyuubi
+ RUN wget -O $TARFILE $KYUUBI_ARTIFACT && \
+     echo "$CHECKSUM $TARFILE" | sha512sum -c -
+
+ # Debugging steps to check the downloaded file and checksums
+ RUN ls -lh $TARFILE
+ RUN sha512sum $TARFILE
+ RUN echo "$CHECKSUM $TARFILE" | sha512sum -c -
+
+ # Create the target directory and extract the tarball
+ RUN mkdir -p /opt/kyuubi && \
+     tar -xzvf $TARFILE -C /opt/kyuubi --strip-components=1
+
+# --------------------------------------------------------------------------
+# Final stage
+# --------------------------------------------------------------------------
+
+ FROM $BASE_IMAGE
+
+ ARG BASE_IMAGE
+
+ USER root
+
+ # Copy the extracted Kyuubi files from the temporary stage
+ COPY --from=downloader /opt/kyuubi /opt/kyuubi
+
+ # Copy local files to the container
+ COPY ./bin/kyuubi.sh /opt/pebble/kyuubi.sh
+ RUN chown _daemon_:_daemon_ /opt/pebble/kyuubi.sh
+
+ # Copy pebble layers
+ COPY ./pebble/layers.yaml /var/lib/pebble/default/layers/002-charmed-kyuubi.yaml
+
+ # Copy PostgreSQL dependencies
+ RUN cp /opt/kyuubi/jars/postgresql* /opt/spark/jars
+
+ # Set appropriate permissions
+ RUN chown -R _daemon_:_daemon_ /opt/kyuubi/ && \
+     chmod -R 750 /opt/kyuubi
+
+ USER _daemon_
+
+ # Provide Default Entrypoint for Pebble
+ ENTRYPOINT [ "/bin/pebble", "enter", "--verbose", "--args", "sparkd" ]
\ No newline at end of file
diff --git a/files/spark/bin/kyuubi.sh b/images/charmed-spark-kyuubi/bin/kyuubi.sh
similarity index 100%
rename from files/spark/bin/kyuubi.sh
rename to images/charmed-spark-kyuubi/bin/kyuubi.sh
diff --git a/images/charmed-spark-kyuubi/pebble/layers.yaml b/images/charmed-spark-kyuubi/pebble/layers.yaml
new file mode 100644
index 00000000..9c8a7485
--- /dev/null
+++ b/images/charmed-spark-kyuubi/pebble/layers.yaml
@@ -0,0 +1,6 @@
+services:
+  kyuubi:
+    command: "/bin/bash /opt/pebble/kyuubi.sh"
+    summary: "This is the Kyuubi service"
+    override: replace
+    startup: enabled
\ No newline at end of file
diff --git a/build/Dockerfile b/images/charmed-spark/Dockerfile
similarity index 74%
rename from build/Dockerfile
rename to images/charmed-spark/Dockerfile
index 7a3b64ed..5373a047 100644
--- a/build/Dockerfile
+++ b/images/charmed-spark/Dockerfile
@@ -1,4 +1,4 @@
-ARG BASE_IMAGE=base-charmed-spark:latest
+ARG BASE_IMAGE=staged-charmed-spark:latest
 FROM $BASE_IMAGE
 # Provide Default Entrypoint for Pebble
 ENTRYPOINT [ "/bin/pebble", "enter", "--verbose", "--args", "sparkd" ]
\ No newline at end of file
diff --git a/files/spark/bin/history-server.sh b/images/charmed-spark/bin/history-server.sh
similarity index 100%
rename from files/spark/bin/history-server.sh
rename to images/charmed-spark/bin/history-server.sh
diff --git 
a/files/spark/bin/spark-client.pyspark b/images/charmed-spark/bin/spark-client.pyspark similarity index 100% rename from files/spark/bin/spark-client.pyspark rename to images/charmed-spark/bin/spark-client.pyspark diff --git a/files/spark/bin/spark-client.service-account-registry b/images/charmed-spark/bin/spark-client.service-account-registry similarity index 100% rename from files/spark/bin/spark-client.service-account-registry rename to images/charmed-spark/bin/spark-client.service-account-registry diff --git a/files/spark/bin/spark-client.spark-shell b/images/charmed-spark/bin/spark-client.spark-shell similarity index 100% rename from files/spark/bin/spark-client.spark-shell rename to images/charmed-spark/bin/spark-client.spark-shell diff --git a/files/spark/bin/spark-client.spark-sql b/images/charmed-spark/bin/spark-client.spark-sql similarity index 100% rename from files/spark/bin/spark-client.spark-sql rename to images/charmed-spark/bin/spark-client.spark-sql diff --git a/files/spark/bin/spark-client.spark-submit b/images/charmed-spark/bin/spark-client.spark-submit similarity index 100% rename from files/spark/bin/spark-client.spark-submit rename to images/charmed-spark/bin/spark-client.spark-submit diff --git a/files/spark/bin/sparkd.sh b/images/charmed-spark/bin/sparkd.sh similarity index 100% rename from files/spark/bin/sparkd.sh rename to images/charmed-spark/bin/sparkd.sh diff --git a/files/spark/conf/spark-defaults.conf b/images/charmed-spark/conf/spark-defaults.conf similarity index 100% rename from files/spark/conf/spark-defaults.conf rename to images/charmed-spark/conf/spark-defaults.conf diff --git a/rockcraft.yaml b/images/charmed-spark/rockcraft.yaml similarity index 90% rename from rockcraft.yaml rename to images/charmed-spark/rockcraft.yaml index a755ec90..c06a7b78 100644 --- a/rockcraft.yaml +++ b/images/charmed-spark/rockcraft.yaml @@ -11,8 +11,6 @@ description: | license: Apache-2.0 version: "3.4.2" -# version:spark:3.4.2 -# version:jupyter:4.0.11 base: ubuntu@22.04 @@ -49,11 +47,6 @@ services: # working-dir: /opt/spark environment: SPARK_PROPERTIES_FILE: /etc/spark8t/conf/spark-defaults.conf - kyuubi: - command: "/bin/bash /opt/pebble/kyuubi.sh" - summary: "This is the Kyuubi service" - override: replace - startup: disabled parts: spark: @@ -94,19 +87,9 @@ parts: - opt/spark/python - opt/spark/data - kyuubi: - plugin: dump - after: [ spark ] - source: https://dlcdn.apache.org/kyuubi/kyuubi-1.9.0/apache-kyuubi-1.9.0-bin.tgz - source-checksum: sha512/54721812f35743aec60104466c3527e6d68f2a75afb3bdbd68d06c9bd7e09a5f35f71d9edb9f24cc2189d403ef5aa65ee54fe594ca1c78e9ab621177bab32054 - override-build: | - mkdir -p $CRAFT_PART_INSTALL/opt/kyuubi && cp -r ./* $CRAFT_PART_INSTALL/opt/kyuubi - stage: - - opt/kyuubi - dependencies: plugin: nil - after: [ kyuubi ] + after: [ spark ] build-packages: - wget overlay-script: | @@ -185,12 +168,11 @@ parts: charmed-spark: plugin: dump after: [ spark8t, spark ] - source: files/spark + source: . 
     organize:
       conf/spark-defaults.conf: etc/spark8t/conf/spark-defaults.conf
       bin/sparkd.sh: opt/pebble/sparkd.sh
       bin/history-server.sh: opt/pebble/history-server.sh
-      bin/kyuubi.sh: opt/pebble/kyuubi.sh
       bin/spark-client.pyspark: opt/spark-client/python/bin/spark-client.pyspark
       bin/spark-client.spark-sql: opt/spark-client/python/bin/spark-client.spark-sql
       bin/spark-client.service-account-registry: opt/spark-client/python/bin/spark-client.service-account-registry
@@ -200,7 +182,6 @@ parts:
       - etc/spark8t/conf/
       - opt/pebble/sparkd.sh
       - opt/pebble/history-server.sh
-      - opt/pebble/kyuubi.sh
       - opt/spark-client/python/bin/spark-client.pyspark
       - opt/spark-client/python/bin/spark-client.spark-sql
       - opt/spark-client/python/bin/spark-client.service-account-registry
@@ -235,16 +216,10 @@ parts:
       chown -R ${SPARK_GID}:${SPARK_UID} var/log/spark
       chmod -R 750 var/log/spark
 
-      # Make PostgreSQL JDBC available for Spark as well
-      cp opt/kyuubi/jars/postgresql* opt/spark/jars/
-
       # This is needed to run the spark job, as it requires RW+ access on the spark folder
       chown -R ${SPARK_GID}:${SPARK_UID} opt/spark
       chmod -R 750 opt/spark
 
-      chown -R ${SPARK_GID}:${SPARK_UID} opt/kyuubi
-      chmod -R 750 opt/kyuubi
-
       mkdir -p var/lib/spark
       mkdir -p var/lib/spark/notebook
       chown -R ${SPARK_GID}:${SPARK_UID} var/lib/spark
diff --git a/images/metadata.yaml b/images/metadata.yaml
new file mode 100644
index 00000000..9884f167
--- /dev/null
+++ b/images/metadata.yaml
@@ -0,0 +1,14 @@
+flavours:
+  jupyter:
+    version: 4.0.11
+    image_description: |
+      This is an OCI image to deploy JupyterLab server instances on K8s, fully
+      integrated with Charmed Spark ecosystem and utilities. The image provides
+      an automated and seamless user-experience to deploy, operate, manage and
+      monitor SparkJob running on K8s cluster. It is an open source, end-to-end,
+      production ready data platform on top of cloud native technologies.
+  kyuubi:
+    version: 1.9.0
+    image_description: |
+      This is an OCI image that contains Kyuubi, fully integrated with Charmed
+      Spark ecosystem and utilities.
\ No newline at end of file
diff --git a/tests/integration/integration-tests-jupyter.sh b/tests/integration/integration-tests-jupyter.sh
index c6d8617e..1896d5f0 100755
--- a/tests/integration/integration-tests-jupyter.sh
+++ b/tests/integration/integration-tests-jupyter.sh
@@ -15,14 +15,16 @@ NAMESPACE=tests
 
 get_spark_version(){
-  SPARK_VERSION=$(yq '(.version)' rockcraft.yaml)
+  SPARK_VERSION=$(yq '(.version)' images/charmed-spark/rockcraft.yaml)
   echo "$SPARK_VERSION"
 }
 
+
 spark_image(){
   echo "ghcr.io/canonical/test-charmed-spark-jupyterlab:$(get_spark_version)"
 }
 
+
 setup_jupyter() {
   echo "setup_jupyter() ${1} ${2}"
 
diff --git a/tests/integration/integration-tests-kyuubi.sh b/tests/integration/integration-tests-kyuubi.sh
new file mode 100755
index 00000000..cc192c7d
--- /dev/null
+++ b/tests/integration/integration-tests-kyuubi.sh
@@ -0,0 +1,186 @@
+#!/bin/bash
+
+# The integration tests are designed to test that SQL queries can be submitted to Kyuubi and/or shell processes are
+# working properly with the restricted permissions of the service account starting the process. For this reason,
+# in the tests we spawn two pods:
+#
+# 1. Admin pod, that is used to create and delete service accounts
+# 2. User pod, that is used to start and execute Spark Jobs
+#
+# The Admin pod is created once at the beginning of the tests and it is used to manage Spark service accounts
+# throughout the integration tests. On the other hand, the User pod(s) are created together with the creation
+# of the Spark user (service accounts and secrets) at the beginning of each test, and they are destroyed at the
+# end of the test.
+
+
+# Import reusable utilities
+source ./tests/integration/utils/s3-utils.sh
+source ./tests/integration/utils/k8s-utils.sh
+
+
+# Global Variables
+NAMESPACE=tests
+SERVICE_ACCOUNT=spark
+ADMIN_POD_NAME=testpod-admin
+USER_POD_NAME=kyuubi-test
+S3_BUCKET=kyuubi
+
+
+get_spark_version(){
+    # Fetch Spark version from images/charmed-spark/rockcraft.yaml
+    yq '(.version)' images/charmed-spark/rockcraft.yaml
+}
+
+
+kyuubi_image(){
+    # The Kyuubi image that is going to be used for test
+    echo "ghcr.io/canonical/test-charmed-spark-kyuubi:$(get_spark_version)"
+}
+
+
+setup_kyuubi_pod() {
+    # Setup Kyuubi pod for testing
+    #
+    # Reads the global variables:
+    #   SERVICE_ACCOUNT: The service account to be used for creating Kyuubi pod
+    #   NAMESPACE: The namespace to be used for creating Kyuubi pod
+
+    # Create service account using the admin pod
+    create_serviceaccount_using_pod $SERVICE_ACCOUNT $NAMESPACE $ADMIN_POD_NAME
+
+    image=$(kyuubi_image)
+
+    # Create the pod with the newly created service account
+    sed -e "s%<IMAGE>%${image}%g" \
+        -e "s/<SERVICE_ACCOUNT>/${SERVICE_ACCOUNT}/g" \
+        -e "s/<NAMESPACE>/${NAMESPACE}/g" \
+        -e "s/<POD_NAME>/${USER_POD_NAME}/g" \
+        ./tests/integration/resources/kyuubi.yaml | \
+        kubectl -n $NAMESPACE apply -f -
+
+    wait_for_pod $USER_POD_NAME $NAMESPACE
+
+    # Prepare S3 bucket
+    create_s3_bucket $S3_BUCKET
+
+    s3_endpoint=$(get_s3_endpoint)
+    s3_access_key=$(get_s3_access_key)
+    s3_secret_key=$(get_s3_secret_key)
+
+    # Write Spark configs inside the Kyuubi container
+    kubectl -n $NAMESPACE exec kyuubi-test -- env IMG="$image" /bin/bash -c 'echo spark.kubernetes.container.image=$IMG > /etc/spark8t/conf/spark-defaults.conf'
+    kubectl -n $NAMESPACE exec kyuubi-test -- env NN="$NAMESPACE" /bin/bash -c 'echo spark.kubernetes.namespace=$NN >> /etc/spark8t/conf/spark-defaults.conf'
+    kubectl -n $NAMESPACE exec kyuubi-test -- env UU="$SERVICE_ACCOUNT" /bin/bash -c 'echo spark.kubernetes.authenticate.driver.serviceAccountName=$UU >> /etc/spark8t/conf/spark-defaults.conf'
+    kubectl -n $NAMESPACE exec kyuubi-test -- env ENDPOINT="$s3_endpoint" /bin/bash -c 'echo spark.hadoop.fs.s3a.endpoint=$ENDPOINT >> /etc/spark8t/conf/spark-defaults.conf'
+    kubectl -n $NAMESPACE exec kyuubi-test -- env ACCESS_KEY="$s3_access_key" /bin/bash -c 'echo spark.hadoop.fs.s3a.access.key=$ACCESS_KEY >> /etc/spark8t/conf/spark-defaults.conf'
+    kubectl -n $NAMESPACE exec kyuubi-test -- env SECRET_KEY="$s3_secret_key" /bin/bash -c 'echo spark.hadoop.fs.s3a.secret.key=$SECRET_KEY >> /etc/spark8t/conf/spark-defaults.conf'
+    kubectl -n $NAMESPACE exec kyuubi-test -- /bin/bash -c 'echo spark.hadoop.fs.s3a.aws.credentials.provider=org.apache.hadoop.fs.s3a.SimpleAWSCredentialsProvider >> /etc/spark8t/conf/spark-defaults.conf'
+    kubectl -n $NAMESPACE exec kyuubi-test -- /bin/bash -c 'echo spark.hadoop.fs.s3a.connection.ssl.enabled=false >> /etc/spark8t/conf/spark-defaults.conf'
+    kubectl -n $NAMESPACE exec kyuubi-test -- /bin/bash -c 'echo spark.hadoop.fs.s3a.path.style.access=true >> /etc/spark8t/conf/spark-defaults.conf'
+    kubectl -n $NAMESPACE exec kyuubi-test -- env BUCKET="$S3_BUCKET" /bin/bash -c 'echo spark.sql.warehouse.dir=s3a://$BUCKET/warehouse >> /etc/spark8t/conf/spark-defaults.conf'
+    kubectl -n $NAMESPACE exec kyuubi-test -- env BUCKET="$S3_BUCKET" /bin/bash -c 'echo spark.kubernetes.file.upload.path=s3a://$BUCKET >> /etc/spark8t/conf/spark-defaults.conf'
+
+    # Wait some time for the server to be up and running
+    sleep 10
+}
+
+
+cleanup_user() {
+    # Cleanup user resources.
+    #
+    # Arguments:
+    # $1: Exit code of the accompanying process (used to decide how to clean up)
+    # $2: Service account name
+    # $3: Namespace
+
+    exit_code=$1
+    username=$2
+    namespace=$3
+
+    # Delete user pod and service account
+    kubectl -n $NAMESPACE delete pod $USER_POD_NAME --wait=true
+    kubectl -n $NAMESPACE exec $ADMIN_POD_NAME -- env UU="$username" NN="$namespace" \
+        /bin/bash -c 'spark-client.service-account-registry delete --username $UU --namespace $NN'
+
+    # Delete S3 bucket
+    delete_s3_bucket $S3_BUCKET
+
+    # Verify deletion of service account
+    output=$(kubectl -n $NAMESPACE exec $ADMIN_POD_NAME -- /bin/bash -c 'spark-client.service-account-registry list')
+    exists=$(echo -e "$output" | grep "$namespace:$username" | wc -l)
+    if [ "${exists}" -ne "0" ]; then
+        exit 2
+    fi
+
+    if [ "${exit_code}" -ne "0" ]; then
+        kubectl delete ns $NAMESPACE
+        exit 1
+    fi
+}
+
+
+cleanup_user_success() {
+    echo "cleanup_user_success()......"
+    cleanup_user 0 $SERVICE_ACCOUNT $NAMESPACE
+}
+
+
+cleanup_user_failure() {
+    echo "cleanup_user_failure()......"
+    cleanup_user 1 $SERVICE_ACCOUNT $NAMESPACE
+}
+
+
+teardown_test_pods() {
+    kubectl -n $NAMESPACE delete pod $ADMIN_POD_NAME $USER_POD_NAME
+}
+
+
+test_jdbc_connection(){
+    # Test the JDBC endpoint exposed by Kyuubi by running a few SQL queries
+    jdbc_endpoint=$(kubectl -n $NAMESPACE exec kyuubi-test -- pebble logs kyuubi | grep 'Starting and exposing JDBC connection at:' | rev | cut -d' ' -f1 | rev)
+    echo "Testing JDBC endpoint '$jdbc_endpoint'..."
+
+    commands=$(cat ./tests/integration/resources/test-kyuubi.sql)
+
+    echo -e "$(kubectl exec kyuubi-test -n $NAMESPACE -- \
+        env CMDS="$commands" ENDPOINT="$jdbc_endpoint" \
+        /bin/bash -c 'echo "$CMDS" | /opt/kyuubi/bin/beeline -u $ENDPOINT'
+    )" > /tmp/test_beeline.out
+
+    num_rows_inserted=$(cat /tmp/test_beeline.out | grep "Inserted Rows:" | sed 's/|/ /g' | tail -n 1 | xargs | rev | cut -d' ' -f1 | rev )
+    echo -e "${num_rows_inserted} rows were inserted."
+
+    if [ "${num_rows_inserted}" != "3" ]; then
+        echo "ERROR: Test failed. ${num_rows_inserted} out of 3 rows were inserted. Aborting with exit code 1."
+        exit 1
+    fi
+
+    rm /tmp/test_beeline.out
+}
+
+
+
+echo -e "##################################"
+echo -e "SETUP ADMIN TEST POD"
+echo -e "##################################"
+
+kubectl create namespace $NAMESPACE
+setup_admin_pod $ADMIN_POD_NAME $(kyuubi_image) $NAMESPACE
+
+echo -e "##################################"
+echo -e "START KYUUBI POD AND BEGIN TESTING"
+echo -e "##################################"
+
+(setup_kyuubi_pod && test_jdbc_connection && cleanup_user_success) || cleanup_user_failure
+
+echo -e "##################################"
+echo -e "TEARDOWN ADMIN POD"
+echo -e "##################################"
+
+teardown_test_pods
+kubectl delete namespace $NAMESPACE
+
+echo -e "##################################"
+echo -e "END OF THE TEST"
+echo -e "##################################"
diff --git a/tests/integration/integration-tests.sh b/tests/integration/integration-tests.sh
index 9dc6ba03..87eed901 100755
--- a/tests/integration/integration-tests.sh
+++ b/tests/integration/integration-tests.sh
@@ -16,7 +16,7 @@ NAMESPACE=tests
 
 get_spark_version(){
-  SPARK_VERSION=$(yq '(.version)' rockcraft.yaml)
+  SPARK_VERSION=$(yq '(.version)' images/charmed-spark/rockcraft.yaml)
   echo "$SPARK_VERSION"
 }
 
@@ -149,7 +149,6 @@ teardown_test_pod() {
   kubectl logs testpod-admin -n $NAMESPACE
   kubectl logs testpod -n $NAMESPACE
   kubectl logs -l spark-version=3.4.2 -n $NAMESPACE
-  kubectl -n $NAMESPACE delete pod testpod
   kubectl -n $NAMESPACE delete pod testpod-admin
   kubectl delete namespace $NAMESPACE
 
diff --git a/tests/integration/resources/kyuubi.yaml b/tests/integration/resources/kyuubi.yaml
new file mode 100644
index 00000000..41d86622
--- /dev/null
+++ b/tests/integration/resources/kyuubi.yaml
@@ -0,0 +1,9 @@
+apiVersion: v1
+kind: Pod
+metadata:
+  name: <POD_NAME>
+spec:
+  serviceAccountName: "<SERVICE_ACCOUNT>"
+  containers:
+    - image: <IMAGE>
+      name: kyuubi
diff --git a/tests/integration/resources/test-kyuubi.sql b/tests/integration/resources/test-kyuubi.sql
new file mode 100644
index 00000000..e095a115
--- /dev/null
+++ b/tests/integration/resources/test-kyuubi.sql
@@ -0,0 +1,6 @@
+CREATE DATABASE IF NOT EXISTS db_name;
+USE db_name;
+CREATE TABLE IF NOT EXISTS db_name.table_name (number Int, word String);
+INSERT INTO db_name.table_name VALUES (1, "foo"), (2, "bar"), (3, "grok");
+SELECT CONCAT("Inserted Rows: ", COUNT(*)) FROM db_name.table_name;
+!quit
diff --git a/tests/integration/setup-aws-cli.sh b/tests/integration/setup-aws-cli.sh
index 53d58c0c..a6386697 100755
--- a/tests/integration/setup-aws-cli.sh
+++ b/tests/integration/setup-aws-cli.sh
@@ -3,18 +3,48 @@
 # Install AWS CLI
 sudo snap install aws-cli --classic
 
-# Get Access key and secret key from MinIO
-ACCESS_KEY=$(kubectl get secret -n minio-operator microk8s-user-1 -o jsonpath='{.data.CONSOLE_ACCESS_KEY}' | base64 -d)
-SECRET_KEY=$(kubectl get secret -n minio-operator microk8s-user-1 -o jsonpath='{.data.CONSOLE_SECRET_KEY}' | base64 -d)
-# Get S3 endpoint from MinIO
-S3_ENDPOINT=$(kubectl get service minio -n minio-operator -o jsonpath='{.spec.clusterIP}')
+
+get_s3_endpoint(){
+    # Get S3 endpoint from MinIO
+    kubectl get service minio -n minio-operator -o jsonpath='{.spec.clusterIP}'
+}
+
+
+wait_and_retry(){
+    # Retry a command for a number of times by waiting a few seconds.
+
+    command="$@"
+    retries=0
+    max_retries=50
+    until [ "$retries" -ge $max_retries ]
+    do
+        $command &> /dev/null && break
+        retries=$((retries+1))
+        echo "Trying to execute command ${command}..."
+        sleep 5
+    done
+
+    # If the command was not successful even on maximum retries
+    if [ "$retries" -ge $max_retries ]; then
+        echo "Maximum number of retries ($max_retries) reached. ${command} returned with non zero status."
+        exit 1
+    fi
+}
+
+# Wait for `minio` service to be ready and S3 endpoint to be available
+wait_and_retry get_s3_endpoint
+
+S3_ENDPOINT=$(get_s3_endpoint)
 DEFAULT_REGION="us-east-2"
+ACCESS_KEY=$(kubectl get secret -n minio-operator microk8s-user-1 -o jsonpath='{.data.CONSOLE_ACCESS_KEY}' | base64 -d)
+SECRET_KEY=$(kubectl get secret -n minio-operator microk8s-user-1 -o jsonpath='{.data.CONSOLE_SECRET_KEY}' | base64 -d)
 
 # Configure AWS CLI credentials
 aws configure set aws_access_key_id $ACCESS_KEY
 aws configure set aws_secret_access_key $SECRET_KEY
 aws configure set default.region $DEFAULT_REGION
 aws configure set endpoint_url "http://$S3_ENDPOINT"
+
+wait_and_retry aws s3 ls
 echo "AWS CLI credentials set successfully"
\ No newline at end of file
diff --git a/tests/integration/setup-microk8s.sh b/tests/integration/setup-microk8s.sh
index 32d10ca9..6153c9fb 100755
--- a/tests/integration/setup-microk8s.sh
+++ b/tests/integration/setup-microk8s.sh
@@ -5,4 +5,3 @@ sudo snap alias microk8s.kubectl kubectl
 sudo usermod -a -G microk8s ${USER}
 mkdir -p ~/.kube
 sudo chown -f -R ${USER} ~/.kube
-
diff --git a/tests/integration/utils/k8s-utils.sh b/tests/integration/utils/k8s-utils.sh
new file mode 100644
index 00000000..95af2008
--- /dev/null
+++ b/tests/integration/utils/k8s-utils.sh
@@ -0,0 +1,93 @@
+#!/bin/bash
+
+# Copyright 2024 Canonical Ltd.
+
+# This file contains several Bash utility functions related to K8s resource management.
+# To use them, simply `source` this file in your bash script.
+
+
+# Check if the K8s cluster has been set up and configured properly. If not, exit.
+if ! kubectl get ns >> /dev/null ; then
+    echo "The K8s cluster has not been configured properly. Exiting..."
+    exit 1
+fi
+
+
+wait_for_pod() {
+    # Wait for the given pod in the given namespace to be ready.
+    #
+    # Arguments:
+    # $1: Name of the pod
+    # $2: Namespace that contains the pod
+
+    pod_name=$1
+    namespace=$2
+
+    echo "Waiting for pod '$pod_name' to become ready..."
+    kubectl wait --for condition=Ready pod/$pod_name -n $namespace --timeout 60s
+}
+
+
+create_serviceaccount_using_pod(){
+    # Create a service account in the given namespace using a given pod.
+    #
+    # Arguments:
+    # $1: Name of the service account
+    # $2: Namespace which the service account should belong to
+    # $3: Name of the pod to be used for creation
+
+    username=$1
+    namespace=$2
+    pod_name=$3
+
+    echo "Creating service account '$username' in namespace '$namespace'..."
+    kubectl -n $namespace exec $pod_name -- env UU="$username" NN="$namespace" \
+        /bin/bash -c 'spark-client.service-account-registry create --username $UU --namespace $NN'
+    echo "Service account '$username' in namespace '$namespace' created successfully."
+}
+
+
+delete_serviceaccount_using_pod(){
+    # Delete a service account in the given namespace using a given pod.
+    #
+    # Arguments:
+    # $1: Name of the service account
+    # $2: Namespace which the service account belongs to
+    # $3: Name of the pod to be used for deletion
+
+    username=$1
+    namespace=$2
+    pod_name=$3
+
+    echo "Deleting service account '$username' in namespace '$namespace'..."
+    kubectl -n $namespace exec $pod_name -- env UU="$username" NN="$namespace" \
+        /bin/bash -c 'spark-client.service-account-registry delete --username $UU --namespace $NN'
+    echo "Service account '$username' in namespace '$namespace' deleted successfully."
+}
+
+
+
+setup_admin_pod(){
+    # Create a pod with admin service account.
+    #
+    # Arguments:
+    # $1: Name of the admin pod
+    # $2: Image to be used when creating the admin pod
+    # $3: Namespace where the pod is to be created
+
+    pod_name=$1
+    image=$2
+    namespace=$3
+
+    echo "Creating admin pod with name $pod_name and image $image..."
+    kubectl run $pod_name --image=$image --env="KUBECONFIG=/var/lib/spark/.kube/config" --namespace=${namespace}
+
+    # Wait for pod to be ready
+    wait_for_pod $pod_name $namespace
+
+    user_kubeconfig=$(cat /home/${USER}/.kube/config)
+    kubectl -n $namespace exec $pod_name -- /bin/bash -c 'mkdir -p ~/.kube'
+    kubectl -n $namespace exec $pod_name -- env KCONFIG="$user_kubeconfig" /bin/bash -c 'echo "$KCONFIG" > ~/.kube/config'
+
+    echo "Admin pod with name '$pod_name' created and configured successfully."
+}
\ No newline at end of file
diff --git a/tests/integration/utils/s3-utils.sh b/tests/integration/utils/s3-utils.sh
new file mode 100644
index 00000000..7031f0fd
--- /dev/null
+++ b/tests/integration/utils/s3-utils.sh
@@ -0,0 +1,75 @@
+#!/bin/bash
+
+# Copyright 2024 Canonical Ltd.
+
+# This file contains several Bash utility functions related to S3 bucket management
+# To use them, simply `source` this file in your bash script.
+
+
+# Check if AWS CLI has been installed and the credentials have been configured. If not, exit.
+if ! aws s3 ls; then
+    echo "The AWS CLI and S3 credentials have not been configured properly. Exiting..."
+    exit 1
+fi
+
+
+get_s3_endpoint(){
+    # Print the endpoint where the S3 bucket is exposed on.
+    kubectl get service minio -n minio-operator -o jsonpath='{.spec.clusterIP}'
+}
+
+
+get_s3_access_key(){
+    # Print the S3 Access Key by reading it from K8s secret
+    kubectl get secret -n minio-operator microk8s-user-1 -o jsonpath='{.data.CONSOLE_ACCESS_KEY}' | base64 -d
+}
+
+
+get_s3_secret_key(){
+    # Print the S3 Secret Key by reading it from K8s secret
+    kubectl get secret -n minio-operator microk8s-user-1 -o jsonpath='{.data.CONSOLE_SECRET_KEY}' | base64 -d
+}
+
+
+create_s3_bucket(){
+    # Create a S3 bucket with the given name.
+    #
+    # Arguments:
+    # $1: Name of the bucket to be created.
+
+    BUCKET_NAME=$1
+    aws s3 mb s3://"$BUCKET_NAME"
+    echo "Created S3 bucket ${BUCKET_NAME}."
+}
+
+
+delete_s3_bucket(){
+    # Delete a S3 bucket with the given name.
+    #
+    # Arguments:
+    # $1: Name of the bucket to be deleted.
+
+    BUCKET_NAME=$1
+    aws s3 rb "s3://$BUCKET_NAME" --force
+    echo "Deleted S3 bucket ${BUCKET_NAME}"
+}
+
+
+copy_file_to_s3_bucket(){
+    # Copy a file from local to S3 bucket.
+    #
+    # Arguments:
+    # $1: Name of the destination bucket
+    # $2: Path of the local file to be uploaded
+
+    BUCKET_NAME=$1
+    FILE_PATH=$2
+
+    # If file path is '/foo/bar/file.ext', the basename is 'file.ext'
+    BASE_NAME=$(basename "$FILE_PATH")
+
+    # Copy the file to S3 bucket
+    aws s3 cp $FILE_PATH s3://"$BUCKET_NAME"/"$BASE_NAME"
+
+    echo "Copied file ${FILE_PATH} to S3 bucket ${BUCKET_NAME}."
+}
\ No newline at end of file
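
As a rough local usage sketch (not part of the diff itself), the new Makefile targets introduced above compose in the same order the CI workflows use them; the ghcr.io/canonical/ repository, the test- prefix and the 1.28/stable channel below are simply the example values the workflows pass in.

# Build the Kyuubi flavour and look up the artifact name printed by `make help`.
sudo make build FLAVOUR=kyuubi
make help FLAVOUR=kyuubi | grep 'Artifact: '

# Load the resulting tarball into the local Docker daemon under a test prefix.
sudo make docker-import FLAVOUR=kyuubi REPOSITORY=ghcr.io/canonical/ PREFIX=test-

# Set up MicroK8s, import the image into its registry and run the integration tests.
make microk8s-setup MICROK8S_CHANNEL=1.28/stable
sudo make microk8s-import FLAVOUR=kyuubi REPOSITORY=ghcr.io/canonical/ PREFIX=test-
sg microk8s -c "make tests FLAVOUR=kyuubi"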