Skip to content

Commit

Permalink
chore: add notebook tests
Browse files Browse the repository at this point in the history
  • Loading branch information
Hartorn committed Jun 4, 2020
1 parent ebca74d commit 09ad6de
Show file tree
Hide file tree
Showing 9 changed files with 253 additions and 21 deletions.
176 changes: 165 additions & 11 deletions .circleci/config.yml
Original file line number Diff line number Diff line change
@@ -1,22 +1,176 @@
# Python CircleCI 2.0 configuration file
#
# Check https://circleci.com/docs/2.0/language-python/ for more details
#
---
version: 2
version: 2.1
executors:
# here we can define an executor that will be shared across different jobs
python-executor:
docker:
- image: python:3.7-slim-buster
environment:
POETRY_CACHE: /work/.cache/poetry
PIP_CACHE_DIR: /work/.cache/pip
JUPYTER_RUNTIME_DIR: /work/.cache/jupyter/runtime
JUPYTER_CONFIG_DIR: /work/.cache/jupyter/config
SHELL: bash -l
working_directory: /work
resource_class: large
docker-executor:
docker:
- image: dreamquark/docker:latest
working_directory: /work
resource_class: small

commands:
# here we can define steps that will be shared across different jobs
install_poetry:
description: Install poetry
steps:
- run:
name: Install prerequisites and poetry
command: |
apt update && apt install curl make git libopenblas-base build-essential -y
curl -sSL https://raw.githubusercontent.com/sdispater/poetry/master/get-poetry.py | python
source $HOME/.poetry/env
poetry config virtualenvs.path $POETRY_CACHE
poetry run pip install --upgrade --no-cache-dir pip==20.1;
jobs:
test-build-docker:
executor: docker-executor
steps:
- checkout
- setup_remote_docker
- run:
name: build docker
command: |
make build
test-build-docker-gpu:
executor: docker-executor
steps:
- checkout
- setup_remote_docker
- run:
name: build docker gpu
command: |
make build-gpu
lint-code:
machine:
image: circleci/classic:latest
working_directory: ~/tabnet
resource_class: medium
executor: python-executor
resource_class: small
steps:
- checkout
# Download and cache dependencies
- restore_cache:
keys:
- v1-dependencies-{{ checksum "poetry.lock" }}
- install_poetry
- run:
name: LintCode
shell: bash -leo pipefail
command: |
poetry run flake8
install:
executor: python-executor
resource_class: small
steps:
- checkout
# Download and cache dependencies
- restore_cache:
keys:
- v1-dependencies-{{ checksum "poetry.lock" }}
# fallback to using the latest cache if no exact match is found
- v1-dependencies-
- install_poetry
- run:
name: Install dependencies
shell: bash -leo pipefail
command: |
source $HOME/.poetry/env
poetry config virtualenvs.path $POETRY_CACHE
poetry run pip install torch==1.4.0+cpu -f https://download.pytorch.org/whl/torch_stable.html
poetry install
- save_cache:
paths:
- /work/.cache/poetry
key: v1-dependencies-{{ checksum "poetry.lock" }}
test-nb-census:
executor: python-executor
steps:
- checkout
# Download and cache dependencies
- restore_cache:
keys:
- v1-dependencies-{{ checksum "poetry.lock" }}
- install_poetry
- run:
name: run test-nb-census
shell: bash -leo pipefail
command: |
make test-nb-census
test-nb-multi-regression:
executor: python-executor
steps:
- checkout
# Download and cache dependencies
- restore_cache:
keys:
- v1-dependencies-{{ checksum "poetry.lock" }}
- install_poetry
- run:
name: run test-nb-multi-regression
shell: bash -leo pipefail
command: |
make test-nb-multi-regression
test-nb-forest:
executor: python-executor
steps:
- checkout
# Download and cache dependencies
- restore_cache:
keys:
- v1-dependencies-{{ checksum "poetry.lock" }}
- install_poetry
- run:
name: run test-nb-forest
shell: bash -leo pipefail
command: |
make test-nb-forest
test-nb-regression:
executor: python-executor
steps:
- checkout
# Download and cache dependencies
- restore_cache:
keys:
- v1-dependencies-{{ checksum "poetry.lock" }}
- install_poetry
- run:
name: LintCode
command: |
make build
make install
make lint
name: run test-nb-regression
shell: bash -leo pipefail
command: |
make test-nb-regression
workflows:
version: 2
CI-tabnet:
jobs:
- lint-code
- test-build-docker
- test-build-docker-gpu
- install
- test-nb-census:
requires:
- install
- test-nb-multi-regression:
requires:
- install
- test-nb-regression:
requires:
- install
- test-nb-forest:
requires:
- install
- lint-code:
requires:
- install
6 changes: 6 additions & 0 deletions .gitignore
Original file line number Diff line number Diff line change
Expand Up @@ -6,6 +6,12 @@ data/
*.pt
*~

# Notebook to python
forest_example.py
regression_example.py
census_example.py
multi_regression_example.py

# Byte-compiled / optimized / DLL files
__pycache__/
*.py[cod]
Expand Down
2 changes: 1 addition & 1 deletion Dockerfile
Original file line number Diff line number Diff line change
Expand Up @@ -10,6 +10,6 @@ ENV JUPYTER_CONFIG_DIR /work/.cache/jupyter/config

RUN $HOME/.poetry/bin/poetry config virtualenvs.path $POETRY_CACHE

ENV PATH /root/.poetry/bin:/bin:/usr/local/bin:/usr/bin
ENV PATH ${PATH}:/root/.poetry/bin:/bin:/usr/local/bin:/usr/bin

CMD ["bash", "-l"]
2 changes: 2 additions & 0 deletions Dockerfile_gpu
Original file line number Diff line number Diff line change
Expand Up @@ -49,6 +49,8 @@ RUN set -ex \
&& wget -O python.tar.xz "https://www.python.org/ftp/python/${PYTHON_VERSION%%[a-z]*}/Python-$PYTHON_VERSION.tar.xz" \
&& wget -O python.tar.xz.asc "https://www.python.org/ftp/python/${PYTHON_VERSION%%[a-z]*}/Python-$PYTHON_VERSION.tar.xz.asc" \
&& export GNUPGHOME="$(mktemp -d)" \
# Fix to avoid GPG server problem
&& echo "disable-ipv6" >> ${GNUPGHOME}/dirmngr.conf \
&& gpg --batch --keyserver ha.pool.sks-keyservers.net --recv-keys "$GPG_KEY" \
&& gpg --batch --verify python.tar.xz.asc python.tar.xz \
&& { command -v gpgconf > /dev/null && gpgconf --kill all || :; } \
Expand Down
25 changes: 25 additions & 0 deletions Makefile
Original file line number Diff line number Diff line change
Expand Up @@ -13,6 +13,7 @@ default: help; # default target

IMAGE_NAME=tabnet:latest
IMAGE_RELEASER_NAME=release-changelog:latest
NOTEBOOKS_DIR=/work

DOCKER_RUN = docker run --rm -v ${FOLDER}:/work -w /work --entrypoint bash -lc ${IMAGE_NAME} -c

Expand Down Expand Up @@ -65,6 +66,30 @@ root_bash: ## Start a root bash inside the container
docker exec -it --user root $$(docker ps --filter ancestor=${IMAGE_NAME} --filter expose=${PORT} -q) bash
.PHONY: root_bash

_run_notebook:
set -e
echo "$(NB_FILE)" | xargs -n1 -I {} echo "poetry run jupyter nbconvert --to=script $(NOTEBOOKS_DIR)/{} || exit 1" | sh
echo "$(NB_FILE)" | xargs -n1 -I {} echo "echo 'Running {}' && poetry run ipython $(NOTEBOOKS_DIR)/{} && echo 'Notebook $(NOTEBOOKS_DIR)/{} OK' || exit 1" | sed 's/.ipynb/.py/' | sh
echo "$(NB_FILE)" | sed 's/.ipynb/.py/' | xargs -n1 -I {} echo "echo 'Cleaning up $(NOTEBOOKS_DIR)/{}' && rm $(NOTEBOOKS_DIR)/{} || exit 1" | sh
.PHONY: _run_notebook

test-nb-census: ## run census income tests using notebooks
$(MAKE) _run_notebook NB_FILE="./census_example.ipynb"
.PHONY: test-nb-census

test-nb-forest: ## run forest cover type tests using notebooks
$(MAKE) _run_notebook NB_FILE="./forest_example.ipynb"
.PHONY: test-nb-forest

test-nb-regression: ## run regression example tests using notebooks
$(MAKE) _run_notebook NB_FILE="./regression_example.ipynb"
.PHONY: test-nb-regression

test-nb-multi-regression: ## run multi regression example tests using notebooks
$(MAKE) _run_notebook NB_FILE="./multi_regression_example.ipynb"
.PHONY: test-nb-multi-regression

help: ## Display help
@grep -E '^[a-zA-Z_-]+:.*?## .*$$' $(MAKEFILE_LIST) | awk 'BEGIN {FS = ":.*?## "}; {printf "\033[36m%-30s\033[0m %s\n", $$1, $$2}'
.PHONY: help

13 changes: 11 additions & 2 deletions census_example.ipynb
Original file line number Diff line number Diff line change
Expand Up @@ -174,6 +174,15 @@
"y_test = train[target].values[test_indices]"
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {},
"outputs": [],
"source": [
"max_epochs = 1000 if not os.getenv(\"CI\", False) else 20"
]
},
{
"cell_type": "code",
"execution_count": null,
Expand All @@ -185,7 +194,7 @@
"clf.fit(\n",
" X_train=X_train, y_train=y_train,\n",
" X_valid=X_valid, y_valid=y_valid,\n",
" max_epochs=1000 , patience=20,\n",
" max_epochs=max_epochs , patience=20,\n",
" batch_size=1024, virtual_batch_size=128,\n",
" num_workers=0,\n",
" drop_last=False\n",
Expand Down Expand Up @@ -357,7 +366,7 @@
"name": "python",
"nbconvert_exporter": "python",
"pygments_lexer": "ipython3",
"version": "3.7.6"
"version": "3.7.5"
}
},
"nbformat": 4,
Expand Down
24 changes: 21 additions & 3 deletions forest_example.ipynb
Original file line number Diff line number Diff line change
Expand Up @@ -226,6 +226,15 @@
"y_test = train[target].values[test_indices]"
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {},
"outputs": [],
"source": [
"max_epochs = 1000 if not os.getenv(\"CI\", False) else 2"
]
},
{
"cell_type": "code",
"execution_count": null,
Expand All @@ -237,7 +246,7 @@
"clf.fit(\n",
" X_train=X_train, y_train=y_train,\n",
" X_valid=X_valid, y_valid=y_valid,\n",
" max_epochs=1000, patience=100,\n",
" max_epochs=max_epochs, patience=100,\n",
" batch_size=16384, virtual_batch_size=256\n",
") "
]
Expand Down Expand Up @@ -357,6 +366,15 @@
"# XGB"
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {},
"outputs": [],
"source": [
"n_estimators = 1000 if not os.getenv(\"CI\", False) else 20"
]
},
{
"cell_type": "code",
"execution_count": null,
Expand All @@ -369,7 +387,7 @@
"\n",
"clf_xgb = XGBClassifier(max_depth=8,\n",
" learning_rate=0.1,\n",
" n_estimators=1000,\n",
" n_estimators=n_estimators,\n",
" verbosity=0,\n",
" silent=None,\n",
" objective=\"multi:softmax\",\n",
Expand Down Expand Up @@ -435,7 +453,7 @@
"name": "python",
"nbconvert_exporter": "python",
"pygments_lexer": "ipython3",
"version": "3.7.6"
"version": "3.7.5"
}
},
"nbformat": 4,
Expand Down
13 changes: 11 additions & 2 deletions multi_regression_example.ipynb
Original file line number Diff line number Diff line change
Expand Up @@ -182,6 +182,15 @@
"y_test = np.transpose(np.tile(y_test, (n_targets,1)))"
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {},
"outputs": [],
"source": [
"max_epochs = 1000 if not os.getenv(\"CI\", False) else 20"
]
},
{
"cell_type": "code",
"execution_count": null,
Expand All @@ -193,7 +202,7 @@
"clf.fit(\n",
" X_train=X_train, y_train=y_train,\n",
" X_valid=X_valid, y_valid=y_valid,\n",
" max_epochs=1000,\n",
" max_epochs=max_epochs,\n",
" patience=50,\n",
" batch_size=1024, virtual_batch_size=128,\n",
" num_workers=0,\n",
Expand Down Expand Up @@ -301,7 +310,7 @@
"name": "python",
"nbconvert_exporter": "python",
"pygments_lexer": "ipython3",
"version": "3.7.6"
"version": "3.7.5"
}
},
"nbformat": 4,
Expand Down
Loading

0 comments on commit 09ad6de

Please sign in to comment.