[KG] Update CI to cover Knowledge Graph (dmlc#913)

* upd * fix edgebatch edges * add test * trigger * Update README.md for pytorch PinSage example. Add a note that the PinSage model example under example/pytorch/recommendation only works with Python 3.6+ as its dataset loader depends on the stanfordnlp package, which works only with Python 3.6+. * Provide a frame agnostic API to test nn modules on both CPU and CUDA side. 1. make dgl.nn.xxx frame agnostic 2. make test.backend include dgl.nn modules 3. modify test_edge_softmax of test/mxnet/test_nn.py and test/pytorch/test_nn.py work on both CPU and GPU * Fix style * Delete unused code * Make agnostic test only related to tests/backend 1. clear all agnostic related code in dgl.nn 2. make test_graph_conv agnostic to cpu/gpu * Fix code style * fix * doc * Make all test code under tests.mxnet/pytorch.test_nn.py work on both CPU and GPU. * Fix syntax * Remove rand * Add TAGCN nn.module and example * Now tagcn can run on CPU. * Add unit test for TGConv * Fix style * For pubmed dataset, using --lr=0.005 can achieve better acc * Fix style * Fix some descriptions * trigger * Fix doc * Add nn.TGConv and example * Fix bug * Update data in mxnet.tagcn test acc. * Fix some comments and code * delete useless code * Fix naming * Fix bug * Fix bug * Add test for mxnet TAGConv * Add test code for mxnet TAGConv * Update some docs * Fix some code * Update docs dgl.nn.mxnet * Update weight init * Fix * reproduce the bug * Fix concurrency bug reported at dmlc#755. Also make test_shared_mem_store.py more deterministic.
* Update test_shared_mem_store.py * Update dmlc/core * Update Knowledge Graph CI with new Docker image * Remove unused line_profiler * Poke Jenkins * Update test with exit code check and simplify docker * Update Jenkinsfile to make app test a standalone stage * Update kg_test * Update Jenkinsfile * Make some KG test parallel * Update * KG MXNet does not support ComplEx * Update Jenkinsfile * Update Jenkins file * Change torch-1.2 to torch-1.2-cu92 * ci * Update ubuntu_install_mxnet_cpu.sh * Update ubuntu_install_mxnet_gpu.sh * We only need to test train and eval script. Delete some test code
yzhangee · Oct 11, 2019 · 93e3c49 · 93e3c49
1 parent bde7525
commit 93e3c49
Show file tree

Hide file tree

Showing 10 changed files with 160 additions and 23 deletions.
diff --git a/Jenkinsfile b/Jenkinsfile
@@ -204,11 +204,6 @@ pipeline {
                 tutorial_test_linux("pytorch")
               }
             }
-            stage("Knowledge Graph test") {
-              steps {
-                kg_test_linux("pytorch", "cpu")
-              }
-            }
           }
           post {
             always {
@@ -270,11 +265,6 @@ pipeline {
                 unit_test_linux("mxnet", "cpu")
               }
             }
-            stage("Knowledge Graph test") {
-              steps {
-                kg_test_linux("mxnet", "cpu")
-              }
-            }
             //stage("Tutorial test") {
             //  steps {
             //    tutorial_test_linux("mxnet")
@@ -310,6 +300,55 @@ pipeline {
         }
       }
     }
+    stage("App") {
+      parallel {
+        stage("Knowledge Graph CPU") {
+          agent { docker { image "dgllib/dgl-ci-cpu:torch-1.2.0" } }
+          stages {
+            stage("Torch test") {
+              steps {
+                kg_test_linux("pytorch", "cpu")
+              }
+            }
+            stage("MXNet test") {
+              steps {
+                kg_test_linux("mxnet", "cpu")
+              }
+            }
+          }
+          post {
+            always {
+              cleanWs disableDeferredWipeout: true, deleteDirs: true
+            }
+          }
+        }
+        stage("Knowledge Graph GPU") {
+          agent {
+            docker {
+              image "dgllib/dgl-ci-gpu:torch-1.2.0"
+              args "--runtime nvidia"
+            }
+          }
+          stages {
+            stage("Torch test") {
+              steps {
+                kg_test_linux("pytorch", "gpu")
+              }
+            }
+            stage("MXNet test") {
+              steps {
+                kg_test_linux("mxnet", "gpu")
+              }
+            }
+          }
+          post {
+            always {
+              cleanWs disableDeferredWipeout: true, deleteDirs: true
+            }
+          }
+        }
+      }
+    }
   }
   post {
     always {

diff --git a/apps/kg/eval.py b/apps/kg/eval.py
@@ -7,7 +7,6 @@
 import logging
 import time
 import pickle
-import line_profiler
 
 backend = os.environ.get('DGLBACKEND')
 if backend.lower() == 'mxnet':

diff --git a/docker/Dockerfile.ci_cpu_torch_1.2.0 b/docker/Dockerfile.ci_cpu_torch_1.2.0
@@ -0,0 +1,34 @@
+# CI docker CPU env
+# Adapted from github.com/dmlc/tvm/docker/Dockerfile.ci_cpu
+FROM ubuntu:16.04
+
+RUN apt-get update --fix-missing
+
+COPY install/ubuntu_install_core.sh /install/ubuntu_install_core.sh
+RUN bash /install/ubuntu_install_core.sh
+
+COPY install/ubuntu_install_build.sh /install/ubuntu_install_build.sh
+RUN bash /install/ubuntu_install_build.sh
+
+# ANTLR deps
+COPY install/ubuntu_install_java.sh /install/ubuntu_install_java.sh
+RUN bash /install/ubuntu_install_java.sh
+
+COPY install/ubuntu_install_antlr.sh /install/ubuntu_install_antlr.sh
+RUN bash /install/ubuntu_install_antlr.sh
+
+# python
+COPY install/ubuntu_install_python.sh /install/ubuntu_install_python.sh
+RUN bash /install/ubuntu_install_python.sh
+
+COPY install/ubuntu_install_python_package.sh /install/ubuntu_install_python_package.sh
+RUN bash /install/ubuntu_install_python_package.sh
+
+COPY install/ubuntu_install_torch_1.2.0.sh /install/ubuntu_install_torch.sh
+RUN bash /install/ubuntu_install_torch.sh
+
+COPY install/ubuntu_install_mxnet_cpu.sh /install/ubuntu_install_mxnet_cpu.sh
+RUN bash /install/ubuntu_install_mxnet_cpu.sh
+
+COPY install/FB15k.zip /data/kg/FB15k.zip
+RUN cd /data/kg && unzip FB15k.zip
diff --git a/docker/Dockerfile.ci_gpu_torch_1.2.0 b/docker/Dockerfile.ci_gpu_torch_1.2.0
@@ -0,0 +1,42 @@
+# CI docker GPU env
+FROM nvidia/cuda:9.0-cudnn7-devel
+
+# Base scripts
+RUN apt-get update --fix-missing
+
+COPY install/ubuntu_install_core.sh /install/ubuntu_install_core.sh
+RUN bash /install/ubuntu_install_core.sh
+
+COPY install/ubuntu_install_build.sh /install/ubuntu_install_build.sh
+RUN bash /install/ubuntu_install_build.sh
+
+# ANTLR deps
+COPY install/ubuntu_install_java.sh /install/ubuntu_install_java.sh
+RUN bash /install/ubuntu_install_java.sh
+
+COPY install/ubuntu_install_antlr.sh /install/ubuntu_install_antlr.sh
+RUN bash /install/ubuntu_install_antlr.sh
+
+# python
+COPY install/ubuntu_install_python.sh /install/ubuntu_install_python.sh
+RUN bash /install/ubuntu_install_python.sh
+
+COPY install/ubuntu_install_python_package.sh /install/ubuntu_install_python_package.sh
+RUN bash /install/ubuntu_install_python_package.sh
+
+COPY install/ubuntu_install_torch_1.2.0.sh /install/ubuntu_install_torch.sh
+RUN bash /install/ubuntu_install_torch.sh
+
+COPY install/ubuntu_install_mxnet_gpu.sh /install/ubuntu_install_mxnet_gpu.sh
+RUN bash /install/ubuntu_install_mxnet_gpu.sh
+
+COPY install/FB15k.zip /data/kg/FB15k.zip
+RUN cd /data/kg && unzip FB15k.zip
+
+# Environment variables
+ENV PATH=/usr/local/nvidia/bin:${PATH}
+ENV PATH=/usr/local/cuda/bin:${PATH}
+ENV CPLUS_INCLUDE_PATH=/usr/local/cuda/include:${CPLUS_INCLUDE_PATH}
+ENV C_INCLUDE_PATH=/usr/local/cuda/include:${C_INCLUDE_PATH}
+ENV LIBRARY_PATH=/usr/local/cuda/lib64:/usr/local/nvidia/lib64:${LIBRARY_PATH}
+ENV LD_LIBRARY_PATH=/usr/local/cuda/lib64:/usr/local/nvidia/lib64:${LD_LIBRARY_PATH}
diff --git a/docker/README.md b/docker/README.md
@@ -14,3 +14,15 @@ docker build -t dgl-gpu -f Dockerfile.ci_gpu .
 ```bash
 docker build -t dgl-lint -f Dockerfile.ci_lint .
 ```
+
+### CPU image for kg
+```bash
+wget https://s3.us-east-2.amazonaws.com/dgl.ai/dataset/FB15k.zip -P install/
+docker build -t dgl-cpu:torch-1.2.0 -f Dockerfile.ci_cpu_torch_1.2.0 .
+```
+
+### GPU image for kg
+```bash
+wget https://s3.us-east-2.amazonaws.com/dgl.ai/dataset/FB15k.zip -P install/
+docker build -t dgl-gpu:torch-1.2.0 -f Dockerfile.ci_gpu_torch_1.2.0 .
+```
diff --git a/docker/install/ubuntu_install_mxnet_cpu.sh b/docker/install/ubuntu_install_mxnet_cpu.sh
@@ -1 +1 @@
-pip3 install mxnet==1.5.0b20190605
+pip3 install mxnet
diff --git a/docker/install/ubuntu_install_mxnet_gpu.sh b/docker/install/ubuntu_install_mxnet_gpu.sh
@@ -1 +1 @@
-pip3 install mxnet-cu90==1.5.0b20190605
+pip3 install mxnet-cu90
diff --git a/docker/install/ubuntu_install_python_package.sh b/docker/install/ubuntu_install_python_package.sh
@@ -1,3 +1,3 @@
 # install libraries for python package on ubuntu
-pip2 install nose numpy cython scipy networkx matplotlib nltk requests[security]
-pip3 install nose numpy cython scipy networkx matplotlib nltk requests[security]
+#pip2 install nose numpy cython scipy networkx matplotlib nltk requests[security] tqdm
+pip3 install nose numpy cython scipy networkx matplotlib nltk requests[security] tqdm
diff --git a/docker/install/ubuntu_install_torch_1.2.0.sh b/docker/install/ubuntu_install_torch_1.2.0.sh
@@ -0,0 +1,3 @@
+#!/bin/bash
+# install torch
+pip3 install torch==1.2.0+cu92 torchvision==0.4.0+cu92 -f https://download.pytorch.org/whl/torch_stable.html
diff --git a/tests/scripts/task_kg_test.sh b/tests/scripts/task_kg_test.sh
@@ -36,28 +36,36 @@ export DGL_DOWNLOAD_DIR=${PWD}
 
 pushd $KG_DIR> /dev/null
 
-python3 -m nose -v --with-xunit tests/test_score.py || "run test_score.py on $1"
+python3 -m nose -v --with-xunit tests/test_score.py || fail "run test_score.py on $1"
 
 if [ "$2" == "cpu" ]; then
-    # verify CPU training
+    # verify CPU training DistMult
     python3 train.py --model DistMult --dataset FB15k --batch_size 128 \
         --neg_sample_size 16 --hidden_dim 100 --gamma 500.0 --lr 0.1 --max_step 100 \
-        --batch_size_eval 16 --valid --test -adv --eval_interval 30 --eval_percent 0.01
+        --batch_size_eval 16 --valid --test -adv --eval_interval 30 --eval_percent 0.01 \
+        --save_emb DistMult_FB15k_emb --data_path /data/kg || fail "run DistMult on $2"
+
+    # verify saving training result
+    python3 eval.py --model_name DistMult --dataset FB15k --hidden_dim 100 \
+        --gamma 500.0 --batch_size 16 --model_path DistMult_FB15k_emb/ \
+        --eval_percent 0.01 --data_path /data/kg || fail "eval DistMult on $2"
 elif [ "$2" == "gpu" ]; then
-    # verify GPU training
+    # verify GPU training DistMult
     python3 train.py --model DistMult --dataset FB15k --batch_size 128 \
         --neg_sample_size 16 --hidden_dim 100 --gamma 500.0 --lr 0.1 --max_step 100 \
-        --batch_size_eval 16 --gpu 0 --valid --test -adv --eval_interval 30 --eval_percent 0.01
+        --batch_size_eval 16 --gpu 0 --valid --test -adv --eval_interval 30 --eval_percent 0.01 \
+        --data_path /data/kg || fail "run DistMult on $2"
 
     # verify mixed CPU GPU training
     python3 train.py --model DistMult --dataset FB15k --batch_size 128 \
         --neg_sample_size 16 --hidden_dim 100 --gamma 500.0 --lr 0.1 --max_step 100 \
         --batch_size_eval 16 --gpu 0 --valid --test -adv --mix_cpu_gpu --eval_percent 0.01 \
-        --save_emb DistMult_FB15k_emb
+        --save_emb DistMult_FB15k_emb --data_path /data/kg || fail "run mix CPU/GPU DistMult"
 
     # verify saving training result
-    python3 eval.py --model_name DistMult --dataset FB15k --hidden_dim 2000 \
-        --gamma 500.0 --batch_size 16 --gpu 0 --model_path DistMult_FB15k_emb/ --eval_percent 0.01
+    python3 eval.py --model_name DistMult --dataset FB15k --hidden_dim 100 \
+        --gamma 500.0 --batch_size 16 --gpu 0 --model_path DistMult_FB15k_emb/ \
+        --eval_percent 0.01 --data_path /data/kg || fail "eval DistMult on $2"
 fi
 
 popd > /dev/null
Original file line number	Diff line number	Diff line change
		@@ -1 +1 @@
		pip3 install mxnet==1.5.0b20190605
		pip3 install mxnet
Original file line number	Diff line number	Diff line change
		@@ -1 +1 @@
		pip3 install mxnet-cu90==1.5.0b20190605
		pip3 install mxnet-cu90