[Optimization][KG] Several optimizations on DGL-KG (dmlc#1233)

* Several optimizations on DGL-KG: 1. Sorted positive edges for sampling, which can reduce random memory access during positive sampling. 2. Asynchronous node embedding update. 3. Balanced relation partition that gives a balanced number of edges in each partition. When there is no cross-partition relation, the relation embedding can be pinned in GPU memory. 4. Tunable neg_sample_size instead of a fixed neg_sample_size. * Fix test * Fix test and eval.py * Now TransR is OK * Fix single GPU with mix_cpu_gpu * Add app tests * Fix test script * Fix mxnet * Fix sample * Add docstrings * Fix * Default value for num_workers * Upd * Upd
asaluja · Feb 9, 2020 · ffe5898 · ffe5898
1 parent f103bbf
commit ffe5898
Show file tree

Hide file tree

Showing 15 changed files with 1,098 additions and 250 deletions.
diff --git a/apps/kg/config/best_config.sh b/apps/kg/config/best_config.sh
@@ -1,37 +1,74 @@
 #To reproduce reported results on README, you can run the model with the following commands:
 
 # for FB15k
-
+# DistMult 1GPU
 DGLBACKEND=pytorch python3 train.py --model DistMult --dataset FB15k --batch_size 1024 \
-    --neg_sample_size 256 --hidden_dim 2000 --gamma 500.0 --lr 0.1 --max_step 100000 \
-    --batch_size_eval 16 --gpu 0 --valid --test -adv
+    --neg_sample_size 256 --hidden_dim 400 --gamma 143.0 --lr 0.08 --batch_size_eval 16 \
+    --valid --test -adv --mix_cpu_gpu --eval_interval 100000 --gpu 0 \
+    --num_worker=8  --max_step 40000
+# DistMult 8GPU
+DGLBACKEND=pytorch python3 train.py --model DistMult --dataset FB15k --batch_size 1024 \
+    --neg_sample_size 256 --hidden_dim 400 --gamma 143.0 --lr 0.08 --batch_size_eval 16 \
+    --valid --test -adv --mix_cpu_gpu --eval_interval 100000 --num_proc 8 --gpu 0 1 2 3 4 5 6 7 \
+    --num_worker=4 --max_step 10000 --rel_part --async_update
 
+# ComplEx 1GPU
 DGLBACKEND=pytorch python3 train.py --model ComplEx --dataset FB15k --batch_size 1024 \
-    --neg_sample_size 256 --hidden_dim 2000 --gamma 500.0 --lr 0.2 --max_step 100000 \
-    --batch_size_eval 16 --gpu 0 --valid --test -adv
+    --neg_sample_size 256 --hidden_dim 400 --gamma 143.0 --lr 0.1 --regularization_coef 2.00E-06 \
+    --batch_size_eval 16 --valid --test -adv --mix_cpu_gpu --eval_interval 100000 \
+    --gpu 0 --num_worker=8 --max_step 32000
+# ComplEx 8GPU
+DGLBACKEND=pytorch python3 train.py --model ComplEx --dataset FB15k --batch_size 1024 \
+    --neg_sample_size 256 --hidden_dim 400 --gamma 143.0 --lr 0.1 --regularization_coef 2.00E-06 \
+    --batch_size_eval 16 --valid --test -adv --mix_cpu_gpu --eval_interval 100000 --num_proc 8 \
+    --gpu 0 1 2 3 4 5 6 7 --num_worker=4 --max_step 4000 --rel_part --async_update
 
+# TransE_l1 1GPU
 DGLBACKEND=pytorch python3 train.py --model TransE_l1 --dataset FB15k --batch_size 1024 \
-    --neg_sample_size 256 --hidden_dim 2000 --gamma 24.0 --lr 0.01 --max_step 20000 \
-    --batch_size_eval 16 --gpu 0 --valid --test -adv
+    --neg_sample_size 64 --regularization_coef 1e-07 --hidden_dim 400 --gamma 16.0 --lr 0.01 \
+    --batch_size_eval 16 --valid --test -adv --mix_cpu_gpu --eval_interval 100000 \
+    --gpu 0 --num_worker=8 --max_step 48000
+# TransE_l1 8GPU
+DGLBACKEND=pytorch python3 train.py --model TransE_l1 --dataset FB15k --batch_size 1024 \
+    --neg_sample_size 64 --regularization_coef 1e-07 --hidden_dim 400 --gamma 16.0 --lr 0.01 \
+    --batch_size_eval 16 --valid --test -adv --mix_cpu_gpu --eval_interval 100000 --num_proc 8 \
+    --gpu 0 1 2 3 4 5 6 7 --num_worker=4 --max_step 6000 --rel_part --async_update
 
+# TransE_l2 1GPU
 DGLBACKEND=pytorch python3 train.py --model TransE_l2 --dataset FB15k --batch_size 1024 \
     --neg_sample_size 256 --hidden_dim 2000 --gamma 12.0 --lr 0.1 --max_step 30000 \
     --batch_size_eval 16 --gpu 0 --valid --test -adv --regularization_coef=2e-7 
 
+# RESCAL 1GPU
 DGLBACKEND=pytorch python3 train.py --model RESCAL --dataset FB15k --batch_size 1024 \
     --neg_sample_size 256 --hidden_dim 500 --gamma 24.0 --lr 0.03 --max_step 30000 \
     --batch_size_eval 16 --gpu 0 --valid --test -adv
 
+# TransR 1GPU
 DGLBACKEND=pytorch python3 train.py --model TransR --dataset FB15k --batch_size 1024 \
-    --neg_sample_size 256 --hidden_dim 500 --gamma 24.0 --lr 0.01 --max_step 30000 \
-    --batch_size_eval 16 --gpu 0 --valid --test -adv
-
-DGLBACKEND=pytorch python3 train.py --model RotatE --dataset FB15k --batch_size 1024 \
-    --neg_sample_size 256 --hidden_dim 400 --gamma 12.0 --lr 0.01 --max_step 30000 \
-    --batch_size_eval 16 --gpu 0 --valid --test -adv -de --regularization_coef=1e-4
+    --neg_sample_size 256 --regularization_coef 5e-8 --hidden_dim 200 --gamma 8.0 --lr 0.015 \
+    --batch_size_eval 16 --valid --test -adv --mix_cpu_gpu --eval_interval 100000 \
+    --gpu 0 --num_worker=8 --max_step 32000
+# TransR 8GPU
+DGLBACKEND=pytorch python3 train.py --model TransR --dataset FB15k --batch_size 1024 \
+    --neg_sample_size 256 --regularization_coef 5e-8 --hidden_dim 200 --gamma 8.0 --lr 0.015 \
+    --batch_size_eval 16 --valid --test -adv --mix_cpu_gpu --eval_interval 100000 --num_proc 8 \
+    --gpu 0 1 2 3 4 5 6 7 --num_worker=4 --max_step 4000 --rel_part --async_update
+
+# RotatE 1GPU
+DGLBACKEND=pytorch python3 train.py --model RotatE --dataset FB15k --batch_size 2048 \
+    --neg_sample_size 256 --regularization_coef 1e-07 --hidden_dim 200 --gamma 12.0 --lr 0.009 \
+    --batch_size_eval 16 --valid --test -adv --mix_cpu_gpu --eval_interval 100000 -de \
+    --mix_cpu_gpu --max_step 40000 --gpu 0 --num_worker=4
+
+# RotatE 8GPU
+DGLBACKEND=pytorch python3 train.py --model RotatE --dataset FB15k --batch_size 2048 \
+    --neg_sample_size 256 --regularization_coef 1e-07 --hidden_dim 200 --gamma 12.0 --lr 0.009 \
+    --batch_size_eval 16 --valid --test -adv --mix_cpu_gpu --eval_interval 100000 -de \
+    --mix_cpu_gpu --max_step 5000 --num_proc 8 --gpu 0 1 2 3 4 5 6 7 --num_worker=4 \
+    --rel_part --async_update
 
 # for wn18
-
 DGLBACKEND=pytorch python3 train.py --model TransE_l1 --dataset wn18 --batch_size 1024 \
     --neg_sample_size 512 --hidden_dim 500 --gamma 12.0 --adversarial_temperature 0.5 \
     --lr 0.01 --max_step 40000 --batch_size_eval 16 --gpu 0 --valid --test -adv \