Commit: updates

Anirudh Goyal committed Aug 19, 2020
2 parents 92faff9 + 6ec857e, commit 4df3e3e

Showing 13 changed files with 45 additions and 46 deletions.
32 changes: 18 additions & 14 deletions README.md
@@ -1,18 +1,22 @@
-# Recurrent Independent Mechanism
+# Recurrent Independent Mechanisms
 An implementation of [Recurrent Independent Mechanisms](https://arxiv.org/abs/1909.10893) (Goyal et al., 2019) in PyTorch.

 [Anirudh Goyal](https://anirudh9119.github.io/), [Alex Lamb](https://alexlamb62.github.io/), [Jordan Hoffmann](https://jhoffmann.org/), [Shagun Sodhani](https://mila.quebec/en/person/shagun-sodhani/), [Sergey Levine](https://people.eecs.berkeley.edu/~svlevine/), [Yoshua Bengio](https://mila.quebec/en/yoshua-bengio/), [Bernhard Schölkopf](https://www.is.mpg.de/~bs)

 It features the adding and copying synthetic tasks from the paper.

 # Examples
 `./experiment_copying.sh 600 6 4 50 200` for a full training & test run of RIMs on the copying task.

 `./experiment_adding.sh 600 6 4 50 200 0.2` for a full training & test run of RIMs on the adding task.
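Per the experiment scripts in this commit, the positional arguments are, in order: hidden size (reused as the embedding size), number of RIMs, number of active RIMs per step (top-k), training sequence length, and test sequence length; `experiment_adding.sh` additionally takes the dropout rate as a sixth argument.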

 @article{goyal2019recurrent,
   title={Recurrent independent mechanisms},
   author={Goyal, Anirudh and Lamb, Alex and Hoffmann, Jordan and Sodhani, Shagun and Levine, Sergey and Bengio, Yoshua and Sch{\"o}lkopf, Bernhard},
   journal={arXiv preprint arXiv:1909.10893},
   year={2019}
 }
8 changes: 4 additions & 4 deletions event_based/experiment_adding.sh
@@ -1,7 +1,7 @@
 #!/bin/bash
 echo Running on $HOSTNAME
-source /home/anirudh/.bashrc
-conda activate torch1
+#source /home/anirudh/.bashrc
+#conda activate torch1
 run=1
 lr=.001
 dim1=$1
@@ -12,7 +12,7 @@ log=100
 train_len=$4
 test_len=$5
 drop=$6
-name="/home/anirudh/RIMs_release/event_based/Blocks_adding/Blocks_"$dim1"_"$em"_"$block1"_"$topk1"_"$drop"_"$lr"_"$log"_"$train_len"_"$test_len
+name="Blocks_adding/Blocks_"$dim1"_"$em"_"$block1"_"$topk1"_"$drop"_"$lr"_"$log"_"$train_len"_"$test_len
 name="${name//./}"
 echo Running version $name
-python /home/anirudh/RIMs_release/event_based/train_adding.py --cuda --cudnn --algo blocks --name $name --lr $lr --drop $drop --nhid $dim1 --num_blocks $block1 --topk $topk1 --use_inactive --nlayers 1 --emsize $em --log-interval $log --train_len $train_len --test_len $test_len --clip 0.1
+python3 train_adding.py --cuda --cudnn --algo blocks --name $name --lr $lr --drop $drop --nhid $dim1 --num_blocks $block1 --topk $topk1 --use_inactive --nlayers 1 --emsize $em --log-interval $log --train_len $train_len --test_len $test_len --clip 0.1
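Two things to note in this script update: the `source /home/anirudh/.bashrc` and `conda activate torch1` lines are commented out rather than deleted, leaving environment setup to the user, and the run name is now a relative path instead of an absolute one under a specific home directory. The `name="${name//./}"` line that follows is bash pattern substitution: it deletes every `.` from the run name, so a learning rate of `.001` contributes `001` and the generated directory name contains no dots. The remaining experiment scripts below receive the same treatment.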
6 changes: 2 additions & 4 deletions event_based/experiment_cifar_1layered.sh
@@ -1,15 +1,13 @@
 #!/bin/bash
 echo Running on $HOSTNAME
-source /home/anirudh/.bashrc
-conda activate torch1
 lr=.0007
 dim1=$1
 em=$1
 block1=$2
 topk1=$3
 drop=0.2
 log=100
-name="/home/anirudh/icml_blocks/sparse_relational/Blocks_Cifar/Blocks_"$dim1"_"$em"_"$block1"_"$topk1"_FALSE_"$drop"_"$lr"_"$log
+name="Blocks_Cifar/Blocks_"$dim1"_"$em"_"$block1"_"$topk1"_FALSE_"$drop"_"$lr"_"$log
 name="${name//./}"
 echo Running version $name
-python /home/anirudh/icml_blocks/sparse_relational/train_cifar.py --cuda --cudnn --algo blocks --name $name --lr $lr --drop $drop --nhid $dim1 --num_blocks $block1 --topk $topk1 --nlayers 1 --emsize $em --log-interval $log
+python3 train_cifar.py --cuda --cudnn --algo blocks --name $name --lr $lr --drop $drop --nhid $dim1 --num_blocks $block1 --topk $topk1 --nlayers 1 --emsize $em --log-interval $log
6 changes: 2 additions & 4 deletions event_based/experiment_copying.sh
@@ -1,7 +1,5 @@
 #!/bin/bash
 echo Running on $HOSTNAME
-source /home/anirudh/.bashrc
-conda activate torch1
 lr=.001
 dim1=$1
 em=$1
@@ -11,7 +9,7 @@ drop=0.5
 log=100
 train_len=$4
 test_len=$5
-name="/home/anirudh/RIMs_release/event_based/Blocks_copying/Blocks_"$dim1"_"$em"_"$block1"_"$topk1"_FALSE_"$drop"_"$lr"_"$log"_"$train_len"_"$test_len
+name="Blocks_Copying/Blocks_"$dim1"_"$em"_"$block1"_"$topk1"_FALSE_"$drop"_"$lr"_"$log"_"$train_len"_"$test_len
 name="${name//./}"
 echo Running version $name
-python /home/anirudh/RIMs_release/event_based/train_copying.py --cuda --cudnn --algo blocks --name $name --lr $lr --drop $drop --nhid $dim1 --num_blocks $block1 --topk $topk1 --nlayers 1 --emsize $em --log-interval $log --train_len $train_len --test_len $test_len
+python3 train_copying.py --cuda --cudnn --algo blocks --name $name --lr $lr --drop $drop --nhid $dim1 --num_blocks $block1 --topk $topk1 --nlayers 1 --emsize $em --log-interval $log --train_len $train_len --test_len $test_len
6 changes: 2 additions & 4 deletions event_based/experiment_copying_lstm.sh
@@ -1,7 +1,5 @@
 #!/bin/bash
 echo Running on $HOSTNAME
-source /home/anirudh/.bashrc
-conda activate torch1
 lr=.001
 dim1=$1
 em=$1
@@ -11,7 +9,7 @@ memory_slot=1
 memory_heads=1
 memory_size_head=1
 gate_style='unit'
-name="/home/anirudh/blocks/sparse_relational/Blocks_copying/LSTM_"$dim1"_"$em"_FALSE_"$drop"_"$lr"_"$log
+name="Blocks_Copying/LSTM_"$dim1"_"$em"_FALSE_"$drop"_"$lr"_"$log
 name="${name//./}"
 echo Running version $name
-python /home/anirudh/blocks/sparse_relational/train_copying.py --cuda --cudnn --algo lstm --name $name --lr $lr --drop $drop --nhid $dim1 --nlayers 1 --emsize $em --log-interval $log
+python3 train_copying.py --cuda --cudnn --algo lstm --name $name --lr $lr --drop $drop --nhid $dim1 --nlayers 1 --emsize $em --log-interval $log
6 changes: 2 additions & 4 deletions event_based/experiment_mnist_1layered.sh
@@ -1,7 +1,5 @@
 #!/bin/bash
 echo Running on $HOSTNAME
-source /home/anirudh/.bashrc
-conda activate torch1
 run=1
 lr=.0007
 dim1=$1
@@ -10,7 +8,7 @@ block1=$2
 topk1=$3
 drop=0.5
 log=100
-name="/home/anirudh/RIMs_release/event_based/Blocks_MNIST/Blocks_"$dim1"_"$em"_"$block1"_"$topk1"_"$drop"_"$lr"_"$log
+name="Blocks_MNIST/Blocks_"$dim1"_"$em"_"$block1"_"$topk1"_"$drop"_"$lr"_"$log
 name="${name//./}"
 echo Running version $name
-python /home/anirudh/RIMs_release/event_based/train_mnist.py --cuda --cudnn --algo blocks --name $name --lr $lr --drop $drop --nhid $dim1 --num_blocks $block1 --topk $topk1 --nlayers 1 --emsize $em --log-interval $log
+python3 train_mnist.py --cuda --cudnn --algo blocks --name $name --lr $lr --drop $drop --nhid $dim1 --num_blocks $block1 --topk $topk1 --nlayers 1 --emsize $em --log-interval $log
6 changes: 3 additions & 3 deletions event_based/mnist_seq_data_classify.py
@@ -25,9 +25,9 @@
 y: (784,50000) int32.
 '''

-def mnist_data():
-    mnist_trainset = datasets.MNIST(root='/home/anirudh/blocks/sparse_relational/data', train=True, download=True, transform=torchvision.transforms.Compose([torchvision.transforms.ToTensor()]))
-    mnist_testset = datasets.MNIST(root='/home/anirudh/blocks/sparse_relational/data', train=False, download=True, transform=torchvision.transforms.Compose([torchvision.transforms.ToTensor()]))
+def mnist_data(path):
+    mnist_trainset = datasets.MNIST(root=path, train=True, download=True, transform=torchvision.transforms.Compose([torchvision.transforms.ToTensor()]))
+    mnist_testset = datasets.MNIST(root=path, train=False, download=True, transform=torchvision.transforms.Compose([torchvision.transforms.ToTensor()]))

     num_val = len(mnist_trainset) // 5
     np.random.seed(0)
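With the user-specific path removed, the caller now chooses the dataset root. A minimal usage sketch of the new signature; the bare import assumes the script is run from the repository's `event_based/` directory, and the three-loader return value matches the call site in `train_mnist.py` below:

```python
import os

from mnist_seq_data_classify import mnist_data

# The dataset root is caller-supplied; download=True inside mnist_data
# fetches MNIST into this directory on first use.
data_root = os.path.join(os.getcwd(), 'mnist_data')
train_loader, val_loader, test_loader = mnist_data(path=data_root)
```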
19 changes: 11 additions & 8 deletions event_based/train_copying.py
@@ -268,13 +268,14 @@ def evaluate_(copy_x, copy_y):
     with torch.no_grad():
         for i in range(num_batches):
             batch_ind = random.randint(0, num_batches-1)
-            data = Variable(copy_x[batch_ind].cuda())
-            targets = Variable(copy_y[batch_ind].cuda())
+            data = Variable(copy_x[batch_ind].cuda()) if args.cuda else Variable(copy_x[batch_ind])
+            targets = Variable(copy_y[batch_ind].cuda()) if args.cuda else Variable(copy_y[batch_ind])
             #output, hidden, extra_loss = model(data, hidden)
             output, hidden, extra_loss, _, _ = model(data, hidden, calc_mask)
             if not args.adaptivesoftmax:
-                loss = criterion(output.view(-1, ntokens), targets.view((args.test_len + 20)*64))
+                loss = criterion(output.view(-1, ntokens), targets.reshape((args.test_len + 20) * 64))
             else:
+                raise Exception('not implemented')
                 _, loss = criterion_adaptive(output.view(-1, args.nhid), targets)
             total_loss += loss.item()
             hidden = repackage_hidden(hidden)
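A note on the `targets.view(...)` to `targets.reshape(...)` change: `Tensor.view` requires the underlying memory to be contiguous and raises a `RuntimeError` otherwise, while `reshape` falls back to a copy when no stride-compatible view exists. A minimal, self-contained illustration (not repository code):

```python
import torch

t = torch.randn(4, 6).transpose(0, 1)  # transpose leaves t non-contiguous
# t.view(-1) would raise a RuntimeError here, because no stride-compatible
# view of this memory exists; reshape copies the data instead.
flat = t.reshape(-1)
print(flat.shape)  # torch.Size([24])
```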
@@ -301,23 +302,25 @@ def train(epoch):
         batch_ind = random.randint(0, num_batches-1)

         #data, targets = get_batch(train_data, i)
-        data = Variable(copy_x[batch_ind].cuda())
-        targets = Variable(copy_y[batch_ind].cuda())
+        data = Variable(copy_x[batch_ind].cuda()) if args.cuda else Variable(copy_x[batch_ind])
+        targets = Variable(copy_y[batch_ind].cuda()) if args.cuda else Variable(copy_y[batch_ind])

-        torch.cuda.synchronize()
+        if args.cuda:
+            torch.cuda.synchronize()
         forward_start_time = time.time()
         hidden = repackage_hidden(hidden)
         model.zero_grad()

         output, hidden, extra_loss, masks, sample_masks = model(data, hidden, calc_mask)
         if not args.adaptivesoftmax:
-            loss = criterion(output.view(-1, ntokens), targets.view((args.train_len + 20)*64))
+            loss = criterion(output.view(-1, ntokens), targets.reshape((args.train_len + 20) * 64))
         else:
+            raise Exception('not implemented')
             _, loss = criterion_adaptive(output.view(-1, args.nhid), targets)
         total_loss += loss.item()

-        torch.cuda.synchronize()
+        if args.cuda:
+            torch.cuda.synchronize()

         forward_elapsed = time.time() - forward_start_time
         forward_elapsed_time += forward_elapsed
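Guarding `.cuda()` and `torch.cuda.synchronize()` behind `args.cuda` is what makes the script runnable on CPU-only machines: both calls require a CUDA device, and since CUDA kernels launch asynchronously, synchronizing before reading the clock makes the timer measure actual compute rather than kernel launches. A standalone sketch of the pattern; the `use_cuda` flag and `timed_step` helper are illustrative, not repository code:

```python
import time

import torch

use_cuda = torch.cuda.is_available()  # stand-in for args.cuda

def timed_step(x):
    # Move inputs to the GPU only when CUDA was requested.
    x = x.cuda() if use_cuda else x
    if use_cuda:
        # Drain any pending kernels so the timer below starts clean.
        torch.cuda.synchronize()
    start = time.time()
    y = x @ x.t()  # placeholder for the model's forward pass
    if use_cuda:
        # Wait for the GPU to finish before reading the clock.
        torch.cuda.synchronize()
    return y, time.time() - start

out, elapsed = timed_step(torch.randn(64, 64))
```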
2 changes: 1 addition & 1 deletion event_based/train_mnist.py
@@ -138,7 +138,7 @@ def none_or_str(value):

 # Get Data Loaders

-train_loader, val_loader, test_loader = mnist_data()
+train_loader, val_loader, test_loader = mnist_data(path=os.getcwd() + '/mnist_data')

 # Starting from sequential data, batchify arranges the dataset into columns.
 # For instance, with the alphabet as the sequence and batch size 4, we'd get
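Note that `os.getcwd()` resolves to the directory the script is launched from, so starting `train_mnist.py` from `event_based/` places the MNIST download in `event_based/mnist_data`.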
