Commit: updates

Anirudh Goyal committed Aug 19, 2020
2 parents 92faff9 + 6ec857e, commit 4df3e3e

Showing 13 changed files with 45 additions and 46 deletions.
32 changes: 18 additions & 14 deletions README.md
@@ -1,18 +1,22 @@
-# Recurrent Independent Mechanism
+# Recurrent Independent Mechanisms
 An implementation of [Recurrent Independent Mechanisms](https://arxiv.org/abs/1909.10893) (Goyal et al., 2019) in PyTorch.

 [Anirudh Goyal](https://anirudh9119.github.io/), [Alex Lamb](https://alexlamb62.github.io/), [Jordan Hoffmann](https://jhoffmann.org/), [Shagun Sodhani](https://mila.quebec/en/person/shagun-sodhani/), [Sergey Levine](https://people.eecs.berkeley.edu/~svlevine/), [Yoshua Bengio](https://mila.quebec/en/yoshua-bengio/), [Bernhard Schölkopf](https://www.is.mpg.de/~bs)

 It features the adding and copying synthetic tasks from the paper.

 # Examples
 `./experiment_copying.sh 600 6 4 50 200` for a full training & test run of RIMs on the copying task.

 `./experiment_adding.sh 600 6 4 50 200 0.2` for a full training & test run of RIMs on the adding task.
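Per the experiment scripts in this commit, the positional arguments are, in order: hidden size (reused as the embedding size), number of RIMs, number of active RIMs per step (top-k), training sequence length, and test sequence length; `experiment_adding.sh` additionally takes the dropout rate as a sixth argument.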

 @article{goyal2019recurrent,
   title={Recurrent independent mechanisms},
   author={Goyal, Anirudh and Lamb, Alex and Hoffmann, Jordan and Sodhani, Shagun and Levine, Sergey and Bengio, Yoshua and Sch{\"o}lkopf, Bernhard},
   journal={arXiv preprint arXiv:1909.10893},
   year={2019}
 }
8 changes: 4 additions & 4 deletions event_based/experiment_adding.sh
@@ -1,7 +1,7 @@
 #!/bin/bash
 echo Running on $HOSTNAME
-source /home/anirudh/.bashrc
-conda activate torch1
+#source /home/anirudh/.bashrc
+#conda activate torch1
 run=1
 lr=.001
 dim1=$1
@@ -12,7 +12,7 @@ log=100
 train_len=$4
 test_len=$5
 drop=$6
-name="/home/anirudh/RIMs_release/event_based/Blocks_adding/Blocks_"$dim1"_"$em"_"$block1"_"$topk1"_"$drop"_"$lr"_"$log"_"$train_len"_"$test_len
+name="Blocks_adding/Blocks_"$dim1"_"$em"_"$block1"_"$topk1"_"$drop"_"$lr"_"$log"_"$train_len"_"$test_len
 name="${name//./}"
 echo Running version $name
-python /home/anirudh/RIMs_release/event_based/train_adding.py --cuda --cudnn --algo blocks --name $name --lr $lr --drop $drop --nhid $dim1 --num_blocks $block1 --topk $topk1 --use_inactive --nlayers 1 --emsize $em --log-interval $log --train_len $train_len --test_len $test_len --clip 0.1
+python3 train_adding.py --cuda --cudnn --algo blocks --name $name --lr $lr --drop $drop --nhid $dim1 --num_blocks $block1 --topk $topk1 --use_inactive --nlayers 1 --emsize $em --log-interval $log --train_len $train_len --test_len $test_len --clip 0.1
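Two things to note in this script update: the `source /home/anirudh/.bashrc` and `conda activate torch1` lines are commented out rather than deleted, leaving environment setup to the user, and the run name is now a relative path instead of an absolute one under a specific home directory. The `name="${name//./}"` line that follows is bash pattern substitution: it deletes every `.` from the run name, so a learning rate of `.001` contributes `001` and the generated directory name contains no dots. The remaining experiment scripts below receive the same treatment.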
6 changes: 2 additions & 4 deletions event_based/experiment_cifar_1layered.sh
@@ -1,15 +1,13 @@
 #!/bin/bash
 echo Running on $HOSTNAME
-source /home/anirudh/.bashrc
-conda activate torch1
 lr=.0007
 dim1=$1
 em=$1
 block1=$2
 topk1=$3
 drop=0.2
 log=100
-name="/home/anirudh/icml_blocks/sparse_relational/Blocks_Cifar/Blocks_"$dim1"_"$em"_"$block1"_"$topk1"_FALSE_"$drop"_"$lr"_"$log
+name="Blocks_Cifar/Blocks_"$dim1"_"$em"_"$block1"_"$topk1"_FALSE_"$drop"_"$lr"_"$log
 name="${name//./}"
 echo Running version $name
-python /home/anirudh/icml_blocks/sparse_relational/train_cifar.py --cuda --cudnn --algo blocks --name $name --lr $lr --drop $drop --nhid $dim1 --num_blocks $block1 --topk $topk1 --nlayers 1 --emsize $em --log-interval $log
+python3 train_cifar.py --cuda --cudnn --algo blocks --name $name --lr $lr --drop $drop --nhid $dim1 --num_blocks $block1 --topk $topk1 --nlayers 1 --emsize $em --log-interval $log
6 changes: 2 additions & 4 deletions event_based/experiment_copying.sh
@@ -1,7 +1,5 @@
 #!/bin/bash
 echo Running on $HOSTNAME
-source /home/anirudh/.bashrc
-conda activate torch1
 lr=.001
 dim1=$1
 em=$1
@@ -11,7 +9,7 @@ drop=0.5
 log=100
 train_len=$4
 test_len=$5
-name="/home/anirudh/RIMs_release/event_based/Blocks_copying/Blocks_"$dim1"_"$em"_"$block1"_"$topk1"_FALSE_"$drop"_"$lr"_"$log"_"$train_len"_"$test_len
+name="Blocks_Copying/Blocks_"$dim1"_"$em"_"$block1"_"$topk1"_FALSE_"$drop"_"$lr"_"$log"_"$train_len"_"$test_len
 name="${name//./}"
 echo Running version $name
-python /home/anirudh/RIMs_release/event_based/train_copying.py --cuda --cudnn --algo blocks --name $name --lr $lr --drop $drop --nhid $dim1 --num_blocks $block1 --topk $topk1 --nlayers 1 --emsize $em --log-interval $log --train_len $train_len --test_len $test_len
+python3 train_copying.py --cuda --cudnn --algo blocks --name $name --lr $lr --drop $drop --nhid $dim1 --num_blocks $block1 --topk $topk1 --nlayers 1 --emsize $em --log-interval $log --train_len $train_len --test_len $test_len
6 changes: 2 additions & 4 deletions event_based/experiment_copying_lstm.sh
@@ -1,7 +1,5 @@
 #!/bin/bash
 echo Running on $HOSTNAME
-source /home/anirudh/.bashrc
-conda activate torch1
 lr=.001
 dim1=$1
 em=$1
@@ -11,7 +9,7 @@ memory_slot=1
 memory_heads=1
 memory_size_head=1
 gate_style='unit'
-name="/home/anirudh/blocks/sparse_relational/Blocks_copying/LSTM_"$dim1"_"$em"_FALSE_"$drop"_"$lr"_"$log
+name="Blocks_Copying/LSTM_"$dim1"_"$em"_FALSE_"$drop"_"$lr"_"$log
 name="${name//./}"
 echo Running version $name
-python /home/anirudh/blocks/sparse_relational/train_copying.py --cuda --cudnn --algo lstm --name $name --lr $lr --drop $drop --nhid $dim1 --nlayers 1 --emsize $em --log-interval $log
+python3 train_copying.py --cuda --cudnn --algo lstm --name $name --lr $lr --drop $drop --nhid $dim1 --nlayers 1 --emsize $em --log-interval $log
6 changes: 2 additions & 4 deletions event_based/experiment_mnist_1layered.sh
@@ -1,7 +1,5 @@
 #!/bin/bash
 echo Running on $HOSTNAME
-source /home/anirudh/.bashrc
-conda activate torch1
 run=1
 lr=.0007
 dim1=$1
@@ -10,7 +8,7 @@ block1=$2
 topk1=$3
 drop=0.5
 log=100
-name="/home/anirudh/RIMs_release/event_based/Blocks_MNIST/Blocks_"$dim1"_"$em"_"$block1"_"$topk1"_"$drop"_"$lr"_"$log
+name="Blocks_MNIST/Blocks_"$dim1"_"$em"_"$block1"_"$topk1"_"$drop"_"$lr"_"$log
 name="${name//./}"
 echo Running version $name
-python /home/anirudh/RIMs_release/event_based/train_mnist.py --cuda --cudnn --algo blocks --name $name --lr $lr --drop $drop --nhid $dim1 --num_blocks $block1 --topk $topk1 --nlayers 1 --emsize $em --log-interval $log
+python3 train_mnist.py --cuda --cudnn --algo blocks --name $name --lr $lr --drop $drop --nhid $dim1 --num_blocks $block1 --topk $topk1 --nlayers 1 --emsize $em --log-interval $log
6 changes: 3 additions & 3 deletions event_based/mnist_seq_data_classify.py
@@ -25,9 +25,9 @@
 y: (784,50000) int32.
 '''

-def mnist_data():
-    mnist_trainset = datasets.MNIST(root='/home/anirudh/blocks/sparse_relational/data', train=True, download=True, transform=torchvision.transforms.Compose([torchvision.transforms.ToTensor()]))
-    mnist_testset = datasets.MNIST(root='/home/anirudh/blocks/sparse_relational/data', train=False, download=True, transform=torchvision.transforms.Compose([torchvision.transforms.ToTensor()]))
+def mnist_data(path):
+    mnist_trainset = datasets.MNIST(root=path, train=True, download=True, transform=torchvision.transforms.Compose([torchvision.transforms.ToTensor()]))
+    mnist_testset = datasets.MNIST(root=path, train=False, download=True, transform=torchvision.transforms.Compose([torchvision.transforms.ToTensor()]))

     num_val = len(mnist_trainset) // 5
     np.random.seed(0)
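With the user-specific path removed, the caller now chooses the dataset root. A minimal usage sketch of the new signature; the bare import assumes the script is run from the repository's `event_based/` directory, and the three-loader return value matches the call site in `train_mnist.py` below:

```python
import os

from mnist_seq_data_classify import mnist_data

# The dataset root is caller-supplied; download=True inside mnist_data
# fetches MNIST into this directory on first use.
data_root = os.path.join(os.getcwd(), 'mnist_data')
train_loader, val_loader, test_loader = mnist_data(path=data_root)
```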
19 changes: 11 additions & 8 deletions event_based/train_copying.py
@@ -268,13 +268,14 @@ def evaluate_(copy_x, copy_y):
     with torch.no_grad():
         for i in range(num_batches):
             batch_ind = random.randint(0, num_batches-1)
-            data = Variable(copy_x[batch_ind].cuda())
-            targets = Variable(copy_y[batch_ind].cuda())
+            data = Variable(copy_x[batch_ind].cuda()) if args.cuda else Variable(copy_x[batch_ind])
+            targets = Variable(copy_y[batch_ind].cuda()) if args.cuda else Variable(copy_y[batch_ind])
             #output, hidden, extra_loss = model(data, hidden)
             output, hidden, extra_loss, _, _ = model(data, hidden, calc_mask)
             if not args.adaptivesoftmax:
-                loss = criterion(output.view(-1, ntokens), targets.view((args.test_len + 20)*64))
+                loss = criterion(output.view(-1, ntokens), targets.reshape((args.test_len + 20) * 64))
             else:
+                raise Exception('not implemented')
                 _, loss = criterion_adaptive(output.view(-1, args.nhid), targets)
             total_loss += loss.item()
             hidden = repackage_hidden(hidden)
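A note on the `targets.view(...)` to `targets.reshape(...)` change: `Tensor.view` requires the underlying memory to be contiguous and raises a `RuntimeError` otherwise, while `reshape` falls back to a copy when no stride-compatible view exists. A minimal, self-contained illustration (not repository code):

```python
import torch

t = torch.randn(4, 6).transpose(0, 1)  # transpose leaves t non-contiguous
# t.view(-1) would raise a RuntimeError here, because no stride-compatible
# view of this memory exists; reshape copies the data instead.
flat = t.reshape(-1)
print(flat.shape)  # torch.Size([24])
```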
@@ -301,23 +302,25 @@ def train(epoch):
         batch_ind = random.randint(0, num_batches-1)

         #data, targets = get_batch(train_data, i)
-        data = Variable(copy_x[batch_ind].cuda())
-        targets = Variable(copy_y[batch_ind].cuda())
+        data = Variable(copy_x[batch_ind].cuda()) if args.cuda else Variable(copy_x[batch_ind])
+        targets = Variable(copy_y[batch_ind].cuda()) if args.cuda else Variable(copy_y[batch_ind])

-        torch.cuda.synchronize()
+        if args.cuda:
+            torch.cuda.synchronize()
         forward_start_time = time.time()
         hidden = repackage_hidden(hidden)
         model.zero_grad()

         output, hidden, extra_loss, masks, sample_masks = model(data, hidden, calc_mask)
         if not args.adaptivesoftmax:
-            loss = criterion(output.view(-1, ntokens), targets.view((args.train_len + 20)*64))
+            loss = criterion(output.view(-1, ntokens), targets.reshape((args.train_len + 20) * 64))
         else:
+            raise Exception('not implemented')
             _, loss = criterion_adaptive(output.view(-1, args.nhid), targets)
         total_loss += loss.item()

-        torch.cuda.synchronize()
+        if args.cuda:
+            torch.cuda.synchronize()

         forward_elapsed = time.time() - forward_start_time
         forward_elapsed_time += forward_elapsed
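Guarding `.cuda()` and `torch.cuda.synchronize()` behind `args.cuda` is what makes the script runnable on CPU-only machines: both calls require a CUDA device, and since CUDA kernels launch asynchronously, synchronizing before reading the clock makes the timer measure actual compute rather than kernel launches. A standalone sketch of the pattern; the `use_cuda` flag and `timed_step` helper are illustrative, not repository code:

```python
import time

import torch

use_cuda = torch.cuda.is_available()  # stand-in for args.cuda

def timed_step(x):
    # Move inputs to the GPU only when CUDA was requested.
    x = x.cuda() if use_cuda else x
    if use_cuda:
        # Drain any pending kernels so the timer below starts clean.
        torch.cuda.synchronize()
    start = time.time()
    y = x @ x.t()  # placeholder for the model's forward pass
    if use_cuda:
        # Wait for the GPU to finish before reading the clock.
        torch.cuda.synchronize()
    return y, time.time() - start

out, elapsed = timed_step(torch.randn(64, 64))
```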
2 changes: 1 addition & 1 deletion event_based/train_mnist.py
@@ -138,7 +138,7 @@ def none_or_str(value):

 # Get Data Loaders

-train_loader, val_loader, test_loader = mnist_data()
+train_loader, val_loader, test_loader = mnist_data(path=os.getcwd() + '/mnist_data')

 # Starting from sequential data, batchify arranges the dataset into columns.
 # For instance, with the alphabet as the sequence and batch size 4, we'd get
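Note that `os.getcwd()` resolves to the directory the script is launched from, so starting `train_mnist.py` from `event_based/` places the MNIST download in `event_based/mnist_data`.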
