Commit dc5926a

Merge branch 'master' into onmt

soumith authored Jan 17, 2017
2 parents 8254bd4 + 1a9d6b5
Showing 20 changed files with 602 additions and 256 deletions.
1 change: 1 addition & 0 deletions .gitignore
@@ -1,3 +1,4 @@
mnist/data
dcgan/data
+VAE/data
*.pyc
2 changes: 2 additions & 0 deletions README.md
@@ -5,3 +5,5 @@ A repository showcasing examples of using pytorch
- Word level Language Modeling using LSTM RNNs
- Imagenet-12 training with Residual Networks
- Generative Adversarial Networks (DCGAN)
+- Variational Auto-Encoders
+- Superresolution using an efficient sub-pixel convolutional neural network
13 changes: 13 additions & 0 deletions VAE/README.md
@@ -0,0 +1,13 @@
# Basic VAE Example

This is an improved implementation of the paper [Stochastic Gradient VB and the
Variational Auto-Encoder](http://arxiv.org/abs/1312.6114) by Kingma and Welling.
It uses ReLUs and the Adam optimizer instead of sigmoids and Adagrad. These changes make the network converge much faster.

We reuse the data preparation script of the MNIST experiment:

```bash
pip install -r requirements.txt
python ../mnist/data.py
python main.py
```
137 changes: 137 additions & 0 deletions VAE/main.py
@@ -0,0 +1,137 @@
from __future__ import print_function
import os
import torch
import torch.utils.data
import torch.nn as nn
import torch.optim as optim
from torch.autograd import Variable

# Training settings
BATCH_SIZE = 150
TEST_BATCH_SIZE = 1000
NUM_EPOCHS = 2


cuda = torch.cuda.is_available()

print('====> Running with CUDA: {0}'.format(cuda))


assert os.path.exists('data/processed/training.pt'), \
"Please run python ../mnist/data.py before starting the VAE."

# Data
print('====> Loading data')
with open('data/processed/training.pt', 'rb') as f:
training_set = torch.load(f)
with open('data/processed/test.pt', 'rb') as f:
test_set = torch.load(f)

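# Flatten each 28x28 image to a 784-vector and scale pixel values to [0, 1].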
training_data = training_set[0].view(-1, 784).div(255)
test_data = test_set[0].view(-1, 784).div(255)

del training_set
del test_set

if cuda:
    # .cuda() returns a new tensor, so reassign to actually move the data
    training_data = training_data.cuda()
    test_data = test_data.cuda()

train_loader = torch.utils.data.DataLoader(training_data,
batch_size=BATCH_SIZE,
shuffle=True)

test_loader = torch.utils.data.DataLoader(test_data,
batch_size=TEST_BATCH_SIZE)

# Model
print('====> Building model')


class VAE(nn.Module):
def __init__(self):
super(VAE, self).__init__()

self.fc1 = nn.Linear(784, 400)
self.fc21 = nn.Linear(400, 20)
self.fc22 = nn.Linear(400, 20)
self.fc3 = nn.Linear(20, 400)
self.fc4 = nn.Linear(400, 784)
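        # fc1: encoder hidden layer; fc21/fc22: 20-dim latent mean and
        # log-variance heads; fc3/fc4: decoder mirroring the encoder.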

self.relu = nn.ReLU()
self.sigmoid = nn.Sigmoid()

def encode(self, x):
h1 = self.relu(self.fc1(x))
return self.fc21(h1), self.fc22(h1)

def reparametrize(self, mu, logvar):
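        # Reparameterization trick: sample eps ~ N(0, I) and return
        # mu + std * eps, so the stochastic draw stays differentiable
        # with respect to mu and logvar.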
std = logvar.mul(0.5).exp_()
        eps = Variable(std.data.new(std.size()).normal_(), requires_grad=False)  # same device as std
return eps.mul(std).add_(mu)

def decode(self, z):
h3 = self.relu(self.fc3(z))
return self.sigmoid(self.fc4(h3))

def forward(self, x):
mu, logvar = self.encode(x)
z = self.reparametrize(mu, logvar)
return self.decode(z), mu, logvar


model = VAE()
if cuda:
model.cuda()

reconstruction_function = nn.BCELoss()
reconstruction_function.size_average = False
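# Sum the binary cross-entropy over all pixels rather than averaging, so the
# reconstruction term is on the same scale as the summed KL divergence below.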


def loss_function(recon_x, x, mu, logvar):
BCE = reconstruction_function(recon_x, x)

# Appendix B from VAE paper: 0.5 * sum(1 + log(sigma^2) - mu^2 - sigma^2)
KLD_element = mu.pow(2).add_(logvar.exp()).mul_(-1).add_(1).add_(logvar)
KLD = torch.sum(KLD_element).mul_(-0.5)
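    # i.e. KLD = -0.5 * sum(1 + log(sigma^2) - mu^2 - sigma^2), added to the loss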

return BCE + KLD


optimizer = optim.Adam(model.parameters(), lr=1e-3)


def train(epoch):
model.train()
train_loss = 0
for batch in train_loader:
batch = Variable(batch)

optimizer.zero_grad()
recon_batch, mu, logvar = model(batch)
loss = loss_function(recon_batch, batch, mu, logvar)
loss.backward()
        train_loss += loss.data[0]  # accumulate the scalar, not the graph-holding Variable
optimizer.step()

    print('====> Epoch: {} Loss: {:.4f}'.format(
        epoch,
        train_loss / training_data.size(0)))


def test(epoch):
model.eval()
test_loss = 0
for batch in test_loader:
        batch = Variable(batch, volatile=True)  # inference only; no gradients needed

recon_batch, mu, logvar = model(batch)
        test_loss += loss_function(recon_batch, batch, mu, logvar).data[0]

    test_loss /= test_data.size(0)
print('====> Test set results: {:.4f}'.format(test_loss))


for epoch in range(1, NUM_EPOCHS + 1):
train(epoch)
test(epoch)
3 changes: 3 additions & 0 deletions VAE/requirements.txt
@@ -0,0 +1,3 @@
torch
tqdm
six
4 changes: 2 additions & 2 deletions dcgan/main.py
@@ -95,7 +95,7 @@ def weights_init(m):
m.weight.data.normal_(1.0, 0.02)
m.bias.data.fill_(0)

-class _netG(nn.Container):
+class _netG(nn.Module):
def __init__(self, ngpu):
super(_netG, self).__init__()
self.ngpu = ngpu
@@ -133,7 +133,7 @@ def forward(self, input):
netG.load_state_dict(torch.load(opt.netG))
print(netG)

-class _netD(nn.Container):
+class _netD(nn.Module):
def __init__(self, ngpu):
super(_netD, self).__init__()
self.ngpu = ngpu
2 changes: 1 addition & 1 deletion imagenet/README.md
@@ -2,5 +2,5 @@

```bash
pip install -r requirements.txt
-OMP_NUM_THREADS=1 python main.py --data <path to ImageNet>
+python main.py --data <path to ImageNet>
```
76 changes: 50 additions & 26 deletions imagenet/main.py
@@ -11,16 +11,21 @@
import torch.utils.data
import torchvision.transforms as transforms
import torchvision.datasets as datasets
+import torchvision.models as models

-import resnet

+model_names = sorted(name for name in models.__dict__
+                     if name.islower() and not name.startswith("__"))


parser = argparse.ArgumentParser(description='PyTorch ImageNet Training')
-parser.add_argument('--data', metavar='PATH', required=True,
+parser.add_argument('data', metavar='DIR',
help='path to dataset')
parser.add_argument('--arch', '-a', metavar='ARCH', default='resnet18',
-                    help='model architecture: resnet18 | resnet34 | ...'
-                         '(default: resnet18)')
+                    choices=model_names,
+                    help='model architecture: ' +
+                         ' | '.join(model_names) +
+                         ' (default: resnet18)')
parser.add_argument('-j', '--workers', default=4, type=int, metavar='N',
help='number of data loading workers (default: 4)')
parser.add_argument('--epochs', default=90, type=int, metavar='N',
@@ -39,6 +44,10 @@
metavar='N', help='print frequency (default: 10)')
parser.add_argument('--resume', default='', type=str, metavar='PATH',
help='path to latest checkpoint (default: none)')
+parser.add_argument('-e', '--evaluate', dest='evaluate', action='store_true',
+                    help='evaluate model on validation set')
+parser.add_argument('--pretrained', dest='pretrained', action='store_true',
+                    help='use pre-trained model')

best_prec1 = 0

@@ -48,12 +57,18 @@ def main():
args = parser.parse_args()

# create model
-    if args.arch.startswith('resnet'):
+    if args.pretrained:
+        print("=> using pre-trained model '{}'".format(args.arch))
+        model = models.__dict__[args.arch](pretrained=True)
+    else:
        print("=> creating model '{}'".format(args.arch))
-        model = resnet.__dict__[args.arch]()
+        model = models.__dict__[args.arch]()
+
+    if args.arch.startswith('alexnet') or args.arch.startswith('vgg'):
+        model.features = torch.nn.DataParallel(model.features)
+        model.cuda()
    else:
-        parser.error('invalid architecture: {}'.format(args.arch))
+        model = torch.nn.DataParallel(model).cuda()

# optionally resume from a checkpoint
if args.resume:
@@ -63,7 +78,8 @@
args.start_epoch = checkpoint['epoch']
best_prec1 = checkpoint['best_prec1']
model.load_state_dict(checkpoint['state_dict'])
print(" | resuming from epoch {}".format(args.start_epoch))
print("=> loaded checkpoint '{}' (epoch {})"
.format(args.evaluate, checkpoint['epoch']))
else:
print("=> no checkpoint found at '{}'".format(args.resume))

@@ -95,32 +111,31 @@ def main():
batch_size=args.batch_size, shuffle=False,
num_workers=args.workers, pin_memory=True)

-    # parallelize model across all visible GPUs
-    model = torch.nn.DataParallel(model)

    # define loss function (criterion) and optimizer
criterion = nn.CrossEntropyLoss().cuda()

optimizer = torch.optim.SGD(model.parameters(), args.lr,
momentum=args.momentum,
weight_decay=args.weight_decay)

+    if args.evaluate:
+        validate(val_loader, model, criterion)
+        return

for epoch in range(args.start_epoch, args.epochs):
adjust_learning_rate(optimizer, epoch)

# train for one epoch
-        model.train()
train(train_loader, model, criterion, optimizer, epoch)

# evaluate on validation set
-        model.eval()
prec1 = validate(val_loader, model, criterion)

# remember best prec@1 and save checkpoint
is_best = prec1 > best_prec1
best_prec1 = max(prec1, best_prec1)
save_checkpoint({
-            'epoch': epoch,
+            'epoch': epoch + 1,
'arch': args.arch,
'state_dict': model.state_dict(),
'best_prec1': best_prec1,
@@ -134,6 +149,9 @@ def train(train_loader, model, criterion, optimizer, epoch):
top1 = AverageMeter()
top5 = AverageMeter()

+    # switch to train mode
+    model.train()

end = time.time()
for i, (input, target) in enumerate(train_loader):
# measure data loading time
@@ -149,9 +167,9 @@ def train(train_loader, model, criterion, optimizer, epoch):

# measure accuracy and record loss
prec1, prec5 = accuracy(output.data, target, topk=(1, 5))
-        losses.update(loss.data[0])
-        top1.update(prec1[0])
-        top5.update(prec5[0])
+        losses.update(loss.data[0], input.size(0))
+        top1.update(prec1[0], input.size(0))
+        top5.update(prec5[0], input.size(0))

# compute gradient and do SGD step
optimizer.zero_grad()
@@ -179,6 +197,9 @@ def validate(val_loader, model, criterion):
top1 = AverageMeter()
top5 = AverageMeter()

+    # switch to evaluate mode
+    model.eval()

end = time.time()
for i, (input, target) in enumerate(val_loader):
target = target.cuda(async=True)
@@ -191,9 +212,9 @@ def validate(val_loader, model, criterion):

# measure accuracy and record loss
prec1, prec5 = accuracy(output.data, target, topk=(1, 5))
-        losses.update(loss.data[0])
-        top1.update(prec1[0])
-        top5.update(prec5[0])
+        losses.update(loss.data[0], input.size(0))
+        top1.update(prec1[0], input.size(0))
+        top5.update(prec5[0], input.size(0))

# measure elapsed time
batch_time.update(time.time() - end)
@@ -208,6 +229,9 @@ def validate(val_loader, model, criterion):
i, len(val_loader), batch_time=batch_time, loss=losses,
top1=top1, top5=top5))

+    print(' * Prec@1 {top1.avg:.3f} Prec@5 {top5.avg:.3f}'
+          .format(top1=top1, top5=top5))

return top1.avg


@@ -226,13 +250,13 @@ def reset(self):
self.val = 0
self.avg = 0
self.sum = 0
-        self.n = 0
+        self.count = 0

-    def update(self, val):
+    def update(self, val, n=1):
self.val = val
-        self.sum += val
-        self.n += 1
-        self.avg = self.sum / self.n
+        self.sum += val * n
+        self.count += n
+        self.avg = self.sum / self.count


def adjust_learning_rate(optimizer, epoch):
@@ -247,7 +271,7 @@ def accuracy(output, target, topk=(1,)):
maxk = max(topk)
batch_size = target.size(0)

-    _, pred = output.topk(maxk, True, True)
+    _, pred = output.topk(maxk, 1, True, True)
pred = pred.t()
correct = pred.eq(target.view(1, -1).expand_as(pred))

