# test.py (forked from quark0/darts)
import argparse
import os, sys
import time
import math
import numpy as np
import torch
import torch.nn as nn
import torch.backends.cudnn as cudnn
import data
import model
from utils import batchify, get_batch, repackage_hidden, create_exp_dir, save_checkpoint
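
# Evaluation-only script: it loads a saved language-model checkpoint and reports
# test-set loss and perplexity on the corpus built by data.Corpus below.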
parser = argparse.ArgumentParser(description='PyTorch PennTreeBank/WikiText2 Language Model')
parser.add_argument('--data', type=str, default='../data/penn/',
                    help='location of the data corpus')
parser.add_argument('--emsize', type=int, default=850,
                    help='size of word embeddings')
parser.add_argument('--nhid', type=int, default=850,
                    help='number of hidden units per layer')
parser.add_argument('--nhidlast', type=int, default=850,
                    help='number of hidden units for the last rnn layer')
parser.add_argument('--lr', type=float, default=20,
                    help='initial learning rate')
parser.add_argument('--clip', type=float, default=0.25,
                    help='gradient clipping')
parser.add_argument('--epochs', type=int, default=8000,
                    help='upper epoch limit')
parser.add_argument('--batch_size', type=int, default=64, metavar='N',
                    help='batch size')
parser.add_argument('--bptt', type=int, default=35,
                    help='sequence length')
parser.add_argument('--dropout', type=float, default=0.75,
                    help='dropout applied to layers (0 = no dropout)')
parser.add_argument('--dropouth', type=float, default=0.3,
                    help='dropout for rnn layers (0 = no dropout)')
parser.add_argument('--dropouti', type=float, default=0.2,
                    help='dropout for input embedding layers (0 = no dropout)')
parser.add_argument('--dropoute', type=float, default=0.2,
                    help='dropout to remove words from embedding layer (0 = no dropout)')
parser.add_argument('--seed', type=int, default=1267,
                    help='random seed')
parser.add_argument('--nonmono', type=int, default=5,
                    help='non-monotone interval')
parser.add_argument('--cuda', action='store_false',
                    help='disable CUDA (CUDA is used by default)')
parser.add_argument('--log-interval', type=int, default=200, metavar='N',
                    help='report interval')
parser.add_argument('--model_path', type=str, default='EXP/model.pt',
                    help='path to load the pretrained model')
parser.add_argument('--alpha', type=float, default=0,
                    help='alpha L2 regularization on RNN activation (alpha = 0 means no regularization)')
parser.add_argument('--beta', type=float, default=1e-3,
                    help='beta slowness regularization applied on RNN activation (beta = 0 means no regularization)')
parser.add_argument('--wdecay', type=float, default=5e-7,
                    help='weight decay applied to all weights')
parser.add_argument('--continue_train', action='store_true',
                    help='continue training from a checkpoint')
parser.add_argument('--n_experts', type=int, default=1,
                    help='number of experts')
parser.add_argument('--max_seq_len_delta', type=int, default=20,
                    help='max sequence length')
parser.add_argument('--gpu', type=int, default=0, help='GPU device to use')
args = parser.parse_args()
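
# Example invocation (paths are illustrative; adjust --data and --model_path to your setup):
#   python test.py --data ../data/penn/ --model_path EXP/model.pt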
def logging(s, print_=True, log_=True):
    print(s)
# Set the random seed manually for reproducibility.
np.random.seed(args.seed)
torch.manual_seed(args.seed)
if torch.cuda.is_available():
    if not args.cuda:
        print("WARNING: You have a CUDA device; CUDA is enabled by default and passing --cuda disables it")
    else:
        torch.cuda.set_device(args.gpu)
        cudnn.benchmark = True
        cudnn.enabled = True
        torch.cuda.manual_seed_all(args.seed)
corpus = data.Corpus(args.data)
test_batch_size = 1
test_data = batchify(corpus.test, test_batch_size, args)
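# batchify (from utils) is expected to lay the corpus out as one column per batch
# element; with test_batch_size = 1 the test set is scored as a single continuous stream.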
def evaluate(data_source, batch_size=10):
    # Turn on evaluation mode which disables dropout.
    model.eval()
    total_loss = 0
    ntokens = len(corpus.dictionary)
    hidden = model.init_hidden(batch_size)
    for i in range(0, data_source.size(0) - 1, args.bptt):
        print(i, data_source.size(0) - 1)  # progress: current offset vs. total length
        data, targets = get_batch(data_source, i, args, evaluation=True)
        targets = targets.view(-1)
        log_prob, hidden = parallel_model(data, hidden)
        loss = nn.functional.nll_loss(log_prob.view(-1, log_prob.size(2)), targets).data
        # Weight each chunk's loss by its sequence length before averaging.
        total_loss += loss * len(data)
        # Detach the hidden state so the graph from previous chunks is not retained.
        hidden = repackage_hidden(hidden)
    return total_loss[0] / len(data_source)
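# The returned value is the average negative log-likelihood per token; the
# perplexity reported at the end of the script is simply exp of this loss.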
# Load the best saved model.
model = torch.load(args.model_path)
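# Note: loading a full pickled model (rather than a state_dict) requires the module
# defining the model class (presumably model.py in this repo) to be importable from here.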
total_params = sum(x.data.nelement() for x in model.parameters())
logging('Args: {}'.format(args))
logging('Model total parameters: {}'.format(total_params))
parallel_model = model.cuda()
# Run on test data.
test_loss = evaluate(test_data, test_batch_size)
logging('=' * 89)
logging('| End of training | test loss {:5.2f} | test ppl {:8.2f}'.format(
    test_loss, math.exp(test_loss)))
logging('=' * 89)