forked from u784799i/biLSTM_attn
data_loder.py
import numpy as np
import torch

import const


class DataLoader(object):
    """Iterates over (padded sentence batch, label batch) pairs."""

    def __init__(self, src_sents, label, max_len, cuda=True,
                 batch_size=64, shuffle=True, evaluation=False):
        self.cuda = cuda
        self.sents_size = len(src_sents)
        self._step = 0
        # Integer division: a trailing partial batch is dropped.
        self._stop_step = self.sents_size // batch_size

        # `evaluation` used to set volatile=True on the old Variable API,
        # which was removed in PyTorch 0.4; wrap evaluation passes in
        # torch.no_grad() instead.
        self.evaluation = evaluation
        self._batch_size = batch_size
        self._max_len = max_len
        # dtype=object keeps the ragged (unpadded) sentence lists intact.
        self._src_sents = np.asarray(src_sents, dtype=object)
        self._label = np.asarray(label)

        if shuffle:
            self._shuffle()

    def _shuffle(self):
        # Apply the same random permutation to sentences and labels.
        indices = np.arange(self._src_sents.shape[0])
        np.random.shuffle(indices)
        self._src_sents = self._src_sents[indices]
        self._label = self._label[indices]

    def __iter__(self):
        return self

    def __next__(self):
        def pad_to_longest(insts, max_len):
            # Right-pad every sentence to max_len with the PAD index
            # (sentences are assumed to be at most max_len tokens long).
            inst_data = np.array(
                [inst + [const.PAD] * (max_len - len(inst)) for inst in insts])
            inst_data_tensor = torch.from_numpy(inst_data)
            if self.cuda:
                inst_data_tensor = inst_data_tensor.cuda()
            return inst_data_tensor

        if self._step == self._stop_step:
            self._step = 0
            raise StopIteration()

        _start = self._step * self._batch_size
        _bsz = self._batch_size
        self._step += 1

        data = pad_to_longest(self._src_sents[_start:_start + _bsz], self._max_len)
        label = torch.from_numpy(self._label[_start:_start + _bsz])
        if self.cuda:
            label = label.cuda()

        return data, label
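
A minimal usage sketch, assuming a `const` module that defines the `PAD` index (as elsewhere in this repo); the toy sentences, labels, and sizes below are made up for illustration:

# Hypothetical toy data: word-index sentences of uneven length, binary labels.
sents = [[4, 9, 2], [7, 3, 8, 1], [5, 6]]
labels = [0, 1, 1]

loader = DataLoader(sents, labels, max_len=5, cuda=False, batch_size=2)

# One full batch is produced (3 // 2 == 1); the partial batch is dropped.
for data, label in loader:
    print(data.shape, label.shape)  # torch.Size([2, 5]) torch.Size([2])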