Skip to content

Commit

Permalink
Add a device-selection function to BasicBert
Browse files Browse the repository at this point in the history
  • Loading branch information
920232796 committed Feb 6, 2021
1 parent 33d106e commit 2e6f8d9
Show file tree
Hide file tree
Showing 26 changed files with 85 additions and 85 deletions.
8 changes: 7 additions & 1 deletion bert_seq2seq/basic_bert.py
Original file line number Diff line number Diff line change
Expand Up @@ -7,6 +7,7 @@
class BasicBert(nn.Module):
def __init__(self):
super().__init__()
self.device = torch.device("cpu")

def load_pretrain_params(self, pretrain_model_path, keep_tokens=None):
checkpoint = torch.load(pretrain_model_path)
Expand All @@ -31,6 +32,11 @@ def load_all_params(self, model_path, device="cuda"):

def forward(self, x):
    """Abstract forward pass; concrete subclasses must override this.

    Args:
        x: model input tensor (shape is subclass-specific).

    Raises:
        NotImplementedError: always, when the subclass has not overridden it.
    """
    # Bug fix: the original `raise NotImplemented` raises the
    # NotImplemented *singleton*, which is not an exception, so Python
    # actually raises TypeError. NotImplementedError is the standard
    # exception for abstract methods.
    raise NotImplementedError


def set_device(self, device):
    """Remember the target device and move all parameters onto it.

    Args:
        device: anything ``torch.device()`` accepts, e.g. ``"cpu"``
            or ``"cuda:0"``.
    """
    target = torch.device(device)
    # Keep the resolved device on the instance so forward()/generate()
    # can place incoming tensors without a device argument.
    self.device = target
    self.to(device)

def save_all_params(self, save_path):
    """Serialize the model's full ``state_dict`` to *save_path*."""
    params = self.state_dict()
    torch.save(params, save_path)

5 changes: 5 additions & 0 deletions bert_seq2seq/bert_cls_classifier.py
Original file line number Diff line number Diff line change
Expand Up @@ -41,6 +41,11 @@ def forward(self, text, position_enc=None, labels=None, use_layer_num=-1):
if use_layer_num < 0 or use_layer_num > 7:
# 越界
raise Exception("层数选择错误,因为bert base模型共8层,所以参数只只允许0 - 7, 默认为-1,取最后一层")
text = text.to(self.device)
if position_enc is not None:
position_enc = position_enc.to(self.device)
if labels is not None:
labels = labels.to(self.device)
enc_layers, _ = self.bert(text,
output_all_encoded_layers=True)
squence_out = enc_layers[use_layer_num]
Expand Down
16 changes: 8 additions & 8 deletions bert_seq2seq/bert_relation_extraction.py
Original file line number Diff line number Diff line change
Expand Up @@ -58,14 +58,14 @@ def extrac_subject(self, output, subject_ids):
subject = torch.cat((start_end[:, 0], start_end[:, 1]), dim=-1)
return subject

def forward(self, text, subject_ids, position_enc=None, subject_labels=None, object_labels=None, use_layer_num=-1, device="cpu"):
def forward(self, text, subject_ids, position_enc=None, subject_labels=None, object_labels=None, use_layer_num=-1):
if use_layer_num != -1:
if use_layer_num < 0 or use_layer_num > 7:
# 越界
raise Exception("层数选择错误,因为bert base模型共8层,所以参数只只允许0 - 7, 默认为-1,取最后一层")
# 计算target mask
text = text.to(device)
subject_ids = subject_ids.to(device)
text = text.to(self.device)
subject_ids = subject_ids.to(self.device)

self.target_mask = (text > 0).float()
enc_layers, _ = self.bert(text,
Expand All @@ -92,8 +92,8 @@ def forward(self, text, subject_ids, position_enc=None, subject_labels=None, obj
predictions = object_pred_act
if subject_labels is not None and object_labels is not None:
## 计算loss
subject_labels = subject_labels.to(device)
object_labels = object_labels.to(device)
subject_labels = subject_labels.to(self.device)
object_labels = object_labels.to(self.device)
loss = self.compute_total_loss(subject_pred_act, object_pred_act, subject_labels, object_labels)
return predictions, loss
else :
Expand All @@ -104,7 +104,7 @@ def predict_subject(self, text,use_layer_num=-1, device="cpu"):
if use_layer_num < 0 or use_layer_num > 7:
# 越界
raise Exception("层数选择错误,因为bert base模型共8层,所以参数只只允许0 - 7, 默认为-1,取最后一层")
text = text.to(device)
text = text.to(self.device)

self.target_mask = (text > 0).float()
enc_layers, _ = self.bert(text, output_all_encoded_layers=True)
Expand All @@ -125,8 +125,8 @@ def predict_object_predicate(self, text, subject_ids, use_layer_num=-1, device="
# 越界
raise Exception("层数选择错误,因为bert base模型共8层,所以参数只只允许0 - 7, 默认为-1,取最后一层")
# 计算target mask
text = text.to(device)
subject_ids = subject_ids.to(device)
text = text.to(self.device)
subject_ids = subject_ids.to(self.device)

enc_layers, _ = self.bert(text, output_all_encoded_layers=True)
squence_out = enc_layers[use_layer_num]
Expand Down
8 changes: 7 additions & 1 deletion bert_seq2seq/bert_seq_labeling.py
Original file line number Diff line number Diff line change
Expand Up @@ -44,7 +44,13 @@ def forward(self, text, position_enc=None, labels=None, use_layer_num=-1):
if use_layer_num < 0 or use_layer_num > 7:
# 越界
raise Exception("层数选择错误,因为bert base模型共8层,所以参数只只允许0 - 7, 默认为-1,取最后一层")
self.target_mask = (text > 0).float()
self.target_mask = (text > 0).float().to(self.device)
text = text.to(self.device)
if position_enc is not None:
position_enc = position_enc.to(self.device)
if labels is not None:
labels = labels.to(self.device)

enc_layers, _ = self.bert(text,
output_all_encoded_layers=True)
squence_out = enc_layers[use_layer_num]
Expand Down
7 changes: 6 additions & 1 deletion bert_seq2seq/bert_seq_labeling_crf.py
Original file line number Diff line number Diff line change
Expand Up @@ -42,7 +42,12 @@ def forward(self, text, position_enc=None, labels=None, use_layer_num=-1):
# 越界
raise Exception("层数选择错误,因为bert base模型共8层,所以参数只只允许0 - 7, 默认为-1,取最后一层")
# 计算target mask
self.target_mask = (text > 0).float()
self.target_mask = (text > 0).float().to(self.device)
text = text.to(self.device)
if position_enc is not None :
position_enc = position_enc.to(self.device)
if labels is not None :
labels = labels.to(self.device)
enc_layers, _ = self.bert(text,
output_all_encoded_layers=True)
squence_out = enc_layers[use_layer_num]
Expand Down
32 changes: 19 additions & 13 deletions bert_seq2seq/seq2seq_model.py
Original file line number Diff line number Diff line change
Expand Up @@ -43,14 +43,20 @@ def compute_loss(self, predictions, labels, target_mask):
loss = nn.CrossEntropyLoss(ignore_index=0, reduction="none")
return (loss(predictions, labels) * target_mask).sum() / target_mask.sum() ## 通过mask 取消 pad 和句子a部分预测的影响

def forward(self, input_tensor, token_type_id, position_enc=None, labels=None, device="cpu"):
def forward(self, input_tensor, token_type_id, position_enc=None, labels=None):
## 传入输入,位置编码,token type id ,还有句子a 和句子b的长度,注意都是传入一个batch数据
## 传入的几个值,在seq2seq 的batch iter 函数里面都可以返回
input_tensor = input_tensor.to(self.device)
token_type_id = token_type_id.to(self.device)
if position_enc is not None:
position_enc = position_enc.to(self.device)
if labels is not None :
labels = labels.to(self.device)
input_shape = input_tensor.shape
batch_size = input_shape[0]
seq_len = input_shape[1]
## 构建特殊的mask
ones = torch.ones((1, 1, seq_len, seq_len), dtype=torch.float32, device=device)
ones = torch.ones((1, 1, seq_len, seq_len), dtype=torch.float32, device=self.device)
a_mask = ones.tril() # 下三角矩阵
s_ex12 = token_type_id.unsqueeze(1).unsqueeze(2).float()
s_ex13 = token_type_id.unsqueeze(1).unsqueeze(3).float()
Expand All @@ -73,20 +79,20 @@ def forward(self, input_tensor, token_type_id, position_enc=None, labels=None, d
else :
return predictions

def generate(self, text, out_max_length=40, beam_size=1, device="cpu", is_poem=False, max_length=256):
def generate(self, text, out_max_length=40, beam_size=1, is_poem=False, max_length=256):
# 对 一个 句子生成相应的结果
## 通过输出最大长度得到输入的最大长度,这里问题不大,如果超过最大长度会进行截断
self.out_max_length = out_max_length
input_max_length = max_length - out_max_length
# print(text)
token_ids, token_type_ids = self.tokenizer.encode(text, max_length=input_max_length)
token_ids = torch.tensor(token_ids, device=device).view(1, -1)
token_type_ids = torch.tensor(token_type_ids, device=device).view(1, -1)
token_ids = torch.tensor(token_ids, device=self.device).view(1, -1)
token_type_ids = torch.tensor(token_type_ids, device=self.device).view(1, -1)
if is_poem:## 古诗的beam-search稍有不同

out_puts_ids = self.beam_search_poem(text, token_ids, token_type_ids, self.word2ix, beam_size=beam_size, device=device)
out_puts_ids = self.beam_search_poem(text, token_ids, token_type_ids, self.word2ix, beam_size=beam_size, device=self.device)
else :
out_puts_ids = self.beam_search(token_ids, token_type_ids, self.word2ix, beam_size=beam_size, device=device)
out_puts_ids = self.beam_search(token_ids, token_type_ids, self.word2ix, beam_size=beam_size, device=self.device)

# 解码 得到相应输出
# if err is False:
Expand Down Expand Up @@ -225,12 +231,12 @@ def beam_search(self, token_ids, token_type_ids, word2ix, beam_size=1, device="c
output_scores = torch.zeros(token_ids.shape[0], device=device)
for step in range(self.out_max_length):
if step == 0:
scores = self.forward(token_ids, token_type_ids, device=device)
scores = self.forward(token_ids, token_type_ids)
# 重复beam-size次 输入ids
token_ids = token_ids.view(1, -1).repeat(beam_size, 1)
token_type_ids = token_type_ids.view(1, -1).repeat(beam_size, 1)
else:
scores = self.forward(new_input_ids, new_token_type_ids, device=device)
scores = self.forward(new_input_ids, new_token_type_ids)

logit_score = torch.log_softmax(scores[:, -1], dim=-1)

Expand Down Expand Up @@ -296,12 +302,12 @@ def beam_search_poem(self, text, token_ids, token_type_ids, word2ix, beam_size=1
output_scores = torch.zeros(token_ids.shape[0], device=device)
for step in range(self.out_max_length):
if step == 0:
scores = self.forward(token_ids, token_type_ids, device=device)
scores = self.forward(token_ids, token_type_ids)
# 重复beam-size次 输入ids
token_ids = token_ids.view(1, -1).repeat(beam_size, 1)
token_type_ids = token_type_ids.view(1, -1).repeat(beam_size, 1)
else:
scores = self.forward(new_input_ids, new_token_type_ids, device=device)
scores = self.forward(new_input_ids, new_token_type_ids)

logit_score = torch.log_softmax(scores[:, -1], dim=-1)

Expand Down Expand Up @@ -422,12 +428,12 @@ def beam_search_poem_v2(self, text, token_ids, token_type_ids, word2ix, beam_siz
output_scores = torch.zeros(token_ids.shape[0], device=device)
for step in range(self.out_max_length):
if step == 0:
scores = self.forward(token_ids, token_type_ids, device=device)
scores = self.forward(token_ids, token_type_ids)
# 重复beam-size次 输入ids
token_ids = token_ids.view(1, -1).repeat(beam_size, 1)
token_type_ids = token_type_ids.view(1, -1).repeat(beam_size, 1)
else:
scores = self.forward(new_input_ids, new_token_type_ids, device=device)
scores = self.forward(new_input_ids, new_token_type_ids)

logit_score = torch.log_softmax(scores[:, -1], dim=-1)
# if len(last_chars) != 0:
Expand Down
9 changes: 3 additions & 6 deletions examples/THUCNews自动摘要.py
Original file line number Diff line number Diff line change
Expand Up @@ -96,7 +96,7 @@ def __init__(self):
# 加载已经训练好的模型,继续训练

# 将模型发送到计算设备(GPU或CPU)
self.bert_model.to(self.device)
self.bert_model.set_device(self.device)
# 声明需要优化的参数
self.optim_parameters = list(self.bert_model.parameters())
self.optimizer = torch.optim.Adam(self.optim_parameters, lr=lr, weight_decay=1e-3)
Expand Down Expand Up @@ -129,22 +129,19 @@ def iteration(self, epoch, dataloader, train=True):
"2007年乔布斯向人们展示iPhone并宣称它将会改变世界还有人认为他在夸大其词然而在8年后以iPhone为代表的触屏智能手机已经席卷全球各个角落未来智能手机将会成为真正的个人电脑为人类发展做出更大的贡献",
"8月28日,网络爆料称,华住集团旗下连锁酒店用户数据疑似发生泄露。从卖家发布的内容看,数据包含华住旗下汉庭、禧玥、桔子、宜必思等10余个品牌酒店的住客信息。泄露的信息包括华住官网注册资料、酒店入住登记的身份信息及酒店开房记录,住客姓名、手机号、邮箱、身份证号、登录账号密码等。卖家对这个约5亿条数据打包出售。第三方安全平台威胁猎人对信息出售者提供的三万条数据进行验证,认为数据真实性非常高。当天下午 ,华 住集 团发声明称,已在内部迅速开展核查,并第一时间报警。当晚,上海警方消息称,接到华住集团报案,警方已经介入调查。"]
for text in test_data:
print(self.bert_model.generate(text, beam_size=3,device=self.device))
print(self.bert_model.generate(text, beam_size=3))
print("loss is " + str(report_loss))
report_loss = 0
# self.eval(epoch)
self.bert_model.train()
if step % 8000 == 0:
self.save(model_save_path)

token_ids = token_ids.to(self.device)
token_type_ids = token_type_ids.to(self.device)
target_ids = target_ids.to(self.device)
# 因为传入了target标签,因此会计算loss并且返回
predictions, loss = self.bert_model(token_ids,
token_type_ids,
labels=target_ids,
device=self.device

)
report_loss += loss.item()
# 反向传播
Expand Down
8 changes: 2 additions & 6 deletions examples/auto_title.py
Original file line number Diff line number Diff line change
Expand Up @@ -108,14 +108,10 @@ def __init__(self):
print("device: " + str(self.device))
# 定义模型
self.bert_model = load_bert(word2idx, model_name=model_name)
## 加载预训练的模型参数~

self.bert_model.set_device(self.device)
## 加载预训练的模型参数~
self.bert_model.load_pretrain_params(model_path, keep_tokens=keep_tokens)
# 加载已经训练好的模型,继续训练
# load_recent_model(self.bert_model, self.recent_model_path)

# 将模型发送到计算设备(GPU或CPU)
self.bert_model.to(self.device)
# 声明需要优化的参数
self.optim_parameters = list(self.bert_model.parameters())
self.optimizer = torch.optim.Adam(self.optim_parameters, lr=lr, weight_decay=1e-3)
Expand Down
2 changes: 1 addition & 1 deletion examples/math_ques_train.py
Original file line number Diff line number Diff line change
Expand Up @@ -161,7 +161,7 @@ def __init__(self):
## 加载预训练的模型参数~
self.bert_model.load_pretrain_params(model_path)
# 将模型发送到计算设备(GPU或CPU)
self.bert_model.to(self.device)
self.bert_model.set_device(self.device)
# 声明需要优化的参数
self.optim_parameters = list(self.bert_model.parameters())
self.optimizer = torch.optim.Adam(self.optim_parameters, lr=lr, weight_decay=1e-5)
Expand Down
2 changes: 1 addition & 1 deletion examples/relationship_classify_train.py
Original file line number Diff line number Diff line change
Expand Up @@ -127,7 +127,7 @@ def __init__(self):
## 加载预训练的模型参数~
self.bert_model.load_pretrain_params(model_path)
# 将模型发送到计算设备(GPU或CPU)
self.bert_model.to(self.device)
self.bert_model.set_device(self.device)
# 声明需要优化的参数
self.optim_parameters = list(self.bert_model.parameters())
self.optimizer = torch.optim.Adam(self.optim_parameters, lr=lr, weight_decay=1e-3)
Expand Down
2 changes: 1 addition & 1 deletion examples/三元组抽取_train.py
Original file line number Diff line number Diff line change
Expand Up @@ -363,7 +363,7 @@ def iteration(self, epoch, dataloader, train=True):
subject_ids,
subject_labels=subject_lables,
object_labels=object_labels,
device=self.device

)
# 反向传播
if train:
Expand Down
6 changes: 3 additions & 3 deletions examples/写诗_train.py
Original file line number Diff line number Diff line change
Expand Up @@ -141,7 +141,7 @@ def __init__(self):
## 加载预训练的模型参数~
self.bert_model.load_pretrain_params(model_path, keep_tokens=keep_tokens)
# 将模型发送到计算设备(GPU或CPU)
self.bert_model.to(self.device)
self.bert_model.set_device(self.device)
# 声明需要优化的参数
self.optim_parameters = list(self.bert_model.parameters())
self.optimizer = torch.optim.Adam(self.optim_parameters, lr=lr, weight_decay=1e-3)
Expand Down Expand Up @@ -172,7 +172,7 @@ def iteration(self, epoch, dataloader, train=True):
self.bert_model.eval()
test_data = ["北国风光##五言绝句", "题西林壁##七言绝句", "长安早春##五言律诗"]
for text in test_data:
print(self.bert_model.generate(text, beam_size=3,device=self.device, is_poem=True))
print(self.bert_model.generate(text, beam_size=3, is_poem=True))
self.bert_model.train()

token_ids = token_ids.to(self.device)
Expand All @@ -182,7 +182,7 @@ def iteration(self, epoch, dataloader, train=True):
predictions, loss = self.bert_model(token_ids,
token_type_ids,
labels=target_ids,
device=self.device

)
# 反向传播
if train:
Expand Down
5 changes: 1 addition & 4 deletions examples/医学ner_train.py
Original file line number Diff line number Diff line change
Expand Up @@ -272,7 +272,7 @@ def __init__(self):
## 加载预训练的模型参数~
self.bert_model.load_pretrain_params(model_path, keep_tokens=keep_tokens)
# 将模型发送到计算设备(GPU或CPU)
self.bert_model.to(self.device)
self.bert_model.set_device(self.device)
# 声明需要优化的参数
crf_params = list(map(id, self.bert_model.crf_layer.parameters())) ## 单独把crf层参数拿出来
base_params = filter(lambda p: id(p) not in crf_params, self.bert_model.parameters())
Expand Down Expand Up @@ -309,9 +309,6 @@ def iteration(self, epoch, dataloader, train=True):
ner_print(self.bert_model, test_data, device=self.device)
self.bert_model.train()

token_ids = token_ids.to(self.device)
token_type_ids = token_type_ids.to(self.device)
target_ids = target_ids.to(self.device)
# 因为传入了target标签,因此会计算loss并且返回
predictions, loss = self.bert_model(token_ids,
labels=target_ids,
Expand Down
Loading

0 comments on commit 2e6f8d9

Please sign in to comment.