
Commit 8d4e86a
fixed error
Signed-off-by: ftgreat <[email protected]>
ftgreat committed Mar 2, 2023
1 parent 2093bb9 commit 8d4e86a
Showing 10 changed files with 89 additions and 174 deletions.
1 change: 1 addition & 0 deletions examples/bminf_generate/galactica_6.7b_generate.py
@@ -1,3 +1,4 @@
+
from flagai.model.predictor.predictor import Predictor
from flagai.auto_model.auto_loader import AutoLoader
import torch
2 changes: 1 addition & 1 deletion flagai/data/tokenizer/bert/bert_tokenizer.py
@@ -76,7 +76,7 @@ def __init__(self, tokenizer_model_type=None, cache_dir=None):
CommandToken('pad', '[PAD]', self.get_specialid_from_text_tokenizer('pad')),
CommandToken('cls', '[CLS]', self.get_specialid_from_text_tokenizer('cls')),
CommandToken('MASK', '[MASK]',
-                         self.get_specialid_from_text_tokenizer('MASK')),
+                         self.get_specialid_from_text_tokenizer('mask')),
CommandToken('unk', '[UNK]', self.get_specialid_from_text_tokenizer('unk')),
CommandToken('sep', '[SEP]', self.get_specialid_from_text_tokenizer('sep')),
CommandToken('eos', '[PAD]', self.get_specialid_from_text_tokenizer('pad')),
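The same one-character fix recurs in the Galactica, OPT, and RoBERTa tokenizers below: the key passed to get_specialid_from_text_tokenizer for the MASK command token is lowercased so it matches the keys used for the other special tokens ('pad', 'cls', 'unk', 'sep'). A minimal sketch of why the case matters, assuming the helper resolves the key against lowercase `*_token_id` attributes on the wrapped text tokenizer; the helper body here is an illustration, not FlagAI's actual implementation:

```python
# Sketch only: an assumed stand-in for get_specialid_from_text_tokenizer.
# If the key is used to build a lowercase attribute name such as
# "mask_token_id", then passing 'MASK' cannot resolve and fails.
class TokenizerWrapper:
    def __init__(self, text_tokenizer):
        self.text_tokenizer = text_tokenizer

    def get_specialid_from_text_tokenizer(self, token):
        # 'pad' -> pad_token_id, 'mask' -> mask_token_id, ...
        return getattr(self.text_tokenizer, token + "_token_id")


class DummyTextTokenizer:          # stands in for a HuggingFace-style tokenizer
    pad_token_id = 0
    mask_token_id = 103


wrapper = TokenizerWrapper(DummyTextTokenizer())
print(wrapper.get_specialid_from_text_tokenizer("mask"))   # 103
# wrapper.get_specialid_from_text_tokenizer("MASK")        # AttributeError
```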
2 changes: 1 addition & 1 deletion flagai/data/tokenizer/bert/wordpiece.py
@@ -118,7 +118,7 @@ def __init__(self,
self._token_unk = '[UNK]'
self._token_mask = '[MASK]'

-        for token in ['pad', 'cls', 'sep', 'unk', 'MASK']:
+        for token in ['pad', 'cls', 'sep', 'unk', 'mask']:
_token_id = self.vocab[getattr(self, "_token_" + str(token))]
setattr(self, "_token_" + str(token) + "_id", _token_id)

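In this file the reason for the lowercase key is visible in the hunk itself: the loop builds attribute names of the form `_token_<name>`, and the attribute defined a few lines above is `self._token_mask`, so the list entry must be 'mask'; 'MASK' would make getattr look for a non-existent `_token_MASK`. The same list appears in t5_tokenizer.py further down. A small self-contained reproduction of that pattern, with the class simplified and only the lookup logic taken from the hunk:

```python
# Simplified reproduction of the lookup pattern in this hunk: attribute names
# are assembled as "_token_" + name, so the loop entries must match the
# lowercase suffixes of the attributes defined on the tokenizer.
class WordpieceLike:
    def __init__(self, vocab):
        self.vocab = vocab
        self._token_pad = '[PAD]'
        self._token_cls = '[CLS]'
        self._token_sep = '[SEP]'
        self._token_unk = '[UNK]'
        self._token_mask = '[MASK]'
        for token in ['pad', 'cls', 'sep', 'unk', 'mask']:
            _token_id = self.vocab[getattr(self, "_token_" + str(token))]
            setattr(self, "_token_" + str(token) + "_id", _token_id)


tok = WordpieceLike({'[PAD]': 0, '[UNK]': 100, '[CLS]': 101, '[SEP]': 102, '[MASK]': 103})
print(tok._token_mask_id)  # 103; with 'MASK' in the list, getattr would raise AttributeError
```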
2 changes: 1 addition & 1 deletion flagai/data/tokenizer/galactica/galactica_tokenizer.py
@@ -16,7 +16,7 @@ def __init__(self, download_dir) -> None:
CommandToken('pad', '[PAD]', self.get_specialid_from_text_tokenizer('pad')),
CommandToken('ENC', '[CLS]', self.get_specialid_from_text_tokenizer('cls')),
CommandToken('MASK', '[MASK]',
-                         self.get_specialid_from_text_tokenizer('MASK')),
+                         self.get_specialid_from_text_tokenizer('mask')),
CommandToken('unk', '[UNK]', self.get_specialid_from_text_tokenizer('unk')),
CommandToken('sep', '[SEP]', self.get_specialid_from_text_tokenizer('sep')),
CommandToken('eos', '[PAD]', self.get_specialid_from_text_tokenizer('pad')),
2 changes: 1 addition & 1 deletion flagai/data/tokenizer/opt/opt_en_tokenizer.py
@@ -36,7 +36,7 @@ def __init__(self, tokenizer_model_type="facebook/opt-125m", cache_dir=None):
CommandToken('pad', '[PAD]', self.get_specialid_from_text_tokenizer('pad')),
CommandToken('cls', '[CLS]', self.get_specialid_from_text_tokenizer('cls')),
CommandToken('MASK', '[MASK]',
-                         self.get_specialid_from_text_tokenizer('MASK')),
+                         self.get_specialid_from_text_tokenizer('mask')),
CommandToken('unk', '[UNK]', self.get_specialid_from_text_tokenizer('unk')),
CommandToken('sep', '[SEP]', self.get_specialid_from_text_tokenizer('sep')),
CommandToken('eos', '[PAD]', self.get_specialid_from_text_tokenizer('pad')),
2 changes: 1 addition & 1 deletion flagai/data/tokenizer/roberta/roberta_tokenizer.py
@@ -39,7 +39,7 @@ def __init__(self, tokenizer_model_type="roberta-base", cache_dir=None):
CommandToken('pad', '[PAD]', self.get_specialid_from_text_tokenizer('pad')),
CommandToken('ENC', '[CLS]', self.get_specialid_from_text_tokenizer('cls')),
CommandToken('MASK', '[MASK]',
-                         self.get_specialid_from_text_tokenizer('MASK')),
+                         self.get_specialid_from_text_tokenizer('mask')),
CommandToken('unk', '[UNK]', self.get_specialid_from_text_tokenizer('unk')),
CommandToken('sep', '[SEP]', self.get_specialid_from_text_tokenizer('sep')),
CommandToken('eos', '[PAD]', self.get_specialid_from_text_tokenizer('pad')),
2 changes: 1 addition & 1 deletion flagai/data/tokenizer/t5/t5_tokenizer.py
@@ -268,7 +268,7 @@ def __init__(self,
self._token_dict = token_dict

self._token_dict_inv = {v: k for k, v in token_dict.items()}
-        for token in ['pad', 'cls', 'sep', 'unk', 'MASK']:
+        for token in ['pad', 'cls', 'sep', 'unk', 'mask']:
try:
_token_id = token_dict[getattr(self, "_token_" + str(token))]
# print(_token_id)
12 changes: 6 additions & 6 deletions flagai/model/gpt2_model.py
@@ -112,14 +112,14 @@ def __init__(self, config):
self.drop = nn.Dropout(config.embd_pdrop)
self.project_in = None
self.project_out = None
-        self.h = bminf.TransformerBlockList([
-            GPT2Block(config.n_ctx, config, scale=True)
-            for _ in range(config.n_layer)
-        ],[0])
-        # self.h = nn.ModuleList([
+        # self.h = bminf.TransformerBlockList([
        #     GPT2Block(config.n_ctx, config, scale=True)
        #     for _ in range(config.n_layer)
-        # ])
+        # ],[0])
+        self.h = nn.ModuleList([
+            GPT2Block(config.n_ctx, config, scale=True)
+            for _ in range(config.n_layer)
+        ])

self.ln_f = nn.LayerNorm(config.n_embd,
eps=config.layer_norm_epsilon)
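This hunk reverts the transformer stack from bminf.TransformerBlockList (BMInf's low-memory wrapper, left commented out) back to a plain nn.ModuleList of GPT2Block layers. A minimal sketch of the restored construction pattern; GPT2Block and the config come from flagai.model.gpt2_model, so they are stubbed here with a generic layer and a tiny config just to keep the snippet runnable:

```python
import torch.nn as nn

class TinyConfig:                     # stand-in for the GPT-2 config fields used here
    n_layer = 4
    n_embd = 64
    n_ctx = 128

def build_blocks(config):
    # Restored behaviour: a regular nn.ModuleList holding one block per layer.
    # The commented-out alternative in the diff wrapped the same blocks in
    # bminf.TransformerBlockList([...], [0]) to enable BMInf low-memory inference.
    return nn.ModuleList([
        nn.TransformerEncoderLayer(d_model=config.n_embd, nhead=4, batch_first=True)
        for _ in range(config.n_layer)
    ])

blocks = build_blocks(TinyConfig())
print(len(blocks))  # 4
```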
1 change: 0 additions & 1 deletion flagai/model/predictor/utils.py
@@ -1117,7 +1117,6 @@ def alm_beamsearch(model, tokenizer, text, out_max_length, beam_size, eod_token=
context_length = context_length_tensor[0].item()
context_tokens_tensor = torch.LongTensor(context_tokens)
text = tokenizer.DecodeIds(context_tokens_tensor.tolist())
-
start_time = time.time()
mems = []
tokens = context_tokens_tensor

0 comments on commit 8d4e86a
