
Commit

added bminf
Signed-off-by: ftgreat <[email protected]>
ftgreat committed Mar 2, 2023
1 parent bcdf9b1 commit 0811530
Showing 9 changed files with 147 additions and 44 deletions.
45 changes: 45 additions & 0 deletions examples/bminf_generate/README.md
@@ -0,0 +1,45 @@

# BMInf

## Overview

BMInf is a low-resource inference package for large-scale pretrained language models.

BMInf can run models with more than 10 billion parameters on a single NVIDIA GTX 1060 GPU, its minimum requirement; running on better GPUs yields better performance. Even when GPU memory is large enough for ordinary large-model inference (e.g., on a V100 or A100), BMInf still delivers a significant performance improvement over the existing PyTorch implementation.

BMInf GitHub repository: https://github.com/OpenBMB/BMInf


## Application

After the model's parameters have been loaded, use the following code to convert the model with BMInf:

```python
with torch.cuda.device(0):
    model = bminf.wrapper(model, quantization=False, memory_limit=20 << 30)
```
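`bminf.wrapper` returns a drop-in replacement for the original model; as the example scripts in this commit show, the wrapped model is handed to FlagAI's `Predictor` and used for generation exactly as before.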
The `quantization` parameter controls whether model quantization is applied; for generative models it must be set to `False`.

The `memory_limit` parameter sets the maximum GPU memory BMInf may use. It is specified in bytes, so `20 << 30` in the snippet above corresponds to 20 GiB.

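As a quick illustration of the byte arithmetic (the 8 GiB figure below is an arbitrary example, not a recommendation):

```python
# memory budgets are plain byte counts; left-shifting by 30 multiplies by 2**30 (GiB)
memory_limit = 8 << 30          # 8 GiB
assert memory_limit == 8 * 1024 ** 3
```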

If `bminf.wrapper` does not fit your model well, you can adapt the model manually as follows.

* Replace `torch.nn.ModuleList` with `bminf.TransformerBlockList`:
```python
module_list = bminf.TransformerBlockList([
    # your model's transformer blocks go here
], [CUDA_DEVICE_INDEX])
```

* Replace `torch.nn.Linear` with `bminf.QuantizedLinear`:
```python
linear = bminf.QuantizedLinear(torch.nn.Linear(...))
```
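For reference, a minimal sketch that combines the two substitutions on a toy module; `TinyBlock`, `hidden`, and the block count are hypothetical stand-ins for your model's real layers, and a CUDA device 0 is assumed:

```python
import torch
import bminf

class TinyBlock(torch.nn.Module):
    # stand-in for a real transformer block (attention, MLP, norms, ...)
    def __init__(self, hidden):
        super().__init__()
        # each torch.nn.Linear is wrapped in bminf.QuantizedLinear
        self.ff = bminf.QuantizedLinear(torch.nn.Linear(hidden, hidden))

    def forward(self, x):
        return self.ff(x)

hidden = 1024
# the plain torch.nn.ModuleList is replaced by bminf.TransformerBlockList,
# scheduled on CUDA device 0
blocks = bminf.TransformerBlockList(
    [TinyBlock(hidden) for _ in range(4)],
    [0],
)
```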
37 changes: 37 additions & 0 deletions examples/bminf_generate/cpm1_generate.py
@@ -0,0 +1,37 @@
import sys
sys.path.append("/home/yanzhaodong/anhforth/FlagAI")
import torch
from flagai.auto_model.auto_loader import AutoLoader
from flagai.model.predictor.predictor import Predictor
import bminf
import time


if __name__ == '__main__':

    text = '''默写古诗:
白日依山尽,黄河入海流。
床前明月光,'''

    loader = AutoLoader(task_name="lm",
                        model_name="CPM-large-ch",
                        model_dir="./checkpoints",
                        device="cpu")

    model = loader.get_model()
    time_start=time.time()
    with torch.cuda.device(0):
        model = bminf.wrapper(model, quantization=False, memory_limit=20 << 30)
    tokenizer = loader.get_tokenizer()

    predictor = Predictor(model=model,
                          tokenizer=tokenizer,
                          )

    out = predictor.predict_generate_randomsample(text,
                                                  top_p=0.9,
                                                  out_max_length=50)
    time_end=time.time()
    print('time cost',time_end-time_start,'s')

    print(out)
36 changes: 36 additions & 0 deletions examples/bminf_generate/galactica_6.7b_generate.py
@@ -0,0 +1,36 @@
from flagai.model.predictor.predictor import Predictor
from flagai.auto_model.auto_loader import AutoLoader
import torch
import bminf
import time
device = torch.device("cuda:0" if torch.cuda.is_available() else "cpu")


loader = AutoLoader(task_name="lm",
                    model_name="galactica-6.7b-en",
                    model_dir="/share/projset/baaishare/baai-mrnd/xingzhaohu/")

model = loader.get_model()
with torch.cuda.device(0):
    model = bminf.wrapper(model, quantization=False, memory_limit=20 << 30)
model.to(device)
model.eval()
tokenizer = loader.get_tokenizer()
predictor = Predictor(model, tokenizer)
print("model loaded")
time_start=time.time()

text = "Please write a abstract about the computer vision. \n"
out = predictor.predict_generate_randomsample(text,
                                              out_max_length=700,
                                              top_k=50,
                                              repetition_penalty=1.2,
                                              temperature=0.7
                                              )

time_end=time.time()
print('time cost',time_end-time_start,'s')
print(out)



File renamed without changes.
9 changes: 7 additions & 2 deletions examples/cpm_1/generate_bminf.py
@@ -4,6 +4,8 @@
from flagai.auto_model.auto_loader import AutoLoader
from flagai.model.predictor.predictor import Predictor
import bminf
import time


if __name__ == '__main__':

@@ -17,8 +19,9 @@
device="cpu")

model = loader.get_model()
# with torch.cuda.device(0):
# model = bminf.wrapper(model, quantization=False, memory_limit=20 << 30)
time_start=time.time()
with torch.cuda.device(0):
model = bminf.wrapper(model, quantization=False, memory_limit=20 << 30)
tokenizer = loader.get_tokenizer()

predictor = Predictor(model=model,
@@ -28,5 +31,7 @@
    out = predictor.predict_generate_randomsample(text,
                                                  top_p=0.9,
                                                  out_max_length=50)
    time_end=time.time()
    print('time cost',time_end-time_start,'s')

    print(out)
50 changes: 17 additions & 33 deletions examples/galactica/generate_galactica_1.3b.py
@@ -1,41 +1,25 @@
import sys
sys.path.append("/home/yanzhaodong/anhforth/FlagAI")
from flagai.model.predictor.predictor import Predictor
from flagai.auto_model.auto_loader import AutoLoader
import torch
import bminf
import time
device = torch.device("cuda:0" if torch.cuda.is_available() else "cpu")
device = torch.device("cuda:3" if torch.cuda.is_available() else "cpu")

loader = AutoLoader(task_name="lm",
                    model_name="galactica-1.3b-en",
                    model_dir="/share/projset/baaishare/baai-mrnd/xingzhaohu/")

from flagai.data.tokenizer import Tokenizer
tokenizer = Tokenizer.from_pretrained("galactica-1.3b-en",cache_dir="/share/projset/baaishare/baai-mrnd/xingzhaohu/")


# loader = AutoLoader(task_name="lm",
#                     model_name="galactica-1.3b-en",
#                     model_dir="/share/projset/baaishare/baai-mrnd/xingzhaohu/")

# model = loader.get_model()
# model.to(device)
# model.eval()
# time_start=time.time()
# # with torch.cuda.device(0):
# # model = bminf.wrapper(model, quantization=False, memory_limit=20 << 30)
# tokenizer = loader.get_tokenizer()

# predictor = Predictor(model, tokenizer)

# text = "Please write a abstract about the computer vision. \n"
# out = predictor.predict_generate_randomsample(text,
#                                               out_max_length=700,
#                                               top_k=50,
#                                               repetition_penalty=1.2,
#                                               temperature=0.7
#                                               )
# time_end=time.time()
# print('time cost',time_end-time_start,'s')
# print(out)
model = loader.get_model()
model.to(device)
model.eval()

tokenizer = loader.get_tokenizer()

predictor = Predictor(model, tokenizer)

text = "Please write a abstract about the computer vision. \n"
out = predictor.predict_generate_randomsample(text,
                                              out_max_length=700,
                                              top_k=50,
                                              repetition_penalty=1.2,
                                              temperature=0.7
                                              )
print(out)
2 changes: 0 additions & 2 deletions examples/gpt2_text_writting/generate.py
@@ -1,8 +1,6 @@
# Copyright © 2022 BAAI. All rights reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License")
import sys
sys.path.append("/home/yanzhaodong/anhforth/FlagAI")
from flagai.auto_model.auto_loader import AutoLoader
from flagai.model.predictor.predictor import Predictor

6 changes: 5 additions & 1 deletion flagai/data/tokenizer/tokenizer.py
@@ -457,8 +457,12 @@ def DecodeTokens(self, tokens):
"""A list of tokens => recovered text string"""
return self.text_tokenizer.convert_tokens_to_string(tokens)

def convert_tokens_to_ids(self, tokens):
return self.text_tokenizer.convert_tokens_to_ids(tokens)

def convert_ids_to_tokens(self, ids):
return self.text_tokenizer.convert_ids_to_tokens(ids)

# class BaseTokenizer(object):

class TextTokenizer(object):
    """
6 changes: 0 additions & 6 deletions flagai/data/tokenizer/uni_tokenizer/tokenizer.py
@@ -153,7 +153,6 @@ def __init__(self,
        }
        self.command_id_map = {tok.Id: tok for tok in self._command_tokens}
        self._command_token_tokens = list(self.command_token_map.keys())
        # import pdb;pdb.set_trace()
        vocab = self.text_tokenizer.get_vocab()
        self.token_start_id = vocab.get('<s>', None)
        if not self.token_start_id:
@@ -164,10 +163,8 @@
            self.token_end_id = vocab.get('<|endoftext|>', None)
        if not self.token_end_id:
            self.token_end_id = vocab.get('[SEP]', None)
        # import pdb;pdb.set_trace()
        print("All special tokens: ", str([(k, v.token, v.Id) for k,v in self.command_name_map.items()]))
        # logger.info("All special tokens: %s", str([(k,v.Id) for k,v in self.command_name_map.items()]))
        import pdb;pbb.set_trace()


    def get_vocab(self):
@@ -229,7 +226,6 @@ def _encode(self, text):
        return ids

    def convert_tokens_to_ids(self, tokens):
        import pdb;pdb.set_trace()
        res = []
        for token in tokens:
            if token in self.command_token_map:
@@ -239,8 +235,6 @@ def convert_tokens_to_ids(self, tokens):
        return res

    def convert_ids_to_tokens(self, ids):
        # if torch.is_tensor(ids):
        #     ids = ids.tolist()
        res = []
        for id in ids:
            if id in self.command_id_map:
