modified according to comments
Signed-off-by: ftgreat <[email protected]>
ftgreat committed Mar 2, 2023
1 parent 948e3f9 commit df91be4
Showing 9 changed files with 18 additions and 27 deletions.
examples/bminf_generate/galactica_6.7b_generate.py (2 changes: 1 addition & 1 deletion)
@@ -9,7 +9,7 @@

 loader = AutoLoader(task_name="lm",
                     model_name="galactica-6.7b-en",
-                    model_dir="/share/projset/baaishare/baai-mrnd/xingzhaohu/")
+                    model_dir="./checkpoints/")

 model = loader.get_model()
 with torch.cuda.device(0):
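Note: this change replaces a contributor-specific absolute path with a relative ./checkpoints/ directory, so the example can run outside the original host. A minimal sketch of the resulting loader usage, assuming the import path FlagAI's examples use elsewhere:

import torch
from flagai.auto_model.auto_loader import AutoLoader

# Weights are resolved under model_dir instead of a machine-specific share.
loader = AutoLoader(task_name="lm",
                    model_name="galactica-6.7b-en",
                    model_dir="./checkpoints/")
model = loader.get_model()
tokenizer = loader.get_tokenizer()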
examples/glm_blank_filling/glm_generate_samples.py (7 changes: 4 additions & 3 deletions)
@@ -1,6 +1,7 @@
 # Copyright © 2022 BAAI. All rights reserved.
 #
 # Licensed under the Apache License, Version 2.0 (the "License")
+
 import torch
 from flagai.model.glm_model import GLMModel
 from flagai.data.tokenizer import Tokenizer
@@ -20,7 +21,7 @@
 model.cuda(torch.cuda.current_device())

 predictor = Predictor(model, tokenizer)
-
+# generate samples
 text = [
     '问题:啤酒伤胃吗?回答:[gMASK]', "问题:隔夜菜能吃吗?回答:[gMASK]", "问题:如何评价许嵩?回答:[gMASK]"
 ]
@@ -34,12 +35,12 @@
     output = predictor.predict_generate_randomsample(
         t, top_k=50, repetition_penalty=4.0, top_p=1.0)
     print(t, '\n', output)
-
+#
 text = [
     "人工智能是一个以计算机科学为基础,由计算机、数学、哲学等多学科交叉融合的交叉学科,[sMASK],具有非常巨大的前景。",
     "最近十多年来,人工神经网络的研究工作不断深入,已经取得了很大的进展,[sMASK],表现出了良好的智能特性。"
 ]
 for t in text:
     output = predictor.predict_generate_randomsample(
         t, top_k=50, repetition_penalty=4.0, top_p=1.0)
-    print(t, '\n', output)
+    print(t, '\n', output)
\ No newline at end of file
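Note: in GLM prompts, [gMASK] asks the model for an open-ended generation after the prompt, while [sMASK] fills a sentence-level blank inside it. The first batch of Chinese prompts translates roughly to "Question: Does beer harm the stomach? Answer: [gMASK]", "Question: Is overnight food safe to eat? Answer: [gMASK]", and "Question: How would you evaluate Xu Song? Answer: [gMASK]". A sketch with an illustrative English prompt, reusing the call and parameters from the diff:

# [gMASK] marks where GLM should continue generating.
text = "Question: Is overnight food safe to eat? Answer: [gMASK]"
output = predictor.predict_generate_randomsample(
    text, top_k=50, repetition_penalty=4.0, top_p=1.0)
print(text, '\n', output)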
examples/gpt2_title_generation/deepspeed.json (2 changes: 1 addition & 1 deletion)
@@ -4,7 +4,7 @@
"steps_per_print": 50,
"gradient_clipping": 1.0,
"zero_optimization": {
"stage": 2,
"stage": 1,
"contiguous_gradients": false,
"overlap_comm": true,
"reduce_scatter": true,
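Note: dropping from ZeRO stage 2 to stage 1 means only optimizer states are sharded across data-parallel ranks; stage 2 would also shard gradients. Stage 1 saves less memory but is simpler and often more compatible. A small sanity-check sketch for the updated config (path as in this repository):

import json

with open("examples/gpt2_title_generation/deepspeed.json") as f:
    cfg = json.load(f)
# stage 1 shards optimizer states; stage 2 additionally shards gradients
assert cfg["zero_optimization"]["stage"] == 1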
examples/gpt2_title_generation/train_multi_gpu.py (2 changes: 1 addition & 1 deletion)
@@ -11,7 +11,7 @@
 # device = torch.device("cpu")
 # single gpu
 trainer = Trainer(
-    env_type="deepspeed+mpu",
+    env_type="pytorchDDP",
     experiment_name="roberta_seq2seq",
     batch_size=1,
     gradient_accumulation_steps=1,
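Note: env_type selects the Trainer's distributed backend; FlagAI documents "pytorch", "pytorchDDP", "deepspeed", and "deepspeed+mpu" (DeepSpeed plus tensor model parallelism). Moving this example to pytorchDDP keeps plain data parallelism and drops the model-parallel setup. A minimal sketch of the changed construction, other arguments as in the file:

from flagai.trainer import Trainer

# pytorchDDP replicates the full model on each GPU and all-reduces gradients;
# deepspeed+mpu would additionally split individual layers across GPUs.
trainer = Trainer(env_type="pytorchDDP",
                  experiment_name="roberta_seq2seq",
                  batch_size=1,
                  gradient_accumulation_steps=1)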
examples/t5_title_generation/generate.py (14 changes: 7 additions & 7 deletions)
@@ -17,12 +17,12 @@
                                           beam_size=3,
                                           input_max_length=512,
                                           out_max_length=100)
-# out_2 = predictor.predict_generate_randomsample(text,
-#                                                 input_max_length=512,
-#                                                 out_max_length=100,
-#                                                 repetition_penalty=1.5,
-#                                                 top_k=20,
-#                                                 top_p=0.8)
+out_2 = predictor.predict_generate_randomsample(text,
+                                                input_max_length=512,
+                                                out_max_length=100,
+                                                repetition_penalty=1.5,
+                                                top_k=20,
+                                                top_p=0.8)

 print(f"out_1 is {out_1}")
-# print(f"out_2 is {out_2}")
+print(f"out_2 is {out_2}")
flagai/model/galactica_model.py (1 change: 0 additions & 1 deletion)
@@ -22,7 +22,6 @@
 from torch.nn import CrossEntropyLoss
 from flagai.model.layers.activations import ACT2FN
 from flagai.model.gpt2_model import GPT2Model, GPT2Stack, GPT2Config
-# import bminf


 class OPTLearnedPositionalEmbedding(nn.Embedding):
flagai/model/gpt2_model.py (8 changes: 0 additions & 8 deletions)
@@ -9,7 +9,6 @@
 from flagai.model.utils import normal_init_method
 from flagai.model.base_model import BaseModel
 import torch.nn.functional as F
-# import bminf
 if os.getenv('ENV_TYPE') == 'deepspeed+mpu':
     from flagai.mpu.utils import divide
     from flagai.mpu.random import checkpoint
@@ -112,10 +111,6 @@ def __init__(self, config):
         self.drop = nn.Dropout(config.embd_pdrop)
         self.project_in = None
         self.project_out = None
-        # self.h = bminf.TransformerBlockList([
-        #     GPT2Block(config.n_ctx, config, scale=True)
-        #     for _ in range(config.n_layer)
-        # ],[0])
         self.h = nn.ModuleList([
             GPT2Block(config.n_ctx, config, scale=True)
             for _ in range(config.n_layer)
@@ -279,9 +274,6 @@ def __init__(self, config, **kwargs):
         self.parallel_output = True

         self.transformer = GPT2Stack(config_gpt)
-        # self.lm_head = bminf.QuantizedLinear(nn.Linear(config_gpt.n_embd,
-        #                                                config_gpt.vocab_size,
-        #                                                bias=False))
         self.lm_head = nn.Linear(config_gpt.n_embd,
                                  config_gpt.vocab_size,
                                  bias=False)
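Note: the deleted comment blocks here and in galactica_model.py were scaffolding for BMInf, a low-memory inference library that quantizes linear layers and offloads parameters. BMInf's documented entry point wraps a finished model instead of requiring such hand-edited layer swaps, e.g. (sketch; requires the bminf package and a CUDA device):

import torch
import bminf

# `model` is any trained torch.nn.Module loaded beforehand.
with torch.cuda.device(0):
    model = bminf.wrapper(model)  # wraps the model for quantized, memory-efficient inference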
flagai/mp_tools.py (7 changes: 3 additions & 4 deletions)
@@ -7,7 +7,7 @@
 import copy

 from_1_to_n_models = {
-    "gpt2": {
+    "gpt": {
         "wte.weight": 0,
         "attn.c_attn.weight": 30,
         "attn.c_attn.bias": 30,
@@ -238,8 +238,7 @@ def change_pytorch_model_mp_from_1_to_n_new(model_name_brief, checkpoint: str, t
d = d["module"]

for k, v in d.items():
if len(v.shape)>2:
continue
assert len(v.shape) < 3
flag = 0
for keys in trans_keys:
if keys in k:
Expand Down Expand Up @@ -413,4 +412,4 @@ def change_pytorch_model_mp_from_n_to_1(model_name_brief, checkpoint):

 if __name__ == "__main__":
     change_pytorch_model_mp_from_1_to_n(
-        '/mnt/test_10b_models/state_dict/GLM-10b-en', 2)
+        '/mnt/test_10b_models/state_dict/GLM-10b-en', 2)
\ No newline at end of file
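Note: change_pytorch_model_mp_from_1_to_n_new splits each 1-D or 2-D parameter across n model-parallel ranks along a per-key dimension from from_1_to_n_models, so a tensor with three or more dimensions signals an unexpected checkpoint. The new assert fails fast instead of silently skipping such tensors, which previously could produce an incomplete split. The core splitting idea as an illustrative sketch (shapes and key names are assumptions):

import torch

def split_param(v: torch.Tensor, dim: int, n: int):
    assert len(v.shape) < 3            # mirrors the check added in this commit
    return torch.chunk(v, n, dim=dim)  # one shard per model-parallel rank

shards = split_param(torch.randn(50304, 1024), dim=0, n=2)  # e.g. "wte.weight": 0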
setup.py (2 changes: 1 addition & 1 deletion)
@@ -5,7 +5,7 @@

 setup(
     name="flagai",
-    version="v1.6.2",
+    version="v1.6.0",
     description="FlagAI aims to help researchers and developers to freely train and test large-scale models for NLP/CV/VL tasks.",
     long_description=open("README.md", encoding="utf-8").read(),
     long_description_content_type="text/markdown",
