Commit
Fix Japanese Hugging Face GPT conversion (NVIDIA#460)
* Fix: define missing variables

Signed-off-by: noppayut <[email protected]>

* Use GPT2LMHeadModel and state_dict() for retrieving named params

Signed-off-by: noppayut <[email protected]>

* Add tensor param size to gpt config

* Dynamically select device instead of hard-coding the GPU

Signed-off-by: noppayut <[email protected]>

---------

Signed-off-by: noppayut <[email protected]>
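
The bullets above come down to two small patterns: picking the device at run time rather than assuming a GPU, and walking the checkpoint through state_dict(). A minimal sketch of both, separate from the commit itself and using a rinna checkpoint name chosen purely for illustration:

# Illustrative sketch only; mirrors the pattern this commit adopts, not the full converter.
import torch
from transformers import GPT2LMHeadModel

torch_device = 'cuda' if torch.cuda.is_available() else 'cpu'  # fall back to CPU when no GPU is present
model = GPT2LMHeadModel.from_pretrained("rinna/japanese-gpt2-medium").to(torch_device)

# state_dict() also yields tied weights and buffers (e.g. the lm_head weight tied to the
# token embedding), which named_parameters() skips by default, so every tensor the
# converter needs is visible here.
for name, tensor in model.state_dict().items():
    if "weight" not in name and "bias" not in name:
        continue
    print(name, tuple(tensor.shape))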
noppayut authored Feb 21, 2023
1 parent a04b6ce commit 43ea4f3
Showing 1 changed file with 15 additions and 5 deletions.
examples/pytorch/gpt/utils/huggingface_jp_gpt_convert.py
@@ -12,6 +12,12 @@
 # See the License for the specific language governing permissions and
 # limitations under the License.
 
+'''
+Important: You should try converting models with `huggingface_gpt_convert.py` first before running this file.
+This file is intended for converting old versions of Japanese GPT models from https://huggingface.co/rinna.
+'''
+
 import argparse
 import configparser
 import multiprocessing
@@ -21,7 +27,7 @@
 
 import os
 import sys
-from transformers import GPT2Model # transformers-4.10.0-py3
+from transformers import GPT2LMHeadModel # transformers-4.10.0-py3
 dir_path = os.path.dirname(os.path.realpath(__file__))
 sys.path.append(dir_path + "/../../../..")
 sys.path.append(dir_path)
@@ -34,7 +40,7 @@ def get_weight_data_type(data_type):
     else:
         assert False, f"Invalid weight data type {data_type}"
 
-def split_and_convert_process(i, saved_dir,factor,key,args, val):
+def split_and_convert_process(i, saved_dir, factor, key, args, val):
 
     if key.find("input_layernorm.weight") != -1 or key.find("input_layernorm.bias") != -1 or \
         key.find("attention.dense.bias") != -1 or key.find("post_attention_layernorm.weight") != -1 or \
@@ -98,9 +104,12 @@ def split_and_convert(args):
 
     # load position_embedding from rank 0
     # model = torch.load(ckpt_name)
-    model = GPT2Model.from_pretrained(args.in_file).to(torch.device('cuda:0'))
+    torch_device = 'cuda' if torch.cuda.is_available() else 'cpu'
+    model = GPT2LMHeadModel.from_pretrained(args.in_file).to(torch_device)
 
     hf_config = vars(model.config)
+    config = configparser.ConfigParser()
+    config["gpt"] = {}
 
     config["gpt"]["model_name"] = "gpt" if hf_config["_name_or_path"] == '' else hf_config["_name_or_path"]
     config["gpt"]["head_num"] = str(hf_config["n_head"])
@@ -113,7 +122,8 @@
     config["gpt"]["start_id"] = str(hf_config["bos_token_id"])
     config["gpt"]["end_id"] = str(hf_config["eos_token_id"])
     config['gpt']['weight_data_type'] = args.weight_data_type
-    with open(output_dir + "/config.ini", 'w') as configfile:
+    config["gpt"]["tensor_para_size"] = str(args.infer_gpu_num)
+    with open(saved_dir + "/config.ini", 'w') as configfile:
         config.write(configfile)
 
     np_weight_data_type = get_weight_data_type(args.weight_data_type)
@@ -151,7 +161,7 @@ def split_and_convert(args):
     torch.multiprocessing.set_start_method("spawn")
     torch.multiprocessing.set_sharing_strategy("file_system")
     pool = multiprocessing.Pool(args.processes)
-    for name, param in model.named_parameters():
+    for name, param in model.state_dict().items():
         if name.find("weight") == -1 and name.find("bias") == -1:
             continue
         print(name)
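Taken together, the two config-related hunks make the config.ini step self-contained: the ConfigParser is created before it is used, tensor_para_size is recorded from args.infer_gpu_num, and the file is written under saved_dir rather than the undefined output_dir. A condensed sketch of just that step, with placeholder values standing in for the script's arguments:

# Illustrative sketch only; directory, GPU count, and dtype are placeholders for the
# script's saved_dir, args.infer_gpu_num, and args.weight_data_type.
import configparser

saved_dir = "./ft-gpt-out"   # placeholder output directory
infer_gpu_num = 1            # placeholder tensor-parallel size used at inference
weight_data_type = "fp32"    # placeholder weight data type

config = configparser.ConfigParser()
config["gpt"] = {}
config["gpt"]["weight_data_type"] = weight_data_type
config["gpt"]["tensor_para_size"] = str(infer_gpu_num)
with open(saved_dir + "/config.ini", 'w') as configfile:
    config.write(configfile)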
