Commit
Fix Japanese Hugging Face GPT conversion (NVIDIA#460)
* Fix: define missing variables

Signed-off-by: noppayut <[email protected]>

* Use GPT2LMHeadModel and state_dict() for retrieving named params

Signed-off-by: noppayut <[email protected]>

* Add tensor param size to gpt config

* Dynamically select device instead of hard-coding the GPU

Signed-off-by: noppayut <[email protected]>

---------

Signed-off-by: noppayut <[email protected]>
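
The bullets above come down to two small patterns: picking the device at run time rather than assuming a GPU, and walking the checkpoint through state_dict(). A minimal sketch of both, separate from the commit itself and using a rinna checkpoint name chosen purely for illustration:

# Illustrative sketch only; mirrors the pattern this commit adopts, not the full converter.
import torch
from transformers import GPT2LMHeadModel

torch_device = 'cuda' if torch.cuda.is_available() else 'cpu'  # fall back to CPU when no GPU is present
model = GPT2LMHeadModel.from_pretrained("rinna/japanese-gpt2-medium").to(torch_device)

# state_dict() also yields tied weights and buffers (e.g. the lm_head weight tied to the
# token embedding), which named_parameters() skips by default, so every tensor the
# converter needs is visible here.
for name, tensor in model.state_dict().items():
    if "weight" not in name and "bias" not in name:
        continue
    print(name, tuple(tensor.shape))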
noppayut authored Feb 21, 2023
1 parent a04b6ce commit 43ea4f3
Showing 1 changed file with 15 additions and 5 deletions.
examples/pytorch/gpt/utils/huggingface_jp_gpt_convert.py
@@ -12,6 +12,12 @@
 # See the License for the specific language governing permissions and
 # limitations under the License.
 
+'''
+Important: You should try converting models with `huggingface_gpt_convert.py` first before running this file.
+This file is intended for converting old versions of Japanese GPT models from https://huggingface.co/rinna.
+'''
+
 import argparse
 import configparser
 import multiprocessing
@@ -21,7 +27,7 @@
 
 import os
 import sys
-from transformers import GPT2Model # transformers-4.10.0-py3
+from transformers import GPT2LMHeadModel # transformers-4.10.0-py3
 dir_path = os.path.dirname(os.path.realpath(__file__))
 sys.path.append(dir_path + "/../../../..")
 sys.path.append(dir_path)
@@ -34,7 +40,7 @@ def get_weight_data_type(data_type):
     else:
         assert False, f"Invalid weight data type {data_type}"
 
-def split_and_convert_process(i, saved_dir,factor,key,args, val):
+def split_and_convert_process(i, saved_dir, factor, key, args, val):
 
     if key.find("input_layernorm.weight") != -1 or key.find("input_layernorm.bias") != -1 or \
         key.find("attention.dense.bias") != -1 or key.find("post_attention_layernorm.weight") != -1 or \
@@ -98,9 +104,12 @@ def split_and_convert(args):
 
     # load position_embedding from rank 0
     # model = torch.load(ckpt_name)
-    model = GPT2Model.from_pretrained(args.in_file).to(torch.device('cuda:0'))
+    torch_device = 'cuda' if torch.cuda.is_available() else 'cpu'
+    model = GPT2LMHeadModel.from_pretrained(args.in_file).to(torch_device)
 
     hf_config = vars(model.config)
+    config = configparser.ConfigParser()
+    config["gpt"] = {}
 
     config["gpt"]["model_name"] = "gpt" if hf_config["_name_or_path"] == '' else hf_config["_name_or_path"]
     config["gpt"]["head_num"] = str(hf_config["n_head"])
@@ -113,7 +122,8 @@
     config["gpt"]["start_id"] = str(hf_config["bos_token_id"])
     config["gpt"]["end_id"] = str(hf_config["eos_token_id"])
     config['gpt']['weight_data_type'] = args.weight_data_type
-    with open(output_dir + "/config.ini", 'w') as configfile:
+    config["gpt"]["tensor_para_size"] = str(args.infer_gpu_num)
+    with open(saved_dir + "/config.ini", 'w') as configfile:
         config.write(configfile)
 
     np_weight_data_type = get_weight_data_type(args.weight_data_type)
@@ -151,7 +161,7 @@ def split_and_convert(args):
     torch.multiprocessing.set_start_method("spawn")
     torch.multiprocessing.set_sharing_strategy("file_system")
     pool = multiprocessing.Pool(args.processes)
-    for name, param in model.named_parameters():
+    for name, param in model.state_dict().items():
         if name.find("weight") == -1 and name.find("bias") == -1:
             continue
         print(name)
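Taken together, the two config-related hunks make the config.ini step self-contained: the ConfigParser is created before it is used, tensor_para_size is recorded from args.infer_gpu_num, and the file is written under saved_dir rather than the undefined output_dir. A condensed sketch of just that step, with placeholder values standing in for the script's arguments:

# Illustrative sketch only; directory, GPU count, and dtype are placeholders for the
# script's saved_dir, args.infer_gpu_num, and args.weight_data_type.
import configparser

saved_dir = "./ft-gpt-out"   # placeholder output directory
infer_gpu_num = 1            # placeholder tensor-parallel size used at inference
weight_data_type = "fp32"    # placeholder weight data type

config = configparser.ConfigParser()
config["gpt"] = {}
config["gpt"]["weight_data_type"] = weight_data_type
config["gpt"]["tensor_para_size"] = str(infer_gpu_num)
with open(saved_dir + "/config.ini", 'w') as configfile:
    config.write(configfile)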
