forked from THUDM/ChatGLM2-6B
Commit
Multiple GPUs support: migrate the code from ChatGLM-6B, make a few modifications to support ChatGLM2.
Showing 3 changed files with 68 additions and 0 deletions.
@@ -0,0 +1,59 @@
import os
from typing import Dict, Tuple, Union, Optional

from torch.nn import Module
from transformers import AutoModel


def auto_configure_device_map(num_gpus: int) -> Dict[str, int]:
    # transformer.word_embeddings takes 1 layer
    # transformer.final_layernorm and lm_head take 1 layer
    # transformer.layers takes 28 layers
    # 30 layers in total, distributed across num_gpus GPUs
    num_trans_layers = 28
    per_gpu_layers = 30 / num_gpus

    # bugfix: on Linux, the weight and input passed to torch.embedding can end up on
    # different devices, causing a RuntimeError
    # on Windows, model.device is set to transformer.word_embeddings.device
    # on Linux, model.device is set to lm_head.device
    # when chat or stream_chat is called, input_ids is moved to model.device
    # if transformer.word_embeddings.device differs from model.device, a RuntimeError is raised
    # therefore transformer.word_embeddings, transformer.final_layernorm and lm_head
    # are all placed on the first GPU
    # this file originates from https://github.com/THUDM/ChatGLM-6B/blob/main/utils.py
    # with only minor modifications here to support ChatGLM2
    device_map = {
        'transformer.embedding.word_embeddings': 0,
        'transformer.encoder.final_layernorm': 0,
        'transformer.output_layer': 0,
        'transformer.rotary_pos_emb': 0,
        'lm_head': 0
    }

    # the modules above count as 2 of the 30 layers, so start the tally at 2
    used = 2
    gpu_target = 0
    for i in range(num_trans_layers):
        if used >= per_gpu_layers:
            gpu_target += 1
            used = 0
        assert gpu_target < num_gpus
        device_map[f'transformer.encoder.layers.{i}'] = gpu_target
        used += 1

    return device_map


def load_model_on_gpus(checkpoint_path: Union[str, os.PathLike], num_gpus: int = 2,
                       device_map: Optional[Dict[str, int]] = None, **kwargs) -> Module:
    if num_gpus < 2 and device_map is None:
        model = AutoModel.from_pretrained(checkpoint_path, trust_remote_code=True, **kwargs).half().cuda()
    else:
        from accelerate import dispatch_model

        model = AutoModel.from_pretrained(checkpoint_path, trust_remote_code=True, **kwargs).half()

        if device_map is None:
            device_map = auto_configure_device_map(num_gpus)

        model = dispatch_model(model, device_map=device_map)

    return model
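
A minimal usage sketch, assuming the file above is saved as utils.py at the repository root (mirroring the ChatGLM-6B original) and that two GPUs are available; the checkpoint name and chat call follow the upstream ChatGLM2-6B README:

from transformers import AutoTokenizer
from utils import load_model_on_gpus  # assumes this new file is saved as utils.py

# split the model across 2 GPUs using the device map built above
tokenizer = AutoTokenizer.from_pretrained("THUDM/chatglm2-6b", trust_remote_code=True)
model = load_model_on_gpus("THUDM/chatglm2-6b", num_gpus=2)
model = model.eval()

response, history = model.chat(tokenizer, "Hello", history=[])
print(response)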