Merge pull request stochasticai#146 from stochasticai/dev
GPT-J INT8 fix and LoRA model save support
Toan-Do authored Apr 15, 2023
2 parents 668f61b + 1170710 commit 9e29e27
Showing 10 changed files with 282 additions and 13 deletions.
4 changes: 4 additions & 0 deletions examples/gptj/gptj_lora_int8.py
@@ -6,6 +6,10 @@
model = BaseModel.create("gptj_lora_int8")
# Finetune the model
model.finetune(dataset=instruction_dataset)

# Save the model
model.save("./gptj_weights")

# Once the model has been finetuned, you can start doing inferences
output = model.generate(texts=["Why LLM models are becoming so important?"])
print("Generated output by the model: {}".format(output))
6 changes: 4 additions & 2 deletions examples/llama/llama_lora_int8.py
@@ -6,10 +6,12 @@
model = BaseModel.create("llama_lora_int8")
# Finetune the model
model.finetune(dataset=instruction_dataset)

# Save the model
model.save("./llama_weights")

# Once the model has been finetuned, you can start doing inferences
output = model.generate(texts=["Why LLM models are becoming so important?"])
print("Generated output by the model: {}".format(output))
# Save the model
model.save("./llama_weights")

# If you want to load the model just do BaseModel.load("./llama_weights")
4 changes: 2 additions & 2 deletions pyproject.toml
@@ -46,11 +46,10 @@ dependencies = [
"transformers==4.27.3",
"datasets",
"evaluate",
"bitsandbytes",
"bitsandbytes==0.37.2",
"sentencepiece",
"deepspeed",
"gradio",
"bitsandbytes",
"click",
"wget",
"ai21",
@@ -59,6 +58,7 @@ dependencies = [
"openai >= 0.27.0",
"pydantic >= 1.10.0",
"rouge-score >= 0.1.2",
"accelerate",
]

[project.scripts]
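
Note: the duplicate "bitsandbytes" entry is dropped, the remaining one is pinned to 0.37.2 (presumably to keep the INT8 path working with this code), and "accelerate" is added, which transformers needs for load_in_8bit/device_map loading. A quick, hypothetical sanity check for an installed environment:

    import importlib.metadata

    # Confirm the pinned bitsandbytes build and the newly required accelerate are present
    assert importlib.metadata.version("bitsandbytes") == "0.37.2"
    importlib.metadata.version("accelerate")  # raises PackageNotFoundError if missing
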
3 changes: 3 additions & 0 deletions src/xturing/config/finetuning_config.yaml
@@ -18,6 +18,7 @@ llama:
learning_rate: 5e-5
weight_decay: 0.01
num_train_epochs: 3
optimizer_name: cpu_adam

llama_lora:
learning_rate: 1e-4
@@ -36,6 +37,7 @@ gptj:
learning_rate: 5e-5
weight_decay: 0.01
num_train_epochs: 3
optimizer_name: cpu_adam

gptj_lora:
learning_rate: 1e-4
@@ -84,6 +86,7 @@ galactica:
learning_rate: 5e-5
weight_decay: 0.01
num_train_epochs: 3
optimizer_name: cpu_adam

galactica_lora:
learning_rate: 1e-4
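
Note: the non-LoRA presets for llama, gptj and galactica now set optimizer_name: cpu_adam (presumably DeepSpeed's CPU Adam offload optimizer, given deepspeed is already a dependency). User code is unchanged; the preset is picked up automatically, e.g. (imports assumed to match the truncated example headers, dataset path hypothetical):

    from xturing.datasets import InstructionDataset
    from xturing.models import BaseModel

    instruction_dataset = InstructionDataset("./alpaca_data")  # hypothetical dataset path
    model = BaseModel.create("gptj")             # base preset, not gptj_lora_int8
    model.finetune(dataset=instruction_dataset)  # uses optimizer_name: cpu_adam from this config
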
13 changes: 13 additions & 0 deletions src/xturing/config/read_config.py
@@ -44,6 +44,19 @@ def exists_xturing_config_file(dir_path: Union[Path, str] = None):
return xturing_config_file_path.is_file()


def exists_lora_config_file(dir_path: Union[Path, str] = None):
if dir_path is None:
return False
dir_path = Path(dir_path)
assert dir_path.is_dir(), "The following path {} should be a directory".format(
str(dir_path)
)

lora_config_file_path = dir_path / "adapter_config.json"

return lora_config_file_path.is_file()


def read_xturing_config_file(dir_path: Union[Path, str]):
dir_path = Path(dir_path)
assert dir_path.is_dir(), "The following path {} should be a directory".format(
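
Note: exists_lora_config_file lets the engines tell the new adapter checkpoints (directories containing adapter_config.json) apart from the older full checkpoints handled by exists_xturing_config_file. A minimal sketch of that dispatch, with a hypothetical weights directory:

    from xturing.config.read_config import (
        exists_lora_config_file,
        exists_xturing_config_file,
    )

    weights_dir = "./gptj_weights"  # hypothetical; must be an existing directory
    if exists_lora_config_file(weights_dir):
        print("LoRA adapter checkpoint -> LoraModel.from_pretrained")
    elif exists_xturing_config_file(weights_dir):
        print("full checkpoint -> load pytorch_model.bin via load_state_dict")
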
21 changes: 16 additions & 5 deletions src/xturing/engines/causal.py
@@ -7,7 +7,10 @@
from transformers import AutoModelForCausalLM, AutoTokenizer

from xturing.config import DEFAULT_DEVICE, DEFAULT_DTYPE
from xturing.config.read_config import exists_xturing_config_file
from xturing.config.read_config import (
exists_lora_config_file,
exists_xturing_config_file,
)
from xturing.engines.base import BaseEngine
from xturing.engines.lora_engine import (
LoraConfig,
@@ -148,31 +151,39 @@ def __init__(
lora_dropout=0.05,
bias="none",
inference_mode=False,
base_model_name_or_path=self.base_model.__dict__.get("name_or_path", None),
)

if len(target_modules) == 1:
lora_config.fan_in_fan_out = True
lora_config.enable_lora = [True, False, True]
# self.model = LoraModel(lora_config, self.model)

self.model = LoraModel(lora_config, self.base_model)

if weights_path is not None and exists_xturing_config_file(weights_path):
if weights_path is not None and exists_lora_config_file(weights_path):
self.model = LoraModel.from_pretrained(self.base_model, weights_path)
elif weights_path is not None and exists_xturing_config_file(weights_path):
self.model = LoraModel(lora_config, self.model)
model_weights_path = str(Path(weights_path).resolve() / "pytorch_model.bin")
self.model.load_state_dict(
torch.load(
model_weights_path # , map_location=torch.device(DEFAULT_DEVICE)
)
)
else:
self.model = LoraModel(lora_config, self.model)
self.model.print_trainable_parameters()

self.loss_fct = CrossEntropyLoss()

def save(self, saving_path: Union[str, Path]):
# Save HF config file
self.base_model.config.save_pretrained(str(saving_path))
self.model.config.save_pretrained(str(saving_path))
# Save model weights
model_weights = str(Path(saving_path).resolve() / "pytorch_model.bin")

torch.save(self.model.state_dict(), model_weights)
# save adapter
self.model.save_pretrained(saving_path)

# Save tokenizer
self.tokenizer.save_pretrained(saving_path)
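
Note: save() no longer dumps the whole state_dict to pytorch_model.bin for LoRA engines; it now writes the Hugging Face model config, the adapter files, and the tokenizer. Roughly, a saved directory ends up looking like this (adapter file names come from lora.py below; the exact tokenizer files depend on the model):

    # ./gptj_weights/
    #   config.json          <- self.model.config.save_pretrained(...)
    #   adapter_config.json  <- self.model.save_pretrained(...)  (LoraConfig)
    #   adapter_model.bin    <- self.model.save_pretrained(...)  (trainable LoRA weights only)
    #   tokenizer files      <- self.tokenizer.save_pretrained(...)
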
6 changes: 2 additions & 4 deletions src/xturing/engines/gptj_engine.py
@@ -46,10 +46,8 @@ def __init__(self, weights_path: Optional[Union[str, Path]] = None):
"philschmid/gpt-j-6B-fp16-sharded", load_in_8bit=True, device_map=device_map
)

tokenizer = AutoTokenizer.from_pretrained(
"philschmid/gpt-j-6B-fp16-sharded"
)
tokenizer.pad_token = self.tokenizer.eos_token
tokenizer = AutoTokenizer.from_pretrained("philschmid/gpt-j-6B-fp16-sharded")
tokenizer.pad_token = tokenizer.eos_token
super().__init__(
weights_path=weights_path, model=model, tokenizer=tokenizer, load_8bit=True
)
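
Note: this is the GPT-J INT8 fix from the commit title: the pad token is now set on the local tokenizer object rather than self.tokenizer, which has not been assigned yet at this point in __init__. A standalone sketch of the fixed load path (assuming device_map="auto"; the actual device_map variable is defined in the truncated lines above):

    from transformers import AutoModelForCausalLM, AutoTokenizer

    model = AutoModelForCausalLM.from_pretrained(
        "philschmid/gpt-j-6B-fp16-sharded", load_in_8bit=True, device_map="auto"
    )
    tokenizer = AutoTokenizer.from_pretrained("philschmid/gpt-j-6B-fp16-sharded")
    tokenizer.pad_token = tokenizer.eos_token  # set on the local object, not self.tokenizer
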
145 changes: 145 additions & 0 deletions src/xturing/engines/lora_engine/lora.py
@@ -13,7 +13,9 @@
# See the License for the specific language governing permissions and
# limitations under the License.
import importlib
import json
import math
import os
import re
import warnings
from dataclasses import asdict, dataclass, field
@@ -25,6 +27,11 @@
import torch.nn.functional as F
from transformers.pytorch_utils import Conv1D

from xturing.engines.lora_engine.save_and_load import (
get_peft_model_state_dict,
set_peft_model_state_dict,
)


def is_bnb_available():
return importlib.util.find_spec("bitsandbytes") is not None
@@ -38,6 +45,10 @@ def transpose(weight, fan_in_fan_out):
return weight.T if fan_in_fan_out else weight


WEIGHTS_NAME = "adapter_model.bin"
CONFIG_NAME = "adapter_config.json"


@dataclass
class LoraConfig:
"""
@@ -100,6 +111,92 @@ class LoraConfig:
default=False, metadata={"help": "Whether to use inference mode"}
)

base_model_name_or_path: str = field(
default=None, metadata={"help": "The name of the base model to use."}
)

@property
def __dict__(self):
return asdict(self)

def to_dict(self):
return self.__dict__

def save_pretrained(self, save_directory, **kwargs):
r"""
This method saves the configuration of your adapter model in a directory.
Args:
save_directory (`str`):
The directory where the configuration will be saved.
kwargs (additional keyword arguments, *optional*):
Additional keyword arguments passed along to the [`~transformers.utils.PushToHubMixin.push_to_hub`]
method.
"""
if os.path.isfile(save_directory):
raise AssertionError(
f"Provided path ({save_directory}) should be a directory, not a file"
)

os.makedirs(save_directory, exist_ok=True)

output_dict = self.__dict__
output_path = os.path.join(save_directory, CONFIG_NAME)

# save it
with open(output_path, "w") as writer:
writer.write(json.dumps(output_dict, indent=2, sort_keys=True))

@classmethod
def from_pretrained(cls, pretrained_model_name_or_path, subfolder=None, **kwargs):
r"""
This method loads the configuration of your adapter model from a directory.
Args:
pretrained_model_name_or_path (`str`):
The directory or the Hub repository id where the configuration is saved.
kwargs (additional keyword arguments, *optional*):
Additional keyword arguments passed along to the child class initialization.
"""
path = (
os.path.join(pretrained_model_name_or_path, subfolder)
if subfolder is not None
else pretrained_model_name_or_path
)
if os.path.isfile(os.path.join(path, CONFIG_NAME)):
config_file = os.path.join(path, CONFIG_NAME)
else:
# try:
# config_file = hf_hub_download(pretrained_model_name_or_path, CONFIG_NAME, subfolder=subfolder)
# except Exception:
raise ValueError(
f"Can't find '{CONFIG_NAME}' at '{pretrained_model_name_or_path}'"
)

loaded_attributes = cls.from_json_file(config_file)

config = cls(**kwargs)

for key, value in loaded_attributes.items():
if hasattr(config, key):
setattr(config, key, value)

return config

@classmethod
def from_json_file(cls, path_json_file, **kwargs):
r"""
Loads a configuration file from a json file.
Args:
path_json_file (`str`):
The path to the json file.
"""
with open(path_json_file, "r") as file:
json_object = json.load(file)

return json_object


class LoraModel(torch.nn.Module):
"""
@@ -342,6 +439,54 @@ def print_trainable_parameters(self):
f"trainable params: {trainable_params} || all params: {all_param} || trainable%: {100 * trainable_params / all_param}"
)

def save_pretrained(self, save_directory, **kwargs):
if os.path.isfile(save_directory):
raise ValueError(
f"Provided path ({save_directory}) should be a directory, not a file"
)
os.makedirs(save_directory, exist_ok=True)

# for adapter_name, peft_config in self.peft_config.items():
# save only the trainable weights
output_state_dict = get_peft_model_state_dict(
self, kwargs.get("state_dict", None)
)
output_dir = save_directory
os.makedirs(output_dir, exist_ok=True)
torch.save(output_state_dict, os.path.join(output_dir, WEIGHTS_NAME))

# save the config and change the inference mode to `True`
if self.peft_config.base_model_name_or_path is None:
self.peft_config.base_model_name_or_path = self.model.__dict__.get(
"name_or_path", None
)

inference_mode = self.peft_config.inference_mode
self.peft_config.inference_mode = True
self.peft_config.save_pretrained(output_dir)
self.peft_config.inference_mode = inference_mode

@classmethod
def from_pretrained(cls, model, saved_dir):
config = LoraConfig.from_pretrained(saved_dir)
model = cls(config, model)

if os.path.exists(os.path.join(saved_dir, WEIGHTS_NAME)):
filename = os.path.join(saved_dir, WEIGHTS_NAME)
else:
raise ValueError(
f"Please check that the file {WEIGHTS_NAME} is present at {saved_dir}."
)

adapters_weights = torch.load(
filename,
map_location=torch.device("cuda" if torch.cuda.is_available() else "cpu"),
)
# load the weights into the model
set_peft_model_state_dict(model, adapters_weights)
model.eval()
return model


# Below code is based on https://github.com/microsoft/LoRA/blob/main/loralib/layers.py
# and modified to work with PyTorch FSDP
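
Note: together, LoraConfig.save_pretrained/from_pretrained and LoraModel.save_pretrained/from_pretrained give the adapter checkpoints a PEFT-style layout (adapter_config.json plus adapter_model.bin holding only the trainable weights). A hypothetical round trip, with base_model standing for the causal LM the engine already holds and the field values mirroring the ones passed in causal.py (all other fields left at their defaults):

    config = LoraConfig(
        lora_dropout=0.05,
        bias="none",
        inference_mode=False,
        base_model_name_or_path="philschmid/gpt-j-6B-fp16-sharded",
    )
    lora_model = LoraModel(config, base_model)    # wrap the base causal LM
    lora_model.save_pretrained("./gptj_weights")  # writes adapter_config.json + adapter_model.bin
    reloaded = LoraModel.from_pretrained(base_model, "./gptj_weights")  # returned in eval() mode
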
(Diffs for the remaining 2 of the 10 changed files did not load on this page.)
