Merge pull request stochasticai#146 from stochasticai/dev
GPT-J INT8 fix and LoRA model save support
Toan-Do authored Apr 15, 2023
2 parents 668f61b + 1170710 commit 9e29e27
Showing 10 changed files with 282 additions and 13 deletions.
4 changes: 4 additions & 0 deletions examples/gptj/gptj_lora_int8.py
@@ -6,6 +6,10 @@
model = BaseModel.create("gptj_lora_int8")
# Finetune the model
model.finetune(dataset=instruction_dataset)

# Save the model
model.save("./gptj_weights")

# Once the model has been finetuned, you can start doing inferences
output = model.generate(texts=["Why LLM models are becoming so important?"])
print("Generated output by the model: {}".format(output))
6 changes: 4 additions & 2 deletions examples/llama/llama_lora_int8.py
@@ -6,10 +6,12 @@
model = BaseModel.create("llama_lora_int8")
# Finetune the model
model.finetune(dataset=instruction_dataset)

# Save the model
model.save("./llama_weights")

# Once the model has been finetuned, you can start doing inferences
output = model.generate(texts=["Why LLM models are becoming so important?"])
print("Generated output by the model: {}".format(output))
# Save the model
model.save("./llama_weights")

# If you want to load the model just do BaseModel.load("./llama_weights")
4 changes: 2 additions & 2 deletions pyproject.toml
@@ -46,11 +46,10 @@ dependencies = [
"transformers==4.27.3",
"datasets",
"evaluate",
"bitsandbytes",
"bitsandbytes==0.37.2",
"sentencepiece",
"deepspeed",
"gradio",
"bitsandbytes",
"click",
"wget",
"ai21",
@@ -59,6 +58,7 @@ dependencies = [
"openai >= 0.27.0",
"pydantic >= 1.10.0",
"rouge-score >= 0.1.2",
"accelerate",
]

[project.scripts]
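
Note: the duplicate "bitsandbytes" entry is dropped, the remaining one is pinned to 0.37.2 (presumably to keep the INT8 path working with this code), and "accelerate" is added, which transformers needs for load_in_8bit/device_map loading. A quick, hypothetical sanity check for an installed environment:

    import importlib.metadata

    # Confirm the pinned bitsandbytes build and the newly required accelerate are present
    assert importlib.metadata.version("bitsandbytes") == "0.37.2"
    importlib.metadata.version("accelerate")  # raises PackageNotFoundError if missing
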
3 changes: 3 additions & 0 deletions src/xturing/config/finetuning_config.yaml
@@ -18,6 +18,7 @@ llama:
learning_rate: 5e-5
weight_decay: 0.01
num_train_epochs: 3
optimizer_name: cpu_adam

llama_lora:
learning_rate: 1e-4
@@ -36,6 +37,7 @@ gptj:
learning_rate: 5e-5
weight_decay: 0.01
num_train_epochs: 3
optimizer_name: cpu_adam

gptj_lora:
learning_rate: 1e-4
@@ -84,6 +86,7 @@ galactica:
learning_rate: 5e-5
weight_decay: 0.01
num_train_epochs: 3
optimizer_name: cpu_adam

galactica_lora:
learning_rate: 1e-4
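
Note: the non-LoRA presets for llama, gptj and galactica now set optimizer_name: cpu_adam (presumably DeepSpeed's CPU Adam offload optimizer, given deepspeed is already a dependency). User code is unchanged; the preset is picked up automatically, e.g. (imports assumed to match the truncated example headers, dataset path hypothetical):

    from xturing.datasets import InstructionDataset
    from xturing.models import BaseModel

    instruction_dataset = InstructionDataset("./alpaca_data")  # hypothetical dataset path
    model = BaseModel.create("gptj")             # base preset, not gptj_lora_int8
    model.finetune(dataset=instruction_dataset)  # uses optimizer_name: cpu_adam from this config
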
13 changes: 13 additions & 0 deletions src/xturing/config/read_config.py
@@ -44,6 +44,19 @@ def exists_xturing_config_file(dir_path: Union[Path, str] = None):
return xturing_config_file_path.is_file()


def exists_lora_config_file(dir_path: Union[Path, str] = None):
if dir_path is None:
return False
dir_path = Path(dir_path)
assert dir_path.is_dir(), "The following path {} should be a directory".format(
str(dir_path)
)

lora_config_file_path = dir_path / "adapter_config.json"

return lora_config_file_path.is_file()


def read_xturing_config_file(dir_path: Union[Path, str]):
dir_path = Path(dir_path)
assert dir_path.is_dir(), "The following path {} should be a directory".format(
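
Note: exists_lora_config_file lets the engines tell the new adapter checkpoints (directories containing adapter_config.json) apart from the older full checkpoints handled by exists_xturing_config_file. A minimal sketch of that dispatch, with a hypothetical weights directory:

    from xturing.config.read_config import (
        exists_lora_config_file,
        exists_xturing_config_file,
    )

    weights_dir = "./gptj_weights"  # hypothetical; must be an existing directory
    if exists_lora_config_file(weights_dir):
        print("LoRA adapter checkpoint -> LoraModel.from_pretrained")
    elif exists_xturing_config_file(weights_dir):
        print("full checkpoint -> load pytorch_model.bin via load_state_dict")
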
21 changes: 16 additions & 5 deletions src/xturing/engines/causal.py
@@ -7,7 +7,10 @@
from transformers import AutoModelForCausalLM, AutoTokenizer

from xturing.config import DEFAULT_DEVICE, DEFAULT_DTYPE
from xturing.config.read_config import exists_xturing_config_file
from xturing.config.read_config import (
exists_lora_config_file,
exists_xturing_config_file,
)
from xturing.engines.base import BaseEngine
from xturing.engines.lora_engine import (
LoraConfig,
@@ -148,31 +151,39 @@ def __init__(
lora_dropout=0.05,
bias="none",
inference_mode=False,
base_model_name_or_path=self.base_model.__dict__.get("name_or_path", None),
)

if len(target_modules) == 1:
lora_config.fan_in_fan_out = True
lora_config.enable_lora = [True, False, True]
# self.model = LoraModel(lora_config, self.model)

self.model = LoraModel(lora_config, self.base_model)

if weights_path is not None and exists_xturing_config_file(weights_path):
if weights_path is not None and exists_lora_config_file(weights_path):
self.model = LoraModel.from_pretrained(self.base_model, weights_path)
elif weights_path is not None and exists_xturing_config_file(weights_path):
self.model = LoraModel(lora_config, self.model)
model_weights_path = str(Path(weights_path).resolve() / "pytorch_model.bin")
self.model.load_state_dict(
torch.load(
model_weights_path # , map_location=torch.device(DEFAULT_DEVICE)
)
)
else:
self.model = LoraModel(lora_config, self.model)
self.model.print_trainable_parameters()

self.loss_fct = CrossEntropyLoss()

def save(self, saving_path: Union[str, Path]):
# Save HF config file
self.base_model.config.save_pretrained(str(saving_path))
self.model.config.save_pretrained(str(saving_path))
# Save model weights
model_weights = str(Path(saving_path).resolve() / "pytorch_model.bin")

torch.save(self.model.state_dict(), model_weights)
# save adapter
self.model.save_pretrained(saving_path)

# Save tokenizer
self.tokenizer.save_pretrained(saving_path)
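
Note: save() no longer dumps the whole state_dict to pytorch_model.bin for LoRA engines; it now writes the Hugging Face model config, the adapter files, and the tokenizer. Roughly, a saved directory ends up looking like this (adapter file names come from lora.py below; the exact tokenizer files depend on the model):

    # ./gptj_weights/
    #   config.json          <- self.model.config.save_pretrained(...)
    #   adapter_config.json  <- self.model.save_pretrained(...)  (LoraConfig)
    #   adapter_model.bin    <- self.model.save_pretrained(...)  (trainable LoRA weights only)
    #   tokenizer files      <- self.tokenizer.save_pretrained(...)
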
6 changes: 2 additions & 4 deletions src/xturing/engines/gptj_engine.py
@@ -46,10 +46,8 @@ def __init__(self, weights_path: Optional[Union[str, Path]] = None):
"philschmid/gpt-j-6B-fp16-sharded", load_in_8bit=True, device_map=device_map
)

tokenizer = AutoTokenizer.from_pretrained(
"philschmid/gpt-j-6B-fp16-sharded"
)
tokenizer.pad_token = self.tokenizer.eos_token
tokenizer = AutoTokenizer.from_pretrained("philschmid/gpt-j-6B-fp16-sharded")
tokenizer.pad_token = tokenizer.eos_token
super().__init__(
weights_path=weights_path, model=model, tokenizer=tokenizer, load_8bit=True
)
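
Note: this is the GPT-J INT8 fix from the commit title: the pad token is now set on the local tokenizer object rather than self.tokenizer, which has not been assigned yet at this point in __init__. A standalone sketch of the fixed load path (assuming device_map="auto"; the actual device_map variable is defined in the truncated lines above):

    from transformers import AutoModelForCausalLM, AutoTokenizer

    model = AutoModelForCausalLM.from_pretrained(
        "philschmid/gpt-j-6B-fp16-sharded", load_in_8bit=True, device_map="auto"
    )
    tokenizer = AutoTokenizer.from_pretrained("philschmid/gpt-j-6B-fp16-sharded")
    tokenizer.pad_token = tokenizer.eos_token  # set on the local object, not self.tokenizer
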
145 changes: 145 additions & 0 deletions src/xturing/engines/lora_engine/lora.py
@@ -13,7 +13,9 @@
# See the License for the specific language governing permissions and
# limitations under the License.
import importlib
import json
import math
import os
import re
import warnings
from dataclasses import asdict, dataclass, field
@@ -25,6 +27,11 @@
import torch.nn.functional as F
from transformers.pytorch_utils import Conv1D

from xturing.engines.lora_engine.save_and_load import (
get_peft_model_state_dict,
set_peft_model_state_dict,
)


def is_bnb_available():
return importlib.util.find_spec("bitsandbytes") is not None
@@ -38,6 +45,10 @@ def transpose(weight, fan_in_fan_out):
return weight.T if fan_in_fan_out else weight


WEIGHTS_NAME = "adapter_model.bin"
CONFIG_NAME = "adapter_config.json"


@dataclass
class LoraConfig:
"""
@@ -100,6 +111,92 @@ class LoraConfig:
default=False, metadata={"help": "Whether to use inference mode"}
)

base_model_name_or_path: str = field(
default=None, metadata={"help": "The name of the base model to use."}
)

@property
def __dict__(self):
return asdict(self)

def to_dict(self):
return self.__dict__

def save_pretrained(self, save_directory, **kwargs):
r"""
This method saves the configuration of your adapter model in a directory.
Args:
save_directory (`str`):
The directory where the configuration will be saved.
kwargs (additional keyword arguments, *optional*):
Additional keyword arguments passed along to the [`~transformers.utils.PushToHubMixin.push_to_hub`]
method.
"""
if os.path.isfile(save_directory):
raise AssertionError(
f"Provided path ({save_directory}) should be a directory, not a file"
)

os.makedirs(save_directory, exist_ok=True)

output_dict = self.__dict__
output_path = os.path.join(save_directory, CONFIG_NAME)

# save it
with open(output_path, "w") as writer:
writer.write(json.dumps(output_dict, indent=2, sort_keys=True))

@classmethod
def from_pretrained(cls, pretrained_model_name_or_path, subfolder=None, **kwargs):
r"""
This method loads the configuration of your adapter model from a directory.
Args:
pretrained_model_name_or_path (`str`):
The directory or the Hub repository id where the configuration is saved.
kwargs (additional keyword arguments, *optional*):
Additional keyword arguments passed along to the child class initialization.
"""
path = (
os.path.join(pretrained_model_name_or_path, subfolder)
if subfolder is not None
else pretrained_model_name_or_path
)
if os.path.isfile(os.path.join(path, CONFIG_NAME)):
config_file = os.path.join(path, CONFIG_NAME)
else:
# try:
# config_file = hf_hub_download(pretrained_model_name_or_path, CONFIG_NAME, subfolder=subfolder)
# except Exception:
raise ValueError(
f"Can't find '{CONFIG_NAME}' at '{pretrained_model_name_or_path}'"
)

loaded_attributes = cls.from_json_file(config_file)

config = cls(**kwargs)

for key, value in loaded_attributes.items():
if hasattr(config, key):
setattr(config, key, value)

return config

@classmethod
def from_json_file(cls, path_json_file, **kwargs):
r"""
Loads a configuration file from a json file.
Args:
path_json_file (`str`):
The path to the json file.
"""
with open(path_json_file, "r") as file:
json_object = json.load(file)

return json_object


class LoraModel(torch.nn.Module):
"""
@@ -342,6 +439,54 @@ def print_trainable_parameters(self):
f"trainable params: {trainable_params} || all params: {all_param} || trainable%: {100 * trainable_params / all_param}"
)

def save_pretrained(self, save_directory, **kwargs):
if os.path.isfile(save_directory):
raise ValueError(
f"Provided path ({save_directory}) should be a directory, not a file"
)
os.makedirs(save_directory, exist_ok=True)

# for adapter_name, peft_config in self.peft_config.items():
# save only the trainable weights
output_state_dict = get_peft_model_state_dict(
self, kwargs.get("state_dict", None)
)
output_dir = save_directory
os.makedirs(output_dir, exist_ok=True)
torch.save(output_state_dict, os.path.join(output_dir, WEIGHTS_NAME))

# save the config and change the inference mode to `True`
if self.peft_config.base_model_name_or_path is None:
self.peft_config.base_model_name_or_path = self.model.__dict__.get(
"name_or_path", None
)

inference_mode = self.peft_config.inference_mode
self.peft_config.inference_mode = True
self.peft_config.save_pretrained(output_dir)
self.peft_config.inference_mode = inference_mode

@classmethod
def from_pretrained(cls, model, saved_dir):
config = LoraConfig.from_pretrained(saved_dir)
model = cls(config, model)

if os.path.exists(os.path.join(saved_dir, WEIGHTS_NAME)):
filename = os.path.join(saved_dir, WEIGHTS_NAME)
else:
raise ValueError(
f"Please check that the file {WEIGHTS_NAME} is present at {saved_dir}."
)

adapters_weights = torch.load(
filename,
map_location=torch.device("cuda" if torch.cuda.is_available() else "cpu"),
)
# load the weights into the model
set_peft_model_state_dict(model, adapters_weights)
model.eval()
return model


# Below code is based on https://github.com/microsoft/LoRA/blob/main/loralib/layers.py
# and modified to work with PyTorch FSDP
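
Note: together, LoraConfig.save_pretrained/from_pretrained and LoraModel.save_pretrained/from_pretrained give the adapter checkpoints a PEFT-style layout (adapter_config.json plus adapter_model.bin holding only the trainable weights). A hypothetical round trip, with base_model standing for the causal LM the engine already holds and the field values mirroring the ones passed in causal.py (all other fields left at their defaults):

    config = LoraConfig(
        lora_dropout=0.05,
        bias="none",
        inference_mode=False,
        base_model_name_or_path="philschmid/gpt-j-6B-fp16-sharded",
    )
    lora_model = LoraModel(config, base_model)    # wrap the base causal LM
    lora_model.save_pretrained("./gptj_weights")  # writes adapter_config.json + adapter_model.bin
    reloaded = LoraModel.from_pretrained(base_model, "./gptj_weights")  # returned in eval() mode
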
(Diffs for the remaining 2 of the 10 changed files did not load on this page.)
