Merge pull request FlagAI-Open#480 from Anhforth/add_lora_input
Add lora input
ftgreat authored Jul 5, 2023
2 parents dcf1cd1 + c97dd5b commit 98ebc08
Showing 34 changed files with 3,452 additions and 942 deletions.
16 changes: 9 additions & 7 deletions examples/Aquila/Aquila-chat/Aquila-chat-lora.yaml
@@ -1,18 +1,20 @@
-batch_size: 8
-gradient_accumulation_steps: 4
-lr: 2.0e-4
+batch_size: 4
+gradient_accumulation_steps: 1
+lr: 3.0e-4
 warm_up: 0.01
-warm_up_iters: 100
+warm_up_iters: 200
 lora_r: 16
 lora_alpha: 32
-epochs: 1000
-save_interval: 3000
-log_interval: 1
+epochs: 30000000
+save_interval: 300
+log_interval: 10
 bmt_cpu_offload: False
 bmt_pre_load: True
+
 save_optim: True
 save_rng: True
+
 eps: 1.0e-8
 lora: True

 enable_sft_dataset_dir: './data/'
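Reviewer note: in this config, lora_r is the adapter rank and lora_alpha the scaling numerator (effective scale lora_alpha / lora_r = 2.0). A minimal sketch of how such values typically become a LoRA config, assuming the vendored flagai.model.tools.peft mirrors the upstream peft API; the target module names are illustrative, not taken from this diff:

# Minimal sketch, assuming the vendored peft mirrors the upstream API.
from peft import LoraConfig, get_peft_model

lora_config = LoraConfig(
    r=16,                         # lora_r: rank of the low-rank update matrices
    lora_alpha=32,                # lora_alpha: scaling = lora_alpha / r = 2.0
    target_modules=["wq", "wv"],  # illustrative attention projections
    task_type="CAUSAL_LM",
)
# model = get_peft_model(model, lora_config)  # freezes base weights, adds trainable adapters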
12 changes: 6 additions & 6 deletions examples/Aquila/Aquila-chat/Aquila-chat.yaml
@@ -1,16 +1,16 @@
-batch_size: 1
-gradient_accumulation_steps: 4
+batch_size: 4
+gradient_accumulation_steps: 1
 lr: 3.0e-4
-warm_up: 0.01

 bmt_cpu_offload: False
 bmt_pre_load: True
-save_interval: 3000
-log_interval: 1
+warm_up: 0.01
+save_interval: 300
+log_interval: 10
 warm_up_iters: 100
 save_optim: True
 save_rng: True
 lora: False
 eps: 1.0e-8
 enable_sft_dataset_dir: './data/'
-enable_sft_dataset_file: 'convo_samples.jsonl'
+enable_sft_dataset_file: 'sft_v0.9.4_train.jsonl'
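For reference, the effective global batch size under the updated settings is batch_size × gradient_accumulation_steps × data-parallel world size; a quick check, assuming a world size of 4 (matching slots=4 in the updated hostfile below):

# Quick check of the effective global batch size; world_size=4 is an
# assumption taken from slots=4 in the updated hostfile.
batch_size = 4
gradient_accumulation_steps = 1
world_size = 4
print(batch_size * gradient_accumulation_steps * world_size)  # 16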
7 changes: 4 additions & 3 deletions examples/Aquila/Aquila-chat/aquila_chat.py
@@ -14,7 +14,7 @@
 import jsonlines
 import numpy as np
 import cyg_conversation as conversation_lib
-from flagai.model.tools.lora.prepare_lora import lora_transfer
+from flagai.model.tools.peft.prepare_lora import lora_transfer
 device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
 
 # You can input all parameters by the command line.
@@ -84,7 +84,7 @@
 config_file = os.path.join(cache_dir, 'config.json')
 from flagai.model.aquila_model import AQUILAModel
 model = AQUILAModel.init_from_json(config_file=config_file, device=device, fp16=True)
-print('*'*20, "model", model)
+# print('*'*20, "model", model)
 
 #lora
 if env_args.lora:
@@ -98,7 +98,7 @@
 
 trainer.pre_train(model)
 
-print('*'*20, "model", model, flush=True)
+# print('*'*20, "model", model, flush=True)
 
 assert env_args.enable_sft_dataset_dir is not None and \
     env_args.enable_sft_dataset_file is not None
@@ -246,6 +246,7 @@ def padding(indice, max_length, pad_idx=0):
                               maxlen=max_seq_len)
 #print(f"train_dataset \n {train_dataset[0]}")
 
+print(len(train_dataset))
 valid_dataset = None
 if jsonl_data_val is not None:
     conversations_val = read_file(jsonl_data_val)
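The import move from flagai.model.tools.lora to flagai.model.tools.peft is the heart of this PR: LoRA preparation now lives in the vendored peft tree. A sketch of the guarded call site; the exact lora_transfer signature is an assumption inferred from the hunk context, not confirmed by the diff:

# Sketch of the LoRA branch this file guards with env_args.lora; the
# lora_transfer signature is an assumption.
from flagai.model.tools.peft.prepare_lora import lora_transfer

if env_args.lora:
    model = lora_transfer(model, env_args)  # wrap the base model with LoRA adapters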
48 changes: 48 additions & 0 deletions examples/Aquila/Aquila-chat/deepspeed.json
@@ -0,0 +1,48 @@
{
"train_micro_batch_size_per_gpu": 64,
"gradient_accumulation_steps": 1,
"steps_per_print": 100,
"gradient_clipping": 1.0,
"zero_optimization": {
"stage": 3,
"contiguous_gradients": false,
"overlap_comm": true,
"reduce_scatter": true,
"reduce_bucket_size": 5e7,
"allgather_bucket_size": 5e7,
"cpu_offload": true
},
"scheduler": {
"type": "WarmupLR",
"params": {
"warmup_min_lr": 0,
"warmup_max_lr": 1e-5,
"warmup_num_steps": 2000
}
},
"zero_allow_untested_optimizer": true,
"fp16": {
"enabled": true,
"loss_scale": 0,
"loss_scale_window": 1000,
"hysteresis": 2,
"min_loss_scale": 1
},
"optimizer": {
"type": "Adam",
"params": {
"lr": 1e-5,
"weight_decay": 0.1,
"betas": [
0.9,
0.98
],
"eps": 1e-6
}
},
"activation_checkpointing": {
"partition_activations": true,
"contiguous_memory_optimization": false
},
"wall_clock_breakdown": false
}
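The new config enables ZeRO stage 3 with CPU offload and fp16 dynamic loss scaling. A minimal sketch of attaching it to a model; the Linear module is a stand-in for the AQUILAModel the training script builds:

# Minimal sketch of wiring this JSON into the DeepSpeed engine; the Linear
# module is a stand-in for the real model.
import torch
import deepspeed

model = torch.nn.Linear(8, 8)
model_engine, optimizer, _, _ = deepspeed.initialize(
    model=model,
    model_parameters=model.parameters(),
    config="examples/Aquila/Aquila-chat/deepspeed.json",
)
# Training then runs through the engine:
#   loss = ...; model_engine.backward(loss); model_engine.step()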
2 changes: 1 addition & 1 deletion examples/Aquila/Aquila-chat/generate_chat.py
@@ -9,7 +9,7 @@
 from flagai.data.tokenizer import Tokenizer
 
 state_dict = "./checkpoints_in"
-model_name = 'aquilachat-7b'
+model_name = 'aquila-7b'
 
 loader = AutoLoader("lm",
                     model_dir=state_dict,
8 changes: 3 additions & 5 deletions examples/Aquila/Aquila-chat/generate_chat_lora.py
@@ -19,8 +19,8 @@
                     model_name=model_name,
                     use_cache=True,
                     fp16=True,
-                    device='cuda',
-                    adapter_dir='directory of adapter files') # eg: /mnt/yzd/git/FlagAI/examples/Aquila/Aquila-chat/checkpoints_out/aquila_experiment/2023062909
+                    device='cuda:2',
+                    adapter_dir='/data2/yzd/FlagAI/examples/Aquila/Aquila-chat/checkpoints_out/aquila_experiment75new/2023070515/') # eg: /mnt/yzd/git/FlagAI/examples/Aquila/Aquila-chat/checkpoints_out/aquila_experiment/2023062909
 model = loader.get_model()
 
 tokenizer = loader.get_tokenizer()
@@ -32,8 +32,6 @@
 
 texts = [
     "Find the product of the numbers: 5 and 8",
-    "Provide five tips for effectively using tape measures",
-    "Create a resume for a job in web development.",
 ]
 
 for text in texts:
@@ -59,4 +57,4 @@
         max_gen_len := 200,
         top_p=0.95,
         prompts_tokens=[tokens])
-print(f"pred is {out}")
\ No newline at end of file
+print(f"pred is {out}")
2 changes: 1 addition & 1 deletion examples/Aquila/Aquila-chat/hostfile
@@ -1 +1 @@
-192.168.21.4 slots=3
+192.168.21.4 slots=4
1 change: 0 additions & 1 deletion flagai/env_trainer_v1.py
@@ -526,7 +526,6 @@ def do_train(self,
 
         # For all the batches in the dataset.
         for iteration_, batch in enumerate(train_dataloader):
-
             # skip batches when resume_dataset=True
             iteration_in_epoch = 0
             if in_first_epoch and self.resume_dataset and 'iteration_in_epoch' in self.sd:
5 changes: 4 additions & 1 deletion flagai/model/aquila_model.py
@@ -156,6 +156,7 @@ def pre_train_hook(self):
         self.layers = bmt.TransformerBlockList(blocks)
 
     def forward(self, input_ids: torch.Tensor, start_pos=0, labels=None, **kwargs):
+
         _bsz, seqlen = input_ids.shape
         h = self.tok_embeddings(input_ids)
 
@@ -177,13 +178,15 @@ def forward(self, input_ids: torch.Tensor, start_pos=0, labels=None, **kwargs):
             for layer in self.layers:
                 layer.use_cache = self.use_cache
                 layer.start_pos = start_pos
+
             h = self.layers(h, freqs_cis, mask)
         else:
             for layer in self.layers:
                 layer.use_cache = self.use_cache
                 layer.start_pos = start_pos
                 h = layer(h, freqs_cis, mask)
 
+
+        # import pdb;pdb.set_trace()
         h = self.norm(h)
         if labels is not None:
             h = self.output(h)
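Both branches of forward consume the same freqs_cis and causal mask. For context, a sketch of the LLaMA-style mask this model family builds when seqlen > 1; this is an assumption based on the surrounding code, not part of the hunk:

# Sketch of the LLaMA-style causal mask both branches consume (assumed).
import torch

seqlen, start_pos = 8, 0
mask = None
if seqlen > 1:
    mask = torch.full((1, 1, seqlen, seqlen), float("-inf"))
    mask = torch.triu(mask, diagonal=start_pos + 1)  # self and earlier positions stay visible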
2 changes: 1 addition & 1 deletion flagai/model/base_model.py
@@ -120,7 +120,7 @@ def load_local(checkpoint_path, only_download_config=False):
         if only_download_config:
             return model
         if 'adapter_dir' in kwargs:
-            from peft import PeftModel
+            from flagai.model.tools.peft import PeftModel
             model = PeftModel.from_pretrained(model, kwargs['adapter_dir'])
         if os.getenv('ENV_TYPE') != 'deepspeed+mpu':
             if os.path.exists(checkpoint_path):
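This swap from upstream peft to the vendored copy leaves the calling convention intact: passing adapter_dir through the loader still reaches PeftModel.from_pretrained. A usage sketch, mirroring generate_chat_lora.py above with placeholder paths:

# Usage sketch of the adapter_dir path through AutoLoader; paths are placeholders.
from flagai.auto_model.auto_loader import AutoLoader

loader = AutoLoader("lm",
                    model_dir="./checkpoints_in",
                    model_name="aquila-7b",
                    use_cache=True,
                    fp16=True,
                    adapter_dir="./checkpoints_out/my_adapter")  # triggers the PeftModel branch
model = loader.get_model()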
1 change: 1 addition & 0 deletions flagai/model/layers/attentions.py
@@ -164,6 +164,7 @@ def forward(
             qkv = einops.rearrange(qkv, '... (three h d) -> ... three h d', three=3, d=self.head_dim)
             qkv = self.rotary_emb(qkv)
         else:
+            xq = self.wq(x)
             xq, xk, xv = self.wq(x), self.wk(x), self.wv(x)
             xq = xq.view(bsz, seqlen, self.n_local_heads, self.head_dim)
             xk = xk.view(bsz, seqlen, self.n_local_heads, self.head_dim)
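The rotary branch above packs Q, K, and V into a single tensor and then splits the last dimension with einops. A standalone check of that rearrange; the head count and head_dim are illustrative:

# Standalone check of the rearrange used in the rotary branch; h=4 and d=16
# are illustrative values.
import torch
import einops

bsz, seqlen, h, d = 2, 5, 4, 16
qkv = torch.randn(bsz, seqlen, 3 * h * d)  # packed Q, K, V
qkv = einops.rearrange(qkv, '... (three h d) -> ... three h d', three=3, d=d)
print(qkv.shape)  # torch.Size([2, 5, 3, 4, 16])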
1 change: 0 additions & 1 deletion flagai/model/layers/attentions_bmt.py
@@ -150,7 +150,6 @@ def forward(self,
             Return:
                 out (:obj:`torch.Tensor` of shape ``(batch, len_q, dim_model)``): The attention output.
         """
-
         batch_size = query.size(0)
         len_q = query.size(1)
         len_k = key_value.size(1)
2 changes: 1 addition & 1 deletion flagai/model/predictor/aquila.py
@@ -36,7 +36,7 @@ def aquila_generate(
     start_pos = min_prompt_size
     prev_pos = 0
     for cur_pos in range(start_pos, total_len):
-        logits = model.forward(tokens[:, prev_pos:cur_pos], prev_pos)["logits"]
+        logits = model.forward(input_ids=tokens[:, prev_pos:cur_pos], start_pos=prev_pos)["logits"]
         #print(logits.shape)
         if temperature > 0:
             logits /= temperature
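The fix passes both tensors by keyword so start_pos can no longer be swallowed by a differently ordered positional parameter. The surrounding loop is standard KV-cache incremental decoding; a greedy sketch of the pattern, with model, tokens, start_pos, and total_len as placeholders from context:

# Greedy sketch of the incremental decoding loop around the fixed call: only
# tokens after prev_pos are fed each step, and start_pos offsets the KV cache.
prev_pos = 0
for cur_pos in range(start_pos, total_len):
    logits = model.forward(input_ids=tokens[:, prev_pos:cur_pos],
                           start_pos=prev_pos)["logits"]
    next_token = logits[:, -1, :].argmax(dim=-1)  # sampling/temperature elided
    tokens[:, cur_pos] = next_token
    prev_pos = cur_pos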
85 changes: 0 additions & 85 deletions flagai/model/tools/lora/mapping.py

This file was deleted.

45 changes: 0 additions & 45 deletions flagai/model/tools/lora/prepare_lora.py

This file was deleted.
