Skip to content

Commit

Permalink
modified docs
Browse files Browse the repository at this point in the history
Signed-off-by: ftgreat <[email protected]>
  • Loading branch information
ftgreat committed Jun 8, 2023
1 parent 5fe0b59 commit b9961b0
Show file tree
Hide file tree
Showing 23 changed files with 2,650 additions and 834 deletions.
18 changes: 7 additions & 11 deletions examples/Aquila/Aquila-code/generate_code.py
Original file line number Diff line number Diff line change
Expand Up @@ -18,30 +18,23 @@

print(f"building model...")
loader = AutoLoader("lm", model_name="aquilacode-7b-nv",
only_download_config=True,
use_cache=True,
fp16=True,
model_dir=model_dir)

model = loader.get_model()
tokenizer = loader.get_tokenizer()

model.half()
model.eval()

model.cuda()
model.to(device)

vocab = tokenizer.get_vocab()

id2word = {v:k for k, v in vocab.items()}

predictor = Predictor(model, tokenizer)

max_new_tokens = 256

# test_file = "./datasets/code_test.txt"
# with open(test_file) as fin:
# prompt = '\n'+fin.read()+'\n'

# Sample prompts fed to the generator: a Chinese "complete the code" request
# for quick sort, then the same docstring-style instruction (ask for the
# user's name and say hello) in Chinese and in English.
texts = ["#补全代码\ndef quick_sort(x):",
         '"""\n向用户询问他们的名字并说“你好”\n"""',
         '"""\nAsk the user for their name and say "Hello"\n"""']
Expand All @@ -52,8 +45,11 @@

max_length = input_length+max_new_tokens
with torch.no_grad():
res = predictor.predict_generate_randomsample(prompt,
res = predictor.predict_generate_randomsample(text,
out_max_length=max_length,
top_p=0.95,
temperature=0.7)
print(res)
print(res)



8 changes: 4 additions & 4 deletions examples/Aquila/Aquila-pretrain/README_Aquila.md
Original file line number Diff line number Diff line change
Expand Up @@ -33,11 +33,11 @@ We also support [Huggingface](hflink)

| Model | License | Commercial use? | GPU | Model link
| :---------------- | :------- | :-- |:-- | :-- |
| <font color=red>Aquila-7B </font> | Apache 2.0 | ✅ | Nvidia-A100 | mhlink
| <font color=red>Aquila-7B </font> | Apache 2.0 | ✅ | Nvidia-A100 | https://model.baai.ac.cn/model-detail/100098
| <font color=red>Aquila-33B </font> | Apache 2.0 | ✅ | Nvidia-A100 | mhlink
| AquilaCode-7B-nv | Apache 2.0 | ✅ | Nvidia-A100 | mhlink
| AquilaCode-7B-ts | Apache 2.0 | ✅ | Tianshu-BI-V100 | mhlink
| AquilaChat-7B | Apache 2.0 | ✅ | Nvidia-A100 | mhlink
| AquilaCode-7B-nv | Apache 2.0 | ✅ | Nvidia-A100 | https://model.baai.ac.cn/model-detail/100102
| AquilaCode-7B-ts | Apache 2.0 | ✅ | Tianshu-BI-V100 | https://model.baai.ac.cn/model-detail/100099
| AquilaChat-7B | Apache 2.0 | ✅ | Nvidia-A100 | https://model.baai.ac.cn/model-detail/100101

我们使用了一系列更高效的底层算子来辅助模型训练,其中包括参考[flash-attention](https://github.com/HazyResearch/flash-attention)的方法并替换了一些中间计算,同时还使用了RMSNorm。在此基础上,我们升级了[BMtrain](https://github.com/OpenBMB/BMTrain)技术进行轻量化的并行训练,该技术采用了数据并行、ZeRO(零冗余优化器)、优化器卸载、检查点和操作融合、通信-计算重叠等方法来优化模型训练过程。

Expand Down
17 changes: 8 additions & 9 deletions examples/Aquila/Aquila-sft/Aquila-sft.yaml
Original file line number Diff line number Diff line change
@@ -1,13 +1,12 @@
epochs: 3
batch_size: 4
gradient_accumulation_steps: 1
lr: 9.65e-6
warm_up: 0.1
save_interval: 1000
gradient_accumulation_steps: 4
lr: 3.0e-4
warm_up: 0.01

bmt_lr_decay_style: "linear"
bmt_cpu_offload: False

bmt_pre_load: True
enable_sft_dataset_dir: './data/'
enable_sft_dataset_file: 'sft_samples.jsonl'

save_optim: True
save_rng: True
enable_sft_dataset_dir: '../datasets/'
enable_sft_dataset_file: 'convo_v2.jsonl'
20 changes: 6 additions & 14 deletions examples/Aquila/Aquila-sft/aquila_sft.py
Original file line number Diff line number Diff line change
Expand Up @@ -15,6 +15,7 @@
import jsonlines
import numpy as np
from examples.Aquila import cyg_conversation as conversation_lib
from flagai.model.tools.lora.prepare_lora import lora_transfer
device = torch.device("cuda" if torch.cuda.is_available() else "cpu")

# You can input all parameters by the command line.
Expand Down Expand Up @@ -69,20 +70,6 @@

print('*'*20, "model_name", model_name, flush=True)

'''
auto_loader = AutoLoader(
"lm",
model_name=model_name,
model_dir=checkpoints,
only_download_config=True,
)
model = auto_loader.get_model()
tokenizer = auto_loader.get_tokenizer()
print('*'*20, "model", model)
trainer.pre_train(model)
print('*'*20, "model", model)
'''

cache_dir = os.path.join(checkpoints, model_name)
print('*'*20, "cache_dir", cache_dir)
Expand All @@ -100,6 +87,11 @@
model = AQUILAModel.init_from_json(config_file=config_file)
print('*'*20, "model", model)

#lora
if env_args.lora:
model = lora_transfer(model,env_args)
model.print_trainable_parameters()

## bmt_pre_load
checkpoint_path = os.path.join(cache_dir, "pytorch_model.bin")
if env_args.bmt_pre_load:
Expand Down
6 changes: 1 addition & 5 deletions examples/Aquila/Aquila-sft/generate_sft.py
Original file line number Diff line number Diff line change
Expand Up @@ -5,7 +5,6 @@
from flagai.model.predictor.predictor import Predictor
from flagai.model.predictor.aquila import aquila_generate
from flagai.data.tokenizer import Tokenizer
import bminf

state_dict = "./checkpoints_in"
model_name = 'aquilachat-7b'
Expand All @@ -21,8 +20,6 @@

model.eval()
model.half()
# with torch.cuda.device(0):
# model = bminf.wrapper(model, quantization=False, memory_limit=2 << 30)
model.cuda()

predictor = Predictor(model, tokenizer)
Expand Down Expand Up @@ -107,5 +104,4 @@ def convo_tokenize(convo_obj, tokenizer):

with torch.no_grad():
out = aquila_generate(tokenizer, model, [text], max_gen_len:=200, top_p=0.95, prompts_tokens=[tokens])
print(f"pred is {out}")

print(f"pred is {out}")
Loading

0 comments on commit b9961b0

Please sign in to comment.