Improve sft (huggingface#5)
* first commit

* working training

* change model_id

* Update scripts/training/sft.py

Co-authored-by: Quentin Gallouédec <[email protected]>

---------

Co-authored-by: Quentin Gallouédec <[email protected]>
eliebak and qgallouedec authored Jan 24, 2025
1 parent 52aefc2 commit c421bc8
Showing 4 changed files with 48 additions and 29 deletions.
@@ -2,7 +2,7 @@
model_name_or_path: Qwen/Qwen2.5-1.5B-Instruct
model_revision: main
torch_dtype: bfloat16
attn_implementation: flash_attention_2
attn_implementation: sdpa

# Data training arguments
dataset_mixer:
@@ -20,8 +20,9 @@ gradient_accumulation_steps: 1
gradient_checkpointing: true
gradient_checkpointing_kwargs:
use_reentrant: False
hub_model_id: HuggingFaceH4/Qwen2.5-1.5B-R1-v00.00
hub_model_id: HuggingFaceH4/qwen-test
hub_strategy: every_save
hub_private_repo: true
learning_rate: 2.0e-05
log_level: info
logging_steps: 5
@@ -33,7 +34,7 @@ num_train_epochs: 1
output_dir: data/Qwen2.5-1.5B-Distill-R1-v00.00
overwrite_output_dir: true
per_device_eval_batch_size: 8
per_device_train_batch_size: 16
per_device_train_batch_size: 8
push_to_hub: true
remove_unused_columns: true
report_to:
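For reference, the attn_implementation value in this recipe is forwarded to transformers when the model is instantiated, so switching from flash_attention_2 to sdpa drops the separate flash-attn dependency in favor of PyTorch's built-in scaled_dot_product_attention kernel. A minimal sketch of what these config fields amount to at load time, assuming a recent transformers release (the actual plumbing through model_kwargs lives in the training script, not in this file):

import torch
from transformers import AutoModelForCausalLM

# Sketch only: mirrors the model_name_or_path / torch_dtype / attn_implementation fields above.
model = AutoModelForCausalLM.from_pretrained(
    "Qwen/Qwen2.5-1.5B-Instruct",
    revision="main",
    torch_dtype=torch.bfloat16,
    attn_implementation="sdpa",  # previously flash_attention_2
)

With gradient_accumulation_steps left at 1, halving per_device_train_batch_size from 16 to 8 also halves the effective batch size.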
recipes/launch.slurm (30 changes: 19 additions, 11 deletions)
@@ -1,33 +1,41 @@
#!/bin/bash
#SBATCH --job-name=default
#SBATCH --nodes=1
#SBATCH --ntasks-per-node=1
#SBATCH --exclusive
#SBATCH --gres=gpu:8
#SBATCH --partition=hopper-prod # Adjust this for your cluster
#SBATCH --output=/fsx/h4/logs/%x-%j.out # Adjust this for your cluster
#SBATCH --err=/fsx/h4/logs/%x-%j.err # Adjust this for your cluster
#SBATCH --partition=hopper-prod
#SBATCH --qos=high
#SBATCH --time=01:59:00
#SBATCH --output=/fsx/elie_bakouch/open-r1/logs/%x-%j.out
#SBATCH --err=/fsx/elie_bakouch/open-r1/logs/%x-%j.err

set -x -e

# DON'T FORGET TO MODIFY THE HEADER WITH YOUR PATH
# @todo:eliebak maybe use submitit at some point

source ~/.bashrc
conda activate openr1
module load cuda/12.1
echo "START TIME: $(date)"
echo "PYTHON ENV: $(which python)"

MODEL=$1
TASK=$2
PRECISION=$3
ACCELERATOR=$4
OPTIONAL_ARGS=$5
MODEL=Qwen2.5-1.5B-Instruct
TASK=sft
PRECISION=v00.00
ACCELERATOR=deepspeed_zero3

# Training setup
NUM_NODES=$SLURM_NNODES
GPUS_PER_NODE=8
WORLD_SIZE=$(($NUM_NODES*$GPUS_PER_NODE))
# Due to conflicts between Accelerate's DeepSpeed configs and Transformers' TrainingArguments, we need to parse the gradient accumulation steps from the config file to ensure they match
CONFIG_FILE=recipes/$MODEL/$TASK/config_$PRECISION.yaml

echo "CONFIG_FILE: $CONFIG_FILE"
GRAD_ACC_STEPS=$(grep 'gradient_accumulation_steps' $CONFIG_FILE | awk '{print $2}')

# Split the string into individual arguments
IFS=' ' read -ra ARGS <<< "$OPTIONAL_ARGS"

# Loop through the arguments and find the one with "--gradient_accumulation_steps"
for arg in "${ARGS[@]}"; do
@@ -44,7 +52,7 @@ MASTER_ADDR=$(scontrol show hostnames $SLURM_JOB_NODELIST | head -n 1)
MASTER_PORT=6000

export CMD=" \
scripts/run_$TASK.py $CONFIG_FILE $OPTIONAL_ARGS
scripts/training/$TASK.py $CONFIG_FILE $OPTIONAL_ARGS
"

export LAUNCHER="HF_HUB_ENABLE_HF_TRANSFER=1 ACCELERATE_LOG_LEVEL=info TRANSFORMERS_VERBOSITY=info accelerate launch \
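The launcher now hard-codes MODEL, TASK, PRECISION and ACCELERATOR in place of the positional arguments, and the GRAD_ACC_STEPS line still extracts gradient_accumulation_steps from the recipe YAML with grep/awk so the value handed to accelerate launch matches the one in TrainingArguments. A rough Python equivalent of that parsing step, shown only to make the intent explicit (hypothetical helper, not part of the repository):

import yaml  # PyYAML

def read_grad_acc_steps(config_file: str) -> int:
    # Equivalent of: grep 'gradient_accumulation_steps' $CONFIG_FILE | awk '{print $2}'
    with open(config_file) as f:
        config = yaml.safe_load(f)
    return int(config.get("gradient_accumulation_steps", 1))

# With the hard-coded values above, CONFIG_FILE resolves to
# recipes/Qwen2.5-1.5B-Instruct/sft/config_v00.00.yaml
print(read_grad_acc_steps("recipes/Qwen2.5-1.5B-Instruct/sft/config_v00.00.yaml"))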
scripts/training/sft.py (36 changes: 23 additions, 13 deletions)
@@ -40,7 +40,7 @@
get_quantization_config,
get_tokenizer,
)
from trl import SFTTrainer, setup_chat_format
from trl import SFTTrainer, SFTConfig, setup_chat_format


logger = logging.getLogger(__name__)
@@ -91,11 +91,13 @@ def main():
configs=data_args.dataset_configs,
columns_to_keep=["messages", "chosen", "rejected", "prompt", "completion", "label"],
)


logger.info(
f"Training on the following datasets and their proportions: {[split + ' : ' + str(dset.num_rows) for split, dset in raw_datasets.items()]}"
)
column_names = list(raw_datasets["train"].features)

print(f"column_names: {column_names}")
################
# Load tokenizer
################
@@ -122,10 +124,10 @@

model = model_args.model_name_or_path
# For ChatML we need to add special tokens and resize the embedding layer
if "<|im_start|>" in tokenizer.chat_template and "gemma-tokenizer-chatml" not in tokenizer.name_or_path:
model = AutoModelForCausalLM.from_pretrained(model_args.model_name_or_path, **model_kwargs)
model, tokenizer = setup_chat_format(model, tokenizer)
model_kwargs = None
# if "<|im_start|>" in tokenizer.chat_template and "gemma-tokenizer-chatml" not in tokenizer.name_or_path:
# model = AutoModelForCausalLM.from_pretrained(model_args.model_name_or_path, **model_kwargs)
# model, tokenizer = setup_chat_format(model, tokenizer)
# model_kwargs = None

#####################
# Apply chat template
@@ -142,8 +144,10 @@
desc="Applying chat template",
)

print(f"raw_datasets['train'].features: {raw_datasets['train'].features}")

##########################
# Decontaminate benchmarks
# Decontaminate benchmarks (change this with math)
##########################
num_raw_train_samples = len(raw_datasets["train"])
raw_datasets = raw_datasets.filter(decontaminate_humaneval, batched=True, batch_size=10_000, num_proc=1)
@@ -159,21 +163,26 @@
for index in random.sample(range(len(raw_datasets["train"])), 3):
logger.info(f"Sample {index} of the processed training set:\n\n{raw_datasets['train'][index]['text']}")


########################
# Initialize the Trainer
########################

# Adding packing and dataset_text_field to the config
setattr(training_args, "model_init_kwargs", model_kwargs)

trainer = SFTTrainer(
model=model,
model_init_kwargs=model_kwargs,
# model_init_kwargs=model_kwargs,
args=training_args,
train_dataset=train_dataset,
eval_dataset=eval_dataset,
dataset_text_field="text",
max_seq_length=training_args.max_seq_length,
# dataset_text_field="text",
# max_seq_length=training_args.max_seq_length,
tokenizer=tokenizer,
packing=True,
# packing=True,
peft_config=get_peft_config(model_args),
dataset_kwargs=training_args.dataset_kwargs,
# dataset_kwargs=training_args.dataset_kwargs,
)

###############
@@ -212,8 +221,9 @@ def main():
trainer.model.config.use_cache = True
trainer.model.config.save_pretrained(training_args.output_dir)


##########
# Evaluate
# Evaluate (to change or suppress?)
##########
if training_args.do_eval:
logger.info("*** Evaluate ***")
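Taken together, the sft.py changes shift configuration that newer TRL releases expect on SFTConfig out of the SFTTrainer call: model_init_kwargs is attached to training_args via setattr, while dataset_text_field, max_seq_length, packing and dataset_kwargs are commented out of the constructor. A minimal, self-contained sketch of that call pattern, assuming a TRL version whose SFTConfig carries these fields (the dataset and hyperparameter values below are illustrative, not taken from the recipe):

from datasets import load_dataset
from transformers import AutoTokenizer
from trl import SFTConfig, SFTTrainer

tokenizer = AutoTokenizer.from_pretrained("Qwen/Qwen2.5-1.5B-Instruct")
dataset = load_dataset("stanfordnlp/imdb", split="train")  # any dataset with a "text" column

training_args = SFTConfig(
    output_dir="data/sft-sketch",
    dataset_text_field="text",
    max_seq_length=2048,
    packing=True,
    # What the setattr(training_args, "model_init_kwargs", model_kwargs) line feeds in:
    model_init_kwargs={"torch_dtype": "bfloat16", "attn_implementation": "sdpa"},
)
trainer = SFTTrainer(
    model="Qwen/Qwen2.5-1.5B-Instruct",  # a model id string is loaded lazily via model_init_kwargs
    args=training_args,
    train_dataset=dataset,
    tokenizer=tokenizer,
)
trainer.train()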
src/open_r1/model_utils.py (4 changes: 2 additions, 2 deletions)
@@ -22,8 +22,8 @@

from accelerate import Accelerator
from huggingface_hub import list_repo_files
from huggingface_hub.utils._errors import RepositoryNotFoundError
from huggingface_hub.utils._validators import HFValidationError
from huggingface_hub.utils import RepositoryNotFoundError
from huggingface_hub.utils import HFValidationError
from peft import LoraConfig, PeftConfig

from .configs import DataArguments, DPOConfig, ModelArguments, SFTConfig
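The model_utils.py change only touches import paths: RepositoryNotFoundError and HFValidationError now come from the public huggingface_hub.utils namespace instead of the private _errors and _validators submodules, private paths that newer huggingface_hub releases do not guarantee. A short sketch of how these exceptions are typically used around list_repo_files (hypothetical helper for illustration; the repository's own function may differ):

from huggingface_hub import list_repo_files
from huggingface_hub.utils import HFValidationError, RepositoryNotFoundError

def repo_has_adapter_config(model_name_or_path: str, revision: str = "main") -> bool:
    """Return True if the Hub repo contains a PEFT adapter_config.json."""
    try:
        repo_files = list_repo_files(model_name_or_path, revision=revision)
    except (HFValidationError, RepositoryNotFoundError):
        # Not a valid Hub repo id (e.g. a local path) or the repo does not exist.
        return False
    return "adapter_config.json" in repo_files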
