Skip to content

Commit

Permalink
Set bf16 flags correctly for a10/a100
Browse files Browse the repository at this point in the history
  • Loading branch information
srowen committed Jun 6, 2023
1 parent fd1a733 commit 6609c85
Show file tree
Hide file tree
Showing 4 changed files with 18 additions and 3 deletions.
5 changes: 4 additions & 1 deletion config/a100_config.json
Original file line number Diff line number Diff line change
@@ -1,6 +1,9 @@
{
"fp16": {
"enabled": false
},
"bf16": {
- "enabled": "auto"
+ "enabled": true
},
"optimizer": {
"type": "AdamW",
Expand Down
5 changes: 4 additions & 1 deletion config/a10_config.json
Original file line number Diff line number Diff line change
@@ -1,6 +1,9 @@
{
"fp16": {
"enabled": false
},
"bf16": {
- "enabled": "auto"
+ "enabled": true
},
"optimizer": {
"type": "AdamW",
Expand Down
3 changes: 3 additions & 0 deletions config/v100_config.json
Original file line number Diff line number Diff line change
Expand Up @@ -2,6 +2,9 @@
"fp16": {
"enabled": true
},
"bf16": {
"enabled": false
},
"optimizer": {
"type": "AdamW",
"params": {
Expand Down
8 changes: 7 additions & 1 deletion train_dolly.py
Original file line number Diff line number Diff line change
Expand Up @@ -160,6 +160,11 @@
num_gpus = int(num_gpus)
num_gpus_flag = f"--num_gpus={num_gpus}"

if gpu_family == "v100":
bf16_flag = "--bf16 false"
else:
bf16_flag = "--bf16 true"

os.environ["TOKENIZERS_PARALLELISM"] = "false"

# COMMAND ----------
Expand All @@ -184,7 +189,8 @@
--eval-steps 50 \
--warmup-steps 50 \
--test-size 200 \
- --lr 5e-6
+ --lr 5e-6 \
+ {bf16_flag}

# COMMAND ----------

Expand Down

0 comments on commit 6609c85

Please sign in to comment.