@@ -1,5 +1,5 @@
 import os
-from typing import Dict, Tuple
+from typing import Dict, Tuple, Union

 import numpy as np
 import pandas as pd
@@ -15,11 +15,14 @@
     EPOCHS,
     GENERATION,
     INPUT_FEATURES,
+    MERGE_ADAPTER_INTO_BASE_MODEL,
     MODEL_LLM,
     MODEL_TYPE,
     OUTPUT_FEATURES,
+    POSTPROCESSOR,
     PREPROCESSING,
     PRETRAINED_ADAPTER_WEIGHTS,
+    PROGRESSBAR,
     PROMPT,
     TRAINER,
     TYPE,
@@ -352,6 +355,84 @@ def _prepare_finetuning_test(
     return train_df, prediction_df, config


+def _finetune_strategy_requires_cuda(finetune_strategy_name: str, quantization_args: Union[dict, None]) -> bool:
+    """This method returns whether or not a given finetune_strategy requires CUDA.
+
+    For all finetune strategies except "qlora", the decision is based solely on the name of the finetune_strategy; in
+    the case of qlora, if the quantization dictionary is non-empty (i.e., contains quantization specifications), then
+    the original finetune_strategy name of "lora" is interpreted as "qlora" and used in the lookup against the list of
+    finetune strategies requiring CUDA.
+    """
+    cuda_only_finetune_strategy_names: list[str] = [
+        "prompt_tuning",
+        "prefix_tuning",
+        "p_tuning",
+        "qlora",
+    ]
+
+    if finetune_strategy_name == "lora" and quantization_args:
+        finetune_strategy_name = "qlora"
+
+    return finetune_strategy_name in cuda_only_finetune_strategy_names
+
+
+def _verify_lm_lora_finetuning_layers(
+    attention_layer: torch.nn.Module,
+    merge_adapter_into_base_model: bool,
+    expected_lora_in_features: int,
+    expected_lora_out_features: int,
+) -> bool:
+    """This method verifies that the LoRA finetuning layers have the correct types and shapes, depending on whether
+    or not the optional "model.merge_and_unload()" call (controlled by the "merge_adapter_into_base_model" directive)
+    was executed.
+
+    If merge_adapter_into_base_model is True, then both LoRA projection layers, V and Q, in the attention layer must
+    contain square weight matrices (with the dimensions expected_lora_in_features by expected_lora_in_features).
+    However, if merge_adapter_into_base_model is False, then the LoRA part of the attention layer must include lora_A
+    and lora_B child layers for each of the V and Q projections, such that the product of the lora_B and lora_A
+    matrices is a square matrix (with the dimensions expected_lora_in_features by expected_lora_in_features) for both
+    the V and Q projections.
+    """
+    success: bool = True
+    success = success and isinstance(attention_layer.v_proj, torch.nn.Linear)
+    success = success and isinstance(attention_layer.q_proj, torch.nn.Linear)
+    if merge_adapter_into_base_model:
+        success = success and (attention_layer.v_proj.in_features, attention_layer.v_proj.out_features) == (
+            expected_lora_in_features,
+            expected_lora_out_features,
+        )
+        success = success and (attention_layer.q_proj.in_features, attention_layer.q_proj.out_features) == (
+            expected_lora_in_features,
+            expected_lora_out_features,
+        )
+        success = success and not list(attention_layer.v_proj.children())
+        success = success and not list(attention_layer.q_proj.children())
+    else:
+        v_proj_named_children: dict[str, torch.nn.Module] = dict(attention_layer.v_proj.named_children())
+        assert isinstance(v_proj_named_children["lora_A"]["default"], torch.nn.Linear)
+        assert (
+            v_proj_named_children["lora_A"]["default"].in_features,
+            v_proj_named_children["lora_A"]["default"].out_features,
+        ) == (expected_lora_in_features, expected_lora_out_features)
+        assert isinstance(v_proj_named_children["lora_B"]["default"], torch.nn.Linear)
+        assert (
+            v_proj_named_children["lora_B"]["default"].in_features,
+            v_proj_named_children["lora_B"]["default"].out_features,
+        ) == (expected_lora_out_features, expected_lora_in_features)
+        q_proj_named_children: dict[str, torch.nn.Module] = dict(attention_layer.q_proj.named_children())
+        assert isinstance(q_proj_named_children["lora_A"]["default"], torch.nn.Linear)
+        assert (
+            q_proj_named_children["lora_A"]["default"].in_features,
+            q_proj_named_children["lora_A"]["default"].out_features,
+        ) == (expected_lora_in_features, expected_lora_out_features)
+        assert isinstance(q_proj_named_children["lora_B"]["default"], torch.nn.Linear)
+        assert (
+            q_proj_named_children["lora_B"]["default"].in_features,
+            q_proj_named_children["lora_B"]["default"].out_features,
+        ) == (expected_lora_out_features, expected_lora_in_features)
+
+    return success
+
+
 # TODO(arnav): p-tuning and prefix tuning have errors when enabled that seem to stem from DDP:
 #
 # prefix tuning:
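As a rough standalone illustration of the shape bookkeeping that `_verify_lm_lora_finetuning_layers` checks: in the unmerged case each projection keeps lora_A and lora_B child layers whose product has the base projection's square shape, while merge_and_unload() folds that product into the base weight and leaves a plain Linear layer with no children. A minimal sketch, assuming the rank of 8 and hidden size of 32 used by the test parameters below; it depends only on torch, not on peft or Ludwig:

import torch

hidden_size, rank = 32, 8  # assumed values mirroring expected_lora_in_features / expected_lora_out_features
lora_A = torch.nn.Linear(hidden_size, rank, bias=False)  # (in_features=32, out_features=8), as asserted when not merged
lora_B = torch.nn.Linear(rank, hidden_size, bias=False)  # (in_features=8, out_features=32), as asserted when not merged

# The adapter update folded in by merge_and_unload() is proportional to B @ A, a square
# hidden_size x hidden_size matrix, so the merged v_proj/q_proj stays a plain 32x32 Linear layer.
delta_w = lora_B.weight @ lora_A.weight
assert delta_w.shape == (hidden_size, hidden_size)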
@@ -376,8 +457,12 @@ def _prepare_finetuning_test(
         (None, {}),
         ("lora", {}),
         ("lora", {"r": 4, "dropout": 0.1}),
+        ("lora", {POSTPROCESSOR: {MERGE_ADAPTER_INTO_BASE_MODEL: True, PROGRESSBAR: True}}),
+        ("lora", {POSTPROCESSOR: {MERGE_ADAPTER_INTO_BASE_MODEL: False}}),
         ("adalora", {}),
         ("adalora", {"init_r": 8, "beta1": 0.8}),
+        ("adalora", {POSTPROCESSOR: {MERGE_ADAPTER_INTO_BASE_MODEL: True, PROGRESSBAR: True}}),
+        ("adalora", {POSTPROCESSOR: {MERGE_ADAPTER_INTO_BASE_MODEL: False}}),
         ("adaption_prompt", {}),
         ("adaption_prompt", {"adapter_len": 6, "adapter_layers": 1}),
         # (
@@ -403,8 +488,12 @@ def _prepare_finetuning_test(
         "full",
         "lora-defaults",
         "lora-modified-defaults",
+        "lora_merged",
+        "lora_not_merged",
         "adalora-defaults",
         "adalora-modified-defaults",
+        "adalora_merged",
+        "adalora_not_merged",
         "adaption_prompt-defaults",
         "adaption_prompt-modified-defaults",
         # "prompt_tuning_init_random",
@@ -445,7 +534,10 @@ def test_llm_finetuning_strategies(tmpdir, csv_filename, backend, finetune_strat
     ],
 )
 def test_llm_finetuning_strategies_quantized(tmpdir, csv_filename, finetune_strategy, adapter_args, quantization):
-    if quantization and (not torch.cuda.is_available() or torch.cuda.device_count() == 0):
+    if (
+        _finetune_strategy_requires_cuda(finetune_strategy_name=finetune_strategy, quantization_args=quantization)
+        and not (torch.cuda.is_available() and torch.cuda.device_count()) > 0
+    ):
         pytest.skip("Skip: quantization requires GPU and none are available.")

     backend = LOCAL_BACKEND
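To make the new skip condition concrete, here is how the helper would evaluate for a few cases (a sketch; the quantization dict contents are illustrative, not taken from the test matrix above):

_finetune_strategy_requires_cuda(finetune_strategy_name="lora", quantization_args={"bits": 4})  # True: treated as "qlora"
_finetune_strategy_requires_cuda(finetune_strategy_name="lora", quantization_args=None)  # False: plain LoRA can run without CUDA
_finetune_strategy_requires_cuda(finetune_strategy_name="prompt_tuning", quantization_args=None)  # True: in the CUDA-only list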
@@ -469,6 +561,66 @@ def test_llm_finetuning_strategies_quantized(tmpdir, csv_filename, finetune_stra
     assert preds


+@pytest.mark.llm
+@pytest.mark.parametrize(
+    "backend",
+    [
+        pytest.param(LOCAL_BACKEND, id="local"),
+        # TODO: Re-enable once we can run tests on GPUs
+        # This is because fine-tuning requires Ray with the deepspeed strategy, and deepspeed
+        # only works with GPUs
+        # pytest.param(RAY_BACKEND, id="ray"),
+    ],
+)
+@pytest.mark.parametrize(
+    "merge_adapter_into_base_model,expected_lora_in_features,expected_lora_out_features",
+    [
+        pytest.param(
+            False,
+            32,
+            8,
+            id="lora_not_merged",
+        ),
+        pytest.param(
+            True,
+            32,
+            32,
+            id="lora_merged",
+        ),
+    ],
+)
+def test_llm_lora_finetuning_merge_and_unload(
+    tmpdir, csv_filename, backend, merge_adapter_into_base_model, expected_lora_in_features, expected_lora_out_features
+):
+    finetune_strategy: str = "lora"
+    adapter_args: dict = {
+        POSTPROCESSOR: {
+            MERGE_ADAPTER_INTO_BASE_MODEL: merge_adapter_into_base_model,
+        },
+    }
+    train_df, prediction_df, config = _prepare_finetuning_test(
+        csv_filename=csv_filename, finetune_strategy=finetune_strategy, backend=backend, adapter_args=adapter_args
+    )
+
+    model = LudwigModel(config)
+    model.train(dataset=train_df, output_directory=str(tmpdir), skip_save_processed_input=False)
+    assert _verify_lm_lora_finetuning_layers(
+        attention_layer=model.model.model.base_model.model.transformer.h[1].attn,
+        merge_adapter_into_base_model=merge_adapter_into_base_model,
+        expected_lora_in_features=expected_lora_in_features,
+        expected_lora_out_features=expected_lora_out_features,
+    )
+
+    # Make sure we can load the saved model and verify that the LoRA layers have the expected shapes.
+    model = LudwigModel.load(os.path.join(str(tmpdir), "api_experiment_run", "model"), backend=backend)
+    assert _verify_lm_lora_finetuning_layers(
+        attention_layer=model.model.model.base_model.model.transformer.h[1].attn,
+        merge_adapter_into_base_model=merge_adapter_into_base_model,
+        expected_lora_in_features=expected_lora_in_features,
+        expected_lora_out_features=expected_lora_out_features,
+    )
+
+
 @pytest.mark.llm
 @pytest.mark.parametrize("use_adapter", [True, False], ids=["with_adapter", "without_adapter"])
 def test_llm_training_with_gradient_checkpointing(tmpdir, csv_filename, use_adapter):
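For reference, the POSTPROCESSOR options exercised by the new test would land in the adapter section of the model config roughly as follows. This is a sketch that assumes the imported constants resolve to same-named lowercase keys (not shown in this diff) and that `_prepare_finetuning_test` merges `adapter_args` into the adapter config:

config = {
    "model_type": "llm",
    # base_model, input/output features, prompt, trainer, etc. as assembled by _prepare_finetuning_test()
    "adapter": {
        "type": "lora",
        "postprocessor": {
            "merge_adapter_into_base_model": True,  # when True, the trained adapter is folded into the base weights via merge_and_unload()
            "progressbar": True,  # assumed flag controlling the progress bar shown during the merge
        },
    },
}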
@@ -628,23 +780,37 @@ def test_load_pretrained_adapter_weights(adapter):

 def _compare_models(model_1: torch.nn.Module, model_2: torch.nn.Module) -> bool:
     # For a full explanation of this 8-bit workaround, see https://github.com/ludwig-ai/ludwig/pull/3606
-    def filter_for_weight_format(i):
-        """Remove bitsandbytes metadata keys added on state dict creation.

-        8-bit quantized models that have been put on gpu will have a set of `weight_format` keys in their state dict.
-        These contain strings that are used to reshape quantized tensors, however these have no impact until the state
-        dict is loaded into a model. These keys were causing `torch.equal` to raise an exception, so we skip them in the
-        evaluation.
-        """
-        return "weight_format" not in i[0]
+    # TODO: Uncomment "filter_for_weight_format()" method definition and enable its usage once GPU tests are set up.
+    # def filter_for_weight_format(i):
+    #     """Remove bitsandbytes metadata keys added on state dict creation.
+    #
+    #     8-bit quantized models that have been put on gpu will have a set of `weight_format` keys in their state dict.
+    #     These contain strings that are used to reshape quantized tensors, however these have no impact until the state
+    #     dict is loaded into a model. These keys were causing `torch.equal` to raise an exception, so we skip them in
+    #     the evaluation.
+    #     """
+    #     return "weight_format" not in i[0]

-    model_1_filtered_state_dict = filter(filter_for_weight_format, model_1.state_dict().items())
-    model_2_filtered_state_dict = filter(filter_for_weight_format, model_2.state_dict().items())
+    # model_1_filtered_state_dict = filter(filter_for_weight_format, model_1.state_dict().items())
+    # model_2_filtered_state_dict = filter(filter_for_weight_format, model_2.state_dict().items())

     # Source: https://discuss.pytorch.org/t/check-if-models-have-same-weights/4351/6
-    for key_item_1, key_item_2 in zip(model_1_filtered_state_dict, model_2_filtered_state_dict):
+
+    if model_1.__class__.__name__ != model_2.__class__.__name__:
+        return False
+
+    if (
+        hasattr(model_1, "model")
+        and hasattr(model_2, "model")
+        and not _compare_models(model_1=model_1.model, model_2=model_2.model)
+    ):
+        return False
+
+    for key_item_1, key_item_2 in zip(model_1.state_dict().items(), model_2.state_dict().items()):
         if not torch.equal(key_item_1[1], key_item_2[1]):
             return False
+
     return True
