Additional Memory clean up for slow tests (huggingface#7436)
* update

* update

* update
DN6 authored Mar 25, 2024
1 parent e29f16c commit 9a34953
Showing 22 changed files with 188 additions and 29 deletions.
6 changes: 6 additions & 0 deletions tests/pipelines/deepfloyd_if/test_if.py
@@ -85,6 +85,12 @@ def test_xformers_attention_forwardGenerator_pass(self):
 @slow
 @require_torch_gpu
 class IFPipelineSlowTests(unittest.TestCase):
+    def setUp(self):
+        # clean up the VRAM before each test
+        super().setUp()
+        gc.collect()
+        torch.cuda.empty_cache()
+
     def tearDown(self):
         # clean up the VRAM after each test
         super().tearDown()
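This setUp/tearDown pair is the pattern the commit repeats across every slow-test class below: collect Python garbage first, then release PyTorch's cached CUDA blocks. A self-contained sketch of the pattern (the class name here is illustrative, not one of the changed files):

import gc
import unittest

import torch


class ExamplePipelineSlowTests(unittest.TestCase):
    def setUp(self):
        # Drop leftover Python references from a previous test, then return
        # PyTorch's cached VRAM blocks to the driver before this test starts.
        super().setUp()
        gc.collect()
        torch.cuda.empty_cache()

    def tearDown(self):
        # Mirror the cleanup after the test so a leaky test cannot starve
        # the next one of GPU memory.
        super().tearDown()
        gc.collect()
        torch.cuda.empty_cache()

gc.collect() has to run first: torch.cuda.empty_cache() can only release blocks whose tensors are no longer referenced.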
10 changes: 10 additions & 0 deletions tests/pipelines/deepfloyd_if/test_if_img2img.py
@@ -94,6 +94,12 @@ def test_inference_batch_single_identical(self):
 @slow
 @require_torch_gpu
 class IFImg2ImgPipelineSlowTests(unittest.TestCase):
+    def setUp(self):
+        # clean up the VRAM before each test
+        super().setUp()
+        gc.collect()
+        torch.cuda.empty_cache()
+
     def tearDown(self):
         # clean up the VRAM after each test
         super().tearDown()
@@ -109,6 +115,10 @@ def test_if_img2img(self):
         pipe.unet.set_attn_processor(AttnAddedKVProcessor())
         pipe.enable_model_cpu_offload()

+        torch.cuda.reset_max_memory_allocated()
+        torch.cuda.empty_cache()
+        torch.cuda.reset_peak_memory_stats()
+
         image = floats_tensor((1, 3, 64, 64), rng=random.Random(0)).to(torch_device)
         generator = torch.Generator(device="cpu").manual_seed(0)
         output = pipe(
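The reset calls added above are what make a later peak-memory check trustworthy: they zero the counters after model loading, so the measurement covers only the inference run. A hedged sketch of the measure-then-check pattern these slow tests follow (pipe, image, and generator come from the surrounding test; the threshold is illustrative, not the repository's actual value):

# Zero the peak-VRAM counters right before the measured run.
torch.cuda.reset_max_memory_allocated()
torch.cuda.empty_cache()
torch.cuda.reset_peak_memory_stats()

output = pipe(
    prompt="a photo of a cat",  # hypothetical prompt for illustration
    image=image,
    generator=generator,
    num_inference_steps=2,
    output_type="np",
)

# Peak VRAM allocated since the reset; with CPU offload this should stay
# far below what full-GPU placement of the pipeline would need.
mem_bytes = torch.cuda.max_memory_allocated()
assert mem_bytes < 12 * 10**9  # illustrative bound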
10 changes: 10 additions & 0 deletions tests/pipelines/deepfloyd_if/test_if_img2img_superresolution.py
@@ -92,6 +92,12 @@ def test_inference_batch_single_identical(self):
 @slow
 @require_torch_gpu
 class IFImg2ImgSuperResolutionPipelineSlowTests(unittest.TestCase):
+    def setUp(self):
+        # clean up the VRAM before each test
+        super().setUp()
+        gc.collect()
+        torch.cuda.empty_cache()
+
     def tearDown(self):
         # clean up the VRAM after each test
         super().tearDown()
@@ -107,6 +113,10 @@ def test_if_img2img_superresolution(self):
         pipe.unet.set_attn_processor(AttnAddedKVProcessor())
         pipe.enable_model_cpu_offload()

+        torch.cuda.reset_max_memory_allocated()
+        torch.cuda.empty_cache()
+        torch.cuda.reset_peak_memory_stats()
+
         generator = torch.Generator(device="cpu").manual_seed(0)

         original_image = floats_tensor((1, 3, 256, 256), rng=random.Random(0)).to(torch_device)
7 changes: 6 additions & 1 deletion tests/pipelines/deepfloyd_if/test_if_inpainting.py
@@ -92,6 +92,12 @@ def test_inference_batch_single_identical(self):
 @slow
 @require_torch_gpu
 class IFInpaintingPipelineSlowTests(unittest.TestCase):
+    def setUp(self):
+        # clean up the VRAM before each test
+        super().setUp()
+        gc.collect()
+        torch.cuda.empty_cache()
+
     def tearDown(self):
         # clean up the VRAM after each test
         super().tearDown()
@@ -105,7 +111,6 @@ def test_if_inpainting(self):
         pipe.unet.set_attn_processor(AttnAddedKVProcessor())
         pipe.enable_model_cpu_offload()

-        # Super resolution test
         torch.cuda.empty_cache()
         torch.cuda.reset_max_memory_allocated()
         torch.cuda.reset_peak_memory_stats()
tests/pipelines/deepfloyd_if/test_if_inpainting_superresolution.py
@@ -94,6 +94,12 @@ def test_inference_batch_single_identical(self):
 @slow
 @require_torch_gpu
 class IFInpaintingSuperResolutionPipelineSlowTests(unittest.TestCase):
+    def setUp(self):
+        # clean up the VRAM before each test
+        super().setUp()
+        gc.collect()
+        torch.cuda.empty_cache()
+
     def tearDown(self):
         # clean up the VRAM after each test
         super().tearDown()
6 changes: 6 additions & 0 deletions tests/pipelines/deepfloyd_if/test_if_superresolution.py
@@ -87,6 +87,12 @@ def test_inference_batch_single_identical(self):
 @slow
 @require_torch_gpu
 class IFSuperResolutionPipelineSlowTests(unittest.TestCase):
+    def setUp(self):
+        # clean up the VRAM before each test
+        super().setUp()
+        gc.collect()
+        torch.cuda.empty_cache()
+
     def tearDown(self):
         # clean up the VRAM after each test
         super().tearDown()
17 changes: 12 additions & 5 deletions tests/pipelines/ip_adapters/test_ip_adapter_stable_diffusion.py
@@ -50,7 +50,14 @@
 class IPAdapterNightlyTestsMixin(unittest.TestCase):
     dtype = torch.float16

+    def setUp(self):
+        # clean up the VRAM before each test
+        super().setUp()
+        gc.collect()
+        torch.cuda.empty_cache()
+
     def tearDown(self):
+        # clean up the VRAM after each test
         super().tearDown()
         gc.collect()
         torch.cuda.empty_cache()
@@ -313,7 +320,7 @@ def test_text_to_image_sdxl(self):
             feature_extractor=feature_extractor,
             torch_dtype=self.dtype,
         )
-        pipeline.to(torch_device)
+        pipeline.enable_model_cpu_offload()
         pipeline.load_ip_adapter("h94/IP-Adapter", subfolder="sdxl_models", weight_name="ip-adapter_sdxl.bin")

         inputs = self.get_dummy_inputs()
@@ -373,7 +380,7 @@ def test_image_to_image_sdxl(self):
             feature_extractor=feature_extractor,
             torch_dtype=self.dtype,
         )
-        pipeline.to(torch_device)
+        pipeline.enable_model_cpu_offload()
         pipeline.load_ip_adapter("h94/IP-Adapter", subfolder="sdxl_models", weight_name="ip-adapter_sdxl.bin")

         inputs = self.get_dummy_inputs(for_image_to_image=True)
@@ -442,7 +449,7 @@ def test_inpainting_sdxl(self):
             feature_extractor=feature_extractor,
             torch_dtype=self.dtype,
         )
-        pipeline.to(torch_device)
+        pipeline.enable_model_cpu_offload()
         pipeline.load_ip_adapter("h94/IP-Adapter", subfolder="sdxl_models", weight_name="ip-adapter_sdxl.bin")

         inputs = self.get_dummy_inputs(for_inpainting=True)
@@ -490,7 +497,7 @@ def test_ip_adapter_single_mask(self):
             image_encoder=image_encoder,
             torch_dtype=self.dtype,
         )
-        pipeline.to(torch_device)
+        pipeline.enable_model_cpu_offload()
         pipeline.load_ip_adapter(
             "h94/IP-Adapter", subfolder="sdxl_models", weight_name="ip-adapter-plus-face_sdxl_vit-h.safetensors"
         )
@@ -518,7 +525,7 @@ def test_ip_adapter_multiple_masks(self):
             image_encoder=image_encoder,
             torch_dtype=self.dtype,
        )
-        pipeline.to(torch_device)
+        pipeline.enable_model_cpu_offload()
         pipeline.load_ip_adapter(
             "h94/IP-Adapter", subfolder="sdxl_models", weight_name=["ip-adapter-plus-face_sdxl_vit-h.safetensors"] * 2
         )
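The one-line change repeated through this file, pipeline.to(torch_device) becoming pipeline.enable_model_cpu_offload(), is the main memory saver: instead of keeping the whole pipeline resident on the GPU, accelerate moves each sub-model (text encoder, UNet, VAE) onto the GPU only for its forward pass. Peak VRAM drops to roughly the largest sub-model, at the cost of some transfer overhead. A minimal sketch of the two placements (requires accelerate; the checkpoint is an example, not taken from this diff):

import torch
from diffusers import StableDiffusionXLPipeline

pipeline = StableDiffusionXLPipeline.from_pretrained(
    "stabilityai/stable-diffusion-xl-base-1.0", torch_dtype=torch.float16
)

# Before: the entire pipeline lives on the GPU for the whole run.
# pipeline.to("cuda")

# After: sub-models are shuttled to the GPU on demand, then back to CPU.
pipeline.enable_model_cpu_offload()

image = pipeline("an astronaut riding a horse", num_inference_steps=2).images[0]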
6 changes: 6 additions & 0 deletions tests/pipelines/kandinsky/test_kandinsky.py
@@ -275,6 +275,12 @@ def test_offloads(self):
 @slow
 @require_torch_gpu
 class KandinskyPipelineIntegrationTests(unittest.TestCase):
+    def setUp(self):
+        # clean up the VRAM before each test
+        super().setUp()
+        gc.collect()
+        torch.cuda.empty_cache()
+
     def tearDown(self):
         # clean up the VRAM after each test
         super().tearDown()
6 changes: 6 additions & 0 deletions tests/pipelines/kandinsky/test_kandinsky_img2img.py
@@ -299,6 +299,12 @@ def test_dict_tuple_outputs_equivalent(self):
 @slow
 @require_torch_gpu
 class KandinskyImg2ImgPipelineIntegrationTests(unittest.TestCase):
+    def setUp(self):
+        # clean up the VRAM before each test
+        super().setUp()
+        gc.collect()
+        torch.cuda.empty_cache()
+
     def tearDown(self):
         # clean up the VRAM after each test
         super().tearDown()
6 changes: 6 additions & 0 deletions tests/pipelines/kandinsky/test_kandinsky_inpaint.py
@@ -297,6 +297,12 @@ def test_float16_inference(self):
 @nightly
 @require_torch_gpu
 class KandinskyInpaintPipelineIntegrationTests(unittest.TestCase):
+    def setUp(self):
+        # clean up the VRAM before each test
+        super().setUp()
+        gc.collect()
+        torch.cuda.empty_cache()
+
     def tearDown(self):
         # clean up the VRAM after each test
         super().tearDown()
15 changes: 10 additions & 5 deletions tests/pipelines/kandinsky2_2/test_kandinsky.py
@@ -27,7 +27,6 @@
     load_numpy,
     require_torch_gpu,
     slow,
-    torch_device,
 )

 from ..test_pipelines_common import PipelineTesterMixin, assert_mean_pixel_difference
@@ -223,6 +222,12 @@ def test_float16_inference(self):
 @slow
 @require_torch_gpu
 class KandinskyV22PipelineIntegrationTests(unittest.TestCase):
+    def setUp(self):
+        # clean up the VRAM before each test
+        super().setUp()
+        gc.collect()
+        torch.cuda.empty_cache()
+
     def tearDown(self):
         # clean up the VRAM after each test
         super().tearDown()
@@ -238,12 +243,12 @@ def test_kandinsky_text2img(self):
         pipe_prior = KandinskyV22PriorPipeline.from_pretrained(
             "kandinsky-community/kandinsky-2-2-prior", torch_dtype=torch.float16
         )
-        pipe_prior.to(torch_device)
+        pipe_prior.enable_model_cpu_offload()

         pipeline = KandinskyV22Pipeline.from_pretrained(
             "kandinsky-community/kandinsky-2-2-decoder", torch_dtype=torch.float16
         )
-        pipeline = pipeline.to(torch_device)
+        pipeline = pipeline.enable_model_cpu_offload()
         pipeline.set_progress_bar_config(disable=None)

         prompt = "red cat, 4k photo"
@@ -252,7 +257,7 @@ def test_kandinsky_text2img(self):
         image_emb, zero_image_emb = pipe_prior(
             prompt,
             generator=generator,
-            num_inference_steps=5,
+            num_inference_steps=3,
             negative_prompt="",
         ).to_tuple()

@@ -261,7 +266,7 @@ def test_kandinsky_text2img(self):
             image_embeds=image_emb,
             negative_image_embeds=zero_image_emb,
             generator=generator,
-            num_inference_steps=100,
+            num_inference_steps=3,
             output_type="np",
         )

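Alongside offloading, this test shrinks the sampling budget (prior 5 to 3 steps, decoder 100 to 3 steps), which is where most of the wall-clock saving comes from; since the generator is seeded, the shorter run is still deterministic, though any golden outputs compared via assert_mean_pixel_difference must match the new step counts. A condensed sketch of the resulting two-stage call, using the checkpoints named in the diff:

import torch
from diffusers import KandinskyV22Pipeline, KandinskyV22PriorPipeline

pipe_prior = KandinskyV22PriorPipeline.from_pretrained(
    "kandinsky-community/kandinsky-2-2-prior", torch_dtype=torch.float16
)
pipe_prior.enable_model_cpu_offload()  # called for its side effect, not its return value

pipeline = KandinskyV22Pipeline.from_pretrained(
    "kandinsky-community/kandinsky-2-2-decoder", torch_dtype=torch.float16
)
pipeline.enable_model_cpu_offload()

generator = torch.Generator(device="cpu").manual_seed(0)

# Stage 1: the prior turns the prompt into image embeddings.
image_emb, zero_image_emb = pipe_prior(
    "red cat, 4k photo", generator=generator, num_inference_steps=3, negative_prompt=""
).to_tuple()

# Stage 2: the decoder turns the embeddings into pixels.
image = pipeline(
    image_embeds=image_emb,
    negative_image_embeds=zero_image_emb,
    generator=generator,
    num_inference_steps=3,
    output_type="np",
).images[0]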
15 changes: 10 additions & 5 deletions tests/pipelines/kandinsky2_2/test_kandinsky_controlnet.py
@@ -34,7 +34,6 @@
     load_numpy,
     nightly,
     require_torch_gpu,
-    torch_device,
 )

 from ..test_pipelines_common import PipelineTesterMixin, assert_mean_pixel_difference
@@ -228,6 +227,12 @@ def test_inference_batch_single_identical(self):
 @nightly
 @require_torch_gpu
 class KandinskyV22ControlnetPipelineIntegrationTests(unittest.TestCase):
+    def setUp(self):
+        # clean up the VRAM before each test
+        super().setUp()
+        gc.collect()
+        torch.cuda.empty_cache()
+
     def tearDown(self):
         # clean up the VRAM after each test
         super().tearDown()
@@ -250,12 +255,12 @@ def test_kandinsky_controlnet(self):
         pipe_prior = KandinskyV22PriorPipeline.from_pretrained(
             "kandinsky-community/kandinsky-2-2-prior", torch_dtype=torch.float16
         )
-        pipe_prior.to(torch_device)
+        pipe_prior.enable_model_cpu_offload()

         pipeline = KandinskyV22ControlnetPipeline.from_pretrained(
             "kandinsky-community/kandinsky-2-2-controlnet-depth", torch_dtype=torch.float16
         )
-        pipeline = pipeline.to(torch_device)
+        pipeline = pipeline.enable_model_cpu_offload()
         pipeline.set_progress_bar_config(disable=None)

         prompt = "A robot, 4k photo"
@@ -264,7 +269,7 @@ def test_kandinsky_controlnet(self):
         image_emb, zero_image_emb = pipe_prior(
             prompt,
             generator=generator,
-            num_inference_steps=5,
+            num_inference_steps=2,
             negative_prompt="",
         ).to_tuple()

@@ -274,7 +279,7 @@ def test_kandinsky_controlnet(self):
             negative_image_embeds=zero_image_emb,
             hint=hint,
             generator=generator,
-            num_inference_steps=100,
+            num_inference_steps=2,
             output_type="np",
         )

14 changes: 10 additions & 4 deletions tests/pipelines/kandinsky2_2/test_kandinsky_controlnet_img2img.py
@@ -35,7 +35,6 @@
     load_numpy,
     nightly,
     require_torch_gpu,
-    torch_device,
 )

 from ..test_pipelines_common import PipelineTesterMixin, assert_mean_pixel_difference
@@ -235,6 +234,12 @@ def test_float16_inference(self):
 @nightly
 @require_torch_gpu
 class KandinskyV22ControlnetImg2ImgPipelineIntegrationTests(unittest.TestCase):
+    def setUp(self):
+        # clean up the VRAM before each test
+        super().setUp()
+        gc.collect()
+        torch.cuda.empty_cache()
+
     def tearDown(self):
         # clean up the VRAM after each test
         super().tearDown()
@@ -264,12 +269,12 @@ def test_kandinsky_controlnet_img2img(self):
         pipe_prior = KandinskyV22PriorEmb2EmbPipeline.from_pretrained(
             "kandinsky-community/kandinsky-2-2-prior", torch_dtype=torch.float16
         )
-        pipe_prior.to(torch_device)
+        pipe_prior.enable_model_cpu_offload()

         pipeline = KandinskyV22ControlnetImg2ImgPipeline.from_pretrained(
             "kandinsky-community/kandinsky-2-2-controlnet-depth", torch_dtype=torch.float16
         )
-        pipeline = pipeline.to(torch_device)
+        pipeline = pipeline.enable_model_cpu_offload()

         pipeline.set_progress_bar_config(disable=None)

@@ -281,6 +286,7 @@ def test_kandinsky_controlnet_img2img(self):
             strength=0.85,
             generator=generator,
             negative_prompt="",
+            num_inference_steps=5,
         ).to_tuple()

         output = pipeline(
@@ -289,7 +295,7 @@ def test_kandinsky_controlnet_img2img(self):
             negative_image_embeds=zero_image_emb,
             hint=hint,
             generator=generator,
-            num_inference_steps=100,
+            num_inference_steps=5,
             height=512,
             width=512,
             strength=0.5,