Additional Memory clean up for slow tests (huggingface#7436)
* update

* update

* update
DN6 authored Mar 25, 2024
1 parent e29f16c commit 9a34953
Showing 22 changed files with 188 additions and 29 deletions.
6 changes: 6 additions & 0 deletions tests/pipelines/deepfloyd_if/test_if.py
@@ -85,6 +85,12 @@ def test_xformers_attention_forwardGenerator_pass(self):
 @slow
 @require_torch_gpu
 class IFPipelineSlowTests(unittest.TestCase):
+    def setUp(self):
+        # clean up the VRAM before each test
+        super().setUp()
+        gc.collect()
+        torch.cuda.empty_cache()
+
     def tearDown(self):
         # clean up the VRAM after each test
         super().tearDown()
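This setUp/tearDown pair is the pattern the commit repeats across every slow-test class below: collect Python garbage first, then release PyTorch's cached CUDA blocks. A self-contained sketch of the pattern (the class name here is illustrative, not one of the changed files):

import gc
import unittest

import torch


class ExamplePipelineSlowTests(unittest.TestCase):
    def setUp(self):
        # Drop leftover Python references from a previous test, then return
        # PyTorch's cached VRAM blocks to the driver before this test starts.
        super().setUp()
        gc.collect()
        torch.cuda.empty_cache()

    def tearDown(self):
        # Mirror the cleanup after the test so a leaky test cannot starve
        # the next one of GPU memory.
        super().tearDown()
        gc.collect()
        torch.cuda.empty_cache()

gc.collect() has to run first: torch.cuda.empty_cache() can only release blocks whose tensors are no longer referenced.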
10 changes: 10 additions & 0 deletions tests/pipelines/deepfloyd_if/test_if_img2img.py
@@ -94,6 +94,12 @@ def test_inference_batch_single_identical(self):
 @slow
 @require_torch_gpu
 class IFImg2ImgPipelineSlowTests(unittest.TestCase):
+    def setUp(self):
+        # clean up the VRAM before each test
+        super().setUp()
+        gc.collect()
+        torch.cuda.empty_cache()
+
     def tearDown(self):
         # clean up the VRAM after each test
         super().tearDown()
@@ -109,6 +115,10 @@ def test_if_img2img(self):
         pipe.unet.set_attn_processor(AttnAddedKVProcessor())
         pipe.enable_model_cpu_offload()

+        torch.cuda.reset_max_memory_allocated()
+        torch.cuda.empty_cache()
+        torch.cuda.reset_peak_memory_stats()
+
         image = floats_tensor((1, 3, 64, 64), rng=random.Random(0)).to(torch_device)
         generator = torch.Generator(device="cpu").manual_seed(0)
         output = pipe(
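The reset calls added above are what make a later peak-memory check trustworthy: they zero the counters after model loading, so the measurement covers only the inference run. A hedged sketch of the measure-then-check pattern these slow tests follow (pipe, image, and generator come from the surrounding test; the threshold is illustrative, not the repository's actual value):

# Zero the peak-VRAM counters right before the measured run.
torch.cuda.reset_max_memory_allocated()
torch.cuda.empty_cache()
torch.cuda.reset_peak_memory_stats()

output = pipe(
    prompt="a photo of a cat",  # hypothetical prompt for illustration
    image=image,
    generator=generator,
    num_inference_steps=2,
    output_type="np",
)

# Peak VRAM allocated since the reset; with CPU offload this should stay
# far below what full-GPU placement of the pipeline would need.
mem_bytes = torch.cuda.max_memory_allocated()
assert mem_bytes < 12 * 10**9  # illustrative bound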
10 changes: 10 additions & 0 deletions tests/pipelines/deepfloyd_if/test_if_img2img_superresolution.py
@@ -92,6 +92,12 @@ def test_inference_batch_single_identical(self):
 @slow
 @require_torch_gpu
 class IFImg2ImgSuperResolutionPipelineSlowTests(unittest.TestCase):
+    def setUp(self):
+        # clean up the VRAM before each test
+        super().setUp()
+        gc.collect()
+        torch.cuda.empty_cache()
+
     def tearDown(self):
         # clean up the VRAM after each test
         super().tearDown()
@@ -107,6 +113,10 @@ def test_if_img2img_superresolution(self):
         pipe.unet.set_attn_processor(AttnAddedKVProcessor())
         pipe.enable_model_cpu_offload()

+        torch.cuda.reset_max_memory_allocated()
+        torch.cuda.empty_cache()
+        torch.cuda.reset_peak_memory_stats()
+
         generator = torch.Generator(device="cpu").manual_seed(0)

         original_image = floats_tensor((1, 3, 256, 256), rng=random.Random(0)).to(torch_device)
7 changes: 6 additions & 1 deletion tests/pipelines/deepfloyd_if/test_if_inpainting.py
@@ -92,6 +92,12 @@ def test_inference_batch_single_identical(self):
 @slow
 @require_torch_gpu
 class IFInpaintingPipelineSlowTests(unittest.TestCase):
+    def setUp(self):
+        # clean up the VRAM before each test
+        super().setUp()
+        gc.collect()
+        torch.cuda.empty_cache()
+
     def tearDown(self):
         # clean up the VRAM after each test
         super().tearDown()
@@ -105,7 +111,6 @@ def test_if_inpainting(self):
         pipe.unet.set_attn_processor(AttnAddedKVProcessor())
         pipe.enable_model_cpu_offload()

-        # Super resolution test
         torch.cuda.empty_cache()
         torch.cuda.reset_max_memory_allocated()
         torch.cuda.reset_peak_memory_stats()
tests/pipelines/deepfloyd_if/test_if_inpainting_superresolution.py
@@ -94,6 +94,12 @@ def test_inference_batch_single_identical(self):
 @slow
 @require_torch_gpu
 class IFInpaintingSuperResolutionPipelineSlowTests(unittest.TestCase):
+    def setUp(self):
+        # clean up the VRAM before each test
+        super().setUp()
+        gc.collect()
+        torch.cuda.empty_cache()
+
     def tearDown(self):
         # clean up the VRAM after each test
         super().tearDown()
6 changes: 6 additions & 0 deletions tests/pipelines/deepfloyd_if/test_if_superresolution.py
@@ -87,6 +87,12 @@ def test_inference_batch_single_identical(self):
 @slow
 @require_torch_gpu
 class IFSuperResolutionPipelineSlowTests(unittest.TestCase):
+    def setUp(self):
+        # clean up the VRAM before each test
+        super().setUp()
+        gc.collect()
+        torch.cuda.empty_cache()
+
     def tearDown(self):
         # clean up the VRAM after each test
         super().tearDown()
17 changes: 12 additions & 5 deletions tests/pipelines/ip_adapters/test_ip_adapter_stable_diffusion.py
@@ -50,7 +50,14 @@
 class IPAdapterNightlyTestsMixin(unittest.TestCase):
     dtype = torch.float16

+    def setUp(self):
+        # clean up the VRAM before each test
+        super().setUp()
+        gc.collect()
+        torch.cuda.empty_cache()
+
     def tearDown(self):
+        # clean up the VRAM after each test
         super().tearDown()
         gc.collect()
         torch.cuda.empty_cache()
@@ -313,7 +320,7 @@ def test_text_to_image_sdxl(self):
             feature_extractor=feature_extractor,
             torch_dtype=self.dtype,
         )
-        pipeline.to(torch_device)
+        pipeline.enable_model_cpu_offload()
         pipeline.load_ip_adapter("h94/IP-Adapter", subfolder="sdxl_models", weight_name="ip-adapter_sdxl.bin")

         inputs = self.get_dummy_inputs()
@@ -373,7 +380,7 @@ def test_image_to_image_sdxl(self):
             feature_extractor=feature_extractor,
             torch_dtype=self.dtype,
         )
-        pipeline.to(torch_device)
+        pipeline.enable_model_cpu_offload()
         pipeline.load_ip_adapter("h94/IP-Adapter", subfolder="sdxl_models", weight_name="ip-adapter_sdxl.bin")

         inputs = self.get_dummy_inputs(for_image_to_image=True)
@@ -442,7 +449,7 @@ def test_inpainting_sdxl(self):
             feature_extractor=feature_extractor,
             torch_dtype=self.dtype,
         )
-        pipeline.to(torch_device)
+        pipeline.enable_model_cpu_offload()
         pipeline.load_ip_adapter("h94/IP-Adapter", subfolder="sdxl_models", weight_name="ip-adapter_sdxl.bin")

         inputs = self.get_dummy_inputs(for_inpainting=True)
@@ -490,7 +497,7 @@ def test_ip_adapter_single_mask(self):
             image_encoder=image_encoder,
             torch_dtype=self.dtype,
         )
-        pipeline.to(torch_device)
+        pipeline.enable_model_cpu_offload()
         pipeline.load_ip_adapter(
             "h94/IP-Adapter", subfolder="sdxl_models", weight_name="ip-adapter-plus-face_sdxl_vit-h.safetensors"
         )
@@ -518,7 +525,7 @@ def test_ip_adapter_multiple_masks(self):
             image_encoder=image_encoder,
             torch_dtype=self.dtype,
        )
-        pipeline.to(torch_device)
+        pipeline.enable_model_cpu_offload()
         pipeline.load_ip_adapter(
             "h94/IP-Adapter", subfolder="sdxl_models", weight_name=["ip-adapter-plus-face_sdxl_vit-h.safetensors"] * 2
         )
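The one-line change repeated through this file, pipeline.to(torch_device) becoming pipeline.enable_model_cpu_offload(), is the main memory saver: instead of keeping the whole pipeline resident on the GPU, accelerate moves each sub-model (text encoder, UNet, VAE) onto the GPU only for its forward pass. Peak VRAM drops to roughly the largest sub-model, at the cost of some transfer overhead. A minimal sketch of the two placements (requires accelerate; the checkpoint is an example, not taken from this diff):

import torch
from diffusers import StableDiffusionXLPipeline

pipeline = StableDiffusionXLPipeline.from_pretrained(
    "stabilityai/stable-diffusion-xl-base-1.0", torch_dtype=torch.float16
)

# Before: the entire pipeline lives on the GPU for the whole run.
# pipeline.to("cuda")

# After: sub-models are shuttled to the GPU on demand, then back to CPU.
pipeline.enable_model_cpu_offload()

image = pipeline("an astronaut riding a horse", num_inference_steps=2).images[0]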
6 changes: 6 additions & 0 deletions tests/pipelines/kandinsky/test_kandinsky.py
@@ -275,6 +275,12 @@ def test_offloads(self):
 @slow
 @require_torch_gpu
 class KandinskyPipelineIntegrationTests(unittest.TestCase):
+    def setUp(self):
+        # clean up the VRAM before each test
+        super().setUp()
+        gc.collect()
+        torch.cuda.empty_cache()
+
     def tearDown(self):
         # clean up the VRAM after each test
         super().tearDown()
6 changes: 6 additions & 0 deletions tests/pipelines/kandinsky/test_kandinsky_img2img.py
@@ -299,6 +299,12 @@ def test_dict_tuple_outputs_equivalent(self):
 @slow
 @require_torch_gpu
 class KandinskyImg2ImgPipelineIntegrationTests(unittest.TestCase):
+    def setUp(self):
+        # clean up the VRAM before each test
+        super().setUp()
+        gc.collect()
+        torch.cuda.empty_cache()
+
     def tearDown(self):
         # clean up the VRAM after each test
         super().tearDown()
6 changes: 6 additions & 0 deletions tests/pipelines/kandinsky/test_kandinsky_inpaint.py
@@ -297,6 +297,12 @@ def test_float16_inference(self):
 @nightly
 @require_torch_gpu
 class KandinskyInpaintPipelineIntegrationTests(unittest.TestCase):
+    def setUp(self):
+        # clean up the VRAM before each test
+        super().setUp()
+        gc.collect()
+        torch.cuda.empty_cache()
+
     def tearDown(self):
         # clean up the VRAM after each test
         super().tearDown()
15 changes: 10 additions & 5 deletions tests/pipelines/kandinsky2_2/test_kandinsky.py
@@ -27,7 +27,6 @@
     load_numpy,
     require_torch_gpu,
     slow,
-    torch_device,
 )

 from ..test_pipelines_common import PipelineTesterMixin, assert_mean_pixel_difference
@@ -223,6 +222,12 @@ def test_float16_inference(self):
 @slow
 @require_torch_gpu
 class KandinskyV22PipelineIntegrationTests(unittest.TestCase):
+    def setUp(self):
+        # clean up the VRAM before each test
+        super().setUp()
+        gc.collect()
+        torch.cuda.empty_cache()
+
     def tearDown(self):
         # clean up the VRAM after each test
         super().tearDown()
@@ -238,12 +243,12 @@ def test_kandinsky_text2img(self):
         pipe_prior = KandinskyV22PriorPipeline.from_pretrained(
             "kandinsky-community/kandinsky-2-2-prior", torch_dtype=torch.float16
         )
-        pipe_prior.to(torch_device)
+        pipe_prior.enable_model_cpu_offload()

         pipeline = KandinskyV22Pipeline.from_pretrained(
             "kandinsky-community/kandinsky-2-2-decoder", torch_dtype=torch.float16
         )
-        pipeline = pipeline.to(torch_device)
+        pipeline = pipeline.enable_model_cpu_offload()
         pipeline.set_progress_bar_config(disable=None)

         prompt = "red cat, 4k photo"
@@ -252,7 +257,7 @@ def test_kandinsky_text2img(self):
         image_emb, zero_image_emb = pipe_prior(
             prompt,
             generator=generator,
-            num_inference_steps=5,
+            num_inference_steps=3,
             negative_prompt="",
         ).to_tuple()

@@ -261,7 +266,7 @@ def test_kandinsky_text2img(self):
             image_embeds=image_emb,
             negative_image_embeds=zero_image_emb,
             generator=generator,
-            num_inference_steps=100,
+            num_inference_steps=3,
             output_type="np",
         )

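Alongside offloading, this test shrinks the sampling budget (prior 5 to 3 steps, decoder 100 to 3 steps), which is where most of the wall-clock saving comes from; since the generator is seeded, the shorter run is still deterministic, though any golden outputs compared via assert_mean_pixel_difference must match the new step counts. A condensed sketch of the resulting two-stage call, using the checkpoints named in the diff:

import torch
from diffusers import KandinskyV22Pipeline, KandinskyV22PriorPipeline

pipe_prior = KandinskyV22PriorPipeline.from_pretrained(
    "kandinsky-community/kandinsky-2-2-prior", torch_dtype=torch.float16
)
pipe_prior.enable_model_cpu_offload()  # called for its side effect, not its return value

pipeline = KandinskyV22Pipeline.from_pretrained(
    "kandinsky-community/kandinsky-2-2-decoder", torch_dtype=torch.float16
)
pipeline.enable_model_cpu_offload()

generator = torch.Generator(device="cpu").manual_seed(0)

# Stage 1: the prior turns the prompt into image embeddings.
image_emb, zero_image_emb = pipe_prior(
    "red cat, 4k photo", generator=generator, num_inference_steps=3, negative_prompt=""
).to_tuple()

# Stage 2: the decoder turns the embeddings into pixels.
image = pipeline(
    image_embeds=image_emb,
    negative_image_embeds=zero_image_emb,
    generator=generator,
    num_inference_steps=3,
    output_type="np",
).images[0]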
15 changes: 10 additions & 5 deletions tests/pipelines/kandinsky2_2/test_kandinsky_controlnet.py
@@ -34,7 +34,6 @@
     load_numpy,
     nightly,
     require_torch_gpu,
-    torch_device,
 )

 from ..test_pipelines_common import PipelineTesterMixin, assert_mean_pixel_difference
@@ -228,6 +227,12 @@ def test_inference_batch_single_identical(self):
 @nightly
 @require_torch_gpu
 class KandinskyV22ControlnetPipelineIntegrationTests(unittest.TestCase):
+    def setUp(self):
+        # clean up the VRAM before each test
+        super().setUp()
+        gc.collect()
+        torch.cuda.empty_cache()
+
     def tearDown(self):
         # clean up the VRAM after each test
         super().tearDown()
@@ -250,12 +255,12 @@ def test_kandinsky_controlnet(self):
         pipe_prior = KandinskyV22PriorPipeline.from_pretrained(
             "kandinsky-community/kandinsky-2-2-prior", torch_dtype=torch.float16
         )
-        pipe_prior.to(torch_device)
+        pipe_prior.enable_model_cpu_offload()

         pipeline = KandinskyV22ControlnetPipeline.from_pretrained(
             "kandinsky-community/kandinsky-2-2-controlnet-depth", torch_dtype=torch.float16
         )
-        pipeline = pipeline.to(torch_device)
+        pipeline = pipeline.enable_model_cpu_offload()
         pipeline.set_progress_bar_config(disable=None)

         prompt = "A robot, 4k photo"
@@ -264,7 +269,7 @@ def test_kandinsky_controlnet(self):
         image_emb, zero_image_emb = pipe_prior(
             prompt,
             generator=generator,
-            num_inference_steps=5,
+            num_inference_steps=2,
             negative_prompt="",
         ).to_tuple()

@@ -274,7 +279,7 @@ def test_kandinsky_controlnet(self):
             negative_image_embeds=zero_image_emb,
             hint=hint,
             generator=generator,
-            num_inference_steps=100,
+            num_inference_steps=2,
             output_type="np",
         )

14 changes: 10 additions & 4 deletions tests/pipelines/kandinsky2_2/test_kandinsky_controlnet_img2img.py
@@ -35,7 +35,6 @@
     load_numpy,
     nightly,
     require_torch_gpu,
-    torch_device,
 )

 from ..test_pipelines_common import PipelineTesterMixin, assert_mean_pixel_difference
@@ -235,6 +234,12 @@ def test_float16_inference(self):
 @nightly
 @require_torch_gpu
 class KandinskyV22ControlnetImg2ImgPipelineIntegrationTests(unittest.TestCase):
+    def setUp(self):
+        # clean up the VRAM before each test
+        super().setUp()
+        gc.collect()
+        torch.cuda.empty_cache()
+
     def tearDown(self):
         # clean up the VRAM after each test
         super().tearDown()
@@ -264,12 +269,12 @@ def test_kandinsky_controlnet_img2img(self):
         pipe_prior = KandinskyV22PriorEmb2EmbPipeline.from_pretrained(
             "kandinsky-community/kandinsky-2-2-prior", torch_dtype=torch.float16
         )
-        pipe_prior.to(torch_device)
+        pipe_prior.enable_model_cpu_offload()

         pipeline = KandinskyV22ControlnetImg2ImgPipeline.from_pretrained(
             "kandinsky-community/kandinsky-2-2-controlnet-depth", torch_dtype=torch.float16
         )
-        pipeline = pipeline.to(torch_device)
+        pipeline = pipeline.enable_model_cpu_offload()

         pipeline.set_progress_bar_config(disable=None)

@@ -281,6 +286,7 @@ def test_kandinsky_controlnet_img2img(self):
             strength=0.85,
             generator=generator,
             negative_prompt="",
+            num_inference_steps=5,
         ).to_tuple()

         output = pipeline(
@@ -289,7 +295,7 @@ def test_kandinsky_controlnet_img2img(self):
             negative_image_embeds=zero_image_emb,
             hint=hint,
             generator=generator,
-            num_inference_steps=100,
+            num_inference_steps=5,
             height=512,
             width=512,
             strength=0.5,