diff --git a/src/diffusers/utils/testing_utils.py b/src/diffusers/utils/testing_utils.py index ec220ff973be..e19a9f83fdb9 100644 --- a/src/diffusers/utils/testing_utils.py +++ b/src/diffusers/utils/testing_utils.py @@ -635,10 +635,10 @@ def load_numpy(arry: Union[str, np.ndarray], local_path: Optional[str] = None) - return arry -def load_pt(url: str, map_location: str): +def load_pt(url: str, map_location: Optional[str] = None, weights_only: Optional[bool] = True): response = requests.get(url, timeout=DIFFUSERS_REQUEST_TIMEOUT) response.raise_for_status() - arry = torch.load(BytesIO(response.content), map_location=map_location) + arry = torch.load(BytesIO(response.content), map_location=map_location, weights_only=weights_only) return arry diff --git a/tests/pipelines/stable_cascade/test_stable_cascade_decoder.py b/tests/pipelines/stable_cascade/test_stable_cascade_decoder.py index afcd8fca71ca..2b51e59f6b31 100644 --- a/tests/pipelines/stable_cascade/test_stable_cascade_decoder.py +++ b/tests/pipelines/stable_cascade/test_stable_cascade_decoder.py @@ -304,7 +304,8 @@ def test_stable_cascade_decoder(self): generator = torch.Generator(device="cpu").manual_seed(0) image_embedding = load_pt( - "https://huggingface.co/datasets/hf-internal-testing/diffusers-images/resolve/main/stable_cascade/image_embedding.pt" + "https://huggingface.co/datasets/hf-internal-testing/diffusers-images/resolve/main/stable_cascade/image_embedding.pt", + map_location=torch_device, ) image = pipe( @@ -320,4 +321,4 @@ def test_stable_cascade_decoder(self): "https://huggingface.co/datasets/hf-internal-testing/diffusers-images/resolve/main/stable_cascade/stable_cascade_decoder_image.npy" ) max_diff = numpy_cosine_similarity_distance(image.flatten(), expected_image.flatten()) - assert max_diff < 1e-4 + assert max_diff < 2e-4 diff --git a/tests/pipelines/stable_diffusion_k_diffusion/test_stable_diffusion_k_diffusion.py b/tests/pipelines/stable_diffusion_k_diffusion/test_stable_diffusion_k_diffusion.py index fe78f0ec3c1a..33a44c0fcd08 100644 --- a/tests/pipelines/stable_diffusion_k_diffusion/test_stable_diffusion_k_diffusion.py +++ b/tests/pipelines/stable_diffusion_k_diffusion/test_stable_diffusion_k_diffusion.py @@ -20,26 +20,32 @@ import torch from diffusers import StableDiffusionKDiffusionPipeline -from diffusers.utils.testing_utils import enable_full_determinism, nightly, require_torch_gpu, torch_device +from diffusers.utils.testing_utils import ( + backend_empty_cache, + enable_full_determinism, + nightly, + require_torch_accelerator, + torch_device, +) enable_full_determinism() @nightly -@require_torch_gpu +@require_torch_accelerator class StableDiffusionPipelineIntegrationTests(unittest.TestCase): def setUp(self): # clean up the VRAM before each test super().setUp() gc.collect() - torch.cuda.empty_cache() + backend_empty_cache(torch_device) def tearDown(self): # clean up the VRAM after each test super().tearDown() gc.collect() - torch.cuda.empty_cache() + backend_empty_cache(torch_device) def test_stable_diffusion_1(self): sd_pipe = StableDiffusionKDiffusionPipeline.from_pretrained("CompVis/stable-diffusion-v1-4") diff --git a/tests/pipelines/stable_diffusion_ldm3d/test_stable_diffusion_ldm3d.py b/tests/pipelines/stable_diffusion_ldm3d/test_stable_diffusion_ldm3d.py index 8f07d02aad5e..2c4ae94ede55 100644 --- a/tests/pipelines/stable_diffusion_ldm3d/test_stable_diffusion_ldm3d.py +++ b/tests/pipelines/stable_diffusion_ldm3d/test_stable_diffusion_ldm3d.py @@ -28,7 +28,13 @@ StableDiffusionLDM3DPipeline, 
UNet2DConditionModel, ) -from diffusers.utils.testing_utils import enable_full_determinism, nightly, require_torch_gpu, torch_device +from diffusers.utils.testing_utils import ( + backend_empty_cache, + enable_full_determinism, + nightly, + require_torch_accelerator, + torch_device, +) from ..pipeline_params import TEXT_TO_IMAGE_BATCH_PARAMS, TEXT_TO_IMAGE_IMAGE_PARAMS, TEXT_TO_IMAGE_PARAMS @@ -205,17 +211,17 @@ def test_stable_diffusion_negative_prompt(self): @nightly -@require_torch_gpu +@require_torch_accelerator class StableDiffusionLDM3DPipelineSlowTests(unittest.TestCase): def setUp(self): super().setUp() gc.collect() - torch.cuda.empty_cache() + backend_empty_cache(torch_device) def tearDown(self): super().tearDown() gc.collect() - torch.cuda.empty_cache() + backend_empty_cache(torch_device) def get_inputs(self, device, generator_device="cpu", dtype=torch.float32, seed=0): generator = torch.Generator(device=generator_device).manual_seed(seed) @@ -256,17 +262,17 @@ def test_ldm3d_stable_diffusion(self): @nightly -@require_torch_gpu +@require_torch_accelerator class StableDiffusionPipelineNightlyTests(unittest.TestCase): def setUp(self): super().setUp() gc.collect() - torch.cuda.empty_cache() + backend_empty_cache(torch_device) def tearDown(self): super().tearDown() gc.collect() - torch.cuda.empty_cache() + backend_empty_cache(torch_device) def get_inputs(self, device, generator_device="cpu", dtype=torch.float32, seed=0): generator = torch.Generator(device=generator_device).manual_seed(seed) diff --git a/tests/pipelines/stable_diffusion_sag/test_stable_diffusion_sag.py b/tests/pipelines/stable_diffusion_sag/test_stable_diffusion_sag.py index bd1ba268d2d9..ebdd17c46f11 100644 --- a/tests/pipelines/stable_diffusion_sag/test_stable_diffusion_sag.py +++ b/tests/pipelines/stable_diffusion_sag/test_stable_diffusion_sag.py @@ -29,7 +29,13 @@ StableDiffusionSAGPipeline, UNet2DConditionModel, ) -from diffusers.utils.testing_utils import enable_full_determinism, nightly, require_torch_gpu, torch_device +from diffusers.utils.testing_utils import ( + backend_empty_cache, + enable_full_determinism, + nightly, + require_torch_accelerator, + torch_device, +) from ..pipeline_params import TEXT_TO_IMAGE_BATCH_PARAMS, TEXT_TO_IMAGE_IMAGE_PARAMS, TEXT_TO_IMAGE_PARAMS from ..test_pipelines_common import ( @@ -162,19 +168,19 @@ def test_encode_prompt_works_in_isolation(self): @nightly -@require_torch_gpu +@require_torch_accelerator class StableDiffusionPipelineIntegrationTests(unittest.TestCase): def setUp(self): # clean up the VRAM before each test super().setUp() gc.collect() - torch.cuda.empty_cache() + backend_empty_cache(torch_device) def tearDown(self): # clean up the VRAM after each test super().tearDown() gc.collect() - torch.cuda.empty_cache() + backend_empty_cache(torch_device) def test_stable_diffusion_1(self): sag_pipe = StableDiffusionSAGPipeline.from_pretrained("CompVis/stable-diffusion-v1-4") diff --git a/tests/pipelines/stable_unclip/test_stable_unclip.py b/tests/pipelines/stable_unclip/test_stable_unclip.py index 8cf103dffd56..ca6568eb698a 100644 --- a/tests/pipelines/stable_unclip/test_stable_unclip.py +++ b/tests/pipelines/stable_unclip/test_stable_unclip.py @@ -13,7 +13,17 @@ UNet2DConditionModel, ) from diffusers.pipelines.stable_diffusion.stable_unclip_image_normalizer import StableUnCLIPImageNormalizer -from diffusers.utils.testing_utils import enable_full_determinism, load_numpy, nightly, require_torch_gpu, torch_device +from diffusers.utils.testing_utils import ( + 
backend_empty_cache, + backend_max_memory_allocated, + backend_reset_max_memory_allocated, + backend_reset_peak_memory_stats, + enable_full_determinism, + load_numpy, + nightly, + require_torch_accelerator, + torch_device, +) from ..pipeline_params import TEXT_TO_IMAGE_BATCH_PARAMS, TEXT_TO_IMAGE_IMAGE_PARAMS, TEXT_TO_IMAGE_PARAMS from ..test_pipelines_common import ( @@ -190,19 +200,19 @@ def test_encode_prompt_works_in_isolation(self): @nightly -@require_torch_gpu +@require_torch_accelerator class StableUnCLIPPipelineIntegrationTests(unittest.TestCase): def setUp(self): # clean up the VRAM before each test super().setUp() gc.collect() - torch.cuda.empty_cache() + backend_empty_cache(torch_device) def tearDown(self): # clean up the VRAM after each test super().tearDown() gc.collect() - torch.cuda.empty_cache() + backend_empty_cache(torch_device) def test_stable_unclip(self): expected_image = load_numpy( @@ -226,9 +236,9 @@ def test_stable_unclip(self): assert_mean_pixel_difference(image, expected_image) def test_stable_unclip_pipeline_with_sequential_cpu_offloading(self): - torch.cuda.empty_cache() - torch.cuda.reset_max_memory_allocated() - torch.cuda.reset_peak_memory_stats() + backend_empty_cache(torch_device) + backend_reset_max_memory_allocated(torch_device) + backend_reset_peak_memory_stats(torch_device) pipe = StableUnCLIPPipeline.from_pretrained("fusing/stable-unclip-2-1-l", torch_dtype=torch.float16) pipe.set_progress_bar_config(disable=None) @@ -242,6 +252,6 @@ def test_stable_unclip_pipeline_with_sequential_cpu_offloading(self): output_type="np", ) - mem_bytes = torch.cuda.max_memory_allocated() + mem_bytes = backend_max_memory_allocated(torch_device) # make sure that less than 7 GB is allocated assert mem_bytes < 7 * 10**9 diff --git a/tests/pipelines/stable_unclip/test_stable_unclip_img2img.py b/tests/pipelines/stable_unclip/test_stable_unclip_img2img.py index 176b6954d616..b821625f691c 100644 --- a/tests/pipelines/stable_unclip/test_stable_unclip_img2img.py +++ b/tests/pipelines/stable_unclip/test_stable_unclip_img2img.py @@ -18,12 +18,16 @@ from diffusers.pipelines.stable_diffusion.stable_unclip_image_normalizer import StableUnCLIPImageNormalizer from diffusers.utils.import_utils import is_xformers_available from diffusers.utils.testing_utils import ( + backend_empty_cache, + backend_max_memory_allocated, + backend_reset_max_memory_allocated, + backend_reset_peak_memory_stats, enable_full_determinism, floats_tensor, load_image, load_numpy, nightly, - require_torch_gpu, + require_torch_accelerator, skip_mps, torch_device, ) @@ -213,19 +217,19 @@ def test_encode_prompt_works_in_isolation(self): @nightly -@require_torch_gpu +@require_torch_accelerator class StableUnCLIPImg2ImgPipelineIntegrationTests(unittest.TestCase): def setUp(self): # clean up the VRAM before each test super().setUp() gc.collect() - torch.cuda.empty_cache() + backend_empty_cache(torch_device) def tearDown(self): # clean up the VRAM after each test super().tearDown() gc.collect() - torch.cuda.empty_cache() + backend_empty_cache(torch_device) def test_stable_unclip_l_img2img(self): input_image = load_image( @@ -286,9 +290,9 @@ def test_stable_unclip_img2img_pipeline_with_sequential_cpu_offloading(self): "https://huggingface.co/datasets/hf-internal-testing/diffusers-images/resolve/main/stable_unclip/turtle.png" ) - torch.cuda.empty_cache() - torch.cuda.reset_max_memory_allocated() - torch.cuda.reset_peak_memory_stats() + backend_empty_cache(torch_device) + backend_reset_max_memory_allocated(torch_device) + 
backend_reset_peak_memory_stats(torch_device) pipe = StableUnCLIPImg2ImgPipeline.from_pretrained( "fusing/stable-unclip-2-1-h-img2img", torch_dtype=torch.float16 @@ -304,6 +308,6 @@ def test_stable_unclip_img2img_pipeline_with_sequential_cpu_offloading(self): output_type="np", ) - mem_bytes = torch.cuda.max_memory_allocated() + mem_bytes = backend_max_memory_allocated(torch_device) # make sure that less than 7 GB is allocated assert mem_bytes < 7 * 10**9 diff --git a/tests/pipelines/text_to_video_synthesis/test_text_to_video_zero.py b/tests/pipelines/text_to_video_synthesis/test_text_to_video_zero.py index f1bf6ee52206..314641464605 100644 --- a/tests/pipelines/text_to_video_synthesis/test_text_to_video_zero.py +++ b/tests/pipelines/text_to_video_synthesis/test_text_to_video_zero.py @@ -19,37 +19,44 @@ import torch from diffusers import DDIMScheduler, TextToVideoZeroPipeline -from diffusers.utils.testing_utils import load_pt, nightly, require_torch_gpu +from diffusers.utils.testing_utils import ( + backend_empty_cache, + load_pt, + nightly, + require_torch_accelerator, + torch_device, +) from ..test_pipelines_common import assert_mean_pixel_difference @nightly -@require_torch_gpu +@require_torch_accelerator class TextToVideoZeroPipelineSlowTests(unittest.TestCase): def setUp(self): # clean up the VRAM before each test super().setUp() gc.collect() - torch.cuda.empty_cache() + backend_empty_cache(torch_device) def tearDown(self): # clean up the VRAM after each test super().tearDown() gc.collect() - torch.cuda.empty_cache() + backend_empty_cache(torch_device) def test_full_model(self): model_id = "stable-diffusion-v1-5/stable-diffusion-v1-5" - pipe = TextToVideoZeroPipeline.from_pretrained(model_id, torch_dtype=torch.float16).to("cuda") + pipe = TextToVideoZeroPipeline.from_pretrained(model_id, torch_dtype=torch.float16).to(torch_device) pipe.scheduler = DDIMScheduler.from_config(pipe.scheduler.config) - generator = torch.Generator(device="cuda").manual_seed(0) + generator = torch.Generator(device="cpu").manual_seed(0) prompt = "A bear is playing a guitar on Times Square" result = pipe(prompt=prompt, generator=generator).images expected_result = load_pt( - "https://huggingface.co/datasets/hf-internal-testing/diffusers-images/resolve/main/text-to-video/A bear is playing a guitar on Times Square.pt" + "https://huggingface.co/datasets/hf-internal-testing/diffusers-images/resolve/main/text-to-video/A bear is playing a guitar on Times Square.pt", + weights_only=False, ) assert_mean_pixel_difference(result, expected_result) diff --git a/tests/pipelines/text_to_video_synthesis/test_text_to_video_zero_sdxl.py b/tests/pipelines/text_to_video_synthesis/test_text_to_video_zero_sdxl.py index db24767b60fc..827e83243bee 100644 --- a/tests/pipelines/text_to_video_synthesis/test_text_to_video_zero_sdxl.py +++ b/tests/pipelines/text_to_video_synthesis/test_text_to_video_zero_sdxl.py @@ -24,11 +24,11 @@ from diffusers import AutoencoderKL, DDIMScheduler, TextToVideoZeroSDXLPipeline, UNet2DConditionModel from diffusers.utils.testing_utils import ( + backend_empty_cache, enable_full_determinism, nightly, require_accelerate_version_greater, - require_accelerator, - require_torch_gpu, + require_torch_accelerator, torch_device, ) @@ -220,7 +220,7 @@ def test_dict_tuple_outputs_equivalent(self, expected_max_difference=1e-4): self.assertLess(max_diff, expected_max_difference) @unittest.skipIf(torch_device not in ["cuda", "xpu"], reason="float16 requires CUDA or XPU") - @require_accelerator + 
@require_torch_accelerator def test_float16_inference(self, expected_max_diff=5e-2): components = self.get_dummy_components() for name, module in components.items(): @@ -262,7 +262,7 @@ def test_inference_batch_consistent(self): def test_inference_batch_single_identical(self): pass - @require_accelerator + @require_torch_accelerator @require_accelerate_version_greater("0.17.0") def test_model_cpu_offload_forward_pass(self, expected_max_diff=2e-4): components = self.get_dummy_components() @@ -285,7 +285,7 @@ def test_pipeline_call_signature(self): pass @unittest.skipIf(torch_device not in ["cuda", "xpu"], reason="float16 requires CUDA or XPU") - @require_accelerator + @require_torch_accelerator def test_save_load_float16(self, expected_max_diff=1e-2): components = self.get_dummy_components() for name, module in components.items(): @@ -337,7 +337,7 @@ def test_save_load_optional_components(self): def test_sequential_cpu_offload_forward_pass(self): pass - @require_accelerator + @require_torch_accelerator def test_to_device(self): components = self.get_dummy_components() pipe = self.pipeline_class(**components) @@ -365,19 +365,19 @@ def test_xformers_attention_forwardGenerator_pass(self): @nightly -@require_torch_gpu +@require_torch_accelerator class TextToVideoZeroSDXLPipelineSlowTests(unittest.TestCase): def setUp(self): # clean up the VRAM before each test super().setUp() gc.collect() - torch.cuda.empty_cache() + backend_empty_cache(torch_device) def tearDown(self): # clean up the VRAM after each test super().tearDown() gc.collect() - torch.cuda.empty_cache() + backend_empty_cache(torch_device) def test_full_model(self): model_id = "stabilityai/stable-diffusion-xl-base-1.0" diff --git a/tests/pipelines/unclip/test_unclip.py b/tests/pipelines/unclip/test_unclip.py index 834d97f30d18..64f3d9de2418 100644 --- a/tests/pipelines/unclip/test_unclip.py +++ b/tests/pipelines/unclip/test_unclip.py @@ -23,10 +23,14 @@ from diffusers import PriorTransformer, UnCLIPPipeline, UnCLIPScheduler, UNet2DConditionModel, UNet2DModel from diffusers.pipelines.unclip.text_proj import UnCLIPTextProjModel from diffusers.utils.testing_utils import ( + backend_empty_cache, + backend_max_memory_allocated, + backend_reset_max_memory_allocated, + backend_reset_peak_memory_stats, enable_full_determinism, load_numpy, nightly, - require_torch_gpu, + require_torch_accelerator, skip_mps, torch_device, ) @@ -426,13 +430,13 @@ def setUp(self): # clean up the VRAM before each test super().setUp() gc.collect() - torch.cuda.empty_cache() + backend_empty_cache(torch_device) def tearDown(self): # clean up the VRAM after each test super().tearDown() gc.collect() - torch.cuda.empty_cache() + backend_empty_cache(torch_device) def test_unclip_karlo_cpu_fp32(self): expected_image = load_numpy( @@ -458,19 +462,19 @@ def test_unclip_karlo_cpu_fp32(self): @nightly -@require_torch_gpu +@require_torch_accelerator class UnCLIPPipelineIntegrationTests(unittest.TestCase): def setUp(self): # clean up the VRAM before each test super().setUp() gc.collect() - torch.cuda.empty_cache() + backend_empty_cache(torch_device) def tearDown(self): # clean up the VRAM after each test super().tearDown() gc.collect() - torch.cuda.empty_cache() + backend_empty_cache(torch_device) def test_unclip_karlo(self): expected_image = load_numpy( @@ -496,9 +500,9 @@ def test_unclip_karlo(self): assert_mean_pixel_difference(image, expected_image) def test_unclip_pipeline_with_sequential_cpu_offloading(self): - torch.cuda.empty_cache() - 
torch.cuda.reset_max_memory_allocated() - torch.cuda.reset_peak_memory_stats() + backend_empty_cache(torch_device) + backend_reset_max_memory_allocated(torch_device) + backend_reset_peak_memory_stats(torch_device) pipe = UnCLIPPipeline.from_pretrained("kakaobrain/karlo-v1-alpha", torch_dtype=torch.float16) pipe.set_progress_bar_config(disable=None) @@ -514,6 +518,6 @@ def test_unclip_pipeline_with_sequential_cpu_offloading(self): output_type="np", ) - mem_bytes = torch.cuda.max_memory_allocated() + mem_bytes = backend_max_memory_allocated(torch_device) # make sure that less than 7 GB is allocated assert mem_bytes < 7 * 10**9 diff --git a/tests/pipelines/unclip/test_unclip_image_variation.py b/tests/pipelines/unclip/test_unclip_image_variation.py index e402629fe1b9..3aa3c1b1152d 100644 --- a/tests/pipelines/unclip/test_unclip_image_variation.py +++ b/tests/pipelines/unclip/test_unclip_image_variation.py @@ -37,12 +37,13 @@ ) from diffusers.pipelines.unclip.text_proj import UnCLIPTextProjModel from diffusers.utils.testing_utils import ( + backend_empty_cache, enable_full_determinism, floats_tensor, load_image, load_numpy, nightly, - require_torch_gpu, + require_torch_accelerator, skip_mps, torch_device, ) @@ -496,19 +497,19 @@ def test_float16_inference(self): @nightly -@require_torch_gpu +@require_torch_accelerator class UnCLIPImageVariationPipelineIntegrationTests(unittest.TestCase): def setUp(self): # clean up the VRAM before each test super().setUp() gc.collect() - torch.cuda.empty_cache() + backend_empty_cache(torch_device) def tearDown(self): # clean up the VRAM after each test super().tearDown() gc.collect() - torch.cuda.empty_cache() + backend_empty_cache(torch_device) def test_unclip_image_variation_karlo(self): input_image = load_image(
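Note on the `load_pt` change at the top of this diff: the loader now defaults to `weights_only=True`, so `torch.load` rejects arbitrary pickled objects unless a caller opts out explicitly, and `map_location` becomes optional. Both call sites in the diff exercise the new surface; a minimal usage sketch, using only the URLs that appear above:

```python
from diffusers.utils.testing_utils import load_pt, torch_device

# Default path: weights_only=True guards against arbitrary pickle execution.
image_embedding = load_pt(
    "https://huggingface.co/datasets/hf-internal-testing/diffusers-images/resolve/main/stable_cascade/image_embedding.pt",
    map_location=torch_device,
)

# Opt-out path: artifacts that unpickle to non-tensor objects still need
# weights_only=False, as in test_text_to_video_zero.py above.
expected_result = load_pt(
    "https://huggingface.co/datasets/hf-internal-testing/diffusers-images/resolve/main/text-to-video/A bear is playing a guitar on Times Square.pt",
    weights_only=False,
)
```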
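The recurring substitution throughout these files — `torch.cuda.empty_cache()`, `torch.cuda.max_memory_allocated()`, and friends replaced by `backend_*` helpers keyed on `torch_device`, plus `@require_torch_gpu` relaxed to `@require_torch_accelerator` — is what lets these nightly suites run on CUDA, XPU, and other accelerators alike. The real helpers are imported from `diffusers.utils.testing_utils`; the sketch below only illustrates the dispatch pattern, not the library's actual implementation (the `_sketch` names are hypothetical, and the XPU memory-stat calls assume a recent PyTorch build with XPU support):

```python
import torch

def backend_empty_cache_sketch(device: str) -> None:
    # Release cached allocator blocks on the active accelerator; no-op on CPU.
    if device == "cuda":
        torch.cuda.empty_cache()
    elif device == "xpu":
        torch.xpu.empty_cache()

def backend_max_memory_allocated_sketch(device: str) -> int:
    # Peak bytes allocated since the last reset; backs the 7 GB assertions above.
    if device == "cuda":
        return torch.cuda.max_memory_allocated()
    if device == "xpu":
        return torch.xpu.max_memory_allocated()
    return 0  # CPU has no per-device allocator statistics
```

Seeding the generator on `device="cpu"` (as `test_text_to_video_zero.py` now does) follows the same logic: a CPU generator yields an identical random stream no matter which accelerator executes the pipeline, so expected outputs stay comparable across backends.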