
enable pipeline test cases on xpu #11527

Status: Open · wants to merge 15 commits into base: main

4 changes: 2 additions & 2 deletions src/diffusers/utils/testing_utils.py
@@ -635,10 +635,10 @@ def load_numpy(arry: Union[str, np.ndarray], local_path: Optional[str] = None)
     return arry


-def load_pt(url: str, map_location: str):
+def load_pt(url: str, map_location: Optional[str] = None, weights_only: Optional[bool] = True):
     response = requests.get(url, timeout=DIFFUSERS_REQUEST_TIMEOUT)
     response.raise_for_status()
-    arry = torch.load(BytesIO(response.content), map_location=map_location)
+    arry = torch.load(BytesIO(response.content), map_location=map_location, weights_only=weights_only)
     return arry
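For context, a minimal sketch of how a test can call the extended helper. weights_only=True (the new default) keeps torch.load in its safe, tensor-only mode; weights_only=False is needed when the checkpoint pickles arbitrary Python objects. The URLs are the ones used in the tests below; this is an illustration, not part of the diff:

from diffusers.utils.testing_utils import load_pt, torch_device

# Plain tensor payload: the defaults are sufficient; map_location places it
# on the accelerator under test (cuda, xpu, ...).
embedding = load_pt(
    "https://huggingface.co/datasets/hf-internal-testing/diffusers-images/resolve/main/stable_cascade/image_embedding.pt",
    map_location=torch_device,
)

# Pickled non-tensor payload: opt out of the safe loader explicitly.
expected = load_pt(
    "https://huggingface.co/datasets/hf-internal-testing/diffusers-images/resolve/main/text-to-video/A bear is playing a guitar on Times Square.pt",
    weights_only=False,
)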
5 changes: 3 additions & 2 deletions tests/pipelines/stable_cascade/test_stable_cascade_decoder.py
@@ -304,7 +304,8 @@ def test_stable_cascade_decoder(self):

         generator = torch.Generator(device="cpu").manual_seed(0)
         image_embedding = load_pt(
-            "https://huggingface.co/datasets/hf-internal-testing/diffusers-images/resolve/main/stable_cascade/image_embedding.pt"
+            "https://huggingface.co/datasets/hf-internal-testing/diffusers-images/resolve/main/stable_cascade/image_embedding.pt",
+            map_location=torch_device,
         )

         image = pipe(
@@ -320,4 +321,4 @@ def test_stable_cascade_decoder(self):
             "https://huggingface.co/datasets/hf-internal-testing/diffusers-images/resolve/main/stable_cascade/stable_cascade_decoder_image.npy"
         )
         max_diff = numpy_cosine_similarity_distance(image.flatten(), expected_image.flatten())
-        assert max_diff < 1e-4
+        assert max_diff < 2e-4
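The relaxed tolerance (1e-4 → 2e-4) absorbs small numerical drift between accelerator backends, while map_location=torch_device keeps the embedding on the device under test. For reference, a sketch of the metric being thresholded, assuming its definition in diffusers' testing utilities:

import numpy as np
from numpy.linalg import norm

def numpy_cosine_similarity_distance(a, b):
    # One minus cosine similarity of the flattened outputs:
    # 0.0 means identical direction, so smaller is closer.
    similarity = np.dot(a, b) / (norm(a) * norm(b))
    return 1.0 - similarity.mean()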
@@ -20,26 +20,32 @@
 import torch

 from diffusers import StableDiffusionKDiffusionPipeline
-from diffusers.utils.testing_utils import enable_full_determinism, nightly, require_torch_gpu, torch_device
+from diffusers.utils.testing_utils import (
+    backend_empty_cache,
+    enable_full_determinism,
+    nightly,
+    require_torch_accelerator,
+    torch_device,
+)


 enable_full_determinism()


 @nightly
-@require_torch_gpu
+@require_torch_accelerator
 class StableDiffusionPipelineIntegrationTests(unittest.TestCase):
     def setUp(self):
         # clean up the VRAM before each test
         super().setUp()
         gc.collect()
-        torch.cuda.empty_cache()
+        backend_empty_cache(torch_device)

     def tearDown(self):
         # clean up the VRAM after each test
         super().tearDown()
         gc.collect()
-        torch.cuda.empty_cache()
+        backend_empty_cache(torch_device)

     def test_stable_diffusion_1(self):
         sd_pipe = StableDiffusionKDiffusionPipeline.from_pretrained("CompVis/stable-diffusion-v1-4")
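This file shows the recurring pattern of the PR: swap @require_torch_gpu for @require_torch_accelerator and torch.cuda.empty_cache() for backend_empty_cache(torch_device). A rough sketch of how such device-dispatching helpers can be written; the actual implementations in diffusers.utils.testing_utils may differ in detail:

import unittest

import torch

def require_torch_accelerator(test_case):
    # Run the test when any supported accelerator is present, not only CUDA.
    available = torch.cuda.is_available() or (
        hasattr(torch, "xpu") and torch.xpu.is_available()
    )
    return unittest.skipUnless(available, "test requires an accelerator")(test_case)

def backend_empty_cache(device: str) -> None:
    # Release cached allocator memory on whichever backend is under test.
    if device == "cuda":
        torch.cuda.empty_cache()
    elif device == "xpu":
        torch.xpu.empty_cache()
    # "cpu" needs no cache management; other backends would be added here.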
@@ -28,7 +28,13 @@
     StableDiffusionLDM3DPipeline,
     UNet2DConditionModel,
 )
-from diffusers.utils.testing_utils import enable_full_determinism, nightly, require_torch_gpu, torch_device
+from diffusers.utils.testing_utils import (
+    backend_empty_cache,
+    enable_full_determinism,
+    nightly,
+    require_torch_accelerator,
+    torch_device,
+)

 from ..pipeline_params import TEXT_TO_IMAGE_BATCH_PARAMS, TEXT_TO_IMAGE_IMAGE_PARAMS, TEXT_TO_IMAGE_PARAMS

@@ -205,17 +211,17 @@ def test_stable_diffusion_negative_prompt(self):


 @nightly
-@require_torch_gpu
+@require_torch_accelerator
 class StableDiffusionLDM3DPipelineSlowTests(unittest.TestCase):
     def setUp(self):
         super().setUp()
         gc.collect()
-        torch.cuda.empty_cache()
+        backend_empty_cache(torch_device)

     def tearDown(self):
         super().tearDown()
         gc.collect()
-        torch.cuda.empty_cache()
+        backend_empty_cache(torch_device)

     def get_inputs(self, device, generator_device="cpu", dtype=torch.float32, seed=0):
         generator = torch.Generator(device=generator_device).manual_seed(seed)
@@ -256,17 +262,17 @@ def test_ldm3d_stable_diffusion(self):


 @nightly
-@require_torch_gpu
+@require_torch_accelerator
 class StableDiffusionPipelineNightlyTests(unittest.TestCase):
     def setUp(self):
         super().setUp()
         gc.collect()
-        torch.cuda.empty_cache()
+        backend_empty_cache(torch_device)

     def tearDown(self):
         super().tearDown()
         gc.collect()
-        torch.cuda.empty_cache()
+        backend_empty_cache(torch_device)

     def get_inputs(self, device, generator_device="cpu", dtype=torch.float32, seed=0):
         generator = torch.Generator(device=generator_device).manual_seed(seed)
14 changes: 10 additions & 4 deletions tests/pipelines/stable_diffusion_sag/test_stable_diffusion_sag.py
@@ -29,7 +29,13 @@
     StableDiffusionSAGPipeline,
     UNet2DConditionModel,
 )
-from diffusers.utils.testing_utils import enable_full_determinism, nightly, require_torch_gpu, torch_device
+from diffusers.utils.testing_utils import (
+    backend_empty_cache,
+    enable_full_determinism,
+    nightly,
+    require_torch_accelerator,
+    torch_device,
+)

 from ..pipeline_params import TEXT_TO_IMAGE_BATCH_PARAMS, TEXT_TO_IMAGE_IMAGE_PARAMS, TEXT_TO_IMAGE_PARAMS
 from ..test_pipelines_common import (
@@ -162,19 +168,19 @@ def test_encode_prompt_works_in_isolation(self):


 @nightly
-@require_torch_gpu
+@require_torch_accelerator
 class StableDiffusionPipelineIntegrationTests(unittest.TestCase):
     def setUp(self):
         # clean up the VRAM before each test
         super().setUp()
         gc.collect()
-        torch.cuda.empty_cache()
+        backend_empty_cache(torch_device)

     def tearDown(self):
         # clean up the VRAM after each test
         super().tearDown()
         gc.collect()
-        torch.cuda.empty_cache()
+        backend_empty_cache(torch_device)

     def test_stable_diffusion_1(self):
         sag_pipe = StableDiffusionSAGPipeline.from_pretrained("CompVis/stable-diffusion-v1-4")
26 changes: 18 additions & 8 deletions tests/pipelines/stable_unclip/test_stable_unclip.py
@@ -13,7 +13,17 @@
     UNet2DConditionModel,
 )
 from diffusers.pipelines.stable_diffusion.stable_unclip_image_normalizer import StableUnCLIPImageNormalizer
-from diffusers.utils.testing_utils import enable_full_determinism, load_numpy, nightly, require_torch_gpu, torch_device
+from diffusers.utils.testing_utils import (
+    backend_empty_cache,
+    backend_max_memory_allocated,
+    backend_reset_max_memory_allocated,
+    backend_reset_peak_memory_stats,
+    enable_full_determinism,
+    load_numpy,
+    nightly,
+    require_torch_accelerator,
+    torch_device,
+)

 from ..pipeline_params import TEXT_TO_IMAGE_BATCH_PARAMS, TEXT_TO_IMAGE_IMAGE_PARAMS, TEXT_TO_IMAGE_PARAMS
 from ..test_pipelines_common import (
@@ -190,19 +200,19 @@ def test_encode_prompt_works_in_isolation(self):


 @nightly
-@require_torch_gpu
+@require_torch_accelerator
 class StableUnCLIPPipelineIntegrationTests(unittest.TestCase):
     def setUp(self):
         # clean up the VRAM before each test
         super().setUp()
         gc.collect()
-        torch.cuda.empty_cache()
+        backend_empty_cache(torch_device)

     def tearDown(self):
         # clean up the VRAM after each test
         super().tearDown()
         gc.collect()
-        torch.cuda.empty_cache()
+        backend_empty_cache(torch_device)

     def test_stable_unclip(self):
         expected_image = load_numpy(
@@ -226,9 +236,9 @@ def test_stable_unclip(self):
         assert_mean_pixel_difference(image, expected_image)

     def test_stable_unclip_pipeline_with_sequential_cpu_offloading(self):
-        torch.cuda.empty_cache()
-        torch.cuda.reset_max_memory_allocated()
-        torch.cuda.reset_peak_memory_stats()
+        backend_empty_cache(torch_device)
+        backend_reset_max_memory_allocated(torch_device)
+        backend_reset_peak_memory_stats(torch_device)

         pipe = StableUnCLIPPipeline.from_pretrained("fusing/stable-unclip-2-1-l", torch_dtype=torch.float16)
         pipe.set_progress_bar_config(disable=None)
@@ -242,6 +252,6 @@ def test_stable_unclip_pipeline_with_sequential_cpu_offloading(self):
             output_type="np",
         )

-        mem_bytes = torch.cuda.max_memory_allocated()
+        mem_bytes = backend_max_memory_allocated(torch_device)
         # make sure that less than 7 GB is allocated
         assert mem_bytes < 7 * 10**9
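The memory assertion now goes through backend-agnostic wrappers as well. A plausible sketch of these helpers, assuming a simple dispatch on the device string (the real implementations in diffusers.utils.testing_utils may differ):

import torch

def backend_reset_peak_memory_stats(device: str) -> None:
    # Zero the peak-memory counters so the next measurement starts clean.
    if device == "cuda":
        torch.cuda.reset_peak_memory_stats()
    elif device == "xpu":
        torch.xpu.reset_peak_memory_stats()

def backend_max_memory_allocated(device: str) -> int:
    # Peak bytes allocated on the accelerator since the last reset.
    if device == "cuda":
        return torch.cuda.max_memory_allocated()
    if device == "xpu":
        return torch.xpu.max_memory_allocated()
    return 0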
20 changes: 12 additions & 8 deletions tests/pipelines/stable_unclip/test_stable_unclip_img2img.py
@@ -18,12 +18,16 @@
 from diffusers.pipelines.stable_diffusion.stable_unclip_image_normalizer import StableUnCLIPImageNormalizer
 from diffusers.utils.import_utils import is_xformers_available
 from diffusers.utils.testing_utils import (
+    backend_empty_cache,
+    backend_max_memory_allocated,
+    backend_reset_max_memory_allocated,
+    backend_reset_peak_memory_stats,
     enable_full_determinism,
     floats_tensor,
     load_image,
     load_numpy,
     nightly,
-    require_torch_gpu,
+    require_torch_accelerator,
     skip_mps,
     torch_device,
 )
@@ -213,19 +217,19 @@ def test_encode_prompt_works_in_isolation(self):


 @nightly
-@require_torch_gpu
+@require_torch_accelerator
 class StableUnCLIPImg2ImgPipelineIntegrationTests(unittest.TestCase):
     def setUp(self):
         # clean up the VRAM before each test
         super().setUp()
         gc.collect()
-        torch.cuda.empty_cache()
+        backend_empty_cache(torch_device)

     def tearDown(self):
         # clean up the VRAM after each test
         super().tearDown()
         gc.collect()
-        torch.cuda.empty_cache()
+        backend_empty_cache(torch_device)

     def test_stable_unclip_l_img2img(self):
         input_image = load_image(
@@ -286,9 +290,9 @@ def test_stable_unclip_img2img_pipeline_with_sequential_cpu_offloading(self):
             "https://huggingface.co/datasets/hf-internal-testing/diffusers-images/resolve/main/stable_unclip/turtle.png"
         )

-        torch.cuda.empty_cache()
-        torch.cuda.reset_max_memory_allocated()
-        torch.cuda.reset_peak_memory_stats()
+        backend_empty_cache(torch_device)
+        backend_reset_max_memory_allocated(torch_device)
+        backend_reset_peak_memory_stats(torch_device)

         pipe = StableUnCLIPImg2ImgPipeline.from_pretrained(
             "fusing/stable-unclip-2-1-h-img2img", torch_dtype=torch.float16
@@ -304,6 +308,6 @@ def test_stable_unclip_img2img_pipeline_with_sequential_cpu_offloading(self):
             output_type="np",
         )

-        mem_bytes = torch.cuda.max_memory_allocated()
+        mem_bytes = backend_max_memory_allocated(torch_device)
         # make sure that less than 7 GB is allocated
         assert mem_bytes < 7 * 10**9
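Both offloading tests measure the same behavior: with sequential CPU offload, submodules are moved to the accelerator one at a time during the forward pass, which is why peak memory can stay under 7 GB even in float16. A hedged usage sketch; the device argument is an assumption about the offload API, since the call site is not shown in this diff:

import torch

from diffusers import StableUnCLIPImg2ImgPipeline

pipe = StableUnCLIPImg2ImgPipeline.from_pretrained(
    "fusing/stable-unclip-2-1-h-img2img", torch_dtype=torch.float16
)
# Each submodule is shuttled to the accelerator only while it runs,
# trading speed for a much lower peak memory footprint.
pipe.enable_sequential_cpu_offload(device="cuda")  # assumed to accept "xpu" as well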
21 changes: 14 additions & 7 deletions tests/pipelines/text_to_video_synthesis/test_text_to_video_zero.py
@@ -19,37 +19,44 @@
 import torch

 from diffusers import DDIMScheduler, TextToVideoZeroPipeline
-from diffusers.utils.testing_utils import load_pt, nightly, require_torch_gpu
+from diffusers.utils.testing_utils import (
+    backend_empty_cache,
+    load_pt,
+    nightly,
+    require_torch_accelerator,
+    torch_device,
+)

 from ..test_pipelines_common import assert_mean_pixel_difference


 @nightly
-@require_torch_gpu
+@require_torch_accelerator
 class TextToVideoZeroPipelineSlowTests(unittest.TestCase):
     def setUp(self):
         # clean up the VRAM before each test
         super().setUp()
         gc.collect()
-        torch.cuda.empty_cache()
+        backend_empty_cache(torch_device)

     def tearDown(self):
         # clean up the VRAM after each test
         super().tearDown()
         gc.collect()
-        torch.cuda.empty_cache()
+        backend_empty_cache(torch_device)

     def test_full_model(self):
         model_id = "stable-diffusion-v1-5/stable-diffusion-v1-5"
-        pipe = TextToVideoZeroPipeline.from_pretrained(model_id, torch_dtype=torch.float16).to("cuda")
+        pipe = TextToVideoZeroPipeline.from_pretrained(model_id, torch_dtype=torch.float16).to(torch_device)
         pipe.scheduler = DDIMScheduler.from_config(pipe.scheduler.config)
-        generator = torch.Generator(device="cuda").manual_seed(0)
+        generator = torch.Generator(device="cpu").manual_seed(0)

[Review note from the PR author: PRNG behaves differently across accelerators; setting the generator back to "cpu" keeps results reproducible across devices, as in other diffusers test cases.]

         prompt = "A bear is playing a guitar on Times Square"
         result = pipe(prompt=prompt, generator=generator).images

         expected_result = load_pt(
-            "https://huggingface.co/datasets/hf-internal-testing/diffusers-images/resolve/main/text-to-video/A bear is playing a guitar on Times Square.pt"
+            "https://huggingface.co/datasets/hf-internal-testing/diffusers-images/resolve/main/text-to-video/A bear is playing a guitar on Times Square.pt",
+            weights_only=False,
         )

         assert_mean_pixel_difference(result, expected_result)
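As the review note says, a CPU generator yields the same random stream on every backend, so the initial latents match across CUDA and XPU. A minimal illustration of the pattern (the latent shape is chosen arbitrarily for the example):

import torch

# Deterministic everywhere: the stream comes from the CPU PRNG.
generator = torch.Generator(device="cpu").manual_seed(0)
latents = torch.randn((1, 4, 64, 64), generator=generator)

# Not portable: a device generator ties the stream to that backend's PRNG.
# generator = torch.Generator(device="cuda").manual_seed(0)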