
Commit 474a248

[tests] Fix HunyuanVideo Framepack device tests (#11789)
update
1 parent 7bc0a07 commit 474a248

3 files changed: +38 -6 lines changed


tests/pipelines/hunyuan_video/test_hunyuan_video_framepack.py

Lines changed: 24 additions & 1 deletion

@@ -71,7 +71,6 @@ class HunyuanVideoFramepackPipelineFastTests(
     )
 
     supports_dduf = False
-    # there is no xformers processor for Flux
     test_xformers_attention = False
     test_layerwise_casting = True
     test_group_offloading = True
@@ -360,6 +359,30 @@ def test_vae_tiling(self, expected_diff_max: float = 0.2):
             "VAE tiling should not affect the inference results",
         )
 
+    def test_float16_inference(self, expected_max_diff=0.2):
+        # NOTE: this test needs a higher tolerance because of multiple forwards through
+        # the model, which compounds the overall fp32 vs fp16 numerical differences. It
+        # shouldn't be expected that the results are the same, so we bump the tolerance.
+        return super().test_float16_inference(expected_max_diff)
+
+    @unittest.skip("The image_encoder uses SiglipVisionModel, which does not support sequential CPU offloading.")
+    def test_sequential_cpu_offload_forward_pass(self):
+        # https://github.com/huggingface/transformers/blob/21cb353b7b4f77c6f5f5c3341d660f86ff416d04/src/transformers/models/siglip/modeling_siglip.py#L803
+        # This is because it instantiates its attention layer from torch.nn.MultiheadAttention, which calls to
+        # `torch.nn.functional.multi_head_attention_forward` with the weights and bias. Since the hook is never
+        # triggered with a forward pass call, the weights stay on the CPU. There are more examples where we skip
+        # this test because of MHA (example: HunyuanDiT because of AttentionPooling layer).
+        pass
+
+    @unittest.skip("The image_encoder uses SiglipVisionModel, which does not support sequential CPU offloading.")
+    def test_sequential_offload_forward_pass_twice(self):
+        # https://github.com/huggingface/transformers/blob/21cb353b7b4f77c6f5f5c3341d660f86ff416d04/src/transformers/models/siglip/modeling_siglip.py#L803
+        # This is because it instantiates its attention layer from torch.nn.MultiheadAttention, which calls to
+        # `torch.nn.functional.multi_head_attention_forward` with the weights and bias. Since the hook is never
+        # triggered with a forward pass call, the weights stay on the CPU. There are more examples where we skip
+        # this test because of MHA (example: HunyuanDiT because of AttentionPooling layer).
+        pass
+
     # TODO(aryan): Create a dummy gemma model with smol vocab size
     @unittest.skip(
         "A very small vocab size is used for fast tests. So, any kind of prompt other than the empty default used in other tests will lead to an embedding lookup error. This test uses a long prompt that causes the error."

tests/pipelines/hunyuandit/test_hunyuan_dit.py

Lines changed: 10 additions & 2 deletions

@@ -124,14 +124,22 @@ def test_inference(self):
         max_diff = np.abs(image_slice.flatten() - expected_slice).max()
         self.assertLessEqual(max_diff, 1e-3)
 
-    @unittest.skip("Not supported.")
+    @unittest.skip("The HunyuanDiT Attention pooling layer does not support sequential CPU offloading.")
     def test_sequential_cpu_offload_forward_pass(self):
         # TODO(YiYi) need to fix later
+        # This is because it instantiates its attention layer from torch.nn.MultiheadAttention, which calls to
+        # `torch.nn.functional.multi_head_attention_forward` with the weights and bias. Since the hook is never
+        # triggered with a forward pass call, the weights stay on the CPU. There are more examples where we skip
+        # this test because of MHA (example: HunyuanVideo Framepack)
         pass
 
-    @unittest.skip("Not supported.")
+    @unittest.skip("The HunyuanDiT Attention pooling layer does not support sequential CPU offloading.")
     def test_sequential_offload_forward_pass_twice(self):
         # TODO(YiYi) need to fix later
+        # This is because it instantiates its attention layer from torch.nn.MultiheadAttention, which calls to
+        # `torch.nn.functional.multi_head_attention_forward` with the weights and bias. Since the hook is never
+        # triggered with a forward pass call, the weights stay on the CPU. There are more examples where we skip
+        # this test because of MHA (example: HunyuanVideo Framepack)
         pass
 
     def test_inference_batch_single_identical(self):
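Note: both skip reasons describe the same failure mode. Hook-based sequential CPU offloading moves a module's weights onto the device only when that module's own forward runs, but torch.nn.MultiheadAttention feeds its projection weights straight into torch.nn.functional.multi_head_attention_forward, so the hooked sub-layer is never called. A minimal sketch of that behaviour (illustrative only, not diffusers/accelerate code; FunctionalCaller is a made-up stand-in):

import torch
import torch.nn as nn
import torch.nn.functional as F

class FunctionalCaller(nn.Module):
    # Stand-in for torch.nn.MultiheadAttention: it owns a child Linear but uses
    # the child's parameters through a functional call instead of invoking the child.
    def __init__(self, dim):
        super().__init__()
        self.proj = nn.Linear(dim, dim)

    def forward(self, x):
        # self.proj.forward never runs here, so hooks registered on self.proj never fire.
        return F.linear(x, self.proj.weight, self.proj.bias)

fired = []
model = FunctionalCaller(4)
# Offloading schemes rely on pre-forward hooks like this one to move weights
# onto the device right before a module is used.
model.proj.register_forward_pre_hook(lambda module, args: fired.append("proj"))
_ = model(torch.randn(1, 4))
print(fired)  # [] -- the hook never ran, so offloaded weights would stay on the CPU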

tests/pipelines/test_pipelines_common.py

Lines changed: 4 additions & 3 deletions

@@ -2270,9 +2270,10 @@ def enable_group_offload_on_component(pipe, group_offloading_kwargs):
                     if hasattr(module, "_diffusers_hook")
                 )
             )
-            for component_name in ["vae", "vqvae"]:
-                if hasattr(pipe, component_name):
-                    getattr(pipe, component_name).to(torch_device)
+            for component_name in ["vae", "vqvae", "image_encoder"]:
+                component = getattr(pipe, component_name, None)
+                if isinstance(component, torch.nn.Module):
+                    component.to(torch_device)
 
         def run_forward(pipe):
             torch.manual_seed(0)
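Note: the test_pipelines_common.py change swaps a bare hasattr check for getattr with a None default plus an isinstance guard, because optional components such as image_encoder can exist as attributes while being set to None, in which case calling .to() would fail. A hedged sketch of the difference (DummyPipe is a made-up stand-in for a real pipeline):

import torch

class DummyPipe:
    def __init__(self):
        self.vae = torch.nn.Linear(2, 2)  # a real module
        self.image_encoder = None         # optional component left unset

pipe = DummyPipe()
# hasattr(pipe, "image_encoder") is True even though the slot is None, so the old
# pattern would call None.to(...) and raise; the guarded version skips it safely.
for component_name in ["vae", "vqvae", "image_encoder"]:
    component = getattr(pipe, component_name, None)  # missing attribute -> None, no AttributeError
    if isinstance(component, torch.nn.Module):       # filters out None and non-module values
        component.to("cpu")                          # the real test moves to torch_device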
