Skip to content

Commit bfca113

Browse files
committed
download model to hf component
Signed-off-by: Daniel Dowler <12484302+dandawg@users.noreply.github.com>
1 parent 259cc88 commit bfca113

7 files changed

Lines changed: 209 additions & 0 deletions

File tree

components/deployment/README.md

Lines changed: 5 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,5 @@
1+
# Deployment Components
2+
3+
This directory contains components in the **Deployment** category:
4+
5+
- [Download Model From Hf](./download_model_from_hf/README.md): Downloads a model from HuggingFace Hub to a local directory
Lines changed: 5 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,5 @@
1+
approvers:
2+
- dandawg
3+
reviewers:
4+
- dandawg
5+
Lines changed: 38 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,38 @@
1+
# Download Model From Hf ✨
2+
3+
## Overview 🧾
4+
5+
Downloads a model from HuggingFace Hub to a local directory.
6+
7+
Use a mounted PVC path with sufficient storage for larger models.
8+
9+
## Inputs 📥
10+
11+
| Parameter | Type | Default | Description |
12+
|-----------|------|---------|-------------|
13+
| `model_identifier` | `str` | — (required) | HuggingFace model identifier (e.g., "Qwen/Qwen3-VL-2B-Instruct") |
14+
| `local_model_dir` | `str` | `/models` | Local directory to save the model files to (default: "/models") |
15+
| `if_exists` | `str` | `skip` | Behavior if files already exist in local_model_dir. Options: ["skip" (default), "overwrite", "error"] |
16+
17+
## Metadata 🗂️
18+
19+
- **Name**: download_model_from_hf
20+
- **Tier**: core
21+
- **Stability**: alpha
22+
- **Dependencies**:
23+
- Kubeflow:
24+
- Name: Pipelines, Version: >=2.5
25+
- External Services: None
26+
- **Tags**:
27+
- deployment
28+
- **Last Verified**: 2025-01-08 00:00:00+00:00
29+
- **Owners**:
30+
- Approvers:
31+
- dandawg
32+
- Reviewers:
33+
- dandawg
34+
35+
## Additional Resources 📚
36+
37+
- **Documentation**: [https://huggingface.co/docs/huggingface_hub/guides/download](https://huggingface.co/docs/huggingface_hub/guides/download)
38+
- **Issue Tracker**: [https://github.com/kubeflow/pipelines-components/issues](https://github.com/kubeflow/pipelines-components/issues)
Lines changed: 3 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,3 @@
1+
from .component import download_model_from_hf
2+
3+
__all__ = ["download_model_from_hf"]
Lines changed: 93 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,93 @@
1+
"""Component to download a model from HuggingFace Hub to a local directory (PVC)."""
2+
3+
from kfp.dsl import component
4+
5+
6+
@component(base_image="python:3.12-slim-bullseye", packages_to_install=["huggingface-hub"])
def download_model_from_hf(model_identifier: str, local_model_dir: str = "/models", if_exists: str = "skip"):
    """Downloads a model from HuggingFace Hub to a local directory.

    Use a mounted PVC path with sufficient storage for larger models.

    Args:
        model_identifier: HuggingFace model identifier (e.g., "Qwen/Qwen3-VL-2B-Instruct")
        local_model_dir: Local directory to save the model files to (default: "/models")
        if_exists: Behavior if files already exist in local_model_dir. Options: ["skip" (default), "overwrite", "error"]

    Environment Variables (Assumed mounted as secret via kfp.kubernetes.use_secret_as_env):
        HUGGINGFACE_TOKEN: HuggingFace token (optional)

    Raises:
        ValueError: If if_exists is not one of "skip", "overwrite" or "error".
        RuntimeError: If files already exist and if_exists is "error", or if
            local_model_dir does not exist after the download.
    """
    import os
    import pathlib

    from huggingface_hub import snapshot_download

    # Helpers are nested on purpose: the imports above are function-local because a
    # lightweight KFP component is expected to be self-contained, so module-level
    # helpers would not be available when the component runs.
    def _list_files(root):
        """Return every regular file below ``root`` (sorted); empty if ``root`` is not a directory."""
        if not root.is_dir():
            return []
        return sorted(entry for entry in root.rglob("*") if entry.is_file())

    def _print_files(root, files):
        """Print each file's path relative to ``root`` together with its size in MB."""
        for entry in files:
            size_mb = entry.stat().st_size / (1024 * 1024)
            print(f"  {entry.relative_to(root)} ({size_mb:.2f} MB)")

    # Validate up front so a typo in if_exists is reported even when the target
    # directory is still empty (previously it was only checked if files existed).
    if if_exists not in ("skip", "overwrite", "error"):
        raise ValueError(
            f"Invalid value for if_exists: '{if_exists}'. Must be one of: 'skip', 'overwrite', 'error'"
        )

    print(f"Checking for existing model files in: {local_model_dir}")

    output_path = pathlib.Path(local_model_dir)
    existing_files = _list_files(output_path)
    force_download = False

    if existing_files:
        print(f"Found {len(existing_files)} existing files in {local_model_dir}")

        if if_exists == "skip":
            print("Skipping download - model files already exist (if_exists='skip')")
            print(f"\nExisting files ({len(existing_files)}):")
            _print_files(output_path, existing_files)
            print(f"\nUsing existing model files from: {local_model_dir}")
            return

        if if_exists == "error":
            raise RuntimeError(
                f"Model files already exist in {local_model_dir}. "
                f"Found {len(existing_files)} files. "
                f"Use if_exists='skip' to use existing files or if_exists='overwrite' to replace them."
            )

        # Only "overwrite" can reach this point.
        print("Overwriting existing files (if_exists='overwrite')")
        # Ask the hub client to re-fetch files instead of reusing ones it
        # considers up to date, so existing files are actually replaced.
        force_download = True
    else:
        print(f"No existing files found in {local_model_dir}")

    print(f"Downloading model: {model_identifier}")

    # Download all files of the model repo (config, tokenizer, model weights, etc.).
    # `local_dir_use_symlinks` is intentionally not passed: it is deprecated in
    # recent huggingface_hub releases (files are always written as real files when
    # local_dir is set) and the package version installed here is not pinned.
    snapshot_download(
        repo_id=model_identifier,
        local_dir=local_model_dir,
        force_download=force_download,
        # Treat an empty env var the same as an unset one (anonymous access).
        token=os.getenv("HUGGINGFACE_TOKEN") or None,
    )

    # Verify downloaded files
    if not output_path.exists():
        raise RuntimeError(f"Output directory {local_model_dir} was not created")

    files = _list_files(output_path)
    print(f"\nDownloaded {len(files)} files:")
    _print_files(output_path, files)

    print(f"\nModel download complete. Files saved to: {local_model_dir}")
86+
87+
88+
if __name__ == "__main__":
    # Running this module as a script compiles the component to a YAML package.
    from kfp.compiler import Compiler

    Compiler().compile(
        download_model_from_hf,
        package_path="download_model_from_hf.yaml",
    )
Lines changed: 49 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,49 @@
1+
from kfp import kubernetes as k8s
2+
from kfp.dsl import Else, If, pipeline
3+
from kfp_components.components.deployment import download_model_from_hf
4+
5+
6+
def _configure_download_task(task, pvc_name):
    """Apply the shared CPU/memory settings and the model PVC mount to a download task."""
    task.set_cpu_limit("2")
    task.set_memory_limit("8Gi")
    task.set_cpu_request("2")
    task.set_memory_request("8Gi")

    # NOTE: the PVC is always mounted at "/models"; local_model_dir should point
    # at (or below) that path for the downloaded files to land on the PVC.
    k8s.mount_pvc(task=task, pvc_name=pvc_name, mount_path="/models")


# download model from HuggingFace pipeline
@pipeline(name="download-hf-model", description="Download a model from HuggingFace to a local directory (PVC).")
def download_model_from_hf_pipeline(
    model_identifier: str,
    local_model_dir: str = "/models",
    pvc_name: str = "model-pvc",
    hf_connection_secret: str = "",
):
    """Download a model from HuggingFace to a local directory (PVC).

    Args:
        model_identifier: HuggingFace model identifier (e.g., "Qwen/Qwen3-VL-2B-Instruct")
        local_model_dir: Local directory to save the model files under (default: "/models")
        pvc_name: Name of the PVC to mount (default: "model-pvc")
        hf_connection_secret: Name of the secret to use for the HuggingFace connection.
            Leave empty (the default) to download without a token. The secret is
            expected to contain a HUGGINGFACE_TOKEN key.
    """
    # The branch must be selected with a comparison on the pipeline parameter so
    # that it is evaluated when the pipeline runs. A Python-level `is not None`
    # check is evaluated while the pipeline is being compiled, where the parameter
    # is a placeholder object, so it would always be true and the Else branch
    # could never be taken.
    with If(hf_connection_secret != ""):
        download_task = download_model_from_hf(model_identifier=model_identifier, local_model_dir=local_model_dir)
        _configure_download_task(download_task, pvc_name)

        k8s.use_secret_as_env(
            download_task,
            secret_name=hf_connection_secret,
            secret_key_to_env={"HUGGINGFACE_TOKEN": "HUGGINGFACE_TOKEN"},
        )
    with Else():
        # No secret configured: same task, but without the token env var.
        download_task = download_model_from_hf(model_identifier=model_identifier, local_model_dir=local_model_dir)
        _configure_download_task(download_task, pvc_name)
44+
45+
46+
if __name__ == "__main__":
    # Running this module as a script compiles the pipeline to a YAML package.
    from kfp.compiler import Compiler

    pipeline_compiler = Compiler()
    pipeline_compiler.compile(
        download_model_from_hf_pipeline,
        package_path="download_model_from_hf_pipeline.yaml",
    )
Lines changed: 16 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,16 @@
1+
name: download_model_from_hf
2+
tier: core
3+
stability: alpha
4+
dependencies:
5+
kubeflow:
6+
- name: Pipelines
7+
version: '>=2.5'
8+
external_services: {}
9+
tags:
10+
- deployment
11+
lastVerified: 2025-01-08T00:00:00Z
12+
ci:
13+
skip_dependency_probe: false
14+
links:
15+
documentation: https://huggingface.co/docs/huggingface_hub/guides/download
16+
issue_tracker: https://github.com/kubeflow/pipelines-components/issues

0 commit comments

Comments
 (0)