Skip to content

Commit f1df357

Browse files
authored
Qwen3 hpu support (#656)
Signed-off-by: Liu, Kaixuan <[email protected]>
1 parent 4e3a0bc commit f1df357

File tree

2 files changed

+462
-0
lines changed

2 files changed

+462
-0
lines changed

backends/python/server/text_embeddings_server/models/__init__.py

Lines changed: 7 additions & 0 deletions
Original file line number | Diff line number | Diff line change
@@ -13,6 +13,7 @@
1313
from text_embeddings_server.models.classification_model import ClassificationModel
1414
from text_embeddings_server.models.jinaBert_model import FlashJinaBert
1515
from text_embeddings_server.models.flash_mistral import FlashMistral
16+
from text_embeddings_server.models.flash_qwen3 import FlashQwen3
1617
from text_embeddings_server.utils.device import get_device, use_ipex
1718

1819
__all__ = ["Model"]
@@ -121,6 +122,12 @@ def get_model(model_path: Path, dtype: Optional[str], pool: str):
121122
except FileNotFoundError:
122123
return create_model(DefaultModel, model_path, device, datatype, pool)
123124

125+
if config.model_type == "qwen3" and device.type == "hpu":
126+
try:
127+
return create_model(FlashQwen3, model_path, device, datatype, pool)
128+
except FileNotFoundError:
129+
return create_model(DefaultModel, model_path, device, datatype, pool)
130+
124131
# Default case
125132
if config.architectures[0].endswith("Classification"):
126133
return create_model(ClassificationModel, model_path, device, datatype)

0 commit comments

Comments (0)