Skip to content

Commit be40719

Browse files
Authored on May 16, 2025
Upgrade pytorch and ipex to 2.7 version (#607)
Signed-off-by: Liu, Kaixuan <kaixuan.liu@intel.com>
1 parent f21a638 commit be40719

File tree

2 files changed

+41
-21
lines changed

2 files changed

+41
-21
lines changed
 

Dockerfile-intel

Lines changed: 5 additions & 5 deletions
Original file line numberDiff line numberDiff line change
@@ -59,7 +59,7 @@ RUN --mount=type=secret,id=actions_results_url,env=ACTIONS_RESULTS_URL \
5959
--mount=type=secret,id=actions_runtime_token,env=ACTIONS_RUNTIME_TOKEN \
6060
cargo build --release --bin text-embeddings-router -F grpc -F python --no-default-features && sccache -s
6161

62-
FROM intel/intel-extension-for-pytorch:2.6.0-pip-base AS cpu
62+
FROM intel/intel-extension-for-pytorch:2.7.0-pip-base AS cpu
6363
ENV HUGGINGFACE_HUB_CACHE=/data \
6464
PORT=80
6565

@@ -77,7 +77,7 @@ COPY backends/python/server/text_embeddings_server/models/__init__.py backends/p
7777
COPY backends/python/server/pyproject.toml backends/python/server/pyproject.toml
7878
COPY backends/python/server/requirements-intel.txt backends/python/server/requirements.txt
7979

80-
RUN python -m pip install torch==2.6.0 torchvision torchaudio==2.6.0 --index-url https://download.pytorch.org/whl/cpu --no-cache-dir
80+
RUN python -m pip install torch==2.7.0 torchvision torchaudio==2.7.0 --index-url https://download.pytorch.org/whl/cpu --no-cache-dir
8181

8282
RUN cd backends/python/server && \
8383
make install
@@ -103,7 +103,7 @@ COPY backends/python/server/requirements-hpu.txt backends/python/server/requirem
103103
RUN cd backends/python/server && \
104104
make install
105105

106-
FROM intel/intel-extension-for-pytorch:2.6.10-xpu AS xpu
106+
FROM intel/intel-extension-for-pytorch:2.7.10-xpu AS xpu
107107

108108
ENV HUGGINGFACE_HUB_CACHE=/data \
109109
PORT=80
@@ -117,8 +117,8 @@ RUN wget -O- https://apt.repos.intel.com/intel-gpg-keys/GPG-PUB-KEY-INTEL-SW-PRO
117117

118118
RUN apt-get update && apt install -y intel-basekit cmake vim python3-dev ninja-build pciutils
119119
WORKDIR /usr/src
120-
RUN pip install torch==2.6.0 torchvision torchaudio --index-url https://download.pytorch.org/whl/xpu --no-cache-dir
121-
RUN pip install intel-extension-for-pytorch==2.6.10+xpu oneccl_bind_pt==2.6.0+xpu --extra-index-url https://pytorch-extension.intel.com/release-whl/stable/xpu/us/ --no-cache-dir
120+
RUN pip install torch==2.7.0 torchvision==0.22.0 torchaudio==2.7.0 --index-url https://download.pytorch.org/whl/xpu --no-cache-dir
121+
RUN pip install intel-extension-for-pytorch==2.7.10+xpu oneccl_bind_pt==2.7.0+xpu --extra-index-url https://pytorch-extension.intel.com/release-whl/stable/xpu/us/ --no-cache-dir
122122

123123
ENV CCL_ROOT=/opt/intel/oneapi/ccl/latest
124124
ENV I_MPI_ROOT=/opt/intel/oneapi/mpi/latest

backends/python/server/text_embeddings_server/utils/flash_attn.py

Lines changed: 36 additions & 16 deletions
Original file line numberDiff line numberDiff line change
@@ -93,22 +93,42 @@ def attention(
9393
if use_ipex:
9494
import intel_extension_for_pytorch as ipex
9595

96-
return ipex.llm.functional.varlen_attention(
97-
q.contiguous() if q.device.type == "xpu" else q,
98-
k.contiguous() if k.device.type == "xpu" else k,
99-
v.contiguous() if v.device.type == "xpu" else v,
100-
out,
101-
cu_seqlens,
102-
cu_seqlens,
103-
max_s,
104-
max_s,
105-
0,
106-
softmax_scale,
107-
zero_tensors=False,
108-
is_causal=False,
109-
return_softmax=False,
110-
gen_=None,
111-
)
96+
if q.device.type == "xpu":
97+
return ipex.llm.functional.varlen_attention(
98+
q.contiguous(),
99+
k.contiguous(),
100+
v.contiguous(),
101+
out,
102+
cu_seqlens,
103+
cu_seqlens,
104+
None,
105+
max_s,
106+
max_s,
107+
0,
108+
softmax_scale,
109+
zero_tensors=False,
110+
is_causal=False,
111+
return_softmax=False,
112+
gen_=None,
113+
)
114+
elif q.device.type == "cpu":
115+
return ipex.llm.functional.varlen_attention(
116+
q,
117+
k,
118+
v,
119+
out,
120+
cu_seqlens,
121+
cu_seqlens,
122+
max_s,
123+
max_s,
124+
0,
125+
softmax_scale,
126+
zero_tensors=False,
127+
is_causal=False,
128+
return_softmax=False,
129+
gen_=None,
130+
)
131+
112132
elif is_hpu:
113133
return hpu_attn(
114134
q,

0 commit comments

Comments (0)
Please sign in to comment.