
Commit e9b1645 (parent: 6263b51)

align vllm hpu version to latest vllm-fork (#1061)

Signed-off-by: Xinyao Wang <[email protected]>

4 files changed: +82, -82 lines


ChatQnA/docker_compose/intel/hpu/gaudi/compose_vllm.yaml

2 additions, 2 deletions:

@@ -86,7 +86,7 @@ services:
       MAX_WARMUP_SEQUENCE_LENGTH: 512
     command: --model-id ${RERANK_MODEL_ID} --auto-truncate
   vllm-service:
-    image: ${REGISTRY:-opea}/llm-vllm-hpu:${TAG:-latest}
+    image: ${REGISTRY:-opea}/vllm-hpu:${TAG:-latest}
     container_name: vllm-gaudi-server
     ports:
       - "8007:80"
@@ -104,7 +104,7 @@ services:
     cap_add:
       - SYS_NICE
     ipc: host
-    command: /bin/bash -c "export VLLM_CPU_KVCACHE_SPACE=40 && python3 -m vllm.entrypoints.openai.api_server --enforce-eager --model $LLM_MODEL_ID --tensor-parallel-size 1 --host 0.0.0.0 --port 80 --block-size 128 --max-num-seqs 256 --max-seq_len-to-capture 2048"
+    command: --enforce-eager --model $LLM_MODEL_ID --tensor-parallel-size 1 --host 0.0.0.0 --port 80 --block-size 128 --max-num-seqs 256 --max-seq_len-to-capture 2048
   chatqna-gaudi-backend-server:
     image: ${REGISTRY:-opea}/chatqna:${TAG:-latest}
     container_name: chatqna-gaudi-backend-server
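Two things change in this service: the image now comes from the upstream HabanaAI build (vllm-hpu) instead of the GenAIComps wrapper (llm-vllm-hpu), and the compose command shrinks to bare flags, presumably because the new image already bakes in the OpenAI-compatible API server as its entrypoint. A minimal smoke test of the renamed service, sketched under two assumptions: the "8007:80" host mapping above is kept, and LLM_MODEL_ID is exported in the calling shell:

    # Query vLLM's OpenAI-compatible completions endpoint through the
    # host port mapped in compose_vllm.yaml (8007 -> container port 80).
    curl http://localhost:8007/v1/completions \
      -H "Content-Type: application/json" \
      -d '{
            "model": "'"${LLM_MODEL_ID}"'",
            "prompt": "What is deep learning?",
            "max_tokens": 32
          }'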

ChatQnA/docker_image_build/build.yaml

6 additions, 6 deletions:

@@ -77,12 +77,6 @@ services:
       dockerfile: comps/llms/text-generation/vllm/langchain/Dockerfile
     extends: chatqna
     image: ${REGISTRY:-opea}/llm-vllm:${TAG:-latest}
-  llm-vllm-hpu:
-    build:
-      context: GenAIComps
-      dockerfile: comps/llms/text-generation/vllm/langchain/dependency/Dockerfile.intel_hpu
-    extends: chatqna
-    image: ${REGISTRY:-opea}/llm-vllm-hpu:${TAG:-latest}
   llm-vllm-ray-hpu:
     build:
       context: GenAIComps
@@ -113,6 +107,12 @@ services:
       dockerfile: Dockerfile.cpu
     extends: chatqna
     image: ${REGISTRY:-opea}/vllm:${TAG:-latest}
+  vllm-hpu:
+    build:
+      context: vllm-fork
+      dockerfile: Dockerfile.hpu
+    extends: chatqna
+    image: ${REGISTRY:-opea}/vllm-hpu:${TAG:-latest}
   nginx:
     build:
       context: GenAIComps
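The replacement stanza builds directly from a local checkout of the HabanaAI fork rather than from GenAIComps. For orientation, a rough manual equivalent of the compose build, assuming vllm-fork is cloned next to build.yaml and REGISTRY/TAG are left at their defaults (opea / latest):

    # What "docker compose -f build.yaml build vllm-hpu" roughly amounts to:
    # build context is the vllm-fork checkout, using its in-tree Dockerfile.hpu.
    git clone https://github.com/HabanaAI/vllm-fork.git
    docker build -f vllm-fork/Dockerfile.hpu -t opea/vllm-hpu:latest vllm-fork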

ChatQnA/tests/test_compose_vllm_on_gaudi.sh

2 additions, 1 deletion:

@@ -17,9 +17,10 @@ ip_address=$(hostname -I | awk '{print $1}')
 function build_docker_images() {
     cd $WORKPATH/docker_image_build
     git clone https://github.com/opea-project/GenAIComps.git && cd GenAIComps && git checkout "${opea_branch:-"main"}" && cd ../
+    git clone https://github.com/HabanaAI/vllm-fork.git

     echo "Build all the images with --no-cache, check docker_image_build.log for details..."
-    service_list="chatqna chatqna-ui dataprep-redis retriever-redis llm-vllm-hpu nginx"
+    service_list="chatqna chatqna-ui dataprep-redis retriever-redis vllm-hpu nginx"
     docker compose -f build.yaml build ${service_list} --no-cache > ${LOG_PATH}/docker_image_build.log

     docker pull ghcr.io/huggingface/text-embeddings-inference:cpu-1.5
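The new clone deliberately has no checkout step, so each test run builds whatever vllm-fork's default branch holds at that moment, which matches the "align to latest vllm-fork" intent of this commit. If a reproducible build were wanted instead, one could pin a ref in the same style as the GenAIComps clone above (the ref below is a placeholder, not a real tag):

    # Hypothetical pinned variant; substitute a real vllm-fork tag or commit.
    git clone https://github.com/HabanaAI/vllm-fork.git && \
        cd vllm-fork && git checkout <known-good-ref> && cd ../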
