4 files changed: +82 -82

docker_compose/intel/hpu/gaudi (compose file):

@@ -86,7 +86,7 @@ services:
       MAX_WARMUP_SEQUENCE_LENGTH: 512
     command: --model-id ${RERANK_MODEL_ID} --auto-truncate
   vllm-service:
-    image: ${REGISTRY:-opea}/llm-vllm-hpu:${TAG:-latest}
+    image: ${REGISTRY:-opea}/vllm-hpu:${TAG:-latest}
     container_name: vllm-gaudi-server
     ports:
       - "8007:80"
@@ -104,7 +104,7 @@ services:
     cap_add:
       - SYS_NICE
     ipc: host
-    command: /bin/bash -c "export VLLM_CPU_KVCACHE_SPACE=40 && python3 -m vllm.entrypoints.openai.api_server --enforce-eager --model $LLM_MODEL_ID --tensor-parallel-size 1 --host 0.0.0.0 --port 80 --block-size 128 --max-num-seqs 256 --max-seq_len-to-capture 2048"
+    command: --enforce-eager --model $LLM_MODEL_ID --tensor-parallel-size 1 --host 0.0.0.0 --port 80 --block-size 128 --max-num-seqs 256 --max-seq_len-to-capture 2048
   chatqna-gaudi-backend-server:
     image: ${REGISTRY:-opea}/chatqna:${TAG:-latest}
     container_name: chatqna-gaudi-backend-server
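Note on the command change above: the old command wrapped the server in /bin/bash to export VLLM_CPU_KVCACHE_SPACE, a vLLM CPU-backend setting with no effect on Gaudi, and launched vllm.entrypoints.openai.api_server by hand. The new vllm-hpu image evidently bakes that server into its entrypoint, so command: now passes only the server flags. A minimal smoke test against the mapped host port, assuming the service is healthy and LLM_MODEL_ID names the served model (a sketch, not part of the change):

# Query the vLLM OpenAI-compatible completions endpoint on host port 8007.
curl http://localhost:8007/v1/completions \
  -H "Content-Type: application/json" \
  -d '{"model": "'"${LLM_MODEL_ID}"'", "prompt": "Hello", "max_tokens": 32}'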
docker_image_build/build.yaml:

@@ -77,12 +77,6 @@ services:
       dockerfile: comps/llms/text-generation/vllm/langchain/Dockerfile
     extends: chatqna
     image: ${REGISTRY:-opea}/llm-vllm:${TAG:-latest}
-  llm-vllm-hpu:
-    build:
-      context: GenAIComps
-      dockerfile: comps/llms/text-generation/vllm/langchain/dependency/Dockerfile.intel_hpu
-    extends: chatqna
-    image: ${REGISTRY:-opea}/llm-vllm-hpu:${TAG:-latest}
   llm-vllm-ray-hpu:
     build:
       context: GenAIComps
@@ -113,6 +107,12 @@ services:
       dockerfile: Dockerfile.cpu
     extends: chatqna
     image: ${REGISTRY:-opea}/vllm:${TAG:-latest}
+  vllm-hpu:
+    build:
+      context: vllm-fork
+      dockerfile: Dockerfile.hpu
+    extends: chatqna
+    image: ${REGISTRY:-opea}/vllm-hpu:${TAG:-latest}
   nginx:
     build:
       context: GenAIComps
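The removed llm-vllm-hpu service built a wrapper image from GenAIComps; its replacement, vllm-hpu, builds directly from HabanaAI's vllm-fork using the fork's Dockerfile.hpu. For reference, a hedged sketch of the equivalent manual build, assuming vllm-fork is cloned next to build.yaml as the test script below does:

# Clone the Habana fork and build the same image the compose service would produce.
git clone https://github.com/HabanaAI/vllm-fork.git
docker build -f vllm-fork/Dockerfile.hpu \
  -t ${REGISTRY:-opea}/vllm-hpu:${TAG:-latest} vllm-fork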
CI test script (docker image build step):

@@ -17,9 +17,10 @@ ip_address=$(hostname -I | awk '{print $1}')
 function build_docker_images() {
     cd $WORKPATH/docker_image_build
     git clone https://github.com/opea-project/GenAIComps.git && cd GenAIComps && git checkout "${opea_branch:-"main"}" && cd ../
+    git clone https://github.com/HabanaAI/vllm-fork.git

     echo "Build all the images with --no-cache, check docker_image_build.log for details..."
-    service_list="chatqna chatqna-ui dataprep-redis retriever-redis llm-vllm-hpu nginx"
+    service_list="chatqna chatqna-ui dataprep-redis retriever-redis vllm-hpu nginx"
     docker compose -f build.yaml build ${service_list} --no-cache > ${LOG_PATH}/docker_image_build.log

     docker pull ghcr.io/huggingface/text-embeddings-inference:cpu-1.5
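Taken together, the updated flow clones GenAIComps and vllm-fork side by side, then builds the renamed service list. A condensed local repro, reusing the script's own paths and variables (sketch only):

cd $WORKPATH/docker_image_build
git clone https://github.com/opea-project/GenAIComps.git
git clone https://github.com/HabanaAI/vllm-fork.git
docker compose -f build.yaml build vllm-hpu --no-cache   # build just the new service to verify the wiring
docker images | grep vllm-hpu                            # confirm the image exists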