Skip to content

Commit 3c164f3

Browse files
Make rerank run on Gaudi for HPU Docker Compose (#980)
Signed-off-by: lvliang-intel <[email protected]>
1 parent 7669c42 commit 3c164f3

File tree

5 files changed

+42
-16
lines changed

5 files changed

+42
-16
lines changed

ChatQnA/docker_compose/intel/hpu/gaudi/compose.yaml

Lines changed: 10 additions & 5 deletions
Original file line number | Diff line number | Diff line change
@@ -40,11 +40,11 @@ services:
4040
no_proxy: ${no_proxy}
4141
http_proxy: ${http_proxy}
4242
https_proxy: ${https_proxy}
43+
HF_HUB_DISABLE_PROGRESS_BARS: 1
44+
HF_HUB_ENABLE_HF_TRANSFER: 0
4345
HABANA_VISIBLE_DEVICES: all
4446
OMPI_MCA_btl_vader_single_copy_mechanism: none
4547
MAX_WARMUP_SEQUENCE_LENGTH: 512
46-
INIT_HCCL_ON_ACQUIRE: 0
47-
ENABLE_EXPERIMENTAL_FLAGS: true
4848
command: --model-id ${EMBEDDING_MODEL_ID} --auto-truncate
4949
retriever:
5050
image: ${REGISTRY:-opea}/retriever-redis:${TAG:-latest}
@@ -65,20 +65,25 @@ services:
6565
HUGGINGFACEHUB_API_TOKEN: ${HUGGINGFACEHUB_API_TOKEN}
6666
restart: unless-stopped
6767
tei-reranking-service:
68-
image: ghcr.io/huggingface/text-embeddings-inference:cpu-1.5
68+
image: ghcr.io/huggingface/tei-gaudi:latest
6969
container_name: tei-reranking-gaudi-server
7070
ports:
7171
- "8808:80"
7272
volumes:
7373
- "./data:/data"
74-
shm_size: 1g
74+
runtime: habana
75+
cap_add:
76+
- SYS_NICE
77+
ipc: host
7578
environment:
7679
no_proxy: ${no_proxy}
7780
http_proxy: ${http_proxy}
7881
https_proxy: ${https_proxy}
79-
HUGGINGFACEHUB_API_TOKEN: ${HUGGINGFACEHUB_API_TOKEN}
8082
HF_HUB_DISABLE_PROGRESS_BARS: 1
8183
HF_HUB_ENABLE_HF_TRANSFER: 0
84+
HABANA_VISIBLE_DEVICES: all
85+
OMPI_MCA_btl_vader_single_copy_mechanism: none
86+
MAX_WARMUP_SEQUENCE_LENGTH: 512
8287
command: --model-id ${RERANK_MODEL_ID} --auto-truncate
8388
tgi-service:
8489
image: ghcr.io/huggingface/tgi-gaudi:2.0.5

ChatQnA/docker_compose/intel/hpu/gaudi/compose_guardrails.yaml

Lines changed: 10 additions & 3 deletions
Original file line number | Diff line number | Diff line change
@@ -79,6 +79,8 @@ services:
7979
no_proxy: ${no_proxy}
8080
http_proxy: ${http_proxy}
8181
https_proxy: ${https_proxy}
82+
HF_HUB_DISABLE_PROGRESS_BARS: 1
83+
HF_HUB_ENABLE_HF_TRANSFER: 0
8284
HABANA_VISIBLE_DEVICES: all
8385
OMPI_MCA_btl_vader_single_copy_mechanism: none
8486
MAX_WARMUP_SEQUENCE_LENGTH: 512
@@ -102,20 +104,25 @@ services:
102104
HUGGINGFACEHUB_API_TOKEN: ${HUGGINGFACEHUB_API_TOKEN}
103105
restart: unless-stopped
104106
tei-reranking-service:
105-
image: ghcr.io/huggingface/text-embeddings-inference:cpu-1.5
107+
image: ghcr.io/huggingface/tei-gaudi:latest
106108
container_name: tei-reranking-gaudi-server
107109
ports:
108110
- "8808:80"
109111
volumes:
110112
- "./data:/data"
111-
shm_size: 1g
113+
runtime: habana
114+
cap_add:
115+
- SYS_NICE
116+
ipc: host
112117
environment:
113118
no_proxy: ${no_proxy}
114119
http_proxy: ${http_proxy}
115120
https_proxy: ${https_proxy}
116-
HUGGINGFACEHUB_API_TOKEN: ${HUGGINGFACEHUB_API_TOKEN}
117121
HF_HUB_DISABLE_PROGRESS_BARS: 1
118122
HF_HUB_ENABLE_HF_TRANSFER: 0
123+
HABANA_VISIBLE_DEVICES: all
124+
OMPI_MCA_btl_vader_single_copy_mechanism: none
125+
MAX_WARMUP_SEQUENCE_LENGTH: 512
119126
command: --model-id ${RERANK_MODEL_ID} --auto-truncate
120127
tgi-service:
121128
image: ghcr.io/huggingface/tgi-gaudi:2.0.5

ChatQnA/docker_compose/intel/hpu/gaudi/compose_vllm.yaml

Lines changed: 10 additions & 3 deletions
Original file line number | Diff line number | Diff line change
@@ -40,6 +40,8 @@ services:
4040
no_proxy: ${no_proxy}
4141
http_proxy: ${http_proxy}
4242
https_proxy: ${https_proxy}
43+
HF_HUB_DISABLE_PROGRESS_BARS: 1
44+
HF_HUB_ENABLE_HF_TRANSFER: 0
4345
HABANA_VISIBLE_DEVICES: all
4446
OMPI_MCA_btl_vader_single_copy_mechanism: none
4547
MAX_WARMUP_SEQUENCE_LENGTH: 512
@@ -63,20 +65,25 @@ services:
6365
HUGGINGFACEHUB_API_TOKEN: ${HUGGINGFACEHUB_API_TOKEN}
6466
restart: unless-stopped
6567
tei-reranking-service:
66-
image: ghcr.io/huggingface/text-embeddings-inference:cpu-1.5
68+
image: ghcr.io/huggingface/tei-gaudi:latest
6769
container_name: tei-reranking-gaudi-server
6870
ports:
6971
- "8808:80"
7072
volumes:
7173
- "./data:/data"
72-
shm_size: 1g
74+
runtime: habana
75+
cap_add:
76+
- SYS_NICE
77+
ipc: host
7378
environment:
7479
no_proxy: ${no_proxy}
7580
http_proxy: ${http_proxy}
7681
https_proxy: ${https_proxy}
77-
HUGGINGFACEHUB_API_TOKEN: ${HUGGINGFACEHUB_API_TOKEN}
7882
HF_HUB_DISABLE_PROGRESS_BARS: 1
7983
HF_HUB_ENABLE_HF_TRANSFER: 0
84+
HABANA_VISIBLE_DEVICES: all
85+
OMPI_MCA_btl_vader_single_copy_mechanism: none
86+
MAX_WARMUP_SEQUENCE_LENGTH: 512
8087
command: --model-id ${RERANK_MODEL_ID} --auto-truncate
8188
vllm-service:
8289
image: ${REGISTRY:-opea}/llm-vllm-hpu:${TAG:-latest}

ChatQnA/docker_compose/intel/hpu/gaudi/compose_vllm_ray.yaml

Lines changed: 10 additions & 3 deletions
Original file line number | Diff line number | Diff line change
@@ -40,6 +40,8 @@ services:
4040
no_proxy: ${no_proxy}
4141
http_proxy: ${http_proxy}
4242
https_proxy: ${https_proxy}
43+
HF_HUB_DISABLE_PROGRESS_BARS: 1
44+
HF_HUB_ENABLE_HF_TRANSFER: 0
4345
HABANA_VISIBLE_DEVICES: all
4446
OMPI_MCA_btl_vader_single_copy_mechanism: none
4547
MAX_WARMUP_SEQUENCE_LENGTH: 512
@@ -63,20 +65,25 @@ services:
6365
HUGGINGFACEHUB_API_TOKEN: ${HUGGINGFACEHUB_API_TOKEN}
6466
restart: unless-stopped
6567
tei-reranking-service:
66-
image: ghcr.io/huggingface/text-embeddings-inference:cpu-1.5
68+
image: ghcr.io/huggingface/tei-gaudi:latest
6769
container_name: tei-reranking-gaudi-server
6870
ports:
6971
- "8808:80"
7072
volumes:
7173
- "./data:/data"
72-
shm_size: 1g
74+
runtime: habana
75+
cap_add:
76+
- SYS_NICE
77+
ipc: host
7378
environment:
7479
no_proxy: ${no_proxy}
7580
http_proxy: ${http_proxy}
7681
https_proxy: ${https_proxy}
77-
HUGGINGFACEHUB_API_TOKEN: ${HUGGINGFACEHUB_API_TOKEN}
7882
HF_HUB_DISABLE_PROGRESS_BARS: 1
7983
HF_HUB_ENABLE_HF_TRANSFER: 0
84+
HABANA_VISIBLE_DEVICES: all
85+
OMPI_MCA_btl_vader_single_copy_mechanism: none
86+
MAX_WARMUP_SEQUENCE_LENGTH: 512
8087
command: --model-id ${RERANK_MODEL_ID} --auto-truncate
8188
vllm-ray-service:
8289
image: ${REGISTRY:-opea}/llm-vllm-ray-hpu:${TAG:-latest}

ChatQnA/docker_compose/intel/hpu/gaudi/compose_without_rerank.yaml

Lines changed: 2 additions & 2 deletions
Original file line number | Diff line number | Diff line change
@@ -40,11 +40,11 @@ services:
4040
no_proxy: ${no_proxy}
4141
http_proxy: ${http_proxy}
4242
https_proxy: ${https_proxy}
43+
HF_HUB_DISABLE_PROGRESS_BARS: 1
44+
HF_HUB_ENABLE_HF_TRANSFER: 0
4345
HABANA_VISIBLE_DEVICES: all
4446
OMPI_MCA_btl_vader_single_copy_mechanism: none
4547
MAX_WARMUP_SEQUENCE_LENGTH: 512
46-
INIT_HCCL_ON_ACQUIRE: 0
47-
ENABLE_EXPERIMENTAL_FLAGS: true
4848
command: --model-id ${EMBEDDING_MODEL_ID} --auto-truncate
4949
retriever:
5050
image: ${REGISTRY:-opea}/retriever-redis:${TAG:-latest}

0 commit comments

Comments (0)