
Commit 87e3c0f

Update chatqna values file changes (#1844)
Signed-off-by: Dolpher Du <[email protected]>
Parent: 27813b3

3 files changed: +34 −23 lines

Lines changed: 14 additions & 0 deletions

@@ -0,0 +1,14 @@
+# Copyright (C) 2025 Intel Corporation
+# SPDX-License-Identifier: Apache-2.0
+
+redis-vector-db:
+  enabled: false
+milvus:
+  enabled: true
+
+data-prep:
+  DATAPREP_BACKEND: "MILVUS"
+  COLLECTION_NAME: "rag_milvus"
+retriever-usvc:
+  RETRIEVER_BACKEND: "MILVUS"
+  COLLECTION_NAME: "rag_milvus"
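
This new values override (its file path was lost in the page extraction) swaps ChatQnA's default Redis vector store for Milvus. The backend and collection settings must agree between ingestion and retrieval: documents land in the collection data-prep writes to, and retriever-usvc searches only the collection it is configured with. A minimal sketch, assuming a hypothetical custom collection name:

data-prep:
  DATAPREP_BACKEND: "MILVUS"
  COLLECTION_NAME: "my_docs"   # hypothetical name; must match retriever-usvc
retriever-usvc:
  RETRIEVER_BACKEND: "MILVUS"
  COLLECTION_NAME: "my_docs"   # a mismatch here silently returns no documents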
Lines changed: 7 additions & 2 deletions

@@ -1,5 +1,10 @@
 # Copyright (C) 2025 Intel Corporation
 # SPDX-License-Identifier: Apache-2.0
 
-image:
-  repository: opea/chatqna
+vllm:
+  LLM_MODEL_ID: meta-llama/Meta-Llama-3-8B-Instruct
+  # Uncomment the following model-specific settings for DeepSeek models
+  #VLLM_CPU_KVCACHE_SPACE: 40
+  #resources:
+  #  requests:
+  #    memory: 60Gi # 40G for KV cache and 20G for DeepSeek-R1-Distill-Qwen-7B; adjust for other models
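
This change selects the served model via vllm.LLM_MODEL_ID instead of overriding the ChatQnA image repository. A hedged sketch of the DeepSeek case the comments describe, with the settings uncommented, assuming the upstream deepseek-ai Hugging Face namespace (not named in the diff) and reusing the memory figure from the diff's own comment:

vllm:
  LLM_MODEL_ID: deepseek-ai/DeepSeek-R1-Distill-Qwen-7B  # assumed HF model ID
  VLLM_CPU_KVCACHE_SPACE: 40    # GiB reserved for vLLM's CPU KV cache
  resources:
    requests:
      memory: 60Gi              # 40G KV cache + ~20G model weights, per the diff comment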

ChatQnA/kubernetes/helm/guardrails-gaudi-values.yaml

Lines changed: 13 additions & 21 deletions

@@ -10,8 +10,9 @@ CHATQNA_TYPE: "CHATQNA_GUARDRAILS"
 # guardrails related config
 guardrails-usvc:
   enabled: true
-  # SAFETY_GUARD_ENDPOINT: "http://{{ .Release.Name }}-tgi-guardrails"
+  SAFETY_GUARD_ENDPOINT: "http://{{ .Release.Name }}-vllm-guardrails"
   SAFETY_GUARD_MODEL_ID: "meta-llama/Meta-Llama-Guard-2-8B"
+  retryTimeoutSeconds: 720
 
 # gaudi related config
 # tei running on CPU by default
@@ -41,33 +42,24 @@ teirerank:
   readinessProbe:
     timeoutSeconds: 1
 
-tgi-guardrails:
+vllm-guardrails:
   enabled: true
   accelDevice: "gaudi"
-  LLM_MODEL_ID: "meta-llama/Meta-Llama-Guard-2-8B"
   image:
-    repository: ghcr.io/huggingface/tgi-gaudi
-    tag: "2.3.1"
+    repository: opea/vllm-gaudi
+  LLM_MODEL_ID: "meta-llama/Meta-Llama-Guard-2-8B"
   resources:
     limits:
       habana.ai/gaudi: 1
-  MAX_INPUT_LENGTH: "1024"
-  MAX_TOTAL_TOKENS: "2048"
-  CUDA_GRAPHS: ""
-  OMPI_MCA_btl_vader_single_copy_mechanism: "none"
-  ENABLE_HPU_GRAPH: "true"
-  LIMIT_HPU_GRAPH: "true"
-  USE_FLASH_ATTENTION: "true"
-  FLASH_ATTENTION_RECOMPUTE: "true"
-  readinessProbe:
-    initialDelaySeconds: 5
-    periodSeconds: 5
-    timeoutSeconds: 1
+  extraCmdArgs: [
+    "--tensor-parallel-size", "1",
+    "--block-size", "128",
+    "--max-num-seqs", "256",
+    "--max-seq-len-to-capture", "2048"
+  ]
   startupProbe:
-    initialDelaySeconds: 5
-    periodSeconds: 5
-    timeoutSeconds: 1
-    failureThreshold: 120
+    failureThreshold: 360
+  OMPI_MCA_btl_vader_single_copy_mechanism: "none"
 
 tgi:
   enabled: false
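
The guard model now runs on vLLM (opea/vllm-gaudi) instead of TGI, so the TGI-specific environment knobs give way to vLLM command-line flags, and SAFETY_GUARD_ENDPOINT points at the new service through Helm templating: {{ .Release.Name }} expands to the release name at install time. A hedged illustration of the rendered value for a release installed as chatqna:

guardrails-usvc:
  # "http://{{ .Release.Name }}-vllm-guardrails" rendered for a release named "chatqna"
  SAFETY_GUARD_ENDPOINT: "http://chatqna-vllm-guardrails"

The raised startupProbe failureThreshold (120 to 360) and the new retryTimeoutSeconds: 720 presumably accommodate vLLM's longer model-load and warmup time on Gaudi before Kubernetes restarts the pod or the guardrails microservice stops retrying.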
