Commit 121ca55

Align vllm settings with docker compose version

Align settings with PR opea-project/GenAIExamples#1061. Make llm-uservice support both the tgi and vllm backends.

Signed-off-by: Dolpher Du <[email protected]>

1 parent 66de41c
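
For a quick sanity check of the new backend switch, a minimal install sketch (the release name, working directory, and token placeholder are illustrative assumptions, not part of this commit):

# Fetch the conditional tgi/vllm subcharts, then install with vllm enabled
helm dependency update helm-charts/common/llm-uservice
helm install llm-uservice helm-charts/common/llm-uservice \
  --set vllm.enabled=true \
  --set LLM_MODEL_ID=Intel/neural-chat-7b-v3-3 \
  --set global.HUGGINGFACEHUB_API_TOKEN=<token>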

6 files changed, 45 insertions(+), 7 deletions(-)

helm-charts/common/llm-uservice/Chart.yaml (4 additions, 0 deletions)

@@ -13,3 +13,7 @@ dependencies:
     version: 1.0.0
     repository: file://../tgi
     condition: tgi.enabled
+  - name: vllm
+    version: 1.0.0
+    repository: file://../vllm
+    condition: vllm.enabled
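
The vllm dependency mirrors the existing tgi entry and is only rendered when its condition flag is true. A hedged way to confirm (release name is an assumption):

# Subcharts must be fetched before templating; vllm manifests appear only when enabled
helm dependency update helm-charts/common/llm-uservice
helm template llm-uservice helm-charts/common/llm-uservice --set vllm.enabled=true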

helm-charts/common/llm-uservice/templates/configmap.yaml (10 additions, 2 deletions)

@@ -13,15 +13,23 @@ data:
   {{- else }}
   TGI_LLM_ENDPOINT: "http://{{ .Release.Name }}-tgi"
   {{- end }}
+  {{- if .Values.vLLM_ENDPOINT }}
+  vLLM_ENDPOINT: {{ .Values.vLLM_ENDPOINT | quote}}
+  {{- else }}
+  vLLM_ENDPOINT: "http://{{ .Release.Name }}-vllm"
+  {{- end }}
+  {{- if .Values.LLM_MODEL_ID }}
+  LLM_MODEL: {{ .Values.LLM_MODEL_ID | quote}}
+  {{- end }}
   HUGGINGFACEHUB_API_TOKEN: {{ .Values.global.HUGGINGFACEHUB_API_TOKEN | quote}}
   HF_HOME: "/tmp/.cache/huggingface"
   {{- if .Values.global.HF_ENDPOINT }}
   HF_ENDPOINT: {{ .Values.global.HF_ENDPOINT | quote}}
   {{- end }}
   http_proxy: {{ .Values.global.http_proxy | quote }}
   https_proxy: {{ .Values.global.https_proxy | quote }}
-  {{- if and (not .Values.TGI_LLM_ENDPOINT) (or .Values.global.http_proxy .Values.global.https_proxy) }}
-  no_proxy: "{{ .Release.Name }}-tgi,{{ .Values.global.no_proxy }}"
+  {{- if or .Values.global.http_proxy .Values.global.https_proxy }}
+  no_proxy: "{{ .Release.Name }}-tgi,{{ .Release.Name }}-vllm,{{ .Values.global.no_proxy }}"
   {{- else }}
   no_proxy: {{ .Values.global.no_proxy | quote }}
   {{- end }}
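
The template now always emits both endpoints, each defaulting to the sibling service named after the release, and adds both service names to no_proxy whenever a proxy is set. A sketch of how one might inspect the rendered ConfigMap (release name "llm-uservice" is an assumption):

# Render only the ConfigMap and check the backend settings
helm template llm-uservice helm-charts/common/llm-uservice \
  --set LLM_MODEL_ID=Intel/neural-chat-7b-v3-3 \
  --show-only templates/configmap.yaml
# By inspection of the template above, the defaults should include:
#   vLLM_ENDPOINT: "http://llm-uservice-vllm"
#   LLM_MODEL: "Intel/neural-chat-7b-v3-3"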

helm-charts/common/llm-uservice/values.yaml (6 additions, 0 deletions)

@@ -7,9 +7,15 @@
 
 tgi:
   enabled: false
+vllm:
+  enabled: false
 
 replicaCount: 1
+# For tgi
 TGI_LLM_ENDPOINT: ""
+# For vllm, set the LLM_MODEL_ID the same as vllm sub chart
+vLLM_ENDPOINT: ""
+LLM_MODEL_ID: ""
 
 # Set it as a non-null string, such as true, if you want to enable logging facility,
 # otherwise, keep it as "" to disable it.
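
Because vLLM_ENDPOINT defaults to empty, the microservice targets the bundled vllm subchart unless an endpoint is supplied; an external vllm server can be pointed at instead (the URL below is purely illustrative):

# Use an already-running vllm server rather than the bundled subchart
helm install llm-uservice helm-charts/common/llm-uservice \
  --set vLLM_ENDPOINT=http://my-vllm-server:8000 \
  --set LLM_MODEL_ID=Intel/neural-chat-7b-v3-3
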
helm-charts/common/llm-uservice/vllm-values.yaml (new file: 17 additions, 0 deletions)

@@ -0,0 +1,17 @@
+# Copyright (C) 2024 Intel Corporation
+# SPDX-License-Identifier: Apache-2.0
+
+# Default values for llm-uservice.
+# This is a YAML-formatted file.
+# Declare variables to be passed into your templates.
+tgi:
+  enabled: false
+vllm:
+  enabled: true
+  LLM_MODEL_ID: Intel/neural-chat-7b-v3-3
+
+vLLM_ENDPOINT: ""
+LLM_MODEL_ID: Intel/neural-chat-7b-v3-3
+image:
+  repository: opea/llm-vllm
+  tag: "latest"

helm-charts/common/vllm/gaudi-values.yaml (5 additions, 5 deletions)

@@ -6,14 +6,14 @@
 # Declare variables to be passed into your templates.
 
 image:
-  repository: opea/llm-vllm-hpu
+  repository: opea/vllm-hpu
   tag: "latest"
 
-VLLM_CPU_KVCACHE_SPACE: "40"
-
+# VLLM_CPU_KVCACHE_SPACE: "40"
+OMPI_MCA_btl_vader_single_copy_mechanism: none
+extraCmdArgs: ["--enforce-eager","--tensor-parallel-size","1","--block-size","128","--max-num-seqs","256","--max-seq_len-to-capture","2048"]
 # Workaround for current HPU image with start command /bin/bash
-# extraCmdArgs: ["--enforce-eager","--tensor-parallel-size","1","--block-size","128","--max-num-seqs","256","--max-seq_len-to-capture","2048"]
-extraCmdArgs: ["/bin/bash","-c","python3 -m vllm.entrypoints.openai.api_server --enforce-eager --model Intel/neural-chat-7b-v3-3 --tensor-parallel-size 1 --host 0.0.0.0 --port 2080 --download-dir /data --block-size 128 --max-num-seqs 256 --max-seq_len-to-capture 2048"]
+# extraCmdArgs: ["/bin/bash","-c","python3 -m vllm.entrypoints.openai.api_server --enforce-eager --model Intel/neural-chat-7b-v3-3 --tensor-parallel-size 1 --host 0.0.0.0 --port 2080 --download-dir /data --block-size 128 --max-num-seqs 256 --max-seq_len-to-capture 2048"]
 resources:
   limits:
     habana.ai/gaudi: 1
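
These overrides swap in the renamed opea/vllm-hpu image and pass vllm's flags directly as container arguments, keeping the /bin/bash form only as a commented fallback for images whose start command is a shell. A deployment sketch (the token flag is an assumption carried over from the other charts here):

helm install vllm helm-charts/common/vllm \
  -f helm-charts/common/vllm/gaudi-values.yaml \
  --set global.HUGGINGFACEHUB_API_TOKEN=<token>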

helm-charts/common/vllm/templates/configmap.yaml (3 additions, 0 deletions)

@@ -23,3 +23,6 @@ data:
   {{- if .Values.VLLM_CPU_KVCACHE_SPACE }}
   VLLM_CPU_KVCACHE_SPACE: {{ .Values.VLLM_CPU_KVCACHE_SPACE | quote}}
   {{- end }}
+  {{- if .Values.OMPI_MCA_btl_vader_single_copy_mechanism }}
+  OMPI_MCA_btl_vader_single_copy_mechanism: {{ .Values.OMPI_MCA_btl_vader_single_copy_mechanism | quote}}
+  {{- end }}
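
The OMPI variable follows the same optional pattern as VLLM_CPU_KVCACHE_SPACE: it is written into the ConfigMap only when set in values. One hedged way to confirm (release name is an assumption):

helm template vllm helm-charts/common/vllm \
  -f helm-charts/common/vllm/gaudi-values.yaml \
  --show-only templates/configmap.yaml
# Expected to include:
#   OMPI_MCA_btl_vader_single_copy_mechanism: "none"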
