Commit 21b5792

alexsin368, pre-commit-ci[bot], and chensuyue authored
Audioqna: fix input for TTS service, enable remote endpoints (#2101)
Signed-off-by: alexsin368 <[email protected]>
Co-authored-by: pre-commit-ci[bot] <66853113+pre-commit-ci[bot]@users.noreply.github.com>
Co-authored-by: chen, suyue <[email protected]>
1 parent 169f44b commit 21b5792

File tree

3 files changed: +135 −9 lines changed


AudioQnA/audioqna.py

Lines changed: 40 additions & 4 deletions
@@ -1,6 +1,7 @@
 # Copyright (C) 2024 Intel Corporation
 # SPDX-License-Identifier: Apache-2.0
 
+import json
 import os
 
 from comps import MegaServiceEndpoint, MicroService, ServiceOrchestrator, ServiceRoleType, ServiceType
@@ -9,14 +10,14 @@
 from fastapi import Request
 
 MEGA_SERVICE_PORT = int(os.getenv("MEGA_SERVICE_PORT", 8888))
-
 WHISPER_SERVER_HOST_IP = os.getenv("WHISPER_SERVER_HOST_IP", "0.0.0.0")
 WHISPER_SERVER_PORT = int(os.getenv("WHISPER_SERVER_PORT", 7066))
 SPEECHT5_SERVER_HOST_IP = os.getenv("SPEECHT5_SERVER_HOST_IP", "0.0.0.0")
 SPEECHT5_SERVER_PORT = int(os.getenv("SPEECHT5_SERVER_PORT", 7055))
 LLM_SERVER_HOST_IP = os.getenv("LLM_SERVER_HOST_IP", "0.0.0.0")
 LLM_SERVER_PORT = int(os.getenv("LLM_SERVER_PORT", 3006))
 LLM_MODEL_ID = os.getenv("LLM_MODEL_ID", "meta-llama/Meta-Llama-3-8B-Instruct")
+OPENAI_API_KEY = os.getenv("OPENAI_API_KEY", None)
 
 
 def align_inputs(self, inputs, cur_node, runtime_graph, llm_parameters_dict, **kwargs):
@@ -29,23 +30,57 @@ def align_inputs(self, inputs, cur_node, runtime_graph, llm_parameters_dict, **kwargs):
         next_inputs["top_p"] = llm_parameters_dict["top_p"]
         next_inputs["stream"] = inputs["stream"]  # False as default
         next_inputs["frequency_penalty"] = inputs["frequency_penalty"]
-        # next_inputs["presence_penalty"] = inputs["presence_penalty"]
-        # next_inputs["repetition_penalty"] = inputs["repetition_penalty"]
         next_inputs["temperature"] = inputs["temperature"]
         inputs = next_inputs
     elif self.services[cur_node].service_type == ServiceType.TTS:
         next_inputs = {}
-        next_inputs["text"] = inputs["choices"][0]["message"]["content"]
+        next_inputs["text"] = inputs["text"]
         next_inputs["voice"] = kwargs["voice"]
         inputs = next_inputs
     return inputs
 
 
+def align_outputs(self, data, cur_node, inputs, runtime_graph, llm_parameters_dict, **kwargs):
+    next_data = {}
+    if self.services[cur_node].service_type == ServiceType.LLM and not llm_parameters_dict["stream"]:
+        next_data["text"] = data["choices"][0]["message"]["content"]
+    else:
+        next_data = data
+
+    return next_data
+
+
+def align_generator(self, gen, **kwargs):
+    # OpenAI response format
+    # b'data:{"id":"","object":"text_completion","created":1725530204,"model":"meta-llama/Meta-Llama-3-8B-Instruct","system_fingerprint":"2.0.1-native","choices":[{"index":0,"delta":{"role":"assistant","content":"?"},"logprobs":null,"finish_reason":null}]}\n\n'
+    for line in gen:
+        line = line.decode("utf-8")
+        start = line.find("{")
+        end = line.rfind("}") + 1
+
+        json_str = line[start:end]
+        try:
+            # sometimes yield empty chunk, do a fallback here
+            json_data = json.loads(json_str)
+            if "ops" in json_data and "op" in json_data["ops"][0]:
+                if "value" in json_data["ops"][0] and isinstance(json_data["ops"][0]["value"], str):
+                    yield f"data: {repr(json_data['ops'][0]['value'].encode('utf-8'))}\n\n"
+                else:
+                    pass
+            elif "content" in json_data["choices"][0]["delta"]:
+                yield f"data: {repr(json_data['choices'][0]['delta']['content'].encode('utf-8'))}\n\n"
+        except Exception as e:
+            yield f"data: {repr(json_str.encode('utf-8'))}\n\n"
+    yield "data: [DONE]\n\n"
+
+
 class AudioQnAService:
     def __init__(self, host="0.0.0.0", port=8000):
         self.host = host
         self.port = port
         ServiceOrchestrator.align_inputs = align_inputs
+        ServiceOrchestrator.align_outputs = align_outputs
+        ServiceOrchestrator.align_generator = align_generator
         self.megaservice = ServiceOrchestrator()
 
         self.endpoint = str(MegaServiceEndpoint.AUDIO_QNA)
@@ -63,6 +98,7 @@ def add_remote_service(self):
             name="llm",
             host=LLM_SERVER_HOST_IP,
             port=LLM_SERVER_PORT,
+            api_key=OPENAI_API_KEY,
             endpoint="/v1/chat/completions",
             use_remote_service=True,
             service_type=ServiceType.LLM,
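
For context on how a caller interacts with the stream that `align_generator` re-emits: each SSE chunk carries a `repr()` of UTF-8 bytes (e.g. `data: b'Hello'`), so a client has to undo the `repr` before decoding. Below is a minimal consumer sketch, not part of this commit; the host/port, the `/v1/audioqna` route, and the `audio`/`max_tokens` request fields are assumptions based on the default AudioQnA deployment.

```python
# Sketch only: consume the SSE stream produced by align_generator above.
# Assumptions (not defined by this commit): the megaservice is reachable at
# localhost:3008 under /v1/audioqna and accepts {"audio": <base64>, ...}.
import ast

import requests


def stream_audioqna(audio_b64, url="http://localhost:3008/v1/audioqna"):
    payload = {"audio": audio_b64, "max_tokens": 64, "stream": True}
    with requests.post(url, json=payload, stream=True, timeout=120) as resp:
        resp.raise_for_status()
        for raw in resp.iter_lines(decode_unicode=True):
            if not raw or not raw.startswith("data: "):
                continue  # skip blank separators between events
            chunk = raw[len("data: "):]
            if chunk == "[DONE]":  # terminal event emitted by align_generator
                break
            try:
                # align_generator yields repr() of UTF-8 bytes, e.g. b'Hello';
                # literal_eval turns that back into bytes without using eval().
                yield ast.literal_eval(chunk).decode("utf-8")
            except (ValueError, SyntaxError):
                yield chunk  # fallback: pass the raw payload through
```

Using `ast.literal_eval` keeps the round-trip safe: it only parses Python literals such as the `b'...'` form produced above, never arbitrary expressions.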

AudioQnA/docker_compose/intel/cpu/xeon/README.md

Lines changed: 28 additions & 5 deletions
@@ -134,11 +134,34 @@ docker compose -f compose.yaml down
 
 In the context of deploying an AudioQnA pipeline on an Intel® Xeon® platform, we can pick and choose different large language model serving frameworks, or a single-language (English) or multi-language TTS component. The table below outlines the various configurations that are available as part of the application. These configurations can be used as templates and can be extended to different components available in [GenAIComps](https://github.com/opea-project/GenAIComps.git).
 
 | File                                               | Description |
 | -------------------------------------------------- | ----------- |
 | [compose.yaml](./compose.yaml)                     | Default compose file using vllm as serving framework and redis as vector database |
 | [compose_tgi.yaml](./compose_tgi.yaml)             | The LLM serving framework is TGI. All other configurations remain the same as the default |
 | [compose_multilang.yaml](./compose_multilang.yaml) | The TTS component is GPT-SoVITS. All other configurations remain the same as the default |
+| [compose_remote.yaml](./compose_remote.yaml)       | The LLM is hosted on a remote server and accessed through an endpoint. Additional environment variables need to be set before running. See [instructions](#running-llm-models-with-remote-endpoints) below. |
+
+### Running LLM models with remote endpoints
+
+When models are deployed on a remote server, a base URL and an API key are required to access them. To set up a remote server and acquire the base URL and API key, refer to [Intel® AI for Enterprise Inference](https://www.intel.com/content/www/us/en/developer/topic-technology/artificial-intelligence/enterprise-inference.html) offerings.
+
+Set the following environment variables.
+
+- `REMOTE_ENDPOINT` is the HTTPS endpoint of the remote server hosting the model of choice (e.g., https://api.example.com). **Note:** If the API for the models does not use LiteLLM, the second part of the model card needs to be appended to the URL. For example, set `REMOTE_ENDPOINT` to https://api.example.com/Llama-3.3-70B-Instruct if the model card is `meta-llama/Llama-3.3-70B-Instruct`.
+- `API_KEY` is the access token or key used to access the model(s) on the server.
+- `LLM_MODEL_ID` is the model card, which may need to be overwritten depending on what it is set to in `set_env.sh`.
+
+```bash
+export REMOTE_ENDPOINT=<https-endpoint-of-remote-server>
+export API_KEY=<your-api-key>
+export LLM_MODEL_ID=<model-card>
+```
+
+After setting these environment variables, run `docker compose` with `compose_remote.yaml`:
+
+```bash
+docker compose -f compose_remote.yaml up -d
+```
 
 ## Validate MicroServices
 
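Before bringing the stack up, it can help to sanity-check the endpoint and key directly. Below is a sketch, assuming the remote server exposes an OpenAI-compatible chat completions API under a `/v1` prefix (adjust the path if your deployment differs); it is not part of this commit.

```python
# Sketch: verify REMOTE_ENDPOINT / API_KEY / LLM_MODEL_ID before deploying.
# Assumes an OpenAI-compatible server; the "/v1" suffix is an assumption.
import os

from openai import OpenAI

client = OpenAI(
    base_url=f"{os.environ['REMOTE_ENDPOINT']}/v1",
    api_key=os.environ["API_KEY"],
)
resp = client.chat.completions.create(
    model=os.environ["LLM_MODEL_ID"],
    messages=[{"role": "user", "content": "Reply with one short sentence."}],
    max_tokens=32,
)
print(resp.choices[0].message.content)  # any reply means the endpoint and key work
```

If this fails with an authentication or 404 error, revisit the `REMOTE_ENDPOINT` note above about appending the second part of the model card to the URL.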
AudioQnA/docker_compose/intel/cpu/xeon/compose_remote.yaml

Lines changed: 67 additions & 0 deletions

@@ -0,0 +1,67 @@
+# Copyright (C) 2024 Intel Corporation
+# SPDX-License-Identifier: Apache-2.0
+
+services:
+  whisper-service:
+    image: ${REGISTRY:-opea}/whisper:${TAG:-latest}
+    container_name: whisper-service
+    ports:
+      - ${WHISPER_SERVER_PORT:-7066}:7066
+    ipc: host
+    environment:
+      no_proxy: ${no_proxy}
+      http_proxy: ${http_proxy}
+      https_proxy: ${https_proxy}
+    restart: unless-stopped
+  speecht5-service:
+    image: ${REGISTRY:-opea}/speecht5:${TAG:-latest}
+    container_name: speecht5-service
+    ports:
+      - ${SPEECHT5_SERVER_PORT:-7055}:7055
+    ipc: host
+    environment:
+      no_proxy: ${no_proxy}
+      http_proxy: ${http_proxy}
+      https_proxy: ${https_proxy}
+    restart: unless-stopped
+  audioqna-xeon-backend-server:
+    image: ${REGISTRY:-opea}/audioqna:${TAG:-latest}
+    container_name: audioqna-xeon-backend-server
+    depends_on:
+      - whisper-service
+      - speecht5-service
+    ports:
+      - "3008:8888"
+    environment:
+      - no_proxy=${no_proxy}
+      - https_proxy=${https_proxy}
+      - http_proxy=${http_proxy}
+      - MEGA_SERVICE_HOST_IP=${MEGA_SERVICE_HOST_IP}
+      - WHISPER_SERVER_HOST_IP=${WHISPER_SERVER_HOST_IP}
+      - WHISPER_SERVER_PORT=${WHISPER_SERVER_PORT}
+      - LLM_SERVER_HOST_IP=${REMOTE_ENDPOINT}
+      - LLM_SERVER_PORT=${LLM_SERVER_PORT}
+      - LLM_MODEL_ID=${LLM_MODEL_ID}
+      - OPENAI_API_KEY=${API_KEY}
+      - SPEECHT5_SERVER_HOST_IP=${SPEECHT5_SERVER_HOST_IP}
+      - SPEECHT5_SERVER_PORT=${SPEECHT5_SERVER_PORT}
+    ipc: host
+    restart: always
+  audioqna-xeon-ui-server:
+    image: ${REGISTRY:-opea}/audioqna-ui:${TAG:-latest}
+    container_name: audioqna-xeon-ui-server
+    depends_on:
+      - audioqna-xeon-backend-server
+    ports:
+      - "5173:5173"
+    environment:
+      - no_proxy=${no_proxy}
+      - https_proxy=${https_proxy}
+      - http_proxy=${http_proxy}
+      - CHAT_URL=${BACKEND_SERVICE_ENDPOINT}
+    ipc: host
+    restart: always
+
+networks:
+  default:
+    driver: bridge
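
Once the containers are up, a one-shot smoke test against the backend (mapped to host port 3008 above) might look like the sketch below. The `/v1/audioqna` route and the request/response shapes (base64-encoded WAV in, base64-encoded speech out) are assumptions drawn from the default AudioQnA examples, not something this compose file guarantees.

```python
# Sketch: non-streaming smoke test of the audioqna-xeon-backend-server.
import base64

import requests

with open("sample.wav", "rb") as f:  # any short spoken question
    audio_b64 = base64.b64encode(f.read()).decode("utf-8")

resp = requests.post(
    "http://localhost:3008/v1/audioqna",
    json={"audio": audio_b64, "max_tokens": 64},
    timeout=300,
)
resp.raise_for_status()

# Response format assumed: a base64-encoded audio payload from the TTS stage.
with open("answer.wav", "wb") as f:
    f.write(base64.b64decode(resp.json()))
```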
