The command below will launch the multi-agent system with the `DocIndexRetriever` as the retrieval tool for the Worker RAG agent.
```bash
docker compose -f $WORKDIR/GenAIExamples/DocIndexRetriever/docker_compose/intel/cpu/xeon/compose.yaml -f compose_openai.yaml up -d
```
#### Models on Remote Server
When models are deployed on a remote server with Intel® AI for Enterprise Inference, a base URL and an API key are required to access them. To run the Agent microservice on Gaudi while using models deployed on a remote server, add `compose_remote.yaml` to the `docker compose` command and set additional environment variables.
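The steps above could look like the following sketch. The endpoint URL and model name are placeholder values, and `compose.yaml` stands in for whichever base compose file your deployment uses; substitute the values provided for your environment.

```shell
# Placeholder values -- replace with those provided by the owner of the
# remote inference server or the cloud service provider.
export LLM_ENDPOINT_URL="https://api.inference.example.com"
export model="meta-llama/Llama-3.3-70B-Instruct"   # hypothetical model name

# Layer compose_remote.yaml on top of the base compose file so the agent
# microservice targets the remote endpoint instead of a local model server.
docker compose -f compose.yaml -f compose_remote.yaml up -d
```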
#### Notes
- `OPENAI_API_KEY` is already set in a previous step.
- `model` overrides the value set for this environment variable in `set_env.sh`.
- `LLM_ENDPOINT_URL` is the base URL provided by the owner of the on-prem machine or the cloud service provider. It follows the format `https://<DNS>`, for example `https://api.inference.example.com`.