
Commit 9b71d65

upgrade ghcr.io/huggingface/tei-gaudi:1.5.0 into ghcr.io/huggingface/text-embeddings-inference:hpu-1.7 (#2119)
Signed-off-by: chensuyue <[email protected]>
1 parent fe255f2 commit 9b71d65
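
To roll this tag bump out on an already-running Gaudi deployment, only the affected TEI service needs to be re-pulled and recreated. A minimal sketch, assuming the stack was launched from one of the ChatQnA compose files changed below and the commands are run from that compose directory:

# Pull the new HPU image and recreate just the reranking service
docker compose pull tei-reranking-service
docker compose up -d tei-reranking-service

# The container should now report the hpu-1.7 tag
docker ps --filter name=tei-reranking-gaudi-server --format '{{.Image}}'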

File tree

11 files changed (+16, -16 lines changed)


ChatQnA/docker_compose/intel/hpu/gaudi/README.md

Lines changed: 6 additions & 6 deletions
@@ -92,7 +92,7 @@ f090fe262c74 opea/chatqna-ui:latest
 ec97d7651c96 opea/chatqna:latest "python chatqna.py" 2 minutes ago Up 2 minutes 0.0.0.0:8888->8888/tcp, :::8888->8888/tcp chatqna-gaudi-backend-server
 a61fb7dc4fae opea/dataprep:latest "sh -c 'python $( [ …" 2 minutes ago Up 2 minutes 0.0.0.0:6007->5000/tcp, [::]:6007->5000/tcp dataprep-redis-server
 d560c232b120 opea/retriever:latest "python opea_retriev…" 2 minutes ago Up 2 minutes 0.0.0.0:7000->7000/tcp, :::7000->7000/tcp retriever-redis-server
-a1d7ca2d3787 ghcr.io/huggingface/tei-gaudi:1.5.0 "text-embeddings-rou…" 2 minutes ago Up 2 minutes 0.0.0.0:8808->80/tcp, [::]:8808->80/tcp tei-reranking-gaudi-server
+a1d7ca2d3787 ghcr.io/huggingface/text-embeddings-inference:hpu-1.7 "text-embeddings-rou…" 2 minutes ago Up 2 minutes 0.0.0.0:8808->80/tcp, [::]:8808->80/tcp tei-reranking-gaudi-server
 9a9f3fd4fd4c opea/vllm-gaudi:latest "python3 -m vllm.ent…" 2 minutes ago Exited (1) 2 minutes ago vllm-gaudi-server
 1ab9bbdf5182 redis/redis-stack:7.2.0-v9 "/entrypoint.sh" 2 minutes ago Up 2 minutes 0.0.0.0:6379->6379/tcp, :::6379->6379/tcp, 0.0.0.0:8001->8001/tcp, :::8001->8001/tcp redis-vector-db
 9ee0789d819e ghcr.io/huggingface/text-embeddings-inference:cpu-1.7 "text-embeddings-rou…" 2 minutes ago Up 2 minutes 0.0.0.0:8090->80/tcp, [::]:8090->80/tcp tei-embedding-gaudi-server
@@ -150,7 +150,7 @@ The default deployment utilizes Gaudi devices primarily for the `vllm-service`,
 | dataprep-redis-service | opea/dataprep:latest | No |
 | tei-embedding-service | ghcr.io/huggingface/text-embeddings-inference:cpu-1.7 | No |
 | retriever | opea/retriever:latest | No |
-| tei-reranking-service | ghcr.io/huggingface/tei-gaudi:1.5.0 | 1 card |
+| tei-reranking-service | ghcr.io/huggingface/text-embeddings-inference:hpu-1.7 | 1 card |
 | vllm-service | opea/vllm-gaudi:latest | Configurable |
 | chatqna-gaudi-backend-server | opea/chatqna:latest | No |
 | chatqna-gaudi-ui-server | opea/chatqna-ui:latest | No |
@@ -166,7 +166,7 @@ The TGI (Text Generation Inference) deployment and the default deployment differ
 | dataprep-redis-service | opea/dataprep:latest | No |
 | tei-embedding-service | ghcr.io/huggingface/text-embeddings-inference:cpu-1.7 | No |
 | retriever | opea/retriever:latest | No |
-| tei-reranking-service | ghcr.io/huggingface/tei-gaudi:1.5.0 | 1 card |
+| tei-reranking-service | ghcr.io/huggingface/text-embeddings-inference:hpu-1.7 | 1 card |
 | **tgi-service** | ghcr.io/huggingface/tgi-gaudi:2.3.1 | Configurable |
 | chatqna-gaudi-backend-server | opea/chatqna:latest | No |
 | chatqna-gaudi-ui-server | opea/chatqna-ui:latest | No |
@@ -186,7 +186,7 @@ The TGI (Text Generation Inference) deployment and the default deployment differ
 | dataprep-redis-service | opea/dataprep:latest | No |
 | tei-embedding-service | ghcr.io/huggingface/text-embeddings-inference:cpu-1.7 | No |
 | retriever | opea/retriever:latest | No |
-| tei-reranking-service | ghcr.io/huggingface/tei-gaudi:1.5.0 | 1 card |
+| tei-reranking-service | ghcr.io/huggingface/text-embeddings-inference:hpu-1.7 | 1 card |
 | vllm-service | opea/vllm-gaudi:latest | Configurable |
 | **llm-faqgen** | **opea/llm-faqgen:latest** | No |
 | chatqna-gaudi-backend-server | opea/chatqna:latest | No |
@@ -224,7 +224,7 @@ The _compose_guardrails.yaml_ Docker Compose file introduces enhancements over t
 | _guardrails_ | opea/guardrails:latest | No | No |
 | tei-embedding-service | ghcr.io/huggingface/text-embeddings-inference:cpu-1.7 | No | No |
 | retriever | opea/retriever:latest | No | No |
-| tei-reranking-service | ghcr.io/huggingface/tei-gaudi:1.5.0 | 1 card | No |
+| tei-reranking-service | ghcr.io/huggingface/text-embeddings-inference:hpu-1.7 | 1 card | No |
 | vllm-service | opea/vllm-gaudi:latest | Configurable | Yes |
 | chatqna-gaudi-backend-server | opea/chatqna:latest | No | No |
 | chatqna-gaudi-ui-server | opea/chatqna-ui:latest | No | No |
@@ -260,7 +260,7 @@ The table provides a comprehensive overview of the ChatQnA services utilized acr
 | dataprep-redis-service | opea/dataprep:latest | No | Prepares data and interacts with the Redis database. |
 | tei-embedding-service | ghcr.io/huggingface/text-embeddings-inference:cpu-1.7 | No | Provides text embedding services, often using Hugging Face models. |
 | retriever | opea/retriever:latest | No | Retrieves data from the Redis database and interacts with embedding services. |
-| tei-reranking-service | ghcr.io/huggingface/tei-gaudi:1.5.0 | Yes | Reranks text embeddings, typically using Gaudi hardware for enhanced performance. |
+| tei-reranking-service | ghcr.io/huggingface/text-embeddings-inference:hpu-1.7 | Yes | Reranks text embeddings, typically using Gaudi hardware for enhanced performance. |
 | vllm-service | opea/vllm-gaudi:latest | No | Handles large language model (LLM) tasks, utilizing Gaudi hardware. |
 | tgi-service | ghcr.io/huggingface/tgi-gaudi:2.3.1 | Yes | Specific to the TGI deployment, focuses on text generation inference using Gaudi hardware. |
 | tgi-guardrails-service | ghcr.io/huggingface/tgi-gaudi:2.3.1 | Yes | Provides guardrails functionality, ensuring safe operations within defined limits. |
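
After redeploying with the updated README, the two TEI containers can be cross-checked against the table above; the reranker should run the HPU build and the embedder the CPU build. A minimal sketch using the container names from the docker ps listing:

# Expected: ghcr.io/huggingface/text-embeddings-inference:hpu-1.7
docker inspect -f '{{.Config.Image}}' tei-reranking-gaudi-server
# Expected: ghcr.io/huggingface/text-embeddings-inference:cpu-1.7
docker inspect -f '{{.Config.Image}}' tei-embedding-gaudi-server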

ChatQnA/docker_compose/intel/hpu/gaudi/compose.yaml

Lines changed: 1 addition & 1 deletion
@@ -70,7 +70,7 @@ services:
       HF_TOKEN: ${HF_TOKEN}
     restart: unless-stopped
   tei-reranking-service:
-    image: ghcr.io/huggingface/tei-gaudi:1.5.0
+    image: ghcr.io/huggingface/text-embeddings-inference:hpu-1.7
     container_name: tei-reranking-gaudi-server
     ports:
       - "8808:80"

ChatQnA/docker_compose/intel/hpu/gaudi/compose_faqgen.yaml

Lines changed: 1 addition & 1 deletion
@@ -64,7 +64,7 @@ services:
       HF_TOKEN: ${HF_TOKEN}
     restart: unless-stopped
   tei-reranking-service:
-    image: ghcr.io/huggingface/tei-gaudi:1.5.0
+    image: ghcr.io/huggingface/text-embeddings-inference:hpu-1.7
     container_name: tei-reranking-gaudi-server
     ports:
       - "8808:80"

ChatQnA/docker_compose/intel/hpu/gaudi/compose_faqgen_tgi.yaml

Lines changed: 1 addition & 1 deletion
@@ -64,7 +64,7 @@ services:
       HF_TOKEN: ${HF_TOKEN}
     restart: unless-stopped
   tei-reranking-service:
-    image: ghcr.io/huggingface/tei-gaudi:1.5.0
+    image: ghcr.io/huggingface/text-embeddings-inference:hpu-1.7
     container_name: tei-reranking-gaudi-server
     ports:
       - "8808:80"

ChatQnA/docker_compose/intel/hpu/gaudi/compose_guardrails.yaml

Lines changed: 1 addition & 1 deletion
@@ -109,7 +109,7 @@ services:
       RETRIEVER_COMPONENT_NAME: "OPEA_RETRIEVER_REDIS"
     restart: unless-stopped
   tei-reranking-service:
-    image: ghcr.io/huggingface/tei-gaudi:1.5.0
+    image: ghcr.io/huggingface/text-embeddings-inference:hpu-1.7
     container_name: tei-reranking-gaudi-server
     ports:
       - "8808:80"

ChatQnA/docker_compose/intel/hpu/gaudi/compose_tgi.yaml

Lines changed: 1 addition & 1 deletion
@@ -65,7 +65,7 @@ services:
       RETRIEVER_COMPONENT_NAME: "OPEA_RETRIEVER_REDIS"
     restart: unless-stopped
   tei-reranking-service:
-    image: ghcr.io/huggingface/tei-gaudi:1.5.0
+    image: ghcr.io/huggingface/text-embeddings-inference:hpu-1.7
     container_name: tei-reranking-gaudi-server
     ports:
       - "8808:80"

ChatQnA/docker_compose/intel/hpu/gaudi/how_to_validate_service.md

Lines changed: 1 addition & 1 deletion
@@ -48,7 +48,7 @@ f810f3b4d329 opea/embedding:latest "python embed
 2fa17d84605f opea/dataprep:latest "python prepare_doc_…" 2 minutes ago Up 2 minutes 0.0.0.0:6007->6007/tcp, :::6007->5000/tcp dataprep-redis-server
 69e1fb59e92c opea/retriever:latest "/home/user/comps/re…" 2 minutes ago Up 2 minutes 0.0.0.0:7000->7000/tcp, :::7000->7000/tcp retriever-redis-server
 313b9d14928a opea/reranking-tei:latest "python reranking_te…" 2 minutes ago Up 2 minutes 0.0.0.0:8000->8000/tcp, :::8000->8000/tcp reranking-tei-gaudi-server
-174bd43fa6b5 ghcr.io/huggingface/tei-gaudi:1.5.0 "text-embeddings-rou…" 2 minutes ago Up 2 minutes 0.0.0.0:8090->80/tcp, :::8090->80/tcp tei-embedding-gaudi-server
+174bd43fa6b5 ghcr.io/huggingface/text-embeddings-inference:hpu-1.7 "text-embeddings-rou…" 2 minutes ago Up 2 minutes 0.0.0.0:8090->80/tcp, :::8090->80/tcp tei-embedding-gaudi-server
 05c40b636239 ghcr.io/huggingface/tgi-gaudi:2.3.1 "text-generation-lau…" 2 minutes ago Exited (1) About a minute ago tgi-gaudi-server
 74084469aa33 redis/redis-stack:7.2.0-v9 "/entrypoint.sh" 2 minutes ago Up 2 minutes 0.0.0.0:6379->6379/tcp, :::6379->6379/tcp, 0.0.0.0:8001->8001/tcp, :::8001->8001/tcp redis-vector-db
 88399dbc9e43 ghcr.io/huggingface/text-embeddings-inference:cpu-1.7 "text-embeddings-rou…" 2 minutes ago Up 2 minutes 0.0.0.0:8808->80/tcp, :::8808->80/tcp tei-reranking-gaudi-server
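
The same style of check works for the embedding container validated in this guide, which publishes TEI on host port 8090. A minimal sketch against TEI's /embed route, with a placeholder input:

# Returns a JSON array holding one embedding vector for the input string
curl http://localhost:8090/embed \
  -X POST \
  -H 'Content-Type: application/json' \
  -d '{"inputs": "What is Deep Learning?"}'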

ChatQnA/kubernetes/gmc/README.md

Lines changed: 1 addition & 1 deletion
@@ -24,7 +24,7 @@ The ChatQnA uses the below prebuilt images if you choose a Xeon deployment
 Should you desire to use the Gaudi accelerator, two alternate images are used for the embedding and llm services.
 For Gaudi:

-tei-embedding-service: ghcr.io/huggingface/tei-gaudi:1.5.0
+tei-embedding-service: ghcr.io/huggingface/text-embeddings-inference:hpu-1.7
 tgi-service: ghcr.io/huggingface/tgi-gaudi:2.3.1

DocIndexRetriever/docker_compose/intel/hpu/gaudi/compose.yaml

Lines changed: 1 addition & 1 deletion
@@ -36,7 +36,7 @@ services:
       retries: 10
     restart: unless-stopped
   tei-embedding-service:
-    image: ghcr.io/huggingface/tei-gaudi:1.5.0
+    image: ghcr.io/huggingface/text-embeddings-inference:hpu-1.7
     entrypoint: /bin/sh -c "apt-get update && apt-get install -y curl && text-embeddings-router --json-output --model-id ${EMBEDDING_MODEL_ID} --auto-truncate"
     container_name: tei-embedding-gaudi-server
     ports:
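
In this file the swapped image keeps the custom entrypoint, which installs curl and starts text-embeddings-router against ${EMBEDDING_MODEL_ID}. A minimal sketch for bringing up just this service; the model ID is an example value, not something fixed by this commit:

# Example embedding model; substitute the model used in your deployment
export EMBEDDING_MODEL_ID="BAAI/bge-base-en-v1.5"
docker compose -f compose.yaml up -d tei-embedding-service

# Follow the router logs until model loading finishes
docker logs -f tei-embedding-gaudi-server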

DocIndexRetriever/docker_compose/intel/hpu/gaudi/compose_milvus.yaml

Lines changed: 1 addition & 1 deletion
@@ -95,7 +95,7 @@ services:
         condition: service_healthy

   tei-embedding-service:
-    image: ghcr.io/huggingface/tei-gaudi:1.5.0
+    image: ghcr.io/huggingface/text-embeddings-inference:hpu-1.7
     entrypoint: /bin/sh -c "apt-get update && apt-get install -y curl && text-embeddings-router --json-output --model-id ${EMBEDDING_MODEL_ID} --auto-truncate"
     container_name: tei-embedding-gaudi-server
     ports:
