update tgi-gaudi version (#389)

chensuyue · Spycsh · web-flow · commit 3505bd25a4f3 · 2024-07-12T15:01:03.000+08:00
Signed-off-by: chensuyue <suyue.chen@intel.com>
Co-authored-by: Spycsh <sihan.chen@intel.com>
diff --git a/AudioQnA/docker/gaudi/docker_compose.yaml b/AudioQnA/docker/gaudi/docker_compose.yaml
@@ -54,7 +54,7 @@ services:
     environment:
       TTS_ENDPOINT: ${TTS_ENDPOINT}
   tgi-service:
-    image: ghcr.io/huggingface/tgi-gaudi:1.2.1
+    image: ghcr.io/huggingface/tgi-gaudi:2.0.1
     container_name: tgi-gaudi-server
     ports:
       - "3006:80"
@@ -73,7 +73,7 @@ services:
     cap_add:
       - SYS_NICE
     ipc: host
-    command: --model-id ${LLM_MODEL_ID}
+    command: --model-id ${LLM_MODEL_ID} --max-input-length 1024 --max-total-tokens 2048
   llm:
     image: opea/llm-tgi:latest
     container_name: llm-tgi-gaudi-server
diff --git a/AudioQnA/tests/test_audioqna_on_gaudi.sh b/AudioQnA/tests/test_audioqna_on_gaudi.sh
@@ -20,7 +20,7 @@ function build_docker_images() {
     docker build -t opea/speecht5:latest  -f comps/tts/speecht5/Dockerfile_hpu .
     docker build -t opea/tts:latest  -f comps/tts/Dockerfile .
 
-    docker pull ghcr.io/huggingface/tgi-gaudi:1.2.1
+    docker pull ghcr.io/huggingface/tgi-gaudi:2.0.1
 
     cd ..
 
@@ -76,9 +76,17 @@ function start_services() {
 
 function validate_megaservice() {
     result=$(http_proxy="" curl http://${ip_address}:3008/v1/audioqna -XPOST -d '{"audio": "UklGRigAAABXQVZFZm10IBIAAAABAAEARKwAAIhYAQACABAAAABkYXRhAgAAAAEA", "max_tokens":64}' -H 'Content-Type: application/json')
+    echo "result is === $result"
     if [[ $result == *"AAA"* ]]; then
         echo "Result correct."
     else
+        docker logs whisper-service > $LOG_PATH/whisper-service.log
+        docker logs asr-service > $LOG_PATH/asr-service.log
+        docker logs speecht5-service > $LOG_PATH/tts-service.log
+        docker logs tts-service > $LOG_PATH/tts-service.log
+        docker logs tgi-gaudi-server > $LOG_PATH/tgi-gaudi-server.log
+        docker logs llm-tgi-gaudi-server > $LOG_PATH/llm-tgi-gaudi-server.log
+
         echo "Result wrong."
         exit 1
     fi
diff --git a/AudioQnA/tests/test_audioqna_on_xeon.sh b/AudioQnA/tests/test_audioqna_on_xeon.sh
@@ -19,7 +19,7 @@ function build_docker_images() {
     docker build -t opea/speecht5:latest -f comps/tts/speecht5/Dockerfile .
     docker build -t opea/tts:latest -f comps/tts/Dockerfile .
 
-    docker pull ghcr.io/huggingface/tgi-gaudi:1.2.1
+    docker pull ghcr.io/huggingface/tgi-gaudi:2.0.1
 
     cd $WORKPATH/docker
     docker build --no-cache -t opea/audioqna:latest -f Dockerfile .
diff --git a/ChatQnA/chatqna.yaml b/ChatQnA/chatqna.yaml
@@ -48,7 +48,7 @@ opea_micro_services:
   tgi_service:
     host: ${TGI_SERVICE_IP}
     ports: ${TGI_SERVICE_PORT}
-    image: ghcr.io/huggingface/tgi-gaudi:1.2.1
+    image: ghcr.io/huggingface/tgi-gaudi:2.0.1
     volumes:
       - "./data:/data"
     runtime: habana
diff --git a/ChatQnA/docker/gaudi/docker_compose.yaml b/ChatQnA/docker/gaudi/docker_compose.yaml
@@ -43,6 +43,7 @@ services:
       https_proxy: ${https_proxy}
       HABANA_VISIBLE_DEVICES: all
       OMPI_MCA_btl_vader_single_copy_mechanism: none
+      MAX_WARMUP_SEQUENCE_LENGTH: 512
     command: --model-id ${EMBEDDING_MODEL_ID}
   embedding:
     image: opea/embedding-tei:latest
@@ -116,7 +117,7 @@ services:
       LANGCHAIN_PROJECT: "opea-reranking-service"
     restart: unless-stopped
   tgi-service:
-    image: ghcr.io/huggingface/tgi-gaudi:2.0.0
+    image: ghcr.io/huggingface/tgi-gaudi:2.0.1
     container_name: tgi-gaudi-server
     ports:
       - "8008:80"
diff --git a/ChatQnA/tests/test_chatqna_on_gaudi.sh b/ChatQnA/tests/test_chatqna_on_gaudi.sh
@@ -20,12 +20,12 @@ function build_docker_images() {
     docker build -t opea/llm-tgi:latest -f comps/llms/text-generation/tgi/Dockerfile .
     docker build -t opea/dataprep-redis:latest -f comps/dataprep/redis/langchain/docker/Dockerfile .
 
-    cd ..
-    git clone https://github.com/huggingface/tei-gaudi
-    cd tei-gaudi/
-    docker build --no-cache -f Dockerfile-hpu -t opea/tei-gaudi:latest .
+#    cd ..
+#    git clone https://github.com/huggingface/tei-gaudi
+#    cd tei-gaudi/
+#    docker build --no-cache -f Dockerfile-hpu -t opea/tei-gaudi:latest .
 
-    docker pull ghcr.io/huggingface/tgi-gaudi:2.0.0
+    docker pull ghcr.io/huggingface/tgi-gaudi:2.0.1
     docker pull ghcr.io/huggingface/text-embeddings-inference:cpu-1.2
 
     cd $WORKPATH/docker
@@ -38,8 +38,13 @@ function build_docker_images() {
 }
 
 function start_services() {
-    cd $WORKPATH/docker/gaudi
+    # build tei-gaudi for each test instead of pull from local registry
+    cd $WORKPATH
+    git clone https://github.com/huggingface/tei-gaudi
+    cd tei-gaudi/
+    docker build --no-cache -f Dockerfile-hpu -t opea/tei-gaudi:latest .
 
+    cd $WORKPATH/docker/gaudi
     export EMBEDDING_MODEL_ID="BAAI/bge-base-en-v1.5"
     export RERANK_MODEL_ID="BAAI/bge-reranker-base"
     export LLM_MODEL_ID="Intel/neural-chat-7b-v3-3"
@@ -66,6 +71,9 @@ function start_services() {
         sed -i "s#image: opea/chatqna-ui:latest#image: opea/chatqna-ui:${IMAGE_TAG}#g" docker_compose.yaml
         sed -i "s#image: opea/chatqna-conversation-ui:latest#image: opea/chatqna-conversation-ui:${IMAGE_TAG}#g" docker_compose.yaml
         sed -i "s#image: opea/*#image: ${IMAGE_REPO}opea/#g" docker_compose.yaml
+        sed -i "s#image: ${IMAGE_REPO}opea/tei-gaudi:latest#image: opea/tei-gaudi:latest#g" docker_compose.yaml
+        echo "cat docker_compose.yaml"
+        cat docker_compose.yaml
     fi
 
     # Start Docker Containers
diff --git a/CodeGen/codegen.yaml b/CodeGen/codegen.yaml
@@ -6,7 +6,7 @@ opea_micro_services:
   tgi_service:
     host: ${TGI_SERVICE_IP}
     ports: ${TGI_SERVICE_PORT}
-    image: ghcr.io/huggingface/tgi-gaudi:1.2.1
+    image: ghcr.io/huggingface/tgi-gaudi:2.0.1
     volumes:
       - "./data:/data"
     runtime: habana
diff --git a/CodeGen/docker/gaudi/docker_compose.yaml b/CodeGen/docker/gaudi/docker_compose.yaml
@@ -6,7 +6,7 @@ version: "3.8"
 
 services:
   tgi_service:
-    image: ghcr.io/huggingface/tgi-gaudi:1.2.1
+    image: ghcr.io/huggingface/tgi-gaudi:2.0.1
     container_name: tgi-gaudi-server
     ports:
       - "8028:80"
@@ -23,7 +23,7 @@ services:
     cap_add:
       - SYS_NICE
     ipc: host
-    command: --model-id ${LLM_MODEL_ID}
+    command: --model-id ${LLM_MODEL_ID} --max-input-length 1024 --max-total-tokens 2048
   llm:
     image: opea/llm-tgi:latest
     container_name: llm-tgi-gaudi-server
diff --git a/CodeGen/tests/test_codegen_on_gaudi.sh b/CodeGen/tests/test_codegen_on_gaudi.sh
@@ -15,7 +15,7 @@ function build_docker_images() {
 
     docker build -t opea/llm-tgi:latest -f comps/llms/text-generation/tgi/Dockerfile .
 
-    docker pull ghcr.io/huggingface/tgi-gaudi:1.2.1
+    docker pull ghcr.io/huggingface/tgi-gaudi:2.0.1
 
     cd $WORKPATH/docker
     docker build --no-cache -t opea/codegen:latest -f Dockerfile .
diff --git a/CodeTrans/codetrans.yaml b/CodeTrans/codetrans.yaml
@@ -6,7 +6,7 @@ opea_micro_services:
   tgi_service:
     host: ${TGI_SERVICE_IP}
     ports: ${TGI_SERVICE_PORT}
-    image: ghcr.io/huggingface/tgi-gaudi:1.2.1
+    image: ghcr.io/huggingface/tgi-gaudi:2.0.1
     volumes:
       - "./data:/data"
     runtime: habana
diff --git a/CodeTrans/docker/gaudi/docker_compose.yaml b/CodeTrans/docker/gaudi/docker_compose.yaml
@@ -6,7 +6,7 @@ version: "3.8"
 
 services:
   tgi_service:
-    image: ghcr.io/huggingface/tgi-gaudi:1.2.1
+    image: ghcr.io/huggingface/tgi-gaudi:2.0.1
     container_name: codetrans-tgi-service
     ports:
       - "8008:80"
@@ -23,7 +23,7 @@ services:
     cap_add:
       - SYS_NICE
     ipc: host
-    command: --model-id ${LLM_MODEL_ID}
+    command: --model-id ${LLM_MODEL_ID} --max-input-length 1024 --max-total-tokens 2048
   llm:
     image: opea/llm-tgi:latest
     container_name: llm-tgi-gaudi-server
diff --git a/CodeTrans/tests/test_codetrans_on_gaudi.sh b/CodeTrans/tests/test_codetrans_on_gaudi.sh
@@ -15,7 +15,7 @@ function build_docker_images() {
 
     docker build -t opea/llm-tgi:latest -f comps/llms/text-generation/tgi/Dockerfile .
 
-    docker pull ghcr.io/huggingface/tgi-gaudi:1.2.1
+    docker pull ghcr.io/huggingface/tgi-gaudi:2.0.1
 
     cd $WORKPATH/docker
     docker build --no-cache -t opea/codetrans:latest -f Dockerfile .
diff --git a/DocSum/docker/gaudi/README.md b/DocSum/docker/gaudi/README.md
@@ -16,7 +16,7 @@ cd GenAIComps
 As TGI Gaudi has been officially published as a Docker image, we simply need to pull it:
 
 ```bash
-docker pull ghcr.io/huggingface/tgi-gaudi:1.2.1
+docker pull ghcr.io/huggingface/tgi-gaudi:2.0.1
 ```
 
 ### 2. Build LLM Image
@@ -46,7 +46,7 @@ docker build -t opea/docsum-ui:latest --build-arg https_proxy=$https_proxy --bui
 
 Then run the command `docker images`, you will have the following Docker Images:
 
-1. `ghcr.io/huggingface/tgi-gaudi:1.2.1`
+1. `ghcr.io/huggingface/tgi-gaudi:2.0.1`
 2. `opea/llm-docsum-tgi:latest`
 3. `opea/docsum:latest`
 4. `opea/docsum-ui:latest`
diff --git a/DocSum/docker/gaudi/docker_compose.yaml b/DocSum/docker/gaudi/docker_compose.yaml
@@ -6,7 +6,7 @@ version: "3.8"
 
 services:
   tgi_service:
-    image: ghcr.io/huggingface/tgi-gaudi:1.2.1
+    image: ghcr.io/huggingface/tgi-gaudi:2.0.1
     container_name: tgi-gaudi-server
     ports:
       - "8008:80"
@@ -22,7 +22,7 @@ services:
     cap_add:
       - SYS_NICE
     ipc: host
-    command: --model-id ${LLM_MODEL_ID}
+    command: --model-id ${LLM_MODEL_ID} --max-input-length 1024 --max-total-tokens 2048
   llm:
     image: opea/llm-docsum-tgi:latest
     container_name: llm-docsum-gaudi-server
diff --git a/DocSum/docsum.yaml b/DocSum/docsum.yaml
@@ -6,7 +6,7 @@ opea_micro_services:
   tgi_service:
     host: ${TGI_SERVICE_IP}
     ports: ${TGI_SERVICE_PORT}
-    image: ghcr.io/huggingface/tgi-gaudi:1.2.1
+    image: ghcr.io/huggingface/tgi-gaudi:2.0.1
     volumes:
       - "./data:/data"
     runtime: habana
diff --git a/DocSum/tests/test_docsum_on_gaudi.sh b/DocSum/tests/test_docsum_on_gaudi.sh
@@ -15,7 +15,7 @@ function build_docker_images() {
 
     docker build --no-cache -t opea/llm-docsum-tgi:latest --build-arg https_proxy=$https_proxy --build-arg http_proxy=$http_proxy -f comps/llms/summarization/tgi/Dockerfile .
 
-    docker pull ghcr.io/huggingface/tgi-gaudi:1.2.1
+    docker pull ghcr.io/huggingface/tgi-gaudi:2.0.1
 
     cd $WORKPATH/docker
     docker build --no-cache -t opea/docsum:latest --build-arg https_proxy=$https_proxy --build-arg http_proxy=$http_proxy -f Dockerfile .
diff --git a/SearchQnA/docker/gaudi/docker_compose.yaml b/SearchQnA/docker/gaudi/docker_compose.yaml
@@ -22,6 +22,7 @@ services:
       https_proxy: ${https_proxy}
       HABANA_VISIBLE_DEVICES: all
       OMPI_MCA_btl_vader_single_copy_mechanism: none
+      MAX_WARMUP_SEQUENCE_LENGTH: 512
     command: --model-id ${EMBEDDING_MODEL_ID}
   embedding:
     image: opea/embedding-tei:latest
@@ -86,7 +87,7 @@ services:
       LANGCHAIN_PROJECT: "opea-reranking-service"
     restart: unless-stopped
   tgi-service:
-    image: ghcr.io/huggingface/tgi-gaudi:1.2.1
+    image: ghcr.io/huggingface/tgi-gaudi:2.0.1
     container_name: tgi-gaudi-server
     ports:
       - "3006:80"
@@ -105,7 +106,7 @@ services:
     cap_add:
       - SYS_NICE
     ipc: host
-    command: --model-id ${LLM_MODEL_ID}
+    command: --model-id ${LLM_MODEL_ID} --max-input-length 1024 --max-total-tokens 2048
   llm:
     image: opea/llm-tgi:latest
     container_name: llm-tgi-gaudi-server
diff --git a/SearchQnA/tests/test_searchqna_on_gaudi.sh b/SearchQnA/tests/test_searchqna_on_gaudi.sh
@@ -2,6 +2,8 @@
 # Copyright (C) 2024 Intel Corporation
 # SPDX-License-Identifier: Apache-2.0
 
+# for test
+
 set -e
 
 WORKPATH=$(dirname "$PWD")
@@ -13,20 +15,20 @@ function build_docker_images() {
     git clone https://github.com/opea-project/GenAIComps.git
     cd GenAIComps
 
-    docker build -t opea/embedding-tei:latest  -f comps/embeddings/langchain/docker/Dockerfile .
-    docker build -t opea/web-retriever-chroma:latest  -f comps/web_retrievers/langchain/chroma/docker/Dockerfile .
-    docker build -t opea/reranking-tei:latest  -f comps/reranks/tei/docker/Dockerfile .
-    docker build -t opea/llm-tgi:latest  -f comps/llms/text-generation/tgi/Dockerfile .
+    docker build --no-cache -t opea/embedding-tei:latest  -f comps/embeddings/langchain/docker/Dockerfile .
+    docker build --no-cache -t opea/web-retriever-chroma:latest  -f comps/web_retrievers/langchain/chroma/docker/Dockerfile .
+    docker build --no-cache -t opea/reranking-tei:latest  -f comps/reranks/tei/docker/Dockerfile .
+    docker build --no-cache -t opea/llm-tgi:latest  -f comps/llms/text-generation/tgi/Dockerfile .
 
     cd ..
     git clone https://github.com/huggingface/tei-gaudi
     cd tei-gaudi/
     docker build --no-cache -f Dockerfile-hpu -t opea/tei-gaudi:latest .
 
     docker pull ghcr.io/huggingface/text-embeddings-inference:cpu-1.2
-    docker pull ghcr.io/huggingface/tgi-gaudi:1.2.1
+    docker pull ghcr.io/huggingface/tgi-gaudi:2.0.1
     cd $WORKPATH/docker
-    docker build -t opea/searchqna:latest -f Dockerfile .
+    docker build --no-cache -t opea/searchqna:latest -f Dockerfile .
 
     # cd $WORKPATH/docker/ui
     # docker build --no-cache -t opea/searchqna-ui:latest -f docker/Dockerfile .
@@ -87,8 +89,10 @@ function validate_megaservice() {
     if [[ $result == *"news"* ]]; then
         echo "Result correct."
     else
-        docker logs web-retriever-chroma-server
-        docker logs searchqna-gaudi-backend-server
+        docker logs web-retriever-chroma-server > ${LOG_PATH}/web-retriever-chroma-server.log
+        docker logs searchqna-gaudi-backend-server > ${LOG_PATH}/searchqna-gaudi-backend-server.log
+        docker logs tei-embedding-gaudi-server > ${LOG_PATH}/tei-embedding-gaudi-server.log
+        docker logs embedding-tei-server > ${LOG_PATH}/embedding-tei-server.log
         echo "Result wrong."
         exit 1
     fi
diff --git a/Translation/docker/gaudi/README.md b/Translation/docker/gaudi/README.md
@@ -38,7 +38,7 @@ docker build -t opea/translation-ui:latest --build-arg https_proxy=$https_proxy
 
 Then run the command `docker images`, you will have the following four Docker Images:
 
-1. `ghcr.io/huggingface/tgi-gaudi:1.2.1`
+1. `ghcr.io/huggingface/tgi-gaudi:2.0.1`
 2. `opea/gen-ai-comps:llm-tgi-gaudi-server`
 3. `opea/gen-ai-comps:translation-megaservice-server`
 4. `opea/gen-ai-comps:translation-ui-server`
diff --git a/Translation/docker/gaudi/docker_compose.yaml b/Translation/docker/gaudi/docker_compose.yaml
@@ -16,7 +16,7 @@ version: "3.8"
 
 services:
   tgi_service:
-    image: ghcr.io/huggingface/tgi-gaudi:1.2.1
+    image: ghcr.io/huggingface/tgi-gaudi:2.0.1
     container_name: tgi_gaudi_service
     ports:
       - "8008:80"
@@ -27,7 +27,7 @@ services:
       HUGGINGFACEHUB_API_TOKEN: ${HUGGINGFACEHUB_API_TOKEN}
     volumes:
       - "./data:/data"
-    command: --model-id ${LLM_MODEL_ID}
+    command: --model-id ${LLM_MODEL_ID} --max-input-length 1024 --max-total-tokens 2048
   llm:
     image: opea/llm-tgi:latest
     container_name: llm-tgi-gaudi-server
diff --git a/Translation/tests/test_translation_on_gaudi.sh b/Translation/tests/test_translation_on_gaudi.sh
@@ -17,7 +17,7 @@ function build_docker_images() {
     cd $WORKPATH/docker
     docker build --no-cache -t opea/translation:latest --build-arg https_proxy=$https_proxy --build-arg http_proxy=$http_proxy -f Dockerfile .
 
-    docker pull ghcr.io/huggingface/tgi-gaudi:1.2.1
+    docker pull ghcr.io/huggingface/tgi-gaudi:2.0.1
 
     cd $WORKPATH/docker/ui
     docker build --no-cache -t opea/translation-ui:latest --build-arg https_proxy=$https_proxy --build-arg http_proxy=$http_proxy -f docker/Dockerfile .
diff --git a/Translation/translation.yaml b/Translation/translation.yaml
@@ -6,7 +6,7 @@ opea_micro_services:
   tgi_service:
     host: ${TGI_SERVICE_IP}
     ports: ${TGI_SERVICE_PORT}
-    image: ghcr.io/huggingface/tgi-gaudi:1.2.1
+    image: ghcr.io/huggingface/tgi-gaudi:2.0.1
     volumes:
       - "./data:/data"
     runtime: habana