@@ -11,26 +11,26 @@ if [[ -n "${HF_MODEL_REVISION}" ]]; then
11
11
fi
12
12
13
13
# Require the NVIDIA driver CLI: without it we cannot detect a GPU at all.
# NOTE(review): an earlier revision fell back to the CPU router here; this
# version intentionally treats a missing nvidia-smi as a hard error.
if ! command -v nvidia-smi &> /dev/null; then
  echo "Error: 'nvidia-smi' command not found."
  exit 1
fi

# Resolve the CUDA compute capability as a bare integer (e.g. "8.6" -> 86).
# CUDA_COMPUTE_CAP, when set, overrides auto-detection.
if [[ -z "${CUDA_COMPUTE_CAP}" ]]
then
  # --format=csv prints a header on line 1 and the value on line 2;
  # take line 2 and strip the dot to get the integer form.
  compute_cap=$(nvidia-smi --query-gpu=compute_cap --format=csv | sed -n '2p' | sed 's/\.//g')
else
  compute_cap=$CUDA_COMPUTE_CAP
fi

# Dispatch to the router binary compiled for this compute capability.
if [[ ${compute_cap} -eq 75 ]]
then
  text-embeddings-router-75 --port 8080 --json-output
elif [[ ${compute_cap} -ge 80 && ${compute_cap} -lt 90 ]]
then
  text-embeddings-router-80 --port 8080 --json-output
elif [[ ${compute_cap} -eq 90 ]]
then
  text-embeddings-router-90 --port 8080 --json-output
else
  echo "cuda compute cap ${compute_cap} is not supported"; exit 1
fi