@@ -11,26 +11,26 @@ if [[ -n "${HF_MODEL_REVISION}" ]]; then
11
11
fi
12
12
13
13
# Require the NVIDIA driver CLI: without it we cannot detect a GPU at all.
# NOTE(review): an earlier revision fell back to the CPU router here; this
# version intentionally treats a missing nvidia-smi as a hard error.
if ! command -v nvidia-smi &> /dev/null; then
  echo "Error: 'nvidia-smi' command not found."
  exit 1
fi

# Resolve the CUDA compute capability as a bare integer (e.g. "8.6" -> 86).
# CUDA_COMPUTE_CAP, when set, overrides auto-detection.
if [[ -z "${CUDA_COMPUTE_CAP}" ]]
then
  # --format=csv prints a header on line 1 and the value on line 2;
  # take line 2 and strip the dot to get the integer form.
  compute_cap=$(nvidia-smi --query-gpu=compute_cap --format=csv | sed -n '2p' | sed 's/\.//g')
else
  compute_cap=$CUDA_COMPUTE_CAP
fi

# Dispatch to the router binary compiled for this compute capability.
if [[ ${compute_cap} -eq 75 ]]
then
  text-embeddings-router-75 --port 8080 --json-output
elif [[ ${compute_cap} -ge 80 && ${compute_cap} -lt 90 ]]
then
  text-embeddings-router-80 --port 8080 --json-output
elif [[ ${compute_cap} -eq 90 ]]
then
  text-embeddings-router-90 --port 8080 --json-output
else
  echo "cuda compute cap ${compute_cap} is not supported"; exit 1
fi