File tree Expand file tree Collapse file tree 5 files changed +8
-8
lines changed
text-generation-inference
server/text_generation_server Expand file tree Collapse file tree 5 files changed +8
-8
lines changed Original file line number Diff line number Diff line change 10
10
11
11
12
12
def get_export_kwargs_from_env ():
13
- batch_size = os.environ.get("HF_BATCH_SIZE", None)
13
+ batch_size = os.environ.get("MAX_BATCH_SIZE", None)
14
14
if batch_size is not None :
15
15
batch_size = int (batch_size )
16
16
sequence_length = os .environ .get ("HF_SEQUENCE_LENGTH" , None )
Original file line number Diff line number Diff line change @@ -65,7 +65,7 @@ docker run -p 8080:80 \
65
65
--net=host --privileged \
66
66
-v $(pwd)/data:/data \
67
67
-e HF_TOKEN=${HF_TOKEN} \
68
- -e HF_BATCH_SIZE=1 \
68
+ -e MAX_BATCH_SIZE=4 \
69
69
-e HF_SEQUENCE_LENGTH=1024 \
70
70
ghcr.io/huggingface/tpu-tgi:latest \
71
71
--model-id mistralai/Mistral-7B-v0.1 \
Original file line number Diff line number Diff line change 5
5
ulimit -l 68719476736
6
6
7
7
# Hugging Face Hub related
8
- if [[ -z "${BATCH_SIZE}" ]]; then
9
- BATCH_SIZE=2
8
+ if [[ -z "${MAX_BATCH_SIZE}" ]]; then
9
+ MAX_BATCH_SIZE=4
10
10
fi
11
- export BATCH_SIZE="${BATCH_SIZE}"
11
+ export MAX_BATCH_SIZE="${MAX_BATCH_SIZE}"
12
12
13
13
if [[ -z "${JSON_OUTPUT_DISABLE}" ]]; then
14
14
JSON_OUTPUT_DISABLE=--json-output
@@ -33,6 +33,6 @@ export QUANTIZATION="${QUANTIZATION}"
33
33
34
34
35
35
exec text-generation-launcher --port 8080 \
36
- --max-batch-size ${BATCH_SIZE} \
36
+ --max-batch-size ${MAX_BATCH_SIZE} \
37
37
${JSON_OUTPUT_DISABLE} \
38
38
--model-id ${MODEL_ID}
Original file line number Diff line number Diff line change @@ -109,7 +109,7 @@ def docker_launcher(
109
109
if HUGGING_FACE_HUB_TOKEN is not None :
110
110
env ["HUGGING_FACE_HUB_TOKEN" ] = HUGGING_FACE_HUB_TOKEN
111
111
112
- for var in ["HF_BATCH_SIZE", "HF_SEQUENCE_LENGTH"]:
112
+ for var in ["MAX_BATCH_SIZE", "HF_SEQUENCE_LENGTH"]:
113
113
if var in os .environ :
114
114
env [var ] = os .environ [var ]
115
115
Original file line number Diff line number Diff line change @@ -75,7 +75,7 @@ def serve(
75
75
from .server import serve
76
76
77
77
# Read environment variables forwarded by the launcher
78
- max_batch_size = int(os.environ.get("MAX_BATCH_SIZE", "1"))
78
+ max_batch_size = int(os.environ.get("MAX_BATCH_SIZE", "4"))
79
79
max_total_tokens = int (os .environ .get ("MAX_TOTAL_TOKENS" , "64" ))
80
80
81
81
# Start the server
You can’t perform that action at this time.
0 commit comments