Commit ffa990d
🧹 Cleanup of the batch size environment variables (#121)
* Refactor HF_BATCH_SIZE and BATCH_SIZE into MAX_BATCH_SIZE
* Change the default batch size to 4
1 parent: 8c2c199 · commit: ffa990d
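In short (summarized from the diffs below): HF_BATCH_SIZE (model export) and BATCH_SIZE (Docker entrypoint) are merged into a single MAX_BATCH_SIZE variable, and the defaults move from 2 (entrypoint) and 1 (server CLI) to 4 in both places.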

File tree

5 files changed: +8 −8 lines


optimum/tpu/model.py

Lines changed: 1 addition & 1 deletion

@@ -10,7 +10,7 @@


 def get_export_kwargs_from_env():
-    batch_size = os.environ.get("HF_BATCH_SIZE", None)
+    batch_size = os.environ.get("MAX_BATCH_SIZE", None)
     if batch_size is not None:
         batch_size = int(batch_size)
     sequence_length = os.environ.get("HF_SEQUENCE_LENGTH", None)
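For context, this is roughly how an env-driven export helper like get_export_kwargs_from_env tends to assemble its result. A minimal sketch, assuming (beyond what the diff shows) that the function returns a kwargs dict and skips unset values:

import os

def get_export_kwargs_from_env():
    # Renamed variable: MAX_BATCH_SIZE now drives the export batch size.
    batch_size = os.environ.get("MAX_BATCH_SIZE", None)
    if batch_size is not None:
        batch_size = int(batch_size)
    sequence_length = os.environ.get("HF_SEQUENCE_LENGTH", None)
    if sequence_length is not None:
        sequence_length = int(sequence_length)
    # Assumed shape: only forward values that were actually provided.
    kwargs = {"batch_size": batch_size, "sequence_length": sequence_length}
    return {k: v for k, v in kwargs.items() if v is not None}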

text-generation-inference/README.md

Lines changed: 1 addition & 1 deletion

@@ -65,7 +65,7 @@ docker run -p 8080:80 \
     --net=host --privileged \
     -v $(pwd)/data:/data \
     -e HF_TOKEN=${HF_TOKEN} \
-    -e HF_BATCH_SIZE=1 \
+    -e MAX_BATCH_SIZE=4 \
     -e HF_SEQUENCE_LENGTH=1024 \
     ghcr.io/huggingface/tpu-tgi:latest \
     --model-id mistralai/Mistral-7B-v0.1 \
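Once the container from this README snippet is up, a quick way to confirm the server accepts requests; a smoke-test sketch, not part of this repo, assuming the port mapping above and TGI's standard /generate endpoint:

import requests

# Host port 8080 maps to the container's port 80 (see the README command).
resp = requests.post(
    "http://localhost:8080/generate",
    json={"inputs": "Deep learning is", "parameters": {"max_new_tokens": 16}},
)
resp.raise_for_status()
print(resp.json()["generated_text"])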

text-generation-inference/docker/entrypoint.sh

Lines changed: 4 additions & 4 deletions

@@ -5,10 +5,10 @@
 ulimit -l 68719476736

 # Hugging Face Hub related
-if [[ -z "${BATCH_SIZE}" ]]; then
-  BATCH_SIZE=2
+if [[ -z "${MAX_BATCH_SIZE}" ]]; then
+  MAX_BATCH_SIZE=4
 fi
-export BATCH_SIZE="${BATCH_SIZE}"
+export MAX_BATCH_SIZE="${MAX_BATCH_SIZE}"

 if [[ -z "${JSON_OUTPUT_DISABLE}" ]]; then
   JSON_OUTPUT_DISABLE=--json-output

@@ -33,6 +33,6 @@ export QUANTIZATION="${QUANTIZATION}"


 exec text-generation-launcher --port 8080 \
-  --max-batch-size ${BATCH_SIZE} \
+  --max-batch-size ${MAX_BATCH_SIZE} \
   ${JSON_OUTPUT_DISABLE} \
   --model-id ${MODEL_ID}
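Worth noting as a design choice: each if [[ -z ... ]] / assignment / export triple in this script is equivalent to bash's default-expansion idiom, e.g. export MAX_BATCH_SIZE="${MAX_BATCH_SIZE:-4}". The commit keeps the existing verbose pattern and only renames the variable and its default.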

text-generation-inference/integration-tests/conftest.py

Lines changed: 1 addition & 1 deletion

@@ -109,7 +109,7 @@ def docker_launcher(
     if HUGGING_FACE_HUB_TOKEN is not None:
         env["HUGGING_FACE_HUB_TOKEN"] = HUGGING_FACE_HUB_TOKEN

-    for var in ["HF_BATCH_SIZE", "HF_SEQUENCE_LENGTH"]:
+    for var in ["MAX_BATCH_SIZE", "HF_SEQUENCE_LENGTH"]:
         if var in os.environ:
             env[var] = os.environ[var]

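Because docker_launcher only copies these variables when they are set in the host environment, overriding the batch size at test time is just a matter of exporting it first; a hypothetical invocation (the test path is assumed from this repo's layout):

import os
import subprocess

# MAX_BATCH_SIZE is forwarded into the container by docker_launcher's loop above.
os.environ["MAX_BATCH_SIZE"] = "8"
subprocess.run(
    ["pytest", "text-generation-inference/integration-tests"],
    check=True,
)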

text-generation-inference/server/text_generation_server/cli.py

Lines changed: 1 addition & 1 deletion

@@ -75,7 +75,7 @@ def serve(
     from .server import serve

     # Read environment variables forwarded by the launcher
-    max_batch_size = int(os.environ.get("MAX_BATCH_SIZE", "1"))
+    max_batch_size = int(os.environ.get("MAX_BATCH_SIZE", "4"))
     max_total_tokens = int(os.environ.get("MAX_TOTAL_TOKENS", "64"))

     # Start the server
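With defaults now living in string literals ("4", "64"), one way to keep the parsing in a single place is a small helper; a sketch of an alternative, not code from this repo (env_int is a hypothetical name):

import os

def env_int(name: str, default: int) -> int:
    """Return the env var parsed as an int, or the default when unset."""
    raw = os.environ.get(name)
    return int(raw) if raw is not None else default

max_batch_size = env_int("MAX_BATCH_SIZE", 4)
max_total_tokens = env_int("MAX_TOTAL_TOKENS", 64)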
