Skip to content

Commit 3b88895

Browse files
alvarobarttfgbelidjivarunmoris
authored
Add huggingface/pytorch/tei/docker/1.7.0 (#141)
* Add `huggingface/pytorch/tei/docker/1.6.1` * Update `releases.json` * Apply suggestions from code review Co-Authored-by: Florent Gbelidji <[email protected]> * Add `HF_HUB_USER_AGENT_ORIGIN` Introduced in huggingface/text-embeddings-inference#534 * Fix spacing in `Dockerfile` for CPU * Add `start-cuda-compat.sh` and copy in `Dockerfile` * Update `README.md` * Add `start-cuda-compat.sh` * Rename 1.6.1 to 1.7.0 (just released) * Update `Dockerfile` for CPU * Update `Dockerfile` for GPU * Run `ruff` * Add `subprocess` to update submodules * Add `libgssapi-krb5-2` to patch CVE Apparently only required for GPU builds * Fix path to `start-cuda-compat.sh` in `Dockerfile` --------- Co-authored-by: Florent Gbelidji <[email protected]> Co-authored-by: varunmoris <[email protected]>
1 parent ced5651 commit 3b88895

File tree

6 files changed

+350
-41
lines changed

6 files changed

+350
-41
lines changed

README.md

Lines changed: 5 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -2,14 +2,15 @@
22

33
Welcome to the LLM Hosting Container GitHub repository!
44

5-
This repository contains Dockerfile and associated resources for building and
6-
hosting containers for large language models.
5+
This repository contains the Dockerfiles and associated resources for building and
6+
hosting containers for large language models and embedding models.
77

8-
* HuggingFace Text Generation Inference (TGI) container
8+
* Hugging Face Text Generation Inference (TGI) container
9+
* Hugging Face Text Embeddings Inference (TEI) container
910

1011
## Security
1112

12-
See [CONTRIBUTING](CONTRIBUTING.md#security-issue-notifications) for more information.
13+
See [CONTRIBUTING](CONTRIBUTING.md) for more information.
1314

1415
## License
1516

Lines changed: 120 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,120 @@
1+
FROM lukemathwalker/cargo-chef:latest-rust-1.85-bookworm AS chef
2+
WORKDIR /usr/src
3+
4+
ENV SCCACHE=0.5.4
5+
ENV RUSTC_WRAPPER=/usr/local/bin/sccache
6+
7+
# Download and configure sccache
8+
RUN curl -fsSL https://github.com/mozilla/sccache/releases/download/v$SCCACHE/sccache-v$SCCACHE-x86_64-unknown-linux-musl.tar.gz | tar -xzv --strip-components=1 -C /usr/local/bin sccache-v$SCCACHE-x86_64-unknown-linux-musl/sccache && \
9+
chmod +x /usr/local/bin/sccache
10+
11+
FROM chef AS planner
12+
13+
COPY candle-extensions candle-extensions
14+
COPY backends backends
15+
COPY core core
16+
COPY router router
17+
COPY Cargo.toml ./
18+
COPY Cargo.lock ./
19+
20+
RUN cargo chef prepare --recipe-path recipe.json
21+
22+
FROM chef AS builder
23+
24+
ARG GIT_SHA
25+
ARG DOCKER_LABEL
26+
27+
# sccache specific variables
28+
ARG SCCACHE_GHA_ENABLED
29+
30+
RUN wget -O- https://apt.repos.intel.com/intel-gpg-keys/GPG-PUB-KEY-INTEL-SW-PRODUCTS.PUB \
31+
| gpg --dearmor | tee /usr/share/keyrings/oneapi-archive-keyring.gpg > /dev/null && \
32+
echo "deb [signed-by=/usr/share/keyrings/oneapi-archive-keyring.gpg] https://apt.repos.intel.com/oneapi all main" | \
33+
tee /etc/apt/sources.list.d/oneAPI.list
34+
35+
RUN apt-get update && apt-get upgrade -y && DEBIAN_FRONTEND=noninteractive apt-get install -y --no-install-recommends \
36+
intel-oneapi-mkl-devel=2024.0.0-49656 \
37+
build-essential \
38+
&& rm -rf /var/lib/apt/lists/*
39+
40+
RUN echo "int mkl_serv_intel_cpu_true() {return 1;}" > fakeintel.c && \
41+
gcc -shared -fPIC -o libfakeintel.so fakeintel.c
42+
43+
COPY --from=planner /usr/src/recipe.json recipe.json
44+
45+
RUN --mount=type=secret,id=actions_cache_url,env=ACTIONS_CACHE_URL \
46+
--mount=type=secret,id=actions_runtime_token,env=ACTIONS_RUNTIME_TOKEN \
47+
cargo chef cook --release --features ort,candle,mkl --no-default-features --recipe-path recipe.json && sccache -s
48+
49+
COPY backends backends
50+
COPY core core
51+
COPY router router
52+
COPY Cargo.toml ./
53+
COPY Cargo.lock ./
54+
55+
FROM builder AS http-builder
56+
57+
RUN --mount=type=secret,id=actions_cache_url,env=ACTIONS_CACHE_URL \
58+
--mount=type=secret,id=actions_runtime_token,env=ACTIONS_RUNTIME_TOKEN \
59+
cargo build --release --bin text-embeddings-router --features ort,candle,mkl,http --no-default-features && sccache -s
60+
61+
FROM builder AS grpc-builder
62+
63+
RUN PROTOC_ZIP=protoc-21.12-linux-x86_64.zip && \
64+
curl -OL https://github.com/protocolbuffers/protobuf/releases/download/v21.12/$PROTOC_ZIP && \
65+
unzip -o $PROTOC_ZIP -d /usr/local bin/protoc && \
66+
unzip -o $PROTOC_ZIP -d /usr/local 'include/*' && \
67+
rm -f $PROTOC_ZIP
68+
69+
COPY proto proto
70+
71+
RUN --mount=type=secret,id=actions_cache_url,env=ACTIONS_CACHE_URL \
72+
--mount=type=secret,id=actions_runtime_token,env=ACTIONS_RUNTIME_TOKEN \
73+
cargo build --release --bin text-embeddings-router --features ort,candle,mkl,grpc --no-default-features && sccache -s
74+
75+
FROM debian:bookworm-slim AS base
76+
77+
ENV HUGGINGFACE_HUB_CACHE=/data \
78+
PORT=80 \
79+
HF_HUB_USER_AGENT_ORIGIN=aws:sagemaker:cpu:inference:tei \
80+
MKL_ENABLE_INSTRUCTIONS=AVX512_E4 \
81+
RAYON_NUM_THREADS=8 \
82+
LD_PRELOAD=/usr/local/libfakeintel.so \
83+
LD_LIBRARY_PATH=/usr/local/lib
84+
85+
RUN apt-get update && apt-get upgrade -y && DEBIAN_FRONTEND=noninteractive apt-get install -y --no-install-recommends \
86+
libomp-dev \
87+
ca-certificates \
88+
libssl-dev \
89+
curl \
90+
&& rm -rf /var/lib/apt/lists/*
91+
92+
# Copy a lot of the Intel shared objects because of the mkl_serv_intel_cpu_true patch...
93+
COPY --from=builder /opt/intel/oneapi/mkl/latest/lib/intel64/libmkl_intel_lp64.so.2 /usr/local/lib/libmkl_intel_lp64.so.2
94+
COPY --from=builder /opt/intel/oneapi/mkl/latest/lib/intel64/libmkl_intel_thread.so.2 /usr/local/lib/libmkl_intel_thread.so.2
95+
COPY --from=builder /opt/intel/oneapi/mkl/latest/lib/intel64/libmkl_core.so.2 /usr/local/lib/libmkl_core.so.2
96+
COPY --from=builder /opt/intel/oneapi/mkl/latest/lib/intel64/libmkl_vml_def.so.2 /usr/local/lib/libmkl_vml_def.so.2
97+
COPY --from=builder /opt/intel/oneapi/mkl/latest/lib/intel64/libmkl_def.so.2 /usr/local/lib/libmkl_def.so.2
98+
COPY --from=builder /opt/intel/oneapi/mkl/latest/lib/intel64/libmkl_vml_avx2.so.2 /usr/local/lib/libmkl_vml_avx2.so.2
99+
COPY --from=builder /opt/intel/oneapi/mkl/latest/lib/intel64/libmkl_vml_avx512.so.2 /usr/local/lib/libmkl_vml_avx512.so.2
100+
COPY --from=builder /opt/intel/oneapi/mkl/latest/lib/intel64/libmkl_avx2.so.2 /usr/local/lib/libmkl_avx2.so.2
101+
COPY --from=builder /opt/intel/oneapi/mkl/latest/lib/intel64/libmkl_avx512.so.2 /usr/local/lib/libmkl_avx512.so.2
102+
COPY --from=builder /usr/src/libfakeintel.so /usr/local/libfakeintel.so
103+
104+
FROM base AS grpc
105+
106+
COPY --from=grpc-builder /usr/src/target/release/text-embeddings-router /usr/local/bin/text-embeddings-router
107+
108+
ENTRYPOINT ["text-embeddings-router"]
109+
CMD ["--json-output"]
110+
111+
FROM base AS http
112+
113+
COPY --from=http-builder /usr/src/target/release/text-embeddings-router /usr/local/bin/text-embeddings-router
114+
115+
# Amazon SageMaker compatible image
116+
FROM http AS sagemaker
117+
118+
COPY --chmod=775 sagemaker-entrypoint.sh entrypoint.sh
119+
120+
ENTRYPOINT ["./entrypoint.sh"]
Lines changed: 119 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,119 @@
1+
FROM nvidia/cuda:12.2.0-devel-ubuntu22.04 AS base-builder
2+
3+
ENV SCCACHE=0.5.4
4+
ENV RUSTC_WRAPPER=/usr/local/bin/sccache
5+
ENV PATH="/root/.cargo/bin:${PATH}"
6+
# aligned with `cargo-chef` version in `lukemathwalker/cargo-chef:latest-rust-1.85-bookworm`
7+
ENV CARGO_CHEF=0.1.71
8+
9+
RUN apt-get update && apt-get upgrade -y && DEBIAN_FRONTEND=noninteractive apt-get install -y --no-install-recommends \
10+
curl \
11+
libssl-dev \
12+
pkg-config \
13+
libgssapi-krb5-2 \
14+
&& rm -rf /var/lib/apt/lists/*
15+
16+
# Download and configure sccache
17+
RUN curl -fsSL https://github.com/mozilla/sccache/releases/download/v$SCCACHE/sccache-v$SCCACHE-x86_64-unknown-linux-musl.tar.gz | tar -xzv --strip-components=1 -C /usr/local/bin sccache-v$SCCACHE-x86_64-unknown-linux-musl/sccache && \
18+
chmod +x /usr/local/bin/sccache
19+
20+
# Install Rust via rustup, pinned to the 1.85 release that the rust-1.85 cargo-chef image (which CARGO_CHEF is aligned with) ships, so rebuilds do not silently pick up a newer stable toolchain; restrict curl to HTTPS/TLS 1.2+ as rustup recommends
RUN curl --proto '=https' --tlsv1.2 -sSf https://sh.rustup.rs | bash -s -- -y --default-toolchain 1.85.0
21+
RUN cargo install cargo-chef --version $CARGO_CHEF --locked
22+
23+
FROM base-builder AS planner
24+
25+
WORKDIR /usr/src
26+
27+
COPY candle-extensions candle-extensions
28+
COPY backends backends
29+
COPY core core
30+
COPY router router
31+
COPY Cargo.toml ./
32+
COPY Cargo.lock ./
33+
34+
RUN cargo chef prepare --recipe-path recipe.json
35+
36+
FROM base-builder AS builder
37+
38+
ARG GIT_SHA
39+
ARG DOCKER_LABEL
40+
41+
# sccache specific variables
42+
ARG SCCACHE_GHA_ENABLED
43+
44+
# Limit parallelism
45+
ARG RAYON_NUM_THREADS=4
46+
ARG CARGO_BUILD_JOBS
47+
ARG CARGO_BUILD_INCREMENTAL
48+
49+
WORKDIR /usr/src
50+
51+
COPY --from=planner /usr/src/recipe.json recipe.json
52+
53+
RUN --mount=type=secret,id=actions_cache_url,env=ACTIONS_CACHE_URL \
54+
--mount=type=secret,id=actions_runtime_token,env=ACTIONS_RUNTIME_TOKEN \
55+
cargo chef cook --release --recipe-path recipe.json && sccache -s;
56+
57+
RUN --mount=type=secret,id=actions_cache_url,env=ACTIONS_CACHE_URL \
58+
--mount=type=secret,id=actions_runtime_token,env=ACTIONS_RUNTIME_TOKEN \
59+
CUDA_COMPUTE_CAP=75 cargo chef cook --release --features candle-cuda-turing --recipe-path recipe.json && sccache -s;
60+
61+
RUN --mount=type=secret,id=actions_cache_url,env=ACTIONS_CACHE_URL \
62+
--mount=type=secret,id=actions_runtime_token,env=ACTIONS_RUNTIME_TOKEN \
63+
CUDA_COMPUTE_CAP=80 cargo chef cook --release --features candle-cuda --recipe-path recipe.json && sccache -s;
64+
65+
RUN --mount=type=secret,id=actions_cache_url,env=ACTIONS_CACHE_URL \
66+
--mount=type=secret,id=actions_runtime_token,env=ACTIONS_RUNTIME_TOKEN \
67+
CUDA_COMPUTE_CAP=90 cargo chef cook --release --features candle-cuda --recipe-path recipe.json && sccache -s;
68+
69+
COPY candle-extensions candle-extensions
70+
COPY backends backends
71+
COPY core core
72+
COPY router router
73+
COPY Cargo.toml ./
74+
COPY Cargo.lock ./
75+
76+
RUN --mount=type=secret,id=actions_cache_url,env=ACTIONS_CACHE_URL \
77+
--mount=type=secret,id=actions_runtime_token,env=ACTIONS_RUNTIME_TOKEN \
78+
CUDA_COMPUTE_CAP=75 cargo build --release --bin text-embeddings-router -F candle-cuda-turing && sccache -s;
79+
80+
RUN mv /usr/src/target/release/text-embeddings-router /usr/src/target/release/text-embeddings-router-75
81+
82+
RUN --mount=type=secret,id=actions_cache_url,env=ACTIONS_CACHE_URL \
83+
--mount=type=secret,id=actions_runtime_token,env=ACTIONS_RUNTIME_TOKEN \
84+
CUDA_COMPUTE_CAP=80 cargo build --release --bin text-embeddings-router -F candle-cuda && sccache -s;
85+
86+
RUN mv /usr/src/target/release/text-embeddings-router /usr/src/target/release/text-embeddings-router-80
87+
88+
RUN --mount=type=secret,id=actions_cache_url,env=ACTIONS_CACHE_URL \
89+
--mount=type=secret,id=actions_runtime_token,env=ACTIONS_RUNTIME_TOKEN \
90+
CUDA_COMPUTE_CAP=90 cargo build --release --bin text-embeddings-router -F candle-cuda && sccache -s;
91+
92+
RUN mv /usr/src/target/release/text-embeddings-router /usr/src/target/release/text-embeddings-router-90
93+
94+
FROM nvidia/cuda:12.2.0-runtime-ubuntu22.04 AS base
95+
96+
ARG DEFAULT_USE_FLASH_ATTENTION=True
97+
98+
ENV HUGGINGFACE_HUB_CACHE=/data \
99+
PORT=80 \
100+
USE_FLASH_ATTENTION=$DEFAULT_USE_FLASH_ATTENTION \
101+
HF_HUB_USER_AGENT_ORIGIN=aws:sagemaker:gpu-cuda:inference:tei
102+
103+
RUN apt-get update && apt-get upgrade -y && DEBIAN_FRONTEND=noninteractive apt-get install -y --no-install-recommends \
104+
ca-certificates \
105+
libssl-dev \
106+
curl \
107+
&& rm -rf /var/lib/apt/lists/*
108+
109+
COPY --from=builder /usr/src/target/release/text-embeddings-router-75 /usr/local/bin/text-embeddings-router-75
110+
COPY --from=builder /usr/src/target/release/text-embeddings-router-80 /usr/local/bin/text-embeddings-router-80
111+
COPY --from=builder /usr/src/target/release/text-embeddings-router-90 /usr/local/bin/text-embeddings-router-90
112+
113+
# Amazon SageMaker compatible image
114+
FROM base AS sagemaker
115+
116+
COPY --chmod=775 sagemaker-entrypoint-cuda-all.sh entrypoint.sh
117+
COPY --chmod=775 /huggingface/pytorch/tei/docker/1.7.0/gpu/start-cuda-compat.sh start-cuda-compat.sh
118+
119+
ENTRYPOINT ["./entrypoint.sh"]
Lines changed: 21 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,21 @@
1+
#!/bin/bash
2+
3+
verlt() {
4+
[ "$1" = "$2" ] && return 1 || [ "$1" = "$(echo -e "$1\n$2" | sort -V | head -n1)" ]
5+
}
6+
7+
if [ -f /usr/local/cuda/compat/libcuda.so.1 ]; then
8+
CUDA_COMPAT_MAX_DRIVER_VERSION=$(readlink /usr/local/cuda/compat/libcuda.so.1 | cut -d'.' -f 3-)
9+
echo "CUDA compat package should be installed for NVIDIA driver smaller than ${CUDA_COMPAT_MAX_DRIVER_VERSION}"
10+
NVIDIA_DRIVER_VERSION=$(sed -n 's/^NVRM.*Kernel Module *\([0-9.]*\).*$/\1/p' /proc/driver/nvidia/version 2>/dev/null || true)
11+
echo "Current installed NVIDIA driver version is ${NVIDIA_DRIVER_VERSION}"
12+
# Compare versions only when a driver version was actually detected; quoting prevents word splitting so an empty value can never shift verlt's arguments
if [ -n "$NVIDIA_DRIVER_VERSION" ] && verlt "$NVIDIA_DRIVER_VERSION" "$CUDA_COMPAT_MAX_DRIVER_VERSION"; then
13+
echo "Adding CUDA compat to LD_LIBRARY_PATH"
14+
export LD_LIBRARY_PATH=/usr/local/cuda/compat:$LD_LIBRARY_PATH
15+
echo $LD_LIBRARY_PATH
16+
else
17+
echo "Skipping CUDA compat setup as newer NVIDIA driver is installed"
18+
fi
19+
else
20+
echo "Skipping CUDA compat setup as package not found"
21+
fi

0 commit comments

Comments
 (0)