Skip to content

Commit df27858

Browse files
authored
Merge branch 'master' into benchmarks
2 parents 989be8e + 74e0513 commit df27858

File tree

326 files changed

+19861
-10090
lines changed

Some content is hidden

Large Commits have some content hidden by default. Use the searchbox below for content that may be hidden.

326 files changed

+19861
-10090
lines changed

.devops/cann.Dockerfile

Lines changed: 3 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -4,7 +4,7 @@
44

55
# Define the CANN base image for easier version updates later
66
ARG CHIP_TYPE=910b
7-
ARG CANN_BASE_IMAGE=quay.io/ascend/cann:8.3.rc1.alpha001-${CHIP_TYPE}-openeuler22.03-py3.11
7+
ARG CANN_BASE_IMAGE=quay.io/ascend/cann:8.3.rc2-${CHIP_TYPE}-openeuler24.03-py3.11
88

99
# ==============================================================================
1010
# BUILD STAGE
@@ -107,11 +107,11 @@ ENTRYPOINT ["/app/tools.sh"]
107107
# ENTRYPOINT ["/app/llama-server"]
108108

109109
### Target: light
110-
# Lightweight image containing only llama-cli
110+
# Lightweight image containing only llama-cli and llama-completion
111111
# ==============================================================================
112112
FROM base AS light
113113

114-
COPY --from=build /app/full/llama-cli /app
114+
COPY --from=build /app/full/llama-cli /app/full/llama-completion /app
115115

116116
ENTRYPOINT [ "/app/llama-cli" ]
117117

.devops/cpu.Dockerfile

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -68,7 +68,7 @@ ENTRYPOINT ["/app/tools.sh"]
6868
### Light, CLI only
6969
FROM base AS light
7070

71-
COPY --from=build /app/full/llama-cli /app
71+
COPY --from=build /app/full/llama-cli /app/full/llama-completion /app
7272

7373
WORKDIR /app
7474

.devops/cuda.Dockerfile

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -74,7 +74,7 @@ ENTRYPOINT ["/app/tools.sh"]
7474
### Light, CLI only
7575
FROM base AS light
7676

77-
COPY --from=build /app/full/llama-cli /app
77+
COPY --from=build /app/full/llama-cli /app/full/llama-completion /app
7878

7979
WORKDIR /app
8080

.devops/intel.Dockerfile

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -73,7 +73,7 @@ ENTRYPOINT ["/app/tools.sh"]
7373
FROM base AS light
7474

7575
COPY --from=build /app/lib/ /app
76-
COPY --from=build /app/full/llama-cli /app
76+
COPY --from=build /app/full/llama-cli /app/full/llama-completion /app
7777

7878
WORKDIR /app
7979

.devops/llama-cli-cann.Dockerfile

Lines changed: 3 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -23,11 +23,12 @@ ENV LD_LIBRARY_PATH=${ASCEND_TOOLKIT_HOME}/runtime/lib64/stub:$LD_LIBRARY_PATH
2323
RUN echo "Building with static libs" && \
2424
source /usr/local/Ascend/ascend-toolkit/set_env.sh --force && \
2525
cmake -B build -DGGML_NATIVE=OFF -DGGML_CANN=ON -DBUILD_SHARED_LIBS=OFF -DLLAMA_BUILD_TESTS=OFF && \
26-
cmake --build build --config Release --target llama-cli
26+
cmake --build build --config Release --target llama-cli && \
27+
cmake --build build --config Release --target llama-completion
2728

2829
# TODO: use image with NNRT
2930
FROM ascendai/cann:$ASCEND_VERSION AS runtime
30-
COPY --from=build /app/build/bin/llama-cli /llama-cli
31+
COPY --from=build /app/build/bin/llama-cli /app/build/bin/llama-completion /
3132

3233
ENV LC_ALL=C.utf8
3334

.devops/llama-cpp-cuda.srpm.spec

Lines changed: 2 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -37,6 +37,7 @@ make -j GGML_CUDA=1
3737
%install
3838
mkdir -p %{buildroot}%{_bindir}/
3939
cp -p llama-cli %{buildroot}%{_bindir}/llama-cuda-cli
40+
cp -p llama-completion %{buildroot}%{_bindir}/llama-cuda-completion
4041
cp -p llama-server %{buildroot}%{_bindir}/llama-cuda-server
4142
cp -p llama-simple %{buildroot}%{_bindir}/llama-cuda-simple
4243

@@ -68,6 +69,7 @@ rm -rf %{_builddir}/*
6869

6970
%files
7071
%{_bindir}/llama-cuda-cli
72+
%{_bindir}/llama-cuda-completion
7173
%{_bindir}/llama-cuda-server
7274
%{_bindir}/llama-cuda-simple
7375
/usr/lib/systemd/system/llamacuda.service

.devops/llama-cpp.srpm.spec

Lines changed: 2 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -39,6 +39,7 @@ make -j
3939
%install
4040
mkdir -p %{buildroot}%{_bindir}/
4141
cp -p llama-cli %{buildroot}%{_bindir}/llama-cli
42+
cp -p llama-completion %{buildroot}%{_bindir}/llama-completion
4243
cp -p llama-server %{buildroot}%{_bindir}/llama-server
4344
cp -p llama-simple %{buildroot}%{_bindir}/llama-simple
4445

@@ -70,6 +71,7 @@ rm -rf %{_builddir}/*
7071

7172
%files
7273
%{_bindir}/llama-cli
74+
%{_bindir}/llama-completion
7375
%{_bindir}/llama-server
7476
%{_bindir}/llama-simple
7577
/usr/lib/systemd/system/llama.service

.devops/musa.Dockerfile

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -81,7 +81,7 @@ ENTRYPOINT ["/app/tools.sh"]
8181
### Light, CLI only
8282
FROM base AS light
8383

84-
COPY --from=build /app/full/llama-cli /app
84+
COPY --from=build /app/full/llama-cli /app/full/llama-completion /app
8585

8686
WORKDIR /app
8787

.devops/rocm.Dockerfile

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -94,7 +94,7 @@ ENTRYPOINT ["/app/tools.sh"]
9494
### Light, CLI only
9595
FROM base AS light
9696

97-
COPY --from=build /app/full/llama-cli /app
97+
COPY --from=build /app/full/llama-cli /app/full/llama-completion /app
9898

9999
WORKDIR /app
100100

.devops/s390x.Dockerfile

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -105,7 +105,7 @@ WORKDIR /llama.cpp/bin
105105

106106
# Copy llama.cpp binaries and libraries
107107
COPY --from=collector /llama.cpp/bin/*.so /llama.cpp/bin
108-
COPY --from=collector /llama.cpp/bin/llama-cli /llama.cpp/bin
108+
COPY --from=collector /llama.cpp/bin/llama-cli /llama.cpp/bin/llama-completion /llama.cpp/bin
109109

110110
ENTRYPOINT [ "/llama.cpp/bin/llama-cli" ]
111111

0 commit comments

Comments
 (0)