Skip to content

Commit 943be86

Browse files
authored
Fixing the static-linking. (#547)
1 parent f4b1aa3 commit 943be86

File tree

13 files changed

+76
-48
lines changed

13 files changed

+76
-48
lines changed

Cargo.lock

Lines changed: 8 additions & 7 deletions
Some generated files are not rendered by default. Learn more about customizing how changed files appear on GitHub.

Cargo.toml

Lines changed: 9 additions & 5 deletions
Original file line numberDiff line numberDiff line change
@@ -47,18 +47,22 @@ thiserror = "1.0"
4747
rand = "0.9"
4848
serial_test = "2.0.0"
4949
cudarc = { version = "0.13" , features =["cuda-12020"]}
50+
intel-mkl-src = { version = "0.8" }
5051
candle = { version = "0.8", package = "candle-core" }
5152
candle-nn = { version = "0.8", package = "candle-nn" }
5253
candle-transformers = { version = "0.8", package = "candle-transformers" }
5354
candle-flash-attn = { version = "0.8", package = "candle-flash-attn" }
5455
half = { version = "2.3.1", features = ["num-traits"] }
5556

5657
[patch.crates-io]
57-
cudarc = { git = "https://github.com/Narsil/cudarc" , rev = "1956436aeddea1da04fc3226282bc07c07eeaa35"}
58-
candle = { git = "https://github.com/Narsil/candle", rev = "2e273ddf31b1b796d3cfcd181ccb98deaa48466e", package = "candle-core" }
59-
candle-nn = { git = "https://github.com/Narsil/candle", rev = "2e273ddf31b1b796d3cfcd181ccb98deaa48466e", package = "candle-nn" }
60-
candle-transformers = { git = "https://github.com/Narsil/candle", rev = "2e273ddf31b1b796d3cfcd181ccb98deaa48466e", package = "candle-transformers" }
61-
candle-flash-attn = { git = "https://github.com/Narsil/candle", rev = "2e273ddf31b1b796d3cfcd181ccb98deaa48466e", package = "candle-flash-attn" }
58+
cudarc = { git = "https://github.com/Narsil/cudarc" , rev = "18ae111a4e8779c11377636b9cc3379f686e99c6"}
59+
candle = { git = "https://github.com/huggingface/candle", rev = "ec6d7ca7738f4052b6613edc8f4d2bb6866a7539", package = "candle-core" }
60+
candle-nn = { git = "https://github.com/huggingface/candle", rev = "ec6d7ca7738f4052b6613edc8f4d2bb6866a7539", package = "candle-nn" }
61+
candle-transformers = { git = "https://github.com/huggingface/candle", rev = "ec6d7ca7738f4052b6613edc8f4d2bb6866a7539", package = "candle-transformers" }
62+
candle-flash-attn = { git = "https://github.com/huggingface/candle", rev = "ec6d7ca7738f4052b6613edc8f4d2bb6866a7539", package = "candle-flash-attn" }
63+
# candle = { path = "../candle/candle-core", package = "candle-core" }
64+
# candle-nn = { path = "../candle/candle-nn" }
65+
# candle-flash-attn = { path = "../candle/candle-flash-attn" }
6266

6367
[profile.release]
6468
debug = 0

Dockerfile

Lines changed: 3 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -44,7 +44,7 @@ COPY --from=planner /usr/src/recipe.json recipe.json
4444

4545
RUN --mount=type=secret,id=actions_cache_url,env=ACTIONS_CACHE_URL \
4646
--mount=type=secret,id=actions_runtime_token,env=ACTIONS_RUNTIME_TOKEN \
47-
cargo chef cook --release --features ort --features candle --features mkl-dynamic --no-default-features --recipe-path recipe.json && sccache -s
47+
cargo chef cook --release --features ort,candle,mkl --no-default-features --recipe-path recipe.json && sccache -s
4848

4949
COPY backends backends
5050
COPY core core
@@ -56,7 +56,7 @@ FROM builder AS http-builder
5656

5757
RUN --mount=type=secret,id=actions_cache_url,env=ACTIONS_CACHE_URL \
5858
--mount=type=secret,id=actions_runtime_token,env=ACTIONS_RUNTIME_TOKEN \
59-
cargo build --release --bin text-embeddings-router -F ort -F candle -F mkl-dynamic -F http --no-default-features && sccache -s
59+
cargo build --release --bin text-embeddings-router --features ort,candle,mkl,http --no-default-features && sccache -s
6060

6161
FROM builder AS grpc-builder
6262

@@ -70,7 +70,7 @@ COPY proto proto
7070

7171
RUN --mount=type=secret,id=actions_cache_url,env=ACTIONS_CACHE_URL \
7272
--mount=type=secret,id=actions_runtime_token,env=ACTIONS_RUNTIME_TOKEN \
73-
cargo build --release --bin text-embeddings-router -F grpc -F ort -F candle -F mkl-dynamic --no-default-features && sccache -s
73+
cargo build --release --bin text-embeddings-router --features ort,candle,mkl,grpc --no-default-features && sccache -s
7474

7575
FROM debian:bookworm-slim AS base
7676

backends/Cargo.toml

Lines changed: 0 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -25,7 +25,6 @@ candle = ["dep:text-embeddings-backend-candle"]
2525
cuda = ["text-embeddings-backend-candle?/cuda"]
2626
metal = ["text-embeddings-backend-candle?/metal"]
2727
mkl = ["text-embeddings-backend-candle?/mkl"]
28-
mkl-dynamic = ["text-embeddings-backend-candle?/mkl-dynamic"]
2928
accelerate = ["text-embeddings-backend-candle?/accelerate"]
3029
flash-attn = ["text-embeddings-backend-candle?/flash-attn"]
3130
flash-attn-v1 = ["text-embeddings-backend-candle?/flash-attn-v1"]

backends/candle/Cargo.toml

Lines changed: 3 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -8,7 +8,7 @@ homepage.workspace = true
88
[dependencies]
99
anyhow = { workspace = true }
1010
accelerate-src = { version = "0.3.2", optional = true }
11-
intel-mkl-src = { version = "0.8.1", optional = true }
11+
intel-mkl-src = { workspace = true, optional = true }
1212
candle = { workspace = true }
1313
candle-nn = { workspace = true }
1414
candle-transformers = { workspace = true }
@@ -40,8 +40,7 @@ anyhow = { version = "1", features = ["backtrace"] }
4040
[features]
4141
accelerate = ["dep:accelerate-src", "candle/accelerate", "candle-nn/accelerate"]
4242
metal = ["candle/metal", "candle-nn/metal"]
43-
mkl = ["dep:intel-mkl-src", "intel-mkl-src/mkl-static-lp64-iomp", "candle/mkl"]
44-
mkl-dynamic = ["dep:intel-mkl-src", "intel-mkl-src/mkl-dynamic-lp64-iomp", "candle/mkl"]
45-
cuda = ["candle/cuda", "candle-nn/cuda", "dep:candle-cublaslt", "dep:candle-layer-norm", "dep:candle-rotary"]
43+
mkl = ["dep:intel-mkl-src", "candle/_mkl"]
44+
cuda = ["candle/_cuda", "candle-nn/_cuda", "dep:candle-cublaslt", "dep:candle-layer-norm", "dep:candle-rotary"]
4645
flash-attn-v1 = ["dep:candle-flash-attn-v1", "cuda"]
4746
flash-attn = ["dep:candle-flash-attn", "cuda"]

backends/candle/src/models/mod.rs

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -1,4 +1,4 @@
1-
#[cfg(any(feature = "mkl", feature = "mkl-dynamic"))]
1+
#[cfg(feature = "mkl")]
22
extern crate intel_mkl_src;
33

44
#[cfg(feature = "accelerate")]

backends/src/dtype.rs

Lines changed: 1 addition & 7 deletions
Original file line numberDiff line numberDiff line change
@@ -38,19 +38,13 @@ impl fmt::Display for DType {
3838
#[allow(clippy::derivable_impls)]
3939
impl Default for DType {
4040
fn default() -> Self {
41-
#[cfg(any(
42-
feature = "accelerate",
43-
feature = "mkl",
44-
feature = "mkl-dynamic",
45-
feature = "ort"
46-
))]
41+
#[cfg(any(feature = "accelerate", feature = "mkl", feature = "ort"))]
4742
{
4843
DType::Float32
4944
}
5045
#[cfg(not(any(
5146
feature = "accelerate",
5247
feature = "mkl",
53-
feature = "mkl-dynamic",
5448
feature = "ort",
5549
feature = "python"
5650
)))]

candle-extensions/candle-cublaslt/Cargo.toml

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -6,6 +6,6 @@ edition = "2021"
66
description = "CUBLASLt gemm for the candle ML framework."
77

88
[dependencies]
9-
candle = { workspace=true, features = ["cuda"]}
9+
candle = { workspace=true, features = ["_cuda"]}
1010
cudarc = { workspace = true, features = [ "cublaslt", "f16" ]}
1111
half = { workspace = true}

candle-extensions/candle-flash-attn-v1/build.rs

Lines changed: 41 additions & 13 deletions
Original file line numberDiff line numberDiff line change
@@ -3,15 +3,39 @@
33
// variable in order to cache the compiled artifacts and avoid recompiling too often.
44
use anyhow::{Context, Result};
55
use rayon::prelude::*;
6+
use std::fs;
67
use std::path::PathBuf;
78
use std::str::FromStr;
89

9-
const KERNEL_FILES: [&str; 4] = [
10-
"flash_api.cu",
11-
"fmha_fwd_hdim32.cu",
12-
"fmha_fwd_hdim64.cu",
13-
"fmha_fwd_hdim128.cu",
14-
];
10+
// const KERNEL_FILES: [&str; 4] = [
11+
// "flash_api.cu",
12+
// "fmha_fwd_hdim32.cu",
13+
// "fmha_fwd_hdim64.cu",
14+
// "fmha_fwd_hdim128.cu",
15+
// ];
16+
17+
/// Recursively walks `dir_path`, appending the path of every non-directory entry to `paths`.
18+
fn _read_dir_recursively(dir_path: &PathBuf, paths: &mut Vec<PathBuf>) -> std::io::Result<()> {
19+
for entry in fs::read_dir(dir_path)? {
20+
let entry = entry?;
21+
let path = entry.path();
22+
23+
if path.is_dir() {
24+
_read_dir_recursively(&path, paths)?;
25+
} else {
26+
paths.push(path);
27+
}
28+
}
29+
30+
Ok(())
31+
}
32+
33+
/// Returns the paths of all non-directory entries under `dir_path`, descending into subdirectories.
34+
fn read_dir_recursively(dir_path: &PathBuf) -> std::io::Result<Vec<PathBuf>> {
35+
let mut paths = Vec::new();
36+
_read_dir_recursively(dir_path, &mut paths)?;
37+
Ok(paths)
38+
}
1539

1640
fn main() -> Result<()> {
1741
let num_cpus = std::env::var("RAYON_NUM_THREADS").map_or_else(
@@ -25,12 +49,11 @@ fn main() -> Result<()> {
2549
.unwrap();
2650

2751
println!("cargo:rerun-if-changed=build.rs");
28-
for kernel_file in KERNEL_FILES.iter() {
29-
println!("cargo:rerun-if-changed=kernels/{kernel_file}");
52+
53+
let paths = read_dir_recursively(&PathBuf::from_str("kernels")?)?;
54+
for file in paths.iter() {
55+
println!("cargo:rerun-if-changed={}", file.display());
3056
}
31-
println!("cargo:rerun-if-changed=kernels/**.h");
32-
println!("cargo:rerun-if-changed=kernels/**.cuh");
33-
println!("cargo:rerun-if-changed=kernels/fmha/**.h");
3457
let out_dir = PathBuf::from(std::env::var("OUT_DIR").context("OUT_DIR not set")?);
3558
let build_dir = match std::env::var("CANDLE_FLASH_ATTN_BUILD_DIR") {
3659
Err(_) =>
@@ -57,12 +80,17 @@ fn main() -> Result<()> {
5780
let out_file = build_dir.join("libflashattentionv1.a");
5881

5982
let kernel_dir = PathBuf::from("kernels");
60-
let cu_files: Vec<_> = KERNEL_FILES
83+
let kernels: Vec<_> = paths
84+
.iter()
85+
.filter(|f| f.extension().map(|ext| ext == "cu").unwrap_or_default())
86+
.collect();
87+
let cu_files: Vec<_> = kernels
6188
.iter()
6289
.map(|f| {
6390
let mut obj_file = out_dir.join(f);
91+
fs::create_dir_all(obj_file.parent().unwrap()).unwrap();
6492
obj_file.set_extension("o");
65-
(kernel_dir.join(f), obj_file)
93+
(f, obj_file)
6694
})
6795
.collect();
6896
let out_modified: Result<_, _> = out_file.metadata().and_then(|m| m.modified());

candle-extensions/candle-layer-norm/Cargo.toml

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -6,7 +6,7 @@ edition = "2021"
66
description = "Layer Norm layer for the candle ML framework."
77

88
[dependencies]
9-
candle = { workspace = true, features = ["cuda"] }
9+
candle = { workspace = true, features = ["_cuda"] }
1010
half = { workspace = true }
1111

1212
[build-dependencies]

candle-extensions/candle-layer-norm/build.rs

Lines changed: 3 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -23,8 +23,8 @@ fn main() -> Result<()> {
2323
for kernel_file in KERNEL_FILES.iter() {
2424
println!("cargo:rerun-if-changed=kernels/{kernel_file}");
2525
}
26-
println!("cargo:rerun-if-changed=kernels/**.cu");
2726
println!("cargo:rerun-if-changed=kernels/ln_fwd_kernels.cuh");
27+
println!("cargo:rerun-if-changed=kernels/ln.h");
2828
println!("cargo:rerun-if-changed=kernels/ln_kernel_traits.h");
2929
println!("cargo:rerun-if-changed=kernels/ln_utils.cuh");
3030
println!("cargo:rerun-if-changed=kernels/static_switch.h");
@@ -176,6 +176,8 @@ fn set_cuda_include_dir() -> Result<()> {
176176
.chain(roots)
177177
.find(|path| path.join("include").join("cuda.h").is_file())
178178
.context("cannot find include/cuda.h")?;
179+
println!("cargo:rustc-link-search={}", root.join("lib").display());
180+
println!("cargo:rustc-link-search={}", root.join("lib64").display());
179181
println!(
180182
"cargo:rustc-env=CUDA_INCLUDE_DIR={}",
181183
root.join("include").display()

candle-extensions/candle-rotary/Cargo.toml

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -10,7 +10,7 @@ license = "MIT OR Apache-2.0"
1010
readme = "README.md"
1111

1212
[dependencies]
13-
candle = { workspace = true, features = ["cuda"]}
13+
candle = { workspace = true, features = ["_cuda"]}
1414
half = { workspace = true }
1515

1616
[build-dependencies]

router/Cargo.toml

Lines changed: 4 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -59,6 +59,7 @@ tokio-stream = { version = "0.1.14", optional = true }
5959

6060
# Optional
6161
cudarc = { workspace = true, optional = true }
62+
intel-mkl-src = { workspace = true, optional = true }
6263

6364
# Malloc trim hack for linux
6465
[target.'cfg(target_os = "linux")'.dependencies]
@@ -78,18 +79,18 @@ vergen = { version = "8.0.0", features = ["build", "git", "gitcl"] }
7879
tonic-build = { version = "0.11.0", optional = true }
7980

8081
[features]
81-
default = ["candle", "http"]
82+
default = ["candle", "http", "dynamic-linking"]
8283
http = ["dep:axum", "dep:axum-tracing-opentelemetry", "dep:base64", "dep:tower-http", "dep:utoipa", "dep:utoipa-swagger-ui"]
8384
grpc = ["metrics-exporter-prometheus/http-listener", "dep:prost", "dep:tonic", "dep:tonic-health", "dep:tonic-reflection", "dep:tonic-build", "dep:async-stream", "dep:tokio-stream"]
8485
metal = ["text-embeddings-backend/metal"]
8586
mkl = ["text-embeddings-backend/mkl"]
86-
mkl-dynamic = ["text-embeddings-backend/mkl-dynamic"]
8787
accelerate = ["text-embeddings-backend/accelerate"]
8888
python = ["text-embeddings-backend/python"]
8989
ort = ["text-embeddings-backend/ort"]
9090
candle = ["text-embeddings-backend/candle"]
9191
candle-cuda = ["candle", "text-embeddings-backend/flash-attn"]
9292
candle-cuda-turing = ["candle", "text-embeddings-backend/flash-attn-v1"]
9393
candle-cuda-volta = ["candle", "text-embeddings-backend/cuda"]
94-
static-linking = ["cudarc/static-linking"]
94+
static-linking = ["cudarc?/static-linking", "intel-mkl-src?/mkl-static-lp64-iomp"]
95+
dynamic-linking = ["cudarc?/dynamic-linking", "intel-mkl-src?/mkl-dynamic-lp64-iomp"]
9596
google = []

0 commit comments

Comments
 (0)