
Commit f578c1a

Author: Anton Tcholakov
feat: add support for "model_type": "gte" (#519)
1 parent 11f4893 commit f578c1a

7 files changed: +3142 −12 lines

README.md

Lines changed: 8 additions & 6 deletions
@@ -71,17 +71,19 @@ Below are some examples of the currently supported models:
 
 | MTEB Rank | Model Size | Model Type | Model ID |
 |-----------|---------------------|-------------|--------------------------------------------------------------------------------------------------|
-| 1 | 7B (Very Expensive) | Mistral | [Salesforce/SFR-Embedding-2_R](https://hf.co/Salesforce/SFR-Embedding-2_R) |
-| 2 | 7B (Very Expensive) | Qwen2 | [Alibaba-NLP/gte-Qwen2-7B-instruct](https://hf.co/Alibaba-NLP/gte-Qwen2-7B-instruct) |
-| 9 | 1.5B (Expensive) | Qwen2 | [Alibaba-NLP/gte-Qwen2-1.5B-instruct](https://hf.co/Alibaba-NLP/gte-Qwen2-1.5B-instruct) |
-| 15 | 0.4B | Alibaba GTE | [Alibaba-NLP/gte-large-en-v1.5](https://hf.co/Alibaba-NLP/gte-large-en-v1.5) |
+| 3 | 7B (Very Expensive) | Qwen2 | [Alibaba-NLP/gte-Qwen2-7B-instruct](https://hf.co/Alibaba-NLP/gte-Qwen2-7B-instruct) |
+| 11 | 1.5B (Expensive) | Qwen2 | [Alibaba-NLP/gte-Qwen2-1.5B-instruct](https://hf.co/Alibaba-NLP/gte-Qwen2-1.5B-instruct) |
+| 14 | 7B (Very Expensive) | Mistral | [Salesforce/SFR-Embedding-2_R](https://hf.co/Salesforce/SFR-Embedding-2_R) |
 | 20 | 0.3B | Bert | [WhereIsAI/UAE-Large-V1](https://hf.co/WhereIsAI/UAE-Large-V1) |
-| 24 | 0.5B | XLM-RoBERTa | [intfloat/multilingual-e5-large-instruct](https://hf.co/intfloat/multilingual-e5-large-instruct) |
+| 31 | 0.5B | XLM-RoBERTa | [Snowflake/snowflake-arctic-embed-l-v2.0](https://hf.co/Snowflake/snowflake-arctic-embed-l-v2.0) |
+| 37 | 0.3B | Alibaba GTE | [Snowflake/snowflake-arctic-embed-m-v2.0](https://hf.co/Snowflake/snowflake-arctic-embed-m-v2.0) |
+| 49 | 0.5B | XLM-RoBERTa | [intfloat/multilingual-e5-large-instruct](https://hf.co/intfloat/multilingual-e5-large-instruct) |
+| N/A | 0.4B | Alibaba GTE | [Alibaba-NLP/gte-large-en-v1.5](https://hf.co/Alibaba-NLP/gte-large-en-v1.5) |
 | N/A | 0.1B | NomicBert | [nomic-ai/nomic-embed-text-v1](https://hf.co/nomic-ai/nomic-embed-text-v1) |
 | N/A | 0.1B | NomicBert | [nomic-ai/nomic-embed-text-v1.5](https://hf.co/nomic-ai/nomic-embed-text-v1.5) |
 | N/A | 0.1B | JinaBERT | [jinaai/jina-embeddings-v2-base-en](https://hf.co/jinaai/jina-embeddings-v2-base-en) |
 | N/A | 0.1B | JinaBERT | [jinaai/jina-embeddings-v2-base-code](https://hf.co/jinaai/jina-embeddings-v2-base-code) |
-| N/A | 0.1B | MPNet | [sentence-transformers/all-mpnet-base-v2](https://hf.co/sentence-transformers/all-mpnet-base-v2) |
+| N/A | 0.1B | MPNet | [sentence-transformers/all-mpnet-base-v2](https://hf.co/sentence-transformers/all-mpnet-base-v2) |
 
 To explore the list of best performing text embeddings models, visit the
 [Massive Text Embedding Benchmark (MTEB) Leaderboard](https://huggingface.co/spaces/mteb/leaderboard).

backends/candle/src/lib.rs

Lines changed: 4 additions & 3 deletions
@@ -59,8 +59,9 @@ enum Config {
     NomicBert(NomicConfig),
     #[allow(dead_code)]
     Mistral(MistralConfig),
-    #[serde(rename = "new")]
     Gte(GTEConfig),
+    #[serde(rename = "new")]
+    GteAlibaba(GTEConfig),
     #[allow(dead_code)]
     Qwen2(Qwen2Config),
     #[serde(rename = "mpnet")]
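The split above is what lets the backend accept both spellings of the architecture tag. The sketch below is illustrative only, not the backend's actual definitions: it assumes the `Config` enum is internally tagged on `model_type` (as the serde attributes in the hunk suggest) and stubs `GTEConfig` down to a single field so the snippet is self-contained.

```rust
// Illustrative sketch only: assumes `Config` is an internally tagged serde
// enum keyed on `model_type`; `GTEConfig` is stubbed to one field here.
use serde::Deserialize;

#[derive(Debug, Deserialize)]
struct GTEConfig {
    hidden_size: usize, // stand-in field, not the real config schema
}

#[derive(Debug, Deserialize)]
#[serde(tag = "model_type", rename_all = "lowercase")]
enum Config {
    // Newly accepted spelling: `"model_type": "gte"`.
    Gte(GTEConfig),
    // Previously the only spelling: `"model_type": "new"`, used by Alibaba GTE configs.
    #[serde(rename = "new")]
    GteAlibaba(GTEConfig),
}

fn main() -> Result<(), serde_json::Error> {
    let gte = r#"{ "model_type": "gte", "hidden_size": 768 }"#;
    let alibaba = r#"{ "model_type": "new", "hidden_size": 1024 }"#;
    println!("{:?}", serde_json::from_str::<Config>(gte)?);     // Gte(GTEConfig { .. })
    println!("{:?}", serde_json::from_str::<Config>(alibaba)?); // GteAlibaba(GTEConfig { .. })
    Ok(())
}
```

Under that assumption, a config carrying `"model_type": "gte"` deserializes to `Config::Gte`, while the Alibaba-style `"model_type": "new"` keeps mapping to the renamed `GteAlibaba` variant.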
@@ -223,7 +224,7 @@ impl CandleBackend {
                 "Mistral is only supported on Cuda devices in fp16 with flash attention enabled"
                     .to_string(),
             )),
-            (Config::Gte(config), Device::Cpu | Device::Metal(_)) => {
+            (Config::Gte(config) | Config::GteAlibaba(config), Device::Cpu | Device::Metal(_)) => {
                 tracing::info!("Starting GTE model on {:?}", device);
                 Ok(Box::new(GTEModel::load(vb, &config, model_type).s()?))
             }
@@ -354,7 +355,7 @@ impl CandleBackend {
                 ))
             }
             #[cfg(feature = "cuda")]
-            (Config::Gte(config), Device::Cuda(_)) => {
+            (Config::Gte(config) | Config::GteAlibaba(config), Device::Cuda(_)) => {
                 if dtype != DType::F16
                     || !cfg!(any(feature = "flash-attn", feature = "flash-attn-v1"))
                 {
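The two match arms changed above (CPU/Metal and CUDA) use an or-pattern, so either variant binds the same `config` and flows into the same GTE load path. A minimal, self-contained illustration of that binding, using stub types rather than the backend's real ones:

```rust
// Stand-alone illustration of the or-pattern used in the arms above;
// the types here are stubs, not the backend's real ones.
struct GTEConfig {
    hidden_size: usize,
}

enum Config {
    Gte(GTEConfig),        // "model_type": "gte"
    GteAlibaba(GTEConfig), // "model_type": "new"
    Other,
}

fn describe(config: &Config) -> String {
    match config {
        // One arm serves both spellings: the or-pattern binds `c` identically
        // for either variant, mirroring `Config::Gte(config) | Config::GteAlibaba(config)`.
        Config::Gte(c) | Config::GteAlibaba(c) => {
            format!("GTE-style model, hidden_size = {}", c.hidden_size)
        }
        Config::Other => "some other architecture".to_string(),
    }
}

fn main() {
    println!("{}", describe(&Config::Gte(GTEConfig { hidden_size: 768 })));
    println!("{}", describe(&Config::GteAlibaba(GTEConfig { hidden_size: 1024 })));
}
```

The effect is a single load path for GTE models; only the deserialized `model_type` tag distinguishes the two checkpoint families.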
