File tree — backends/candle/src/models: 2 files changed, +11 −3 lines

Original file line number | Diff line number | Diff line change
@@ -22,7 +22,7 @@ struct Qwen2Attention {
22
22
impl Qwen2Attention {
23
23
pub fn load ( vb : VarBuilder , config : & Qwen2Config ) -> Result < Self > {
24
24
if config. use_sliding_window {
25
- candle:: bail!( "Sliding window is not supported" ) ;
25
+ candle:: bail!( "Sliding window is not supported for Qwen2" , ) ;
26
26
}
27
27
28
28
let num_attention_heads = config. num_attention_heads ;
@@ -264,7 +264,15 @@ impl FlashQwen2Model {
264
264
ModelType :: Embedding ( pool) => pool,
265
265
} ;
266
266
267
- let vb = vb. pp ( "model" ) ;
267
+ // Pushing the `model` prefix is apparently only required when the checkpoint's architecture
268
+ // is `...ForCausalLM` (which also contains the `lm_head`); otherwise the `model` key is not
269
+ // present, e.g. for a plain `Qwen2Model` checkpoint (not `Qwen2ModelForCausalLM`) such as
270
+ // https://huggingface.co/mims-harvard/ToolRAG-T1-GTE-Qwen2-1.5B
271
+ let vb = if vb. contains_tensor ( "model.embed_tokens.weight" ) {
272
+ vb. pp ( "model" )
273
+ } else {
274
+ vb
275
+ } ;
268
276
269
277
let embeddings = Embedding :: new (
270
278
vb. pp ( "embed_tokens" )
Original file line number | Diff line number | Diff line change
@@ -13,6 +13,6 @@ pub struct Qwen2Config {
13
13
pub max_position_embeddings : usize ,
14
14
pub rms_norm_eps : f32 ,
15
15
pub rope_theta : f32 ,
16
- pub sliding_window : usize ,
16
+ pub sliding_window : Option < usize > ,
17
17
pub use_sliding_window : bool ,
18
18
}
You can’t perform that action at this time.
0 commit comments