@@ -41,26 +41,40 @@ impl Qwen3Attention {
41
41
( num_attention_heads * attention_head_size, hidden_size) ,
42
42
"weight" ,
43
43
) ?;
44
- let query_bias = vb. pp ( "q_proj" ) . get ( hidden_size, "bias" ) ?;
45
- let q_proj = Linear :: new ( query_weight, Some ( query_bias) , None ) ;
44
+ let query_bias = if config. attention_bias {
45
+ Some ( vb. pp ( "q_proj" ) . get ( hidden_size, "bias" ) ?)
46
+ } else {
47
+ None
48
+ } ;
49
+ let q_proj = Linear :: new ( query_weight, query_bias, None ) ;
46
50
47
51
let key_weight = vb. pp ( "k_proj" ) . get (
48
52
( num_key_value_heads * attention_head_size, hidden_size) ,
49
53
"weight" ,
50
54
) ?;
51
- let key_bias = vb
52
- . pp ( "k_proj" )
53
- . get ( num_key_value_heads * attention_head_size, "bias" ) ?;
54
- let k_proj = Linear :: new ( key_weight, Some ( key_bias) , None ) ;
55
+ let key_bias = if config. attention_bias {
56
+ Some (
57
+ vb. pp ( "k_proj" )
58
+ . get ( num_key_value_heads * attention_head_size, "bias" ) ?,
59
+ )
60
+ } else {
61
+ None
62
+ } ;
63
+ let k_proj = Linear :: new ( key_weight, key_bias, None ) ;
55
64
56
65
let value_weight = vb. pp ( "v_proj" ) . get (
57
66
( num_key_value_heads * attention_head_size, hidden_size) ,
58
67
"weight" ,
59
68
) ?;
60
- let value_bias = vb
61
- . pp ( "v_proj" )
62
- . get ( num_key_value_heads * attention_head_size, "bias" ) ?;
63
- let v_proj = Linear :: new ( value_weight, Some ( value_bias) , None ) ;
69
+ let value_bias = if config. attention_bias {
70
+ Some (
71
+ vb. pp ( "v_proj" )
72
+ . get ( num_key_value_heads * attention_head_size, "bias" ) ?,
73
+ )
74
+ } else {
75
+ None
76
+ } ;
77
+ let v_proj = Linear :: new ( value_weight, value_bias, None ) ;
64
78
65
79
let o_proj_weight = vb. pp ( "o_proj" ) . get (
66
80
( num_attention_heads * attention_head_size, hidden_size) ,
0 commit comments