@@ -58,7 +58,9 @@ def from_hf_config(cls, config) -> "LFM2Config":
5858 if multiplier is not None :
5959 intermediate_size = int (multiplier * intermediate_size )
6060 multiple_of = getattr (config , "block_multiple_of" , 256 )
61- intermediate_size = multiple_of * ((intermediate_size + multiple_of - 1 ) // multiple_of )
61+ intermediate_size = multiple_of * (
62+ (intermediate_size + multiple_of - 1 ) // multiple_of
63+ )
6264
6365 return cls (
6466 hidden_size = config .hidden_size ,
@@ -83,7 +85,9 @@ class LFM2Builder(ONNXBuilderBase):
8385 - Fused Microsoft operators (SimplifiedLayerNormalization, RotaryEmbedding, GroupQueryAttention)
8486 """
8587
86- def __init__ (self , config : LFM2Config , use_integrated_rope : bool = False , vl_naming : bool = False ):
88+ def __init__ (
89+ self , config : LFM2Config , use_integrated_rope : bool = False , vl_naming : bool = False
90+ ):
8791 """
8892 Args:
8993 config: Model configuration
@@ -819,7 +823,10 @@ def build_lm_head(self, hidden_state: str) -> str:
819823 self .add_initializer (final_norm_weight , self .weights ["model.embedding_norm.weight" ])
820824 # Community uses SkipLayerNorm as node name suffix
821825 normed = self .make_skip_layernorm (
822- hidden_state , hidden_state , final_norm_weight , final_norm_output ,
826+ hidden_state ,
827+ hidden_state ,
828+ final_norm_weight ,
829+ final_norm_output ,
823830 name = f"/model/layers.{ num_layers } /final_norm_layernorm/SkipLayerNorm" ,
824831 )
825832
@@ -843,12 +850,10 @@ def build_value_info(self):
843850 the community model format.
844851 """
845852 H = self .config .hidden_size
846- nh = self .config .num_attention_heads
847853 nkv = self .config .num_key_value_heads
848854 hd = self .head_dim
849855 kv_hidden = nkv * hd
850856 intermediate = self .config .intermediate_size
851- L = self .config .conv_L_cache
852857 num_layers = self .config .num_hidden_layers
853858 mask_prefix = "/model/attn_mask_reformat/attn_mask_subgraph"
854859
@@ -872,7 +877,9 @@ def build_value_info(self):
872877
873878 # === Embedding output ===
874879 self .add_value_info (
875- "/model/embed_tokens/Gather/output_0" , TensorProto .FLOAT , ["batch_size" , "sequence_length" , H ]
880+ "/model/embed_tokens/Gather/output_0" ,
881+ TensorProto .FLOAT ,
882+ ["batch_size" , "sequence_length" , H ],
876883 )
877884
878885 # === Per-layer outputs ===
@@ -900,16 +907,24 @@ def build_value_info(self):
900907 ["batch_size" , 3 * H , "sequence_length" ],
901908 )
902909 self .add_value_info (
903- f"{ prefix } /conv/Split/output_0" , TensorProto .FLOAT , ["batch_size" , H , "sequence_length" ]
910+ f"{ prefix } /conv/Split/output_0" ,
911+ TensorProto .FLOAT ,
912+ ["batch_size" , H , "sequence_length" ],
904913 )
905914 self .add_value_info (
906- f"{ prefix } /conv/Split/output_1" , TensorProto .FLOAT , ["batch_size" , H , "sequence_length" ]
915+ f"{ prefix } /conv/Split/output_1" ,
916+ TensorProto .FLOAT ,
917+ ["batch_size" , H , "sequence_length" ],
907918 )
908919 self .add_value_info (
909- f"{ prefix } /conv/Split/output_2" , TensorProto .FLOAT , ["batch_size" , H , "sequence_length" ]
920+ f"{ prefix } /conv/Split/output_2" ,
921+ TensorProto .FLOAT ,
922+ ["batch_size" , H , "sequence_length" ],
910923 )
911924 self .add_value_info (
912- f"{ prefix } /conv/Mul_1/output_0" , TensorProto .FLOAT , ["batch_size" , H , "sequence_length" ]
925+ f"{ prefix } /conv/Mul_1/output_0" ,
926+ TensorProto .FLOAT ,
927+ ["batch_size" , H , "sequence_length" ],
913928 )
914929 self .add_value_info (
915930 f"{ prefix } /conv/Conv_Input/output_0" ,
@@ -921,11 +936,17 @@ def build_value_info(self):
921936 conv_gather_name = "Gather_1" if self .vl_naming else "Gather_for_slice"
922937 self .add_value_info (f"{ prefix } /conv/split_sizes" , TensorProto .INT64 , [3 ])
923938 self .add_value_info (f"{ prefix } /conv/{ shape_name } /output_0" , TensorProto .INT64 , [3 ])
924- self .add_value_info (f"{ prefix } /conv/{ conv_gather_name } /output_0" , TensorProto .INT64 , [])
939+ self .add_value_info (
940+ f"{ prefix } /conv/{ conv_gather_name } /output_0" , TensorProto .INT64 , []
941+ )
925942 self .add_value_info (f"{ prefix } /conv/Neg_Seq_Len/output_0" , TensorProto .INT64 , [])
926- self .add_value_info (f"{ prefix } /conv/Unsqueeze_starts/output_0" , TensorProto .INT64 , [1 ])
927943 self .add_value_info (
928- f"{ prefix } /conv/Mul_2/output_0" , TensorProto .FLOAT , ["batch_size" , H , "sequence_length" ]
944+ f"{ prefix } /conv/Unsqueeze_starts/output_0" , TensorProto .INT64 , [1 ]
945+ )
946+ self .add_value_info (
947+ f"{ prefix } /conv/Mul_2/output_0" ,
948+ TensorProto .FLOAT ,
949+ ["batch_size" , H , "sequence_length" ],
929950 )
930951 self .add_value_info (
931952 f"{ prefix } /conv/Transpose_2/output_0" ,
@@ -1062,7 +1083,9 @@ def build_value_info(self):
10621083 TensorProto .FLOAT ,
10631084 ["batch_size" , "sequence_length" , H ],
10641085 )
1065- self .add_value_info ("/lm_head/Transpose/output_0" , TensorProto .FLOAT , [H , self .config .vocab_size ])
1086+ self .add_value_info (
1087+ "/lm_head/Transpose/output_0" , TensorProto .FLOAT , [H , self .config .vocab_size ]
1088+ )
10661089
10671090 def load_weights (self , model_path : str ):
10681091 """Load weights from HuggingFace model."""
0 commit comments