@@ -107,21 +107,24 @@ def _run_strategy(text, func, strat, args):
107107 )
108108 outdir = write_chunks (chunks , strat )
109109
110- avg_recall , per_questions = 0.0 , []
110+ metrics = {"avg_recall" : 0.0 , "avg_precision" : 0.0 , "avg_f1" : 0.0 }
111+ per_questions = []
111112 questions = (
112113 scorer .load_test_file (args .test_file )
113114 if getattr (args , "test_file" , None )
114115 else None
115116 )
116117 if questions :
117- avg_recall , per_questions = scorer .evaluate_strategy (
118+ metrics , per_questions = scorer .evaluate_strategy (
118119 chunks , questions , args .top_k
119120 )
120121
121122 return {
122123 "strategy" : strat ,
123124 "chunks" : len (chunks ),
124- "avg_recall" : round (avg_recall , 4 ),
125+ "avg_recall" : round (metrics ["avg_recall" ], 4 ),
126+ "avg_precision" : round (metrics ["avg_precision" ], 4 ),
127+ "avg_f1" : round (metrics ["avg_f1" ], 4 ),
125128 "saved" : str (outdir ),
126129 }, per_questions
127130
@@ -137,27 +140,55 @@ def _write_results(results, detail, output):
137140 table .add_column ("strategy" , style = "cyan" )
138141 table .add_column ("chunks" , justify = "right" )
139142 table .add_column ("avg_recall" , justify = "right" )
143+ table .add_column ("avg_precision" , justify = "right" )
144+ table .add_column ("avg_f1" , justify = "right" )
140145 table .add_column ("saved" )
141146 for r in results :
142- avg = r .get ("avg_recall" , 0.0 )
147+ recall = r .get ("avg_recall" , 0.0 )
148+ precision = r .get ("avg_precision" , 0.0 )
149+ f1 = r .get ("avg_f1" , 0.0 )
150+
151+ # Format recall with color
143152 try :
144- pct = f"{ avg * 100 :.2f} %"
153+ recall_pct = f"{ recall * 100 :.2f} %"
145154 except (TypeError , ValueError ):
146- pct = str (avg )
147- if isinstance (avg , float ):
148- if avg >= 0.85 :
155+ recall_pct = str (recall )
156+ if isinstance (recall , float ):
157+ if recall >= 0.85 :
149158 color = "green"
150- elif avg >= 0.7 :
159+ elif recall >= 0.7 :
151160 color = "yellow"
152161 else :
153162 color = "red"
154- pct_cell = f"[{ color } ]{ pct } [/{ color } ]"
163+ recall_cell = f"[{ color } ]{ recall_pct } [/{ color } ]"
155164 else :
156- pct_cell = pct
165+ recall_cell = recall_pct
166+
167+ # Format precision
168+ precision_pct = f"{ precision * 100 :.2f} %" if isinstance (precision , float ) else str (precision )
169+
170+ # Format F1 with color
171+ try :
172+ f1_pct = f"{ f1 * 100 :.2f} %"
173+ except (TypeError , ValueError ):
174+ f1_pct = str (f1 )
175+ if isinstance (f1 , float ):
176+ if f1 >= 0.85 :
177+ color = "green"
178+ elif f1 >= 0.7 :
179+ color = "yellow"
180+ else :
181+ color = "red"
182+ f1_cell = f"[{ color } ]{ f1_pct } [/{ color } ]"
183+ else :
184+ f1_cell = f1_pct
185+
157186 table .add_row (
158187 str (r .get ("strategy" , "" )),
159188 str (r .get ("chunks" , "" )),
160- pct_cell ,
189+ recall_cell ,
190+ precision_pct ,
191+ f1_cell ,
161192 str (r .get ("saved" , "" )),
162193 )
163194 console .print (table )
@@ -172,9 +203,11 @@ def _write_results(results, detail, output):
172203 wpath = Path ("analysis_results.csv" )
173204 with wpath .open ("w" , newline = "" , encoding = "utf-8" ) as f :
174205 w = csv .writer (f )
175- w .writerow (["strategy" , "chunks" , "avg_recall" , "saved" ])
206+ w .writerow (["strategy" , "chunks" , "avg_recall" , "avg_precision" ,
207+ "avg_f1" , "saved" ])
176208 for r in results :
177- w .writerow ([r ["strategy" ], r ["chunks" ], r ["avg_recall" ], r ["saved" ]])
209+ w .writerow ([r ["strategy" ], r ["chunks" ], r ["avg_recall" ],
210+ r ["avg_precision" ], r ["avg_f1" ], r ["saved" ]])
178211 print (str (wpath ))
179212 return
180213 print ("Unsupported output format" )
@@ -191,7 +224,8 @@ def build_parser():
191224 "--strategy" ,
192225 type = str ,
193226 default = "fixed-size" ,
194- choices = ["fixed-size" , "sliding-window" , "paragraph" , "all" ],
227+ choices = ["fixed-size" , "sliding-window" , "paragraph" ,
228+ "recursive-character" , "all" ],
195229 help = "Chunking strategy or all" ,
196230 )
197231 analyze_p .add_argument (
0 commit comments