@@ -1497,25 +1497,34 @@ def evaluate_doc_chain(
1497
1497
new_c = 0
1498
1498
1499
1499
elif "n" in self .eval_llm_params or self .query_eval_check_number == 1 :
1500
+
1501
def _parse_outputs(out) -> List[str]:
    """Collect and parse every generation held by an eval llm result.

    Args:
        out: result object exposing a ``generations`` iterable. Each item
            is read through ``.text`` and
            ``.generation_info["finish_reason"]``.

    Returns:
        One ``parse_eval_output`` result per generation, in the order the
        generations appear in ``out``.

    Raises:
        AssertionError: if ``out`` contains no generations.
        Exception: whatever ``parse_eval_output`` raises on an output it
            cannot handle is propagated unchanged.
    """
    reasons = [
        gen.generation_info["finish_reason"] for gen in out.generations
    ]
    outputs = [gen.text for gen in out.generations]
    # don't always crash if finish_reason is not stop, because it can
    # sometimes still be parsed: only report it.
    if not all(r == "stop" for r in reasons):
        red(
            f"Unexpected generation finish_reason: '{ reasons } ' for generations: '{ outputs } '. Expected 'stop'"
        )
    # Raise explicitly instead of using `assert`: assert statements are
    # removed under `python -O`, which would let an empty result through.
    # AssertionError is kept so the exception type seen by callers is
    # unchanged.
    if not outputs:
        raise AssertionError("No generations found by query eval llm")
    # parse_eval_output will crash if the output is bad anyway
    return [parse_eval_output(o) for o in outputs]
1515
+
1500
1516
try :
1501
1517
out = self .eval_llm ._generate_with_cache (
1502
1518
prompts .evaluate .format_messages (** inputs )
1503
1519
)
1520
+ outputs = _parse_outputs (out )
1504
1521
except Exception : # retry without cache
1522
+ yel (f"Failed to run eval_llm on an input. Retrying without cache." )
1505
1523
out = self .eval_llm ._generate (
1506
1524
prompts .evaluate .format_messages (** inputs )
1507
1525
)
1508
- reasons = [
1509
- gen .generation_info ["finish_reason" ] for gen in out .generations
1510
- ]
1511
- outputs = [gen .text for gen in out .generations ]
1512
- # don't crash if finish_reason is not stop, because it can sometimes still be parsed.
1513
- if not all (r in ["stop" , "length" ] for r in reasons ):
1514
- red (
1515
- f"Unexpected generation finish_reason: '{ reasons } ' for generations: '{ outputs } '"
1516
- )
1517
- assert outputs , "No generations found by query eval llm"
1518
- outputs = [parse_eval_output (o ) for o in outputs ]
1526
+ outputs = _parse_outputs (out )
1527
+
1519
1528
if out .llm_output :
1520
1529
new_p = out .llm_output ["token_usage" ]["prompt_tokens" ]
1521
1530
new_c = out .llm_output ["token_usage" ]["completion_tokens" ]
@@ -1533,7 +1542,7 @@ async def do_eval(subinputs):
1533
1542
val = await self .eval_llm ._agenerate_with_cache (
1534
1543
prompts .evaluate .format_messages (** subinputs )
1535
1544
)
1536
- except Exception : # retrywithout cache
1545
+ except Exception : # retry without cache
1537
1546
val = await self .eval_llm ._agenerate (
1538
1547
prompts .evaluate .format_messages (** subinputs )
1539
1548
)
0 commit comments