Handle no valid eval results for mt_bench

Signed-off-by: Dan McPherson <[email protected]>
instructlab · Nov 14, 2024 · 6c02db7 · 6c02db7
1 parent 8e32704
commit 6c02db7
Show file tree

Hide file tree

Showing 2 changed files with 19 additions and 2 deletions.
diff --git a/src/instructlab/eval/exceptions.py b/src/instructlab/eval/exceptions.py
@@ -123,6 +123,17 @@ def __init__(self, tasks_dir) -> None:
         self.tasks_dir = tasks_dir
         self.message = f"Invalid Tasks Dir: {tasks_dir}"
 
+class InvalidEvaluationResult(EvalError):
+    """
+    Error raised when an evaluation yields invalid (e.g. empty) results
+    Attributes
+        message         error message to be printed on raise
+    """
+
+    def __init__(self, message) -> None:
+        super().__init__()
+        self.message = message
+
 
 class ModelServingAPIError(EvalError):
     """

diff --git a/src/instructlab/eval/mt_bench_judgment.py b/src/instructlab/eval/mt_bench_judgment.py
@@ -8,6 +8,9 @@
 import numpy as np
 import pandas as pd
 
+# First Party
+from instructlab.eval import exceptions
+
 # Local
 from .logger_config import setup_logger
 from .mt_bench_common import (
@@ -97,8 +100,11 @@ def make_judgment(
     turn_scores = []
     # First turn
     df_1 = judgment_df[judgment_df["turn"] == 1].groupby(["model", "turn"]).mean()
-    overall_score = df_1["score"].iloc[0]
-    turn_scores.append(overall_score)
+    if len(df_1.index) > 0:
+        overall_score = df_1["score"].iloc[0]
+        turn_scores.append(overall_score)
+    else:
+        raise exceptions.InvalidEvaluationResult("Evaluation provided no result. See logs for more details.")
 
     if bench_name == "mt_bench":
         # Second turn