
Commit

refactor the code and pass all tests
pengwei715 committed Feb 21, 2024
1 parent 10dda0f commit fefbc78
Showing 2 changed files with 4 additions and 14 deletions.
16 changes: 3 additions & 13 deletions llm_judge/llm_judge.py
@@ -915,7 +915,7 @@ def _extract_score_explanation(self, response) -> Dict[str, Any]:
         :param response: the response to extract the score and the explanation from
         :returns: the score and the explanation
         """
-        logger.info(f"Extract the score and the explanation from the response")
+        logger.info(f"Extract the score and the explanation from the {response}")
         try:
             res = json.loads(response)
             result_dict = {}
@@ -930,7 +930,7 @@ def _extract_score_explanation(self, response) -> Dict[str, Any]:
             return result_dict
         except Exception:
             # Adjusted pattern to match the text format and separate lines
-            pattern = r"-?\s?[Ss]core of assistant ([a-zA-Z]+): (\d+).*?-?\s?[Ee]xplanation of assistant [a-zA-Z]+: (.*?)(?=-?\s?score of assistant [a-zA-Z]+:|$)"
+            pattern = r"-?\s?[Ss]core of [aA]ssistant ([a-zA-Z]+): (\d+).*?-?\s?[Ee]xplanation of [aA]ssistant [a-zA-Z]+: (.*?)(?=-?\s?[sS]core of [aA]ssistant [a-zA-Z]+:|$)"
             matches = re.findall(pattern, response, re.DOTALL)

             if matches:
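
For reference, a minimal standalone sketch of what the broadened pattern accepts; the sample judge response below is made up, while the pattern is the one added in this hunk. The previous pattern only matched a lowercase "assistant", so responses that capitalize it fell through to the error branch.

import re

# Hypothetical judge response in plain text (real ones come back from the OpenAI judge model).
response = (
    "- Score of Assistant A: 8\n"
    "- Explanation of Assistant A: Clear and accurate answer.\n"
    "- Score of Assistant B: 6\n"
    "- Explanation of Assistant B: Correct but missing detail."
)

# Pattern copied from the added line above, split across strings only for readability.
pattern = (
    r"-?\s?[Ss]core of [aA]ssistant ([a-zA-Z]+): (\d+).*?"
    r"-?\s?[Ee]xplanation of [aA]ssistant [a-zA-Z]+: "
    r"(.*?)(?=-?\s?[sS]core of [aA]ssistant [a-zA-Z]+:|$)"
)

for name, score, explanation in re.findall(pattern, response, re.DOTALL):
    print(name, score, explanation.strip())
# A 8 Clear and accurate answer.
# B 6 Correct but missing detail.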
@@ -949,7 +949,7 @@ def _extract_score_explanation(self, response) -> Dict[str, Any]:
             )


-class OPENAIJudgeReferenceGrading(OPENAIJudgePairwiseGrading, LLMJudgeReferenceGrading):
+class OPENAIJudgeReferenceGrading(OPENAIJudgePairwiseGrading):
     """
     OPENAI Judge Reference Grading class
     you need to give the name of the metrics, give the grading rubric and the bench mark model to use
@@ -1009,16 +1009,6 @@ def __init__(
             prompt_template,
         )

-    @_open_mpi_handler(worker_inputs="sample_df")
-    def _compute_over_data(self, sample_df: pd.DataFrame) -> pd.DataFrame:
-        """
-        Compute the metrics over a dataset
-        :param sample_df: the data to compute the metrics over
-        :returns: the metrics score and the explanation
-        """
-        return LLMJudgeReferenceGrading._compute_over_data(self, sample_df)
-
-
 MetricsType_dic = {
     "LLMJudgeSingleGrading": LLMJudgeSingleGrading,
     "LLMJudgePairwiseGrading": LLMJudgePairwiseGrading,
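With the single remaining base class above, the deleted _compute_over_data override becomes unnecessary: the method now resolves through OPENAIJudgePairwiseGrading's inheritance chain. A toy sketch of that resolution, with stand-in class bodies rather than the real grading logic:

import pandas as pd

# Stand-ins for the real classes in llm_judge.py; only the inheritance shape matters here.
class LLMJudgePairwiseGrading:
    def _compute_over_data(self, sample_df: pd.DataFrame) -> pd.DataFrame:
        # Placeholder for the actual metric computation.
        return sample_df.assign(score=0, explanation="stub")

class OPENAIJudgePairwiseGrading(LLMJudgePairwiseGrading):
    pass

# After this commit: one base class and no local _compute_over_data override.
class OPENAIJudgeReferenceGrading(OPENAIJudgePairwiseGrading):
    pass

judge = OPENAIJudgeReferenceGrading()
print(judge._compute_over_data(pd.DataFrame({"question": ["q1", "q2"]})))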
2 changes: 1 addition & 1 deletion llm_judge/test_llm_judge.py
@@ -229,8 +229,8 @@ def test_openai_reference_grading_scores(prompt_fixture):
         model_judge=OPENAI_MODEL,
         model_bench_mark=BENCHMARK_MODEL,
         model_bench_mark_config=BENCHMARK_CONFIG,
-        model_bench_mark_infer_config=BENCHMARK_INFER_CONFIG,
         tokenizer_bench_mark_config=TOKENIZER_BENCHMARK_CONFIG,
+        model_bench_mark_infer_config=BENCHMARK_INFER_CONFIG,
         prompt_config=prompt_config,
     )

