
Commit

refactor the code and pass all tests
pengwei715 committed Feb 21, 2024
1 parent 10dda0f commit fefbc78
Showing 2 changed files with 4 additions and 14 deletions.
16 changes: 3 additions & 13 deletions llm_judge/llm_judge.py
@@ -915,7 +915,7 @@ def _extract_score_explanation(self, response) -> Dict[str, Any]:
         :param response: the response to extract the score and the explanation from
         :returns: the score and the explanation
         """
-        logger.info(f"Extract the score and the explanation from the response")
+        logger.info(f"Extract the score and the explanation from the {response}")
         try:
             res = json.loads(response)
             result_dict = {}
@@ -930,7 +930,7 @@ def _extract_score_explanation(self, response) -> Dict[str, Any]:
             return result_dict
         except Exception:
             # Adjusted pattern to match the text format and separate lines
-            pattern = r"-?\s?[Ss]core of assistant ([a-zA-Z]+): (\d+).*?-?\s?[Ee]xplanation of assistant [a-zA-Z]+: (.*?)(?=-?\s?score of assistant [a-zA-Z]+:|$)"
+            pattern = r"-?\s?[Ss]core of [aA]ssistant ([a-zA-Z]+): (\d+).*?-?\s?[Ee]xplanation of [aA]ssistant [a-zA-Z]+: (.*?)(?=-?\s?[sS]core of [aA]ssistant [a-zA-Z]+:|$)"
             matches = re.findall(pattern, response, re.DOTALL)

             if matches:
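
For reference, a minimal standalone sketch of what the broadened pattern accepts; the sample judge response below is made up, while the pattern is the one added in this hunk. The previous pattern only matched a lowercase "assistant", so responses that capitalize it fell through to the error branch.

import re

# Hypothetical judge response in plain text (real ones come back from the OpenAI judge model).
response = (
    "- Score of Assistant A: 8\n"
    "- Explanation of Assistant A: Clear and accurate answer.\n"
    "- Score of Assistant B: 6\n"
    "- Explanation of Assistant B: Correct but missing detail."
)

# Pattern copied from the added line above, split across strings only for readability.
pattern = (
    r"-?\s?[Ss]core of [aA]ssistant ([a-zA-Z]+): (\d+).*?"
    r"-?\s?[Ee]xplanation of [aA]ssistant [a-zA-Z]+: "
    r"(.*?)(?=-?\s?[sS]core of [aA]ssistant [a-zA-Z]+:|$)"
)

for name, score, explanation in re.findall(pattern, response, re.DOTALL):
    print(name, score, explanation.strip())
# A 8 Clear and accurate answer.
# B 6 Correct but missing detail.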
@@ -949,7 +949,7 @@ def _extract_score_explanation(self, response) -> Dict[str, Any]:
             )


-class OPENAIJudgeReferenceGrading(OPENAIJudgePairwiseGrading, LLMJudgeReferenceGrading):
+class OPENAIJudgeReferenceGrading(OPENAIJudgePairwiseGrading):
     """
     OPENAI Judge Reference Grading class
     you need to give the name of the metrics, give the grading rubric and the bench mark model to use
@@ -1009,16 +1009,6 @@ def __init__(
             prompt_template,
         )

-    @_open_mpi_handler(worker_inputs="sample_df")
-    def _compute_over_data(self, sample_df: pd.DataFrame) -> pd.DataFrame:
-        """
-        Compute the metrics over a dataset
-        :param sample_df: the data to compute the metrics over
-        :returns: the metrics score and the explanation
-        """
-        return LLMJudgeReferenceGrading._compute_over_data(self, sample_df)
-
-
 MetricsType_dic = {
     "LLMJudgeSingleGrading": LLMJudgeSingleGrading,
     "LLMJudgePairwiseGrading": LLMJudgePairwiseGrading,
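With the single remaining base class above, the deleted _compute_over_data override becomes unnecessary: the method now resolves through OPENAIJudgePairwiseGrading's inheritance chain. A toy sketch of that resolution, with stand-in class bodies rather than the real grading logic:

import pandas as pd

# Stand-ins for the real classes in llm_judge.py; only the inheritance shape matters here.
class LLMJudgePairwiseGrading:
    def _compute_over_data(self, sample_df: pd.DataFrame) -> pd.DataFrame:
        # Placeholder for the actual metric computation.
        return sample_df.assign(score=0, explanation="stub")

class OPENAIJudgePairwiseGrading(LLMJudgePairwiseGrading):
    pass

# After this commit: one base class and no local _compute_over_data override.
class OPENAIJudgeReferenceGrading(OPENAIJudgePairwiseGrading):
    pass

judge = OPENAIJudgeReferenceGrading()
print(judge._compute_over_data(pd.DataFrame({"question": ["q1", "q2"]})))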
2 changes: 1 addition & 1 deletion llm_judge/test_llm_judge.py
@@ -229,8 +229,8 @@ def test_openai_reference_grading_scores(prompt_fixture):
         model_judge=OPENAI_MODEL,
         model_bench_mark=BENCHMARK_MODEL,
         model_bench_mark_config=BENCHMARK_CONFIG,
-        model_bench_mark_infer_config=BENCHMARK_INFER_CONFIG,
         tokenizer_bench_mark_config=TOKENIZER_BENCHMARK_CONFIG,
+        model_bench_mark_infer_config=BENCHMARK_INFER_CONFIG,
         prompt_config=prompt_config,
     )

