Evaluate Base Model Endpoints giving errors #173

Open
eitansela opened this issue Dec 25, 2024 · 0 comments
Labels
bug Something isn't working

Comments

@eitansela
Contributor

eitansela commented Dec 25, 2024

Operating System

MacOS

Version Information

Python Version: 3.12.5
azure-ai-evaluation package version: 1.1.0
promptflow-tracing package version: 1.16.2
promptflow-core package version: 1.16.2

Steps to reproduce

  1. Run the Evaluate_Base_Model_Endpoint.ipynb notebook.
  2. Run the cell with the loop over the models, invoking the evaluation API:
for model in models:
    randomNum = random.randint(1111, 9999)
    results = evaluate(
        evaluation_name="Eval-Run-" + str(randomNum) + "-" + model.title(),
        data=path,
        target=ModelEndpoints(env_var, model),
        evaluators={
            "relevance": relevance_evaluator,
        },
        evaluator_config={
            "relevance": {
                "column_mapping": {
                    "response": "${target.response}",
                    "context": "${data.context}",
                    "query": "${data.query}",
                },
            },
        },
    )
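
For context, the notebook defines the ModelEndpoints target class inline, so it lives in the notebook's __main__ module. The following is a simplified, hypothetical sketch of such a callable target (the constructor arguments and returned keys are inferred from the call above and the "${target.response}" column mapping, not copied from the notebook):

# Hypothetical sketch of a callable target defined inside the notebook itself,
# i.e. in the __main__ module. The real ModelEndpoints class in the sample may differ.
from typing import TypedDict


class Response(TypedDict):
    query: str
    response: str


class ModelEndpoints:
    def __init__(self, env: dict, model_type: str):
        self.env = env
        self.model_type = model_type

    def __call__(self, query: str) -> Response:
        # In the real notebook this would call the selected model endpoint;
        # a placeholder answer is returned here instead of an HTTP request.
        answer = f"[{self.model_type}] placeholder answer to: {query}"
        return {"query": query, "response": answer}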

Expected behavior

Evaluation for each model completes successfully.

Actual behavior

Getting the following error:

EvaluationException: (InternalError) Generate meta failed, detail error:
["Failed to collect flow entry '__main__:ModelEndpoints' in module '<module>'."]

Additional information

Full stack trace:

---------------------------------------------------------------------------
GenerateFlowMetaJsonError                 Traceback (most recent call last)
File ~/Dev/github/azureai-samples/scenarios/evaluate/.venv/lib/python3.12/site-packages/azure/ai/evaluation/_evaluate/_evaluate.py:610, in evaluate(data, evaluators, evaluation_name, target, evaluator_config, azure_ai_project, output_path, **kwargs)
    609 try:
--> 610     return _evaluate(
    611         evaluation_name=evaluation_name,
    612         target=target,
    613         data=data,
    614         evaluators=evaluators,
    615         evaluator_config=evaluator_config,
    616         azure_ai_project=azure_ai_project,
    617         output_path=output_path,
    618         **kwargs,
    619     )
    620 except Exception as e:
    621     # Handle multiprocess bootstrap error

File ~/Dev/github/azureai-samples/scenarios/evaluate/.venv/lib/python3.12/site-packages/azure/ai/evaluation/_evaluate/_evaluate.py:704, in _evaluate(evaluators, evaluation_name, target, data, evaluator_config, azure_ai_project, output_path, **kwargs)
    703 if data is not None and target is not None:
--> 704     input_data_df, target_generated_columns, target_run = _apply_target_to_data(
    705         target, data, pf_client, input_data_df, evaluation_name, **kwargs
    706     )
    708     for evaluator_name, mapping in column_mapping.items():

File ~/Dev/github/azureai-samples/scenarios/evaluate/.venv/lib/python3.12/site-packages/azure/ai/evaluation/_evaluate/_evaluate.py:470, in _apply_target_to_data(target, data, pf_client, initial_data, evaluation_name, **kwargs)
    469 with TargetRunContext():
--> 470     run: Run = pf_client.run(
    471         flow=target,
    472         display_name=evaluation_name,
    473         data=data,
    474         stream=True,
    475         name=_run_name,
    476     )
    478 target_output: pd.DataFrame = pf_client.runs.get_details(run, all_results=True)

File ~/Dev/github/azureai-samples/scenarios/evaluate/.venv/lib/python3.12/site-packages/promptflow/_sdk/_pf_client.py:301, in PFClient.run(self, flow, data, run, column_mapping, variant, connections, environment_variables, name, display_name, tags, resume_from, code, init, **kwargs)
    246 """Run flow against provided data or run.
    247 
    248 .. note::
   (...)
    299 :rtype: ~promptflow.entities.Run
    300 """
--> 301 return self._run(
    302     flow=flow,
    303     data=data,
    304     run=run,
    305     column_mapping=column_mapping,
    306     variant=variant,
    307     connections=connections,
    308     environment_variables=environment_variables,
    309     name=name,
    310     display_name=display_name,
    311     tags=tags,
    312     resume_from=resume_from,
    313     code=code,
    314     init=init,
    315     **kwargs,
    316 )

File ~/Dev/github/azureai-samples/scenarios/evaluate/.venv/lib/python3.12/site-packages/promptflow/_sdk/_pf_client.py:226, in PFClient._run(self, flow, data, run, column_mapping, variant, connections, environment_variables, properties, name, display_name, tags, resume_from, code, init, **kwargs)
    210 run = Run(
    211     name=name,
    212     display_name=display_name,
   (...)
    224     dynamic_callable=dynamic_callable,
    225 )
--> 226 return self.runs.create_or_update(run=run, **kwargs)

File ~/Dev/github/azureai-samples/scenarios/evaluate/.venv/lib/python3.12/site-packages/promptflow/_sdk/_telemetry/activity.py:265, in monitor_operation.<locals>.monitor.<locals>.wrapper(self, *args, **kwargs)
    264     thread.start()
--> 265 return f(self, *args, **kwargs)

File ~/Dev/github/azureai-samples/scenarios/evaluate/.venv/lib/python3.12/site-packages/promptflow/_sdk/operations/_run_operations.py:135, in RunOperations.create_or_update(self, run, **kwargs)
    133 from promptflow._sdk._orchestrator import RunSubmitter
--> 135 created_run = RunSubmitter(client=self._client).submit(run=run, **kwargs)
    136 if stream:

File ~/Dev/github/azureai-samples/scenarios/evaluate/.venv/lib/python3.12/site-packages/promptflow/_sdk/_orchestrator/run_submitter.py:52, in RunSubmitter.submit(self, run, stream, **kwargs)
     51     wait(tasks, return_when=ALL_COMPLETED)
---> 52     task_results = [task.result() for task in tasks]
     54 # upload run to cloud if the trace destination is set to cloud

File ~/.pyenv/versions/3.12.5/lib/python3.12/concurrent/futures/_base.py:449, in Future.result(self, timeout)
    448 elif self._state == FINISHED:
--> 449     return self.__get_result()
    451 self._condition.wait(timeout)

File ~/.pyenv/versions/3.12.5/lib/python3.12/concurrent/futures/_base.py:401, in Future.__get_result(self)
    400 try:
--> 401     raise self._exception
    402 finally:
    403     # Break a reference cycle with the exception in self._exception

File ~/.pyenv/versions/3.12.5/lib/python3.12/concurrent/futures/thread.py:58, in _WorkItem.run(self)
     57 try:
---> 58     result = self.fn(*self.args, **self.kwargs)
     59 except BaseException as exc:

File ~/Dev/github/azureai-samples/scenarios/evaluate/.venv/lib/python3.12/site-packages/promptflow/_sdk/_orchestrator/run_submitter.py:131, in RunSubmitter._run_bulk(self, run, stream, **kwargs)
    130 flow_obj = load_flow(source=run.flow)
--> 131 with flow_overwrite_context(
    132     flow_obj, tuning_node, variant, connections=run.connections, init_kwargs=run.init
    133 ) as flow:
    134     self._submit_bulk_run(flow=flow, run=run, local_storage=local_storage, **kwargs)

File ~/.pyenv/versions/3.12.5/lib/python3.12/contextlib.py:137, in _GeneratorContextManager.__enter__(self)
    136 try:
--> 137     return next(self.gen)
    138 except StopIteration:

File ~/Dev/github/azureai-samples/scenarios/evaluate/.venv/lib/python3.12/site-packages/promptflow/_sdk/_orchestrator/utils.py:279, in flow_overwrite_context(flow, tuning_node, variant, connections, overrides, drop_node_variants, init_kwargs)
    278 with tempfile.TemporaryDirectory() as temp_dir:
--> 279     override_flow_yaml(
    280         flow=flow,
    281         flow_dag=flow_dag,
    282         flow_dir_path=flow_dir_path,
    283         tuning_node=tuning_node,
    284         variant=variant,
    285         connections=connections,
    286         overrides=overrides,
    287         drop_node_variants=drop_node_variants,
    288         init_kwargs=init_kwargs,
    289     )
    290     flow_path = dump_flow_dag_according_to_content(flow_dag=flow_dag, flow_path=Path(temp_dir))

File ~/Dev/github/azureai-samples/scenarios/evaluate/.venv/lib/python3.12/site-packages/promptflow/_sdk/_orchestrator/utils.py:232, in override_flow_yaml(flow, flow_dag, flow_dir_path, tuning_node, variant, connections, overrides, drop_node_variants, init_kwargs)
    229             logger.warning(
    230                 "Eager flow does not support tuning node, variant, connection override. " f"Dropping params {param}"
    231             )
--> 232     update_signatures(code=flow_dir_path, data=flow_dag)
    233 else:
    234     # always overwrite variant since we need to overwrite default variant if not specified.

File ~/Dev/github/azureai-samples/scenarios/evaluate/.venv/lib/python3.12/site-packages/promptflow/_sdk/_utilities/signature_utils.py:148, in update_signatures(code, data)
    147 entry = data.get("entry")
--> 148 signatures, _, _ = infer_signature_for_flex_flow(
    149     entry=entry,
    150     code=code.as_posix(),
    151     language=data.get(LANGUAGE_KEY, "python"),
    152     validate=False,
    153     include_primitive_output=True,
    154 )
    155 # TODO: allow user only specify partial signatures in the yaml

File ~/Dev/github/azureai-samples/scenarios/evaluate/.venv/lib/python3.12/site-packages/promptflow/_sdk/_utilities/signature_utils.py:72, in infer_signature_for_flex_flow(entry, language, code, keep_entry, validate, include_primitive_output)
     71     # TODO: extract description?
---> 72     flow_meta = inspector_proxy.get_entry_meta(entry=entry, working_dir=code)
     73 elif code is not None:
     74     # TODO: support specifying code when inferring signature?

File ~/Dev/github/azureai-samples/scenarios/evaluate/.venv/lib/python3.12/site-packages/promptflow/_proxy/_python_inspector_proxy.py:43, in PythonInspectorProxy.get_entry_meta(self, entry, working_dir, **kwargs)
     42 # generate flow.json only for eager flow for now
---> 43 return _generate_flow_meta(
     44     flow_directory=working_dir,
     45     source_path=resolve_python_entry_file(entry=flow_dag.get("entry"), working_dir=working_dir),
     46     data=flow_dag,
     47     timeout=timeout,
     48     load_in_subprocess=load_in_subprocess,
     49 )

File ~/Dev/github/azureai-samples/scenarios/evaluate/.venv/lib/python3.12/site-packages/promptflow/_core/entry_meta_generator.py:80, in _generate_flow_meta(flow_directory, source_path, data, timeout, load_in_subprocess)
     79     error_message = "Generate meta failed, detail error:\n" + str(exception_list)
---> 80     raise GenerateFlowMetaJsonError(error_message)
     81 return dict(meta_dict)

GenerateFlowMetaJsonError: Generate meta failed, detail error:
["Failed to collect flow entry '__main__:ModelEndpoints' in module '<module>'."]

The above exception was the direct cause of the following exception:

EvaluationException                       Traceback (most recent call last)
Cell In[9], line 23
     21 for model in models:
     22     randomNum = random.randint(1111, 9999)
---> 23     results = evaluate(
     24         evaluation_name="Eval-Run-" + str(randomNum) + "-" + model.title(),
     25         data=path,
     26         target=ModelEndpoints(env_var, model),
     27         evaluators={
     28             "relevance": relevance_evaluator,
     29         },
     30         evaluator_config={
     31             "relevance": {
     32                 "column_mapping": {
     33                     "response": "${target.response}",
     34                     "context": "${data.context}",
     35                     "query": "${data.query}",
     36                 },
     37             },
     38         },
     39     )

File ~/Dev/github/azureai-samples/scenarios/evaluate/.venv/lib/python3.12/site-packages/azure/ai/evaluation/_evaluate/_evaluate.py:644, in evaluate(data, evaluators, evaluation_name, target, evaluator_config, azure_ai_project, output_path, **kwargs)
    641 # Ensure a consistent user experience when encountering errors by converting
    642 # all other exceptions to EvaluationException.
    643 if not isinstance(e, EvaluationException):
--> 644     raise EvaluationException(
    645         message=str(e),
    646         target=ErrorTarget.EVALUATE,
    647         category=ErrorCategory.FAILED_EXECUTION,
    648         blame=ErrorBlame.SYSTEM_ERROR,
    649     ) from e
    651 raise e

EvaluationException: (InternalError) Generate meta failed, detail error:
["Failed to collect flow entry '__main__:ModelEndpoints' in module '<module>'."]
eitansela added the bug label on Dec 25, 2024
eitansela added a commit to eitansela/azureai-samples that referenced this issue Dec 25, 2024
Moved `ModelEndpoints` class to a new Python file.
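
The referenced commit works around the error by moving the target class out of the notebook, so promptflow can resolve the flow entry from an importable module rather than from '__main__'. A minimal sketch of that workaround, assuming a hypothetical file name model_endpoints.py placed next to the notebook:

# model_endpoints.py (hypothetical file name): define the target class in a
# regular module so promptflow can collect the entry as
# 'model_endpoints:ModelEndpoints' instead of failing on '__main__:ModelEndpoints'.
class ModelEndpoints:
    def __init__(self, env: dict, model_type: str):
        self.env = env
        self.model_type = model_type

    def __call__(self, query: str) -> dict:
        # Call the selected endpoint and return the keys expected by the
        # evaluator column mapping ("${target.response}").
        return {"query": query, "response": "..."}


# In the notebook, import the class instead of defining it inline:
# from model_endpoints import ModelEndpoints
# results = evaluate(..., target=ModelEndpoints(env_var, model), ...)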