Skip to content

Commit

Permalink
[bugfix] Add deployment delete request to edges when deployment fails
Browse files Browse the repository at this point in the history
  • Loading branch information
charlieyl committed Feb 11, 2025
1 parent b68a835 commit 922f7b5
Show file tree
Hide file tree
Showing 2 changed files with 6 additions and 2 deletions.
Original file line number Diff line number Diff line change
Expand Up @@ -9,6 +9,7 @@
from multiprocessing import Queue

import fedml
from .device_model_msg_object import FedMLModelMsgObject
from fedml.core.mlops import MLOpsRuntimeLog, MLOpsConfigs
from fedml.core.mlops.mlops_runtime_log import MLOpsFormatter
from .device_client_constants import ClientConstants
Expand Down Expand Up @@ -274,10 +275,14 @@ def process_deployment_result_message(self, topic=None, payload=None):

# Avoid an endless loop: if the rollback also failed, we should report the failure to MLOps
if self.replica_controller.under_rollback or self.is_fresh_endpoint:
logging.info(f"process deploy result, under_rollback {self.replica_controller.under_rollback}, is_fresh_endpoint {self.is_fresh_endpoint}")
self.send_deployment_status(
end_point_id, end_point_name, payload_json["model_name"], "",
ServerConstants.MSG_MODELOPS_DEPLOYMENT_STATUS_FAILED,
message_center=self.message_center)
# When reporting the failure to MLOps, also delete any replicas that were already deployed successfully so that their GPUs are released
model_msg_object = FedMLModelMsgObject(topic, payload)
self.send_deployment_delete_request_to_edges(payload, model_msg_object, message_center=self.message_center)
return

# Failure handler: send the rollback message to the worker devices only if a rollback has not already been performed
Expand Down
3 changes: 1 addition & 2 deletions python/fedml/core/mlops/mlops_device_perfs.py
Original file line number Diff line number Diff line change
Expand Up @@ -128,8 +128,7 @@ def setup_realtime_stats_process(self, sys_args):
self.monitor_run_master_process.start()

def report_device_realtime_stats_entry(self, sys_event, role, is_client=False):
logging.info(f"Report device realtime stats, role {role}, is_client {is_client}, process id {os.getpid()}")

# logging.info(f"Report device realtime stats, role {role}, process id {os.getpid()}")
self.device_realtime_stats_event = sys_event
mqtt_mgr = MqttManager(
self.args.mqtt_config_path["BROKER_HOST"],
Expand Down

0 comments on commit 922f7b5

Please sign in to comment.