|
9 | 9 | from multiprocessing import Queue
|
10 | 10 |
|
11 | 11 | import fedml
|
| 12 | +from .device_model_msg_object import FedMLModelMsgObject |
12 | 13 | from fedml.core.mlops import MLOpsRuntimeLog, MLOpsConfigs
|
13 | 14 | from fedml.core.mlops.mlops_runtime_log import MLOpsFormatter
|
14 | 15 | from .device_client_constants import ClientConstants
|
@@ -274,10 +275,14 @@ def process_deployment_result_message(self, topic=None, payload=None):
|
274 | 275 |
|
275 | 276 | # Avoid endless loop, if the rollback also failed, we should report the failure to the MLOps
|
276 | 277 | if self.replica_controller.under_rollback or self.is_fresh_endpoint:
|
| 278 | + logging.info(f"process deploy result, under_rollback {self.replica_controller.under_rollback}, is_fresh_endpoint {self.is_fresh_endpoint}") |
277 | 279 | self.send_deployment_status(
|
278 | 280 | end_point_id, end_point_name, payload_json["model_name"], "",
|
279 | 281 | ServerConstants.MSG_MODELOPS_DEPLOYMENT_STATUS_FAILED,
|
280 | 282 | message_center=self.message_center)
|
| 283 | + # When reporting a failure to MLOps, delete any replicas that were successfully deployed and release their GPUs |
| 284 | + model_msg_object = FedMLModelMsgObject(topic, payload) |
| 285 | + self.send_deployment_delete_request_to_edges(payload, model_msg_object, message_center=self.message_center) |
281 | 286 | return
|
282 | 287 |
|
283 | 288 | # Failure handler, send the rollback message to the worker devices only if it has not been rollback
|
|
0 commit comments