Skip to content

Commit

Permalink
Fix lint
Browse files (browse the repository at this point in the history)
lint
  • Loading branch information
DiegoTavares committed Dec 12, 2024
1 parent 4ccf61c commit 18bf372
Show file tree
Hide file tree
Showing 2 changed files with 55 additions and 46 deletions.
3 changes: 2 additions & 1 deletion rqd/rqd/rqconstants.py
Original file line number | Diff line number | Diff line change
Expand Up @@ -251,7 +251,8 @@
if config.has_option(__override_section, "BACKUP_CACHE_PATH"):
BACKUP_CACHE_PATH = config.get(__override_section, "BACKUP_CACHE_PATH")
if config.has_option(__override_section, "BACKUP_CACHE_TIME_TO_LIVE_SECONDS"):
BACKUP_CACHE_TIME_TO_LIVE_SECONDS = config.getint(__override_section, "BACKUP_CACHE_TIME_TO_LIVE_SECONDS")
BACKUP_CACHE_TIME_TO_LIVE_SECONDS = config.getint(
__override_section, "BACKUP_CACHE_TIME_TO_LIVE_SECONDS")

__docker_mounts = "docker.mounts"
__docker_config = "docker.config"
Expand Down
98 changes: 53 additions & 45 deletions rqd/rqd/rqcore.py
Original file line number | Diff line number | Diff line change
Expand Up @@ -244,7 +244,8 @@ def recoverCache(self):
# pylint: enable=no-member

running_frame.frameAttendantThread.start()
except:
# pylint: disable=broad-except
except Exception:
pass
# Ignore frames that got corrupted

Expand Down Expand Up @@ -1409,8 +1410,8 @@ def setup(self):
self.rqlog.waitForFile()
# pylint: disable=broad-except
except Exception as e:
err = "Unable to write to %s due to %s" % (runFrame.log_dir_file, e)
raise RuntimeError(err)
err = "Unable to write to %s due to %s" % (runFrame.log_dir_file, e)
raise RuntimeError(err)
finally:
rqd.rqutil.permissionsLow()

Expand All @@ -1422,7 +1423,7 @@ def run(self):
"""Thread initialization"""
if self.recovery_mode:
self.runRecovery()
return;
return

log.info("Monitor frame started for frameId=%s", self.frameId)

Expand Down Expand Up @@ -1459,6 +1460,7 @@ def run(self):
self.postFrameAction()

def postFrameAction(self):
"""Action to be executed after a frame completes its execution"""
self.rqCore.releaseCores(self.runFrame.num_cores,
self.runFrame.attributes.get('CPU_LIST'),
self.runFrame.attributes.get('GPU_LIST')
Expand Down Expand Up @@ -1496,6 +1498,11 @@ def recoverDocker(self):
self.__createEnvVariables()
self.__writeHeader()

tempStatFile = "%srqd-stat-%s-%s" % (self.rqCore.machine.getTempPath(),
frameInfo.frameId,
time.time())
self._tempLocations.append(tempStatFile)

try:
log_stream = None
with self.rqCore.docker_lock:
Expand Down Expand Up @@ -1573,7 +1580,8 @@ def recoverDocker(self):
frameInfo.exitSignal = 0

# Log frame start info
log.warning("Frame %s.%s(%s) with pid %s finished on container %s with exitStatus %s %s ",
log.warning(
"Frame %s.%s(%s) with pid %s finished on container %s with exitStatus %s %s",
runFrame.job_name,
runFrame.frame_name,
frameInfo.frameId,
Expand All @@ -1596,43 +1604,43 @@ def recoverDocker(self):
self.__cleanup()

def runRecovery(self):
"""Recover a frame that was running before this instance started"""
if not self.recovery_mode:
return;

log.info("Monitor recovered frame started for frameId=%s", self.frameId)

runFrame = self.runFrame
run_on_docker = self.rqCore.docker is not None

# pylint: disable=too-many-nested-blocks
try:
self.setup()
# Store frame in cache and register servant
self.rqCore.storeFrame(runFrame.frame_id, self.frameInfo)

if run_on_docker:
self.recoverDocker()
elif platform.system() == "Linux":
# TODO
pass
elif platform.system() == "Windows":
# TODO
pass
elif platform.system() == "Darwin":
# TODO
pass
else:
self.runUnknown()

# pylint: disable=broad-except
except Exception:
log.critical(
"Failed launchFrame: For %s due to: \n%s",
runFrame.frame_id, ''.join(traceback.format_exception(*sys.exc_info())))
# Notifies the cuebot that there was an error launching
self.frameInfo.exitStatus = rqd.rqconstants.EXITSTATUS_FOR_FAILED_LAUNCH
# Delay keeps the cuebot from spamming failing booking requests
time.sleep(10)
finally:
self.postFrameAction()
"""Recover a frame that was running before this instance started"""
if not self.recovery_mode:
return

log.info("Monitor recovered frame started for frameId=%s", self.frameId)

runFrame = self.runFrame
run_on_docker = self.rqCore.docker is not None

# pylint: disable=too-many-nested-blocks
try:
self.setup()
# Store frame in cache and register servant
self.rqCore.storeFrame(runFrame.frame_id, self.frameInfo)

if run_on_docker:
self.recoverDocker()
elif platform.system() == "Linux":
# TODO
pass
elif platform.system() == "Windows":
# TODO
pass
elif platform.system() == "Darwin":
# TODO
pass
else:
self.runUnknown()

# pylint: disable=broad-except
except Exception:
log.critical(
"Failed launchFrame: For %s due to: \n%s",
runFrame.frame_id, ''.join(traceback.format_exception(*sys.exc_info())))
# Notifies the cuebot that there was an error launching
self.frameInfo.exitStatus = rqd.rqconstants.EXITSTATUS_FOR_FAILED_LAUNCH
# Delay keeps the cuebot from spamming failing booking requests
time.sleep(10)
finally:
self.postFrameAction()

0 comments on commit 18bf372

Please sign in to comment.