Skip to content

Commit

Permalink
Restart UPS Agent every 60 minutes (#592)
Browse files Browse the repository at this point in the history
* exit ups agent after 60 min

This is a temporary workaround to a memory leak in this agent.

* [pre-commit.ci] auto fixes from pre-commit.com hooks

for more information, see https://pre-commit.ci

* no crash in test mode

* address comments

* fix log

* update docs

* fix condition

---------

Co-authored-by: pre-commit-ci[bot] <66853113+pre-commit-ci[bot]@users.noreply.github.com>
  • Loading branch information
davidvng and pre-commit-ci[bot] authored Dec 11, 2023
1 parent c19e74d commit bb03733
Show file tree
Hide file tree
Showing 2 changed files with 22 additions and 3 deletions.
7 changes: 6 additions & 1 deletion docs/agents/ups.rst
Original file line number Diff line number Diff line change
Expand Up @@ -31,10 +31,13 @@ using all of the available arguments::
'arguments': [['--address', '10.10.10.50'],
['--port', 161],
['--mode', 'acq'],
['--snmp-version', 1]]},
['--snmp-version', 1],
['--restart-time', 60]]},

.. note::
The ``--address`` argument should be the address of the UPS on the network.
The ``--restart-time`` argument should be set to the number of minutes after
which the agent exits. Setting it to 0 (the default) disables the automatic exit.

Docker Compose
``````````````
Expand All @@ -57,6 +60,8 @@ example docker-compose service configuration is shown here::

The ``LOGLEVEL`` environment variable can be used to set the log level for
debugging. The default level is "info".
If not using HostManager, set ``restart: unless-stopped`` in the docker-compose
service configuration so that the docker container is restarted automatically after the agent exits.

Description
-----------
Expand Down
18 changes: 16 additions & 2 deletions socs/agents/ups/agent.py
Original file line number Diff line number Diff line change
@@ -1,5 +1,6 @@
import argparse
import os
import signal
import time

import txaio
Expand Down Expand Up @@ -172,7 +173,7 @@ class UPSAgent:
txaio logger object, created by the OCSAgent
"""

def __init__(self, agent, address, port=161, version=1):
def __init__(self, agent, address, port=161, version=1, restart_time=0):
self.agent = agent
self.is_streaming = False
self.log = self.agent.log
Expand All @@ -182,6 +183,7 @@ def __init__(self, agent, address, port=161, version=1):
self.version = version
self.snmp = SNMPTwister(address, port)
self.connected = True
self.restart = restart_time

self.lastGet = 0

Expand Down Expand Up @@ -308,7 +310,10 @@ def acq(self, session, params=None):

session.set_status('running')
self.is_streaming = True
timeout = time.time() + 60 * self.restart # exit loop after self.restart minutes
while self.is_streaming:
if ((self.restart != 0) and (time.time() > timeout)):
break
yield dsleep(1)
if not self.connected:
self.log.error('No SNMP response. Check your connection.')
Expand Down Expand Up @@ -432,6 +437,12 @@ def acq(self, session, params=None):
if params['test_mode']:
break

# Exit agent to release memory
# Add "restart: unless-stopped" to docker-compose to automatically restart container
if ((not params['test_mode']) and (timeout != 0) and (self.is_streaming)):
self.log.info(f"{self.restart} minutes have elasped. Exiting agent.")
os.kill(os.getppid(), signal.SIGTERM)

return True, "Finished Recording"

def _stop_acq(self, session, params=None):
Expand Down Expand Up @@ -461,6 +472,8 @@ def add_agent_args(parser=None):
pgroup.add_argument("--snmp-version", default='1', choices=['1', '2', '3'],
help="SNMP version for communication. Must match "
+ "configuration on the UPS.")
pgroup.add_argument("--restart-time", default=0,
help="Number of minutes before restarting agent.")
pgroup.add_argument("--mode", choices=['acq', 'test'])

return parser
Expand All @@ -484,7 +497,8 @@ def main(args=None):
p = UPSAgent(agent,
address=args.address,
port=int(args.port),
version=int(args.snmp_version))
version=int(args.snmp_version),
restart_time=int(args.restart_time))

agent.register_process("acq",
p.acq,
Expand Down

0 comments on commit bb03733

Please sign in to comment.