Skip to content

Commit

Permalink
bgpcfgd: add support for software bfd sessions (#20981)
Browse files Browse the repository at this point in the history
bgpcfgd: add support for software bfd sessions
  • Loading branch information
abdbaig authored Feb 6, 2025
1 parent accf5b3 commit a35e23c
Show file tree
Hide file tree
Showing 8 changed files with 1,150 additions and 0 deletions.
11 changes: 11 additions & 0 deletions dockers/docker-fpm-frr/frr/supervisord/supervisord.conf.j2
Original file line number Diff line number Diff line change
Expand Up @@ -206,6 +206,17 @@ dependent_startup_wait_for=bgpd:running

{% endif %}

{% if DEVICE_METADATA.localhost.switch_type is defined and DEVICE_METADATA.localhost.switch_type == "dpu" %}
; bfdmon: rendered only when switch_type is "dpu". Runs /usr/local/bin/bfdmon,
; starts it automatically, restarts it when it exits, and sends both stdout
; and stderr to syslog.
[program:bfdmon]
command=/usr/local/bin/bfdmon
priority=6
autostart=true
autorestart=true
startsecs=0
stdout_logfile=syslog
stderr_logfile=syslog
{% endif %}

{% if DEVICE_METADATA.localhost.docker_routing_config_mode is defined and (DEVICE_METADATA.localhost.docker_routing_config_mode == "unified" or DEVICE_METADATA.localhost.docker_routing_config_mode == "split-unified") %}
[program:vtysh_b]
command=/usr/bin/vtysh -b
Expand Down
Empty file.
141 changes: 141 additions & 0 deletions src/sonic-bgpcfgd/bfdmon/bfdmon.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,141 @@
import json
import subprocess
import time
import syslog
from swsscommon import swsscommon
from sonic_py_common.general import getstatusoutput_noshell

class BfdFrrMon:
    """Publish FRR's set of 'up' BFD peers to STATE_DB.

    Each poll asks FRR (through ``vtysh``) for its BFD peers, keeps the ones
    whose status is ``"up"``, splits them into IPv4 and IPv6 sets, and writes
    both sets as JSON lists to the ``DPU_BFD_PROBE_STATE`` table whenever
    they differ from what was written last time.
    """

    def __init__(self):
        # Peer sets as they were last written to STATE_DB.
        self.local_v4_peers = set()
        self.local_v6_peers = set()
        # Peer sets from the most recent FRR query. Initialised here so that
        # update_state_db() is safe to call before the first successful poll.
        self.frr_v4_peers = set()
        self.frr_v6_peers = set()
        self.status_table = "DPU_BFD_PROBE_STATE"
        self.db_connector = swsscommon.DBConnector("STATE_DB", 0)
        self.table = swsscommon.Table(self.db_connector, self.status_table)

        # True once bfdd has been seen running; cleared again when the vtysh
        # query fails so the next poll re-probes the daemon.
        self.bfdd_running = False
        # False until the first write to STATE_DB has happened.
        self.init_done = False
        self.MAX_RETRY_ATTEMPTS = 3

    def check_bfdd(self):
        """
        Check if bfdd is running.
        Return: True if bfdd process is running, False otherwise.
        Side effect: sets self.bfdd_running to True when the process is found.
        """
        try:
            # Use pgrep to check if the process is running
            rc, _ = getstatusoutput_noshell(["pgrep", "-f", "bfdd"])
        except Exception as e:
            # Best effort: report the failure instead of hiding it, then
            # treat the daemon as not running.
            syslog.syslog(syslog.LOG_WARNING,
                          "*WARNING* Failed to check bfdd process: {}".format(e))
            return False

        if rc:
            return False

        self.bfdd_running = True
        return True

    @staticmethod
    def _split_up_peers(sessions):
        """
        Split the peers of all 'up' sessions into (v4_peers, v6_peers) sets.
        Entries that are not dicts, are not 'up', or carry no string 'peer'
        are skipped so one malformed entry cannot fail the whole poll.
        """
        v4_peers = set()
        v6_peers = set()
        for session in sessions:
            if not isinstance(session, dict) or session.get("status") != "up":
                continue
            peer = session.get("peer")
            if not isinstance(peer, str):
                continue
            # A ':' can only appear in an IPv6 address.
            if ":" in peer:
                v6_peers.add(peer)
            else:
                v4_peers.add(peer)
        return v4_peers, v6_peers

    def get_bfd_sessions(self):
        """
        Get BFD session information from FRR using vtysh.
        Updates two sets: one for IPv4 peers and another for IPv6 peers whose BFD state is 'up'.
        Returns True if peer info was retrieved successfully, False otherwise.
        """
        self.frr_v4_peers = set()
        self.frr_v6_peers = set()

        # Update bfdd state if it wasn't previously running
        if not self.bfdd_running:
            self.bfdd_running = self.check_bfdd()

        if not self.bfdd_running:
            syslog.syslog(syslog.LOG_WARNING, "*WARNING* bfdd not currently running")
            return False

        cmd = ['vtysh', '-c', 'show bfd peers json']
        for retry_attempt in range(1, self.MAX_RETRY_ATTEMPTS + 1):
            try:
                rc, output = getstatusoutput_noshell(cmd)
                if rc:
                    syslog.syslog(syslog.LOG_ERR, "*ERROR* Failed with rc:{} when execute: {}".format(rc, cmd))
                    # Drop the latched flag so the next poll re-checks bfdd
                    # instead of assuming it is still alive.
                    self.bfdd_running = False
                    return False
                if not output:
                    syslog.syslog(syslog.LOG_WARNING, "*WARNING* output none when execute: {}".format(cmd))
                    return False

                bfd_data = json.loads(output)
                if bfd_data and not isinstance(bfd_data, list):
                    # Unknown payload shape: leave STATE_DB untouched rather
                    # than guessing at its contents.
                    syslog.syslog(syslog.LOG_ERR,
                                  "*ERROR* Unexpected JSON payload type {} when execute: {}".format(
                                      type(bfd_data).__name__, cmd))
                    return False

                self.frr_v4_peers, self.frr_v6_peers = self._split_up_peers(bfd_data or [])
                return True
            except json.JSONDecodeError as e:
                # Log the exception and retry if within the maximum attempts
                syslog.syslog(syslog.LOG_WARNING,
                              "*WARNING* JSONDecodeError: {} when execute: {} Retry attempt: {}".format(e, cmd, retry_attempt))
            except Exception as e:
                # Log any other exception and retry as well
                syslog.syslog(syslog.LOG_WARNING,
                              "*WARNING* An unexpected error occurred: {} when execute: {} Retry attempt: {}".format(
                                  e, cmd, retry_attempt))
            time.sleep(1)

        # Log an error if the maximum retry attempts are reached
        syslog.syslog(syslog.LOG_ERR,
                      "*ERROR* Maximum retry attempts reached. Failed to execute: {}".format(cmd))
        return False

    def update_state_db(self):
        """
        Update the state DB only with changes (additions or deletions) to the peer list.
        The very first call always writes, even when both peer sets are empty.
        """
        unchanged = (self.frr_v4_peers == self.local_v4_peers
                     and self.frr_v6_peers == self.local_v6_peers)
        if self.init_done and unchanged:
            return

        # Copy so later changes to the frr_* sets cannot alter the record of
        # what was written.
        self.local_v4_peers = set(self.frr_v4_peers)
        self.local_v6_peers = set(self.frr_v6_peers)

        # Sort so the stored JSON is deterministic for a given peer set.
        values = [
            ("v4_bfd_up_sessions", json.dumps(sorted(self.local_v4_peers))),
            ("v6_bfd_up_sessions", json.dumps(sorted(self.local_v6_peers)))
        ]
        self.table.set("", values)
        syslog.syslog(syslog.LOG_INFO,
                      "{} table in STATE_DB updated. v4_peers: {}, v6_peers: {}".format(
                          self.status_table, self.local_v4_peers, self.local_v6_peers))

        self.init_done = True

def main():
    """Run the BFD monitor loop: poll FRR every SLEEP_TIME seconds and
    push any change in the set of 'up' peers to STATE_DB.

    The loop has no normal exit. The "stopped" message is logged from a
    ``finally`` clause so it is emitted when the loop ends through an
    exception (including KeyboardInterrupt or SystemExit). A signal that
    terminates the interpreter without raising in Python still bypasses it.
    """
    SLEEP_TIME = 2  # Wait in seconds between each iteration
    syslog.syslog(syslog.LOG_INFO, "bfdmon service started")
    bfd_mon = BfdFrrMon()

    try:
        while True:
            # Sleep for a while before checking again (adjust as necessary)
            time.sleep(SLEEP_TIME)

            # Only touch STATE_DB when the peer list was retrieved successfully
            if bfd_mon.get_bfd_sessions():
                bfd_mon.update_state_db()
    finally:
        syslog.syslog(syslog.LOG_INFO, "bfdmon service stopped")


# Start the monitor only when this file is executed as a script.
if __name__ == "__main__":
    main()
6 changes: 6 additions & 0 deletions src/sonic-bgpcfgd/bgpcfgd/main.py
Original file line number Diff line number Diff line change
Expand Up @@ -22,6 +22,7 @@
from .managers_rm import RouteMapMgr
from .managers_device_global import DeviceGlobalCfgMgr
from .managers_chassis_app_db import ChassisAppDbMgr
from .managers_bfd import BfdMgr
from .managers_srv6 import SRv6Mgr
from .static_rt_timer import StaticRouteTimer
from .runner import Runner, signal_handler
Expand Down Expand Up @@ -84,6 +85,11 @@ def do_work():
if device_info.is_chassis():
managers.append(ChassisAppDbMgr(common_objs, "CHASSIS_APP_DB", "BGP_DEVICE_GLOBAL"))

switch_type = device_info.get_localhost_info("switch_type")
if switch_type and switch_type == "dpu":
log_notice("switch type is dpu, starting bfd manager")
managers.append(BfdMgr(common_objs, "STATE_DB", swsscommon.STATE_BFD_SOFTWARE_SESSION_TABLE_NAME))

runner = Runner(common_objs['cfg_mgr'])
for mgr in managers:
runner.add_manager(mgr)
Expand Down
Loading

0 comments on commit a35e23c

Please sign in to comment.