From 2d0e896bded76c518f01f379f6729571c2abd89b Mon Sep 17 00:00:00 2001 From: David Lougheed Date: Mon, 27 Jul 2020 10:27:57 -0400 Subject: [PATCH 1/4] Refactor subscribers and handle errors better --- bot.py | 9 +- cogs/subscribers.py | 186 +++++++++++++++++++++----------------- cogs/utils/subscribers.py | 57 ++++++++++++ 3 files changed, 168 insertions(+), 84 deletions(-) create mode 100644 cogs/utils/subscribers.py diff --git a/bot.py b/bot.py index 0776be21..721b15bc 100644 --- a/bot.py +++ b/bot.py @@ -61,6 +61,11 @@ def _start_database(self): conn.close() self.logger.debug('Database is ready') + def log_traceback(self, exception): + self.logger.error("".join( + traceback.format_exception(type(exception), exception, + exception.__traceback__))) + async def on_command_error(self, ctx, error): """The event triggered when an error is raised while invoking a command. ctx : Context @@ -93,9 +98,7 @@ async def on_command_error(self, ctx, error): self.logger.error('Ignoring exception in command {}:'.format( ctx.command)) - self.logger.error(''.join( - traceback.format_exception(type(error), error, - error.__traceback__))) + self.log_traceback(error) # predefined variables to be imported diff --git a/cogs/subscribers.py b/cogs/subscribers.py index b6643b00..66072b54 100644 --- a/cogs/subscribers.py +++ b/cogs/subscribers.py @@ -1,6 +1,4 @@ -# -*- coding: utf-8 -*- -# -# Copyright (C) idoneam (2016-2019) +# Copyright (C) idoneam (2016-2020) # # This file is part of Canary # @@ -21,7 +19,6 @@ import discord from discord.ext import commands from discord import utils -import asyncio # URL access and parsing from bs4 import BeautifulSoup @@ -33,7 +30,13 @@ import feedparser import requests +# Subscriber decorator +from .utils.subscribers import canary_subscriber + + CFIA_FEED_URL = "http://inspection.gc.ca/eng/1388422350443/1388422374046.xml" +CFIA_RECALL_TAG_PATH = "pickles/recall_tag.obj" + METRO_STATUS_API = "https://www.stm.info/en/ajax/etats-du-service" METRO_GREEN_LINE = "1" @@ -52,105 +55,126 @@ METRO_NORMAL_SERVICE_MESSAGE = "Normal métro service" -# Default values by line number for status -metro_status = { - METRO_GREEN_LINE: METRO_NORMAL_SERVICE_MESSAGE, - METRO_ORANGE_LINE: METRO_NORMAL_SERVICE_MESSAGE, - METRO_YELLOW_LINE: METRO_NORMAL_SERVICE_MESSAGE, - METRO_BLUE_LINE: METRO_NORMAL_SERVICE_MESSAGE, -} - -os.makedirs('./pickles', exist_ok=True) +os.makedirs("./pickles", exist_ok=True) class Subscribers(commands.Cog): def __init__(self, bot): self.bot = bot + # Compiled recall regular expression for filtering + self._recall_filter = re.compile(self.bot.config.recall_filter, + re.IGNORECASE) + + # Default values by line number for status + self._metro_statuses = { + METRO_GREEN_LINE: METRO_NORMAL_SERVICE_MESSAGE, + METRO_ORANGE_LINE: METRO_NORMAL_SERVICE_MESSAGE, + METRO_YELLOW_LINE: METRO_NORMAL_SERVICE_MESSAGE, + METRO_BLUE_LINE: METRO_NORMAL_SERVICE_MESSAGE, + } + + self._recall_channel = None + self._metro_status_channel = None + + @commands.Cog.listener() + async def on_ready(self): + self._recall_channel = utils.get(self.bot.get_guild( + self.bot.config.server_id).text_channels, + name=self.bot.config.recall_channel) + + self._metro_status_channel = utils.get( + self.bot.get_guild(self.bot.config.server_id).text_channels, + name=self.bot.config.metro_status_channel) + + # Register all subscribers + self.bot.loop.create_task(self.cfia_rss()) + self.bot.loop.create_task(self.metro_status()) + + @canary_subscriber(12 * 3600) # run every 12 hours async def cfia_rss(self): # Written by @jidicula """ Co-routine that periodically checks the CFIA Health Hazard Alerts RSS feed for updates. """ - await self.bot.wait_until_ready() - while not self.bot.is_closed(): - recall_channel = utils.get(self.bot.get_guild( - self.bot.config.server_id).text_channels, - name=self.bot.config.recall_channel) - newest_recalls = feedparser.parse(CFIA_FEED_URL)['entries'] - try: - id_unpickle = open("pickles/recall_tag.obj", 'rb') + + newest_recalls = feedparser.parse(CFIA_FEED_URL)["entries"] + + try: + with open(CFIA_RECALL_TAG_PATH, "rb") as id_unpickle: recalls = pickle.load(id_unpickle) - id_unpickle.close() - except Exception: - recalls = {} - new_recalls = False - for recall in newest_recalls: - recall_id = recall['id'] - if recall_id not in recalls: - new_recalls = True - recalls[recall_id] = "" - recall_warning = discord.Embed(title=recall['title'], - description=recall['link']) - soup = BeautifulSoup(recall['summary'], "html.parser") - try: - img_url = soup.img['src'] - summary = soup.p.find_parent().text.strip() - except Exception: - img_url = "" - summary = recall['summary'] - if re.search(self.bot.config.recall_filter, summary, - re.IGNORECASE): - recall_warning.set_image(url=img_url) - recall_warning.add_field(name="Summary", value=summary) - await recall_channel.send(embed=recall_warning) - if new_recalls: - # Pickle newly added IDs - id_pickle = open("pickles/recall_tag.obj", 'wb') + except Exception: # TODO: Specify exception + recalls = {} + + new_recalls = False + + for recall in newest_recalls: + recall_id = recall["id"] + if recall_id in recalls: + # Don't send already-sent recalls + continue + + new_recalls = True + recalls[recall_id] = "" + recall_warning = discord.Embed(title=recall["title"], + description=recall["link"]) + soup = BeautifulSoup(recall["summary"], "html.parser") + + try: + img_url = soup.img["src"] + summary = soup.p.find_parent().text.strip() + except Exception: # TODO: Specify exception + img_url = "" + summary = recall["summary"] + + if self._recall_filter.search(summary): + recall_warning.set_image(url=img_url) + recall_warning.add_field(name="Summary", value=summary) + await self._recall_channel.send(embed=recall_warning) + + if new_recalls: + # Pickle newly added IDs + with open(CFIA_RECALL_TAG_PATH, "wb") as id_pickle: pickle.dump(recalls, id_pickle) - id_pickle.close() - await asyncio.sleep(12 * 3600) # run every 12 hours + @staticmethod + def _check_metro_status(line_number, response): + # Helper function to return line name and status. + # - `line_number` must be a string containing the number of the + # metro line + # - `response` must be a JSON response object from a GET request to + # the metro status API. + line_name = response.json()["metro"][line_number]["name"] + status = response.json()["metro"][line_number]["data"]["text"] + return line_name, status + + @canary_subscriber(60) # Run every 60 seconds async def metro_status(self): # Written by @jidicula """ Co-routine that periodically checks the STM Metro status API for outages. """ - await self.bot.wait_until_ready() - - def check_status(line_number, response): - # Helper function to return line name and status. - # - `line_number` must be a string containing the number of the - # metro line - # - `response` must be a JSON response object from a GET request to - # the metro status API. - line_name = response.json()["metro"][line_number]["name"] - status = response.json()["metro"][line_number]["data"]["text"] - return (line_name, status) - - while not self.bot.is_closed(): - metro_status_channel = utils.get( - self.bot.get_guild(self.bot.config.server_id).text_channels, - name=self.bot.config.metro_status_channel) - response = requests.get(METRO_STATUS_API) - for line_status in metro_status.items(): - line_number = line_status[0] - cached_status = line_status[1] - line_name, current_status = check_status(line_number, response) - if (current_status != cached_status - and current_status != METRO_INTERIM_STATUS): - metro_status[line_number] = current_status - metro_status_update = discord.Embed( - title=line_name, - description=current_status, - colour=METRO_COLOURS[line_number]) - await metro_status_channel.send(embed=metro_status_update) - await asyncio.sleep(60) # Run every 60 seconds + + response = requests.get(METRO_STATUS_API) + + for line_number, cached_status in self._metro_statuses.items(): + line_name, current_status = Subscribers._check_metro_status( + line_number, response) + if current_status in (cached_status, METRO_INTERIM_STATUS): + # Don't send message if the status hasn't changed or the status + # is currently in the middle of changing on the API side. + continue + + self._metro_statuses[line_number] = current_status + metro_status_update = discord.Embed( + title=line_name, + description=current_status, + colour=METRO_COLOURS[line_number]) + + await self._metro_status_channel.send(embed=metro_status_update) def setup(bot): bot.add_cog(Subscribers(bot)) - bot.loop.create_task(Subscribers(bot).cfia_rss()) - bot.loop.create_task(Subscribers(bot).metro_status()) diff --git a/cogs/utils/subscribers.py b/cogs/utils/subscribers.py new file mode 100644 index 00000000..1eceae13 --- /dev/null +++ b/cogs/utils/subscribers.py @@ -0,0 +1,57 @@ +# Copyright (C) idoneam (2016-2020) +# +# This file is part of Canary +# +# Canary is free software; you can redistribute it and/or modify it +# under the terms of the GNU General Public License as published by +# the Free Software Foundation, either version 3 of the License, or +# (at your option) any later version. +# +# Canary is distributed in the hope that it will be useful, but +# WITHOUT ANY WARRANTY; without even the implied warranty of +# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the +# GNU General Public License for more details. +# +# You should have received a copy of the GNU General Public License +# along with Canary. If not, see . + +import asyncio +import functools +from typing import Callable + + +__all__ = [ + "CanarySubscriberException", + "canary_subscriber", +] + + +class CanarySubscriberException(Exception): + pass + + +NO_BOT = CanarySubscriberException("Could not get bot from wrapped function") + + +def canary_subscriber(sleep_time: int): + def _canary_subscriber(func: Callable): + @functools.wraps(func) + async def wrapper(*args, **kwargs): + if not args: + raise NO_BOT + + try: + bot = getattr(args[0], "bot") + except AttributeError: + raise NO_BOT + + await bot.wait_until_ready() + while not bot.is_closed(): + try: + await func(*args, **kwargs) + except Exception as e: + bot.logger.error("Subscriber encountered error:") + bot.log_traceback(e) + await asyncio.sleep(sleep_time) + return wrapper + return _canary_subscriber From 61961b310c374ca6ac05f8c0bf25d155fa845bec Mon Sep 17 00:00:00 2001 From: David Lougheed Date: Mon, 27 Jul 2020 10:31:39 -0400 Subject: [PATCH 2/4] formatterino --- Main.py | 11 +++++++---- cogs/utils/subscribers.py | 3 ++- 2 files changed, 9 insertions(+), 5 deletions(-) mode change 100644 => 100755 Main.py diff --git a/Main.py b/Main.py old mode 100644 new mode 100755 index 70ddc5dd..df7e1ebe --- a/Main.py +++ b/Main.py @@ -1,7 +1,6 @@ -#!/usr/bin/env python3 -# -*- coding: utf-8 -*- +#! /usr/bin/env python3 # -# Copyright (C) idoneam (2016-2019) +# Copyright (C) idoneam (2016-2020) # # This file is part of Canary # @@ -163,7 +162,7 @@ async def backup(ctx): bot.logger.info('Database backup') -if __name__ == "__main__": +def main(): for extension in startup: try: bot.load_extension(extension) @@ -172,3 +171,7 @@ async def backup(ctx): extension, type(e).__name__, e)) bot.run(bot.config.discord_key) + + +if __name__ == "__main__": + main() diff --git a/cogs/utils/subscribers.py b/cogs/utils/subscribers.py index 1eceae13..b461e952 100644 --- a/cogs/utils/subscribers.py +++ b/cogs/utils/subscribers.py @@ -19,7 +19,6 @@ import functools from typing import Callable - __all__ = [ "CanarySubscriberException", "canary_subscriber", @@ -53,5 +52,7 @@ async def wrapper(*args, **kwargs): bot.logger.error("Subscriber encountered error:") bot.log_traceback(e) await asyncio.sleep(sleep_time) + return wrapper + return _canary_subscriber From f8b980789f890394c3f0bd4274e5ad0d6957e6ae Mon Sep 17 00:00:00 2001 From: David Lougheed Date: Mon, 27 Jul 2020 10:48:38 -0400 Subject: [PATCH 3/4] formatterino II: the reformattening --- cogs/subscribers.py | 1 - 1 file changed, 1 deletion(-) diff --git a/cogs/subscribers.py b/cogs/subscribers.py index 66072b54..2a483511 100644 --- a/cogs/subscribers.py +++ b/cogs/subscribers.py @@ -33,7 +33,6 @@ # Subscriber decorator from .utils.subscribers import canary_subscriber - CFIA_FEED_URL = "http://inspection.gc.ca/eng/1388422350443/1388422374046.xml" CFIA_RECALL_TAG_PATH = "pickles/recall_tag.obj" From 0cbd042ee3517c7bea7631194a5453a6b11bef86 Mon Sep 17 00:00:00 2001 From: David Lougheed Date: Mon, 27 Jul 2020 12:54:22 -0400 Subject: [PATCH 4/4] Don't try to deserialize twice, explicitly handle json decode exc --- cogs/subscribers.py | 16 +++++++++++----- 1 file changed, 11 insertions(+), 5 deletions(-) diff --git a/cogs/subscribers.py b/cogs/subscribers.py index 2a483511..a079d961 100644 --- a/cogs/subscribers.py +++ b/cogs/subscribers.py @@ -24,6 +24,7 @@ from bs4 import BeautifulSoup # Other utilities +import json.decoder import os import re import pickle @@ -138,14 +139,14 @@ async def cfia_rss(self): pickle.dump(recalls, id_pickle) @staticmethod - def _check_metro_status(line_number, response): + def _check_metro_status(line_number, response_data): # Helper function to return line name and status. # - `line_number` must be a string containing the number of the # metro line # - `response` must be a JSON response object from a GET request to # the metro status API. - line_name = response.json()["metro"][line_number]["name"] - status = response.json()["metro"][line_number]["data"]["text"] + line_name = response_data["metro"][line_number]["name"] + status = response_data["metro"][line_number]["data"]["text"] return line_name, status @canary_subscriber(60) # Run every 60 seconds @@ -156,11 +157,16 @@ async def metro_status(self): outages. """ - response = requests.get(METRO_STATUS_API) + try: + response = requests.get(METRO_STATUS_API) + response_data = response.json() + except json.decoder.JSONDecodeError: + # STM API sometimes returns non-JSON responses + return for line_number, cached_status in self._metro_statuses.items(): line_name, current_status = Subscribers._check_metro_status( - line_number, response) + line_number, response_data) if current_status in (cached_status, METRO_INTERIM_STATUS): # Don't send message if the status hasn't changed or the status # is currently in the middle of changing on the API side.