From dede9944a6c9e5cf1cdd8ba30247001ab66df91b Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Gina=20H=C3=A4u=C3=9Fge?= Date: Mon, 20 Apr 2020 10:52:20 +0200 Subject: [PATCH] Sanitize county name The ministry sadly does some horrid stuff to their HTML and has implemented hyphenation manually, leading to some county names now being split in weird ways after extraction. The following replacements take place: * "- " -> "-" (when the next character is not a lowercase letter) * "- " -> "" (when the next character is a lowercase letter) --- .../coronavirus_hessen/__init__.py | 39 +++++++++++++++++++ 1 file changed, 39 insertions(+) diff --git a/custom_components/coronavirus_hessen/__init__.py b/custom_components/coronavirus_hessen/__init__.py index b92d43e..f90be60 100644 --- a/custom_components/coronavirus_hessen/__init__.py +++ b/custom_components/coronavirus_hessen/__init__.py @@ -2,6 +2,7 @@ from datetime import timedelta import logging +import re import async_timeout import asyncio @@ -19,6 +20,8 @@ PLATFORMS = ["sensor"] +HYPHEN_PATTERN = re.compile(r"- (.)") + async def async_setup(hass: HomeAssistant, config: dict): """Set up the Coronavirus Hessen component.""" # Make sure coordinator is initialized. @@ -87,8 +90,10 @@ async def async_get_data(): _LOGGER.exception("Error processing line {}, skipping".format(line)) continue + county = sanitize_county(county) if county == "Gesamtergebnis": county = OPTION_TOTAL + result[county] = dict(cases=cases, hospitalized=hospitalized, deaths=deaths) _LOGGER.debug("Corona Hessen: {!r}".format(result)) @@ -109,3 +114,37 @@ def parse_num(s, t=int): if len(s) and s != "-": return t(s.replace(".", "").replace(",", ".")) return 0 + +def sanitize_county(county): + """ + Sanitizes the county. + + The ministry sadly does some horrid stuff to their HTML + and has implemented hyphenation manually, leading to + some county names now being split in weird ways after + extraction. 
+ + The following replacements take place: + + * "- " -> "-" (when the next character is not a lowercase letter) + * "- " -> "" (when the next character is a lowercase letter) + + Examples: + + >>> sanitize_county("LK Main-Kinzig- Kreis") + <<< "LK Main-Kinzig-Kreis" + >>> sanitize_county("LK Wetterau- kreis") + <<< "LK Wetteraukreis" + >>> sanitize_county("SK Frankfurt am Main") + <<< "SK Frankfurt am Main" + """ + + def replace(m): + letter = m.group(1) + if letter.islower(): + return letter + else: + return "-{}".format(letter) + + return HYPHEN_PATTERN.sub(replace, county) +