diff --git a/config.py b/config.py index 8602ec5..af74e69 100755 --- a/config.py +++ b/config.py @@ -18,24 +18,27 @@ # strings: Your personal name and the name of your blog: -AUTHOR_NAME = 'Karl Voit' -BLOG_NAME = 'public voit' +AUTHOR_NAME = 'Marc Baaden' +BLOG_NAME = 'UnityMol News' ## strings: Define your URLs and your name below: -DOMAIN = 'Karl-Voit.at' +DOMAIN = 'unity.mol3d.tech/news' +DOMAIN = 'localhost:8000/news' BASE_URL = '//' + DOMAIN -CSS_URL = BASE_URL + '/public_voit.css' -BLOG_LOGO = BASE_URL + '/images/public-voit_logo.svg' -DISQUS_NAME = 'publicvoit' # gets placed in: '//publicvoit.disqus.com/embed.js' +ROOT_URL = '//unity.mol3d.tech' +ROOT_URL = '//localhost:8000' +CSS_URL = BASE_URL + '/unitymol.css' +BLOG_LOGO = BASE_URL + '/img/baamcode.svg' +DISQUS_NAME = 'baaden' # gets placed in: '//publicvoit.disqus.com/embed.js' ## string: Email address to send comments to: -COMMENT_EMAIL_ADDRESS = 'publicvoit-comment@Karl-Voit.at' +COMMENT_EMAIL_ADDRESS = 'umol-blog-comment@smplinux.de' ## integer: Show this many article teasers on entry page NUMBER_OF_TEASER_ARTICLES = 25 ## integer: Show this many top tags in the sidebar -NUMBER_OF_TOP_TAGS = 10 +NUMBER_OF_TOP_TAGS = 5 ## list of strings: tags to ignore when generating misc things: IGNORE_FOR_TOP_TAGS = ['suderei', 'personally'] @@ -47,20 +50,20 @@ ## string: This is the Org-mode property :ID: of your blog article which ## is used for the about page of your blog. ## See example in: testdata/end_to_end_test/orgfiles/about-placeholder.org -ID_OF_ABOUT_PAGE = '2014-03-09-about' +ID_OF_ABOUT_PAGE = '2013-02-28-about' ## string: This is the Org-mode property :ID: of your blog article which ## is used for the "How to use this blog efficiently" page of your blog. -ID_OF_HOWTO_PAGE = '2017-01-03-how-to-use-this-blog' +ID_OF_HOWTO_PAGE = '2013-02-28-how-to-use-this-blog' ## string: Your Twitter handle/username which is used in the HTML header ## metadata (without the @ character) -TWITTER_HANDLE = 'n0v0id' +TWITTER_HANDLE = 'Unitymol' ## string: An image which is added to the HTML header metadata and is used ## by Twitter in Twitter cards to visualize your blog (also used ## as og:image) -TWITTER_IMAGE = 'http://Karl-Voit.at/images/public-voit_T_logo_200x200.png' +TWITTER_IMAGE = 'https://pbs.twimg.com/profile_images/275310725/frog_400x400.jpg' ## string: Replace "+01:00" below with your time-zone indicator ## This string gets added to the time strings in order to describe time zone of the blog: @@ -93,22 +96,26 @@ ## like "/home/user/dir1/memacs_files.org_archive" as string. ## EMPTY string if including images via Memacs index is disabled ## Please do read the documentation: https://github.com/novoid/lazyblorg/wiki/Orgmode-Elements#images -MEMACS_FILE_WITH_IMAGE_FILE_INDEX = os.path.join(os.path.expanduser("~"), "org", "memacs", "files.org_archive") +#MEMACS_FILE_WITH_IMAGE_FILE_INDEX = os.path.join(os.path.expanduser("~"), "REP", "org", "memacs", "files.org_archive") +#MEMACS_FILE_WITH_IMAGE_FILE_INDEX = os.path.join("/usr", "src", "host", "REP", "org", "memacs", "files.org_archive") +MEMACS_FILE_WITH_IMAGE_FILE_INDEX = "" ## string: path to a directory that holds image files (+ sub-directories) ## EMPTY string if including images via traversing the file system is disabled ## Please do read the documentation: https://github.com/novoid/lazyblorg/wiki/Orgmode-Elements#images -DIRECTORIES_WITH_IMAGE_ORIGINALS = ["testdata/testimages", - os.path.join(os.path.expanduser("~"), *"tmp/digicam/tmp".split('/')), - os.path.join(os.path.expanduser("~"), *"tmp/digicam/oneplus5".split('/')), - os.path.join(os.path.expanduser("~"), *"archive/events_memories/2019".split('/')), - os.path.join(os.path.expanduser("~"), *"archive/events_memories/2020".split('/')), - os.path.join(os.path.expanduser("~"), *"archive/events_memories/2021".split('/')), - os.path.join(os.path.expanduser("~"), *"archive/events_memories/2022".split('/')), - os.path.join(os.path.expanduser("~"), *"archive/events_memories/2023".split('/')), - os.path.join(os.path.expanduser("~"), *"archive/events_memories/2024".split('/')), - os.path.join(os.path.expanduser("~"), *"fun/netfun".split('/')), - os.path.join(os.path.expanduser("~"), *"archive/fromweb/cliparts".split('/'))] +##DIRECTORIES_WITH_IMAGE_ORIGINALS = ["blog.smplinux.de/img"] +DIRECTORIES_WITH_IMAGE_ORIGINALS = ["img"] +#DIRECTORIES_WITH_IMAGE_ORIGINALS = ["orgfiles/testimages", +# os.path.join(os.path.expanduser("~"), *"tmp/digicam/tmp".split('/')), +# os.path.join(os.path.expanduser("~"), *"tmp/digicam/oneplus5".split('/')), +# os.path.join(os.path.expanduser("~"), *"archive/events_memories/2019".split('/')), +# os.path.join(os.path.expanduser("~"), *"archive/events_memories/2020".split('/')), +# os.path.join(os.path.expanduser("~"), *"archive/events_memories/2021".split('/')), +# os.path.join(os.path.expanduser("~"), *"archive/events_memories/2022".split('/')), +# os.path.join(os.path.expanduser("~"), *"archive/events_memories/2023".split('/')), +# os.path.join(os.path.expanduser("~"), *"archive/events_memories/2024".split('/')), +# os.path.join(os.path.expanduser("~"), *"fun/netfun".split('/')), +# os.path.join(os.path.expanduser("~"), *"archive/fromweb/cliparts".split('/'))] ## string: a filetags-tag - see ## http://karl-voit.at/managing-digital-photographs/ and @@ -117,7 +124,7 @@ ## If not empty: Contains a tag which should be part of any image ## file included. If the image file does not contain this filetag, ## a warning is issued in the console output. -WARN_IF_IMAGE_FILE_NOT_TAGGED_WITH="publicvoit" +WARN_IF_IMAGE_FILE_NOT_TAGGED_WITH="baaden" ## According to https://github.com/novoid/lazyblorg/wiki/Images ## you can link to a differently sized image when including a @@ -132,6 +139,7 @@ ## within Utils is extended as well and the language identifier ## matches the dict keys below. CLUE_TEXT_FOR_LINKED_IMAGES = {'deutsch': '(klicken für größere Version)', + 'français': '(cliquer pour une version plus grande)', 'english': '(click for a larger version)'} ## Mastodon is an federated social network. It offers a homepage verification mechanism @@ -140,9 +148,9 @@ ## If you want to use this header, you can use this option. ## In my case, the string should look like: ## Mastodon -MASTODON_USER_URL="https://graz.social/@publicvoit" +#### MASTODON_USER_URL="https://graz.social/@publicvoit" ## If you do NOT want to use this: -## MASTODON_USER_URL=None +MASTODON_USER_URL=None ## ===================================================================== ## @@ -188,7 +196,7 @@ def assertTag(tag): ## tag that is expected in any blog entry category; tag does not get shown in list of user-tags -TAG_FOR_BLOG_ENTRY = 'blog' +TAG_FOR_BLOG_ENTRY = 'blog_um' assertTag(TAG_FOR_BLOG_ENTRY) diff --git a/lib/htmlizer.py b/lib/htmlizer.py index 79b3780..bd16373 100644 --- a/lib/htmlizer.py +++ b/lib/htmlizer.py @@ -32,6 +32,14 @@ # NOTE: pdb hides private variables as well. Please use: # data = self._OrgParser__entry_data ; data['content'] +# MVB initialize link dictionary here as global store +global dict_of_links +dict_of_links = {} + +# set checking for link definitions to strict: any re-definition will raise +# a critical error +LINKDEFS_STRICT_CHECKING = False + class HtmlizerException(Exception): """ @@ -79,7 +87,7 @@ class Htmlizer(object): # populated in copy_cust_link_image_file() # { 'basefilename': [ width, height ] } dict_of_image_files_with_width_height = { } - + # holds a list of tags whose tag pages have been generated list_of_tag_pages_generated = [] @@ -91,14 +99,20 @@ class Htmlizer(object): ID_DESCRIBED_LINK_REGEX = re.compile(r'(\[\[id:([^\]]+?)\]\[([^\]]+?)\]\])') # find external links such as [[http(s)://foo.com][bar]]: + # MVB :: maybe regexp needs adjustment to exclude [] from allowed characters EXT_URL_WITH_DESCRIPTION_REGEX = re.compile( r'\[\[(http[^ ]+?)\]\[(.+?)\]\]', flags=re.U) - + + # find external links such as [[http(s)://foo.com][bar]]: + EXT_TAG_URL_WITH_DESCRIPTION_REGEX = re.compile( + r'\[\[([^[\[\]]+?)\]\[([^[\[\]]+?)\]\]', flags=re.U) + # find external links such as [[foo]]: EXT_URL_WITHOUT_DESCRIPTION_REGEX = re.compile( - r'\[\[(.+?)\]\]', flags=re.U) + r'\[\[([^[\[\]]+?)\]\]', flags=re.U) # find external links such as http(s)://foo.bar + # MVB :: maybe regexp needs adjustment to exclude [] from allowed characters EXT_URL_LINK_REGEX = re.compile( r'([^"<>\[])(http(s)?:\/\/\S+)', flags=re.U) @@ -193,7 +207,7 @@ def run(self): """ self.blog_data = self._populate_backreferences(self.blog_data) - + self.dict_of_tags_with_ids = self._populate_dict_of_tags_with_ids( self.blog_data) @@ -450,7 +464,9 @@ def _generate_page(self, kind, originalentry): my_templates = ['article-tags-end', 'persistent-header-end'] elif kind == config.TAGS: my_templates = ['tagpage-tags-end', 'tagpage-header-end'] - + if 'title' in entry and '🇫🇷' == entry['title']: + my_templates = ['tagpage-tags-end', 'tagpage-header-end-fr'] + for articlepart in my_templates: htmlcontent += self.template_definition_by_name(articlepart) @@ -480,6 +496,12 @@ def _generate_page(self, kind, originalentry): elif kind == config.TAGS: my_end_template = 'tagpage-end' my_footer_template = 'article-footer' + if 'title' in entry and '🇫🇷' == entry['title']: + my_end_template += '-fr' + my_footer_template += '-fr' + + if 'usertags' in entry and isinstance(entry['usertags'], list) and '🇫🇷' in entry['usertags']: + my_footer_template += '-fr' if config.MASTODON_USER_URL: ## the config.org contains a filled MASTODON_USER_URL variable: @@ -918,6 +940,8 @@ def generate_entry_page(self, entry_list_by_newest_timestamp, tags): listentry['latestupdateTS'].minute).zfill(2) iso_timestamp = '-'.join([year, month, day]) + \ 'T' + hours + ':' + minutes + mvb_timestamp = '-'.join([day, month, year]) + \ + ', ' + hours + ':' + minutes content = content.replace('#ARTICLE-YEAR#', year) content = content.replace('#ARTICLE-MONTH#', month) @@ -926,7 +950,7 @@ def generate_entry_page(self, entry_list_by_newest_timestamp, tags): '#ARTICLE-PUBLISHED-HTML-DATETIME#', iso_timestamp + config.TIME_ZONE_ADDON) content = content.replace( - '#ARTICLE-PUBLISHED-HUMAN-READABLE#', iso_timestamp) + '#ARTICLE-PUBLISHED-HUMAN-READABLE#', mvb_timestamp) # sanitize internal links of content so far: content = self.sanitize_internal_links(content) @@ -1170,6 +1194,28 @@ def convert_latex_to_html5(self, latex): self.stats_external_latex_to_html5_conversion += 1 return pypandoc.convert_text(latex, 'html5', format='latex') + def check_link_definition_against_dictionary(self, keyword, linkdef): + # Check if the tag is already defined in linkdefs + if keyword in dict_of_links: + if LINKDEFS_STRICT_CHECKING: + message=("Htmlizer: Aborting because tag value " + f"for {keyword} is re-defined and strict checking is " + f"enabled.\nNew assignment: {linkdef}\nPrevious " + "assignment: %s" % dict_of_links[keyword]) + self.logging.critical(message) + raise HtmlizerException(self.current_entry_id,message) + else: + self.logging.debug(f"Htmlizer: Warning! Tag {keyword} " + "already defined in url dictionary.") + # Compare the existing value to the new value + if linkdef != dict_of_links[keyword]: + message=("Htmlizer: Aborting because tag value " + f"for {keyword} re-defined differently from the " + f"previous value. \nNew assignment: {linkdef}\nPrevious " + "assignment: %s" % dict_of_links[keyword]) + self.logging.critical(message) + raise HtmlizerException(self.current_entry_id,message) + def sanitize_and_htmlize_blog_content(self, entry): """ Inspects a selection of the entry content data and sanitizes @@ -1216,7 +1262,21 @@ def sanitize_and_htmlize_blog_content(self, entry): str(entry['id']) + " is not recognized clearly; using guessed_language_autotag \"unsure\"") entry['autotags']['language'] = 'unsure' - + + # set url dictionary MVB + # look up our dictionary for link definitions + if 'linkdefs' not in entry or not isinstance(entry['linkdefs'], dict): + self.logging.debug("Htmlizer: blog entry without url" + "dictionary definitions encountered") + else: + for keyword, linkdef in entry['linkdefs'].items(): + # handle issues such as re-definition + self.check_link_definition_against_dictionary(keyword, linkdef) + # we're fine, let's add the definition to the dictionary + dict_of_links[keyword] = linkdef + self.logging.debug("Htmlizer: instantiate url " + "dictionary with definition for %s tag" % keyword) + # for element in entry['content']: for index in range(0, len(entry['content'])): @@ -1573,6 +1633,14 @@ def sanitize_and_htmlize_blog_content(self, entry): else: # fall-back for all content elements which do not require special treatment: + # simply handle url link definitions for lists before calling pandoc + if entry['content'][index][0] == 'list': + for i, listitem in enumerate(entry['content'][index][1]): + # look up URL dictionary for each listitem to + # evtl. replace content with sanitized + # external links + entry['content'][index][1][i] = self.sanitize_url_dict_links(listitem) + # entry['content'][index][0] == 'mylist': # ['table', [u'- an example', # u' - list subitem', @@ -1839,20 +1907,83 @@ def sanitize_external_links(self, content): @param entry: string @param return: sanitized string """ - + + # Moved here to the top because of issues with double http links as wayback archive content = re.sub( self.EXT_URL_LINK_REGEX, r'\1\2', content) + # Replace directly written URLs with HTML anchor tags + def replace_url(match): + key, value = match.group(0).strip('[]').split('][') + if dict_of_links.get(key): + self.logging.debug("Htmlizer: replaced tag %s from url dictionary" % key) + return '{}'.format(dict_of_links[key], value) + else: + return '{}'.format(key, value) # Return the original match if URL is not found in dictionary + + # Replace external links of type [[foo][bar]] with bar + content = re.sub( + self.EXT_TAG_URL_WITH_DESCRIPTION_REGEX, + replace_url, + content) + content = re.sub( self.EXT_URL_WITH_DESCRIPTION_REGEX, r'\2', content) + def replace_tag(match): + key = match.group(0)[2:-2] + if key in dict_of_links: + self.logging.debug("Htmlizer: replaced tag %s (no description) from url dictionary" % key) + return '{}'.format(dict_of_links[key],dict_of_links[key]) + else: + return '{}'.format(key, key) # Return the original match if tag is not found in dictionary + + content = re.sub( + self.EXT_URL_WITHOUT_DESCRIPTION_REGEX, + replace_tag, + content) + + return content + + def sanitize_url_dict_links(self, content): + """ + Replaces all external Org-mode links of type [[foo][bar]] with + dictionary values if a \#+LINK tag definition exists for foo. + + @param entry: string + @param return: sanitized string + """ + + # Replace url dictionary tags with actual URL + def replace_url(match): + key, value = match.group(0).strip('[]').split('][') + if dict_of_links.get(key): + self.logging.debug("Htmlizer: replaced tag %s from url dictionary" % key) + return '[[{}][{}]]'.format(dict_of_links[key], value) + else: + return '[[{}][{}]]'.format(key, value) # Return the original match if URL is not found in dictionary + + content = re.sub( + self.EXT_TAG_URL_WITH_DESCRIPTION_REGEX, + replace_url, + content) + + # Replace url dictionary tags with actual URL + def replace_tag(match): + key = match.group(0)[2:-2] + if key in dict_of_links: + self.logging.debug("Htmlizer: replaced tag %s (no description) from url dictionary" % key) + return '[[{}][{}]]'.format(dict_of_links[key],dict_of_links[key]) + else: + return '[[{}][{}]]'.format(key, key) # Return the original match if tag is not found in dictionary + content = re.sub( self.EXT_URL_WITHOUT_DESCRIPTION_REGEX, - r'\1', + replace_tag, content) return content @@ -1967,6 +2098,8 @@ def _generate_back_references_content(self, entry, sourcecategory): if 'autotags' in list(entry.keys()): if 'language' in list(entry['autotags'].keys()) and entry['autotags']['language'] == 'deutsch': content += self.template_definition_by_name('backreference-header-de') + elif '🇫🇷' in list(entry['usertags']): + content += self.template_definition_by_name('backreference-header-fr') else: content += self.template_definition_by_name('backreference-header-en') @@ -2104,6 +2237,7 @@ def _replace_general_blog_placeholders(self, content): content = content.replace('#TOP-TAG-LIST#', self._generate_top_tag_list()) # FIXXME: generate only once for performance reasons? content = content.replace('#DOMAIN#', config.DOMAIN) content = content.replace('#BASE-URL#', config.BASE_URL) + content = content.replace('#ROOT-URL#', config.ROOT_URL) content = content.replace('#CSS-URL#', config.CSS_URL) content = content.replace('#AUTHOR-NAME#', config.AUTHOR_NAME) content = content.replace('#BLOG-NAME#', config.BLOG_NAME) @@ -2152,6 +2286,8 @@ def _replace_general_article_placeholders(self, entry, template): year, month, day, hours, minutes = Utils.get_YY_MM_DD_HH_MM_from_datetime(entry['firstpublishTS']) iso_timestamp = '-'.join([year, month, day]) + \ 'T' + hours + ':' + minutes + mvb_timestamp = '-'.join([day, month, year]) + \ + ', ' + hours + ':' + minutes content = content.replace('#ARTICLE-ID#', entry['id']) content = content.replace('#ARTICLE-URL#', str(self._target_path_for_id_without_targetdir(entry['id']))) @@ -2163,7 +2299,7 @@ def _replace_general_article_placeholders(self, entry, template): iso_timestamp + config.TIME_ZONE_ADDON) content = content.replace( '#ARTICLE-PUBLISHED-HUMAN-READABLE#', - iso_timestamp) + mvb_timestamp) if entry['category'] == config.TAGS: # replace #TAG-PAGE-LIST# @@ -2492,6 +2628,8 @@ def _insert_reading_minutes_if_found(self, entry, htmlcontent): # here: (generalize using a configured list of known # languages?) snippetname += 'de' + elif '🇫🇷' in list(entry['usertags']): + snippetname += 'fr' else: snippetname += 'en' @@ -2715,7 +2853,7 @@ def copy_cust_link_image_file(self, filename, articlepath, width): return self._save_width_height_to_dict_of_image_files_with_width_height(destinationfile) # FIXXME: FUTURE? generate scaled version when width/height is set - + def _populate_filename_dict(self): """ Locates and parses the directory config.DIRECTORIES_WITH_IMAGE_ORIGINALS for filename index. Result is stored in self.filename_dict. diff --git a/lib/orgparser.py b/lib/orgparser.py index 9aa0007..4497f68 100644 --- a/lib/orgparser.py +++ b/lib/orgparser.py @@ -54,7 +54,8 @@ class OrgParser(object): # asterisk(s), whitespace, word(s), optional followed by optional tags: HEADING_REGEX = re.compile( - r'^(\*+)\s+((' + config.BLOG_FINISHED_STATE + r')\s+)?(.*?)(\s+(:\S+:)+)?\s*$') + r'^(\*+|\#\+TITLE:)\s+((' + config.BLOG_FINISHED_STATE + r')\s+)?(.*?)(\s+(:\S+:)+)?\s*$') + # r'^(\*+)\s+((' + config.BLOG_FINISHED_STATE + r')\s+)?(.*?)(\s+(:\S+:)+)?\s*$') # REGEX.match(string).group(INDEX) HEADING_STARS_IDX = 1 HEADING_STATE_IDX = 3 @@ -131,7 +132,7 @@ class OrgParser(object): CUST_LINK_IMAGE_REGEX = re.compile(r'^\[\[tsfile:([^\]]+\.(png|jpg|jpeg|svg|gif))+(\]\[(.+))?\]\]$') CUST_LINK_IMAGE_FILENAME_IDX = 1 CUST_LINK_IMAGE_DESCRIPTION_IDX = 4 - + __filename = '' # for description please visit: lazyblog.org > Notes > Representation of @@ -142,6 +143,12 @@ class OrgParser(object): __entry_data = {} # dict of currently parsed blog entry data: gets "filled" # while parsing the entry + + # set checking for link definitions to strict: any re-definition will raise + # a critical error + global LINKDEFS_STRICT_CHECKING + LINKDEFS_STRICT_CHECKING = False + def __init__(self, filename): """ This function handles the communication with the parser object and returns the blog data. @@ -267,6 +274,9 @@ def __handle_heading_and_check_if_it_is_blog_heading( assert tags.__class__ == str + # assign top stars level of 0 if title + if (stars == "#+TITLE:"): stars="" + self.__entry_data['title'] = title self.__entry_data['level'] = len(stars) self.__entry_data['lbtags'] = [] @@ -384,6 +394,30 @@ def _get_list_indentation_number(self, list_item): # return number of leading spaces: return len(list_item) - len(list_item.lstrip(' ')) + def check_link_definition_against_dictionary(self, linkdefs, tag, url): + # Check if the tag is already defined in linkdefs + if tag in linkdefs: + linkdef = linkdefs[tag] + if LINKDEFS_STRICT_CHECKING: + message=(f"OrgParser: Aborting because tag " + f"value for {tag} is re-defined and strict checking " + f"is enabled. \nNew assignment: {url}\nPrevious " + f"assignment: {linkdef}") + self.logging.critical(message) + raise OrgParserException(message) + else: + self.logging.debug(f"OrgParser: Warning! Tag {tag} " + "already defined in url dictionary.") + + # Compare the existing value to the new value + if linkdef != url: + message=(f"OrgParser: Aborting because tag " + f"value for {tag} re-defined differently from the " + f"previous value. \nNew assignment: {url}\nPrevious " + f"assignment: {linkdef}") + self.logging.critical(message) + raise OrgParserException(message) + def parse_orgmode_file(self): """ Parses the Org-mode file. @@ -448,6 +482,25 @@ def parse_orgmode_file(self): line = rawline.rstrip() # remove trailing whitespace + # Create a 'linkdefs' dictionary entry for link definitions + if line.upper().startswith('#+LINK:'): + link_definition = line[7:].strip() + tag, url = link_definition.split(None, 1) + url = url.strip('"') + + # Ensure 'linkdefs' key exists in __entry_data and initialize if it doesn't + linkdefs = self.__entry_data.setdefault('linkdefs', {}) + + # check if link definitions are consistent + self.check_link_definition_against_dictionary(linkdefs, + tag, + url) + + linkdefs[tag]=url + self.logging.debug( + f"OrgParser: define url dictionary tag {tag} for URL {url}") + previous_line = line + if not state == self.SEARCHING_BLOG_HEADER: ## limit the output to interesting lines self.logging.debug( "OrgParser: ------------------------------- %s" % diff --git a/lib/utils.py b/lib/utils.py index af5cb8d..d528389 100644 --- a/lib/utils.py +++ b/lib/utils.py @@ -79,7 +79,8 @@ class Utils(object): 'welcher', 'welches', 'wenn', 'werde', 'werden', 'wie', 'wieder', 'wir', 'wird', 'wirst', 'wo', 'wollen', 'wollte', 'würde', 'wüden', 'zu', 'zum', 'zur', - 'zwar', 'zwischen'])] + 'zwar', 'zwischen']), + ('🇫🇷', ['alors', 'au', 'aucuns', 'aussi', 'autre', 'avant', 'avec', 'avoir', 'bon', 'car', 'ce', 'cela', 'ces', 'ceux', 'chaque', 'ci', 'comme', 'comment', 'dans', 'de', 'dedans', 'dehors', 'depuis', 'des', 'deux', 'devrait', 'doit', 'donc', 'dos', 'du', 'elle', 'elles', 'en', 'encore', 'essai', 'est', 'et', 'eu', 'fait', 'faites', 'fois', 'font', 'hors', 'ici', 'il', 'ils', 'je', 'juste', 'la', 'le', 'les', 'leur', 'là', 'ma', 'maintenant', 'mais', 'mes', 'mine', 'moins', 'mon'])] FILENAME_TAG_SEPARATOR = ' -- ' BETWEEN_TAG_SEPARATOR = ' '