Skip to content

Commit

Permalink
Improve filtering criteria for undocumented templates (#70)
Browse files Browse the repository at this point in the history
  • Loading branch information
jbzdarkid authored Oct 18, 2024
1 parent b426489 commit 8a9cd01
Show file tree
Hide file tree
Showing 2 changed files with 19 additions and 3 deletions.
2 changes: 1 addition & 1 deletion master.py
Original file line number Diff line number Diff line change
Expand Up @@ -16,7 +16,7 @@
# Using {{lang}} and {{if lang}} on non-template pages -> this is apparently somewhat common now to make copy/paste editing easier
# Pages which link to disambig pages not in hatnote/see also
# A summary of every single external link. Maybe just 'count per domain' and then list the top 10 pages? I'm finding a LOT of suspicious links, and so far I'm only seeing the ones that are *broken*.
# Lang template mis-ordering and lang-template duplicate keys
# {{lang}} template mis-ordering and lang-template duplicate keys
# Templates sorted by usage and protect status
# A 'missing translations' report but for dictionary entries (maybe sorted by usage, too?)
# A report for "Edits on talkpages (not in the 'user talk' namespace) in the past few days", so people can track active discussions?
Expand Down
20 changes: 18 additions & 2 deletions undocumented_templates.py
Original file line number Diff line number Diff line change
@@ -1,6 +1,7 @@
from re import search, sub
from utils import pagescraper_queue, plural, time_and_date, whatlinkshere
from wikitools import wiki
from wikitools.page import Page

verbose = False

Expand All @@ -9,12 +10,14 @@ def pagescraper(page, badpages):
page_visible = sub('<includeonly>.*?</includeonly>', '', page_text)
if len(page_text) == 0:
return # Empty templates (usually due to HTTP failures)
elif float(len(page_visible)) / len(page_text) > .80:
return # Template is self-documenting, as it shows the majority of its contents.
elif '{{tlx|' in page_visible or '{{tl|' in page_visible:
return # Page has example usages
elif search('{{([Dd]oc begin|[Tt]emplate doc|[Dd]ocumentation|[Ww]ikipedia doc|[dD]ictionary/wrapper)}}', page_visible):
return # Page uses a documentation template
elif '{{{' not in page_text:
return # Page does not have any arguments
elif not search('{{{[a-zA-Z0-9]+}}}', page_visible):
return # All of the arguments have defaults

count = page.get_transclusion_count()
if count > 0:
Expand All @@ -23,13 +26,26 @@ def pagescraper(page, badpages):
badpages.append([count, page.title])

def main(w):
navbox_templates = []
navbox = Page(w, 'Template:Navbox')
for page in navbox.get_transclusions(namespaces=['Template']):
if page.title.lower().startswith('template:navbox'):
continue # Exclude alternative navbox templates
if page.title.lower().endswith('sandbox'):
continue # Sandboxes link to pages but shouldn't be used
if 'navbox' not in page.get_wiki_text().lower():
continue # Some template pages actually *use* other navboxes, but are not one themselves.
navbox_templates.append(page.title)

badpages = []
with pagescraper_queue(pagescraper, badpages) as page_q:
for page in w.get_all_templates():
if '/' in page.title:
continue # Don't include subpage templates like Template:Dictionary or Template:PatchDiff
elif page.title[:13] == 'Template:User':
continue # Don't include userboxes.
elif page.title in navbox_templates:
continue # Don't include navboxes, they are self-documenting.
page_q.put(page)

badpages.sort(key=lambda s: (-s[0], s[1]))
Expand Down

0 comments on commit 8a9cd01

Please sign in to comment.