diff --git a/ipwb/indexer.py b/ipwb/indexer.py index 3c78e067..8ee0c65d 100755 --- a/ipwb/indexer.py +++ b/ipwb/indexer.py @@ -1,5 +1,14 @@ #!/usr/bin/env python # -*- coding: utf-8 -*- +""" +InterPlanetary Wayback indexer + +This script reads a WARC file and returns a CDXJ representative of its + contents. In doing so, it extracts all archived HTTP responses from + warc-response records, separates the HTTP header from the body, pushes each + into IPFS, and retains the hashes. These hashes are then used to populate the + JSON block corresponding to the archived URI. +""" from __future__ import print_function import sys diff --git a/ipwb/replay.py b/ipwb/replay.py index 7599b1e6..7f2bde27 100755 --- a/ipwb/replay.py +++ b/ipwb/replay.py @@ -1,5 +1,13 @@ #!/usr/bin/env python # -*- coding: utf-8 -*- +""" +InterPlanetary Wayback Replay system + +This script handles requests to replay IPWB archived contents based on a +supplied CDXJ file. This file has been previously generated by the ipwb +indexer. An interface is supplied when first started to assist the user in +navigating their captures. +""" from __future__ import print_function import sys @@ -54,6 +62,9 @@ def setServerHeader(response): @app.route('/webui/') def showWebUI(path): + """ Handle requests for the IPWB replay Web interface and requests + for initializing the replay ServiceWorker. + """ webuiPath = '/'.join(('webui', path)).replace('ipwb.replay', 'ipwb') content = pkg_resources.resource_string(__name__, webuiPath) @@ -92,6 +103,9 @@ def showWebUI(path): def getServiceWorker(path): + """ Get the ServiceWorker code and return corresponding + HTTP response information for the Worker + """ path = ('/' + path).replace('ipwb.replay', 'ipwb') content = pkg_resources.resource_string(__name__, path) resp = Response(content, mimetype='application/javascript') @@ -172,6 +186,7 @@ def __init__(self, url_map, *items): @app.route('/memento//') def showMemento(urir, datetime): + """ Request a URI-R at a supplied datetime from the CDXJ """ urir = getCompleteURI(urir) if ipwbConfig.isLocalHosty(urir): @@ -195,6 +210,7 @@ def showMemento(urir, datetime): def getCDXJLineClosestTo(datetimeTarget, cdxjLines): + """ Get the closest CDXJ entry for a datetime and URI-R """ smallestDiff = float('inf') # math.inf is only py3 bestLine = None datetimeTarget = int(datetimeTarget) @@ -208,6 +224,7 @@ def getCDXJLineClosestTo(datetimeTarget, cdxjLines): def getCDXJLinesWithURIR(urir, indexPath): + """ Get all CDXJ records corresponding to a URI-R """ if not indexPath: indexPath = ipwbConfig.getIPWBReplayIndexPath() indexPath = getIndexFileFullPath(indexPath)