Add animated GIFs, update readme
jxu committed Jun 12, 2018
1 parent f00dae9 commit 4d498b0
Showing 2 changed files with 33 additions and 11 deletions.
README.md: 9 changes (6 additions, 3 deletions)
@@ -2,7 +2,7 @@ Project Euler Offline
 =====================
 All Project Euler problems, with MathJax and images, as a single PDF. Additional text files are provided. Get the releases [here](https://github.com/wxv/project-euler-offline/releases).
 
-Please report any accuracy issues or give feedback. Thanks.
+Please report any inaccuracies or give feedback. Thanks.
 
 Inspired by [Kyle Keen's original Local Euler](http://kmkeen.com/local-euler/2008-07-16-07-33-00.html).
 
@@ -11,13 +11,16 @@ Installation and Usage
 Requirements:
 - PhantomJS (`apt install phantomjs`)
 - Node modules system, webpage (`npm install system webpage`)
-- Python 3 and BeautifulSoup, PyPDF2 (`pip install beautifulsoup4 pypdf2`)
+- Python 3 and PyPDF2, BeautifulSoup, Pillow (`pip install beautifulsoup4 pypdf2 pillow`)
+- BeautifulSoup and Pillow are only required for downloading extra text and images (animated GIF only).
 
 My usage process (replace 1 and 628 with whatever range you like):
 
     phantomjs capture.js 1 628
     python3 combine.py 1 628
+    // Optional: download solutions from https://github.com/luckytoilet/projecteuler-solutions
     cd render
-    zip problems problems.pdf *.txt
+    zip problems problems.pdf *.txt *.gif
 
 Since each page is independent, it is possible to run multiple processes of
 `capture.js` at once, each downloading a certain range.
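The parallel run described above can be scripted rather than typed by hand. A minimal sketch, assuming phantomjs is on the PATH; the `split_range` helper and the choice of four workers are illustrative, not part of the repository:

```python
# Hypothetical launcher: split the problem range into contiguous chunks so
# several capture.js processes can each download one chunk in parallel.
import subprocess


def split_range(start, end, workers):
    """Split the inclusive range [start, end] into `workers` contiguous (lo, hi) chunks."""
    total = end - start + 1
    size, extra = divmod(total, workers)
    chunks, lo = [], start
    for i in range(workers):
        hi = lo + size + (1 if i < extra else 0) - 1
        chunks.append((lo, hi))
        lo = hi + 1
    return chunks


def launch(start, end, workers):
    # One capture.js process per chunk; wait for all of them to finish.
    procs = [subprocess.Popen(["phantomjs", "capture.js", str(lo), str(hi)])
             for lo, hi in split_range(start, end, workers)]
    for p in procs:
        p.wait()


# launch(1, 628, 4)  # uncomment on a machine with phantomjs installed
```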
combine.py: 35 changes (27 additions, 8 deletions)
@@ -1,25 +1,44 @@
-from bs4 import BeautifulSoup
-import requests
 import sys
 from os import sep
 from PyPDF2 import PdfFileMerger
 
 
 
 render_dir = "render"
-download_txt_flag = True
+download_extra_flag = True
 
-def download_txt(url):
-    '''Tries to find a .txt attachment and download it to render_dir.'''
+def download_extra(url):
+    '''Tries to find a .txt attachment or .gif and download it to render_dir.'''
+    # TODO async request
+    from bs4 import BeautifulSoup
+    import requests
+    from os.path import basename
+    from PIL import Image
+    from io import BytesIO
+
+    site_main = "http://projecteuler.net/"
+
     print("Searching", url)
     content = requests.get(url).content
     soup = BeautifulSoup(content, "lxml")
     for a in soup.find_all('a', href=True):
         if a["href"].endswith(".txt"):
             print("Found", a["href"])
-            r = requests.get("http://projecteuler.net/" + a["href"])
+            r = requests.get(site_main + a["href"])
             with open(render_dir + sep + a.text, 'wb') as f:
                 f.write(r.content)
 
+    for img in soup.find_all("img"):
+        img_src = img["src"]
+        # Ignore spacer.gif (blank)
+        if img_src.endswith(".gif") and "spacer" not in img_src:
+            print("Found", img_src)
+            r = requests.get(site_main + img_src)
+            # Only write animated GIFs
+            if Image.open(BytesIO(r.content)).is_animated:
+                print("Writing", img_src)
+                with open(render_dir + sep + basename(img_src), 'wb') as f:
+                    f.write(r.content)
+
 
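The commit gates writing on Pillow's `is_animated` property. As a dependency-free illustration of what "animated" means at the byte level, here is a sketch that counts the image descriptors in a GIF stream; a GIF is animated when it holds more than one frame. The helper and the sample bytes are illustrative additions, not code from the repository:

```python
# Stdlib-only sketch: count frames in a GIF by walking its block structure.
# Pillow's is_animated (used by the commit) corresponds to frame count > 1.
def gif_frame_count(data: bytes) -> int:
    if data[:6] not in (b"GIF87a", b"GIF89a"):
        raise ValueError("not a GIF")
    packed = data[10]          # packed fields of the logical screen descriptor
    pos = 13                   # 6-byte header + 7-byte screen descriptor
    if packed & 0x80:          # global color table present
        pos += 3 * (2 << (packed & 0x07))
    frames = 0
    while pos < len(data) and data[pos] != 0x3B:   # 0x3B = trailer
        block = data[pos]
        pos += 1
        if block == 0x21:      # extension: label byte, then length-prefixed sub-blocks
            pos += 1
            while data[pos]:
                pos += data[pos] + 1
            pos += 1
        elif block == 0x2C:    # image descriptor = one frame
            frames += 1
            packed = data[pos + 8]
            pos += 9
            if packed & 0x80:  # local color table
                pos += 3 * (2 << (packed & 0x07))
            pos += 1           # LZW minimum code size
            while data[pos]:   # image data sub-blocks
                pos += data[pos] + 1
            pos += 1
        else:
            raise ValueError("corrupt GIF block")
    return frames


# Smallest valid 1x1 single-frame GIF, for demonstration.
TINY = (b"GIF89a\x01\x00\x01\x00\x80\x00\x00"
        b"\x00\x00\x00\xff\xff\xff"
        b"\x2c\x00\x00\x00\x00\x01\x00\x01\x00\x00"
        b"\x02\x02\x44\x01\x00\x3b")
print(gif_frame_count(TINY))  # 1 frame, so not animated
```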
@@ -36,13 +55,13 @@ def main():
     print("Merged PDFs")
 
 
-    if download_txt_flag:
+    if download_extra_flag:
         url_list = []
         for problem_id in range(problem_id_start, problem_id_end+1):
             url_list.append("https://projecteuler.net/problem=" + str(problem_id))
 
         for url in url_list:
-            download_txt(url)
+            download_extra(url)
 
 
 if __name__ == "__main__":
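For readers without BeautifulSoup installed, the `.txt` link scan that `download_extra` performs can be mimicked with the standard library's `html.parser`. In this sketch the class name and the sample markup (including the `p022_names.txt` path) are illustrative assumptions, not repository code:

```python
# Stdlib-only sketch of the href scan done with BeautifulSoup in combine.py:
# collect every <a href="..."> whose target ends in ".txt".
from html.parser import HTMLParser


class TxtLinkCollector(HTMLParser):
    def __init__(self):
        super().__init__()
        self.links = []

    def handle_starttag(self, tag, attrs):
        # attrs arrives as a list of (name, value) tuples.
        if tag == "a":
            href = dict(attrs).get("href") or ""
            if href.endswith(".txt"):
                self.links.append(href)


collector = TxtLinkCollector()
collector.feed('<a href="project/resources/p022_names.txt">names</a>'
               '<a href="about">About</a>')
print(collector.links)  # ['project/resources/p022_names.txt']
```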
