Merge pull request #206 from uclaacm/hackmd2cyberblog

`hackmd2cyberblog.py`
uclaacm · Dec 30, 2024 · 280f2a6 · 280f2a6
2 parents 108c80f + 72f70b3
commit 280f2a6
Showing 1 changed file with 89 additions and 0 deletions.
diff --git a/data/scripts/hackmd2cyberblog.py b/data/scripts/hackmd2cyberblog.py
@@ -0,0 +1,89 @@
+#!/usr/bin/env python3
+"""
+    A script to convert HackMD notes to Cyber Blog posts (mostly downloading and replacing images).
+
+    Dependencies:
+        You will need to download the `requests` library to run this script.
+
+        pip install requests
+
+    Usage: hackmd2cyberblog.py <file>
+
+    The script will download all images from the HackMD note and replace the URLs with the local paths.
+    
+    NOTE: Make sure to run this from the root of the repository (uses relative paths).
+
+    Example:
+        python3 data/scripts/hackmd2cyberblog.py data/blog/2024-12-03-fall-2024-fuzzing-lab.md
+"""
+
+import sys, re
+from pathlib import Path
+import requests
+
+if len(sys.argv) < 2:
+    print("Usage: hackmd2cyberblog.py <file>")
+    sys.exit(1)
+
+# Read the file
+file_path = Path(sys.argv[1])
+if not file_path.exists():
+    print(f"[*] File not found: {sys.argv[1]}")
+    sys.exit(1)
+
+with open(file_path, "r") as f:
+    content = f.read()
+
+# Return a list of all image urls
+images = re.findall(r"https://hackmd\.io/_uploads/[^)]+", content)
+
+
+def download_image(dowload_folder: Path, url: str) -> bool:
+    """
+    Download an image from a given URL and return the path to the downloaded image.
+    """
+    response = requests.get(url)
+    if response.status_code != 200:
+        print(f"[*] Failed to download image: {url}")
+        return False
+
+    # Save the image
+    filename = url.split("/")[-1]
+    path = dowload_folder / filename
+    print(f"[*] Downloading image: {url} -> {path}")
+    with open(path, "wb") as f:
+        f.write(response.content)
+
+    return True
+
+
+print("[*] Downloading images...")
+
+shim = file_path.stem
+download_folder = Path("public/images/blog") / shim
+
+# Create the download folder
+download_folder.mkdir(parents=True, exist_ok=True)
+
+failed = []
+
+for image in images:
+    success = download_image(download_folder, image)
+    if not success:
+        failed.append(image)
+
+
+# Replace the image URLs
+content = re.sub(
+    r"https://hackmd\.io/_uploads/([^)]+)",
+    lambda x: (
+        f"/images/blog/{shim}/{x.group(1)}" if x.group(0) not in failed else x.group(0)
+    ),
+    content,
+)
+
+# Write the new content
+with open(file_path, "w") as f:
+    f.write(content)
+
+print("[*] Done!")