Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

hackmd2cyberblog.py #206

Merged
merged 2 commits into from
Dec 30, 2024
Merged
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
89 changes: 89 additions & 0 deletions data/scripts/hackmd2cyberblog.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,89 @@
#!/usr/bin/env python3
"""
A script to convert HackMD notes to Cyber Blog posts (mostly downloading and replacing images).

Dependencies:
You will need to install the `requests` library to run this script.

pip install requests

Usage: hackmd2cyberblog.py <file>

The script will download all images from the HackMD note and replace the URLs with the local paths.

NOTE: Make sure to run this from the root of the repository (uses relative paths).

Example:
python3 data/scripts/hackmd2cyberblog.py data/blog/2024-12-03-fall-2024-fuzzing-lab.md
"""

import sys, re
from pathlib import Path
import requests

# Exactly one positional argument is required: the markdown file to convert.
if len(sys.argv) < 2:
    print("Usage: hackmd2cyberblog.py <file>", file=sys.stderr)
    sys.exit(1)

# Read the file
file_path = Path(sys.argv[1])
# is_file() (rather than exists()) also rejects directories, which would
# otherwise make open() below fail with an uncaught IsADirectoryError.
if not file_path.is_file():
    print(f"[*] File not found: {sys.argv[1]}", file=sys.stderr)
    sys.exit(1)

with open(file_path, "r") as f:
    content = f.read()

# Collect every HackMD upload URL found in the note (duplicates included).
# NOTE(review): each match runs up to, but not including, the next ")", so an
# entry can carry text that follows the URL (e.g. a markdown image title).
images = re.findall(r"https://hackmd\.io/_uploads/[^)]+", content)


def download_image(dowload_folder: Path, url: str) -> bool:
    """
    Download the image at `url` into `dowload_folder`.

    The file is saved under the last "/"-separated segment of the URL.

    Args:
        dowload_folder: Existing directory the image is written to. (The
            parameter name's spelling is kept so keyword callers keep working.)
        url: Address of the image to fetch.

    Returns:
        True if the image was fetched with HTTP 200 and written to disk,
        False if the URL has no file name, the request failed, or the server
        answered with any other status code.
    """
    filename = url.split("/")[-1]
    if not filename:
        # A URL ending in "/" would make us try to open the folder itself.
        print(f"[*] Failed to download image: {url} (no file name in URL)")
        return False

    try:
        # Without a timeout a stalled connection would block the script forever.
        response = requests.get(url, timeout=30)
    except requests.RequestException as exc:
        # Connection errors, timeouts, invalid URLs, ...: report and carry on
        # so one bad image does not abort the whole conversion.
        print(f"[*] Failed to download image: {url} ({exc})")
        return False

    if response.status_code != 200:
        print(f"[*] Failed to download image: {url}")
        return False

    # Save the image
    path = dowload_folder / filename
    print(f"[*] Downloading image: {url} -> {path}")
    with open(path, "wb") as f:
        f.write(response.content)

    return True


print("[*] Downloading images...")

# Images are stored in a folder named after the post (file name without suffix).
shim = file_path.stem
download_folder = Path("public/images/blog") / shim

# Create the download folder
download_folder.mkdir(parents=True, exist_ok=True)

# A HackMD upload URL ends at the first whitespace, quote, angle bracket or
# closing parenthesis. Stopping only at ")" would swallow markdown image
# titles / size hints (`url "title"`, `url =300x`) or, for HTML <img> tags,
# everything up to the next ")" in the document.
upload_url = re.compile(r"""https://hackmd\.io/_uploads/([^\s)"'<>]+)""")

# Entries in `images` may carry such trailing text, so re-extract the bare
# URLs from them. dict.fromkeys de-duplicates while keeping first-seen order,
# so an image referenced several times is downloaded only once.
urls = dict.fromkeys(
    match.group(0) for image in images for match in upload_url.finditer(image)
)

# URLs whose download failed; these are left untouched in the note.
failed = set()
for url in urls:
    if not download_image(download_folder, url):
        failed.add(url)


def _to_local_path(match: "re.Match[str]") -> str:
    """Return the local image path for a matched URL, or the URL itself if its download failed."""
    if match.group(0) in failed:
        return match.group(0)
    return f"/images/blog/{shim}/{match.group(1)}"


# Replace the image URLs
content = upload_url.sub(_to_local_path, content)

# Write the new content
with open(file_path, "w") as f:
    f.write(content)

if failed:
    print(f"[*] {len(failed)} image(s) could not be downloaded; their URLs were left unchanged.")

print("[*] Done!")