Skip to content

Commit

Permalink
Ensure image extensions, vbump (#159)
Browse files: browse the repository at this point in the history.
  • Loading branch information
logan-markewich authored Apr 25, 2024
1 parent 91b03b2 commit 9ed2081
Show file tree
Hide file tree
Showing 2 changed files with 13 additions and 1 deletion.
12 changes: 12 additions & 0 deletions llama_parse/base.py
Original file line number Diff line number Diff line change
Expand Up @@ -309,6 +309,11 @@ def get_json_result(
def get_images(self, json_result: List[dict], download_path: str) -> List[dict]:
"""Download images from the parsed result."""
headers = {"Authorization": f"Bearer {self.api_key}"}

# make the download path
if not os.path.exists(download_path):
os.makedirs(download_path)

try:
images = []
for result in json_result:
Expand All @@ -318,9 +323,16 @@ def get_images(self, json_result: List[dict], download_path: str) -> List[dict]:
print(f"> Image for page {page['page']}: {page['images']}")
for image in page["images"]:
image_name = image["name"]

# get the full path
image_path = os.path.join(
download_path, f"{job_id}-{image_name}"
)

# get a valid image path
if not image_path.endswith(".png"):
image_path += ".png"

image["path"] = image_path
image["job_id"] = job_id
image["original_pdf_path"] = result["file_path"]
Expand Down
2 changes: 1 addition & 1 deletion pyproject.toml
Original file line number Diff line number Diff line change
Expand Up @@ -4,7 +4,7 @@ build-backend = "poetry.core.masonry.api"

[tool.poetry]
name = "llama-parse"
-version = "0.4.1"
+version = "0.4.2"
description = "Parse files into RAG-Optimized formats."
authors = ["Logan Markewich <[email protected]>"]
license = "MIT"
Expand Down

0 comments on commit 9ed2081

Please sign in to comment.