Skip to content

Commit

Permalink
Add support for html_remove_navigation_elements. (#532)
Browse files: browse the repository at this point in the history
  • Loading branch information
hexapode authored Dec 6, 2024
1 parent cdbddef commit 483b51c
Show file tree
Hide file tree
Showing 2 changed files with 10 additions and 1 deletion.
9 changes: 9 additions & 0 deletions llama_parse/base.py
Original file line number Diff line number Diff line change
Expand Up @@ -173,6 +173,10 @@ class LlamaParse(BasePydanticReader):
default=False,
description="If set to true, when parsing HTML the parser will remove fixed elements. Useful to hide cookie banners.",
)
html_remove_navigation_elements: Optional[bool] = Field(
default=False,
description="If set to true, when parsing HTML the parser will remove navigation elements. Useful to hide menus, header, footer.",
)
http_proxy: Optional[str] = Field(
default=None,
description="(optional) If set with input_url will use the specified http proxy to download the file.",
Expand Down Expand Up @@ -465,6 +469,11 @@ async def _create_job(
if self.html_remove_fixed_elements:
data["html_remove_fixed_elements"] = self.html_remove_fixed_elements

if self.html_remove_navigation_elements:
data[
"html_remove_navigation_elements"
] = self.html_remove_navigation_elements

if self.http_proxy is not None:
data["http_proxy"] = self.http_proxy

Expand Down
2 changes: 1 addition & 1 deletion pyproject.toml
Original file line number Diff line number Diff line change
Expand Up @@ -4,7 +4,7 @@ build-backend = "poetry.core.masonry.api"

[tool.poetry]
name = "llama-parse"
version = "0.5.16"
version = "0.5.17"
description = "Parse files into RAG-Optimized formats."
authors = ["Logan Markewich <[email protected]>"]
license = "MIT"
Expand Down

0 comments on commit 483b51c

Please sign in to comment.