From 483b51c51c0dcae16fc24dd5c852e94ad0533380 Mon Sep 17 00:00:00 2001
From: Pierre-Loic Doulcet
Date: Fri, 6 Dec 2024 12:05:46 +0100
Subject: [PATCH] Add support for html_remove_navigation_elements. (#532)

---
 llama_parse/base.py | 9 +++++++++
 pyproject.toml      | 2 +-
 2 files changed, 10 insertions(+), 1 deletion(-)

diff --git a/llama_parse/base.py b/llama_parse/base.py
index fdd9d21..02a183a 100644
--- a/llama_parse/base.py
+++ b/llama_parse/base.py
@@ -173,6 +173,10 @@ class LlamaParse(BasePydanticReader):
         default=False,
         description="If set to true, when parsing HTML the parser will remove fixed elements. Useful to hide cookie banners.",
     )
+    html_remove_navigation_elements: Optional[bool] = Field(
+        default=False,
+        description="If set to true, when parsing HTML the parser will remove navigation elements. Useful to hide menus, header, footer.",
+    )
     http_proxy: Optional[str] = Field(
         default=None,
         description="(optional) If set with input_url will use the specified http proxy to download the file.",
@@ -465,6 +469,11 @@ async def _create_job(
         if self.html_remove_fixed_elements:
             data["html_remove_fixed_elements"] = self.html_remove_fixed_elements
 
+        if self.html_remove_navigation_elements:
+            data[
+                "html_remove_navigation_elements"
+            ] = self.html_remove_navigation_elements
+
         if self.http_proxy is not None:
             data["http_proxy"] = self.http_proxy
 
diff --git a/pyproject.toml b/pyproject.toml
index e339f91..576fa66 100644
--- a/pyproject.toml
+++ b/pyproject.toml
@@ -4,7 +4,7 @@ build-backend = "poetry.core.masonry.api"
 
 [tool.poetry]
 name = "llama-parse"
-version = "0.5.16"
+version = "0.5.17"
 description = "Parse files into RAG-Optimized formats."
 authors = ["Logan Markewich "]
 license = "MIT"