diff --git a/.github/workflows/ci_test.yaml b/.github/workflows/ci_test.yaml index 22abd96..6e8ac56 100644 --- a/.github/workflows/ci_test.yaml +++ b/.github/workflows/ci_test.yaml @@ -35,8 +35,8 @@ jobs: - name: Create test env shell: bash run: | - cp tests/sample.env tests/.env - sed -i "s|LLMWHISPERER_API_KEY=|LLMWHISPERER_API_KEY=${{ secrets.LLMWHISPERER_API_KEY }}|" tests/.env + cp sample.env .env + sed -i "s|LLMWHISPERER_API_KEY=|LLMWHISPERER_API_KEY=${{ secrets.LLMWHISPERER_API_KEY }}|" .env - name: Run tox id: tox diff --git a/.pre-commit-config.yaml b/.pre-commit-config.yaml index 26bf2f5..f298e61 100644 --- a/.pre-commit-config.yaml +++ b/.pre-commit-config.yaml @@ -17,6 +17,7 @@ repos: exclude_types: - "markdown" - id: end-of-file-fixer + exclude: "tests/test_data/.*" - id: check-yaml args: [--unsafe] - id: check-added-large-files @@ -65,9 +66,7 @@ repos: args: [--max-line-length=120] exclude: | (?x)^( - .*migrations/.*\.py| - unstract-core/tests/.*| - pkgs/unstract-flags/src/unstract/flags/evaluation_.*\.py| + tests/test_data/.*| )$ - repo: https://github.com/pycqa/isort rev: 5.13.2 diff --git a/README.md b/README.md index b279246..cbdf6d1 100644 --- a/README.md +++ b/README.md @@ -7,7 +7,7 @@ LLMs are powerful, but their output is as good as the input you provide. LLMWhisperer is a technology that presents data from complex documents (different designs and formats) to LLMs in a way that they can best understand. LLMWhisperer features include Layout Preserving Mode, Auto-switching between native text and OCR modes, proper representation of radio buttons and checkboxes in PDF forms as raw text, among other features. You can now extract raw text from complex PDF documents or images without having to worry about whether the document is a native text document, a scanned image or just a picture clicked on a smartphone. 
Extraction of raw text from invoices, purchase orders, bank statements, etc works easily for structured data extraction with LLMs powered by LLMWhisperer's Layout Preserving mode. -Refer to the client documentation for more information: [LLMWhisperer Client Documentation](https://docs.unstract.com/llm_whisperer/python_client/llm_whisperer_python_client_intro) +Refer to the client documentation for more information: [LLMWhisperer Client Documentation](https://docs.unstract.com/llmwhisperer/index.html) ## Features diff --git a/pyproject.toml b/pyproject.toml index b893a52..9d5cb77 100644 --- a/pyproject.toml +++ b/pyproject.toml @@ -10,7 +10,7 @@ dependencies = [ "requests>=2", ] readme = "README.md" -urls = { Homepage = "https://llmwhisperer.unstract.com", Source = "https://github.com/Zipstack/llm-whisperer-python-client" } +urls = { Homepage = "https://unstract.com/llmwhisperer/", Source = "https://github.com/Zipstack/llm-whisperer-python-client" } license = {text = "AGPL v3"} authors = [ {name = "Zipstack Inc", email = "devsupport@zipstack.com"}, @@ -69,7 +69,7 @@ includes = ["src"] package-dir = "src" [tool.pytest.ini_options] -env_files = ["tests/.env"] +env_files = [".env"] addopts = "-s" log_level = "INFO" log_cli = true diff --git a/tests/sample.env b/sample.env similarity index 58% rename from tests/sample.env rename to sample.env index c69cc3d..4b9d712 100644 --- a/tests/sample.env +++ b/sample.env @@ -1,3 +1,4 @@ LLMWHISPERER_BASE_URL=https://llmwhisperer-api.unstract.com/v1 +LLMWHISPERER_BASE_URL_V2=https://llmwhisperer-api.us-central.unstract.com/api/v2 LLMWHISPERER_LOG_LEVEL=DEBUG LLMWHISPERER_API_KEY= diff --git a/src/unstract/llmwhisperer/__init__.py b/src/unstract/llmwhisperer/__init__.py index c50eaef..54049f0 100644 --- a/src/unstract/llmwhisperer/__init__.py +++ b/src/unstract/llmwhisperer/__init__.py @@ -1,8 +1,9 @@ -__version__ = "0.22.0" +__version__ = "0.23.0" from .client import LLMWhispererClient # noqa: F401 +from .client_v2 import 
LLMWhispererClientV2 # noqa: F401 -def get_sdk_version(): +def get_llmw_py_client_version(): """Returns the SDK version.""" return __version__ diff --git a/src/unstract/llmwhisperer/client.py b/src/unstract/llmwhisperer/client.py index e7076e9..6bbbb1e 100644 --- a/src/unstract/llmwhisperer/client.py +++ b/src/unstract/llmwhisperer/client.py @@ -58,9 +58,7 @@ class LLMWhispererClient: client's activities and errors. """ - formatter = logging.Formatter( - "%(asctime)s - %(name)s - %(levelname)s - %(message)s" - ) + formatter = logging.Formatter("%(asctime)s - %(name)s - %(levelname)s - %(message)s") logger = logging.getLogger(__name__) log_stream_handler = logging.StreamHandler() log_stream_handler.setFormatter(formatter) @@ -117,9 +115,7 @@ def __init__( self.api_key = os.getenv("LLMWHISPERER_API_KEY", "") else: self.api_key = api_key - self.logger.debug( - "api_key set to %s", LLMWhispererUtils.redact_key(self.api_key) - ) + self.logger.debug("api_key set to %s", LLMWhispererUtils.redact_key(self.api_key)) self.api_timeout = api_timeout @@ -169,7 +165,7 @@ def whisper( ocr_provider: str = "advanced", line_splitter_tolerance: float = 0.4, horizontal_stretch_factor: float = 1.0, - encoding: str = "utf-8" + encoding: str = "utf-8", ) -> dict: """ Sends a request to the LLMWhisperer API to process a document. 
@@ -240,12 +236,10 @@ def whisper( should_stream = False if url == "": if stream is not None: - should_stream = True def generate(): - for chunk in stream: - yield chunk + yield from stream req = requests.Request( "POST", @@ -269,7 +263,7 @@ def generate(): req = requests.Request("POST", api_url, params=params, headers=self.headers) prepared = req.prepare() s = requests.Session() - response = s.send(prepared, timeout=self.api_timeout, stream=should_stream) + response = s.send(prepared, timeout=timeout, stream=should_stream) response.encoding = encoding if response.status_code != 200 and response.status_code != 202: message = json.loads(response.text) diff --git a/src/unstract/llmwhisperer/client_v2.py b/src/unstract/llmwhisperer/client_v2.py new file mode 100644 index 0000000..97210cf --- /dev/null +++ b/src/unstract/llmwhisperer/client_v2.py @@ -0,0 +1,530 @@ +"""This module provides a Python client for interacting with the LLMWhisperer +API. + +Note: This is for the LLMWhisperer API v2.x + +Prepare documents for LLM consumption +LLMs are powerful, but their output is as good as the input you provide. +LLMWhisperer is a technology that presents data from complex documents +(different designs and formats) to LLMs in a way that they can best understand. + +LLMWhisperer is available as an API that can be integrated into your existing +systems to preprocess your documents before they are fed into LLMs. It can handle +a variety of document types, including PDFs, images, and scanned documents. + +This client simplifies the process of making requests to the API and handling the responses. + +Classes: + LLMWhispererClientException: Exception raised for errors in the LLMWhispererClient. +""" + +import copy +import json +import logging +import os +import time +from typing import IO + +import requests + +BASE_URL = "https://llmwhisperer-api.unstract.com/api/v2" + + +class LLMWhispererClientException(Exception): + """Exception raised for errors in the LLMWhispererClient. 
+ + Attributes: + value (dict): Error details; this module always sets a "status_code" key (the HTTP + status returned by the API, or -1 for errors raised by the client itself). + + Args: + value (dict): Error details; stored unchanged on the exception and returned by + error_message(). + """ + + def __init__(self, value): + self.value = value + + def __str__(self): + return repr(self.value) + + def error_message(self): + return self.value + + +class LLMWhispererClientV2: + """A client for interacting with the LLMWhisperer API. + + Note: This is for the LLMWhisperer API v2.x + + This client uses the requests library to make HTTP requests to the + LLMWhisperer API. It also includes a logger for tracking the + client's activities and errors. + """ + + formatter = logging.Formatter("%(asctime)s - %(name)s - %(levelname)s - %(message)s") + logger = logging.getLogger(__name__) + log_stream_handler = logging.StreamHandler() + log_stream_handler.setFormatter(formatter) + logger.addHandler(log_stream_handler) + + api_key = "" + base_url = "" + api_timeout = 120 + + def __init__( + self, + base_url: str = "", + api_key: str = "", + logging_level: str = "", + ): + """Initializes the LLMWhispererClient with the given parameters. + + Args: + base_url (str, optional): The base URL for the LLMWhisperer API. Defaults to "". + If the base_url is not provided, the client will use + the value of the LLMWHISPERER_BASE_URL_V2 environment + variable, or the default value. + api_key (str, optional): The API key for the LLMWhisperer API. Defaults to "". + If the api_key is not provided, the client will use the + value of the LLMWHISPERER_API_KEY environment variable. + logging_level (str, optional): The logging level for the client. Can be "DEBUG", + "INFO", "WARNING" or "ERROR". Defaults to the + value of the LLMWHISPERER_LOGGING_LEVEL + environment variable, or "DEBUG" if the + environment variable is not set.
+ """ + if logging_level == "": + logging_level = os.getenv("LLMWHISPERER_LOGGING_LEVEL", "DEBUG") + if logging_level == "DEBUG": + self.logger.setLevel(logging.DEBUG) + elif logging_level == "INFO": + self.logger.setLevel(logging.INFO) + elif logging_level == "WARNING": + self.logger.setLevel(logging.WARNING) + elif logging_level == "ERROR": + self.logger.setLevel(logging.ERROR) + self.logger.setLevel(logging_level) + self.logger.debug("logging_level set to %s", logging_level) + + if base_url == "": + self.base_url = os.getenv("LLMWHISPERER_BASE_URL_V2", BASE_URL) + else: + self.base_url = base_url + self.logger.debug("base_url set to %s", self.base_url) + + if api_key == "": + self.api_key = os.getenv("LLMWHISPERER_API_KEY", "") + else: + self.api_key = api_key + + self.headers = {"unstract-key": self.api_key} + + def get_usage_info(self) -> dict: + """Retrieves the usage information of the LLMWhisperer API. + + This method sends a GET request to the '/get-usage-info' endpoint of the LLMWhisperer API. + The response is a JSON object containing the usage information. + Refer to https://docs.unstract.com/llm_whisperer/apis/llm_whisperer_usage_api + + Returns: + dict: A dictionary containing the usage information. + + Raises: + LLMWhispererClientException: If the API request fails, it raises an exception with + the error message and status code returned by the API. 
+ """ + self.logger.debug("get_usage_info called") + url = f"{self.base_url}/get-usage-info" + self.logger.debug("url: %s", url) + req = requests.Request("GET", url, headers=self.headers) + prepared = req.prepare() + s = requests.Session() + response = s.send(prepared, timeout=self.api_timeout) + if response.status_code != 200: + err = json.loads(response.text) + err["status_code"] = response.status_code + raise LLMWhispererClientException(err) + return json.loads(response.text) + + def whisper( + self, + file_path: str = "", + stream: IO[bytes] = None, + url: str = "", + mode: str = "form", + output_mode: str = "layout_preserving", + page_seperator: str = "<<<", + pages_to_extract: str = "", + median_filter_size: int = 0, + gaussian_blur_radius: int = 0, + line_splitter_tolerance: float = 0.4, + horizontal_stretch_factor: float = 1.0, + mark_vertical_lines: bool = False, + mark_horizontal_lines: bool = False, + line_spitter_strategy: str = "left-priority", + lang="eng", + tag="default", + filename="", + webhook_metadata="", + use_webhook="", + wait_for_completion=False, + wait_timeout=180, + encoding: str = "utf-8", + ) -> dict: + """ + Sends a request to the LLMWhisperer API to process a document. + Refer to https://docs.unstract.com/llm_whisperer/apis/llm_whisperer_text_extraction_api + + Args: + file_path (str, optional): The path to the file to be processed. Defaults to "". + stream (IO[bytes], optional): A stream of bytes to be processed. Defaults to None. + url (str, optional): The URL of the file to be processed. Defaults to "". + mode (str, optional): The processing mode. Can be "high_quality", "form", "low_cost" or "native_text". + Defaults to "form". + output_mode (str, optional): The output mode. Can be "layout_preserving" or "text". + Defaults to "layout_preserving". + page_seperator (str, optional): The page separator. Defaults to "<<<". + pages_to_extract (str, optional): The pages to extract. Defaults to "".
+ median_filter_size (int, optional): The size of the median filter. Defaults to 0. + gaussian_blur_radius (int, optional): The radius of the Gaussian blur. Defaults to 0. + line_splitter_tolerance (float, optional): The line splitter tolerance. Defaults to 0.4. + horizontal_stretch_factor (float, optional): The horizontal stretch factor. Defaults to 1.0. + mark_vertical_lines (bool, optional): Whether to mark vertical lines. Defaults to False. + mark_horizontal_lines (bool, optional): Whether to mark horizontal lines. Defaults to False. + line_spitter_strategy (str, optional): The line splitter strategy. Defaults to "left-priority". + lang (str, optional): The language of the document. Defaults to "eng". + tag (str, optional): The tag for the document. Defaults to "default". + filename (str, optional): The name of the file to store in reports. Defaults to "". + webhook_metadata (str, optional): The webhook metadata. This data will be passed to the webhook if + webhooks are used. Defaults to "". + use_webhook (str, optional): Webhook name to call. Defaults to "". If not provided, then + no webhook will be called. + wait_for_completion (bool, optional): Whether to wait for the whisper operation to complete. + Defaults to False. + wait_timeout (int, optional): The number of seconds to wait for the whisper operation to complete. + Defaults to 180. The same value is also used as the timeout of the initial HTTP request. + encoding (str): The character encoding to use for processing the text. Defaults to "utf-8". + + Returns: + dict: The response from the API as a dictionary. + + Raises: + LLMWhispererClientException: If the API request fails, it raises an exception with + the error message and status code returned by the API.
+ """ + self.logger.debug("whisper called") + api_url = f"{self.base_url}/whisper" + params = { + "mode": mode, + "output_mode": output_mode, + "page_seperator": page_seperator, + "pages_to_extract": pages_to_extract, + "median_filter_size": median_filter_size, + "gaussian_blur_radius": gaussian_blur_radius, + "line_splitter_tolerance": line_splitter_tolerance, + "horizontal_stretch_factor": horizontal_stretch_factor, + "mark_vertical_lines": mark_vertical_lines, + "mark_horizontal_lines": mark_horizontal_lines, + "line_spitter_strategy": line_spitter_strategy, + "lang": lang, + "tag": tag, + "filename": filename, + "webhook_metadata": webhook_metadata, + "use_webhook": use_webhook, + } + + self.logger.debug("api_url: %s", api_url) + self.logger.debug("params: %s", params) + + if use_webhook != "" and wait_for_completion: + raise LLMWhispererClientException( + { + "status_code": -1, + "message": "Cannot wait for completion when using webhook", + } + ) + + if url == "" and file_path == "" and stream is None: + raise LLMWhispererClientException( + { + "status_code": -1, + "message": "Either url, stream or file_path must be provided", + } + ) + + should_stream = False + if url == "": + if stream is not None: + should_stream = True + + def generate(): + yield from stream + + req = requests.Request( + "POST", + api_url, + params=params, + headers=self.headers, + data=generate(), + ) + + else: + with open(file_path, "rb") as f: + data = f.read() + req = requests.Request( + "POST", + api_url, + params=params, + headers=self.headers, + data=data, + ) + else: + params["url_in_post"] = True + req = requests.Request("POST", api_url, params=params, headers=self.headers, data=url) + prepared = req.prepare() + s = requests.Session() + response = s.send(prepared, timeout=wait_timeout, stream=should_stream) + response.encoding = encoding + if response.status_code != 200 and response.status_code != 202: + message = json.loads(response.text) + message["status_code"] = 
response.status_code + message["extraction"] = {} + raise LLMWhispererClientException(message) + if response.status_code == 202: + message = json.loads(response.text) + message["status_code"] = response.status_code + message["extraction"] = {} + if not wait_for_completion: + return message + whisper_hash = message["whisper_hash"] + start_time = time.time() + while time.time() - start_time < wait_timeout: + status = self.whisper_status(whisper_hash=whisper_hash) + if status["status_code"] != 200: + message["status_code"] = -1 + message["message"] = "Whisper client operation failed" + message["extraction"] = {} + return message + if status["status"] == "processing": + self.logger.debug(f"Whisper-hash:{whisper_hash} | STATUS: processing...") + elif status["status"] == "delivered": + self.logger.debug(f"Whisper-hash:{whisper_hash} | STATUS: Already delivered!") + raise LLMWhispererClientException( + { + "status_code": -1, + "message": "Whisper operation already delivered", + } + ) + elif status["status"] == "unknown": + self.logger.debug(f"Whisper-hash:{whisper_hash} | STATUS: unknown...") + raise LLMWhispererClientException( + { + "status_code": -1, + "message": "Whisper operation status unknown", + } + ) + elif status["status"] == "failed": + self.logger.debug(f"Whisper-hash:{whisper_hash} | STATUS: failed...") + message["status_code"] = -1 + message["message"] = "Whisper operation failed" + message["extraction"] = {} + return message + elif status["status"] == "processed": + self.logger.debug(f"Whisper-hash:{whisper_hash} | STATUS: processed!") + resultx = self.whisper_retrieve(whisper_hash=whisper_hash) + if resultx["status_code"] == 200: + message["status_code"] = 200 + message["message"] = "Whisper operation completed" + message["status"] = "processed" + message["extraction"] = resultx["extraction"] + else: + message["status_code"] = -1 + message["message"] = "Whisper client operation failed" + message["extraction"] = {} + return message + time.sleep(5) + 
+ message["status_code"] = -1 + message["message"] = "Whisper client operation timed out" + message["extraction"] = {} + return message + + # Will not reach here if status code is 202 + message = json.loads(response.text) + message["status_code"] = response.status_code + return message + + def whisper_status(self, whisper_hash: str) -> dict: + """Retrieves the status of the whisper operation from the LLMWhisperer + API. + + This method sends a GET request to the '/whisper-status' endpoint of the LLMWhisperer API. + The response is a JSON object containing the status of the whisper operation. + + Refer to https://docs.unstract.com/llm_whisperer/apis/llm_whisperer_text_extraction_status_api + + Args: + whisper_hash (str): The hash of the whisper (returned by whisper method) + + Returns: + dict: A dictionary containing the status of the whisper operation. The keys in the + dictionary include 'status_code' and the status details. + + Raises: + LLMWhispererClientException: If the API request fails, it raises an exception with + the error message and status code returned by the API. + """ + self.logger.debug("whisper_status called") + url = f"{self.base_url}/whisper-status" + params = {"whisper_hash": whisper_hash} + self.logger.debug("url: %s", url) + req = requests.Request("GET", url, headers=self.headers, params=params) + prepared = req.prepare() + s = requests.Session() + response = s.send(prepared, timeout=self.api_timeout) + if response.status_code != 200: + err = json.loads(response.text) + err["status_code"] = response.status_code + raise LLMWhispererClientException(err) + message = json.loads(response.text) + message["status_code"] = response.status_code + return message + + def whisper_retrieve(self, whisper_hash: str, encoding: str = "utf-8") -> dict: + """Retrieves the result of the whisper operation from the LLMWhisperer + API. + + This method sends a GET request to the '/whisper-retrieve' endpoint of the LLMWhisperer API.
+ The response is a JSON object containing the result of the whisper operation. + + Refer to https://docs.unstract.com/llm_whisperer/apis/llm_whisperer_text_extraction_retrieve_api + + Args: + whisper_hash (str): The hash of the whisper operation. + encoding (str): The character encoding to use for processing the text. Defaults to "utf-8". + + Returns: + dict: A dictionary containing the status code and the extracted text from the whisper operation. + + Raises: + LLMWhispererClientException: If the API request fails, it raises an exception with + the error message and status code returned by the API. + """ + self.logger.debug("whisper_retrieve called") + url = f"{self.base_url}/whisper-retrieve" + params = {"whisper_hash": whisper_hash} + self.logger.debug("url: %s", url) + req = requests.Request("GET", url, headers=self.headers, params=params) + prepared = req.prepare() + s = requests.Session() + response = s.send(prepared, timeout=self.api_timeout) + response.encoding = encoding + if response.status_code != 200: + err = json.loads(response.text) + err["status_code"] = response.status_code + raise LLMWhispererClientException(err) + + return { + "status_code": response.status_code, + "extraction": json.loads(response.text), + } + + def register_webhook(self, url: str, auth_token: str, webhook_name: str) -> dict: + """Registers a webhook with the LLMWhisperer API. + + This method sends a POST request to the '/whisper-manage-callback' endpoint of the LLMWhisperer API. + The response is a JSON object containing the status of the webhook registration. + + Refer to https://docs.unstract.com/llm_whisperer/apis/ + + Args: + url (str): The URL of the webhook. + auth_token (str): The authentication token for the webhook. + webhook_name (str): The name of the webhook. + + Returns: + dict: A dictionary containing the status code and the response from the API. 
+ + Raises: + LLMWhispererClientException: If the API request fails, it raises an exception with + the error message and status code returned by the API. + """ + + data = { + "url": url, + "auth_token": auth_token, + "webhook_name": webhook_name, + } + url = f"{self.base_url}/whisper-manage-callback" + headersx = copy.deepcopy(self.headers) + headersx["Content-Type"] = "application/json" + req = requests.Request("POST", url, headers=headersx, json=data) + prepared = req.prepare() + s = requests.Session() + response = s.send(prepared, timeout=self.api_timeout) + if response.status_code != 200: + err = json.loads(response.text) + err["status_code"] = response.status_code + raise LLMWhispererClientException(err) + return json.loads(response.text) + + def get_webhook_details(self, webhook_name: str) -> dict: + """Retrieves the details of a webhook from the LLMWhisperer API. + + This method sends a GET request to the '/whisper-manage-callback' endpoint of the LLMWhisperer API. + The response is a JSON object containing the details of the webhook. + + Refer to https://docs.unstract.com/llm_whisperer/apis/ + + Args: + webhook_name (str): The name of the webhook. + + Returns: + dict: A dictionary containing the status code and the response from the API. + + Raises: + LLMWhispererClientException: If the API request fails, it raises an exception with + the error message and status code returned by the API. 
+ """ + + url = f"{self.base_url}/whisper-manage-callback" + params = {"webhook_name": webhook_name} + req = requests.Request("GET", url, headers=self.headers, params=params) + prepared = req.prepare() + s = requests.Session() + response = s.send(prepared, timeout=self.api_timeout) + if response.status_code != 200: + err = json.loads(response.text) + err["status_code"] = response.status_code + raise LLMWhispererClientException(err) + return json.loads(response.text) + + def get_highlight_rect( + self, + line_metadata: list[int], + target_width: int, + target_height: int, + ) -> tuple[int, int, int, int, int]: + """Given the line metadata and the line number, this function returns + the bounding box of the line in the format (page,x1,y1,x2,y2) + + Args: + line_metadata (list[int]): The line metadata returned by the LLMWhisperer API. + target_width (int): The width of your target image/page in UI. + target_height (int): The height of your target image/page in UI. + + Returns: + tuple: The bounding box of the line in the format (page,x1,y1,x2,y2) + """ + + page = line_metadata[0] + x1 = 0 + y1 = line_metadata[1] - line_metadata[2] + x2 = target_width + y2 = line_metadata[1] + original_height = line_metadata[3] + + y1 = int((float(y1) / float(original_height)) * float(target_height)) + y2 = int((float(y2) / float(original_height)) * float(target_height)) + + return (page, x1, y1, x2, y2) diff --git a/tests/conftest.py b/tests/conftest.py index e5e6b03..49eab9a 100644 --- a/tests/conftest.py +++ b/tests/conftest.py @@ -3,15 +3,21 @@ import pytest from unstract.llmwhisperer.client import LLMWhispererClient +from unstract.llmwhisperer.client_v2 import LLMWhispererClientV2 @pytest.fixture(name="client") def llm_whisperer_client(): - # Create an instance of the client client = LLMWhispererClient() return client +@pytest.fixture(name="client_v2") +def llm_whisperer_client_v2(): + client = LLMWhispererClientV2() + return client + + @pytest.fixture(name="data_dir", 
scope="session") def test_data_dir(): return os.path.join(os.path.dirname(__file__), "test_data") diff --git a/tests/integration/__init__.py b/tests/integration/__init__.py new file mode 100644 index 0000000..e69de29 diff --git a/tests/client_test.py b/tests/integration/client_test.py similarity index 97% rename from tests/client_test.py rename to tests/integration/client_test.py index 9f77ac5..5f0d35a 100644 --- a/tests/client_test.py +++ b/tests/integration/client_test.py @@ -32,6 +32,7 @@ def test_get_usage_info(client): ("ocr", "text", "restaurant_invoice_photo.pdf"), ("text", "line-printer", "restaurant_invoice_photo.pdf"), ("text", "text", "handwritten-form.pdf"), + ("ocr", "line-printer", "utf_8_chars.pdf"), ], ) def test_whisper(client, data_dir, processing_mode, output_mode, input_file): diff --git a/tests/integration/client_v2_test.py b/tests/integration/client_v2_test.py new file mode 100644 index 0000000..a097765 --- /dev/null +++ b/tests/integration/client_v2_test.py @@ -0,0 +1,124 @@ +import logging +import os +from difflib import SequenceMatcher, unified_diff +from pathlib import Path + +import pytest + +logger = logging.getLogger(__name__) + + +def test_get_usage_info(client_v2): + usage_info = client_v2.get_usage_info() + logger.info(usage_info) + assert isinstance(usage_info, dict), "usage_info should be a dictionary" + expected_keys = [ + "current_page_count", + "current_page_count_low_cost", + "current_page_count_form", + "current_page_count_high_quality", + "current_page_count_native_text", + "daily_quota", + "monthly_quota", + "overage_page_count", + "subscription_plan", + "today_page_count", + ] + assert set(usage_info.keys()) == set(expected_keys), f"usage_info {usage_info} does not contain the expected keys" + + +@pytest.mark.parametrize( + "output_mode, mode, input_file", + [ + ("layout_preserving", "native_text", "credit_card.pdf"), + ("layout_preserving", "low_cost", "credit_card.pdf"), + ("layout_preserving", "high_quality", 
"restaurant_invoice_photo.pdf"), + ("layout_preserving", "form", "handwritten-form.pdf"), + ("text", "native_text", "credit_card.pdf"), + ("text", "low_cost", "credit_card.pdf"), + ("text", "high_quality", "restaurant_invoice_photo.pdf"), + ("text", "form", "handwritten-form.pdf"), + ("layout_preserving", "high_quality", "utf_8_chars.pdf"), + ], +) +def test_whisper_v2(client_v2, data_dir, output_mode, mode, input_file): + file_path = os.path.join(data_dir, input_file) + whisper_result = client_v2.whisper( + mode=mode, output_mode=output_mode, file_path=file_path, wait_for_completion=True + ) + logger.debug(f"Result for '{output_mode}', '{mode}', " f"'{input_file}: {whisper_result}") + + exp_basename = f"{Path(input_file).stem}.{mode}.{output_mode}.txt" + exp_file = os.path.join(data_dir, "expected", exp_basename) + # verify extracted text + assert_extracted_text(exp_file, whisper_result, mode, output_mode) + + +@pytest.mark.parametrize( + "output_mode, mode, url, input_file, page_count", + [ + ("layout_preserving", "native_text", "https://unstractpocstorage.blob.core.windows.net/public/Amex.pdf", + "credit_card.pdf", 7), + ("layout_preserving", "low_cost", "https://unstractpocstorage.blob.core.windows.net/public/Amex.pdf", + "credit_card.pdf", 7), + ("layout_preserving", "high_quality", "https://unstractpocstorage.blob.core.windows.net/public/scanned_bill.pdf", + "restaurant_invoice_photo.pdf", 1), + ("layout_preserving", "form", "https://unstractpocstorage.blob.core.windows.net/public/scanned_form.pdf", + "handwritten-form.pdf", 1), + ] +) +def test_whisper_v2_url_in_post(client_v2, data_dir, output_mode, mode, url, input_file, page_count): + usage_before = client_v2.get_usage_info() + whisper_result = client_v2.whisper( + mode=mode, output_mode=output_mode, url=url, wait_for_completion=True + ) + logger.debug(f"Result for '{output_mode}', '{mode}', " f"'{input_file}: {whisper_result}") + + exp_basename = f"{Path(input_file).stem}.{mode}.{output_mode}.txt" + 
exp_file = os.path.join(data_dir, "expected", exp_basename) + # verify extracted text + assert_extracted_text(exp_file, whisper_result, mode, output_mode) + usage_after = client_v2.get_usage_info() + # Verify usage after extraction + verify_usage(usage_before, usage_after, page_count, mode) + + +def assert_extracted_text(file_path, whisper_result, mode, output_mode): + with open(file_path, encoding="utf-8") as f: + exp = f.read() + + assert isinstance(whisper_result, dict) + assert whisper_result["status_code"] == 200 + + # For OCR based processing + threshold = 0.97 + + # For text based processing + if mode == "native_text" and output_mode == "text": + threshold = 0.99 + extracted_text = whisper_result["extraction"]["result_text"] + similarity = SequenceMatcher(None, extracted_text, exp).ratio() + + if similarity < threshold: + diff = "\n".join( + unified_diff(exp.splitlines(), extracted_text.splitlines(), fromfile="Expected", tofile="Extracted") + ) + pytest.fail(f"Texts are not similar enough: {similarity * 100:.2f}% similarity. 
Diff:\n{diff}") + + +def verify_usage(before_extract, after_extract, page_count, mode='form'): + all_modes = ['form', 'high_quality', 'low_cost', 'native_text'] + all_modes.remove(mode) + assert (after_extract['today_page_count'] == before_extract['today_page_count'] + page_count), \ + "today_page_count calculation is wrong" + assert (after_extract['current_page_count'] == before_extract['current_page_count'] + page_count), \ + "current_page_count calculation is wrong" + if after_extract['overage_page_count'] > 0: + assert (after_extract['overage_page_count'] == before_extract['overage_page_count'] + page_count), \ + "overage_page_count calculation is wrong" + assert (after_extract[f'current_page_count_{mode}'] == before_extract[f'current_page_count_{mode}'] + page_count), \ + f"{mode} mode calculation is wrong" + for i in range(len(all_modes)): + assert (after_extract[f'current_page_count_{all_modes[i]}'] == + before_extract[f'current_page_count_{all_modes[i]}']), \ + f"{all_modes[i]} mode calculation is wrong" diff --git a/tests/test_data/expected/credit_card.low_cost.layout_preserving.txt b/tests/test_data/expected/credit_card.low_cost.layout_preserving.txt new file mode 100644 index 0000000..8964a8a --- /dev/null +++ b/tests/test_data/expected/credit_card.low_cost.layout_preserving.txt @@ -0,0 +1,377 @@ + + +AMERICAN Blue Cash® from American Express p. 
1/7 + EXPRESS + JOSEPH PAULSON Customer Care: 1-888-258-3741 + TTY: Use 711 + Closing Date 09/27/23 Relay + Account 7-73045 Website: americanexpress.com + Ending ~ ~ + + Reward Dollars + New Balance $10,269.65 + as of 08/29/2023 + + Minimum Payment Due $205.39 1,087.93 + For more details about Rewards, visit + americanexpress.com/cashbackrewards + + Payment Due Date 10/22/23 + Account Summary + + Previous Balance $6,583.67 + Late Payment Warning: If we do not receive your Minimum Payment Due by + the Payment Due Date of 10/22/23, you may have to pay a late fee of up to Payments/Credits -$6,583.67 + $40.00 and your APRs may be increased to the Penalty APR of 29.99%. New Charges +$10,269.65 + Fees +$0.00 + + Interest Charged +$0.00 + + New Balance + Minimum Payment Warning: If you have a Non-Plan Balance and make only the $10,269.65 + Minimum Due $205.39 + minimum payment each period, you will pay more in interest and it will take you longer Payment + to pay off your Non-Plan Balance. For example: + Credit Limit $26,400.00 + If you make no additional You will pay off the balance And you will pay an Available Credit $16,130.35 + charges and each month shown on this statement in estimated total of... + Cash Advance Limit $4,600.00 + you pay... about... + Available Cash $4,600.00 + + Only the + 22 years $29,830 + Minimum Payment Due + + $14,640 + $407 3 years = + (Savings $15,190) + + If you would like information about credit counseling services, call 1-888-733-4139. + + See page 2 for important information about your account. + [+] + + > Please refer to the IMPORTANT NOTICES section on + page 7. + + Continued on page 3 + + \ Please fold on the perforation below, detach and return with your payment V + + ps Payment Coupon Pay by Computer Pay by Phone Account Ending 7-73045 + I Do not or use C 1-800-472-9297 + staple paper clips americanexpress.com/pbc + Enter 15 digit account # on all payments. + Make check payable to American Express. 
+ + JOSEPH PAULSON Payment Due Date + 3742 CLOUD SPGS RD 10/22/23 + #403-1045 + New Balance + DALLAS TX 75219-4136 + $10,269.65 + + Minimum Payment Due + 205.39 + + See reverse side for instructions AMERICAN EXPRESS e + PO BOX 6031 Amount Enclosed + on how to update your address, + CAROL STREAM IL 60197-6031 + phone number, or email. + + Wall dbollllllllatloodladll + + 00003499916e2708152 0010269650000280539 a4 d +<<< + + JOSEPH PAULSON + Account Ending 7-73045 p. 2/7 + + Payments: Your payment must be sent to the payment address shown on represents money owed to you. If within the six-month period following +your statement and must be received by 5 p.m. local time at that address to the date of the first statement indicating the credit balance you do not + be credited as of the day it is received. Payments we receive after 5 p.m. will request a refund or charge enough to use up the credit balance, we will + not be credited to your Account until the next day. Payments must also: (1) send you a check for the credit balance within 30 days if the amount is + include the remittance coupon from your statement; (2) be made with a $1.00 or more. + single check drawn on a US bank and payable in US dollars, or with a Credit Reporting: We may report information about your Account to credit + negotiable instrument payable in US dollars and clearable through the US bureaus. Late payments, missed payments, or other defaults on your + banking system; and (3) include your Account number. If your payment Account may be reflected in your credit report. + does not meet all of the above requirements, crediting may be delayed and What To Do If You Think You Find A Mistake On Your Statement +you may incur late payment fees and additional interest charges. 
Electronic If you think there is an error on your statement, write to us at: + payments must be made through an electronic payment method payable American Express, PO Box 981535, El Paso TX 79998-1535 + in US dollars and clearable through the US banking system. Please do not You may also contact us on the Web: www.americanexpress.com + send post-dated checks as they will be deposited upon receipt. Any In your letter, give us the following information: + restrictive language on a payment we accept will have no effect on us - Account information: Your name and account number. +without our express prior written approval. We will re-present to your - Dollar amount: The dollar amount of the suspected error. + - If +financial institution any payment that is returned unpaid. Description of Problem: you think there is an error on your bill, + Permission for Electronic Withdrawal: (1) When you send a check for describe what you believe is wrong and why you believe it is a mistake. + payment, you give us permission to electronically withdraw your payment You must contact us within 60 days after the error appeared on your +from your deposit or other asset account. We will process checks statement. + electronically by transmitting the amount of the check, routing number, You must notify us of any potential errors in writing [or electronically]. You + account number and check serial number to your financial institution, may call us, but if you do we are not required to investigate any potential + unless the check is not processable electronically or a less costly process is errors and you may have to pay the amount in question. + available. 
When we process your check electronically, your payment may While we investigate whether or not there has been an error, the following + be withdrawn from your deposit or other asset account as soon as the same are true: + day we receive your check, and you will not receive that cancelled check - We cannot try to collect the amount in question, or report you as +with your deposit or other asset account statement. If we cannot collect the delinquent on that amount. +funds electronically we may issue a draft against your deposit or other asset - The charge in question may remain on your statement, and we may + account for the amount of the check. (2) By using Pay By Computer, Pay By continue to charge you interest on that amount. But, if we determine that + Phone or any other electronic payment service of ours, you give us we made a mistake, you will not have to pay the amount in question or any + permission to electronically withdraw funds from the deposit or other asset interest or other fees related to that amount. + account you specify in the amount you request. Payments using such - While you do not have to pay the amount in question, you are responsible + services of ours received after 8:00 p.m. MST may not be credited until the for the remainder of your balance. + next day. - We can apply any unpaid amount against your credit limit. + How We Calculate Your Balance: We use the Average Daily Balance (ADB) Your Rights If You Are Dissatisfied With Your Credit Card Purchases + method (including new transactions) to calculate the balance on which we If you are dissatisfied with the goods or services that you have purchased + charge interest on your Account. Call the Customer Care number on page 3 with your credit card, and you have tried in good faith to correct the +for more information about this balance computation method and how problem with the merchant, you may have the right not to pay the + resulting interest charges are determined. 
The method we use to figure the remaining amount due on the purchase. +ADB and interest results in daily compounding of interest. To use this right, all of the following must be true: + Paying Interest: Your due date is at least 25 days after the close of each 1. The purchase must have been made in your home state or within 100 + billing period. We will not charge you interest on your purchases if you pay miles of your current mailing address, and the purchase price must have + each month your entire balance (or Adjusted Balance if applicable) by the been more than $50. (Note: Neither of these is necessary if your purchase + due date each month. We will charge you interest on cash advances and was based on an advertisement we mailed to you, or if we own the + (unless otherwise disclosed) balance transfers beginning on the transaction company that sold you the goods or services.) + date. 2. You must have used your credit card for the purchase. Purchases made + Foreign Currency Charges: If you make a Charge in a foreign currency, we with cash advances from an ATM or with a check that accesses your credit +will convert it into US dollars on the date we or our agents process it. We card account do not qualify. +will charge a fee of 2.70% of the converted US dollar amount. We will 3. You must not yet have fully paid for the purchase. + choose a conversion rate that is acceptable to us for that date, unless a If all of the criteria above are met and you are still dissatisfied with the + particular rate is required by law. The conversion rate we use is no more purchase, contact us in writing or electronically at: +than the highest official rate published by a government agency or the American Express, PO Box 981535, El Paso TX 79998-1535 + highest interbank rate we identify from customary banking sources on the www.americanexpress.com + conversion date or the prior business day. 
This rate may differ from rates in While we investigate, the same rules apply to the disputed amount as + effect on the date of your charge. Charges converted by establishments discussed above. After we finish our investigation, we will tell you our + (such as airlines) will be billed at the rates such establishments use. decision. At that point, if we think you owe an amount and you do not pay + Credit Balance: A credit balance (designated CR) shown on this statement we may report you as delinquent. + + Your Bill with + Pay AutoPay + + Deduct your payment from your bank + account automatically each month. + + - + Avoid late fees + - + Save time + + Change of Address, phone number, email + + - Visit americanexpress.com/autopay + Online at www.americanexpress.com/updatecontactinfo + today to enroll. + - + Via mobile device + - + Voice automated: call the number on the back of your card + - + For name, company name, and foreign address or phone changes, please call Customer Care + + Please do not add any written communication or address change on this stub + For information on how we protect your + privacy and to set your communication + and privacy choices, please visit + www.americanexpress.com/privacy. +<<< + +AMERICAN Blue Cash® from American Express p. 3/7 + EXPRESS + JOSEPH PAULSON + + Closing Date 09/27/23 Account Ending 7-73045 + + Customer Care & Billing Inquiries 1-888-258-3741 + C International Collect 1-336-393-1111 + =] Website: americanexpress.com + Cash Advance at ATMs Inquiries 1-800-CASH-NOW + Customer Care Payments + Large Print & Braille Statements 1-888-258-3741 + & Billing Inquiries PO BOX 6031 + P.O. BOX 981535 CAROL STREAM IL + EL PASO, TX 60197-6031 + 79998-1535 + Hearing Impaired + Online chat at americanexpress.com or use Relay dial 711 and 1-888-258-3741 + + American Express® High Yield Savings Account + No monthly fees. No minimum opening monthly deposit. 24/7 customer + + support. FDIC insured. 
Meet your savings goals faster with an American + + Express High Yield Savings Account. Terms apply. Learn more by visiting + + americanexpress.com/savenow. + + Total + + Payments -$6,583.67 + + Credits $0.00 + + Total Payments and Credits -$6,583.67 + + Payments Amount + + 09/22/23* MOBILE PAYMENT - THANK YOU -$6,583.67 + + Total + + Total New Charges $10,269.65 + + JOSEPH PAULSON + an Card Ending 7-73045 + + Amount + + 08/30/23 SAFEWAY CUPERTINO CA $23.11 + 800-898-4027 + + 09/01/23 BANANA LEAF 650000012619980 MILPITAS CA $144.16 + 4087199811 + + 09/01/23 BT*LINODE*AKAMAI CAMBRIDGE MA $6,107.06 + 6093807100 + + 09/01/23 GOOGLE*GSUITE_SOCIALANIMAL.IO MOUNTAIN VIEW CA $20.44 + ADVERTISING SERVICE + + 09/02/23 Amazon Web Services AWS.Amazon.com WA $333.88 + WEB SERVICES + + 09/03/23 SAFEWAY CUPERTINO CA $11.18 + 800-898-4027 + + 09/09/23 TST* BIKANER SWEET 00053687 SUNNYVALE CA $21.81 + RESTAURANT + + Continued on reverse +<<< + + JOSEPH PAULSON + Account Ending 7-73045 p.4/7 + + Amount + +09/10/23 CVS PHARMACY CUPERTINO CA $2.34 + 8007467287 + +09/13/23 APPLE.COM/BILL INTERNET CHARGE CA $2.99 + RECORD STORE + +09/13/23 SAFEWAY CUPERTINO CA $26.73 + 800-898-4027 + +09/14/23 MCDONALD'S CUPERTINO CA $3.26 + 6509404200 + +09/14/23 PANERA BREAD #204476 CAMPBELL CA $23.38 + + 975313007 95008 + +09/14/23 MANLEY DONUTS 00-08040662747 CUPERTINO CA $21.15 + BAKERY + +09/15/23 Ap|Pay 6631309 - PEETS B TMP 53033 OKALAND CA $4.27 + RESTAURANT + +09/16/23 VEGAS.COM LAS VEGAS NV $761.58 + 18669983427 + +09/16/23 Ap|Pay PANDA EXPRESS LAS VEGAS NV $12.08 + FAST FOOD RESTAURANT + +09/17/23 Ap|IPay LUX_STARBUCKS_ATRIUM LAS VEGAS NV $23.68 + 11980066 89109 + RESTAURANT + +09/18/23 SPK*SPOKEO ENTPRS 888-858-0803 CA $119.95 + + 888-858-0803 + +09/24/23 SIXT USA POS FORT LAUDERDALE FL $2,537.90 + AUTOMOBILE RENTAL + Sixt9497938611 + 30826E5JF4ZIIBIHSB + +09/24/23 LUCKY #773.SANTA CLARACA 0000000009925 SANTA CLARA CA $35.17 + 4082475200 + +09/24/23 MILAN SWEET CENTER 0000 MILPITAS CA 
$27.03 + 408-946-2525 + +09/25/23 ApIPay MANLEY DONUTS 00-08040662747 CUPERTINO CA $6.50 + + BAKERY + + Amount + +Total Fees for this Period $0.00 + + Amount + +Total Interest Charged for this Period $0.00 + +About Trailing Interest +You may see interest on your next statement even if you pay the new balance in full and on time and make no new charges. This is called +"trailing interest". Trailing interest is the interest charged when, for example, you didn't pay your previous balance in full. When that +happens, we charge interest from the first day of the billing period until we receive your payment in full. You can avoid paying interest +on purchases by paying your balance in full (or if you have a Plan balance, by paying your Adjusted Balance on your billing statement) by +the due date each month. Please see the "When we charge interest" sub-section in your Cardmember Agreement for details. + + Continued on next page +<<< + +AMERICAN Blue Cash® from American Express p.5/7 + EXPRESS + JOSEPH PAULSON + + Closing Date 09/27/23 Account Ending 7-73045 + + Amount + + Total Fees in 2023 $0.00 + + Total Interest in 2023 $0.00 + + Your Annual Percentage Rate (APR) is the annual interest rate on your account. + Variable APRs will not exceed 29.99%. + Transactions Dated Annual Balance Interest + Percentage Subject to Charge + From To Rate Interest Rate + + Purchases 02/26/2011 24.49% (v) $0.00 $0.00 + + Cash Advances 02/26/2011 29.99% (v) $0.00 $0.00 + + Total $0.00 + + (v) Variable Rate +<<< + +JOSEPH PAULSON + Account Ending 7-73045 p. 6/7 +<<< + +AMERICAN 7/7 + EXPRESS JOSEPH PAULSON Closing Date 09/27/23 Account Ending 7-73045 + + EFT Error Resolution Notice + In Case of Errors or Questions About Your Electronic Transfers Telephone us at 1-800-IPAY-AXP for Pay By + Phone questions, at 1-800-528-2122 for Pay By Computer questions, and at 1-800-528-4800 for AutoPay. You + may also write us at American Express, Electronic Funds Services, P.O. 
Box 981531, El Paso TX 79998-1531, or + contact online at www.americanexpress.com/inquirycenter as soon as you can, if you think your statement or + receipt is wrong or if you need more information about a transfer on the statement or receipt. We must hear from + you no later than 60 days after we sent you the FIRST statement on which the error or problem appeared. + 1. Tell us your name and account number (if any). + 2. Describe the error or the transfer you are unsure about, and explain as clearly as you can why you + believe it is an error or why you need more information. + 3. Tell us the dollar amount of the suspected error. + We will investigate your complaint and will correct any error promptly. If we take more than 10 business days to + do this, we will credit your account for the amount you think is in error, so that you will have the use of the money + during the time it takes us to complete our investigation. + + End of Important Notices. +<<< \ No newline at end of file diff --git a/tests/test_data/expected/credit_card.low_cost.text.txt b/tests/test_data/expected/credit_card.low_cost.text.txt new file mode 100644 index 0000000..6a428a8 --- /dev/null +++ b/tests/test_data/expected/credit_card.low_cost.text.txt @@ -0,0 +1,1577 @@ +AMERICAN + + +Blue Cash® from American Express + + +p. 
1/7 + + +EXPRESS + + +JOSEPH PAULSON + + +Customer Care: + + +1-888-258-3741 + + +TTY: + + +Closing Date 09/27/23 + + +Use Relay 711 + + +Account Ending 7-73045 + + +~ + + +~ + + +Website: + + +americanexpress.com + + +Reward Dollars + + +New Balance + + +$10,269.65 + + +as of 08/29/2023 + + +Minimum Payment Due + + +$205.39 + + +1,087.93 + + +For more details about Rewards, visit + + +americanexpress.com/cashbackrewards + + +Payment Due Date + + +10/22/23 + + +Account Summary + + +Previous Balance + + +$6,583.67 + + +Late Payment Warning: If we do not receive your Minimum Payment Due by + + +Payments/Credits + + +-$6,583.67 + + +the Payment Due Date of 10/22/23, you may have to pay a late fee of up to + + +$40.00 and your APRs may be increased to the Penalty APR of 29.99%. + + +New Charges + + ++$10,269.65 + + +Fees + + ++$0.00 + + +Interest Charged + + ++$0.00 + + +New Balance + + +Minimum Payment Warning: If you have a Non-Plan Balance and make only the + + +$10,269.65 + + +minimum payment each period, you will pay more in interest and it will take you longer + + +Minimum Payment Due + + +$205.39 + + +to pay off your Non-Plan Balance. For example: + + +Credit Limit + + +$26,400.00 + + +If you make no additional + + +You will pay off the balance + + +And you will pay an + + +Available Credit + + +$16,130.35 + + +charges and each month + + +shown on this statement in + + +estimated total of... + + +you pay... + + +about... + + +Cash Advance Limit + + +$4,600.00 + + +Available Cash + + +$4,600.00 + + +Only the + + +22 years + + +$29,830 + + +Minimum Payment Due + + +$14,640 + + +$407 + + +3 years + + +(Savings = $15,190) + + +If you would like information about credit counseling services, call 1-888-733-4139. + + +[+] See page 2 for important information about your account. + + +> + + +Please refer to the IMPORTANT NOTICES section on + + +page 7. 
+ + +Continued on page 3 + + +\ Please fold on the perforation below, detach and return with your payment V + + +ps Payment Coupon + + +Pay by Computer + + +Pay by Phone + + +Account Ending 7-73045 + + +I Do not staple or use paper clips + + +americanexpress.com/pbc + + +C + + +1-800-472-9297 + + +Enter 15 digit account # on all payments. + + +Make check payable to American Express. + + +JOSEPH PAULSON + + +Payment Due Date + + +3742 CLOUD SPGS RD + + +10/22/23 + + +#403-1045 + + +DALLAS TX 75219-4136 + + +New Balance + + +$10,269.65 + + +Minimum Payment Due + + +205.39 + + +AMERICAN EXPRESS + + +See reverse side for instructions + + +e + + +on how to update your address, + + +PO BOX 6031 + + +Amount Enclosed + + +phone number, or email. + + +CAROL STREAM IL 60197-6031 + + +Wall dbollllllllatloodladll + + +00003499916e2708152 0010269650000280539 a4 d + + +<<< + + + + + + + +JOSEPH PAULSON + + +Account Ending 7-73045 + + +p. 2/7 + + +Payments: Your payment must be sent to the payment address shown on + + +represents money owed to you. If within the six-month period following + + +your statement and must be received by 5 p.m. local time at that address to + + +the date of the first statement indicating the credit balance you do not + + +be credited as of the day it is received. Payments we receive after 5 p.m. will + + +request a refund or charge enough to use up the credit balance, we will + + +not be credited to your Account until the next day. Payments must also: (1) + + +send you a check for the credit balance within 30 days if the amount is + + +include the remittance coupon from your statement; (2) be made with a + + +$1.00 or more. + + +single check drawn on a US bank and payable in US dollars, or with a + + +Credit Reporting: We may report information about your Account to credit + + +negotiable instrument payable in US dollars and clearable through the US + + +bureaus. 
Late payments, missed payments, or other defaults on your + + +banking system; and (3) include your Account number. If your payment + + +Account may be reflected in your credit report. + + +does not meet all of the above requirements, crediting may be delayed and + + +What To Do If You Think You Find A Mistake On Your Statement + + +you may incur late payment fees and additional interest charges. Electronic + + +If you think there is an error on your statement, write to us at: + + +payments must be made through an electronic payment method payable + + +American Express, PO Box 981535, El Paso TX 79998-1535 + + +in US dollars and clearable through the US banking system. Please do not + + +You may also contact us on the Web: www.americanexpress.com + + +send post-dated checks as they will be deposited upon receipt. Any + + +In your letter, give us the following information: + + +restrictive language on a payment we accept will have no effect on us + + +- Account information: Your name and account number. + + +without our express prior written approval. We will re-present to your + + +- Dollar amount: The dollar amount of the suspected error. + + +financial institution any payment that is returned unpaid. + + +- Description of Problem: If you think there is an error on your bill, + + +Permission for Electronic Withdrawal: (1) When you send a check for + + +describe what you believe is wrong and why you believe it is a mistake. + + +payment, you give us permission to electronically withdraw your payment + + +You must contact us within 60 days after the error appeared on your + + +from your deposit or other asset account. We will process checks + + +statement. + + +electronically by transmitting the amount of the check, routing number, + + +You must notify us of any potential errors in writing [or electronically]. 
You + + +account number and check serial number to your financial institution, + + +may call us, but if you do we are not required to investigate any potential + + +unless the check is not processable electronically or a less costly process is + + +errors and you may have to pay the amount in question. + + +available. When we process your check electronically, your payment may + + +While we investigate whether or not there has been an error, the following + + +be withdrawn from your deposit or other asset account as soon as the same + + +are true: + + +day we receive your check, and you will not receive that cancelled check + + +- We cannot try to collect the amount in question, or report you as + + +with your deposit or other asset account statement. If we cannot collect the + + +delinquent on that amount. + + +funds electronically we may issue a draft against your deposit or other asset + + +- The charge in question may remain on your statement, and we may + + +account for the amount of the check. (2) By using Pay By Computer, Pay By + + +continue to charge you interest on that amount. But, if we determine that + + +Phone or any other electronic payment service of ours, you give us + + +we made a mistake, you will not have to pay the amount in question or any + + +permission to electronically withdraw funds from the deposit or other asset + + +interest or other fees related to that amount. + + +account you specify in the amount you request. Payments using such + + +- While you do not have to pay the amount in question, you are responsible + + +services of ours received after 8:00 p.m. MST may not be credited until the + + +for the remainder of your balance. + + +next day. + + +- We can apply any unpaid amount against your credit limit. 
+ + +How We Calculate Your Balance: We use the Average Daily Balance (ADB) + + +Your Rights If You Are Dissatisfied With Your Credit Card Purchases + + +method (including new transactions) to calculate the balance on which we + + +If you are dissatisfied with the goods or services that you have purchased + + +charge interest on your Account. Call the Customer Care number on page 3 + + +with your credit card, and you have tried in good faith to correct the + + +for more information about this balance computation method and how + + +problem with the merchant, you may have the right not to pay the + + +resulting interest charges are determined. The method we use to figure the + + +remaining amount due on the purchase. + + +ADB and interest results in daily compounding of interest. + + +To use this right, all of the following must be true: + + +Paying Interest: Your due date is at least 25 days after the close of each + + +1. The purchase must have been made in your home state or within 100 + + +billing period. We will not charge you interest on your purchases if you pay + + +miles of your current mailing address, and the purchase price must have + + +each month your entire balance (or Adjusted Balance if applicable) by the + + +been more than $50. (Note: Neither of these is necessary if your purchase + + +due date each month. We will charge you interest on cash advances and + + +was based on an advertisement we mailed to you, or if we own the + + +(unless otherwise disclosed) balance transfers beginning on the transaction + + +company that sold you the goods or services.) + + +date. + + +2. You must have used your credit card for the purchase. Purchases made + + +Foreign Currency Charges: If you make a Charge in a foreign currency, we + + +with cash advances from an ATM or with a check that accesses your credit + + +will convert it into US dollars on the date we or our agents process it. We + + +card account do not qualify. 
+ + +will charge a fee of 2.70% of the converted US dollar amount. We will + + +3. You must not yet have fully paid for the purchase. + + +choose a conversion rate that is acceptable to us for that date, unless a + + +If all of the criteria above are met and you are still dissatisfied with the + + +particular rate is required by law. The conversion rate we use is no more + + +purchase, contact us in writing or electronically at: + + +than the highest official rate published by a government agency or the + + +American Express, PO Box 981535, El Paso TX 79998-1535 + + +highest interbank rate we identify from customary banking sources on the + + +www.americanexpress.com + + +conversion date or the prior business day. This rate may differ from rates in + + +While we investigate, the same rules apply to the disputed amount as + + +effect on the date of your charge. Charges converted by establishments + + +discussed above. After we finish our investigation, we will tell you our + + +(such as airlines) will be billed at the rates such establishments use. + + +decision. At that point, if we think you owe an amount and you do not pay + + +Credit Balance: A credit balance (designated CR) shown on this statement + + +we may report you as delinquent. + + +Pay Your Bill with AutoPay + + +Deduct your payment from your bank + + +account automatically each month. + + +- Avoid late fees + + +- Save time + + +Change of Address, phone number, email + + +Visit americanexpress.com/autopay + + +- Online at www.americanexpress.com/updatecontactinfo + + +today to enroll. 
+ + +- Via mobile device + + +- Voice automated: call the number on the back of your card + + +- For name, company name, and foreign address or phone changes, please call Customer Care + + +Please do not add any written communication or address change on this stub + + +For information on how we protect your + + +privacy and to set your communication + + +and privacy choices, please visit + + +www.americanexpress.com/privacy. + + +<<< + + + + + + + +AMERICAN + + +Blue Cash® from American Express + + +p. 3/7 + + +EXPRESS + + +JOSEPH PAULSON + + +Closing Date 09/27/23 + + +Account Ending 7-73045 + + +1-888-258-3741 + + +Customer Care & Billing Inquiries + + +C + + +International Collect + + +1-336-393-1111 + + +=] Website: americanexpress.com + + +Cash Advance at ATMs Inquiries + + +1-800-CASH-NOW + + +Large Print & Braille Statements + + +1-888-258-3741 + + +Customer Care + + +Payments + + +& Billing Inquiries + + +PO BOX 6031 + + +P.O. BOX 981535 + + +CAROL STREAM IL + + +EL PASO, TX + + +60197-6031 + + +79998-1535 + + +Hearing Impaired + + +Online chat at americanexpress.com or use Relay dial 711 and 1-888-258-3741 + + +American Express® High Yield Savings Account + + +No monthly fees. No minimum opening monthly deposit. 24/7 customer + + +support. FDIC insured. Meet your savings goals faster with an American + + +Express High Yield Savings Account. Terms apply. Learn more by visiting + + +americanexpress.com/savenow. 
+ + +Total + + +Payments + + +-$6,583.67 + + +Credits + + +$0.00 + + +Total Payments and Credits + + +-$6,583.67 + + +Payments + + +Amount + + +09/22/23* + + +MOBILE PAYMENT - THANK YOU + + +-$6,583.67 + + +Total + + +Total New Charges + + +$10,269.65 + + +JOSEPH PAULSON + + +an + + +Card Ending 7-73045 + + +Amount + + +08/30/23 + + +SAFEWAY + + +CUPERTINO + + +CA + + +$23.11 + + +800-898-4027 + + +09/01/23 + + +BANANA LEAF 650000012619980 + + +MILPITAS + + +CA + + +$144.16 + + +4087199811 + + +09/01/23 + + +BT*LINODE*AKAMAI + + +CAMBRIDGE + + +MA + + +$6,107.06 + + +6093807100 + + +09/01/23 + + +GOOGLE*GSUITE_SOCIALANIMAL.IO + + +MOUNTAIN VIEW + + +CA + + +$20.44 + + +ADVERTISING SERVICE + + +09/02/23 + + +Amazon Web Services + + +AWS.Amazon.com + + +WA + + +$333.88 + + +WEB SERVICES + + +09/03/23 + + +SAFEWAY + + +CUPERTINO + + +CA + + +$11.18 + + +800-898-4027 + + +09/09/23 + + +TST* BIKANER SWEET 00053687 + + +SUNNYVALE + + +CA + + +$21.81 + + +RESTAURANT + + +Continued on reverse + + +<<< + + + + + + + +JOSEPH PAULSON + + +Account Ending 7-73045 + + +p.4/7 + + +Amount + + +09/10/23 + + +CVS PHARMACY + + +CUPERTINO + + +CA + + +$2.34 + + +8007467287 + + +09/13/23 + + +APPLE.COM/BILL + + +INTERNET CHARGE + + +CA + + +$2.99 + + +RECORD STORE + + +09/13/23 + + +SAFEWAY + + +CUPERTINO + + +CA + + +$26.73 + + +800-898-4027 + + +09/14/23 + + +MCDONALD'S + + +CUPERTINO + + +CA + + +$3.26 + + +6509404200 + + +PANERA BREAD #204476 + + +CAMPBELL + + +CA + + +09/14/23 + + +$23.38 + + +975313007 95008 + + +09/14/23 + + +MANLEY DONUTS 00-08040662747 + + +CUPERTINO + + +CA + + +$21.15 + + +BAKERY + + +09/15/23 + + +Ap|Pay 6631309 - PEETS B TMP 53033 + + +OKALAND + + +CA + + +$4.27 + + +RESTAURANT + + +09/16/23 + + +VEGAS.COM + + +LAS VEGAS + + +NV + + +$761.58 + + +18669983427 + + +09/16/23 + + +Ap|Pay PANDA EXPRESS + + +LAS VEGAS + + +NV + + +$12.08 + + +FAST FOOD RESTAURANT + + +09/17/23 + + +Ap|IPay LUX_STARBUCKS_ATRIUM + + +LAS VEGAS + + +NV + + +$23.68 + + +11980066 
89109 + + +RESTAURANT + + +SPK*SPOKEO ENTPRS + + +888-858-0803 + + +CA + + +09/18/23 + + +$119.95 + + +888-858-0803 + + +09/24/23 + + +SIXT USA POS + + +FORT LAUDERDALE + + +FL + + +$2,537.90 + + +AUTOMOBILE RENTAL + + +Sixt9497938611 + + +30826E5JF4ZIIBIHSB + + +09/24/23 + + +LUCKY #773.SANTA CLARACA 0000000009925 + + +SANTA CLARA + + +CA + + +$35.17 + + +4082475200 + + +09/24/23 + + +MILAN SWEET CENTER 0000 + + +MILPITAS + + +CA + + +$27.03 + + +408-946-2525 + + +CUPERTINO + + +CA + + +09/25/23 + + +ApIPay MANLEY DONUTS 00-08040662747 + + +$6.50 + + +BAKERY + + +Amount + + +Total Fees for this Period + + +$0.00 + + +Amount + + +Total Interest Charged for this Period + + +$0.00 + + +About Trailing Interest + + +You may see interest on your next statement even if you pay the new balance in full and on time and make no new charges. This is called + + +"trailing interest". Trailing interest is the interest charged when, for example, you didn't pay your previous balance in full. When that + + +happens, we charge interest from the first day of the billing period until we receive your payment in full. You can avoid paying interest + + +on purchases by paying your balance in full (or if you have a Plan balance, by paying your Adjusted Balance on your billing statement) by + + +the due date each month. Please see the "When we charge interest" sub-section in your Cardmember Agreement for details. + + +Continued on next page + + +<<< + + + + + + + +AMERICAN + + +Blue Cash® from American Express + + +p.5/7 + + +EXPRESS + + +JOSEPH PAULSON + + +Closing Date 09/27/23 + + +Account Ending 7-73045 + + +Amount + + +Total Fees in 2023 + + +$0.00 + + +Total Interest in 2023 + + +$0.00 + + +Your Annual Percentage Rate (APR) is the annual interest rate on your account. + + +Variable APRs will not exceed 29.99%. 
+ + +Transactions Dated + + +Annual + + +Balance + + +Interest + + +Percentage + + +Subject to + + +Charge + + +From + + +To + + +Rate + + +Interest Rate + + +Purchases + + +02/26/2011 + + +24.49% (v) + + +$0.00 + + +$0.00 + + +Cash Advances + + +02/26/2011 + + +29.99% (v) + + +$0.00 + + +$0.00 + + +Total + + +$0.00 + + +(v) Variable Rate + + +<<< + + + + + + + +JOSEPH PAULSON + + +Account Ending 7-73045 + + +p. 6/7 + + +<<< + + + + + + + +AMERICAN + + +7/7 + + +EXPRESS + + +JOSEPH PAULSON + + +Closing Date 09/27/23 + + +Account Ending 7-73045 + + +EFT Error Resolution Notice + + +In Case of Errors or Questions About Your Electronic Transfers Telephone us at 1-800-IPAY-AXP for Pay By + + +Phone questions, at 1-800-528-2122 for Pay By Computer questions, and at 1-800-528-4800 for AutoPay. You + + +may also write us at American Express, Electronic Funds Services, P.O. Box 981531, El Paso TX 79998-1531, or + + +contact online at www.americanexpress.com/inquirycenter as soon as you can, if you think your statement or + + +receipt is wrong or if you need more information about a transfer on the statement or receipt. We must hear from + + +you no later than 60 days after we sent you the FIRST statement on which the error or problem appeared. + + +1. Tell us your name and account number (if any). + + +2. Describe the error or the transfer you are unsure about, and explain as clearly as you can why you + + +believe it is an error or why you need more information. + + +3. Tell us the dollar amount of the suspected error. + + +We will investigate your complaint and will correct any error promptly. If we take more than 10 business days to + + +do this, we will credit your account for the amount you think is in error, so that you will have the use of the money + + +during the time it takes us to complete our investigation. + + +End of Important Notices. 
+ + +<<< + + + + + + + diff --git a/tests/test_data/expected/credit_card.native_text.layout_preserving.txt b/tests/test_data/expected/credit_card.native_text.layout_preserving.txt new file mode 100644 index 0000000..0a15a48 --- /dev/null +++ b/tests/test_data/expected/credit_card.native_text.layout_preserving.txt @@ -0,0 +1,329 @@ + + + Blue Cash® from American Express p. 1/7 + + JOSEPH PAULSON Customer Care: 1-888-258-3741 + Closing Date 09/27/23 TTY: Use Relay 711 + Account Ending 7-73045 Website: americanexpress.com + + Reward Dollars + New Balance $10,269.65 as of 08/29/2023 + + Minimum Payment Due $205.39 1,087.93 + For more details about Rewards, visit + americanexpress.com/cashbackrewards + + Payment Due Date 10/22/23 Account Summary + + Late Payment Warning: If we do not receive your Minimum Payment Due by Previous Balance $6,583.67 + the Payment Due Date of 10/22/23, you may have to pay a late fee of up to Payments/Credits -$6,583.67 + $40.00 and your APRs may be increased to the Penalty APR of 29.99%. New Charges +$10,269.65 + Fees +$0.00 + Interest Charged +$0.00 + +Minimum Payment Warning: If you have a Non-Plan Balance and make only the New Balance $10,269.65 +minimum payment each period, you will pay more in interest and it will take you longer Minimum Payment Due $205.39 +to pay off your Non-Plan Balance. For example: + Credit Limit $26,400.00 +If you make no additional You will pay off the balance And you will pay an Available Credit $16,130.35 +charges and each month shown on this statement in estimated total of... +you pay... about... Cash Available Advance Cash Limit $4,600.00 $4,600.00 + + Only the + Minimum Payment Due 22 years $29,830 + + $14,640 + $407 3 years (Savings = $15,190) + +If you would like information about credit counseling services, call 1-888-733-4139. + + See page 2 for important information about your account. + + Please refer to the IMPORTANT NOTICES section on + page 7. 
+ + Continued on page 3 + + Please fold on the perforation below, detach and return with your payment + + Payment Coupon Pay by Computer Pay by Phone A c c o u n t E n d i n g 7 - 7 3 0 4 5 + Do not staple or use paper clips americanexpress.com/pbc 1-800-472-9297 + Enter 15 digit account # on all payments. + Make check payable to American Express. + + J O S E P H PA U L S O N Payment Due Date + 3742 CLOUD SPGS RD 10/22/23 + #403-1045 New Balance + DALLAS TX 75219-4136 $10,269.65 + + Minimum Payment Due + $205.39 + + See reverse side for instructions AMERICAN EXPRESS $ + on how to update your address, PO BOX 6031 Amount Enclosed . + phone number, or email. CAROL STREAM IL 60197-6031 +<<< + + J O S E P H P A U L S O N Account Ending 7-73045 p. 2/7 + +Payments: Your payment must be sent to the payment address shown on represents money owed to you. If within the six-month period following +your statement and must be received by 5 p.m. local time at that address to the date of the first statement indicating the credit balance you do not +be credited as of the day it is received. Payments we receive after 5 p.m. will request a refund or charge enough to use up the credit balance, we will +not be credited to your Account until the next day. Payments must also: (1) send you a check for the credit balance within 30 days if the amount is +include the remittance coupon from your statement; (2) be made with a $1.00 or more. +single check drawn on a US bank and payable in US dollars, or with a Credit Reporting: We may report information about your Account to credit +negotiable instrument payable in US dollars and clearable through the US bureaus. Late payments, missed payments, or other defaults on your +banking system; and (3) include your Account number. If your payment Account may be reflected in your credit report. 
+does not meet all of the above requirements, crediting may be delayed and What To Do If You Think You Find A Mistake On Your Statement +you may incur late payment fees and additional interest charges. Electronic If you think there is an error on your statement, write to us at: +payments must be made through an electronic payment method payable American Express, PO Box 981535, El Paso TX 79998-1535 +in US dollars and clearable through the US banking system. Please do not You may also contact us on the Web: www.americanexpress.com +send post-dated checks as they will be deposited upon receipt. Any In your letter, give us the following information: +restrictive language on a payment we accept will have no effect on us - Account information: Your name and account number. +without our express prior written approval. We will re-present to your - Dollar amount: The dollar amount of the suspected error. +financial institution any payment that is returned unpaid. - Description of Problem: If you think there is an error on your bill, +Permission for Electronic Withdrawal: (1) When you send a check for describe what you believe is wrong and why you believe it is a mistake. +payment, you give us permission to electronically withdraw your payment You must contact us within 60 days after the error appeared on your +from your deposit or other asset account. We will process checks statement. +electronically by transmitting the amount of the check, routing number, You must notify us of any potential errors in writing [or electronically]. You +account number and check serial number to your financial institution, may call us, but if you do we are not required to investigate any potential +unless the check is not processable electronically or a less costly process is errors and you may have to pay the amount in question. +available. 
When we process your check electronically, your payment may While we investigate whether or not there has been an error, the following +be withdrawn from your deposit or other asset account as soon as the same are true: +day we receive your check, and you will not receive that cancelled check - We cannot try to collect the amount in question, or report you as +with your deposit or other asset account statement. If we cannot collect the delinquent on that amount. +funds electronically we may issue a draft against your deposit or other asset - The charge in question may remain on your statement, and we may +account for the amount of the check. (2) By using Pay By Computer, Pay By continue to charge you interest on that amount. But, if we determine that +Phone or any other electronic payment service of ours, you give us we made a mistake, you will not have to pay the amount in question or any +permission to electronically withdraw funds from the deposit or other asset interest or other fees related to that amount. +account you specify in the amount you request. Payments using such - While you do not have to pay the amount in question, you are responsible +services of ours received after 8:00 p.m. MST may not be credited until the for the remainder of your balance. +next day. - We can apply any unpaid amount against your credit limit. +How We Calculate Your Balance: We use the Average Daily Balance (ADB) Your Rights If You Are Dissatisfied With Your Credit Card Purchases +method (including new transactions) to calculate the balance on which we If you are dissatisfied with the goods or services that you have purchased +charge interest on your Account. Call the Customer Care number on page 3 with your credit card, and you have tried in good faith to correct the +for more information about this balance computation method and how problem with the merchant, you may have the right not to pay the +resulting interest charges are determined. 
The method we use to figure the remaining amount due on the purchase. +ADB and interest results in daily compounding of interest. To use this right, all of the following must be true: +Paying Interest: Your due date is at least 25 days after the close of each 1. The purchase must have been made in your home state or within 100 +billing period. We will not charge you interest on your purchases if you pay miles of your current mailing address, and the purchase price must have +each month your entire balance (or Adjusted Balance if applicable) by the been more than $50. (Note: Neither of these is necessary if your purchase +due date each month. We will charge you interest on cash advances and was based on an advertisement we mailed to you, or if we own the +(unless otherwise disclosed) balance transfers beginning on the transaction company that sold you the goods or services.) +date. 2. You must have used your credit card for the purchase. Purchases made +Foreign Currency Charges: If you make a Charge in a foreign currency, we with cash advances from an ATM or with a check that accesses your credit +will convert it into US dollars on the date we or our agents process it. We card account do not qualify. +will charge a fee of 2.70% of the converted US dollar amount. We will 3. You must not yet have fully paid for the purchase. +choose a conversion rate that is acceptable to us for that date, unless a If all of the criteria above are met and you are still dissatisfied with the +particular rate is required by law. The conversion rate we use is no more purchase, contact us in writing or electronically at: +than the highest official rate published by a government agency or the American Express, PO Box 981535, El Paso TX 79998-1535 +highest interbank rate we identify from customary banking sources on the www.americanexpress.com +conversion date or the prior business day. 
This rate may differ from rates in While we investigate, the same rules apply to the disputed amount as +effect on the date of your charge. Charges converted by establishments discussed above. After we finish our investigation, we will tell you our +(such as airlines) will be billed at the rates such establishments use. decision. At that point, if we think you owe an amount and you do not pay +Credit Balance: A credit balance (designated CR) shown on this statement we may report you as delinquent. + + Pay Your Bill with AutoPay + Deduct your payment from your bank + account automatically each month. + + - Avoid late fees + - Save time +Change of Address, phone number, email + Visit americanexpress.com/autopay + - Online at www.americanexpress.com/updatecontactinfo today to enroll. + - Via mobile device + - Voice automated: call the number on the back of your card + - For name, company name, and foreign address or phone changes, please call Customer Care + +Please do not add any written communication or address change on this stub For information on how we protect your + privacy and to set your communication + and privacy choices, please visit + www.americanexpress.com/privacy. +<<< + + Blue Cash® from American Express p. 3/7 + + JOSEPH PAULSON + Closing Date 09/27/23 Account Ending 7-73045 + + Customer Care & Billing Inquiries 1-888-258-3741 + International Collect 1-336-393-1111 Website: americanexpress.com + Cash Advance at ATMs Inquiries 1-800-CASH-NOW + Large Print & Braille Statements 1-888-258-3741 Customer Care Payments + & Billing Inquiries PO BOX 6031 + P.O. BOX 981535 CAROL STREAM IL + EL PASO, TX 60197-6031 + 79998-1535 + Hearing Impaired + Online chat at americanexpress.com or use Relay dial 711 and 1-888-258-3741 + + American Express® High Yield Savings Account + No monthly fees. No minimum opening monthly deposit. 24/7 customer + support. FDIC insured. Meet your savings goals faster with an American + Express High Yield Savings Account. Terms apply. 
Learn more by visiting + americanexpress.com/savenow . + + Payments and Credits + + Summary + + Total + +Payments -$6,583.67 +Credits $0.00 +Total Payments and Credits -$6,583.67 + + Detail *Indicates posting date + +Payments Amount + +09/22/23* MOBILE PAYMENT - THANK YOU -$6,583.67 + + New Charges + + Summary + + Total +Total New Charges $10,269.65 + + Detail + + J O S E P H P A U L S O N + C a r d E n d i n g 7 - 7 3 0 4 5 + + Amount + +08/30/23 SAFEWAY CUPERTINO CA $23.11 + 800-898-4027 +09/01/23 BANANA LEAF 650000012619980 MILPITAS CA $144.16 + 4087199811 +09/01/23 BT*LINODE*AKAMAI CAMBRIDGE MA $6,107.06 + 6093807100 +09/01/23 GOOGLE*GSUITE_SOCIALANIMAL.IO MOUNTAIN VIEW CA $20.44 + ADVERTISING SERVICE +09/02/23 Amazon Web Services AWS.Amazon.com WA $333.88 + WEB SERVICES +09/03/23 SAFEWAY CUPERTINO CA $11.18 + 800-898-4027 +09/09/23 TST* BIKANER SWEET 00053687 SUNNYVALE CA $21.81 + RESTAURANT + + Continued on reverse +<<< + + JOSEPH PAULSON A c c o u n t E n d i n g 7 - 7 3 0 4 5 p. 
4/7 + + Detail Continued + + Amount + +09/10/23 CVS PHARMACY CUPERTINO CA $2.34 + 8007467287 +09/13/23 APPLE.COM/BILL INTERNET CHARGE CA $2.99 + RECORD STORE +09/13/23 SAFEWAY CUPERTINO CA $26.73 + 800-898-4027 +09/14/23 MCDONALD'S CUPERTINO CA $3.26 + 6509404200 +09/14/23 PANERA BREAD #204476 CAMPBELL CA $23.38 + 975313007 95008 +09/14/23 MANLEY DONUTS 00-08040662747 CUPERTINO CA $21.15 + BAKERY +09/15/23 AplPay 6631309 - PEETS B TMP 53033 OKALAND CA $4.27 + RESTAURANT +09/16/23 VEGAS.COM LAS VEGAS NV $761.58 + 18669983427 +09/16/23 AplPay PANDA EXPRESS LAS VEGAS NV $12.08 + FAST FOOD RESTAURANT +09/17/23 AplPay LUX_STARBUCKS_ATRIUM LAS VEGAS NV $23.68 + 11980066 89109 + RESTAURANT +09/18/23 SPK*SPOKEO ENTPRS 888-858-0803 CA $119.95 + 888-858-0803 +09/24/23 SIXT USA POS FORT LAUDERDALE FL $2,537.90 + AUTOMOBILE RENTAL + Sixt9497938611 + 30826E5JF4ZIIBIHSB +09/24/23 LUCKY #773.SANTA CLARACA 0000000009925 SANTA CLARA CA $35.17 + 4082475200 +09/24/23 MILAN SWEET CENTER 0000 MILPITAS CA $27.03 + 408-946-2525 +09/25/23 AplPay MANLEY DONUTS 00-08040662747 CUPERTINO CA $6.50 + BAKERY + + Fees + + Amount + +Total Fees for this Period $0.00 + + Interest Charged + + Amount + +Total Interest Charged for this Period $0.00 + +About Trailing Interest +You may see interest on your next statement even if you pay the new balance in full and on time and make no new charges. This is called +"trailing interest". Trailing interest is the interest charged when, for example, you didn't pay your previous balance in full. When that +happens, we charge interest from the first day of the billing period until we receive your payment in full. You can avoid paying interest +on purchases by paying your balance in full (or if you have a Plan balance, by paying your Adjusted Balance on your billing statement) by +the due date each month. Please see the "When we charge interest" sub-section in your Cardmember Agreement for details. 
+ + Continued on next page +<<< + + Blue Cash® from American Express p. 5/7 + + JOSEPH PAULSON + Closing Date 09/27/23 A c c o u n t E n d i n g 7 - 7 3 0 4 5 + + 2023 Fees and Interest Totals Year-to-Date + + Amount + Total Fees in 2023 $0.00 + + Total Interest in 2023 $0.00 + + Interest Charge Calculation Days in Billing Period: 30 + + Your Annual Percentage Rate (APR) is the annual interest rate on your account. + Variable APRs will not exceed 29.99%. + Transactions Dated Annual Balance Interest + Percentage Subject to Charge + From To Rate Interest Rate + +Purchases 02/26/2011 24.49% (v) $0.00 $0.00 + +Cash Advances 02/26/2011 29.99% (v) $0.00 $0.00 + +Total $0.00 +(v) Variable Rate +<<< + +JOSEPH PAULSON A c c o u n t E n d i n g 7 - 7 3 0 4 5 p. 6/7 +<<< + + p. 7/7 + JOSEPH PAULSON Closing Date 09/27/23 A c c o u n t E n d i n g 7 - 7 3 0 4 5 + + IMPORTANT NOTICES + +EFT Error Resolution Notice +In Case of Errors or Questions About Your Electronic Transfers Telephone us at 1-800-IPAY-AXP for Pay By +Phone questions, at 1-800-528-2122 for Pay By Computer questions, and at 1-800-528-4800 for AutoPay. You +may also write us at American Express, Electronic Funds Services, P.O. Box 981531, El Paso TX 79998-1531, or +contact online at www.americanexpress.com/inquirycenter as soon as you can, if you think your statement or +receipt is wrong or if you need more information about a transfer on the statement or receipt. We must hear from +you no later than 60 days after we sent you the FIRST statement on which the error or problem appeared. + 1. Tell us your name and account number (if any). + 2. Describe the error or the transfer you are unsure about, and explain as clearly as you can why you + believe it is an error or why you need more information. + 3. Tell us the dollar amount of the suspected error. +We will investigate your complaint and will correct any error promptly. 
If we take more than 10 business days to +do this, we will credit your account for the amount you think is in error, so that you will have the use of the money +during the time it takes us to complete our investigation. + + End of Important Notices. +<<< \ No newline at end of file diff --git a/tests/test_data/expected/credit_card.native_text.text.txt b/tests/test_data/expected/credit_card.native_text.text.txt new file mode 100644 index 0000000..9259fe5 --- /dev/null +++ b/tests/test_data/expected/credit_card.native_text.text.txt @@ -0,0 +1,318 @@ +Blue Cash® from American Express p.1/7 +JOSEPH PAULSON CustomerCare: 1-888-258-3741 +Closing Date 09/27/23 TTY: UseRelay711 +Account Ending 7-73045 Website: americanexpress.com +RewardDollars +New Balance $10,269.65 +asof 08/29/2023 +Minimum Payment Due $205.39 1,087.93 +FormoredetailsaboutRewards,visit +americanexpress.com/cashbackrewards +Payment Due Date 10/22/23 +AccountSummary +LatePaymentWarning:IfwedonotreceiveyourMinimumPaymentDueby PreviousBalance $6,583.67 +the Payment Due Date of 10/22/23, you may have to pay a late fee of up to Payments/Credits -$6,583.67 +$40.00andyourAPRsmaybeincreasedtothePenaltyAPRof29.99%. NewCharges +$10,269.65 +Fees +$0.00 +InterestCharged +$0.00 +MinimumPaymentWarning:IfyouhaveaNon-PlanBalanceandmakeonlythe NewBalance $10,269.65 +minimumpaymenteachperiod,youwillpaymoreininterestanditwilltakeyoulonger MinimumPaymentDue $205.39 +topayoffyourNon-PlanBalance.Forexample: +CreditLimit $26,400.00 +Ifyoumakenoadditional Youwillpayoffthebalance Andyouwillpayan AvailableCredit $16,130.35 +chargesandeachmonth shownonthisstatementin estimatedtotalof... +CashAdvanceLimit $4,600.00 +youpay... about... +AvailableCash $4,600.00 +Onlythe +22years $29,830 +MinimumPaymentDue +$14,640 +$407 3years (Savings=$15,190) +Ifyouwouldlikeinformationaboutcreditcounselingservices,call1-888-733-4139. +Seepage2forimportantinformationaboutyouraccount. +PleaserefertotheIMPORTANTNOTICESsectionon +page7. 
+Continuedonpage3 +Pleasefoldontheperforationbelow,detachandreturnwithyourpayment +PaymentCoupon PaybyComputer PaybyPhone A c c o u n t E n d in g 7 -7 3 0 4 5 +Donotstapleorusepaperclips americanexpress.com/pbc 1-800-472-9297 +Enter15digitaccount#onallpayments. +MakecheckpayabletoAmericanExpress. +J O S E P H PAU L S O N PaymentDueDate +3742 CLOUD SPGS RD 10/22/23 +#403-1045 +DALLAS TX 75219-4136 NewBalance +$10,269.65 +MinimumPaymentDue +$205.39 +. +Seereversesideforinstructions AMERICANEXPRESS $ +onhowtoupdateyouraddress, POBOX6031 Amount Enclosed +phonenumber,oremail. CAROLSTREAMIL60197-6031 +<<< + + + + + + + +J O S E P H P A U L S O N +Account Ending 7-73045 p.2/7 +Payments:Yourpaymentmustbesenttothepaymentaddressshownon representsmoneyowedtoyou.Ifwithinthesix-monthperiodfollowing +yourstatementandmustbereceivedby5p.m.localtimeatthataddressto the date of the first statement indicating the credit balance you do not +becreditedasofthedayitisreceived.Paymentswereceiveafter5p.m.will requestarefundorchargeenoughtouseupthecreditbalance,wewill +notbecreditedtoyourAccountuntilthenextday.Paymentsmustalso:(1) sendyou a check for the credit balance within 30days ifthe amountis +includethe remittancecouponfromyour statement;(2)bemadewitha $1.00ormore. +single check drawn on a US bank and payable in US dollars, or with a CreditReporting: WemayreportinformationaboutyourAccounttocredit +negotiableinstrumentpayableinUSdollarsandclearablethroughtheUS bureaus. Late payments, missed payments, or other defaults on your +banking system; and (3) include your Account number. If your payment Accountmaybereflectedinyourcreditreport. 
+doesnotmeetalloftheaboverequirements,creditingmaybedelayedand WhatToDoIfYouThinkYouFindAMistakeOnYourStatement +youmayincurlatepaymentfeesandadditionalinterestcharges.Electronic Ifyouthinkthereisanerroronyourstatement,writetousat: +paymentsmustbemadethroughanelectronicpaymentmethodpayable AmericanExpress,POBox981535,ElPasoTX79998-1535 +inUSdollarsandclearablethroughtheUSbankingsystem.Pleasedonot YoumayalsocontactusontheWeb:www.americanexpress.com +send post-dated checks as they will be deposited upon receipt. Any Inyourletter,giveusthefollowinginformation: +restrictive language on a payment we accept will have no effect on us -Accountinformation:Yournameandaccountnumber. +without our express prior written approval. We will re-present to your -Dollaramount:Thedollaramountofthesuspectederror. +financialinstitutionanypaymentthatisreturnedunpaid. -DescriptionofProblem:Ifyouthinkthereisanerroronyourbill, +Permission for Electronic Withdrawal: (1) When you send a check for describewhatyoubelieveiswrongandwhyyoubelieveitisamistake. +payment,yougiveuspermissiontoelectronicallywithdrawyourpayment Youmustcontactuswithin60daysaftertheerrorappearedonyour +from your deposit or other asset account. We will process checks statement. +electronically by transmitting the amount of the check, routing number, Youmustnotifyusofanypotentialerrorsinwriting[orelectronically].You +account number and check serial number to your financial institution, maycallus,butifyoudowearenotrequiredtoinvestigateanypotential +unlessthecheckisnotprocessableelectronicallyoralesscostlyprocessis errorsandyoumayhavetopaytheamountinquestion. 
+available.Whenweprocessyourcheckelectronically,yourpaymentmay Whileweinvestigatewhetherornottherehasbeenanerror,thefollowing +bewithdrawnfromyourdepositorotherassetaccountassoonasthesame aretrue: +daywereceiveyourcheck,andyouwillnotreceivethatcancelledcheck -Wecannottrytocollecttheamountinquestion,orreportyouas +withyourdepositorotherassetaccountstatement.Ifwecannotcollectthe delinquentonthatamount. +fundselectronicallywemayissueadraftagainstyourdepositorotherasset -Thechargeinquestionmayremainonyourstatement,andwemay +accountfortheamountofthecheck.(2)ByusingPayByComputer,PayBy continuetochargeyouinterestonthatamount.But,ifwedeterminethat +Phone or any other electronic payment service of ours, you give us wemadeamistake,youwillnothavetopaytheamountinquestionorany +permissiontoelectronicallywithdrawfundsfromthedepositorotherasset interestorotherfeesrelatedtothatamount. +account you specify in the amount you request. Payments using such -Whileyoudonothavetopaytheamountinquestion,youareresponsible +servicesofoursreceivedafter8:00p.m.MSTmaynotbecrediteduntilthe fortheremainderofyourbalance. +nextday. -Wecanapplyanyunpaidamountagainstyourcreditlimit. +HowWeCalculateYourBalance: WeusetheAverageDailyBalance(ADB) YourRightsIfYouAreDissatisfiedWithYourCreditCardPurchases +method(includingnewtransactions)tocalculatethebalanceonwhichwe Ifyouaredissatisfiedwiththegoodsorservicesthatyouhavepurchased +chargeinterestonyourAccount.CalltheCustomerCarenumberonpage3 withyourcreditcard,andyouhavetriedingoodfaithtocorrectthe +for more information about this balance computation method and how problemwiththemerchant,youmayhavetherightnottopaythe +resultinginterestchargesaredetermined. Themethodweusetofigurethe remainingamountdueonthepurchase. +ADBandinterestresultsindailycompoundingofinterest. 
Tousethisright,allofthefollowingmustbetrue: +PayingInterest: Yourduedateisatleast25daysafterthecloseofeach 1.Thepurchasemusthavebeenmadeinyourhomestateorwithin100 +billingperiod.Wewillnotchargeyouinterestonyourpurchasesifyoupay milesofyourcurrentmailingaddress,andthepurchasepricemusthave +eachmonthyourentirebalance(orAdjustedBalanceifapplicable)bythe beenmorethan$50.(Note:Neitheroftheseisnecessaryifyourpurchase +duedateeachmonth.Wewillchargeyouinterestoncashadvancesand wasbasedonanadvertisementwemailedtoyou,orifweownthe +(unlessotherwisedisclosed)balancetransfersbeginningonthetransaction companythatsoldyouthegoodsorservices.) +date. 2.Youmusthaveusedyourcreditcardforthepurchase.Purchasesmade +ForeignCurrencyCharges: IfyoumakeaChargeinaforeigncurrency,we withcashadvancesfromanATMorwithacheckthataccessesyourcredit +willconvertitintoUSdollarsonthedateweorouragentsprocessit.We cardaccountdonotqualify. +willchargeafeeof2.70%oftheconvertedUSdollaramount. Wewill 3.Youmustnotyethavefullypaidforthepurchase. +choosea conversion rate that is acceptable tous for that date, unless a Ifallofthecriteriaabovearemetandyouarestilldissatisfiedwiththe +particularrateisrequiredbylaw.Theconversionrateweuseisnomore purchase,contactusinwritingorelectronicallyat: +than the highest official rate published by a government agency or the AmericanExpress,POBox981535,ElPasoTX79998-1535 +highestinterbankrateweidentifyfromcustomarybankingsourcesonthe www.americanexpress.com +conversiondateorthepriorbusinessday.Thisratemaydifferfromratesin Whileweinvestigate,thesamerulesapplytothedisputedamountas +effect on the date of your charge. Charges converted by establishments discussedabove.Afterwefinishourinvestigation,wewilltellyouour +(suchasairlines)willbebilledattheratessuchestablishmentsuse. decision.Atthatpoint,ifwethinkyouoweanamountandyoudonotpay +CreditBalance: Acreditbalance(designatedCR)shownonthisstatement wemayreportyouasdelinquent. 
+PayYourBillwithAutoPay +Deductyourpaymentfromyourbank +accountautomaticallyeachmonth. +-Avoidlatefees +-Savetime +ChangeofAddress,phonenumber,email +Visitamericanexpress.com/autopay +-Onlineatwww.americanexpress.com/updatecontactinfo +todaytoenroll. +-Viamobiledevice +-Voiceautomated:callthenumberonthebackofyourcard +-Forname,companyname,andforeignaddressorphonechanges,pleasecallCustomerCare +Pleasedonotaddanywrittencommunicationoraddresschangeonthisstub +Forinformationonhowweprotectyour +privacyandtosetyourcommunication +andprivacychoices,pleasevisit +www.americanexpress.com/privacy. +<<< + + + + + + + +Blue Cash® from American Express p.3/7 +JOSEPH PAULSON +Closing Date 09/27/23 Account Ending 7-73045 +CustomerCare&BillingInquiries 1-888-258-3741 +InternationalCollect 1-336-393-1111 Website:americanexpress.com +CashAdvanceatATMsInquiries 1-800-CASH-NOW +LargePrint&BrailleStatements 1-888-258-3741 CustomerCare Payments +&BillingInquiries POBOX6031 +P.O.BOX981535 CAROLSTREAMIL +ELPASO,TX 60197-6031 +79998-1535 +HearingImpaired +Onlinechatatamericanexpress.comoruseRelaydial711and1-888-258-3741 +AmericanExpress®HighYieldSavingsAccount +No monthly fees. No minimum opening monthly deposit. 24/7 customer +support. FDIC insured. Meet your savings goals faster with an American +Express High Yield Savings Account. Terms apply. Learn more by visiting +americanexpress.com/savenow. 
+Payments and Credits +Summary +Total +Payments -$6,583.67 +Credits $0.00 +TotalPaymentsandCredits -$6,583.67 +Detail *Indicatespostingdate +Payments Amount +09/22/23* MOBILEPAYMENT-THANKYOU -$6,583.67 +New Charges +Summary +Total +Total NewCharges $10,269.65 +Detail +J O S E P H P A U L S O N +C a rd E n d in g 7 -7 3 0 4 5 +Amount +08/30/23 SAFEWAY CUPERTINO CA $23.11 +800-898-4027 +09/01/23 BANANALEAF650000012619980 MILPITAS CA $144.16 +4087199811 +09/01/23 BT*LINODE*AKAMAI CAMBRIDGE MA $6,107.06 +6093807100 +09/01/23 GOOGLE*GSUITE_SOCIALANIMAL.IO MOUNTAINVIEW CA $20.44 +ADVERTISINGSERVICE +09/02/23 AmazonWebServices AWS.Amazon.com WA $333.88 +WEBSERVICES +09/03/23 SAFEWAY CUPERTINO CA $11.18 +800-898-4027 +09/09/23 TST*BIKANERSWEET00053687 SUNNYVALE CA $21.81 +RESTAURANT +Continuedonreverse +<<< + + + + + + + +JOSEPH PAULSON A c c o u n t E n d in g 7 -7 3 0 4 5 p.4/7 +DetailContinued +Amount +09/10/23 CVSPHARMACY CUPERTINO CA $2.34 +8007467287 +09/13/23 APPLE.COM/BILL INTERNETCHARGE CA $2.99 +RECORDSTORE +09/13/23 SAFEWAY CUPERTINO CA $26.73 +800-898-4027 +09/14/23 MCDONALD'S CUPERTINO CA $3.26 +6509404200 +09/14/23 PANERABREAD#204476 CAMPBELL CA $23.38 +97531300795008 +09/14/23 MANLEYDONUTS00-08040662747 CUPERTINO CA $21.15 +BAKERY +09/15/23 AplPay6631309-PEETSBTMP53033 OKALAND CA $4.27 +RESTAURANT +09/16/23 VEGAS.COM LASVEGAS NV $761.58 +18669983427 +09/16/23 AplPayPANDAEXPRESS LASVEGAS NV $12.08 +FASTFOODRESTAURANT +09/17/23 AplPayLUX_STARBUCKS_ATRIUM LASVEGAS NV $23.68 +1198006689109 +RESTAURANT +09/18/23 SPK*SPOKEOENTPRS 888-858-0803 CA $119.95 +888-858-0803 +09/24/23 SIXTUSAPOS FORTLAUDERDALE FL $2,537.90 +AUTOMOBILERENTAL +Sixt9497938611 +30826E5JF4ZIIBIHSB +09/24/23 LUCKY#773.SANTACLARACA0000000009925 SANTACLARA CA $35.17 +4082475200 +09/24/23 MILANSWEETCENTER0000 MILPITAS CA $27.03 +408-946-2525 +09/25/23 AplPayMANLEYDONUTS00-08040662747 CUPERTINO CA $6.50 +BAKERY +Fees +Amount +TotalFeesforthisPeriod $0.00 +Interest Charged +Amount 
+TotalInterestChargedforthisPeriod $0.00 +AboutTrailingInterest +Youmayseeinterestonyournextstatementevenifyoupaythenewbalanceinfullandontimeandmakenonewcharges.Thisiscalled +"trailinginterest". Trailinginterestis theinterestchargedwhen,forexample,youdidn'tpayyour previousbalanceinfull.Whenthat +happens,wechargeinterestfromthefirstdayofthebillingperioduntilwereceiveyourpaymentinfull.Youcanavoidpayinginterest +onpurchasesbypayingyourbalanceinfull(orifyouhaveaPlanbalance,bypayingyourAdjustedBalanceonyourbillingstatement)by +theduedateeachmonth.Pleaseseethe"Whenwechargeinterest"sub-sectioninyourCardmemberAgreementfordetails. +Continuedonnextpage +<<< + + + + + + + +Blue Cash® from American Express p.5/7 +JOSEPH PAULSON +Closing Date 09/27/23 A c c o u n t E n d in g 7 -7 3 0 4 5 +2023 Fees and Interest Totals Year-to-Date +Amount +TotalFeesin2023 $0.00 +TotalInterestin2023 $0.00 +Interest Charge Calculation DaysinBillingPeriod:30 +YourAnnualPercentageRate(APR)istheannualinterestrateonyouraccount. +VariableAPRswillnotexceed29.99%. +TransactionsDated Annual Balance Interest +Percentage Subjectto Charge +From To Rate InterestRate +Purchases 02/26/2011 24.49%(v) $0.00 $0.00 +CashAdvances 02/26/2011 29.99%(v) $0.00 $0.00 +Total $0.00 +(v)VariableRate +<<< + + + + + + + +JOSEPH PAULSON A c c o u n t E n d in g 7 -7 3 0 4 5 p.6/7 +<<< + + + + + + + +p.7/7 +JOSEPH PAULSON ClosingDate09/27/23 A c c o u n t E n d in g 7 -7 3 0 4 5 +IMPORTANT NOTICES +EFT Error ResolutionNotice +In Case of Errors or Questions About Your Electronic Transfers Telephone us at 1-800-IPAY-AXP for Pay By +Phone questions, at 1-800-528-2122 for Pay By Computer questions, and at 1-800-528-4800 for AutoPay. You +mayalsowriteusatAmericanExpress,ElectronicFunds Services,P.O.Box981531,ElPasoTX79998-1531,or +contact online at www.americanexpress.com/inquirycenter as soon as you can, if you think your statement or +receipt is wrong or if you need more information about a transfer on the statement or receipt. 
We must hear from +younolaterthan60daysafterwesentyoutheFIRSTstatementonwhichtheerrororproblemappeared. +1. Tellusyournameandaccountnumber(ifany). +2. Describe the error or the transfer you are unsure about, and explain as clearly as you can why you +believeitisanerrororwhyyouneedmoreinformation. +3. Tellusthedollaramountofthesuspectederror. +We will investigate your complaint and will correct any error promptly. If we take more than 10 business days to +do this, we will credit your accountfor theamount youthinkis in error, so that you will have theuse ofthe money +duringthetimeittakesustocompleteourinvestigation. +EndofImportantNotices. +<<< + + + + + + + diff --git a/tests/test_data/expected/handwritten-form.form.layout_preserving.txt b/tests/test_data/expected/handwritten-form.form.layout_preserving.txt new file mode 100644 index 0000000..0f85851 --- /dev/null +++ b/tests/test_data/expected/handwritten-form.form.layout_preserving.txt @@ -0,0 +1,25 @@ + + +Name STEPHEN YOUNG + + Address 123 MAIN ST. + + Are you: [ ] Married [X] Single + + How did you hear about us? + + [ ] Search Ad + [ ] Facebook + [ ] X (formerly Twitter) + [ ] This mailer + [X] Other (Explain) SAW THE SIGN WHEN THE LOCATION + + WAS BEING BUILT + + By signing, I agree to receive all communications from acme, inc. + + Signature + + 10/15/23 + Date +<<< \ No newline at end of file diff --git a/tests/test_data/expected/handwritten-form.form.text.txt b/tests/test_data/expected/handwritten-form.form.text.txt new file mode 100644 index 0000000..8440f06 --- /dev/null +++ b/tests/test_data/expected/handwritten-form.form.text.txt @@ -0,0 +1,27 @@ +Name +STEPHEN YOUNG +Address +123 MAIN ST. +Are you: +[ ] Married +[X] Single +How did you hear about us? +[ ] Search Ad +[ ] Facebook +[ ] X (formerly Twitter) +[ ] This mailer +[X] Other (Explain) i SAW THE SIGN WHEN THE LOCATION WAS BEING BUILT +By signing, I agree to receive all communications from acme, inc. 
+Signature +Date +10/15/23 + + +<<< + + + + + + + diff --git a/tests/test_data/expected/restaurant_invoice_photo.high_quality.layout_preserving.txt b/tests/test_data/expected/restaurant_invoice_photo.high_quality.layout_preserving.txt new file mode 100644 index 0000000..3a74955 --- /dev/null +++ b/tests/test_data/expected/restaurant_invoice_photo.high_quality.layout_preserving.txt @@ -0,0 +1,43 @@ + + + BURGER SEIGNEUR + + No.35, 80 feet road, + HAL 3rd Stage, + Indiranagar, Bangalore + GST: 29AAHFL9534H1ZV + + Order Number : T2- 57 + + Type : Table + + Table Number: 2 + + Bill No .: T2 -- 126653 + Date:2023-05-31 23:16:50 + Kots:63 + + Item Qty Amt + + Jack The + + Ripper 1 400.00 + Plain Fries + + Coke 300 ML 1 130.00 + + Total Qty: 2 + SubTotal: 530.00 + + GST@5% 26.50 + CGST @2.5% 13.25 + SGST @2.5% 13.25 + + Round Off: 0.50 + Total Invoice Value: 557 + + PAY: 557 + + Thank you, visit again! + +Powered by - POSIST +<<< \ No newline at end of file diff --git a/tests/test_data/expected/restaurant_invoice_photo.high_quality.text.txt b/tests/test_data/expected/restaurant_invoice_photo.high_quality.text.txt new file mode 100644 index 0000000..59d47f8 --- /dev/null +++ b/tests/test_data/expected/restaurant_invoice_photo.high_quality.text.txt @@ -0,0 +1,39 @@ +BURGER SEIGNEUR No.35, 80 feet road, HAL 3rd Stage, Indiranagar, Bangalore GST: 29AAHFL9534H1ZV +Order Number : T2- 57 Type : Table Table Number: 2 +Bill No .: T2 -- 126653 Date:2023-05-31 23:16:50 Kots:63 +Item +Qty +Amt +Jack The +Ripper +1 +400.00 +Plain Fries ++ +Coke 300 ML +1 +130.00 +Total Qty: 2 +SubTotal: 530.00 +GST@5% +26.50 +CGST @2.5% +13.25 +SGST @2.5% +13.25 +Round Off: +0.50 +Total Invoice Value: +557 +PAY: 557 Thank you, visit again! 
+Powered by - POSIST + + +<<< + + + + + + + diff --git a/tests/test_data/expected/utf_8_chars.high_quality.layout_preserving.txt b/tests/test_data/expected/utf_8_chars.high_quality.layout_preserving.txt new file mode 100644 index 0000000..5f9776d --- /dev/null +++ b/tests/test_data/expected/utf_8_chars.high_quality.layout_preserving.txt @@ -0,0 +1,188 @@ + + + + TCPDF Example 008 + TCPDF by Nicola Asuni - Tecnick.com + www.tcpdf.org + + +Sentences that contain all letters commonly used in a language + + +This file is UTF-8 encoded. + + +Czech (cz) + + + Příšerně žluťoučký kůň úpěl ďábelské ódy. + Hleď, toť přízračný kůň v mátožné póze šíleně úpí. + Zvlášť zákeřný učeň s ďolíčky běží podél zóny úlů. + Loď čeří kýlem tůň obzvlášť v Grónské úžině. + Ó, náhlý déšť již zvířil prach a čilá laň teď běží s houfcem gazel k úkrytům. + + +Danish (da) + + + Quizdeltagerne spiste jordbær med fløde, mens cirkusklovnen + Wolther spillede på xylofon. + (= Quiz contestants were eating strawbery with cream while Wolther + the circus clown played on xylophone.) + + +German (de) + + + Falsches Üben von Xylophonmusik quält jeden größeren Zwerg + (= Wrongful practicing of xylophone music tortures every larger dwarf) + + + Zwölf Boxkämpfer jagten Eva quer über den Sylter Deich + (= Twelve boxing fighters hunted Eva across the dike of Sylt) + + + Heizölrückstoßabdämpfung + (= fuel oil recoil absorber) + (jqvwxy missing, but all non-ASCII letters in one word) + + +English (en) + + + The quick brown fox jumps over the lazy dog + + +Spanish (es) + + + El pingüino Wenceslao hizo kilómetros bajo exhaustiva lluvia y + frío, añoraba a su querido cachorro. + (Contains every letter and every accent, but not every combination + + + page 1 / 3 +<<< + + + + TCPDF Example 008 + TCPDF by Nicola Asuni - Tecnick.com + www.tcpdf.org + + + of vowel + acute.) 
+ + +French (fr) + + + Portez ce vieux whisky au juge blond qui fume sur son île intérieure, à + côté de l'alcôve ovoïde, où les bûches se consument dans l'âtre, ce + qui lui permet de penser à la cænogenèse de l'être dont il est question + dans la cause ambiguë entendue à Moÿ, dans un capharnaüm qui, + pense-t-il, diminue çà et là la qualité de son œuvre. + + + l'île exiguë + Où l'obèse jury mûr + Fête l'haï volapük, + Âne ex aéquo au whist, + Ôtez ce vœu déçu. + + + Le cœur déçu mais l'âme plutôt naïve, Louys rêva de crapaüter en + canoë au delà des îles, près du mälström où brûlent les novæ. + + +Irish Gaelic (ga) + + + D'fhuascail Íosa, Úrmhac na hÓighe Beannaithe, pór Éava agus Ádhaimh + + +Hungarian (hu) + + + Árvíztűrő tükörfúrógép + (= flood-proof mirror-drilling machine, only all non-ASCII letters) + + +Icelandic (is) + + + Kæmi ný öxi hér ykist bjófum nú bæỗi víl og ádrepa + + + Sævör grét áðan því úlpan var ónýt + (some ASCII letters missing) + + +Greek (el) + + + Γαζέες και μυρτιές δέν θά βρώ στό χρυσαφί ξέφωτο + (= No more shall I see acacias or myrtles in the golden clearing) + + + Ξεσκεπάζω την ψυχοφθόρα βδελυγμία + + + page 2 / 3 +<<< + + + + TCPDF Example 008 + TCPDF by Nicola Asuni - Tecnick.com + www.tcpdf.org + + + (= I uncover the soul-destroying abhorrence) + + +Hebrew (iw) + + +הקליטה איך חברה לו מצא ולפתע מאוכזב בים שט סקרן דג ? + + +Polish (pl) + + + Pchnąć w tę łódź jeża lub osiem skrzyń fig + (= To push a hedgehog or eight bins of figs in this boat) + + + Zażółć gęślą jaźń + + +Russian (ru) + + + В чащах юга жил бы цитрус? Да, но фальшивый экземпляр! + (= Would a citrus live in the bushes of south? Yes, but only a fake one!) 
+ + +Thai (th) + + + [- -] + เป็น มนุษย์ สุดประเสริฐ เลิศ คุณค่า กว่า บรรดา ฝูง สัตว์ เดรัจฉาน + จง ฝ่าฟัน พัฒนา วิชาการ อย่า ล้าง ผลาญ ฤา เข่น ฆ่า บีฑา ใคร + ไม่ ถือ โทษ โกรธ แช่ง ซัด ฮึดฮัด ด่า หัด อภัย เหมือน กีฬา อัชฌาสัย + ปฏิบัติ ประพฤติ กฎ กำหนด ใจ พูดจา ให้ จ๊ะๆ จำๆ น่า ฟัง เอย ฯ + + + [The copyright for the Thai example is owned by The Computer + Association of Thailand under the Royal Patronage of His Majesty the + King.] + + +Please let me know if you find others! Special thanks to the people +from all over the world who contributed these sentences. + + + page 3 / 3 +<<< + diff --git a/tests/test_data/expected/utf_8_chars.ocr.line-printer.txt b/tests/test_data/expected/utf_8_chars.ocr.line-printer.txt new file mode 100644 index 0000000..5f9776d --- /dev/null +++ b/tests/test_data/expected/utf_8_chars.ocr.line-printer.txt @@ -0,0 +1,188 @@ + + + + TCPDF Example 008 + TCPDF by Nicola Asuni - Tecnick.com + www.tcpdf.org + + +Sentences that contain all letters commonly used in a language + + +This file is UTF-8 encoded. + + +Czech (cz) + + + Příšerně žluťoučký kůň úpěl ďábelské ódy. + Hleď, toť přízračný kůň v mátožné póze šíleně úpí. + Zvlášť zákeřný učeň s ďolíčky běží podél zóny úlů. + Loď čeří kýlem tůň obzvlášť v Grónské úžině. + Ó, náhlý déšť již zvířil prach a čilá laň teď běží s houfcem gazel k úkrytům. + + +Danish (da) + + + Quizdeltagerne spiste jordbær med fløde, mens cirkusklovnen + Wolther spillede på xylofon. + (= Quiz contestants were eating strawbery with cream while Wolther + the circus clown played on xylophone.) 
+ + +German (de) + + + Falsches Üben von Xylophonmusik quält jeden größeren Zwerg + (= Wrongful practicing of xylophone music tortures every larger dwarf) + + + Zwölf Boxkämpfer jagten Eva quer über den Sylter Deich + (= Twelve boxing fighters hunted Eva across the dike of Sylt) + + + Heizölrückstoßabdämpfung + (= fuel oil recoil absorber) + (jqvwxy missing, but all non-ASCII letters in one word) + + +English (en) + + + The quick brown fox jumps over the lazy dog + + +Spanish (es) + + + El pingüino Wenceslao hizo kilómetros bajo exhaustiva lluvia y + frío, añoraba a su querido cachorro. + (Contains every letter and every accent, but not every combination + + + page 1 / 3 +<<< + + + + TCPDF Example 008 + TCPDF by Nicola Asuni - Tecnick.com + www.tcpdf.org + + + of vowel + acute.) + + +French (fr) + + + Portez ce vieux whisky au juge blond qui fume sur son île intérieure, à + côté de l'alcôve ovoïde, où les bûches se consument dans l'âtre, ce + qui lui permet de penser à la cænogenèse de l'être dont il est question + dans la cause ambiguë entendue à Moÿ, dans un capharnaüm qui, + pense-t-il, diminue çà et là la qualité de son œuvre. + + + l'île exiguë + Où l'obèse jury mûr + Fête l'haï volapük, + Âne ex aéquo au whist, + Ôtez ce vœu déçu. + + + Le cœur déçu mais l'âme plutôt naïve, Louys rêva de crapaüter en + canoë au delà des îles, près du mälström où brûlent les novæ. 
+ + +Irish Gaelic (ga) + + + D'fhuascail Íosa, Úrmhac na hÓighe Beannaithe, pór Éava agus Ádhaimh + + +Hungarian (hu) + + + Árvíztűrő tükörfúrógép + (= flood-proof mirror-drilling machine, only all non-ASCII letters) + + +Icelandic (is) + + + Kæmi ný öxi hér ykist bjófum nú bæỗi víl og ádrepa + + + Sævör grét áðan því úlpan var ónýt + (some ASCII letters missing) + + +Greek (el) + + + Γαζέες και μυρτιές δέν θά βρώ στό χρυσαφί ξέφωτο + (= No more shall I see acacias or myrtles in the golden clearing) + + + Ξεσκεπάζω την ψυχοφθόρα βδελυγμία + + + page 2 / 3 +<<< + + + + TCPDF Example 008 + TCPDF by Nicola Asuni - Tecnick.com + www.tcpdf.org + + + (= I uncover the soul-destroying abhorrence) + + +Hebrew (iw) + + +הקליטה איך חברה לו מצא ולפתע מאוכזב בים שט סקרן דג ? + + +Polish (pl) + + + Pchnąć w tę łódź jeża lub osiem skrzyń fig + (= To push a hedgehog or eight bins of figs in this boat) + + + Zażółć gęślą jaźń + + +Russian (ru) + + + В чащах юга жил бы цитрус? Да, но фальшивый экземпляр! + (= Would a citrus live in the bushes of south? Yes, but only a fake one!) + + +Thai (th) + + + [- -] + เป็น มนุษย์ สุดประเสริฐ เลิศ คุณค่า กว่า บรรดา ฝูง สัตว์ เดรัจฉาน + จง ฝ่าฟัน พัฒนา วิชาการ อย่า ล้าง ผลาญ ฤา เข่น ฆ่า บีฑา ใคร + ไม่ ถือ โทษ โกรธ แช่ง ซัด ฮึดฮัด ด่า หัด อภัย เหมือน กีฬา อัชฌาสัย + ปฏิบัติ ประพฤติ กฎ กำหนด ใจ พูดจา ให้ จ๊ะๆ จำๆ น่า ฟัง เอย ฯ + + + [The copyright for the Thai example is owned by The Computer + Association of Thailand under the Royal Patronage of His Majesty the + King.] + + +Please let me know if you find others! Special thanks to the people +from all over the world who contributed these sentences. 
+ + + page 3 / 3 +<<< + diff --git a/tests/test_data/utf_8_chars.pdf b/tests/test_data/utf_8_chars.pdf new file mode 100644 index 0000000..ba2c906 Binary files /dev/null and b/tests/test_data/utf_8_chars.pdf differ diff --git a/tests/unit/client_v2_test.py b/tests/unit/client_v2_test.py new file mode 100644 index 0000000..70191b5 --- /dev/null +++ b/tests/unit/client_v2_test.py @@ -0,0 +1,34 @@ +WEBHOOK_URL = "http://test-webhook.com/callback" +AUTH_TOKEN = "dummy-auth-token" +WEBHOOK_NAME = "test_webhook" +WEBHOOK_RESPONSE = {"status": "success", "message": "Webhook registered successfully"} +WHISPER_RESPONSE = {"status_code": 200, "extraction": {"result_text": "Test result"}} + + +def test_register_webhook(mocker, client_v2): + mock_send = mocker.patch("requests.Session.send") + mock_response = mocker.MagicMock() + mock_response.status_code = 200 + mock_response.text = '{"status": "success", "message": "Webhook registered successfully"}' # noqa: E501 + mock_send.return_value = mock_response + + response = client_v2.register_webhook(WEBHOOK_URL, AUTH_TOKEN, WEBHOOK_NAME) + + assert response == WEBHOOK_RESPONSE + mock_send.assert_called_once() + + +def test_get_webhook_details(mocker, client_v2): + mock_send = mocker.patch("requests.Session.send") + mock_response = mocker.MagicMock() + mock_response.status_code = 200 + mock_response.text = ( + '{"status": "success", "webhook_details": {"url": "http://test-webhook.com/callback"}}' # noqa: E501 + ) + mock_send.return_value = mock_response + + response = client_v2.get_webhook_details(WEBHOOK_NAME) + + assert response["status"] == "success" + assert response["webhook_details"]["url"] == WEBHOOK_URL + mock_send.assert_called_once()