Skip to content

Commit

Permalink
Marker API Implemented + Updated Llama code if ever needed
Browse files Browse the repository at this point in the history
  • Loading branch information
PranavB-11 committed Jan 31, 2025
1 parent 2ea729c commit f2c4d64
Show file tree
Hide file tree
Showing 3 changed files with 177 additions and 41 deletions.
111 changes: 71 additions & 40 deletions fastchat/serve/gradio_block_arena_vision.py
Original file line number Diff line number Diff line change
Expand Up @@ -16,6 +16,9 @@
from gradio.data_classes import FileData
import numpy as np

from io import BytesIO
import base64

from fastchat.constants import (
TEXT_MODERATION_MSG,
IMAGE_MODERATION_MSG,
Expand Down Expand Up @@ -217,29 +220,38 @@ def wrap_pdfchat_query(query, document):

# def parse_pdf(file_path):
# from llama_parse import LlamaParse
# from llama_index.core.schema import ImageDocument, TextNode

# from PIL import Image

# assert (
# "LLAMA_CLOUD_API_KEY" in os.environ
# ), "Make sure to specify LlamaParse API key."

# for _ in range(LLAMA_PARSE_MAX_RETRY):
# try:
# documents = LlamaParse(
# result_type="markdown",
# verbose=True,
# languages=list(LLAMAPARSE_SUPPORTED_LANGS.values()),
# accurate_mode=True,
# ).load_data(file_path)
# assert len(documents) > 0
# break
# except AssertionError as e:
# continue

# output = "\n".join(
# [f"Page {i+1}:\n{doc.text}\n" for i, doc in enumerate(documents)]
# parser = LlamaParse(
# api_key=os.getenv("LLAMA_CLOUD_API_KEY"),
# result_type="markdown",
# )

# return output
# def get_image_nodes(json_objs: List[dict], download_path: str):
# image_dicts = parser.get_images(json_objs, download_path=download_path)
# return [ImageDocument(image_path=image_dict["path"]) for image_dict in image_dicts]

# json_objs = parser.get_json_result(file_path)
# json_list = json_objs[0]["pages"]

# text = ""
# for page in json_list:
# text += f"Page {page['page']}:\n{page['md']}\n"
# if (page['images']):
# for i, image in enumerate(page['images']):
# text += f"page{page['page']}_figure{i + 1}\n"

# image_documents = get_image_nodes(json_objs, ".")
# images = []

# for image_doc in image_documents:
# image_path = image_doc.image_path
# image = Image.open(image_path)
# images.append(image)

# return text, images


PDFPARSE_MAX_RETRY = 2
Expand All @@ -259,29 +271,48 @@ def wrap_pdfchat_query(query, document):
"languages": ",".join(PDFPARSE_SUPPORTED_LANGS.values()),
}

def convert_base64_to_pil_image(b64_string):
    """Decode a base64-encoded image payload into a PIL Image.

    Args:
        b64_string: Base64 text encoding the raw bytes of an image file.

    Returns:
        The ``PIL.Image.Image`` opened from the decoded bytes.
    """
    from PIL import Image

    raw_bytes = base64.b64decode(b64_string)
    return Image.open(BytesIO(raw_bytes))

def parse_pdf(file_path):
    """Convert a PDF to markdown text and extracted images via the Datalab Marker API.

    Uploads the file to the hosted Marker endpoint, polls the returned
    check URL until the conversion reports ``complete``, and decodes any
    extracted images from base64 into PIL objects. Retries the whole
    round trip up to ``PDFPARSE_MAX_RETRY`` times when the API response
    is missing the expected fields (e.g. an authentication failure) or
    the conversion never completes within the polling window.

    Args:
        file_path: Path to the PDF file to convert.

    Returns:
        Tuple ``(output_md, output_images)`` where ``output_md`` is the
        rendered markdown string and ``output_images`` is a list of
        ``PIL.Image.Image`` objects.

    Raises:
        KeyError/TypeError: if every retry fails and no usable response
        was ever received (propagated from the final field access).
    """
    import time

    import requests

    url = "https://www.datalab.to/api/v1/marker"
    # NOTE(review): the env-var name mirrors the HTTP header name
    # ("X-Api-Key"); confirm the deployment actually exports it.
    headers = {"X-Api-Key": os.getenv("X-Api-Key")}

    data = None
    for _ in range(PDFPARSE_MAX_RETRY):
        try:
            # Open inside `with` so the descriptor is always closed —
            # the previous version leaked the open file handle.
            with open(file_path, "rb") as pdf_file:
                form_data = {
                    "file": ("test.pdf", pdf_file, "application/pdf"),
                    "langs": (None, "English"),
                    "force_ocr": (None, False),
                    "paginate": (None, False),
                    "output_format": (None, "markdown"),
                    "use_llm": (None, True),
                    "strip_existing_ocr": (None, False),
                    "disable_image_extraction": (None, False),
                }
                response = requests.post(
                    url, files=form_data, headers=headers, timeout=60
                )
            data = response.json()

            # Missing on auth errors ({'detail': 'Not authenticated'});
            # the KeyError is caught below so the retry loop actually
            # retries instead of crashing.
            check_url = data["request_check_url"]

            # Poll for up to ~10 minutes (300 polls x 2 s).
            max_polls = 300
            for _ in range(max_polls):
                time.sleep(2)
                response = requests.get(check_url, headers=headers, timeout=30)
                data = response.json()
                if data["status"] == "complete":
                    break

            # Treat an unfinished conversion as a failed attempt.
            assert data["status"] == "complete"
            break
        except (AssertionError, KeyError, requests.RequestException):
            continue

    output_md = data["markdown"]
    output_images = [
        convert_base64_to_pil_image(b64_image)
        for b64_image in data["images"].values()
    ]

    return output_md, output_images

Expand Down
3 changes: 2 additions & 1 deletion fastchat/serve/gradio_block_arena_vision_anony.py
Original file line number Diff line number Diff line change
Expand Up @@ -95,7 +95,8 @@

# TODO(chris): fix sampling weights
VISION_SAMPLING_WEIGHTS = {}
PDFCHAT_SAMPLING_WEIGHTS = {}
PDFCHAT_SAMPLING_WEIGHTS = {
"gpt-4o-2024-05-13": 1, "gpt-4o-mini-2024-07-18": 1}

# TODO(chris): Find battle targets that make sense
VISION_BATTLE_TARGETS = {}
Expand Down
104 changes: 104 additions & 0 deletions fastchat/serve/test.ipynb
Original file line number Diff line number Diff line change
@@ -0,0 +1,104 @@
{
"cells": [
{
"cell_type": "code",
"execution_count": 8,
"metadata": {},
"outputs": [],
"source": [
"import requests\n",
"\n",
"url = \"https://www.datalab.to/api/v1/marker\"\n",
"import os\n",
"form_data = {\n",
" 'file': ('test.pdf', open(\"ddsppaper (1).pdf\", 'rb'), 'application/pdf'),\n",
" 'langs': (None, \"English\"),\n",
" \"force_ocr\": (None, False),\n",
" \"paginate\": (None, False),\n",
" 'output_format': (None, 'markdown'),\n",
" \"use_llm\": (None, True),\n",
" \"strip_existing_ocr\": (None, False),\n",
" \"disable_image_extraction\": (None, False)\n",
"}\n",
"\n",
"headers = {\"X-Api-Key\": \"wAdzo2tLEsd5PzQTtQT4RNZSBM6rJy_LWFTtj8hjbZ0\"} \n",
"response = requests.post(url, files=form_data, headers=headers)\n",
"data = response.json()"
]
},
{
"cell_type": "code",
"execution_count": 9,
"metadata": {},
"outputs": [
{
"name": "stdout",
"output_type": "stream",
"text": [
"{'detail': 'Not authenticated'}\n"
]
}
],
"source": [
"print(data)"
]
},
{
"cell_type": "code",
"execution_count": 4,
"metadata": {},
"outputs": [
{
"ename": "KeyError",
"evalue": "'request_check_url'",
"output_type": "error",
"traceback": [
"\u001b[0;31m---------------------------------------------------------------------------\u001b[0m",
"\u001b[0;31mKeyError\u001b[0m Traceback (most recent call last)",
"Cell \u001b[0;32mIn[4], line 2\u001b[0m\n\u001b[1;32m 1\u001b[0m max_polls \u001b[38;5;241m=\u001b[39m \u001b[38;5;241m300\u001b[39m\n\u001b[0;32m----> 2\u001b[0m check_url \u001b[38;5;241m=\u001b[39m data[\u001b[38;5;124m\"\u001b[39m\u001b[38;5;124mrequest_check_url\u001b[39m\u001b[38;5;124m\"\u001b[39m]\n\u001b[1;32m 3\u001b[0m \u001b[38;5;28;01mimport\u001b[39;00m \u001b[38;5;21;01mtime\u001b[39;00m\n\u001b[1;32m 4\u001b[0m \u001b[38;5;28;01mfor\u001b[39;00m i \u001b[38;5;129;01min\u001b[39;00m \u001b[38;5;28mrange\u001b[39m(max_polls):\n",
"\u001b[0;31mKeyError\u001b[0m: 'request_check_url'"
]
}
],
"source": [
"max_polls = 300\n",
"check_url = data[\"request_check_url\"]\n",
"import time\n",
"for i in range(max_polls):\n",
" time.sleep(2)\n",
" response = requests.get(check_url, headers=headers)\n",
" data = response.json()\n",
"\n",
" if data[\"status\"] == \"complete\":\n",
" break"
]
}
],
"metadata": {
"kernelspec": {
"display_name": "Python 3.12.8 ('myenv')",
"language": "python",
"name": "python3"
},
"language_info": {
"codemirror_mode": {
"name": "ipython",
"version": 3
},
"file_extension": ".py",
"mimetype": "text/x-python",
"name": "python",
"nbconvert_exporter": "python",
"pygments_lexer": "ipython3",
"version": "3.12.8"
},
"orig_nbformat": 4,
"vscode": {
"interpreter": {
"hash": "03c457903a5d26c69a3bb8be9c56ac1ee96fb7ba834b2e69a22fb0607b146481"
}
}
},
"nbformat": 4,
"nbformat_minor": 2
}

0 comments on commit f2c4d64

Please sign in to comment.