Skip to content

Commit

Permalink
Marker API Implemented + Updated Llama code if ever needed
Browse files Browse the repository at this point in the history
  • Loading branch information
PranavB-11 committed Jan 31, 2025
1 parent 2ea729c commit f2c4d64
Show file tree
Hide file tree
Showing 3 changed files with 177 additions and 41 deletions.
111 changes: 71 additions & 40 deletions fastchat/serve/gradio_block_arena_vision.py
Original file line number Diff line number Diff line change
Expand Up @@ -16,6 +16,9 @@
from gradio.data_classes import FileData
import numpy as np

from io import BytesIO
import base64

from fastchat.constants import (
TEXT_MODERATION_MSG,
IMAGE_MODERATION_MSG,
Expand Down Expand Up @@ -217,29 +220,38 @@ def wrap_pdfchat_query(query, document):

# def parse_pdf(file_path):
# from llama_parse import LlamaParse
# from llama_index.core.schema import ImageDocument, TextNode

# from PIL import Image

# assert (
# "LLAMA_CLOUD_API_KEY" in os.environ
# ), "Make sure to specify LlamaParse API key."

# for _ in range(LLAMA_PARSE_MAX_RETRY):
# try:
# documents = LlamaParse(
# result_type="markdown",
# verbose=True,
# languages=list(LLAMAPARSE_SUPPORTED_LANGS.values()),
# accurate_mode=True,
# ).load_data(file_path)
# assert len(documents) > 0
# break
# except AssertionError as e:
# continue

# output = "\n".join(
# [f"Page {i+1}:\n{doc.text}\n" for i, doc in enumerate(documents)]
# parser = LlamaParse(
# api_key=os.getenv("LLAMA_CLOUD_API_KEY"),
# result_type="markdown",
# )

# return output
# def get_image_nodes(json_objs: List[dict], download_path: str):
# image_dicts = parser.get_images(json_objs, download_path=download_path)
# return [ImageDocument(image_path=image_dict["path"]) for image_dict in image_dicts]

# json_objs = parser.get_json_result(file_path)
# json_list = json_objs[0]["pages"]

# text = ""
# for page in json_list:
# text += f"Page {page['page']}:\n{page['md']}\n"
# if (page['images']):
# for i, image in enumerate(page['images']):
# text += f"page{page['page']}_figure{i + 1}\n"

# image_documents = get_image_nodes(json_objs, ".")
# images = []

# for image_doc in image_documents:
# image_path = image_doc.image_path
# image = Image.open(image_path)
# images.append(image)

# return text, images


PDFPARSE_MAX_RETRY = 2
Expand All @@ -259,29 +271,48 @@ def wrap_pdfchat_query(query, document):
"languages": ",".join(PDFPARSE_SUPPORTED_LANGS.values()),
}

def convert_base64_to_pil_image(b64_string):
    """Decode a base64-encoded image payload into a PIL Image.

    Args:
        b64_string: Base64 text encoding the raw bytes of an image file.

    Returns:
        The ``PIL.Image.Image`` opened from the decoded bytes.
    """
    from PIL import Image

    raw_bytes = base64.b64decode(b64_string)
    return Image.open(BytesIO(raw_bytes))

def parse_pdf(file_path):
    """Convert a PDF to markdown text and extracted images via the Datalab Marker API.

    Uploads the file to the hosted Marker endpoint, polls the returned
    check URL until the conversion reports ``complete``, and decodes any
    extracted images from base64 into PIL objects. Retries the whole
    round trip up to ``PDFPARSE_MAX_RETRY`` times when the API response
    is missing the expected fields (e.g. an authentication failure) or
    the conversion never completes within the polling window.

    Args:
        file_path: Path to the PDF file to convert.

    Returns:
        Tuple ``(output_md, output_images)`` where ``output_md`` is the
        rendered markdown string and ``output_images`` is a list of
        ``PIL.Image.Image`` objects.

    Raises:
        KeyError/TypeError: if every retry fails and no usable response
        was ever received (propagated from the final field access).
    """
    import time

    import requests

    url = "https://www.datalab.to/api/v1/marker"
    # NOTE(review): the env-var name mirrors the HTTP header name
    # ("X-Api-Key"); confirm the deployment actually exports it.
    headers = {"X-Api-Key": os.getenv("X-Api-Key")}

    data = None
    for _ in range(PDFPARSE_MAX_RETRY):
        try:
            # Open inside `with` so the descriptor is always closed —
            # the previous version leaked the open file handle.
            with open(file_path, "rb") as pdf_file:
                form_data = {
                    "file": ("test.pdf", pdf_file, "application/pdf"),
                    "langs": (None, "English"),
                    "force_ocr": (None, False),
                    "paginate": (None, False),
                    "output_format": (None, "markdown"),
                    "use_llm": (None, True),
                    "strip_existing_ocr": (None, False),
                    "disable_image_extraction": (None, False),
                }
                response = requests.post(
                    url, files=form_data, headers=headers, timeout=60
                )
            data = response.json()

            # Missing on auth errors ({'detail': 'Not authenticated'});
            # the KeyError is caught below so the retry loop actually
            # retries instead of crashing.
            check_url = data["request_check_url"]

            # Poll for up to ~10 minutes (300 polls x 2 s).
            max_polls = 300
            for _ in range(max_polls):
                time.sleep(2)
                response = requests.get(check_url, headers=headers, timeout=30)
                data = response.json()
                if data["status"] == "complete":
                    break

            # Treat an unfinished conversion as a failed attempt.
            assert data["status"] == "complete"
            break
        except (AssertionError, KeyError, requests.RequestException):
            continue

    output_md = data["markdown"]
    output_images = [
        convert_base64_to_pil_image(b64_image)
        for b64_image in data["images"].values()
    ]

    return output_md, output_images

Expand Down
3 changes: 2 additions & 1 deletion fastchat/serve/gradio_block_arena_vision_anony.py
Original file line number Diff line number Diff line change
Expand Up @@ -95,7 +95,8 @@

# TODO(chris): fix sampling weights
VISION_SAMPLING_WEIGHTS = {}
PDFCHAT_SAMPLING_WEIGHTS = {}
PDFCHAT_SAMPLING_WEIGHTS = {
"gpt-4o-2024-05-13": 1, "gpt-4o-mini-2024-07-18": 1}

# TODO(chris): Find battle targets that make sense
VISION_BATTLE_TARGETS = {}
Expand Down
104 changes: 104 additions & 0 deletions fastchat/serve/test.ipynb
Original file line number Diff line number Diff line change
@@ -0,0 +1,104 @@
{
"cells": [
{
"cell_type": "code",
"execution_count": 8,
"metadata": {},
"outputs": [],
"source": [
"import requests\n",
"\n",
"url = \"https://www.datalab.to/api/v1/marker\"\n",
"import os\n",
"form_data = {\n",
" 'file': ('test.pdf', open(\"ddsppaper (1).pdf\", 'rb'), 'application/pdf'),\n",
" 'langs': (None, \"English\"),\n",
" \"force_ocr\": (None, False),\n",
" \"paginate\": (None, False),\n",
" 'output_format': (None, 'markdown'),\n",
" \"use_llm\": (None, True),\n",
" \"strip_existing_ocr\": (None, False),\n",
" \"disable_image_extraction\": (None, False)\n",
"}\n",
"\n",
"headers = {\"X-Api-Key\": \"wAdzo2tLEsd5PzQTtQT4RNZSBM6rJy_LWFTtj8hjbZ0\"} \n",
"response = requests.post(url, files=form_data, headers=headers)\n",
"data = response.json()"
]
},
{
"cell_type": "code",
"execution_count": 9,
"metadata": {},
"outputs": [
{
"name": "stdout",
"output_type": "stream",
"text": [
"{'detail': 'Not authenticated'}\n"
]
}
],
"source": [
"print(data)"
]
},
{
"cell_type": "code",
"execution_count": 4,
"metadata": {},
"outputs": [
{
"ename": "KeyError",
"evalue": "'request_check_url'",
"output_type": "error",
"traceback": [
"\u001b[0;31m---------------------------------------------------------------------------\u001b[0m",
"\u001b[0;31mKeyError\u001b[0m Traceback (most recent call last)",
"Cell \u001b[0;32mIn[4], line 2\u001b[0m\n\u001b[1;32m 1\u001b[0m max_polls \u001b[38;5;241m=\u001b[39m \u001b[38;5;241m300\u001b[39m\n\u001b[0;32m----> 2\u001b[0m check_url \u001b[38;5;241m=\u001b[39m data[\u001b[38;5;124m\"\u001b[39m\u001b[38;5;124mrequest_check_url\u001b[39m\u001b[38;5;124m\"\u001b[39m]\n\u001b[1;32m 3\u001b[0m \u001b[38;5;28;01mimport\u001b[39;00m \u001b[38;5;21;01mtime\u001b[39;00m\n\u001b[1;32m 4\u001b[0m \u001b[38;5;28;01mfor\u001b[39;00m i \u001b[38;5;129;01min\u001b[39;00m \u001b[38;5;28mrange\u001b[39m(max_polls):\n",
"\u001b[0;31mKeyError\u001b[0m: 'request_check_url'"
]
}
],
"source": [
"max_polls = 300\n",
"check_url = data[\"request_check_url\"]\n",
"import time\n",
"for i in range(max_polls):\n",
" time.sleep(2)\n",
" response = requests.get(check_url, headers=headers)\n",
" data = response.json()\n",
"\n",
" if data[\"status\"] == \"complete\":\n",
" break"
]
}
],
"metadata": {
"kernelspec": {
"display_name": "Python 3.12.8 ('myenv')",
"language": "python",
"name": "python3"
},
"language_info": {
"codemirror_mode": {
"name": "ipython",
"version": 3
},
"file_extension": ".py",
"mimetype": "text/x-python",
"name": "python",
"nbconvert_exporter": "python",
"pygments_lexer": "ipython3",
"version": "3.12.8"
},
"orig_nbformat": 4,
"vscode": {
"interpreter": {
"hash": "03c457903a5d26c69a3bb8be9c56ac1ee96fb7ba834b2e69a22fb0607b146481"
}
}
},
"nbformat": 4,
"nbformat_minor": 2
}

0 comments on commit f2c4d64

Please sign in to comment.