Skip to content

Commit

Permalink
feat: JSON schema changes to support URLs and description boxes (#133)
Browse files Browse the repository at this point in the history
JSON schema changes to support URLs and description boxes

Signed-off-by: Chandrasekharan M <[email protected]>
  • Loading branch information
chandrasekharan-zipstack authored Dec 4, 2024
1 parent 6fda9d3 commit e2cf928
Show file tree
Hide file tree
Showing 14 changed files with 27 additions and 23 deletions.
Original file line number Diff line number Diff line change
@@ -1,10 +1,11 @@
{
"title": "No-op",
"title": "No Op Embedding",
"type": "object",
"required": [
"adapter_name",
"wait_time"
],
"description": "No Op Embedding does not perform any operation, its used to test the performance of the system in the absence of 3rd party induced latencies",
"properties": {
"adapter_name": {
"type": "string",
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -23,7 +23,7 @@
"type": "string",
"title": "Base URL",
"default": "",
"description": "Provide the base URL where Ollama server is running. Example: http://docker.host.internal:11434 or http://localhost:11434"
"description": "Provide the base URL where Ollama server is running. Example: `http://docker.host.internal:11434` or `http://localhost:11434`"
},
"embed_batch_size": {
"type": "number",
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -17,7 +17,7 @@
"type": "string",
"title": "Model Name",
"default": "models/embedding-gecko-001",
"description": "Provide the name of the model to use for embedding. Example: models/embedding-gecko-001"
"description": "Provide the name of the model to use for embedding. Example: `models/embedding-gecko-001`"
},
"api_key": {
"type": "string",
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -16,7 +16,7 @@
"type": "string",
"title": "Model",
"default": "BAAI/bge-small-en-v1.5",
"description": "The name of the model to use. Example: BAAI/bge-small-en-v1.5"
"description": "The name of the model to use. Example: `BAAI/bge-small-en-v1.5`"
}
}
}
Original file line number Diff line number Diff line change
@@ -1,5 +1,5 @@
{
"title": "MistralAI LLM",
"title": "Mistral AI LLM",
"type": "object",
"required": [
"adapter_name",
Expand Down
Original file line number Diff line number Diff line change
@@ -1,10 +1,11 @@
{
"title": "No-op",
"title": "No Op LLM",
"type": "object",
"required": [
"adapter_name",
"wait_time"
],
"description": "No Op LLM does not perform any operation, its used to test the performance of the system in the absence of 3rd party induced latencies",
"properties": {
"adapter_name": {
"type": "string",
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -17,7 +17,7 @@
"title": "URI",
"format": "uri",
"default": "localhost:19530",
"description": "Provide the URI of the Milvus server. Example: https://<instance-id>.api.gcp-us-west1.zillizcloud.com"
"description": "Provide the URI of the Milvus server. Example: `https://<instance-id>.api.gcp-us-west1.zillizcloud.com`"
},
"token": {
"type": "string",
Expand Down
Original file line number Diff line number Diff line change
@@ -1,10 +1,11 @@
{
"title": "No-op",
"title": "No Op Vector DB",
"type": "object",
"required": [
"adapter_name",
"wait_time"
],
"description": "No Op Vector DB does not perform any operation, its used to test the performance of the system in the absence of 3rd party induced latencies",
"properties": {
"adapter_name": {
"type": "string",
Expand Down
Original file line number Diff line number Diff line change
@@ -1,5 +1,5 @@
{
"title": "Llama Parse X2Text",
"title": "Llama Parse Text Extractor",
"type": "object",
"required": [
"api_key"
Expand All @@ -9,7 +9,7 @@
"type": "string",
"title": "Name",
"default": "",
"description": "Provide a unique name for this adapter instance. Example: Llama parse1"
"description": "Provide a unique name for this adapter instance. Example: llama-parse-1"
},
"api_key": {
"type": "string",
Expand All @@ -33,7 +33,7 @@
"markdown"
],
"default": "text",
"description": "Choose the type of result. Markdown or text."
"description": "Choose the type of result - `markdown` or `text`."
},
"verbose": {
"type": "boolean",
Expand All @@ -43,4 +43,3 @@
}
}
}

Original file line number Diff line number Diff line change
@@ -1,11 +1,12 @@
{
"title": "LLMWhisperer X2Text",
"title": "LLMWhisperer v1 Text Extractor",
"type": "object",
"required": [
"adapter_name",
"unstract_key",
"url"
],
"description": "LLMWhisperer v1 is deprecated, use the cheaper and faster [LLMWhisperer v2](https://docs.unstract.com/llmwhisperer/llm_whisperer/faqs/v1_to_v2/) instead.",
"properties": {
"adapter_name": {
"type": "string",
Expand All @@ -24,7 +25,7 @@
"type": "string",
"title": "Unstract Key",
"format": "password",
"description": "API key obtained from the Unstract developer portal (https://unstract-api-resource.developer.azure-api.net)"
"description": "API key obtained from the [Unstract developer portal](https://unstract-api-resource.developer.azure-api.net)"
},
"mode": {
"type": "string",
Expand All @@ -36,7 +37,7 @@
"form"
],
"default": "form",
"description": "Native text : Extracts text from PDF without OCR. This is very fast and cost effective. Use this mode if you are sure all your PDFs are native text pdfs (not scanned documents). Note that some scanned PDFs are \"searchable\" PDFs. Use the OCR modes for these PDFs as the quality of text in these documents are often poor. \n Low cost : Extracts text from scanned and native PDFs, images and office documents. This OCR mode cannot handle handwriting and low quality scanned pdfs and images. \n High quality : Extracts text from scanned and native PDFs, images and office documents. This OCR mode can handle handwriting and low quality scanned pdfs and images. \n Form: Extracts text from scanned and native PDFs, images and office documents. This OCR mode can handle handwriting and low quality scanned pdfs and images. Can also extract information about checkboxes and radio button"
"description": "Processing mode to use, described in the [LLMWhisperer v1 documentation](https://docs.unstract.com/llmwhisperer/1.0.0/llm_whisperer/apis/llm_whisperer_text_extraction_api/#processing-modes)"
},
"output_mode": {
"type": "string",
Expand All @@ -47,7 +48,7 @@
"text"
],
"default": "line-printer",
"description": "The output format. Valid options are line-printer, dump-text and text. The line-printer mode tries to maintain the layout of the original text and works very well as inputs to LLMs. dump-text just dumps each page as paragraphs. text extracts text into groups as it sees in the original page. text and dump-text are treated as same in ocr processing mode."
"description": "Output mode to use, described in the [LLMWhisperer v1 documentation](https://docs.unstract.com/llmwhisperer/1.0.0/llm_whisperer/apis/llm_whisperer_text_extraction_api/#output-modes)"
},

"line_splitter_tolerance": {
Expand Down
Original file line number Diff line number Diff line change
@@ -1,5 +1,5 @@
{
"title": "LLMWhisperer X2Text v2",
"title": "LLMWhisperer v2 Text Extractor",
"type": "object",
"required": [
"adapter_name",
Expand All @@ -18,7 +18,7 @@
"title": "URL",
"format": "uri",
"default": "https://llmwhisperer-api.us-central.unstract.com",
"description": "Provide the base URL of the LLM Whisperer service based on your region."
"description": "Provide the base URL of the LLMWhisperer service based on your region, can be obtained from the [Unstract developer portal](https://us-central.unstract.com/landing?selectedProduct=llm-whisperer)."
},
"unstract_key": {
"type": "string",
Expand All @@ -36,7 +36,7 @@
"form"
],
"default": "form",
"description": "Processing mode to use, described in the [LLM Whisperer documentation](https://docs.unstract.com/llmwhisperer/llm_whisperer/apis/llm_whisperer_text_extraction_api/#modes)."
"description": "Processing mode to use, described in the [LLMWhisperer documentation](https://docs.unstract.com/llmwhisperer/llm_whisperer/apis/llm_whisperer_text_extraction_api/#modes)."
},
"output_mode": {
"type": "string",
Expand All @@ -46,7 +46,7 @@
"text"
],
"default": "layout_preserving",
"description": "Output format, described in the [LLM Whisperer documentation](https://docs.unstract.com/llmwhisperer/llm_whisperer/apis/llm_whisperer_text_extraction_api/#output-modes)"
"description": "Output format, described in the [LLMWhisperer documentation](https://docs.unstract.com/llmwhisperer/llm_whisperer/apis/llm_whisperer_text_extraction_api/#output-modes)"
},
"line_splitter_tolerance": {
"type": "number",
Expand Down
Original file line number Diff line number Diff line change
@@ -1,10 +1,11 @@
{
"title": "No-op",
"title": "No Op Text Extractor",
"type": "object",
"required": [
"adapter_name",
"wait_time"
],
"description": "No Op Text Extractor does not perform any operation, its used to test the performance of the system in the absence of 3rd party induced latencies",
"properties": {
"adapter_name": {
"type": "string",
Expand Down
Original file line number Diff line number Diff line change
@@ -1,5 +1,5 @@
{
"title": "Unstructured IO Community X2Text",
"title": "Unstructured IO Community Text Extractor",
"type": "object",
"required": [
"adapter_name",
Expand Down
Original file line number Diff line number Diff line change
@@ -1,5 +1,5 @@
{
"title": "Unstructured IO Enterprise X2Text",
"title": "Unstructured IO Enterprise Text Extractor",
"type": "object",
"required": [
"adapter_name",
Expand Down

0 comments on commit e2cf928

Please sign in to comment.