From be59c2f6a788f5dcad5d260b59b6a1216713a2a0 Mon Sep 17 00:00:00 2001 From: Menghuan1918 Date: Thu, 14 Nov 2024 20:55:37 +0800 Subject: [PATCH] =?UTF-8?q?=E6=9B=B4=E6=96=B0Doc2X=E6=8F=92=E4=BB=B6?= =?UTF-8?q?=EF=BC=9A=E9=80=82=E9=85=8D=E6=96=B0=E6=8E=A5=E5=8F=A3=20(#3159?= =?UTF-8?q?)?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit * fix: doc2x now not hava the picture API * fix: 适配doc2x V2 API * Update to axios to request doc2x * Add time out --- packages/plugins/register.ts | 17 +- .../plugins/src/Doc2X/FileImg2text/index.ts | 172 ------- .../plugins/src/Doc2X/FilePDF2text/index.ts | 165 ------ .../src/Doc2X/FilePDF2text/template.json | 451 ---------------- packages/plugins/src/Doc2X/PDF2text/index.ts | 182 +++++++ .../{FileImg2text => PDF2text}/template.json | 300 +++++------ .../plugins/src/Doc2X/URLImg2text/index.ts | 166 ------ .../src/Doc2X/URLImg2text/template.json | 484 ------------------ .../plugins/src/Doc2X/URLPDF2text/index.ts | 156 ------ .../src/Doc2X/URLPDF2text/template.json | 435 ---------------- 10 files changed, 341 insertions(+), 2187 deletions(-) delete mode 100644 packages/plugins/src/Doc2X/FileImg2text/index.ts delete mode 100644 packages/plugins/src/Doc2X/FilePDF2text/index.ts delete mode 100644 packages/plugins/src/Doc2X/FilePDF2text/template.json create mode 100644 packages/plugins/src/Doc2X/PDF2text/index.ts rename packages/plugins/src/Doc2X/{FileImg2text => PDF2text}/template.json (77%) delete mode 100644 packages/plugins/src/Doc2X/URLImg2text/index.ts delete mode 100644 packages/plugins/src/Doc2X/URLImg2text/template.json delete mode 100644 packages/plugins/src/Doc2X/URLPDF2text/index.ts delete mode 100644 packages/plugins/src/Doc2X/URLPDF2text/template.json diff --git a/packages/plugins/register.ts b/packages/plugins/register.ts index 63c309f6fc10..0ad3e5d26c93 100644 --- a/packages/plugins/register.ts +++ b/packages/plugins/register.ts @@ -5,18 +5,7 @@ import { cloneDeep } from 'lodash'; import { WorkerNameEnum, runWorker } from '@fastgpt/service/worker/utils'; // Run in main thread -const staticPluginList = [ - 'getTime', - 'fetchUrl', - 'Doc2X', - 'Doc2X/URLPDF2text', - 'Doc2X/URLImg2text', - `Doc2X/FilePDF2text`, - `Doc2X/FileImg2text`, - 'feishu', - 'google', - 'bing' -]; +const staticPluginList = ['getTime', 'fetchUrl', 'feishu', 'google', 'bing']; // Run in worker thread (Have npm packages) const packagePluginList = [ 'mathExprVal', @@ -28,7 +17,9 @@ const packagePluginList = [ 'drawing', 'drawing/baseChart', 'wiki', - 'databaseConnection' + 'databaseConnection', + 'Doc2X', + 'Doc2X/PDF2text' ]; export const list = [...staticPluginList, ...packagePluginList]; diff --git a/packages/plugins/src/Doc2X/FileImg2text/index.ts b/packages/plugins/src/Doc2X/FileImg2text/index.ts deleted file mode 100644 index 2789cdd92945..000000000000 --- a/packages/plugins/src/Doc2X/FileImg2text/index.ts +++ /dev/null @@ -1,172 +0,0 @@ -import { delay } from '@fastgpt/global/common/system/utils'; -import { addLog } from '@fastgpt/service/common/system/log'; - -type Props = { - apikey: string; - files: Array; - img_correction: boolean; - formula: boolean; -}; - -type Response = Promise<{ - result: string; - failreason: string; - success: boolean; -}>; - -const main = async ({ apikey, files, img_correction, formula }: Props): Response => { - // Check the apikey - if (!apikey) { - return { - result: '', - failreason: `API key is required`, - success: false - }; - } - - let real_api_key = apikey; - if (!apikey.startsWith('sk-')) { - const response = await fetch('https://api.doc2x.noedgeai.com/api/token/refresh', { - method: 'POST', - headers: { - Authorization: `Bearer ${apikey}` - } - }); - if (response.status !== 200) { - return { - result: '', - failreason: `Get token failed: ${await response.text()}`, - success: false - }; - } - const data = await response.json(); - real_api_key = data.data.token; - } - - let final_result = ''; - let fail_reason = ''; - let flag = false; - //Process each file one by one - for await (const url of files) { - // Fetch the image and check its content type - const imageResponse = await fetch(url); - if (!imageResponse.ok) { - fail_reason += `\n---\nFile:${url} \n\nFailed to fetch image from URL\n\n`; - flag = true; - continue; - } - - const contentType = imageResponse.headers.get('content-type'); - const fileName = url.match(/read\?filename=([^&]+)/)?.[1] || 'unknown.png'; - if (!contentType || !contentType.startsWith('image/')) { - fail_reason += `\n---\nFile:${url} \n\nThe provided URL does not point to an image: ${contentType}\n\n`; - flag = true; - continue; - } - - const blob = await imageResponse.blob(); - const formData = new FormData(); - formData.append('file', blob, fileName); - formData.append('img_correction', img_correction ? '1' : '0'); - formData.append('equation', formula ? '1' : '0'); - - let upload_url = 'https://api.doc2x.noedgeai.com/api/platform/async/img'; - if (real_api_key.startsWith('sk-')) { - upload_url = 'https://api.doc2x.noedgeai.com/api/v1/async/img'; - } - - let uuid; - let upload_flag = true; - const uploadAttempts = [1, 2, 3]; - for await (const attempt of uploadAttempts) { - const upload_response = await fetch(upload_url, { - method: 'POST', - headers: { - Authorization: `Bearer ${real_api_key}` - }, - body: formData - }); - - if (!upload_response.ok) { - // Rate limit, wait for 10s and retry at most 3 times - if (upload_response.status === 429 && attempt < 3) { - await delay(10000); - continue; - } - fail_reason += `\n---\nFile:${fileName}\n\nFailed to upload file: ${await upload_response.text()}\n\n`; - flag = true; - upload_flag = false; - break; - } - if (!upload_flag) { - continue; - } - - const upload_data = await upload_response.json(); - uuid = upload_data.data.uuid; - break; - } - - // Get the result by uuid - let result_url = 'https://api.doc2x.noedgeai.com/api/platform/async/status?uuid=' + uuid; - if (real_api_key.startsWith('sk-')) { - result_url = 'https://api.doc2x.noedgeai.com/api/v1/async/status?uuid=' + uuid; - } - - let required_flag = true; - const maxAttempts = 100; - // Wait for the result, at most 100s - for await (const _ of Array(maxAttempts).keys()) { - const result_response = await fetch(result_url, { - headers: { - Authorization: `Bearer ${real_api_key}` - } - }); - if (!result_response.ok) { - fail_reason += `\n---\nFile:${fileName}\n\nFailed to get result: ${await result_response.text()}\n\n`; - flag = true; - required_flag = false; - break; - } - const result_data = await result_response.json(); - if (['ready', 'processing'].includes(result_data.data.status)) { - await delay(1000); - } else if (result_data.data.status === 'pages limit exceeded') { - fail_reason += `\n---\nFile:${fileName}\n\nFailed to get result: pages limit exceeded\n\n`; - flag = true; - required_flag = false; - break; - } else if (result_data.data.status === 'success') { - let result; - try { - result = result_data.data.result.pages[0].md; - result = result.replace(/\\[\(\)]/g, '$').replace(/\\[\[\]]/g, '$$'); - } catch { - // no pages - final_result += `\n---\nFile:${fileName}\n\n \n\n`; - required_flag = false; - } - final_result += `\n---\nFile:${fileName}\n\n${result}\n\n`; - required_flag = false; - break; - } else { - fail_reason += `\n---\nFile:${fileName}\n\nFailed to get result: ${result_data.data.status}\n\n`; - flag = true; - required_flag = false; - break; - } - } - if (required_flag) { - fail_reason += `\n---\nFile:${fileName}\n\nTimeout waiting for result\n\n`; - flag = true; - } - } - - return { - result: final_result, - failreason: fail_reason, - success: !flag - }; -}; - -export default main; diff --git a/packages/plugins/src/Doc2X/FilePDF2text/index.ts b/packages/plugins/src/Doc2X/FilePDF2text/index.ts deleted file mode 100644 index 4b6695a5a4b9..000000000000 --- a/packages/plugins/src/Doc2X/FilePDF2text/index.ts +++ /dev/null @@ -1,165 +0,0 @@ -import { delay } from '@fastgpt/global/common/system/utils'; -import { addLog } from '@fastgpt/service/common/system/log'; -import { result } from 'lodash'; - -type Props = { - apikey: string; - files: Array; - ocr: boolean; -}; - -// Response type same as HTTP outputs -type Response = Promise<{ - result: string; - failreason: string; - success: boolean; -}>; - -const main = async ({ apikey, files, ocr }: Props): Response => { - // Check the apikey - if (!apikey) { - return { - result: '', - failreason: `API key is required`, - success: false - }; - } - - let real_api_key = apikey; - if (!apikey.startsWith('sk-')) { - const response = await fetch('https://api.doc2x.noedgeai.com/api/token/refresh', { - method: 'POST', - headers: { - Authorization: `Bearer ${apikey}` - } - }); - if (response.status !== 200) { - return { - result: '', - failreason: `Get token failed: ${await response.text()}`, - success: false - }; - } - const data = await response.json(); - real_api_key = data.data.token; - } - - let final_result = ''; - let fail_reason = ''; - let flag = false; - //Process each file one by one - for await (const url of files) { - //Fetch the pdf and check its contene type - const PDFResponse = await fetch(url); - if (!PDFResponse.ok) { - fail_reason += `\n---\nFile:${url} \n\nFailed to fetch PDF from URL\n\n`; - flag = true; - continue; - } - - const contentType = PDFResponse.headers.get('content-type'); - const file_name = url.match(/read\?filename=([^&]+)/)?.[1] || 'unknown.pdf'; - if (!contentType || !contentType.startsWith('application/pdf')) { - fail_reason += `\n---\nFile:${file_name}\n\nThe provided file does not point to a PDF: ${contentType}\n\n`; - flag = true; - continue; - } - - const blob = await PDFResponse.blob(); - const formData = new FormData(); - formData.append('file', blob, file_name); - formData.append('ocr', ocr ? '1' : '0'); - - let upload_url = 'https://api.doc2x.noedgeai.com/api/platform/async/pdf'; - if (real_api_key.startsWith('sk-')) { - upload_url = 'https://api.doc2x.noedgeai.com/api/v1/async/pdf'; - } - - let uuid; - let upload_flag = true; - const uploadAttempts = [1, 2, 3]; - for await (const attempt of uploadAttempts) { - const upload_response = await fetch(upload_url, { - method: 'POST', - headers: { - Authorization: `Bearer ${real_api_key}` - }, - body: formData - }); - if (!upload_response.ok) { - // Rate limit, wait for 10s and retry at most 3 times - if (upload_response.status === 429 && attempt < 3) { - await delay(10000); - continue; - } - fail_reason += `\n---\nFile:${file_name}\n\nFailed to upload file: ${await upload_response.text()}\n\n`; - flag = true; - upload_flag = false; - } - if (!upload_flag) { - continue; - } - const upload_data = await upload_response.json(); - uuid = upload_data.data.uuid; - break; - } - - // Get the result by uuid - let result_url = 'https://api.doc2x.noedgeai.com/api/platform/async/status?uuid=' + uuid; - if (real_api_key.startsWith('sk-')) { - result_url = 'https://api.doc2x.noedgeai.com/api/v1/async/status?uuid=' + uuid; - } - - let required_flag = true; - let result = ''; - // Wait for the result, at most 100s - const maxAttempts = 100; - for await (const _ of Array(maxAttempts).keys()) { - const result_response = await fetch(result_url, { - headers: { - Authorization: `Bearer ${real_api_key}` - } - }); - if (!result_response.ok) { - fail_reason += `\n---\nFile:${file_name}\n\nFailed to get result: ${await result_response.text()}\n\n`; - flag = true; - required_flag = false; - break; - } - const result_data = await result_response.json(); - if (['ready', 'processing'].includes(result_data.data.status)) { - await delay(1000); - } else if (result_data.data.status === 'pages limit exceeded') { - fail_reason += `\n---\nFile:${file_name}\n\nPages limit exceeded\n\n`; - flag = true; - required_flag = false; - break; - } else if (result_data.data.status === 'success') { - result = await Promise.all( - result_data.data.result.pages.map((page: { md: any }) => page.md) - ).then((pages) => pages.join('\n')); - result = result.replace(/\\[\(\)]/g, '$').replace(/\\[\[\]]/g, '$$'); - final_result += `\n---\nFile:${file_name}\n\n${result}\n\n`; - required_flag = false; - break; - } else { - fail_reason += `\n---\nFile:${file_name}\n\nFailed to get result: ${result_data.data.status}\n\n`; - flag = true; - required_flag = false; - break; - } - } - if (required_flag) { - fail_reason += `\n---\nFile:${file_name}\n\nTimeout after 100s for uuid ${uuid}\n\n`; - flag = true; - } - } - - return { - result: final_result, - failreason: fail_reason, - success: !flag - }; -}; - -export default main; diff --git a/packages/plugins/src/Doc2X/FilePDF2text/template.json b/packages/plugins/src/Doc2X/FilePDF2text/template.json deleted file mode 100644 index 4fa3f0908ab1..000000000000 --- a/packages/plugins/src/Doc2X/FilePDF2text/template.json +++ /dev/null @@ -1,451 +0,0 @@ -{ - "author": "Menghuan1918", - "version": "488", - "name": "Doc2X PDF文件(文件)识别", - "avatar": "plugins/doc2x", - "intro": "将上传的PDF文件发送至Doc2X进行解析,返回带LaTeX公式的markdown格式的文本", - "courseUrl": "https://fael3z0zfze.feishu.cn/wiki/Rkc5witXWiJoi5kORd2cofh6nDg?fromScene=spaceOverview", - "showStatus": true, - "weight": 10, - - "isTool": true, - "templateType": "tools", - - "workflow": { - "nodes": [ - { - "nodeId": "pluginConfig", - "name": "common:core.module.template.system_config", - "intro": "", - "avatar": "core/workflow/template/systemConfig", - "flowNodeType": "pluginConfig", - "position": { - "x": -30.474351356537454, - "y": -101.45216221730038 - }, - "version": "4811", - "inputs": [], - "outputs": [] - }, - { - "nodeId": "pluginInput", - "name": "插件开始", - "intro": "可以配置插件需要哪些输入,利用这些输入来运行插件", - "avatar": "core/workflow/template/workflowStart", - "flowNodeType": "pluginInput", - "showStatus": false, - "position": { - "x": 407.2817920483865, - "y": -101.45216221730038 - }, - "version": "481", - "inputs": [ - { - "renderTypeList": ["input"], - "selectedTypeIndex": 0, - "valueType": "string", - "canEdit": true, - "key": "apikey", - "label": "apikey", - "description": "Doc2X的验证密匙,对于个人用户可以从Doc2X官网 - 个人信息 - 身份令牌获得", - "required": true, - "toolDescription": "", - "defaultValue": "" - }, - { - "renderTypeList": ["reference"], - "selectedTypeIndex": 0, - "valueType": "arrayString", - "canEdit": true, - "key": "files", - "label": "files", - "description": "待处理的PDF文件", - "required": true, - "toolDescription": "待处理的PDF文件" - }, - { - "renderTypeList": ["switch"], - "selectedTypeIndex": 0, - "valueType": "boolean", - "canEdit": true, - "key": "ocr", - "label": "ocr", - "description": "是否开启对PDF文件内图片的OCR识别,建议开启", - "required": true, - "toolDescription": "", - "defaultValue": true - } - ], - "outputs": [ - { - "id": "apikey", - "valueType": "string", - "key": "apikey", - "label": "apikey", - "type": "hidden" - }, - { - "id": "url", - "valueType": "arrayString", - "key": "files", - "label": "files", - "type": "hidden" - }, - { - "id": "formula", - "valueType": "boolean", - "key": "ocr", - "label": "ocr", - "type": "hidden" - } - ] - }, - { - "nodeId": "pluginOutput", - "name": "插件输出", - "intro": "自定义配置外部输出,使用插件时,仅暴露自定义配置的输出", - "avatar": "core/workflow/template/pluginOutput", - "flowNodeType": "pluginOutput", - "showStatus": false, - "position": { - "x": 1842.070888321717, - "y": -101.45216221730038 - }, - "version": "481", - "inputs": [ - { - "renderTypeList": ["reference"], - "valueType": "string", - "canEdit": true, - "key": "result", - "label": "result", - "description": "处理结果,由文件名以及文档内容组成,多个文件之间由横线分隔开", - "value": ["zHG5jJBkXmjB", "xWQuEf50F3mr"] - }, - { - "renderTypeList": ["reference"], - "valueType": "string", - "canEdit": true, - "key": "failreason", - "label": "failreason", - "description": "文件处理失败原因,由文件名以及报错组成,多个文件之间由横线分隔开", - "value": ["zHG5jJBkXmjB", "yDxzW5CFalGw"] - }, - { - "renderTypeList": ["reference"], - "valueType": "boolean", - "canEdit": true, - "key": "success", - "label": "success", - "description": "是否全部文件都处理成功,如有没有处理成功的文件,失败原因将会输出在failreason中", - "value": ["zHG5jJBkXmjB", "m6CJJj7GFud5"] - } - ], - "outputs": [] - }, - { - "nodeId": "zHG5jJBkXmjB", - "name": "HTTP 请求", - "intro": "可以发出一个 HTTP 请求,实现更为复杂的操作(联网搜索、数据库查询等)", - "avatar": "core/workflow/template/httpRequest", - "flowNodeType": "httpRequest468", - "showStatus": true, - "position": { - "x": 1077.7986740892777, - "y": -496.9521622173004 - }, - "version": "481", - "inputs": [ - { - "key": "system_addInputParam", - "renderTypeList": ["addInputParam"], - "valueType": "dynamic", - "label": "", - "required": false, - "description": "common:core.module.input.description.HTTP Dynamic Input", - "customInputConfig": { - "selectValueTypeList": [ - "string", - "number", - "boolean", - "object", - "arrayString", - "arrayNumber", - "arrayBoolean", - "arrayObject", - "arrayAny", - "any", - "chatHistory", - "datasetQuote", - "dynamic", - "selectApp", - "selectDataset" - ], - "showDescription": false, - "showDefaultValue": true - }, - "debugLabel": "", - "toolDescription": "" - }, - { - "key": "system_httpMethod", - "renderTypeList": ["custom"], - "valueType": "string", - "label": "", - "value": "POST", - "required": true, - "debugLabel": "", - "toolDescription": "" - }, - { - "key": "system_httpTimeout", - "renderTypeList": ["custom"], - "valueType": "number", - "label": "", - "value": 30, - "min": 5, - "max": 600, - "required": true, - "debugLabel": "", - "toolDescription": "" - }, - { - "key": "system_httpReqUrl", - "renderTypeList": ["hidden"], - "valueType": "string", - "label": "", - "description": "common:core.module.input.description.Http Request Url", - "placeholder": "https://api.ai.com/getInventory", - "required": false, - "value": "Doc2X/FilePDF2text", - "debugLabel": "", - "toolDescription": "" - }, - { - "key": "system_httpHeader", - "renderTypeList": ["custom"], - "valueType": "any", - "value": [], - "label": "", - "description": "common:core.module.input.description.Http Request Header", - "placeholder": "common:core.module.input.description.Http Request Header", - "required": false, - "debugLabel": "", - "toolDescription": "" - }, - { - "key": "system_httpParams", - "renderTypeList": ["hidden"], - "valueType": "any", - "value": [], - "label": "", - "required": false, - "debugLabel": "", - "toolDescription": "" - }, - { - "key": "system_httpJsonBody", - "renderTypeList": ["hidden"], - "valueType": "any", - "value": "{\n \"apikey\": \"{{apikey}}\",\n \"files\": {{files}},\n \"ocr\": {{ocr}}\n}", - "label": "", - "required": false, - "debugLabel": "", - "toolDescription": "" - }, - { - "key": "system_httpFormBody", - "renderTypeList": ["hidden"], - "valueType": "any", - "value": [], - "label": "", - "required": false, - "debugLabel": "", - "toolDescription": "" - }, - { - "key": "system_httpContentType", - "renderTypeList": ["hidden"], - "valueType": "string", - "value": "json", - "label": "", - "required": false, - "debugLabel": "", - "toolDescription": "" - }, - { - "renderTypeList": ["reference"], - "valueType": "string", - "canEdit": true, - "key": "apikey", - "label": "apikey", - "customInputConfig": { - "selectValueTypeList": [ - "string", - "number", - "boolean", - "object", - "arrayString", - "arrayNumber", - "arrayBoolean", - "arrayObject", - "arrayAny", - "any", - "chatHistory", - "datasetQuote", - "dynamic", - "selectApp", - "selectDataset" - ], - "showDescription": false, - "showDefaultValue": true - }, - "required": true, - "value": ["pluginInput", "apikey"] - }, - { - "renderTypeList": ["reference"], - "valueType": "arrayString", - "canEdit": true, - "key": "files", - "label": "files", - "customInputConfig": { - "selectValueTypeList": [ - "string", - "number", - "boolean", - "object", - "arrayString", - "arrayNumber", - "arrayBoolean", - "arrayObject", - "arrayAny", - "any", - "chatHistory", - "datasetQuote", - "dynamic", - "selectApp", - "selectDataset" - ], - "showDescription": false, - "showDefaultValue": true - }, - "required": true, - "value": ["pluginInput", "url"] - }, - { - "renderTypeList": ["reference"], - "valueType": "boolean", - "canEdit": true, - "key": "ocr", - "label": "ocr", - "customInputConfig": { - "selectValueTypeList": [ - "string", - "number", - "boolean", - "object", - "arrayString", - "arrayNumber", - "arrayBoolean", - "arrayObject", - "arrayAny", - "any", - "chatHistory", - "datasetQuote", - "dynamic", - "selectApp", - "selectDataset" - ], - "showDescription": false, - "showDefaultValue": true - }, - "required": true, - "value": ["pluginInput", "formula"] - } - ], - "outputs": [ - { - "id": "error", - "key": "error", - "label": "workflow:request_error", - "description": "HTTP请求错误信息,成功时返回空", - "valueType": "object", - "type": "static" - }, - { - "id": "httpRawResponse", - "key": "httpRawResponse", - "required": true, - "label": "workflow:raw_response", - "description": "HTTP请求的原始响应。只能接受字符串或JSON类型响应数据。", - "valueType": "any", - "type": "static" - }, - { - "id": "system_addOutputParam", - "key": "system_addOutputParam", - "type": "dynamic", - "valueType": "dynamic", - "label": "", - "customFieldConfig": { - "selectValueTypeList": [ - "string", - "number", - "boolean", - "object", - "arrayString", - "arrayNumber", - "arrayBoolean", - "arrayObject", - "any", - "chatHistory", - "datasetQuote", - "dynamic", - "selectApp", - "selectDataset" - ], - "showDescription": false, - "showDefaultValue": false - } - }, - { - "id": "xWQuEf50F3mr", - "valueType": "string", - "type": "dynamic", - "key": "result", - "label": "result" - }, - { - "id": "m6CJJj7GFud5", - "valueType": "boolean", - "type": "dynamic", - "key": "success", - "label": "success" - }, - { - "id": "yDxzW5CFalGw", - "valueType": "string", - "type": "dynamic", - "key": "failreason", - "label": "failreason" - } - ] - } - ], - "edges": [ - { - "source": "pluginInput", - "target": "zHG5jJBkXmjB", - "sourceHandle": "pluginInput-source-right", - "targetHandle": "zHG5jJBkXmjB-target-left" - }, - { - "source": "zHG5jJBkXmjB", - "target": "pluginOutput", - "sourceHandle": "zHG5jJBkXmjB-source-right", - "targetHandle": "pluginOutput-target-left" - } - ] - } -} diff --git a/packages/plugins/src/Doc2X/PDF2text/index.ts b/packages/plugins/src/Doc2X/PDF2text/index.ts new file mode 100644 index 000000000000..539fa62316e4 --- /dev/null +++ b/packages/plugins/src/Doc2X/PDF2text/index.ts @@ -0,0 +1,182 @@ +import { delay } from '@fastgpt/global/common/system/utils'; +import axios from 'axios'; +import { addLog } from '@fastgpt/service/common/system/log'; +import { result } from 'lodash'; + +type Props = { + apikey: string; + files: any; + ocr: boolean; +}; + +// Response type same as HTTP outputs +type Response = Promise<{ + result: string; + failreason: string; + success: boolean; +}>; + +const main = async ({ apikey, files }: Props): Response => { + // Check the apikey + if (!apikey) { + return { + result: '', + failreason: `API key is required`, + success: false + }; + } + let final_result = ''; + let fail_reason = ''; + let flag = false; + //Convert the String to Array or String + let All_URL: Array; + try { + const parsed = JSON.parse(files); + if (Array.isArray(parsed)) { + All_URL = parsed; + } else { + All_URL = [String(parsed)]; + } + } catch (e) { + // Set it as String + All_URL = [String(files)]; + } + const axiosInstance = axios.create({ + timeout: 30000 // 30 seconds timeout + }); + + //Process each file one by one + for await (const url of All_URL) { + //Fetch the pdf and check its content type + let PDFResponse; + try { + PDFResponse = await axiosInstance.get(url, { responseType: 'arraybuffer' }); + } catch (e) { + fail_reason += `\n---\nFile:${url} \n\nFailed to fetch image from URL: ${e}\n\n`; + flag = true; + continue; + } + if (PDFResponse.status !== 200) { + fail_reason += `\n---\nFile:${url} \n\nFailed to fetch PDF from URL: ${PDFResponse.statusText}\n\n`; + flag = true; + continue; + } + + const contentType = PDFResponse.headers['content-type']; + const file_name = url.match(/read\/([^?]+)/)?.[1] || 'unknown.pdf'; + if (!contentType || !contentType.startsWith('application/pdf')) { + fail_reason += `\n---\nFile:${file_name}\n\nThe provided file does not point to a PDF: ${contentType}\n\n`; + flag = true; + continue; + } + const blob = new Blob([PDFResponse.data], { type: 'application/pdf' }); + + // Get pre-upload URL first + let preupload_url = 'https://v2.doc2x.noedgeai.com/api/v2/parse/preupload'; + let preupload_response; + try { + preupload_response = await axiosInstance.post(preupload_url, null, { + headers: { + Authorization: `Bearer ${apikey}` + } + }); + } catch (e) { + fail_reason += `\n---\nFile:${file_name}\n\nFailed to get pre-upload URL: ${e}\n\n`; + flag = true; + continue; + } + + if (preupload_response.status !== 200) { + fail_reason += `\n---\nFile:${file_name}\n\nFailed to get pre-upload URL: ${preupload_response.statusText}\n\n`; + flag = true; + continue; + } + + const preupload_data = preupload_response.data; + if (preupload_data.code !== 'success') { + fail_reason += `\n---\nFile:${file_name}\n\nFailed to get pre-upload URL: ${JSON.stringify(preupload_data)}\n\n`; + flag = true; + continue; + } + + const upload_url = preupload_data.data.url; + const uid = preupload_data.data.uid; + // Upload file to pre-signed URL with binary stream + try { + const response = await axiosInstance.put(upload_url, blob, { + headers: { + 'Content-Type': 'application/pdf' + } + }); + if (response.status !== 200) { + throw new Error(`Upload failed with status ${response.status}: ${response.statusText}`); + } + } catch (e) { + fail_reason += `\n---\nFile:${file_name}\n\nFailed to upload file (uid: ${uid}): ${e}\n\n`; + flag = true; + continue; + } + + // Get the result by uid + const result_url = `https://v2.doc2x.noedgeai.com/api/v2/parse/status?uid=${uid}`; + let required_flag = true; + let result = ''; + + // Wait for the result, at most 90s + const maxAttempts = 30; + for await (const _ of Array(maxAttempts).keys()) { + let result_response; + try { + result_response = await axiosInstance.get(result_url, { + headers: { + Authorization: `Bearer ${apikey}` + } + }); + } catch (e) { + fail_reason += `\n---\nFile:${file_name}\n\nFailed to get result (uid: ${uid}): ${e}\n\n`; + flag = true; + required_flag = false; + break; + } + + const result_data = result_response.data; + if (!['ok', 'success'].includes(result_data.code)) { + fail_reason += `\n---\nFile:${file_name}\n\nFailed to get result (uid: ${uid}): ${JSON.stringify(result_data)}\n\n`; + flag = true; + required_flag = false; + break; + } + if (['ready', 'processing'].includes(result_data.data.status)) { + await delay(3000); + } else if (result_data.data.status === 'success') { + result = await Promise.all( + result_data.data.result.pages.map((page: { md: any }) => page.md) + ).then((pages) => pages.join('\n')); + // Do some post-processing + result = result.replace(/\\[\(\)]/g, '$').replace(/\\[\[\]]/g, '$$'); + result = result.replace(/]*)?(?:\s*\/>|>)/g, '![img]($1)'); + final_result += `\n---\nFile:${file_name}\n\n${result}\n\n`; + required_flag = false; + break; + } else { + fail_reason += `\n---\nFile:${file_name}\n\nFailed to get result (uid: ${uid}): ${result_data.data.status}\n\n`; + flag = true; + required_flag = false; + break; + } + } + + if (required_flag) { + fail_reason += `\n---\nFile:${file_name}\n\nTimeout for uid ${uid}\n\n`; + flag = true; + } + } + + return { + result: final_result, + failreason: fail_reason, + success: !flag + }; +}; + +export default main; diff --git a/packages/plugins/src/Doc2X/FileImg2text/template.json b/packages/plugins/src/Doc2X/PDF2text/template.json similarity index 77% rename from packages/plugins/src/Doc2X/FileImg2text/template.json rename to packages/plugins/src/Doc2X/PDF2text/template.json index 37d992a3f8bc..65961443b454 100644 --- a/packages/plugins/src/Doc2X/FileImg2text/template.json +++ b/packages/plugins/src/Doc2X/PDF2text/template.json @@ -1,10 +1,10 @@ { "author": "Menghuan1918", "version": "488", - "name": "Doc2X 图像(文件)识别", + "name": "Doc2X PDF识别", "avatar": "plugins/doc2x", - "intro": "将上传的图片文件发送至Doc2X进行解析,返回带LaTeX公式的markdown格式的文本", - "courseUrl": "https://fael3z0zfze.feishu.cn/wiki/Rkc5witXWiJoi5kORd2cofh6nDg?fromScene=spaceOverview", + "intro": "将PDF文件发送至Doc2X进行解析,返回结构化的LaTeX公式的文本(markdown),支持传入String类型的URL或者流程输出中的文件链接变量", + "inputExplanationUrl": "https://fael3z0zfze.feishu.cn/wiki/Rkc5witXWiJoi5kORd2cofh6nDg?fromScene=spaceOverview", "showStatus": true, "weight": 10, @@ -13,30 +13,16 @@ "workflow": { "nodes": [ - { - "nodeId": "pluginConfig", - "name": "common:core.module.template.system_config", - "intro": "", - "avatar": "core/workflow/template/systemConfig", - "flowNodeType": "pluginConfig", - "position": { - "x": -90.53591960393504, - "y": -17.580286776561252 - }, - "version": "4811", - "inputs": [], - "outputs": [] - }, { "nodeId": "pluginInput", - "name": "插件开始", + "name": "自定义插件输入", "intro": "可以配置插件需要哪些输入,利用这些输入来运行插件", "avatar": "core/workflow/template/workflowStart", "flowNodeType": "pluginInput", "showStatus": false, "position": { - "x": 368.6800424053505, - "y": -17.580286776561252 + "x": -139.66495007440972, + "y": -90.99689735553712 }, "version": "481", "inputs": [ @@ -47,45 +33,22 @@ "canEdit": true, "key": "apikey", "label": "apikey", - "description": "Doc2X的验证密匙,对于个人用户可以从Doc2X官网 - 个人信息 - 身份令牌获得", + "description": "Doc2X的API密匙,可以从Doc2X开放平台获得", "required": true, - "toolDescription": "", - "defaultValue": "" + "defaultValue": "", + "list": [] }, { "renderTypeList": ["reference"], "selectedTypeIndex": 0, - "valueType": "arrayString", + "valueType": "any", "canEdit": true, "key": "files", "label": "files", - "description": "待处理图片文件", - "required": true, - "toolDescription": "待处理图片文件" - }, - { - "renderTypeList": ["switch"], - "selectedTypeIndex": 0, - "valueType": "boolean", - "canEdit": true, - "key": "img_correction", - "label": "img_correction", - "description": "是否启用图形矫正功能", + "description": "待处理的PDF文件变量或URL地址", "required": true, - "toolDescription": "", - "defaultValue": false - }, - { - "renderTypeList": ["switch"], - "selectedTypeIndex": 0, - "valueType": "boolean", - "canEdit": true, - "key": "formula", - "label": "formula", - "description": "是否开启纯公式识别(仅适用于图片内容仅有公式时)", - "required": true, - "toolDescription": "", - "defaultValue": false + "toolDescription": "待处理的PDF文件变量或URL地址", + "list": [] } ], "outputs": [ @@ -98,37 +61,23 @@ }, { "id": "url", - "valueType": "arrayString", + "valueType": "any", "key": "files", "label": "files", "type": "hidden" - }, - { - "id": "img_correction", - "valueType": "boolean", - "key": "img_correction", - "label": "img_correction", - "type": "hidden" - }, - { - "id": "formula", - "valueType": "boolean", - "key": "formula", - "label": "formula", - "type": "hidden" } ] }, { "nodeId": "pluginOutput", - "name": "插件输出", + "name": "自定义插件输出", "intro": "自定义配置外部输出,使用插件时,仅暴露自定义配置的输出", "avatar": "core/workflow/template/pluginOutput", "flowNodeType": "pluginOutput", "showStatus": false, "position": { - "x": 1796.2235867744578, - "y": 6.419713223438748 + "x": 1808.5347800638815, + "y": -105.67504356429907 }, "version": "481", "inputs": [ @@ -138,7 +87,7 @@ "canEdit": true, "key": "result", "label": "result", - "description": "处理结果(或者是报错信息)", + "description": "处理结果,由文件名以及文档内容组成,多个文件之间由横线分隔开", "value": ["zHG5jJBkXmjB", "xWQuEf50F3mr"] }, { @@ -147,8 +96,8 @@ "canEdit": true, "key": "failreason", "label": "failreason", - "description": "文件处理失败原因,由文件名以及报错组成,多个文件之间由横线分隔开", - "value": ["zHG5jJBkXmjB", "jbv4nVZvmFXm"] + "description": "文件处理失败原因,由文件名以及报错组成,多个文件之间由横线分隔开,如所有文件处理成功则为空", + "value": ["zHG5jJBkXmjB", "yDxzW5CFalGw"] }, { "renderTypeList": ["reference"], @@ -157,7 +106,7 @@ "key": "success", "label": "success", "description": "是否全部文件都处理成功,如有没有处理成功的文件,失败原因将会输出在failreason中", - "value": ["zHG5jJBkXmjB", "k46cjNulVk5Y"] + "value": ["zHG5jJBkXmjB", "m6CJJj7GFud5"] } ], "outputs": [] @@ -170,8 +119,8 @@ "flowNodeType": "httpRequest468", "showStatus": true, "position": { - "x": 1081.967607938733, - "y": -426.08028677656125 + "x": 1077.7986740892777, + "y": -496.9521622173004 }, "version": "481", "inputs": [ @@ -236,7 +185,7 @@ "description": "common:core.module.input.description.Http Request Url", "placeholder": "https://api.ai.com/getInventory", "required": false, - "value": "Doc2X/FileImg2text", + "value": "Doc2X/PDF2text", "debugLabel": "", "toolDescription": "" }, @@ -266,7 +215,7 @@ "key": "system_httpJsonBody", "renderTypeList": ["hidden"], "valueType": "any", - "value": "{\n \"apikey\": \"{{apikey}}\",\n \"files\": {{files}},\n \"img_correction\": {{img_correction}},\n \"formula\": {{formula}}\n}", + "value": "{\n \"apikey\": \"{{apikey}}\",\n \"files\": \"{{files}}\"}", "label": "", "required": false, "debugLabel": "", @@ -324,7 +273,7 @@ }, { "renderTypeList": ["reference"], - "valueType": "arrayString", + "valueType": "string", "canEdit": true, "key": "files", "label": "files", @@ -350,15 +299,34 @@ "showDefaultValue": true }, "required": true, - "value": ["pluginInput", "url"] + "value": ["pMBi7J7vcsqB", "system_text"] + } + ], + "outputs": [ + { + "id": "error", + "key": "error", + "label": "workflow:request_error", + "description": "HTTP请求错误信息,成功时返回空", + "valueType": "object", + "type": "static" }, { - "renderTypeList": ["reference"], - "valueType": "boolean", - "canEdit": true, - "key": "img_correction", - "label": "img_correction", - "customInputConfig": { + "id": "httpRawResponse", + "key": "httpRawResponse", + "required": true, + "label": "workflow:raw_response", + "description": "HTTP请求的原始响应。只能接受字符串或JSON类型响应数据。", + "valueType": "any", + "type": "static" + }, + { + "id": "system_addOutputParam", + "key": "system_addOutputParam", + "type": "dynamic", + "valueType": "dynamic", + "label": "", + "customFieldConfig": { "selectValueTypeList": [ "string", "number", @@ -368,7 +336,6 @@ "arrayNumber", "arrayBoolean", "arrayObject", - "arrayAny", "any", "chatHistory", "datasetQuote", @@ -377,17 +344,51 @@ "selectDataset" ], "showDescription": false, - "showDefaultValue": true - }, - "required": true, - "value": ["pluginInput", "img_correction"] + "showDefaultValue": false + } }, { - "renderTypeList": ["reference"], + "id": "xWQuEf50F3mr", + "valueType": "string", + "type": "dynamic", + "key": "result", + "label": "result" + }, + { + "id": "m6CJJj7GFud5", "valueType": "boolean", - "canEdit": true, - "key": "formula", - "label": "formula", + "type": "dynamic", + "key": "success", + "label": "success" + }, + { + "id": "yDxzW5CFalGw", + "valueType": "string", + "type": "dynamic", + "key": "failreason", + "label": "failreason" + } + ] + }, + { + "nodeId": "pMBi7J7vcsqB", + "name": "文本拼接", + "intro": "可对固定或传入的文本进行加工后输出,非字符串类型数据最终会转成字符串类型。", + "avatar": "core/workflow/template/textConcat", + "flowNodeType": "textEditor", + "position": { + "x": 469.8489508985863, + "y": -177.67504356429907 + }, + "version": "486", + "inputs": [ + { + "key": "system_addInputParam", + "renderTypeList": ["addInputParam"], + "valueType": "dynamic", + "label": "", + "required": false, + "description": "workflow:dynamic_input_description_concat", "customInputConfig": { "selectValueTypeList": [ "string", @@ -407,37 +408,29 @@ "selectDataset" ], "showDescription": false, - "showDefaultValue": true + "showDefaultValue": false }, - "required": true, - "value": ["pluginInput", "formula"] - } - ], - "outputs": [ - { - "id": "error", - "key": "error", - "label": "workflow:request_error", - "description": "HTTP请求错误信息,成功时返回空", - "valueType": "object", - "type": "static" + "debugLabel": "", + "toolDescription": "" }, { - "id": "httpRawResponse", - "key": "httpRawResponse", + "key": "system_textareaInput", + "renderTypeList": ["textarea"], + "valueType": "string", "required": true, - "label": "workflow:raw_response", - "description": "HTTP请求的原始响应。只能接受字符串或JSON类型响应数据。", - "valueType": "any", - "type": "static" + "label": "拼接文本", + "placeholder": "workflow:input_variable_list", + "value": "{{files}}", + "debugLabel": "", + "toolDescription": "" }, { - "id": "system_addOutputParam", - "key": "system_addOutputParam", - "type": "dynamic", - "valueType": "dynamic", - "label": "", - "customFieldConfig": { + "renderTypeList": ["reference"], + "valueType": "any", + "canEdit": true, + "key": "files", + "label": "files", + "customInputConfig": { "selectValueTypeList": [ "string", "number", @@ -447,6 +440,7 @@ "arrayNumber", "arrayBoolean", "arrayObject", + "arrayAny", "any", "chatHistory", "datasetQuote", @@ -456,45 +450,61 @@ ], "showDescription": false, "showDefaultValue": false - } - }, - { - "id": "xWQuEf50F3mr", - "valueType": "string", - "type": "dynamic", - "key": "result", - "label": "result" - }, + }, + "required": true, + "value": ["pluginInput", "url"] + } + ], + "outputs": [ { - "id": "jbv4nVZvmFXm", + "id": "system_text", + "key": "system_text", + "label": "workflow:concatenation_result", + "type": "static", "valueType": "string", - "type": "dynamic", - "key": "failreason", - "label": "failreason" - }, - { - "id": "k46cjNulVk5Y", - "valueType": "boolean", - "type": "dynamic", - "key": "success", - "label": "success" + "description": "" } ] } ], "edges": [ - { - "source": "pluginInput", - "target": "zHG5jJBkXmjB", - "sourceHandle": "pluginInput-source-right", - "targetHandle": "zHG5jJBkXmjB-target-left" - }, { "source": "zHG5jJBkXmjB", "target": "pluginOutput", "sourceHandle": "zHG5jJBkXmjB-source-right", "targetHandle": "pluginOutput-target-left" + }, + { + "source": "pluginInput", + "target": "pMBi7J7vcsqB", + "sourceHandle": "pluginInput-source-right", + "targetHandle": "pMBi7J7vcsqB-target-left" + }, + { + "source": "pMBi7J7vcsqB", + "target": "zHG5jJBkXmjB", + "sourceHandle": "pMBi7J7vcsqB-source-right", + "targetHandle": "zHG5jJBkXmjB-target-left" } - ] + ], + "chatConfig": { + "questionGuide": false, + "ttsConfig": { + "type": "web" + }, + "whisperConfig": { + "open": false, + "autoSend": false, + "autoTTSResponse": false + }, + "chatInputGuide": { + "open": false, + "textList": [], + "customUrl": "" + }, + "instruction": "", + "variables": [], + "welcomeText": "" + } } } diff --git a/packages/plugins/src/Doc2X/URLImg2text/index.ts b/packages/plugins/src/Doc2X/URLImg2text/index.ts deleted file mode 100644 index 0f51e702a655..000000000000 --- a/packages/plugins/src/Doc2X/URLImg2text/index.ts +++ /dev/null @@ -1,166 +0,0 @@ -import { delay } from '@fastgpt/global/common/system/utils'; -import { addLog } from '@fastgpt/service/common/system/log'; - -type Props = { - apikey: string; - url: string; - img_correction: boolean; - formula: boolean; -}; - -type Response = Promise<{ - result: string; - success: boolean; -}>; - -const main = async ({ apikey, url, img_correction, formula }: Props): Response => { - // Check the apikey - if (!apikey) { - return { - result: `API key is required`, - success: false - }; - } - - let real_api_key = apikey; - if (!apikey.startsWith('sk-')) { - const response = await fetch('https://api.doc2x.noedgeai.com/api/token/refresh', { - method: 'POST', - headers: { - Authorization: `Bearer ${apikey}` - } - }); - if (response.status !== 200) { - return { - result: `Get token failed: ${await response.text()}`, - success: false - }; - } - const data = await response.json(); - real_api_key = data.data.token; - } - - let imageResponse; - // Fetch the image and check its content type - try { - imageResponse = await fetch(url); - } catch (e) { - return { - result: `Failed to fetch image from URL: ${url} with error: ${e}`, - success: false - }; - } - - if (!imageResponse.ok) { - return { - result: `Failed to fetch image from URL: ${url}`, - success: false - }; - } - - const contentType = imageResponse.headers.get('content-type'); - if (!contentType || !contentType.startsWith('image/')) { - return { - result: `The provided URL does not point to an image: ${contentType}`, - success: false - }; - } - - const blob = await imageResponse.blob(); - const formData = new FormData(); - const fileName = url.split('/').pop()?.split('?')[0] || 'image'; - formData.append('file', blob, fileName); - formData.append('img_correction', img_correction ? '1' : '0'); - formData.append('equation', formula ? '1' : '0'); - - let upload_url = 'https://api.doc2x.noedgeai.com/api/platform/async/img'; - if (real_api_key.startsWith('sk-')) { - upload_url = 'https://api.doc2x.noedgeai.com/api/v1/async/img'; - } - - let uuid; - const uploadAttempts = [1, 2, 3]; - for await (const attempt of uploadAttempts) { - const upload_response = await fetch(upload_url, { - method: 'POST', - headers: { - Authorization: `Bearer ${real_api_key}` - }, - body: formData - }); - - if (!upload_response.ok) { - // Rate limit, wait for 10s and retry at most 3 times - if (upload_response.status === 429 && attempt < 3) { - await delay(10000); - continue; - } - return { - result: `Failed to upload image: ${await upload_response.text()}`, - success: false - }; - } - - const upload_data = await upload_response.json(); - uuid = upload_data.data.uuid; - break; - } - - // Get the result by uuid - let result_url = 'https://api.doc2x.noedgeai.com/api/platform/async/status?uuid=' + uuid; - if (real_api_key.startsWith('sk-')) { - result_url = 'https://api.doc2x.noedgeai.com/api/v1/async/status?uuid=' + uuid; - } - const maxAttempts = 100; - // Wait for the result, at most 100s - for await (const _ of Array(maxAttempts).keys()) { - const result_response = await fetch(result_url, { - headers: { - Authorization: `Bearer ${real_api_key}` - } - }); - if (!result_response.ok) { - return { - result: `Failed to get result: ${await result_response.text()}`, - success: false - }; - } - const result_data = await result_response.json(); - if (['ready', 'processing'].includes(result_data.data.status)) { - await delay(1000); - } else if (result_data.data.status === 'pages limit exceeded') { - return { - result: 'Doc2X Pages limit exceeded', - success: false - }; - } else if (result_data.data.status === 'success') { - let result; - try { - result = result_data.data.result.pages[0].md; - result = result.replace(/\\[\(\)]/g, '$').replace(/\\[\[\]]/g, '$$'); - } catch { - // no pages - return { - result: '', - success: true - }; - } - return { - result: result, - success: true - }; - } else { - return { - result: `Failed to get result: ${await result_data.text()}`, - success: false - }; - } - } - - return { - result: 'Timeout waiting for result', - success: false - }; -}; - -export default main; diff --git a/packages/plugins/src/Doc2X/URLImg2text/template.json b/packages/plugins/src/Doc2X/URLImg2text/template.json deleted file mode 100644 index 6afbb76bf59e..000000000000 --- a/packages/plugins/src/Doc2X/URLImg2text/template.json +++ /dev/null @@ -1,484 +0,0 @@ -{ - "author": "Menghuan1918", - "version": "488", - "name": "Doc2X 图像(URL)识别", - "avatar": "plugins/doc2x", - "intro": "从URL下载图片并发送至Doc2X进行解析,返回带LaTeX公式的markdown格式的文本", - "courseUrl": "https://fael3z0zfze.feishu.cn/wiki/Rkc5witXWiJoi5kORd2cofh6nDg?fromScene=spaceOverview", - "showStatus": true, - "weight": 10, - - "isTool": true, - "templateType": "tools", - - "workflow": { - "nodes": [ - { - "nodeId": "pluginInput", - "name": "插件开始", - "intro": "可以配置插件需要哪些输入,利用这些输入来运行插件", - "avatar": "core/workflow/template/workflowStart", - "flowNodeType": "pluginInput", - "showStatus": false, - "position": { - "x": 353.91678143999377, - "y": -75.09744210499466 - }, - "version": "481", - "inputs": [ - { - "renderTypeList": ["input"], - "selectedTypeIndex": 0, - "valueType": "string", - "canEdit": true, - "key": "apikey", - "label": "apikey", - "description": "Doc2X的验证密匙,对于个人用户可以从Doc2X官网 - 个人信息 - 身份令牌获得", - "required": true, - "toolDescription": "", - "defaultValue": "" - }, - { - "renderTypeList": ["reference"], - "selectedTypeIndex": 0, - "valueType": "string", - "canEdit": true, - "key": "url", - "label": "url", - "description": "待处理图片的URL", - "required": true, - "toolDescription": "待处理图片的URL" - }, - { - "renderTypeList": ["switch"], - "selectedTypeIndex": 0, - "valueType": "boolean", - "canEdit": true, - "key": "img_correction", - "label": "img_correction", - "description": "是否启用图形矫正功能", - "required": true, - "toolDescription": "", - "defaultValue": false - }, - { - "renderTypeList": ["switch"], - "selectedTypeIndex": 0, - "valueType": "boolean", - "canEdit": true, - "key": "formula", - "label": "formula", - "description": "是否开启纯公式识别(仅适用于图片内容仅有公式时)", - "required": true, - "toolDescription": "", - "defaultValue": false - } - ], - "outputs": [ - { - "id": "apikey", - "valueType": "string", - "key": "apikey", - "label": "apikey", - "type": "hidden" - }, - { - "id": "url", - "valueType": "string", - "key": "url", - "label": "url", - "type": "hidden" - }, - { - "id": "img_correction", - "valueType": "boolean", - "key": "img_correction", - "label": "img_correction", - "type": "hidden" - }, - { - "id": "formula", - "valueType": "boolean", - "key": "formula", - "label": "formula", - "type": "hidden" - } - ] - }, - { - "nodeId": "pluginOutput", - "name": "插件输出", - "intro": "自定义配置外部输出,使用插件时,仅暴露自定义配置的输出", - "avatar": "core/workflow/template/pluginOutput", - "flowNodeType": "pluginOutput", - "showStatus": false, - "position": { - "x": 1703.581616889916, - "y": -14.097442104994656 - }, - "version": "481", - "inputs": [ - { - "renderTypeList": ["reference"], - "valueType": "string", - "canEdit": true, - "key": "result", - "label": "result", - "description": "处理结果(或者是报错信息)", - "value": ["zHG5jJBkXmjB", "xWQuEf50F3mr"] - }, - { - "renderTypeList": ["reference"], - "valueType": "boolean", - "canEdit": true, - "key": "success", - "label": "success", - "description": "是否处理成功", - "value": ["zHG5jJBkXmjB", "m6CJJj7GFud5"] - } - ], - "outputs": [] - }, - { - "nodeId": "zHG5jJBkXmjB", - "name": "HTTP 请求", - "intro": "可以发出一个 HTTP 请求,实现更为复杂的操作(联网搜索、数据库查询等)", - "avatar": "core/workflow/template/httpRequest", - "flowNodeType": "httpRequest468", - "showStatus": true, - "position": { - "x": 1000.6685388413375, - "y": -457.0974421049947 - }, - "version": "481", - "inputs": [ - { - "key": "system_addInputParam", - "renderTypeList": ["addInputParam"], - "valueType": "dynamic", - "label": "", - "required": false, - "description": "common:core.module.input.description.HTTP Dynamic Input", - "customInputConfig": { - "selectValueTypeList": [ - "string", - "number", - "boolean", - "object", - "arrayString", - "arrayNumber", - "arrayBoolean", - "arrayObject", - "arrayAny", - "any", - "chatHistory", - "datasetQuote", - "dynamic", - "selectApp", - "selectDataset" - ], - "showDescription": false, - "showDefaultValue": true - }, - "debugLabel": "", - "toolDescription": "" - }, - { - "key": "system_httpMethod", - "renderTypeList": ["custom"], - "valueType": "string", - "label": "", - "value": "POST", - "required": true, - "debugLabel": "", - "toolDescription": "" - }, - { - "key": "system_httpTimeout", - "renderTypeList": ["custom"], - "valueType": "number", - "label": "", - "value": 30, - "min": 5, - "max": 600, - "required": true, - "debugLabel": "", - "toolDescription": "" - }, - { - "key": "system_httpReqUrl", - "renderTypeList": ["hidden"], - "valueType": "string", - "label": "", - "description": "common:core.module.input.description.Http Request Url", - "placeholder": "https://api.ai.com/getInventory", - "required": false, - "value": "Doc2X/URLImg2text", - "debugLabel": "", - "toolDescription": "" - }, - { - "key": "system_httpHeader", - "renderTypeList": ["custom"], - "valueType": "any", - "value": [], - "label": "", - "description": "common:core.module.input.description.Http Request Header", - "placeholder": "common:core.module.input.description.Http Request Header", - "required": false, - "debugLabel": "", - "toolDescription": "" - }, - { - "key": "system_httpParams", - "renderTypeList": ["hidden"], - "valueType": "any", - "value": [], - "label": "", - "required": false, - "debugLabel": "", - "toolDescription": "" - }, - { - "key": "system_httpJsonBody", - "renderTypeList": ["hidden"], - "valueType": "any", - "value": "{\n \"apikey\": \"{{apikey}}\",\n \"url\": \"{{url}}\",\n \"img_correction\": {{img_correction}},\n \"formula\": {{formula}}\n}", - "label": "", - "required": false, - "debugLabel": "", - "toolDescription": "" - }, - { - "key": "system_httpFormBody", - "renderTypeList": ["hidden"], - "valueType": "any", - "value": [], - "label": "", - "required": false, - "debugLabel": "", - "toolDescription": "" - }, - { - "key": "system_httpContentType", - "renderTypeList": ["hidden"], - "valueType": "string", - "value": "json", - "label": "", - "required": false, - "debugLabel": "", - "toolDescription": "" - }, - { - "renderTypeList": ["reference"], - "valueType": "string", - "canEdit": true, - "key": "apikey", - "label": "apikey", - "customInputConfig": { - "selectValueTypeList": [ - "string", - "number", - "boolean", - "object", - "arrayString", - "arrayNumber", - "arrayBoolean", - "arrayObject", - "arrayAny", - "any", - "chatHistory", - "datasetQuote", - "dynamic", - "selectApp", - "selectDataset" - ], - "showDescription": false, - "showDefaultValue": true - }, - "required": true, - "value": ["pluginInput", "apikey"] - }, - { - "renderTypeList": ["reference"], - "valueType": "string", - "canEdit": true, - "key": "url", - "label": "url", - "customInputConfig": { - "selectValueTypeList": [ - "string", - "number", - "boolean", - "object", - "arrayString", - "arrayNumber", - "arrayBoolean", - "arrayObject", - "arrayAny", - "any", - "chatHistory", - "datasetQuote", - "dynamic", - "selectApp", - "selectDataset" - ], - "showDescription": false, - "showDefaultValue": true - }, - "required": true, - "value": ["pluginInput", "url"] - }, - { - "renderTypeList": ["reference"], - "valueType": "boolean", - "canEdit": true, - "key": "img_correction", - "label": "img_correction", - "customInputConfig": { - "selectValueTypeList": [ - "string", - "number", - "boolean", - "object", - "arrayString", - "arrayNumber", - "arrayBoolean", - "arrayObject", - "arrayAny", - "any", - "chatHistory", - "datasetQuote", - "dynamic", - "selectApp", - "selectDataset" - ], - "showDescription": false, - "showDefaultValue": true - }, - "required": true, - "value": ["pluginInput", "img_correction"] - }, - { - "renderTypeList": ["reference"], - "valueType": "boolean", - "canEdit": true, - "key": "formula", - "label": "formula", - "customInputConfig": { - "selectValueTypeList": [ - "string", - "number", - "boolean", - "object", - "arrayString", - "arrayNumber", - "arrayBoolean", - "arrayObject", - "arrayAny", - "any", - "chatHistory", - "datasetQuote", - "dynamic", - "selectApp", - "selectDataset" - ], - "showDescription": false, - "showDefaultValue": true - }, - "required": true, - "value": ["pluginInput", "formula"] - } - ], - "outputs": [ - { - "id": "error", - "key": "error", - "label": "workflow:request_error", - "description": "HTTP请求错误信息,成功时返回空", - "valueType": "object", - "type": "static" - }, - { - "id": "httpRawResponse", - "key": "httpRawResponse", - "required": true, - "label": "workflow:raw_response", - "description": "HTTP请求的原始响应。只能接受字符串或JSON类型响应数据。", - "valueType": "any", - "type": "static" - }, - { - "id": "system_addOutputParam", - "key": "system_addOutputParam", - "type": "dynamic", - "valueType": "dynamic", - "label": "", - "customFieldConfig": { - "selectValueTypeList": [ - "string", - "number", - "boolean", - "object", - "arrayString", - "arrayNumber", - "arrayBoolean", - "arrayObject", - "any", - "chatHistory", - "datasetQuote", - "dynamic", - "selectApp", - "selectDataset" - ], - "showDescription": false, - "showDefaultValue": false - } - }, - { - "id": "xWQuEf50F3mr", - "valueType": "string", - "type": "dynamic", - "key": "result", - "label": "result" - }, - { - "id": "m6CJJj7GFud5", - "valueType": "boolean", - "type": "dynamic", - "key": "success", - "label": "success" - } - ] - }, - { - "nodeId": "sWEDDSeuI9ar", - "name": "系统配置", - "intro": "", - "avatar": "core/workflow/template/systemConfig", - "flowNodeType": "pluginConfig", - "position": { - "x": -117.03701176267538, - "y": -75.09744210499466 - }, - "version": "4811", - "inputs": [], - "outputs": [] - } - ], - "edges": [ - { - "source": "pluginInput", - "target": "zHG5jJBkXmjB", - "sourceHandle": "pluginInput-source-right", - "targetHandle": "zHG5jJBkXmjB-target-left" - }, - { - "source": "zHG5jJBkXmjB", - "target": "pluginOutput", - "sourceHandle": "zHG5jJBkXmjB-source-right", - "targetHandle": "pluginOutput-target-left" - } - ] - } -} diff --git a/packages/plugins/src/Doc2X/URLPDF2text/index.ts b/packages/plugins/src/Doc2X/URLPDF2text/index.ts deleted file mode 100644 index 79ea19fe7ebf..000000000000 --- a/packages/plugins/src/Doc2X/URLPDF2text/index.ts +++ /dev/null @@ -1,156 +0,0 @@ -import { delay } from '@fastgpt/global/common/system/utils'; -import { addLog } from '@fastgpt/service/common/system/log'; - -type Props = { - apikey: string; - url: string; - ocr: boolean; -}; - -// Response type same as HTTP outputs -type Response = Promise<{ - result: string; - success: boolean; -}>; - -const main = async ({ apikey, url, ocr }: Props): Response => { - // Check the apikey - if (!apikey) { - return { - result: `API key is required`, - success: false - }; - } - - let real_api_key = apikey; - if (!apikey.startsWith('sk-')) { - const response = await fetch('https://api.doc2x.noedgeai.com/api/token/refresh', { - method: 'POST', - headers: { - Authorization: `Bearer ${apikey}` - } - }); - if (response.status !== 200) { - return { - result: `Get token failed: ${await response.text()}`, - success: false - }; - } - const data = await response.json(); - real_api_key = data.data.token; - } - - //Fetch the pdf and check its contene type - let PDFResponse; - try { - PDFResponse = await fetch(url); - } catch (e) { - return { - result: `Failed to fetch PDF from URL: ${url} with error: ${e}`, - success: false - }; - } - if (!PDFResponse.ok) { - return { - result: `Failed to fetch PDF from URL: ${url}`, - success: false - }; - } - - const contentType = PDFResponse.headers.get('content-type'); - if (!contentType || !contentType.startsWith('application/pdf')) { - return { - result: `The provided URL does not point to a PDF: ${contentType}`, - success: false - }; - } - - const blob = await PDFResponse.blob(); - const formData = new FormData(); - const fileName = url.split('/').pop()?.split('?')[0] || 'pdf'; - formData.append('file', blob, fileName); - formData.append('ocr', ocr ? '1' : '0'); - - let upload_url = 'https://api.doc2x.noedgeai.com/api/platform/async/pdf'; - if (real_api_key.startsWith('sk-')) { - upload_url = 'https://api.doc2x.noedgeai.com/api/v1/async/pdf'; - } - - let uuid; - const uploadAttempts = [1, 2, 3]; - for await (const attempt of uploadAttempts) { - const upload_response = await fetch(upload_url, { - method: 'POST', - headers: { - Authorization: `Bearer ${real_api_key}` - }, - body: formData - }); - if (!upload_response.ok) { - if (upload_response.status === 429 && attempt < 3) { - await delay(10000); - continue; - } - return { - result: `Failed to upload file: ${await upload_response.text()}`, - success: false - }; - } - const upload_data = await upload_response.json(); - uuid = upload_data.data.uuid; - break; - } - - // Get the result by uuid - let result_url = 'https://api.doc2x.noedgeai.com/api/platform/async/status?uuid=' + uuid; - if (real_api_key.startsWith('sk-')) { - result_url = 'https://api.doc2x.noedgeai.com/api/v1/async/status?uuid=' + uuid; - } - - let result = ''; - // Wait for the result, at most 100s - const maxAttempts = 100; - for await (const _ of Array(maxAttempts).keys()) { - const result_response = await fetch(result_url, { - headers: { - Authorization: `Bearer ${real_api_key}` - } - }); - if (!result_response.ok) { - return { - result: `Failed to get result: ${await result_response.text()}`, - success: false - }; - } - const result_data = await result_response.json(); - if (['ready', 'processing'].includes(result_data.data.status)) { - await delay(1000); - } else if (result_data.data.status === 'pages limit exceeded') { - return { - result: 'Doc2X Pages limit exceeded', - success: false - }; - } else if (result_data.data.status === 'success') { - result = await Promise.all( - result_data.data.result.pages.map((page: { md: any }) => page.md) - ).then((pages) => pages.join('\n')); - result = result.replace(/\\[\(\)]/g, '$').replace(/\\[\[\]]/g, '$$'); - return { - result: result, - success: true - }; - } else { - return { - result: `Failed to get result: ${await result_data.text()}`, - success: false - }; - } - } - - return { - result: 'Timeout waiting for result', - success: false - }; -}; - -export default main; diff --git a/packages/plugins/src/Doc2X/URLPDF2text/template.json b/packages/plugins/src/Doc2X/URLPDF2text/template.json deleted file mode 100644 index 6d0496d05972..000000000000 --- a/packages/plugins/src/Doc2X/URLPDF2text/template.json +++ /dev/null @@ -1,435 +0,0 @@ -{ - "author": "Menghuan1918", - "version": "488", - "name": "Doc2X PDF文件(URL)识别", - "avatar": "plugins/doc2x", - "intro": "从URL下载PDF文件,并发送至Doc2X进行解析,返回带LaTeX公式的markdown格式的文本", - "courseUrl": "https://fael3z0zfze.feishu.cn/wiki/Rkc5witXWiJoi5kORd2cofh6nDg?fromScene=spaceOverview", - "showStatus": true, - "weight": 10, - - "isTool": true, - "templateType": "tools", - - "workflow": { - "nodes": [ - { - "nodeId": "pluginInput", - "name": "插件开始", - "intro": "可以配置插件需要哪些输入,利用这些输入来运行插件", - "avatar": "core/workflow/template/workflowStart", - "flowNodeType": "pluginInput", - "showStatus": false, - "position": { - "x": 388.243055058894, - "y": -75.09744210499466 - }, - "version": "481", - "inputs": [ - { - "renderTypeList": ["input"], - "selectedTypeIndex": 0, - "valueType": "string", - "canEdit": true, - "key": "apikey", - "label": "apikey", - "description": "Doc2X的验证密匙,对于个人用户可以从Doc2X官网 - 个人信息 - 身份令牌获得", - "required": true, - "toolDescription": "", - "defaultValue": "" - }, - { - "renderTypeList": ["reference"], - "selectedTypeIndex": 0, - "valueType": "string", - "canEdit": true, - "key": "url", - "label": "url", - "description": "待处理PDF文件的URL", - "required": true, - "toolDescription": "待处理PDF文件的URL" - }, - { - "renderTypeList": ["switch"], - "selectedTypeIndex": 0, - "valueType": "boolean", - "canEdit": true, - "key": "ocr", - "label": "ocr", - "description": "是否开启对PDF文件内图片的OCR识别,建议开启", - "required": true, - "toolDescription": "", - "defaultValue": true - } - ], - "outputs": [ - { - "id": "apikey", - "valueType": "string", - "key": "apikey", - "label": "apikey", - "type": "hidden" - }, - { - "id": "url", - "valueType": "string", - "key": "url", - "label": "url", - "type": "hidden" - }, - { - "id": "formula", - "valueType": "boolean", - "key": "ocr", - "label": "ocr", - "type": "hidden" - } - ] - }, - { - "nodeId": "pluginOutput", - "name": "插件输出", - "intro": "自定义配置外部输出,使用插件时,仅暴露自定义配置的输出", - "avatar": "core/workflow/template/pluginOutput", - "flowNodeType": "pluginOutput", - "showStatus": false, - "position": { - "x": 1665.6420513111314, - "y": -40.597442104994656 - }, - "version": "481", - "inputs": [ - { - "renderTypeList": ["reference"], - "valueType": "string", - "canEdit": true, - "key": "result", - "label": "result", - "description": "处理结果(或者是报错信息)", - "value": ["zHG5jJBkXmjB", "xWQuEf50F3mr"] - }, - { - "renderTypeList": ["reference"], - "valueType": "boolean", - "canEdit": true, - "key": "success", - "label": "success", - "description": "是否处理成功", - "value": ["zHG5jJBkXmjB", "m6CJJj7GFud5"] - } - ], - "outputs": [] - }, - { - "nodeId": "zHG5jJBkXmjB", - "name": "HTTP 请求", - "intro": "可以发出一个 HTTP 请求,实现更为复杂的操作(联网搜索、数据库查询等)", - "avatar": "core/workflow/template/httpRequest", - "flowNodeType": "httpRequest468", - "showStatus": true, - "position": { - "x": 966.3422652224374, - "y": -446.5974421049947 - }, - "version": "481", - "inputs": [ - { - "key": "system_addInputParam", - "renderTypeList": ["addInputParam"], - "valueType": "dynamic", - "label": "", - "required": false, - "description": "common:core.module.input.description.HTTP Dynamic Input", - "customInputConfig": { - "selectValueTypeList": [ - "string", - "number", - "boolean", - "object", - "arrayString", - "arrayNumber", - "arrayBoolean", - "arrayObject", - "arrayAny", - "any", - "chatHistory", - "datasetQuote", - "dynamic", - "selectApp", - "selectDataset" - ], - "showDescription": false, - "showDefaultValue": true - }, - "debugLabel": "", - "toolDescription": "" - }, - { - "key": "system_httpMethod", - "renderTypeList": ["custom"], - "valueType": "string", - "label": "", - "value": "POST", - "required": true, - "debugLabel": "", - "toolDescription": "" - }, - { - "key": "system_httpTimeout", - "renderTypeList": ["custom"], - "valueType": "number", - "label": "", - "value": 30, - "min": 5, - "max": 600, - "required": true, - "debugLabel": "", - "toolDescription": "" - }, - { - "key": "system_httpReqUrl", - "renderTypeList": ["hidden"], - "valueType": "string", - "label": "", - "description": "common:core.module.input.description.Http Request Url", - "placeholder": "https://api.ai.com/getInventory", - "required": false, - "value": "Doc2X/URLPDF2text", - "debugLabel": "", - "toolDescription": "" - }, - { - "key": "system_httpHeader", - "renderTypeList": ["custom"], - "valueType": "any", - "value": [], - "label": "", - "description": "common:core.module.input.description.Http Request Header", - "placeholder": "common:core.module.input.description.Http Request Header", - "required": false, - "debugLabel": "", - "toolDescription": "" - }, - { - "key": "system_httpParams", - "renderTypeList": ["hidden"], - "valueType": "any", - "value": [], - "label": "", - "required": false, - "debugLabel": "", - "toolDescription": "" - }, - { - "key": "system_httpJsonBody", - "renderTypeList": ["hidden"], - "valueType": "any", - "value": "{\n \"apikey\": \"{{apikey}}\",\n \"url\": \"{{url}}\",\n \"ocr\": {{ocr}}\n}", - "label": "", - "required": false, - "debugLabel": "", - "toolDescription": "" - }, - { - "key": "system_httpFormBody", - "renderTypeList": ["hidden"], - "valueType": "any", - "value": [], - "label": "", - "required": false, - "debugLabel": "", - "toolDescription": "" - }, - { - "key": "system_httpContentType", - "renderTypeList": ["hidden"], - "valueType": "string", - "value": "json", - "label": "", - "required": false, - "debugLabel": "", - "toolDescription": "" - }, - { - "renderTypeList": ["reference"], - "valueType": "string", - "canEdit": true, - "key": "apikey", - "label": "apikey", - "customInputConfig": { - "selectValueTypeList": [ - "string", - "number", - "boolean", - "object", - "arrayString", - "arrayNumber", - "arrayBoolean", - "arrayObject", - "arrayAny", - "any", - "chatHistory", - "datasetQuote", - "dynamic", - "selectApp", - "selectDataset" - ], - "showDescription": false, - "showDefaultValue": true - }, - "required": true, - "value": ["pluginInput", "apikey"] - }, - { - "renderTypeList": ["reference"], - "valueType": "string", - "canEdit": true, - "key": "url", - "label": "url", - "customInputConfig": { - "selectValueTypeList": [ - "string", - "number", - "boolean", - "object", - "arrayString", - "arrayNumber", - "arrayBoolean", - "arrayObject", - "arrayAny", - "any", - "chatHistory", - "datasetQuote", - "dynamic", - "selectApp", - "selectDataset" - ], - "showDescription": false, - "showDefaultValue": true - }, - "required": true, - "value": ["pluginInput", "url"] - }, - { - "renderTypeList": ["reference"], - "valueType": "boolean", - "canEdit": true, - "key": "ocr", - "label": "ocr", - "customInputConfig": { - "selectValueTypeList": [ - "string", - "number", - "boolean", - "object", - "arrayString", - "arrayNumber", - "arrayBoolean", - "arrayObject", - "arrayAny", - "any", - "chatHistory", - "datasetQuote", - "dynamic", - "selectApp", - "selectDataset" - ], - "showDescription": false, - "showDefaultValue": true - }, - "required": true, - "value": ["pluginInput", "formula"] - } - ], - "outputs": [ - { - "id": "error", - "key": "error", - "label": "workflow:request_error", - "description": "HTTP请求错误信息,成功时返回空", - "valueType": "object", - "type": "static" - }, - { - "id": "httpRawResponse", - "key": "httpRawResponse", - "required": true, - "label": "workflow:raw_response", - "description": "HTTP请求的原始响应。只能接受字符串或JSON类型响应数据。", - "valueType": "any", - "type": "static" - }, - { - "id": "system_addOutputParam", - "key": "system_addOutputParam", - "type": "dynamic", - "valueType": "dynamic", - "label": "", - "customFieldConfig": { - "selectValueTypeList": [ - "string", - "number", - "boolean", - "object", - "arrayString", - "arrayNumber", - "arrayBoolean", - "arrayObject", - "any", - "chatHistory", - "datasetQuote", - "dynamic", - "selectApp", - "selectDataset" - ], - "showDescription": false, - "showDefaultValue": false - } - }, - { - "id": "xWQuEf50F3mr", - "valueType": "string", - "type": "dynamic", - "key": "result", - "label": "result" - }, - { - "id": "m6CJJj7GFud5", - "valueType": "boolean", - "type": "dynamic", - "key": "success", - "label": "success" - } - ] - }, - { - "nodeId": "rZmLfANEyyJe", - "name": "系统配置", - "intro": "", - "avatar": "core/workflow/template/systemConfig", - "flowNodeType": "pluginConfig", - "position": { - "x": -93.55061402342784, - "y": -55.907069101622824 - }, - "version": "4811", - "inputs": [], - "outputs": [] - } - ], - "edges": [ - { - "source": "pluginInput", - "target": "zHG5jJBkXmjB", - "sourceHandle": "pluginInput-source-right", - "targetHandle": "zHG5jJBkXmjB-target-left" - }, - { - "source": "zHG5jJBkXmjB", - "target": "pluginOutput", - "sourceHandle": "zHG5jJBkXmjB-source-right", - "targetHandle": "pluginOutput-target-left" - } - ] - } -}