From 48c83938fe97a1d46cf1edc2ad5ebb2a15865dcc Mon Sep 17 00:00:00 2001 From: johnyrahul Date: Tue, 4 Feb 2025 17:16:44 +0530 Subject: [PATCH 1/5] Updated the version to 2.1.0 --- index.js | 44 ++++++++++++++-------------- package-lock.json | 22 ++++++++++---- package.json | 3 +- test/data/test.json | 1 + test/sample.env | 6 ++-- test/test_v2.js | 71 ++++++++++----------------------------------- 6 files changed, 59 insertions(+), 88 deletions(-) create mode 100644 test/data/test.json diff --git a/index.js b/index.js index 747f3be..26969a4 100644 --- a/index.js +++ b/index.js @@ -11,12 +11,11 @@ * */ - +require('dotenv').config() const axios = require("axios"); const winston = require("winston"); const fs = require("fs"); const { register } = require("module"); - const BASE_URL = "https://llmwhisperer-api.unstract.com/v1"; const BASE_URL_V2 = "https://llmwhisperer-api.us-central.unstract.com/api/v2"; @@ -55,6 +54,7 @@ class LLMWhispererClient { apiTimeout = 120, loggingLevel = "", } = {}) { + const level = loggingLevel || process.env.LLMWHISPERER_LOGGING_LEVEL || "debug"; @@ -374,8 +374,12 @@ class LLMWhispererClientV2 { this.headers = { "unstract-key": this.apiKey, - "Subscription-Id": "test", //TODO: Remove this line. For testing only - "Start-Date": "9-07-2024", //TODO: Remove this line. For testing only + "Subscription-Id": "jsclient-client", + "Subscription-Name": "jsclient-client", + "User-Id": "jsclient-client-user", + "Product-Id": "jsclient-client-product", + "Product-Name": "jsclient-client-product", + "Start-Date": "2024-07-09", }; } @@ -532,34 +536,27 @@ class LLMWhispererClientV2 { message["extraction"] = {}; message["status_code"] = -1; message["message"] = "Whisper client operation timed out"; - break; + return message } const whisperStatus = await this.whisperStatus(whisperHash); + if (whisperStatus.statusCode !== 200) { message["extraction"] = {}; message["status_code"] = whisperStatus.statusCode; message["message"] = "Whisper client operation failed"; - break; + return message } - if (whisperStatus.status === "processing") { + if (whisperStatus.status === "accepted") { + this.logger.debug("Status: accepted"); + } else if (whisperStatus.status === "processing") { this.logger.debug("Status: processing"); - } else if (whisperStatus.status === "delivered") { - this.logger.debug("Status: delivered"); - throw new LLMWhispererClientException( - "Whisper operation already delivered", - -1, - ); - } else if (whisperStatus.status === "unknown") { - this.logger.debug("Status: unknown"); - throw new LLMWhispererClientException( - "Whisper operation status unknown", - -1, - ); - } else if (whisperStatus.status === "failed") { - this.logger.debug("Status: failed"); + } else if (whisperStatus.status === "error") { + this.logger.debug("Status: error"); + this.logger.error('Whisper-hash: ${whisperHash} | STATUS: failed with ${whisperStatus.message}') message["extraction"] = {}; message["status_code"] = -1; - message["message"] = "Whisper client operation failed"; + message["status"] = "error"; + message["message"] = whisperStatus.message; break; } else if (whisperStatus.status === "processed") { this.logger.debug("Status: processed"); @@ -618,6 +615,9 @@ class LLMWhispererClientV2 { message.statusCode = response.status; return message; } catch (error) { + + + const err = error.response ? error.response.data : { message: error.message }; diff --git a/package-lock.json b/package-lock.json index cc08b5a..a0a6be1 100644 --- a/package-lock.json +++ b/package-lock.json @@ -1,20 +1,21 @@ { "name": "llmwhisperer-client", - "version": "0.1.0", + "version": "2.0.1", "lockfileVersion": 3, "requires": true, "packages": { "": { "name": "llmwhisperer-client", - "version": "0.1.0", + "version": "2.0.1", "license": "MIT", "dependencies": { "axios": "~1.7.2", + "llmwhisperer-client": "^2.0.1", "winston": "~3.13.0" }, "devDependencies": { "@eslint/js": "^9.4.0", - "dotenv": "^16.4.5", + "dotenv": "^16.4.7", "eslint": "^9.4.0", "eslint-config-prettier": "^9.1.0", "eslint-config-turbo": "^1.13.3", @@ -2413,9 +2414,9 @@ } }, "node_modules/dotenv": { - "version": "16.4.5", - "resolved": "https://registry.npmjs.org/dotenv/-/dotenv-16.4.5.tgz", - "integrity": "sha512-ZmdL2rui+eB2YwhsWzjInR8LldtZHGDoQ1ugH85ppHKwpUHL7j7rN0Ti9NCnGiQbhaZ11FpR+7ao1dNsmduNUg==", + "version": "16.4.7", + "resolved": "https://registry.npmjs.org/dotenv/-/dotenv-16.4.7.tgz", + "integrity": "sha512-47qPchRCykZC03FhkYAhrvwU4xDBFIj1QPqaarj6mdM/hgUzfPHcpkHJOn3mJAufFeeAxAzeGsr5X0M4k6fLZQ==", "dev": true, "engines": { "node": ">=12" @@ -4422,6 +4423,15 @@ "node": ">=18.0.0" } }, + "node_modules/llmwhisperer-client": { + "version": "2.0.1", + "resolved": "https://registry.npmjs.org/llmwhisperer-client/-/llmwhisperer-client-2.0.1.tgz", + "integrity": "sha512-fDUPTXh0T9qnxYD2dDvMUlXdCTMiOmITVncmWBsjIL78lwAqjMiwZKxB09CN81zgxsuPyY+UEAtKC8hmnkaEWw==", + "dependencies": { + "axios": "~1.7.2", + "winston": "~3.13.0" + } + }, "node_modules/locate-path": { "version": "6.0.0", "resolved": "https://registry.npmjs.org/locate-path/-/locate-path-6.0.0.tgz", diff --git a/package.json b/package.json index 164283f..b7af77f 100644 --- a/package.json +++ b/package.json @@ -14,11 +14,12 @@ "license": "MIT", "dependencies": { "axios": "~1.7.2", + "llmwhisperer-client": "^2.0.1", "winston": "~3.13.0" }, "devDependencies": { "@eslint/js": "^9.4.0", - "dotenv": "^16.4.5", + "dotenv": "^16.4.7", "eslint": "^9.4.0", "eslint-config-prettier": "^9.1.0", "eslint-config-turbo": "^1.13.3", diff --git a/test/data/test.json b/test/data/test.json new file mode 100644 index 0000000..4056563 --- /dev/null +++ b/test/data/test.json @@ -0,0 +1 @@ +{"test": "HelloWorld"} \ No newline at end of file diff --git a/test/sample.env b/test/sample.env index f2aedd5..afc2478 100644 --- a/test/sample.env +++ b/test/sample.env @@ -1,3 +1,3 @@ -export LLMWHISPERER_BASE_URL=https://llmwhisperer-api.unstract.com/v1 -export LLMWHISPERER_LOG_LEVEL=DEBUG -export LLMWHISPERER_API_KEY= \ No newline at end of file +LLMWHISPERER_BASE_URL_V2=https://llmwhisperer-api.us-central.unstract.com/api/v2 +LLMWHISPERER_LOG_LEVEL=DEBUG +LLMWHISPERER_API_KEY= \ No newline at end of file diff --git a/test/test_v2.js b/test/test_v2.js index 684111b..555f6b6 100644 --- a/test/test_v2.js +++ b/test/test_v2.js @@ -1,70 +1,29 @@ -const { LLMWhispererClientV2 } = require("../index"); -var LLMWhispererClient = require("../index").LLMWhispererClient; +const { LLMWhispererClientV2 } = require("../index"); -//const client = new LLMWhispererClient({apiKey:'c9b97420112f4c2aadae6fbda060680b'}); const client = new LLMWhispererClientV2(); (async () => { - // usage_info = await client.getUsageInfo(); - // console.log(usage_info); - whisper = await client.whisper({filePath: 'data/restaurant_invoice_photo.pdf'}); - //whisper = await client.whisper({url: 'https://storage.googleapis.com/pandora-static/samples/bill.jpg.pdf'}); - // whisper = await client.whisper({ - // filePath: 'sample_files/credit_card.pdf', - // processingMode: 'text', - // forceTextProcessing: true, - // pagesToExtract: '1-2', - // }); - console.log(whisper); + usage_info = await client.getUsageInfo(); + console.log(usage_info); + + whisper_result = await client.whisper({ + filePath: 'data/restaurant_invoice_photo.pdf', waitForCompletion: true, + waitTimeout: 120, + }); + console.log(whisper_result); + - //b4c25f17|5f1d285a7cf18d203de7af1a1abb0a3a - //whisper_status = await client.whisperStatus('b4c25f17|5f1d285a7cf18d203de7af1a1abb0a3a'); - //console.log(whisper_status); - // whisper_result = await client.whisperRetrieve('b4c25f17|5f1d285a7cf18d203de7af1a1abb0a3a'); - // console.log(whisper_result); - // whisper = await client.whisper({ - // filePath: 'sample_files/restaurant_invoice_photo.pdf', - // waitForCompletion: true, - // waitTimeout: 120, - // }); - // console.log(whisper); + whisper_result = await client.whisper({ + filePath: 'data/test.json', waitForCompletion: true, + waitTimeout: 120, + }); + console.log(whisper_result); //result = await client.registerWebhook('https://webhook.site/2da127b3-003f-446d-a150-7a461a099f3c','','wb4'); //console.log(result); //result = await client.getWebhookDetails('wb4'); //console.log(result); - -// whisper = await client.whisper({ -// filePath: "data/restaurant_invoice_photo.pdf", -// useWebhook: "wb4", -// webhookMetadata: "Sample Metadata", -// }); - - // whisper = await client.whisper({ - // filePath: 'sample_files/credit_card.pdf', - // timeout: 1, - // storeMetadataForHighlighting: true, - // }); - // //Keep checking the status until it is completed - // statusX = whisper.status; - // while (statusX === 'processing') { - // console.log('Processing... '+whisper['whisper-hash']); - // await new Promise(r => setTimeout(r, 3000)); - // whisperStatus = await client.whisperStatus(whisper['whisper-hash']); - // statusX = whisperStatus.status; - // } - // if (statusX === 'processed') { - // //Retrieve the result - // whisper = await client.whisperRetrieve(whisper['whisper-hash']); - // console.log(whisper); - // } else { - // console.log('Error'); - // } - - // //41ebb056|ba4473ee92b30823c4ed3da759ef670f - // highlights = await client.highlightData('41ebb056|ba4473ee92b30823c4ed3da759ef670f', 'Pay by Computer'); - // console.log(highlights); })(); From 10c81b1d74f8b12738a26f50eb43ff7f83fa9af6 Mon Sep 17 00:00:00 2001 From: johnyrahul Date: Fri, 14 Feb 2025 14:11:43 +0530 Subject: [PATCH 2/5] Fixed tests --- .vscode/launch.json | 19 +++++++++ index.js | 31 ++++++++------- package-lock.json | 7 ++++ package.json | 1 + sample.env | 3 ++ test/test.js | 94 ++++++++++++++++++++++---------------------- test/test_v2.js | 29 -------------- test/v1test.js | 95 +++++++++++++++++++++++++++++++++++++++++++++ 8 files changed, 189 insertions(+), 90 deletions(-) create mode 100644 .vscode/launch.json create mode 100644 sample.env delete mode 100644 test/test_v2.js create mode 100644 test/v1test.js diff --git a/.vscode/launch.json b/.vscode/launch.json new file mode 100644 index 0000000..d4f398c --- /dev/null +++ b/.vscode/launch.json @@ -0,0 +1,19 @@ +{ + // Use IntelliSense to learn about possible attributes. + // Hover to view descriptions of existing attributes. + // For more information, visit: https://go.microsoft.com/fwlink/?linkid=830387 + "version": "0.2.0", + "configurations": [ + { + "type": "node", + "request": "launch", + "name": "Debug Jest Tests", + "program": "${workspaceFolder}/node_modules/jest/bin/jest.js", + "args": [ + "--runInBand" + ], + "console": "integratedTerminal", + "internalConsoleOptions": "neverOpen" + } + ] +} \ No newline at end of file diff --git a/index.js b/index.js index 26969a4..2684700 100644 --- a/index.js +++ b/index.js @@ -374,12 +374,12 @@ class LLMWhispererClientV2 { this.headers = { "unstract-key": this.apiKey, - "Subscription-Id": "jsclient-client", - "Subscription-Name": "jsclient-client", - "User-Id": "jsclient-client-user", - "Product-Id": "jsclient-client-product", - "Product-Name": "jsclient-client-product", - "Start-Date": "2024-07-09", + // "Subscription-Id": "jsclient-client", + // "Subscription-Name": "jsclient-client", + // "User-Id": "jsclient-client-user", + // "Product-Id": "jsclient-client-product", + // "Product-Name": "jsclient-client-product", + // "Start-Date": "2024-07-09", }; } @@ -539,6 +539,7 @@ class LLMWhispererClientV2 { return message } const whisperStatus = await this.whisperStatus(whisperHash); + this.logger.debug(`whisperStatus: ${JSON.stringify(whisperStatus)}`); if (whisperStatus.statusCode !== 200) { message["extraction"] = {}; @@ -547,9 +548,9 @@ class LLMWhispererClientV2 { return message } if (whisperStatus.status === "accepted") { - this.logger.debug("Status: accepted"); + this.logger.debug("Status: accepted..."); } else if (whisperStatus.status === "processing") { - this.logger.debug("Status: processing"); + this.logger.debug("Status: processing..."); } else if (whisperStatus.status === "error") { this.logger.debug("Status: error"); this.logger.error('Whisper-hash: ${whisperHash} | STATUS: failed with ${whisperStatus.message}') @@ -599,28 +600,32 @@ class LLMWhispererClientV2 { * @throws {LLMWhispererClientException} Throws an LLMWhispererClientException if an error occurs during the operation. */ async whisperStatus(whisperHash) { - this.logger.debug("whisper_status called"); + this.logger.debug(`whisper_status called for ${whisperHash}`); const url = `${this.baseUrl}/whisper-status`; const params = { whisper_hash: whisperHash }; this.logger.debug(`url: ${url}`); - + this.logger.debug(`params: ${JSON.stringify(params)}`); + delete this.headers["Content-Length"]; + this.logger.debug(`headers: ${JSON.stringify(this.headers)}`); + + try { const response = await axios.get(url, { headers: this.headers, params, timeout: this.apiTimeout * 1000, }); - const message = response.data; message.statusCode = response.status; return message; } catch (error) { - - + this.logger.debug("Hel00000000002") + this.logger.debug(`error: ${JSON.stringify(error)}`); const err = error.response ? error.response.data : { message: error.message }; + this.logger.debug(`error: ${JSON.stringify(err)}`); err.statusCode = error.response ? error.response.status : -1; throw new LLMWhispererClientException(err.message, err.statusCode); } diff --git a/package-lock.json b/package-lock.json index a0a6be1..cb37b33 100644 --- a/package-lock.json +++ b/package-lock.json @@ -11,6 +11,7 @@ "dependencies": { "axios": "~1.7.2", "llmwhisperer-client": "^2.0.1", + "string-similarity": "^4.0.4", "winston": "~3.13.0" }, "devDependencies": { @@ -5943,6 +5944,12 @@ "node": ">=10" } }, + "node_modules/string-similarity": { + "version": "4.0.4", + "resolved": "https://registry.npmjs.org/string-similarity/-/string-similarity-4.0.4.tgz", + "integrity": "sha512-/q/8Q4Bl4ZKAPjj8WerIBJWALKkaPRfrvhfF8k/B23i4nzrlRj2/go1m90In7nG/3XDSbOo0+pu6RvCTM9RGMQ==", + "deprecated": "Package no longer supported. Contact Support at https://www.npmjs.com/support for more info." + }, "node_modules/string-width": { "version": "7.1.0", "resolved": "https://registry.npmjs.org/string-width/-/string-width-7.1.0.tgz", diff --git a/package.json b/package.json index b7af77f..bc79d94 100644 --- a/package.json +++ b/package.json @@ -15,6 +15,7 @@ "dependencies": { "axios": "~1.7.2", "llmwhisperer-client": "^2.0.1", + "string-similarity": "^4.0.4", "winston": "~3.13.0" }, "devDependencies": { diff --git a/sample.env b/sample.env new file mode 100644 index 0000000..afc2478 --- /dev/null +++ b/sample.env @@ -0,0 +1,3 @@ +LLMWHISPERER_BASE_URL_V2=https://llmwhisperer-api.us-central.unstract.com/api/v2 +LLMWHISPERER_LOG_LEVEL=DEBUG +LLMWHISPERER_API_KEY= \ No newline at end of file diff --git a/test/test.js b/test/test.js index a0c4abc..a2e4af6 100644 --- a/test/test.js +++ b/test/test.js @@ -1,10 +1,12 @@ + const fs = require("fs"); const path = require("path"); -const LLMWhispererClient = require("../index").LLMWhispererClient; -const client = new LLMWhispererClient({ - apiKey: process.env.LLMWHISPERER_API_KEY, -}); -describe("LLMWhispererClient", () => { +const stringSimilarity = require("string-similarity"); +const LLMWhispererClientV2 = require("../index").LLMWhispererClientV2; + +const client = new LLMWhispererClientV2(); + +describe("LLMWhispererClientV2", () => { test("get_usage_info", async () => { const usage_info = await client.getUsageInfo(); console.info(usage_info); @@ -22,13 +24,14 @@ describe("LLMWhispererClient", () => { ); }); + const test_cases = [ ["ocr", "line-printer", "restaurant_invoice_photo.pdf"], ["ocr", "line-printer", "credit_card.pdf"], - ["ocr", "line-printer", "handwritten-form.pdf"], - ["ocr", "text", "restaurant_invoice_photo.pdf"], - ["text", "line-printer", "restaurant_invoice_photo.pdf"], - ["text", "text", "handwritten-form.pdf"], + // ["ocr", "line-printer", "handwritten-form.pdf"], + // ["ocr", "text", "restaurant_invoice_photo.pdf"], + // ["text", "line-printer", "restaurant_invoice_photo.pdf"], + // ["text", "text", "handwritten-form.pdf"], ]; test.each(test_cases)( @@ -41,55 +44,50 @@ describe("LLMWhispererClient", () => { outputMode: output_mode, filePath: file_path, timeout: 200, + waitForCompletion: true }); - console.debug(response); + const exp_basename = `${path.parse(input_file).name}.${processing_mode}.${output_mode}.txt`; const exp_file = path.join(data_dir, "expected", exp_basename); - const exp = await fs.promises.readFile(exp_file, "utf-8"); + const expected_text = await fs.promises.readFile(exp_file, "utf-8"); expect(typeof response).toBe("object"); - expect(response.statusCode).toBe(200); - // expect(response.extracted_text).toBe(exp); + + const extracted_text = response.extraction.result_text + + + expect(response.status_code).toBe(200); + const similarity = stringSimilarity.compareTwoStrings(extracted_text, expected_text); + console.log(`Similarity: ${(similarity * 100).toFixed(2)}%`); + expect(similarity * 100).toBeGreaterThan(80); // Expect at least 80% match + }, 200000, ); +}); - // TODO: Review and port to Jest based tests - test.skip("whisper", () => { - // response = client.whisper( - // 'https://storage.googleapis.com/pandora-static/samples/bill.jpg.pdf' - // ); - const response = client.whisper("test_files/restaurant_invoice_photo.pdf", { - timeout: 200, - store_metadata_for_highlighting: true, - }); - console.info(response); - // expect(typeof response).toBe('object'); - }); +// (async () => { +// // usage_info = await client.getUsageInfo(); +// // console.log(usage_info); - test.skip("whisper_status", () => { - const response = client.whisper_status( - "7cfa5cbb|5f1d285a7cf18d203de7af1a1abb0a3a", - ); - console.info(response); - expect(typeof response).toBe("object"); - }); +// whisper_result = await client.whisper({ +// filePath: 'data/restaurant_invoice_photo.pdf', waitForCompletion: true, +// waitTimeout: 120, +// }); +// console.log(whisper_result); - test.skip("whisper_retrieve", () => { - const response = client.whisper_retrieve( - "7cfa5cbb|5f1d285a7cf18d203de7af1a1abb0a3a", - ); - console.info(response); - expect(typeof response).toBe("object"); - }); - test.skip("whisper_highlight_data", () => { - const response = client.highlight_data( - "9924d865|5f1d285a7cf18d203de7af1a1abb0a3a", - "Indiranagar", - ); - console.info(response); - expect(typeof response).toBe("object"); - }); -}); + +// // whisper_result = await client.whisper({ +// // filePath: 'data/test.json', waitForCompletion: true, +// // waitTimeout: 120, +// // }); +// // console.log(whisper_result); + +// //result = await client.registerWebhook('https://webhook.site/2da127b3-003f-446d-a150-7a461a099f3c','','wb4'); +// //console.log(result); + +// //result = await client.getWebhookDetails('wb4'); +// //console.log(result); +// })(); diff --git a/test/test_v2.js b/test/test_v2.js deleted file mode 100644 index 555f6b6..0000000 --- a/test/test_v2.js +++ /dev/null @@ -1,29 +0,0 @@ - -const { LLMWhispererClientV2 } = require("../index"); - -const client = new LLMWhispererClientV2(); - -(async () => { - usage_info = await client.getUsageInfo(); - console.log(usage_info); - - whisper_result = await client.whisper({ - filePath: 'data/restaurant_invoice_photo.pdf', waitForCompletion: true, - waitTimeout: 120, - }); - console.log(whisper_result); - - - - whisper_result = await client.whisper({ - filePath: 'data/test.json', waitForCompletion: true, - waitTimeout: 120, - }); - console.log(whisper_result); - - //result = await client.registerWebhook('https://webhook.site/2da127b3-003f-446d-a150-7a461a099f3c','','wb4'); - //console.log(result); - - //result = await client.getWebhookDetails('wb4'); - //console.log(result); -})(); diff --git a/test/v1test.js b/test/v1test.js new file mode 100644 index 0000000..86b83a4 --- /dev/null +++ b/test/v1test.js @@ -0,0 +1,95 @@ +const fs = require("fs"); +const path = require("path"); +const LLMWhispererClient = require("../index").LLMWhispererClient; +const client = new LLMWhispererClient({ + apiKey: process.env.LLMWHISPERER_API_KEY, +}); +describe("LLMWhispererClient", () => { + test.skip("get_usage_info", async () => { + const usage_info = await client.getUsageInfo(); + console.info(usage_info); + expect(typeof usage_info).toBe("object"); + const expected_keys = [ + "current_page_count", + "daily_quota", + "monthly_quota", + "overage_page_count", + "subscription_plan", + "today_page_count", + ]; + expect(Object.keys(usage_info)).toEqual( + expect.arrayContaining(expected_keys), + ); + }); + + const test_cases = [ + ["ocr", "line-printer", "restaurant_invoice_photo.pdf"], + ["ocr", "line-printer", "credit_card.pdf"], + ["ocr", "line-printer", "handwritten-form.pdf"], + ["ocr", "text", "restaurant_invoice_photo.pdf"], + ["text", "line-printer", "restaurant_invoice_photo.pdf"], + ["text", "text", "handwritten-form.pdf"], + ]; + + test.skip.each(test_cases)( + "whisper(%s, %s, %s)", + async (processing_mode, output_mode, input_file) => { + const data_dir = path.join(__dirname, "data"); + const file_path = path.join(data_dir, input_file); + const response = await client.whisper({ + processingMode: processing_mode, + outputMode: output_mode, + filePath: file_path, + timeout: 200, + }); + console.debug(response); + + const exp_basename = `${path.parse(input_file).name}.${processing_mode}.${output_mode}.txt`; + const exp_file = path.join(data_dir, "expected", exp_basename); + const exp = await fs.promises.readFile(exp_file, "utf-8"); + + expect(typeof response).toBe("object"); + expect(response.statusCode).toBe(200); + // expect(response.extracted_text).toBe(exp); + }, + 200000, + ); + + // TODO: Review and port to Jest based tests + test.skip("whisper", () => { + // response = client.whisper( + // 'https://storage.googleapis.com/pandora-static/samples/bill.jpg.pdf' + // ); + const response = client.whisper("test_files/restaurant_invoice_photo.pdf", { + timeout: 200, + store_metadata_for_highlighting: true, + }); + console.info(response); + // expect(typeof response).toBe('object'); + }); + + test.skip("whisper_status", () => { + const response = client.whisper_status( + "7cfa5cbb|5f1d285a7cf18d203de7af1a1abb0a3a", + ); + console.info(response); + expect(typeof response).toBe("object"); + }); + + test.skip("whisper_retrieve", () => { + const response = client.whisper_retrieve( + "7cfa5cbb|5f1d285a7cf18d203de7af1a1abb0a3a", + ); + console.info(response); + expect(typeof response).toBe("object"); + }); + + test.skip("whisper_highlight_data", () => { + const response = client.highlight_data( + "9924d865|5f1d285a7cf18d203de7af1a1abb0a3a", + "Indiranagar", + ); + console.info(response); + expect(typeof response).toBe("object"); + }); +}); From fd26354151abb12b9d15754c26e53801d5600586 Mon Sep 17 00:00:00 2001 From: johnyrahul Date: Fri, 14 Feb 2025 14:37:26 +0530 Subject: [PATCH 3/5] Fixed tests --- ...it_card.native_text.layout_preserving.txt} | 0 ...ndwritten-form.form.layout_preserving.txt} | 0 ...n-form.high_quality.layout_preserving.txt} | 0 ..._photo.high_quality.layout_preserving.txt} | 0 ...e_photo.native_text.layout_preserving.txt} | 0 test/test.js | 44 ++++--------------- 6 files changed, 9 insertions(+), 35 deletions(-) rename test/data/expected/{credit_card.ocr.line-printer.txt => credit_card.native_text.layout_preserving.txt} (100%) rename test/data/expected/{handwritten-form.ocr.line-printer.txt => handwritten-form.form.layout_preserving.txt} (100%) rename test/data/expected/{handwritten-form.text.text.txt => handwritten-form.high_quality.layout_preserving.txt} (100%) rename test/data/expected/{restaurant_invoice_photo.ocr.line-printer.txt => restaurant_invoice_photo.high_quality.layout_preserving.txt} (100%) rename test/data/expected/{restaurant_invoice_photo.text.line-printer.txt => restaurant_invoice_photo.native_text.layout_preserving.txt} (100%) diff --git a/test/data/expected/credit_card.ocr.line-printer.txt b/test/data/expected/credit_card.native_text.layout_preserving.txt similarity index 100% rename from test/data/expected/credit_card.ocr.line-printer.txt rename to test/data/expected/credit_card.native_text.layout_preserving.txt diff --git a/test/data/expected/handwritten-form.ocr.line-printer.txt b/test/data/expected/handwritten-form.form.layout_preserving.txt similarity index 100% rename from test/data/expected/handwritten-form.ocr.line-printer.txt rename to test/data/expected/handwritten-form.form.layout_preserving.txt diff --git a/test/data/expected/handwritten-form.text.text.txt b/test/data/expected/handwritten-form.high_quality.layout_preserving.txt similarity index 100% rename from test/data/expected/handwritten-form.text.text.txt rename to test/data/expected/handwritten-form.high_quality.layout_preserving.txt diff --git a/test/data/expected/restaurant_invoice_photo.ocr.line-printer.txt b/test/data/expected/restaurant_invoice_photo.high_quality.layout_preserving.txt similarity index 100% rename from test/data/expected/restaurant_invoice_photo.ocr.line-printer.txt rename to test/data/expected/restaurant_invoice_photo.high_quality.layout_preserving.txt diff --git a/test/data/expected/restaurant_invoice_photo.text.line-printer.txt b/test/data/expected/restaurant_invoice_photo.native_text.layout_preserving.txt similarity index 100% rename from test/data/expected/restaurant_invoice_photo.text.line-printer.txt rename to test/data/expected/restaurant_invoice_photo.native_text.layout_preserving.txt diff --git a/test/test.js b/test/test.js index a2e4af6..72f9101 100644 --- a/test/test.js +++ b/test/test.js @@ -26,21 +26,19 @@ describe("LLMWhispererClientV2", () => { const test_cases = [ - ["ocr", "line-printer", "restaurant_invoice_photo.pdf"], - ["ocr", "line-printer", "credit_card.pdf"], - // ["ocr", "line-printer", "handwritten-form.pdf"], - // ["ocr", "text", "restaurant_invoice_photo.pdf"], - // ["text", "line-printer", "restaurant_invoice_photo.pdf"], - // ["text", "text", "handwritten-form.pdf"], + ["high_quality", "layout_preserving", "restaurant_invoice_photo.pdf", 99], + ["native_text", "layout_preserving", "credit_card.pdf", 99], + ["form", "layout_preserving", "handwritten-form.pdf", 99], + ["high_quality", "layout_preserving", "handwritten-form.pdf", 80], ]; test.each(test_cases)( "whisper(%s, %s, %s)", - async (processing_mode, output_mode, input_file) => { + async (mode, output_mode, input_file, percent_simlarity) => { const data_dir = path.join(__dirname, "data"); const file_path = path.join(data_dir, input_file); const response = await client.whisper({ - processingMode: processing_mode, + mode: mode, outputMode: output_mode, filePath: file_path, timeout: 200, @@ -48,7 +46,7 @@ describe("LLMWhispererClientV2", () => { }); - const exp_basename = `${path.parse(input_file).name}.${processing_mode}.${output_mode}.txt`; + const exp_basename = `${path.parse(input_file).name}.${mode}.${output_mode}.txt`; const exp_file = path.join(data_dir, "expected", exp_basename); const expected_text = await fs.promises.readFile(exp_file, "utf-8"); @@ -56,38 +54,14 @@ describe("LLMWhispererClientV2", () => { const extracted_text = response.extraction.result_text - + console.log(`Extracted Text: ${extracted_text}`); expect(response.status_code).toBe(200); const similarity = stringSimilarity.compareTwoStrings(extracted_text, expected_text); console.log(`Similarity: ${(similarity * 100).toFixed(2)}%`); - expect(similarity * 100).toBeGreaterThan(80); // Expect at least 80% match + expect(similarity * 100).toBeGreaterThan(percent_simlarity); // Expect at least 80% match }, 200000, ); }); -// (async () => { -// // usage_info = await client.getUsageInfo(); -// // console.log(usage_info); - -// whisper_result = await client.whisper({ -// filePath: 'data/restaurant_invoice_photo.pdf', waitForCompletion: true, -// waitTimeout: 120, -// }); -// console.log(whisper_result); - - - -// // whisper_result = await client.whisper({ -// // filePath: 'data/test.json', waitForCompletion: true, -// // waitTimeout: 120, -// // }); -// // console.log(whisper_result); - -// //result = await client.registerWebhook('https://webhook.site/2da127b3-003f-446d-a150-7a461a099f3c','','wb4'); -// //console.log(result); - -// //result = await client.getWebhookDetails('wb4'); -// //console.log(result); -// })(); From 442a9372e55b0552c6708375b603805f9e790290 Mon Sep 17 00:00:00 2001 From: johnyrahul Date: Fri, 14 Feb 2025 14:53:02 +0530 Subject: [PATCH 4/5] Lint error fix --- .vscode/launch.json | 34 ++++++++++++++++------------------ index.js | 17 ++++++++--------- 2 files changed, 24 insertions(+), 27 deletions(-) diff --git a/.vscode/launch.json b/.vscode/launch.json index d4f398c..1df0f6a 100644 --- a/.vscode/launch.json +++ b/.vscode/launch.json @@ -1,19 +1,17 @@ { - // Use IntelliSense to learn about possible attributes. - // Hover to view descriptions of existing attributes. - // For more information, visit: https://go.microsoft.com/fwlink/?linkid=830387 - "version": "0.2.0", - "configurations": [ - { - "type": "node", - "request": "launch", - "name": "Debug Jest Tests", - "program": "${workspaceFolder}/node_modules/jest/bin/jest.js", - "args": [ - "--runInBand" - ], - "console": "integratedTerminal", - "internalConsoleOptions": "neverOpen" - } - ] -} \ No newline at end of file + // Use IntelliSense to learn about possible attributes. + // Hover to view descriptions of existing attributes. + // For more information, visit: https://go.microsoft.com/fwlink/?linkid=830387 + "version": "0.2.0", + "configurations": [ + { + "type": "node", + "request": "launch", + "name": "Debug Jest Tests", + "program": "${workspaceFolder}/node_modules/jest/bin/jest.js", + "args": ["--runInBand"], + "console": "integratedTerminal", + "internalConsoleOptions": "neverOpen" + } + ] +} diff --git a/index.js b/index.js index 2684700..1713a3b 100644 --- a/index.js +++ b/index.js @@ -11,7 +11,7 @@ * */ -require('dotenv').config() +require("dotenv").config(); const axios = require("axios"); const winston = require("winston"); const fs = require("fs"); @@ -54,7 +54,6 @@ class LLMWhispererClient { apiTimeout = 120, loggingLevel = "", } = {}) { - const level = loggingLevel || process.env.LLMWHISPERER_LOGGING_LEVEL || "debug"; @@ -536,7 +535,7 @@ class LLMWhispererClientV2 { message["extraction"] = {}; message["status_code"] = -1; message["message"] = "Whisper client operation timed out"; - return message + return message; } const whisperStatus = await this.whisperStatus(whisperHash); this.logger.debug(`whisperStatus: ${JSON.stringify(whisperStatus)}`); @@ -545,7 +544,7 @@ class LLMWhispererClientV2 { message["extraction"] = {}; message["status_code"] = whisperStatus.statusCode; message["message"] = "Whisper client operation failed"; - return message + return message; } if (whisperStatus.status === "accepted") { this.logger.debug("Status: accepted..."); @@ -553,7 +552,9 @@ class LLMWhispererClientV2 { this.logger.debug("Status: processing..."); } else if (whisperStatus.status === "error") { this.logger.debug("Status: error"); - this.logger.error('Whisper-hash: ${whisperHash} | STATUS: failed with ${whisperStatus.message}') + this.logger.error( + "Whisper-hash: ${whisperHash} | STATUS: failed with ${whisperStatus.message}", + ); message["extraction"] = {}; message["status_code"] = -1; message["status"] = "error"; @@ -607,8 +608,7 @@ class LLMWhispererClientV2 { this.logger.debug(`params: ${JSON.stringify(params)}`); delete this.headers["Content-Length"]; this.logger.debug(`headers: ${JSON.stringify(this.headers)}`); - - + try { const response = await axios.get(url, { headers: this.headers, @@ -619,8 +619,7 @@ class LLMWhispererClientV2 { message.statusCode = response.status; return message; } catch (error) { - - this.logger.debug("Hel00000000002") + this.logger.debug("Hel00000000002"); this.logger.debug(`error: ${JSON.stringify(error)}`); const err = error.response ? error.response.data From bd997375c408b97df9e2c3f02b2e86cf8f50ff6a Mon Sep 17 00:00:00 2001 From: johnyrahul Date: Fri, 14 Feb 2025 14:56:17 +0530 Subject: [PATCH 5/5] Updated version --- package.json | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/package.json b/package.json index bc79d94..4a1292c 100644 --- a/package.json +++ b/package.json @@ -1,6 +1,6 @@ { "name": "llmwhisperer-client", - "version": "2.0.1", + "version": "2.1.0", "description": "LLMWhisper JS Client", "main": "index.js", "scripts": {