From 4990c3fee80bd6bcb097f3f8c8dc2f00ba0e81c8 Mon Sep 17 00:00:00 2001 From: Robin Windey Date: Wed, 15 Jan 2025 19:45:19 +0000 Subject: [PATCH] Add support for workflow_ocr_backend * Integration for https://github.com/R0Wi-DEV/workflow_ocr_backend * Implements #51 --- README.md | 13 + lib/AppInfo/Application.php | 11 + .../OcrProcessorNotFoundException.php | 4 +- lib/Model/WorkflowSettings.php | 2 +- lib/OcrProcessors/CommandLineUtils.php | 100 +++ lib/OcrProcessors/ICommandLineUtils.php | 31 + lib/OcrProcessors/IOcrProcessorFactory.php | 7 - .../{ => Local}/ImageOcrProcessor.php | 2 +- .../{ => Local}/OcrMyPdfBasedProcessor.php | 85 +-- .../{ => Local}/PdfOcrProcessor.php | 2 +- lib/OcrProcessors/OcrProcessorFactory.php | 58 +- .../Remote/Client/.openapi-generator-ignore | 9 + .../Remote/Client/.openapi-generator/FILES | 5 + .../Remote/Client/.openapi-generator/VERSION | 1 + lib/OcrProcessors/Remote/Client/ApiClient.php | 98 +++ .../Remote/Client/Configuration.php | 510 +++++++++++++++ .../Remote/Client/IApiClient.php | 47 ++ .../Remote/Client/Model/ErrorResult.php | 386 +++++++++++ .../Remote/Client/Model/ModelInterface.php | 120 ++++ .../Remote/Client/Model/OcrResult.php | 491 ++++++++++++++ .../Remote/Client/ObjectSerializer.php | 617 ++++++++++++++++++ .../Remote/Client/generate-client.sh | 3 + .../Remote/Client/openapi-spec.json | 266 ++++++++ .../Remote/Client/openapitools.json | 20 + .../Remote/Client/templates/model.mustache | 37 ++ .../Client/templates/model_generic.mustache | 560 ++++++++++++++++ .../Client/templates/partial_header.mustache | 18 + .../Remote/WorkflowOcrRemoteProcessor.php | 68 ++ lib/Service/GlobalSettingsService.php | 13 +- lib/Service/IOcrBackendInfoService.php | 6 + lib/Service/OcrBackendInfoService.php | 42 +- lib/Service/OcrService.php | 3 +- lib/SetupChecks/OcrMyPdfCheck.php | 5 + lib/Wrapper/AppApiWrapper.php | 72 ++ lib/Wrapper/IAppApiWrapper.php | 44 ++ phpunit.integration.xml | 1 + phpunit.xml | 1 + sonar-project.properties | 
8 +- .../{ => Local}/ImageOcrProcessorTest.php | 11 +- .../{ => Local}/PdfOcrProcessorTest.php | 48 +- .../OcrProcessors/OcrProcessorFactoryTest.php | 28 +- .../Remote/WorkflowOcrRemoteProcessorTest.php | 109 ++++ .../Service/GlobalSettingsServiceTest.php | 14 +- .../Service/OcrBackendInfoServiceTest.php | 77 ++- tests/Unit/Service/OcrServiceTest.php | 59 +- tests/Unit/Service/VersionBackendMock.php | 59 ++ tests/Unit/SetupChecks/OcrMyPdfCheckTest.php | 19 +- tests/Unit/Wrapper/AppApiWrapperTest.php | 87 +++ tests/psalm-baseline.xml | 46 ++ 49 files changed, 4165 insertions(+), 158 deletions(-) create mode 100644 lib/OcrProcessors/CommandLineUtils.php create mode 100644 lib/OcrProcessors/ICommandLineUtils.php rename lib/OcrProcessors/{ => Local}/ImageOcrProcessor.php (96%) rename lib/OcrProcessors/{ => Local}/OcrMyPdfBasedProcessor.php (55%) rename lib/OcrProcessors/{ => Local}/PdfOcrProcessor.php (95%) create mode 100644 lib/OcrProcessors/Remote/Client/.openapi-generator-ignore create mode 100644 lib/OcrProcessors/Remote/Client/.openapi-generator/FILES create mode 100644 lib/OcrProcessors/Remote/Client/.openapi-generator/VERSION create mode 100644 lib/OcrProcessors/Remote/Client/ApiClient.php create mode 100644 lib/OcrProcessors/Remote/Client/Configuration.php create mode 100644 lib/OcrProcessors/Remote/Client/IApiClient.php create mode 100644 lib/OcrProcessors/Remote/Client/Model/ErrorResult.php create mode 100644 lib/OcrProcessors/Remote/Client/Model/ModelInterface.php create mode 100644 lib/OcrProcessors/Remote/Client/Model/OcrResult.php create mode 100644 lib/OcrProcessors/Remote/Client/ObjectSerializer.php create mode 100755 lib/OcrProcessors/Remote/Client/generate-client.sh create mode 100644 lib/OcrProcessors/Remote/Client/openapi-spec.json create mode 100644 lib/OcrProcessors/Remote/Client/openapitools.json create mode 100644 lib/OcrProcessors/Remote/Client/templates/model.mustache create mode 100644 
lib/OcrProcessors/Remote/Client/templates/model_generic.mustache create mode 100644 lib/OcrProcessors/Remote/Client/templates/partial_header.mustache create mode 100644 lib/OcrProcessors/Remote/WorkflowOcrRemoteProcessor.php create mode 100644 lib/Wrapper/AppApiWrapper.php create mode 100644 lib/Wrapper/IAppApiWrapper.php rename tests/Unit/OcrProcessors/{ => Local}/ImageOcrProcessorTest.php (80%) rename tests/Unit/OcrProcessors/{ => Local}/PdfOcrProcessorTest.php (89%) create mode 100644 tests/Unit/OcrProcessors/Remote/WorkflowOcrRemoteProcessorTest.php create mode 100644 tests/Unit/Service/VersionBackendMock.php create mode 100644 tests/Unit/Wrapper/AppApiWrapperTest.php diff --git a/README.md b/README.md index 07dfecc..e801eb6 100644 --- a/README.md +++ b/README.md @@ -58,6 +58,11 @@ Since the actual processing of the files is done asynchronously via Nextcloud's ### Backend + +This app is based on `ocrmypdf`. You can either install the CLI directly on the server running Nextcloud or use the alternative backend setup via Docker. + +#### Local installation + > :warning: Since `v1.20.1` you'll have to install `OCRmyPDF`. In the backend [`OCRmyPDF`](https://github.com/jbarlow83/OCRmyPDF) is used for processing PDF files. Make sure you have this commandline tool installed. Make sure you have the appropriate version (see below, Used libraries'). @@ -81,6 +86,14 @@ apt-get install tesseract-ocr-deu apt-get install tesseract-ocr-chi-sim ``` +#### `workflow_ocr_backend` installation + +Starting from version 30, Nextcloud added support for [AppApi](https://docs.nextcloud.com/server/latest/admin_manual/exapps_management/AppAPIAndExternalApps.html) apps. In essence this allows external container based applications to be integrated into the Nextcloud ecosystem. This app is using this feature to provide an alternative backend setup via Docker. + +Please refer to **https://github.com/R0Wi-DEV/workflow_ocr_backend** for more information on how to setup the backend. 
+ +> :information_source: If the `workflow_ocr_backend` External App is installed, the app will automatically use it as the backend even if you installed `ocrmypdf` locally. + ### Setup Checks The app will perform some [Setup Checks](https://docs.nextcloud.com/server/latest/admin_manual/configuration_server/security_setup_warnings.html) to verify your installation. If there is any problem with your backend setup, you'll see an error printed in Nextcloud under `Administration Settings` → `Overview` → `Security & setup warnings`. diff --git a/lib/AppInfo/Application.php b/lib/AppInfo/Application.php index b8d7f69..680fc61 100644 --- a/lib/AppInfo/Application.php +++ b/lib/AppInfo/Application.php @@ -33,8 +33,12 @@ use OCA\WorkflowOcr\Helper\SidecarFileAccessor; use OCA\WorkflowOcr\Listener\RegisterFlowOperationsListener; use OCA\WorkflowOcr\Notification\Notifier; +use OCA\WorkflowOcr\OcrProcessors\CommandLineUtils; +use OCA\WorkflowOcr\OcrProcessors\ICommandLineUtils; use OCA\WorkflowOcr\OcrProcessors\IOcrProcessorFactory; use OCA\WorkflowOcr\OcrProcessors\OcrProcessorFactory; +use OCA\WorkflowOcr\OcrProcessors\Remote\Client\ApiClient; +use OCA\WorkflowOcr\OcrProcessors\Remote\Client\IApiClient; use OCA\WorkflowOcr\Service\EventService; use OCA\WorkflowOcr\Service\GlobalSettingsService; use OCA\WorkflowOcr\Service\IEventService; @@ -46,8 +50,10 @@ use OCA\WorkflowOcr\Service\OcrBackendInfoService; use OCA\WorkflowOcr\Service\OcrService; use OCA\WorkflowOcr\SetupChecks\OcrMyPdfCheck; +use OCA\WorkflowOcr\Wrapper\AppApiWrapper; use OCA\WorkflowOcr\Wrapper\CommandWrapper; use OCA\WorkflowOcr\Wrapper\Filesystem; +use OCA\WorkflowOcr\Wrapper\IAppApiWrapper; use OCA\WorkflowOcr\Wrapper\ICommand; use OCA\WorkflowOcr\Wrapper\IFilesystem; use OCA\WorkflowOcr\Wrapper\IViewFactory; @@ -63,6 +69,8 @@ class Application extends App implements IBootstrap { public const APP_NAME = 'workflow_ocr'; + public const APP_BACKEND_NAME = 'workflow_ocr_backend'; + public const 
APP_API_APP_NAME = 'app_api'; /** * Application constructor. @@ -83,6 +91,9 @@ public function register(IRegistrationContext $context): void { $context->registerServiceAlias(IEventService::class, EventService::class); $context->registerServiceAlias(IOcrBackendInfoService::class, OcrBackendInfoService::class); $context->registerServiceAlias(INotificationService::class, NotificationService::class); + $context->registerServiceAlias(IApiClient::class, ApiClient::class); + $context->registerServiceAlias(ICommandLineUtils::class, CommandLineUtils::class); + $context->registerServiceAlias(IAppApiWrapper::class, AppApiWrapper::class); // BUG #43 $context->registerService(ICommand::class, function () { diff --git a/lib/Exception/OcrProcessorNotFoundException.php b/lib/Exception/OcrProcessorNotFoundException.php index a370336..b06e147 100644 --- a/lib/Exception/OcrProcessorNotFoundException.php +++ b/lib/Exception/OcrProcessorNotFoundException.php @@ -26,7 +26,7 @@ use Exception; class OcrProcessorNotFoundException extends Exception { - public function __construct(string $mimeType) { - $this->message = 'OCR processor for mime type ' . $mimeType . ' not found'; + public function __construct(string $mimeType, bool $useRemoteBackend) { + $this->message = 'OCR processor for mime type ' . $mimeType . '(useRemoteBackend=' . $useRemoteBackend . 
') not found'; } } diff --git a/lib/Model/WorkflowSettings.php b/lib/Model/WorkflowSettings.php index 0512213..bb55fe5 100644 --- a/lib/Model/WorkflowSettings.php +++ b/lib/Model/WorkflowSettings.php @@ -168,7 +168,7 @@ private function setJson(?string $json = null) { $this->setProperty($this->customCliArgs, $data, 'customCliArgs', fn ($value) => is_string($value)); } - private function setProperty(& $property, array $jsonData, string $key, ?callable $dataCheck = null): void { + private function setProperty(array|bool|int|string & $property, array $jsonData, string $key, ?callable $dataCheck = null): void { if (array_key_exists($key, $jsonData) && ($dataCheck === null || $dataCheck($jsonData[$key]))) { $property = $jsonData[$key]; } diff --git a/lib/OcrProcessors/CommandLineUtils.php b/lib/OcrProcessors/CommandLineUtils.php new file mode 100644 index 0000000..6c9e52f --- /dev/null +++ b/lib/OcrProcessors/CommandLineUtils.php @@ -0,0 +1,100 @@ + + * + * @license GNU AGPL version 3 or any later version + * + * This program is free software: you can redistribute it and/or modify + * it under the terms of the GNU Affero General Public License as + * published by the Free Software Foundation, either version 3 of the + * License, or (at your option) any later version. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU Affero General Public License for more details. + * + * You should have received a copy of the GNU Affero General Public License + * along with this program. If not, see . 
+ */ + +namespace OCA\WorkflowOcr\OcrProcessors; + +use OCA\WorkflowOcr\Helper\ISidecarFileAccessor; +use OCA\WorkflowOcr\Model\GlobalSettings; +use OCA\WorkflowOcr\Model\WorkflowSettings; +use OCA\WorkflowOcr\Service\IOcrBackendInfoService; +use Psr\Log\LoggerInterface; + +class CommandLineUtils implements ICommandLineUtils { + private static $ocrModeToCmdParameterMapping = [ + WorkflowSettings::OCR_MODE_SKIP_TEXT => '--skip-text', + WorkflowSettings::OCR_MODE_REDO_OCR => '--redo-ocr', + WorkflowSettings::OCR_MODE_FORCE_OCR => '--force-ocr', + WorkflowSettings::OCR_MODE_SKIP_FILE => '' // This is the ocrmypdf default behaviour + ]; + + public function __construct( + private ISidecarFileAccessor $sidecarFileAccessor, + private IOcrBackendInfoService $ocrBackendInfoService, + private LoggerInterface $logger, + ) { + } + + public function getCommandlineArgs(WorkflowSettings $settings, GlobalSettings $globalSettings, array $additionalCommandlineArgs = []): string { + $isLocalExecution = !$this->ocrBackendInfoService->isRemoteBackend(); + + // Default setting is quiet + $args = $isLocalExecution ? ['-q'] : []; + + // OCR mode ('--skip-text', '--redo-ocr', '--force-ocr' or empty) + $args[] = self::$ocrModeToCmdParameterMapping[$settings->getOcrMode()]; + + // Language settings + if ($settings->getLanguages()) { + $langStr = implode('+', $settings->getLanguages()); + $args[] = "--language $langStr"; + } + + // Remove background option (NOTE :: this is incompatible with redo-ocr, so + // we have to make it exclusive against each other!) + if ($settings->getRemoveBackground()) { + if ($settings->getOcrMode() === WorkflowSettings::OCR_MODE_REDO_OCR) { + $this->logger->warning('--remove-background is incompatible with --redo-ocr, ignoring'); + } else { + $args[] = '--remove-background'; + } + } + + // Number of CPU's to be used + $processorCount = intval($globalSettings->processorCount); + if ($processorCount > 0) { + $args[] = '--jobs ' . 
$processorCount; + } + + if ($isLocalExecution) { + // Save recognized text in tempfile + $sidecarFilePath = $this->sidecarFileAccessor->getOrCreateSidecarFile(); + if ($sidecarFilePath) { + $args[] = '--sidecar ' . $sidecarFilePath; + } + } + + $resultArgs = array_filter(array_merge( + $args, + $additionalCommandlineArgs, + [$this->escapeCustomCliArgs($settings->getCustomCliArgs())] + ), fn ($arg) => !empty($arg)); + + return implode(' ', $resultArgs); + } + + private function escapeCustomCliArgs(string $customCliArgs): string { + $customCliArgs = str_replace('&&', '', $customCliArgs); + $customCliArgs = str_replace(';', '', $customCliArgs); + return $customCliArgs; + } +} diff --git a/lib/OcrProcessors/ICommandLineUtils.php b/lib/OcrProcessors/ICommandLineUtils.php new file mode 100644 index 0000000..141c446 --- /dev/null +++ b/lib/OcrProcessors/ICommandLineUtils.php @@ -0,0 +1,31 @@ + + * + * @license GNU AGPL version 3 or any later version + * + * This program is free software: you can redistribute it and/or modify + * it under the terms of the GNU Affero General Public License as + * published by the Free Software Foundation, either version 3 of the + * License, or (at your option) any later version. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU Affero General Public License for more details. + * + * You should have received a copy of the GNU Affero General Public License + * along with this program. If not, see . 
+ */ + +namespace OCA\WorkflowOcr\OcrProcessors; + +use OCA\WorkflowOcr\Model\GlobalSettings; +use OCA\WorkflowOcr\Model\WorkflowSettings; + +interface ICommandLineUtils { + public function getCommandlineArgs(WorkflowSettings $settings, GlobalSettings $globalSettings, array $additionalCommandlineArgs = []): string; +} diff --git a/lib/OcrProcessors/IOcrProcessorFactory.php b/lib/OcrProcessors/IOcrProcessorFactory.php index 708e6b9..69ffb51 100644 --- a/lib/OcrProcessors/IOcrProcessorFactory.php +++ b/lib/OcrProcessors/IOcrProcessorFactory.php @@ -28,11 +28,4 @@ interface IOcrProcessorFactory { * Creates a IOcrProcessor object for the given mimetype */ public function create(string $mimeType) : IOcrProcessor; - - /** - * Returns true, if an OCR processor for the given mimetype - * can be constructed. - * @return bool - */ - public function canCreate(string $mimeType) : bool; } diff --git a/lib/OcrProcessors/ImageOcrProcessor.php b/lib/OcrProcessors/Local/ImageOcrProcessor.php similarity index 96% rename from lib/OcrProcessors/ImageOcrProcessor.php rename to lib/OcrProcessors/Local/ImageOcrProcessor.php index 519417d..c045db1 100644 --- a/lib/OcrProcessors/ImageOcrProcessor.php +++ b/lib/OcrProcessors/Local/ImageOcrProcessor.php @@ -21,7 +21,7 @@ * along with this program. If not, see . */ -namespace OCA\WorkflowOcr\OcrProcessors; +namespace OCA\WorkflowOcr\OcrProcessors\Local; use OCA\WorkflowOcr\Model\GlobalSettings; use OCA\WorkflowOcr\Model\WorkflowSettings; diff --git a/lib/OcrProcessors/OcrMyPdfBasedProcessor.php b/lib/OcrProcessors/Local/OcrMyPdfBasedProcessor.php similarity index 55% rename from lib/OcrProcessors/OcrMyPdfBasedProcessor.php rename to lib/OcrProcessors/Local/OcrMyPdfBasedProcessor.php index 4fb8358..5d2a88e 100644 --- a/lib/OcrProcessors/OcrMyPdfBasedProcessor.php +++ b/lib/OcrProcessors/Local/OcrMyPdfBasedProcessor.php @@ -21,42 +21,32 @@ * along with this program. If not, see . 
*/ -namespace OCA\WorkflowOcr\OcrProcessors; +namespace OCA\WorkflowOcr\OcrProcessors\Local; use OCA\WorkflowOcr\Exception\OcrNotPossibleException; use OCA\WorkflowOcr\Exception\OcrResultEmptyException; use OCA\WorkflowOcr\Helper\ISidecarFileAccessor; use OCA\WorkflowOcr\Model\GlobalSettings; use OCA\WorkflowOcr\Model\WorkflowSettings; +use OCA\WorkflowOcr\OcrProcessors\ICommandLineUtils; +use OCA\WorkflowOcr\OcrProcessors\IOcrProcessor; +use OCA\WorkflowOcr\OcrProcessors\OcrProcessorResult; use OCA\WorkflowOcr\Wrapper\ICommand; use OCP\Files\File; use Psr\Log\LoggerInterface; abstract class OcrMyPdfBasedProcessor implements IOcrProcessor { - private static $ocrModeToCmdParameterMapping = [ - WorkflowSettings::OCR_MODE_SKIP_TEXT => '--skip-text', - WorkflowSettings::OCR_MODE_REDO_OCR => '--redo-ocr', - WorkflowSettings::OCR_MODE_FORCE_OCR => '--force-ocr', - WorkflowSettings::OCR_MODE_SKIP_FILE => '' // This is the ocrmypdf default behaviour - ]; - - /** @var ICommand */ - private $command; - - /** @var LoggerInterface */ - private $logger; - - /** @var ISidecarFileAccessor */ - private $sidecarFileAccessor; - - public function __construct(ICommand $command, LoggerInterface $logger, ISidecarFileAccessor $sidecarFileAccessor) { - $this->command = $command; - $this->logger = $logger; - $this->sidecarFileAccessor = $sidecarFileAccessor; + public function __construct( + private ICommand $command, + private LoggerInterface $logger, + private ISidecarFileAccessor $sidecarFileAccessor, + private ICommandLineUtils $commandLineUtils, + ) { } public function ocrFile(File $file, WorkflowSettings $settings, GlobalSettings $globalSettings): OcrProcessorResult { - $commandStr = 'ocrmypdf ' . $this->getCommandlineArgs($settings, $globalSettings) . ' - - || exit $? ; cat'; + $additionalCommandlineArgs = $this->getAdditionalCommandlineArgs($settings, $globalSettings); + $commandStr = 'ocrmypdf ' . 
$this->commandLineUtils->getCommandlineArgs($settings, $globalSettings, $additionalCommandlineArgs) . ' - - || exit $? ; cat'; $inputFileContent = $file->getContent(); @@ -109,55 +99,4 @@ public function ocrFile(File $file, WorkflowSettings $settings, GlobalSettings $ protected function getAdditionalCommandlineArgs(WorkflowSettings $settings, GlobalSettings $globalSettings): array { return []; } - - - private function getCommandlineArgs(WorkflowSettings $settings, GlobalSettings $globalSettings): string { - // Default setting is quiet - $args = ['-q']; - - // OCR mode ('--skip-text', '--redo-ocr', '--force-ocr' or empty) - $args[] = self::$ocrModeToCmdParameterMapping[$settings->getOcrMode()]; - - // Language settings - if ($settings->getLanguages()) { - $langStr = implode('+', $settings->getLanguages()); - $args[] = "-l $langStr"; - } - - // Remove background option (NOTE :: this is incompatible with redo-ocr, so - // we have to make it exclusive against each other!) - if ($settings->getRemoveBackground()) { - if ($settings->getOcrMode() === WorkflowSettings::OCR_MODE_REDO_OCR) { - $this->logger->warning('--remove-background is incompatible with --redo-ocr, ignoring'); - } else { - $args[] = '--remove-background'; - } - } - - // Number of CPU's to be used - $processorCount = intval($globalSettings->processorCount); - if ($processorCount > 0) { - $args[] = '-j ' . $processorCount; - } - - // Save recognized text in tempfile - $sidecarFilePath = $this->sidecarFileAccessor->getOrCreateSidecarFile(); - if ($sidecarFilePath) { - $args[] = '--sidecar ' . 
$sidecarFilePath; - } - - $resultArgs = array_filter(array_merge( - $args, - $this->getAdditionalCommandlineArgs($settings, $globalSettings), - [$this->escapeCustomCliArgs($settings->getCustomCliArgs())] - ), fn ($arg) => !empty($arg)); - - return implode(' ', $resultArgs); - } - - private function escapeCustomCliArgs(string $customCliArgs): string { - $customCliArgs = str_replace('&&', '', $customCliArgs); - $customCliArgs = str_replace(';', '', $customCliArgs); - return $customCliArgs; - } } diff --git a/lib/OcrProcessors/PdfOcrProcessor.php b/lib/OcrProcessors/Local/PdfOcrProcessor.php similarity index 95% rename from lib/OcrProcessors/PdfOcrProcessor.php rename to lib/OcrProcessors/Local/PdfOcrProcessor.php index 1af0ef1..03e0792 100644 --- a/lib/OcrProcessors/PdfOcrProcessor.php +++ b/lib/OcrProcessors/Local/PdfOcrProcessor.php @@ -21,7 +21,7 @@ * along with this program. If not, see . */ -namespace OCA\WorkflowOcr\OcrProcessors; +namespace OCA\WorkflowOcr\OcrProcessors\Local; class PdfOcrProcessor extends OcrMyPdfBasedProcessor { } diff --git a/lib/OcrProcessors/OcrProcessorFactory.php b/lib/OcrProcessors/OcrProcessorFactory.php index 7b150e6..6ae492a 100644 --- a/lib/OcrProcessors/OcrProcessorFactory.php +++ b/lib/OcrProcessors/OcrProcessorFactory.php @@ -25,23 +25,38 @@ use OCA\WorkflowOcr\Exception\OcrProcessorNotFoundException; use OCA\WorkflowOcr\Helper\ISidecarFileAccessor; +use OCA\WorkflowOcr\OcrProcessors\Local\ImageOcrProcessor; +use OCA\WorkflowOcr\OcrProcessors\Local\PdfOcrProcessor; +use OCA\WorkflowOcr\OcrProcessors\Remote\WorkflowOcrRemoteProcessor; +use OCA\WorkflowOcr\Service\IOcrBackendInfoService; use OCA\WorkflowOcr\Wrapper\ICommand; use OCP\AppFramework\Bootstrap\IRegistrationContext; use Psr\Container\ContainerInterface; use Psr\Log\LoggerInterface; class OcrProcessorFactory implements IOcrProcessorFactory { - private static $mapping = [ + /** + * List of processors which are using a local CLI tool to perform OCR. 
+ */ + private static $localMapping = [ 'application/pdf' => PdfOcrProcessor::class, 'image/jpeg' => ImageOcrProcessor::class, 'image/png' => ImageOcrProcessor::class ]; - /** @var ContainerInterface */ - private $container; + /** + * List of processors which are using a remote service (Workflow OCR Backend) to perform OCR. + */ + private static $remoteMapping = [ + 'application/pdf' => WorkflowOcrRemoteProcessor::class, + 'image/jpeg' => WorkflowOcrRemoteProcessor::class, + 'image/png' => WorkflowOcrRemoteProcessor::class + ]; - public function __construct(ContainerInterface $container) { - $this->container = $container; + public function __construct( + private ContainerInterface $container, + private IOcrBackendInfoService $ocrBackendInfoService, + ) { } public static function registerOcrProcessors(IRegistrationContext $context) : void { @@ -52,26 +67,35 @@ public static function registerOcrProcessors(IRegistrationContext $context) : vo * "singleton per request" which leads to problems regarding the reused Command object * under the hood. 
*/ - $context->registerService(PdfOcrProcessor::class, function (ContainerInterface $c) { - return new PdfOcrProcessor($c->get(ICommand::class), $c->get(LoggerInterface::class), $c->get(ISidecarFileAccessor::class)); - }, false); - $context->registerService(ImageOcrProcessor::class, function (ContainerInterface $c) { - return new ImageOcrProcessor($c->get(ICommand::class), $c->get(LoggerInterface::class), $c->get(ISidecarFileAccessor::class)); - }, false); + $context->registerService(PdfOcrProcessor::class, fn (ContainerInterface $c) => + new PdfOcrProcessor( + $c->get(ICommand::class), + $c->get(LoggerInterface::class), + $c->get(ISidecarFileAccessor::class), + $c->get(ICommandLineUtils::class)), false); + $context->registerService(ImageOcrProcessor::class, fn (ContainerInterface $c) => + new ImageOcrProcessor( + $c->get(ICommand::class), + $c->get(LoggerInterface::class), + $c->get(ISidecarFileAccessor::class), + $c->get(ICommandLineUtils::class)), false); } /** @inheritdoc */ public function create(string $mimeType) : IOcrProcessor { - if (!$this->canCreate($mimeType)) { - throw new OcrProcessorNotFoundException($mimeType); + $useRemoteBackend = $this->ocrBackendInfoService->isRemoteBackend(); + /** @var array */ + $mimeTypeToProcessorMapping = $useRemoteBackend ? 
self::$remoteMapping : self::$localMapping; + + if (!self::canCreate($mimeType, $mimeTypeToProcessorMapping)) { + throw new OcrProcessorNotFoundException($mimeType, $useRemoteBackend); } - $className = self::$mapping[$mimeType]; + $className = $mimeTypeToProcessorMapping[$mimeType]; return $this->container->get($className); } - /** @inheritdoc */ - public function canCreate(string $mimeType) : bool { - return array_key_exists($mimeType, self::$mapping); + private static function canCreate(string $mimeType, array $mimeTypeToProcessorMapping) : bool { + return array_key_exists($mimeType, $mimeTypeToProcessorMapping); } } diff --git a/lib/OcrProcessors/Remote/Client/.openapi-generator-ignore b/lib/OcrProcessors/Remote/Client/.openapi-generator-ignore new file mode 100644 index 0000000..6a379b4 --- /dev/null +++ b/lib/OcrProcessors/Remote/Client/.openapi-generator-ignore @@ -0,0 +1,9 @@ +# Currently we're only generating Models (no API clients) +!Model/ErrorResult.php +!Model/ModelInterface.php +!Model/OcrResult.php +!ObjectSerializer.php +!Configuration.php +*.* +.* +**/* \ No newline at end of file diff --git a/lib/OcrProcessors/Remote/Client/.openapi-generator/FILES b/lib/OcrProcessors/Remote/Client/.openapi-generator/FILES new file mode 100644 index 0000000..352aa47 --- /dev/null +++ b/lib/OcrProcessors/Remote/Client/.openapi-generator/FILES @@ -0,0 +1,5 @@ +Configuration.php +Model/ErrorResult.php +Model/ModelInterface.php +Model/OcrResult.php +ObjectSerializer.php diff --git a/lib/OcrProcessors/Remote/Client/.openapi-generator/VERSION b/lib/OcrProcessors/Remote/Client/.openapi-generator/VERSION new file mode 100644 index 0000000..758bb9c --- /dev/null +++ b/lib/OcrProcessors/Remote/Client/.openapi-generator/VERSION @@ -0,0 +1 @@ +7.10.0 diff --git a/lib/OcrProcessors/Remote/Client/ApiClient.php b/lib/OcrProcessors/Remote/Client/ApiClient.php new file mode 100644 index 0000000..b384ac9 --- /dev/null +++ b/lib/OcrProcessors/Remote/Client/ApiClient.php @@ -0,0 +1,98 
@@
+
+ *
+ * @license GNU AGPL version 3 or any later version
+ *
+ * This program is free software: you can redistribute it and/or modify
+ * it under the terms of the GNU Affero General Public License as
+ * published by the Free Software Foundation, either version 3 of the
+ * License, or (at your option) any later version.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ * GNU Affero General Public License for more details.
+ *
+ * You should have received a copy of the GNU Affero General Public License
+ * along with this program. If not, see .
+ */
+
+namespace OCA\WorkflowOcr\OcrProcessors\Remote\Client;
+
+use JsonException;
+use OCA\WorkflowOcr\AppInfo\Application;
+use OCA\WorkflowOcr\OcrProcessors\Remote\Client\Model\ErrorResult;
+use OCA\WorkflowOcr\OcrProcessors\Remote\Client\Model\OcrResult;
+use OCA\WorkflowOcr\Wrapper\IAppApiWrapper;
+use OCP\Http\Client\IResponse;
+use Psr\Log\LoggerInterface;
+use RuntimeException;
+
+/**
+ * IApiClient implementation that sends every request through
+ * IAppApiWrapper::exAppRequest() to the app named by
+ * Application::APP_BACKEND_NAME.
+ */
+class ApiClient implements IApiClient {
+	/** Value of the 'timeout' request option used for /process_ocr. */
+	private const PROCESS_OCR_TIMEOUT = 60;
+
+	public function __construct(
+		private IAppApiWrapper $appApiWrapper,
+		private LoggerInterface $logger,
+	) {
+	}
+
+	/**
+	 * Posts the file and the ocrmypdf parameters as multipart form data to
+	 * /process_ocr and deserializes the JSON answer.
+	 *
+	 * @param resource $file The file to process OCR on.
+	 * @param string $fileName Sent as the 'filename' of the 'file' part.
+	 * @param string $ocrMyPdfParameters Sent as the 'ocrmypdf_parameters' part.
+	 * @return OcrResult|ErrorResult OcrResult for status 200, ErrorResult for status 500.
+	 * @throws RuntimeException If the request fails or the status code is neither 200 nor 500.
+	 * @throws JsonException If the response body is not valid JSON.
+	 */
+	public function processOcr($file, string $fileName, string $ocrMyPdfParameters): OcrResult|ErrorResult {
+		$options = [
+			'multipart' => [
+				[
+					'name' => 'file',
+					'contents' => $file,
+					'filename' => $fileName
+				],
+				[
+					'name' => 'ocrmypdf_parameters',
+					'contents' => $ocrMyPdfParameters
+				]
+			],
+			'timeout' => self::PROCESS_OCR_TIMEOUT
+		];
+
+		$response = $this->exAppRequest('/process_ocr', $options, 'POST');
+
+		// Only these two status codes have a model to deserialize into.
+		$class = match ($response->getStatusCode()) {
+			200 => OcrResult::class,
+			500 => ErrorResult::class,
+			default => null,
+		};
+
+		if ($class === null) {
+			$this->logger->error('Unexpected response code', ['response' => $response, 'body' => $response->getBody()]);
+			throw new RuntimeException('Unexpected response code');
+		}
+
+		return ObjectSerializer::deserialize(json_decode($response->getBody(), false, 512, JSON_THROW_ON_ERROR), $class);
+	}
+
+	/**
+	 * Requests /installed_languages and returns the decoded JSON array.
+	 *
+	 * @return string[]
+	 * @throws RuntimeException If the request fails, the status code is not 200,
+	 *                          or the body is not a JSON array.
+	 */
+	public function getLanguages(): array {
+		$response = $this->exAppRequest('/installed_languages', null, 'GET', true);
+
+		try {
+			$languages = json_decode($response->getBody(), true, 512, JSON_THROW_ON_ERROR);
+		} catch (JsonException $e) {
+			$this->logger->error('Could not decode installed languages', ['exception' => $e]);
+			throw new RuntimeException('Invalid response for installed languages', 0, $e);
+		}
+
+		// Without this check a JSON scalar or null would surface as a TypeError
+		// from the declared array return type.
+		if (!is_array($languages)) {
+			$this->logger->error('Installed languages response is not an array', ['decoded' => $languages]);
+			throw new RuntimeException('Invalid response for installed languages');
+		}
+
+		return $languages;
+	}
+
+	/**
+	 * Sends one request through the AppApi wrapper and logs request and response.
+	 *
+	 * @param string $path Request path, e.g. '/process_ocr'.
+	 * @param array|null $options Request options handed to the wrapper unchanged.
+	 * @param string $method HTTP method.
+	 * @param bool $throwIfResponseCodeNot200 When true, any status code other than 200 is a failure.
+	 * @throws RuntimeException If the wrapper returns an array instead of a response,
+	 *                          or the status check above fails.
+	 */
+	private function exAppRequest(string $path, ?array $options, string $method, bool $throwIfResponseCodeNot200 = false): IResponse {
+		$this->logger->debug('Executing request', ['path' => $path, 'options' => $options, 'method' => $method]);
+		$response = $this->appApiWrapper->exAppRequest(
+			Application::APP_BACKEND_NAME,
+			$path,
+			null,
+			$method,
+			[],
+			$options
+		);
+		$this->logger->debug('Response received', ['path' => $path, 'response' => $response]);
+
+		// An array result is treated as a failed request, not as a response.
+		if (is_array($response) || ($throwIfResponseCodeNot200 && $response->getStatusCode() !== 200)) {
+			$this->logger->error('Request failed', ['path' => $path, 'response' => $response]);
+			throw new RuntimeException('Request failed');
+		}
+
+		return $response;
+	}
+}
diff --git a/lib/OcrProcessors/Remote/Client/Configuration.php b/lib/OcrProcessors/Remote/Client/Configuration.php
new file mode 100644
index 0000000..1f9040f
--- /dev/null
+++ b/lib/OcrProcessors/Remote/Client/Configuration.php
@@ -0,0 +1,510 @@
+
+ *
+ * @license GNU AGPL version 3 or any later version
+ *
+ * This program is free software: you can redistribute it and/or modify
+ * it under the terms of the GNU Affero General Public License as
+ * published by the Free Software Foundation, either version 3 of the
+ * License, or (at your option) any later version.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ * GNU Affero General Public License for more details.
+ *
+ * You should have received a copy of the GNU Affero General Public License
+ * along with this program. If not, see .
+ */ + +/** + * NOTE: This class is auto generated by OpenAPI Generator (https://openapi-generator.tech). + * https://openapi-generator.tech + * Do not edit the class manually. + */ + +namespace OCA\WorkflowOcr\OcrProcessors\Remote\Client; + +/** + * Holds the settings of the generated client: API keys and prefixes, access token, basic-auth credentials, host, user agent, debug options and temp folder path. + * PHP version 7.4 + * + * @category Class + * @package OCA\WorkflowOcr\OcrProcessors\Remote\Client + * @author OpenAPI Generator team + * @link https://openapi-generator.tech + */ +class Configuration { + public const BOOLEAN_FORMAT_INT = 'int'; + public const BOOLEAN_FORMAT_STRING = 'string'; + + /** + * @var Configuration|null Default instance, created lazily by getDefaultConfiguration() + */ + private static $defaultConfiguration; + + /** + * Associative array to store API key(s), keyed by API key identifier + * + * @var string[] + */ + protected $apiKeys = []; + + /** + * Associative array to store API key prefixes (e.g. Bearer), keyed by API key identifier + * + * @var string[] + */ + protected $apiKeyPrefixes = []; + + /** + * Access token for OAuth/Bearer authentication + * + * @var string + */ + protected $accessToken = ''; + + /** + * Boolean format for query string + * + * @var string + */ + protected $booleanFormatForQueryString = self::BOOLEAN_FORMAT_INT; + + /** + * Username for HTTP basic authentication + * + * @var string + */ + protected $username = ''; + + /** + * Password for HTTP basic authentication + * + * @var string + */ + protected $password = ''; + + /** + * The host + * + * @var string + */ + protected $host = 'http://localhost'; + + /** + * User agent of the HTTP request, set to "OpenAPI-Generator/{version}/PHP" by default + * + * @var string + */ + protected $userAgent = 'OpenAPI-Generator/1.0.0/PHP'; + + /** + * Debug switch (default set to false) + * + * @var bool + */ + protected $debug = false; + + /** + * Debug file location (defaults to 'php://output') + * + * @var string + */ + protected $debugFile = 'php://output'; + + /** + * Temp folder path (set to sys_get_temp_dir() by the constructor) + * + * @var string + */ + protected $tempFolderPath; + + /** + * Constructor: initialises the temp folder path with sys_get_temp_dir() + */ + public function 
__construct() { + $this->tempFolderPath = sys_get_temp_dir(); + } + + /** + * Sets API key + * + * @param string $apiKeyIdentifier API key identifier (authentication scheme) + * @param string $key API key or token + * + * @return $this + */ + public function setApiKey($apiKeyIdentifier, $key) { + $this->apiKeys[$apiKeyIdentifier] = $key; + return $this; + } + + /** + * Gets API key + * + * @param string $apiKeyIdentifier API key identifier (authentication scheme) + * + * @return null|string API key or token + */ + public function getApiKey($apiKeyIdentifier) { + return isset($this->apiKeys[$apiKeyIdentifier]) ? $this->apiKeys[$apiKeyIdentifier] : null; + } + + /** + * Sets the prefix for API key (e.g. Bearer) + * + * @param string $apiKeyIdentifier API key identifier (authentication scheme) + * @param string $prefix API key prefix, e.g. Bearer + * + * @return $this + */ + public function setApiKeyPrefix($apiKeyIdentifier, $prefix) { + $this->apiKeyPrefixes[$apiKeyIdentifier] = $prefix; + return $this; + } + + /** + * Gets API key prefix + * + * @param string $apiKeyIdentifier API key identifier (authentication scheme) + * + * @return null|string + */ + public function getApiKeyPrefix($apiKeyIdentifier) { + return isset($this->apiKeyPrefixes[$apiKeyIdentifier]) ? $this->apiKeyPrefixes[$apiKeyIdentifier] : null; + } + + /** + * Sets the access token for OAuth + * + * @param string $accessToken Token for OAuth + * + * @return $this + */ + public function setAccessToken($accessToken) { + $this->accessToken = $accessToken; + return $this; + } + + /** + * Gets the access token for OAuth + * + * @return string Access token for OAuth + */ + public function getAccessToken() { + return $this->accessToken; + } + + /** + * Sets boolean format for query string. 
+ * + * @param string $booleanFormat Boolean format for query string + * + * @return $this + */ + public function setBooleanFormatForQueryString(string $booleanFormat) { + $this->booleanFormatForQueryString = $booleanFormat; + + return $this; + } + + /** + * Gets boolean format for query string. + * + * @return string Boolean format for query string + */ + public function getBooleanFormatForQueryString(): string { + return $this->booleanFormatForQueryString; + } + + /** + * Sets the username for HTTP basic authentication + * + * @param string $username Username for HTTP basic authentication + * + * @return $this + */ + public function setUsername($username) { + $this->username = $username; + return $this; + } + + /** + * Gets the username for HTTP basic authentication + * + * @return string Username for HTTP basic authentication + */ + public function getUsername() { + return $this->username; + } + + /** + * Sets the password for HTTP basic authentication + * + * @param string $password Password for HTTP basic authentication + * + * @return $this + */ + public function setPassword($password) { + $this->password = $password; + return $this; + } + + /** + * Gets the password for HTTP basic authentication + * + * @return string Password for HTTP basic authentication + */ + public function getPassword() { + return $this->password; + } + + /** + * Sets the host + * + * @param string $host Host + * + * @return $this + */ + public function setHost($host) { + $this->host = $host; + return $this; + } + + /** + * Gets the host + * + * @return string Host + */ + public function getHost() { + return $this->host; + } + + /** + * Sets the user agent of the api client + * + * @param string $userAgent the user agent of the api client + * + * @throws \InvalidArgumentException + * @return $this + */ + public function setUserAgent($userAgent) { + if (!is_string($userAgent)) { + throw new \InvalidArgumentException('User-agent must be a string.'); + } + + $this->userAgent = $userAgent; 
+ return $this; + } + + /** + * Gets the user agent of the api client + * + * @return string user agent + */ + public function getUserAgent() { + return $this->userAgent; + } + + /** + * Sets debug flag + * + * @param bool $debug Debug flag + * + * @return $this + */ + public function setDebug($debug) { + $this->debug = $debug; + return $this; + } + + /** + * Gets the debug flag + * + * @return bool + */ + public function getDebug() { + return $this->debug; + } + + /** + * Sets the debug file + * + * @param string $debugFile Debug file + * + * @return $this + */ + public function setDebugFile($debugFile) { + $this->debugFile = $debugFile; + return $this; + } + + /** + * Gets the debug file + * + * @return string + */ + public function getDebugFile() { + return $this->debugFile; + } + + /** + * Sets the temp folder path + * + * @param string $tempFolderPath Temp folder path + * + * @return $this + */ + public function setTempFolderPath($tempFolderPath) { + $this->tempFolderPath = $tempFolderPath; + return $this; + } + + /** + * Gets the temp folder path + * + * @return string Temp folder path + */ + public function getTempFolderPath() { + return $this->tempFolderPath; + } + + /** + * Gets the default configuration instance + * + * @return Configuration + */ + public static function getDefaultConfiguration() { + if (self::$defaultConfiguration === null) { + self::$defaultConfiguration = new Configuration(); + } + + return self::$defaultConfiguration; + } + + /** + * Sets the default configuration instance + * + * @param Configuration $config An instance of the Configuration Object + * + * @return void + */ + public static function setDefaultConfiguration(Configuration $config) { + self::$defaultConfiguration = $config; + } + + /** + * Gets the essential information for debugging + * + * @return string The report for debugging + */ + public static function toDebugReport() { + $report = 'PHP SDK (OCA\WorkflowOcr\OcrProcessors\Remote\Client) Debug Report:' . 
PHP_EOL; + $report .= ' OS: ' . php_uname() . PHP_EOL; + $report .= ' PHP Version: ' . PHP_VERSION . PHP_EOL; + $report .= ' The version of the OpenAPI document: 0.1.0' . PHP_EOL; + $report .= ' Temp Folder Path: ' . self::getDefaultConfiguration()->getTempFolderPath() . PHP_EOL; + + return $report; + } + + /** + * Get API key (with prefix if set) + * + * @param string $apiKeyIdentifier name of apikey + * + * @return null|string API key with the prefix + */ + public function getApiKeyWithPrefix($apiKeyIdentifier) { + $prefix = $this->getApiKeyPrefix($apiKeyIdentifier); + $apiKey = $this->getApiKey($apiKeyIdentifier); + + if ($apiKey === null) { + return null; + } + + if ($prefix === null) { + $keyWithPrefix = $apiKey; + } else { + $keyWithPrefix = $prefix . ' ' . $apiKey; + } + + return $keyWithPrefix; + } + + /** + * Returns an array of host settings + * + * @return array an array of host settings + */ + public function getHostSettings() { + return [ + [ + 'url' => '', + 'description' => 'No description provided', + ] + ]; + } + + /** + * Returns URL based on host settings, index and variables + * + * @param array $hostSettings array of host settings, generated from getHostSettings() or equivalent from the API clients + * @param int $hostIndex index of the host settings + * @param array|null $variables hash of variable and the corresponding value (optional) + * @throws \InvalidArgumentException if $hostIndex is out of range or a supplied variable value is not one of its enum_values + * @return string URL based on host settings + */ + public static function getHostString(array $hostSettings, $hostIndex, ?array $variables = null) { + if ($variables === null) { + $variables = []; + } + + // check array index out of bound + if ($hostIndex < 0 || $hostIndex >= count($hostSettings)) { + throw new \InvalidArgumentException("Invalid index $hostIndex when selecting the host. Must be less than " . 
count($hostSettings)); + } + + $host = $hostSettings[$hostIndex]; + $url = $host['url']; + + // go through variable and assign a value + foreach ($host['variables'] ?? 
[] as $name => $variable) { + if (array_key_exists($name, $variables)) { // check to see if it's in the variables provided by the user + if (!isset($variable['enum_values']) || in_array($variables[$name], $variable['enum_values'], true)) { // check to see if the value is in the enum + $url = str_replace('{' . $name . '}', $variables[$name], $url); + } else { + throw new \InvalidArgumentException("The variable `$name` in the host URL has invalid value " . $variables[$name] . '. Must be ' . join(',', $variable['enum_values']) . '.'); + } + } else { + // use default value + $url = str_replace('{' . $name . '}', $variable['default_value'], $url); + } + } + + return $url; + } + + /** + * Returns URL based on the index and variables + * + * @param int $index index of the host settings + * @param array|null $variables hash of variable and the corresponding value (optional) + * @return string URL based on host settings + */ + public function getHostFromSettings($index, $variables = null) { + return self::getHostString($this->getHostSettings(), $index, $variables); + } +} diff --git a/lib/OcrProcessors/Remote/Client/IApiClient.php b/lib/OcrProcessors/Remote/Client/IApiClient.php new file mode 100644 index 0000000..939f11d --- /dev/null +++ b/lib/OcrProcessors/Remote/Client/IApiClient.php @@ -0,0 +1,47 @@ + + * + * @license GNU AGPL version 3 or any later version + * + * This program is free software: you can redistribute it and/or modify + * it under the terms of the GNU Affero General Public License as + * published by the Free Software Foundation, either version 3 of the + * License, or (at your option) any later version. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU Affero General Public License for more details. 
+ * + * You should have received a copy of the GNU Affero General Public License + * along with this program. If not, see . + */ + +namespace OCA\WorkflowOcr\OcrProcessors\Remote\Client; + +use OCA\WorkflowOcr\OcrProcessors\Remote\Client\Model\ErrorResult; +use OCA\WorkflowOcr\OcrProcessors\Remote\Client\Model\OcrResult; + +/** + * Remote API supported by Workflow OCR Backend. + */ +interface IApiClient { + /** + * Process OCR on the given file. + * @param resource $file The file to process OCR on. + * @param string $fileName The name of the file. + * @param string $ocrMyPdfParameters The parameters to pass to ocrmypdf. + * @return OcrResult|ErrorResult The result of the OCR operation (the ApiClient implementation returns ErrorResult when the backend answers with status 500). + * @throws \RuntimeException The ApiClient implementation throws this when the request fails or the status code is neither 200 nor 500. + */ + public function processOcr($file, string $fileName, string $ocrMyPdfParameters): OcrResult|ErrorResult; + + /** + * Get the list of installed Tesseract languages. + * @return string[] + * @throws \RuntimeException The ApiClient implementation throws this when the request fails or the status code is not 200. + */ + public function getLanguages(): array; +} diff --git a/lib/OcrProcessors/Remote/Client/Model/ErrorResult.php b/lib/OcrProcessors/Remote/Client/Model/ErrorResult.php new file mode 100644 index 0000000..c2ad64d --- /dev/null +++ b/lib/OcrProcessors/Remote/Client/Model/ErrorResult.php @@ -0,0 +1,386 @@ + + * + * @license GNU AGPL version 3 or any later version + * + * This program is free software: you can redistribute it and/or modify + * it under the terms of the GNU Affero General Public License as + * published by the Free Software Foundation, either version 3 of the + * License, or (at your option) any later version. 
+ * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU Affero General Public License for more details. + * + * You should have received a copy of the GNU Affero General Public License + * along with this program. If not, see . 
+ */ + +/** + * NOTE: This class is auto generated by OpenAPI Generator (https://openapi-generator.tech). + * https://openapi-generator.tech + * Do not edit the class manually. + */ + +namespace OCA\WorkflowOcr\OcrProcessors\Remote\Client\Model; + +use ArrayAccess; +use OCA\WorkflowOcr\OcrProcessors\Remote\Client\ObjectSerializer; + +/** + * ErrorResult model with a single non-nullable string property 'message' (the error message) + * + * @category Class + * @package OCA\WorkflowOcr\OcrProcessors\Remote\Client + * @author OpenAPI Generator team + * @link https://openapi-generator.tech + * @implements \ArrayAccess + */ +class ErrorResult implements ModelInterface, ArrayAccess, \JsonSerializable { + public const DISCRIMINATOR = null; + + /** + * The original name of the model. + * + * @var string + */ + protected static $openAPIModelName = 'ErrorResult'; + + /** + * Array of property to type mappings. Used for (de)serialization + * + * @var string[] + */ + protected static $openAPITypes = [ + 'message' => 'string' + ]; + + /** + * Array of property to format mappings. Used for (de)serialization + * + * @var string[] + * @phpstan-var array + * @psalm-var array + */ + protected static $openAPIFormats = [ + 'message' => null + ]; + + /** + * Array of nullable properties (property name => whether it is nullable). Used for (de)serialization + * + * @var boolean[] + */ + protected static array $openAPINullables = [ + 'message' => false + ]; + + /** + * Names of the nullable fields that were explicitly set to null + * + * @var string[] + */ + protected array $openAPINullablesSetToNull = []; + + /** + * Array of property to type mappings. Used for (de)serialization + * + * @return array + */ + public static function openAPITypes() { + return self::$openAPITypes; + } + + /** + * Array of property to format mappings. 
Used for (de)serialization + * + * @return array + */ + public static function openAPIFormats() { + return self::$openAPIFormats; + } + + /** + * Array of nullable properties + * + * @return array + */ + protected static function openAPINullables(): array { + return self::$openAPINullables; + } + + /** + * Array of nullable field names deliberately set to null + * + * @return string[] + */ + private function getOpenAPINullablesSetToNull(): array { + return $this->openAPINullablesSetToNull; + } + + /** + * Setter - Array of nullable field names deliberately set to null + * + * @param string[] $openAPINullablesSetToNull + */ + private function setOpenAPINullablesSetToNull(array $openAPINullablesSetToNull): void { + $this->openAPINullablesSetToNull = $openAPINullablesSetToNull; + } + + /** + * Checks if a property is nullable + * + * @param string $property + * @return bool + */ + public static function isNullable(string $property): bool { + return self::openAPINullables()[$property] ?? false; + } + + /** + * Checks if a nullable property is set to null. 
+ * + * @param string $property + * @return bool + */ + public function isNullableSetToNull(string $property): bool { + return in_array($property, $this->getOpenAPINullablesSetToNull(), true); + } + + /** + * Array of attributes where the key is the local name, + * and the value is the original name + * + * @var string[] + */ + protected static $attributeMap = [ + 'message' => 'message' + ]; + + /** + * Array of attributes to setter functions (for deserialization of responses) + * + * @var string[] + */ + protected static $setters = [ + 'message' => 'setMessage' + ]; + + /** + * Array of attributes to getter functions (for serialization of requests) + * + * @var string[] + */ + protected static $getters = [ + 'message' => 'getMessage' + ]; + + /** + * Array of attributes where the key is the local name, + * and the value is the original name + * + * @return array + */ + public static function attributeMap() { + return self::$attributeMap; + } + + /** + * Array of attributes to setter functions (for deserialization of responses) + * + * @return array + */ + public static function setters() { + return self::$setters; + } + + /** + * Array of attributes to getter functions (for serialization of requests) + * + * @return array + */ + public static function getters() { + return self::$getters; + } + + /** + * The original name of the model. + * + * @return string + */ + public function getModelName() { + return self::$openAPIModelName; + } + + + /** + * Associative array for storing property values + * + * @var mixed[] + */ + protected $container = []; + + /** + * Constructor + * + * @param mixed[] $data Associated array of property values + * initializing the model + */ + public function __construct(?array $data = null) { + $this->setIfExists('message', $data ?? 
[], null); + } + + /** + * Sets $this->container[$variableName] to the given data or to the given default Value; if $variableName + * is nullable and its value is set to null in the $fields array, then mark it as "set to null" in the + * $this->openAPINullablesSetToNull array + * + * @param string $variableName + * @param array $fields + * @param mixed $defaultValue + */ + private function setIfExists(string $variableName, array $fields, $defaultValue): void { + if (self::isNullable($variableName) && array_key_exists($variableName, $fields) && is_null($fields[$variableName])) { + $this->openAPINullablesSetToNull[] = $variableName; + } + + $this->container[$variableName] = $fields[$variableName] ?? $defaultValue; + } + + /** + * Show all the invalid properties with reasons. + * + * @return array invalid properties with reasons + */ + public function listInvalidProperties() { + $invalidProperties = []; + + if ($this->container['message'] === null) { + $invalidProperties[] = "'message' can't be null"; + } + return $invalidProperties; + } + + /** + * Validate all the properties in the model + * return true if all passed + * + * @return bool True if all properties are valid + */ + public function valid() { + return count($this->listInvalidProperties()) === 0; + } + + + /** + * Gets message + * + * @return string + */ + public function getMessage() { + return $this->container['message']; + } + + /** + * Sets message + * + * @param string $message Error message + * + * @throws \InvalidArgumentException if $message is null + * @return self + */ + public function setMessage($message) { + if (is_null($message)) { + throw new \InvalidArgumentException('non-nullable message cannot be null'); + } + $this->container['message'] = $message; + + return $this; + } + /** + * Returns true if offset exists. False otherwise. 
+ * + * @param int|string $offset Offset (container keys are property names such as 'message') + * + * @return boolean + */ + public function offsetExists($offset): bool { + return isset($this->container[$offset]); + } + + /** + * Gets offset. 
+ * + * @param int|string $offset Offset (container keys are property names such as 'message') + * + * @return mixed|null + */ + #[\ReturnTypeWillChange] + public function offsetGet($offset): mixed { + return $this->container[$offset] ?? null; + } + + /** + * Sets value based on offset. + * + * @param int|string|null $offset Offset; null appends the value to the container + * @param mixed $value Value to be set + * + * @return void + */ + public function offsetSet($offset, $value): void { + if (is_null($offset)) { + $this->container[] = $value; + } else { + $this->container[$offset] = $value; + } + } + + /** + * Unsets offset. + * + * @param int|string $offset Offset (container keys are property names such as 'message') + * + * @return void + */ + public function offsetUnset($offset): void { + unset($this->container[$offset]); + } + + /** + * Serializes the object to a value that can be serialized natively by json_encode(). + * @link https://www.php.net/manual/en/jsonserializable.jsonserialize.php + * + * @return mixed Returns data which can be serialized by json_encode(), which is a value + * of any type other than a resource. 
+ */ + #[\ReturnTypeWillChange] + public function jsonSerialize(): mixed { + return ObjectSerializer::sanitizeForSerialization($this); + } + + /** + * Gets the string presentation of the object + * + * @return string + */ + public function __toString() { + return json_encode( + ObjectSerializer::sanitizeForSerialization($this), + JSON_PRETTY_PRINT + ); + } + + /** + * Gets a header-safe presentation of the object + * + * @return string + */ + public function toHeaderValue() { + return json_encode(ObjectSerializer::sanitizeForSerialization($this)); + } +} diff --git a/lib/OcrProcessors/Remote/Client/Model/ModelInterface.php b/lib/OcrProcessors/Remote/Client/Model/ModelInterface.php new file mode 100644 index 0000000..9eb8f8b --- /dev/null +++ b/lib/OcrProcessors/Remote/Client/Model/ModelInterface.php @@ -0,0 +1,120 @@ + + * + * @license GNU AGPL version 3 or any later version + * + * This program is free software: you can redistribute it and/or modify + * it under the terms of the GNU Affero General 
Public License as + * published by the Free Software Foundation, either version 3 of the + * License, or (at your option) any later version. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU Affero General Public License for more details. + * + * You should have received a copy of the GNU Affero General Public License + * along with this program. If not, see . + */ + +/** + * NOTE: This class is auto generated by OpenAPI Generator (https://openapi-generator.tech). + * https://openapi-generator.tech + * Do not edit the class manually. + */ + +namespace OCA\WorkflowOcr\OcrProcessors\Remote\Client\Model; + +/** + * Interface abstracting model access. + * + * @package OCA\WorkflowOcr\OcrProcessors\Remote\Client\Model + * @author OpenAPI Generator team + */ +interface ModelInterface { + /** + * The original name of the model. + * + * @return string + */ + public function getModelName(); + + /** + * Array of property to type mappings. Used for (de)serialization + * + * @return string[] property name => type (e.g. 'message' => 'string' in ErrorResult) + */ + public static function openAPITypes(); + + /** + * Array of property to format mappings. Used for (de)serialization + * + * @return array property name => format (ErrorResult maps 'message' to null) + */ + public static function openAPIFormats(); + + /** + * Array of attributes where the key is the local name, and the value is the original name + * + * @return string[] local name => original name + */ + public static function attributeMap(); + + /** + * Array of attributes to setter functions (for deserialization of responses) + * + * @return string[] attribute name => setter method name (e.g. 
'message' => 'setMessage' in ErrorResult) + */ + public static function setters(); + + /** + * Array of attributes to getter functions (for serialization of requests) + * + * @return string[] attribute name => getter method name (e.g. 'message' => 'getMessage' in ErrorResult) + */ + public static function getters(); + + /** + * Show all the invalid properties with reasons. 
+ * + * @return string[] one reason per invalid property; empty when valid() would return true in ErrorResult + */ + public function listInvalidProperties(); + + /** + * Validate all the properties in the model + * return true if all passed + * + * @return bool + */ + public function valid(); + + /** + * Checks if a property is nullable + * + * @param string $property + * @return bool + */ + public static function isNullable(string $property): bool; + + /** + * Checks if a nullable property is set to null. + * + * @param string $property + * @return bool + */ + public function isNullableSetToNull(string $property): bool; +} diff --git a/lib/OcrProcessors/Remote/Client/Model/OcrResult.php b/lib/OcrProcessors/Remote/Client/Model/OcrResult.php new file mode 100644 index 0000000..b971d6b --- /dev/null +++ b/lib/OcrProcessors/Remote/Client/Model/OcrResult.php @@ -0,0 +1,491 @@ + + * + * @license GNU AGPL version 3 or any later version + * + * This program is free software: you can redistribute it and/or modify + * it under the terms of the GNU Affero General Public License as + * published by the Free Software Foundation, either version 3 of the + * License, or (at your option) any later version. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU Affero General Public License for more details. + * + * You should have received a copy of the GNU Affero General Public License + * along with this program. If not, see . + */ + +/** + * NOTE: This class is auto generated by OpenAPI Generator (https://openapi-generator.tech). + * https://openapi-generator.tech + * Do not edit the class manually. 
+ */
+
+namespace OCA\WorkflowOcr\OcrProcessors\Remote\Client\Model;
+
+use ArrayAccess;
+use OCA\WorkflowOcr\OcrProcessors\Remote\Client\ObjectSerializer;
+
+/**
+ * Data model for the "OcrResult" schema: the JSON body of a successful OCR response.
+ *
+ * All four properties (filename, contentType, recognizedText, fileContent) are
+ * declared as non-nullable strings. Values are kept in an internal container and
+ * can be reached through the typed accessors or through array syntax.
+ *
+ * @category Class
+ * @package  OCA\WorkflowOcr\OcrProcessors\Remote\Client
+ * @author   OpenAPI Generator team
+ * @link     https://openapi-generator.tech
+ * @implements \ArrayAccess<string, mixed>
+ */
+class OcrResult implements ModelInterface, ArrayAccess, \JsonSerializable {
+    public const DISCRIMINATOR = null;
+
+    /**
+     * Schema name as it appears in the OpenAPI document.
+     *
+     * @var string
+     */
+    protected static $openAPIModelName = 'OcrResult';
+
+    /**
+     * Property name => OpenAPI type. Drives (de)serialization.
+     *
+     * @var string[]
+     */
+    protected static $openAPITypes = [
+        'filename' => 'string',
+        'contentType' => 'string',
+        'recognizedText' => 'string',
+        'fileContent' => 'string',
+    ];
+
+    /**
+     * Property name => OpenAPI format; null where the schema declares none.
+     *
+     * @var string[]
+     * @phpstan-var array<string, string|null>
+     * @psalm-var array<string, string|null>
+     */
+    protected static $openAPIFormats = [
+        'filename' => null,
+        'contentType' => null,
+        'recognizedText' => null,
+        'fileContent' => null,
+    ];
+
+    /**
+     * Property name => whether the schema allows null for it.
+     *
+     * @var boolean[]
+     */
+    protected static array $openAPINullables = [
+        'filename' => false,
+        'contentType' => false,
+        'recognizedText' => false,
+        'fileContent' => false,
+    ];
+
+    /**
+     * Local property name => name used on the wire.
+     *
+     * @var string[]
+     */
+    protected static $attributeMap = [
+        'filename' => 'filename',
+        'contentType' => 'contentType',
+        'recognizedText' => 'recognizedText',
+        'fileContent' => 'fileContent',
+    ];
+
+    /**
+     * Property name => setter method (used when deserializing responses).
+     *
+     * @var string[]
+     */
+    protected static $setters = [
+        'filename' => 'setFilename',
+        'contentType' => 'setContentType',
+        'recognizedText' => 'setRecognizedText',
+        'fileContent' => 'setFileContent',
+    ];
+
+    /**
+     * Property name => getter method (used when serializing requests).
+     *
+     * @var string[]
+     */
+    protected static $getters = [
+        'filename' => 'getFilename',
+        'contentType' => 'getContentType',
+        'recognizedText' => 'getRecognizedText',
+        'fileContent' => 'getFileContent',
+    ];
+
+    /**
+     * Names of nullable properties that were explicitly given as null.
+     *
+     * @var string[]
+     */
+    protected array $openAPINullablesSetToNull = [];
+
+    /**
+     * Property values, keyed by property name.
+     *
+     * @var mixed[]
+     */
+    protected $container = [];
+
+    /**
+     * Builds the model from an optional associative array of property values.
+     * Properties missing from $data are stored as null.
+     *
+     * @param mixed[]|null $data property name => value
+     */
+    public function __construct(?array $data = null) {
+        $fields = $data ?? [];
+        foreach (['filename', 'contentType', 'recognizedText', 'fileContent'] as $property) {
+            $this->setIfExists($property, $fields, null);
+        }
+    }
+
+    /**
+     * Property name => OpenAPI type.
+     *
+     * @return array
+     */
+    public static function openAPITypes() {
+        return self::$openAPITypes;
+    }
+
+    /**
+     * Property name => OpenAPI format.
+     *
+     * @return array
+     */
+    public static function openAPIFormats() {
+        return self::$openAPIFormats;
+    }
+
+    /**
+     * Property name => nullable flag.
+     *
+     * @return array
+     */
+    protected static function openAPINullables(): array {
+        return self::$openAPINullables;
+    }
+
+    /**
+     * Names of nullable properties that were deliberately set to null.
+     *
+     * @return string[]
+     */
+    private function getOpenAPINullablesSetToNull(): array {
+        return $this->openAPINullablesSetToNull;
+    }
+
+    /**
+     * Replaces the list of nullable properties that were deliberately set to null.
+     *
+     * @param string[] $openAPINullablesSetToNull
+     */
+    private function setOpenAPINullablesSetToNull(array $openAPINullablesSetToNull): void {
+        $this->openAPINullablesSetToNull = $openAPINullablesSetToNull;
+    }
+
+    /**
+     * Tells whether the schema allows null for the given property.
+     * Unknown property names are reported as not nullable.
+     *
+     * @param string $property
+     * @return bool
+     */
+    public static function isNullable(string $property): bool {
+        $nullables = self::openAPINullables();
+
+        return $nullables[$property] ?? false;
+    }
+
+    /**
+     * Tells whether a nullable property was explicitly set to null.
+     *
+     * @param string $property
+     * @return bool
+     */
+    public function isNullableSetToNull(string $property): bool {
+        $explicitNulls = $this->getOpenAPINullablesSetToNull();
+
+        return in_array($property, $explicitNulls, true);
+    }
+
+    /**
+     * Local property name => wire name.
+     *
+     * @return array
+     */
+    public static function attributeMap() {
+        return self::$attributeMap;
+    }
+
+    /**
+     * Property name => setter method name.
+     *
+     * @return array
+     */
+    public static function setters() {
+        return self::$setters;
+    }
+
+    /**
+     * Property name => getter method name.
+     *
+     * @return array
+     */
+    public static function getters() {
+        return self::$getters;
+    }
+
+    /**
+     * Schema name as it appears in the OpenAPI document.
+     *
+     * @return string
+     */
+    public function getModelName() {
+        return self::$openAPIModelName;
+    }
+
+    /**
+     * Copies $fields[$variableName] into the container, falling back to
+     * $defaultValue when the key is absent or null. When the property is
+     * nullable and the key is present with a null value, the property name is
+     * also recorded as "explicitly set to null".
+     *
+     * @param string $variableName
+     * @param array  $fields
+     * @param mixed  $defaultValue
+     */
+    private function setIfExists(string $variableName, array $fields, $defaultValue): void {
+        $givenAsNull = array_key_exists($variableName, $fields) && $fields[$variableName] === null;
+        if ($givenAsNull && self::isNullable($variableName)) {
+            $this->openAPINullablesSetToNull[] = $variableName;
+        }
+
+        $this->container[$variableName] = $fields[$variableName] ?? $defaultValue;
+    }
+
+    /**
+     * Lists every required property that currently holds null.
+     *
+     * @return array one human-readable message per invalid property
+     */
+    public function listInvalidProperties() {
+        $problems = [];
+        foreach (['filename', 'contentType', 'recognizedText', 'fileContent'] as $property) {
+            if ($this->container[$property] === null) {
+                $problems[] = "'{$property}' can't be null";
+            }
+        }
+
+        return $problems;
+    }
+
+    /**
+     * Tells whether the model currently has no invalid properties.
+     *
+     * @return bool True if all properties are valid
+     */
+    public function valid() {
+        return $this->listInvalidProperties() === [];
+    }
+
+    /**
+     * Stores a value for a non-nullable property.
+     *
+     * @param string $property property name, also used in the error message
+     * @param mixed  $value    value to store
+     *
+     * @return self
+     * @throws \InvalidArgumentException when $value is null
+     */
+    private function storeRequired(string $property, $value) {
+        if ($value === null) {
+            throw new \InvalidArgumentException("non-nullable {$property} cannot be null");
+        }
+        $this->container[$property] = $value;
+
+        return $this;
+    }
+
+    /**
+     * Gets filename
+     *
+     * @return string
+     */
+    public function getFilename() {
+        return $this->container['filename'];
+    }
+
+    /**
+     * Sets filename
+     *
+     * @param string $filename Name of the file
+     *
+     * @return self
+     */
+    public function setFilename($filename) {
+        return $this->storeRequired('filename', $filename);
+    }
+
+    /**
+     * Gets contentType
+     *
+     * @return string
+     */
+    public function getContentType() {
+        return $this->container['contentType'];
+    }
+
+    /**
+     * Sets contentType
+     *
+     * @param string $contentType Content type of the file. For example: application/pdf
+     *
+     * @return self
+     */
+    public function setContentType($contentType) {
+        return $this->storeRequired('contentType', $contentType);
+    }
+
+    /**
+     * Gets recognizedText
+     *
+     * @return string
+     */
+    public function getRecognizedText() {
+        return $this->container['recognizedText'];
+    }
+
+    /**
+     * Sets recognizedText
+     *
+     * @param string $recognizedText Recognized text from the file
+     *
+     * @return self
+     */
+    public function setRecognizedText($recognizedText) {
+        return $this->storeRequired('recognizedText', $recognizedText);
+    }
+
+    /**
+     * Gets fileContent
+     *
+     * @return string
+     */
+    public function getFileContent() {
+        return $this->container['fileContent'];
+    }
+
+    /**
+     * Sets fileContent
+     *
+     * @param string $fileContent Base64 encoded file content
+     *
+     * @return self
+     */
+    public function setFileContent($fileContent) {
+        return $this->storeRequired('fileContent', $fileContent);
+    }
+
+    /**
+     * Tells whether the container holds a non-null value at $offset.
+     *
+     * @param int|string $offset Offset
+     *
+     * @return boolean
+     */
+    public function offsetExists($offset): bool {
+        return isset($this->container[$offset]);
+    }
+
+    /**
+     * Returns the value stored at $offset, or null when nothing is stored there.
+     *
+     * @param int|string $offset Offset
+     *
+     * @return mixed|null
+     */
+    #[\ReturnTypeWillChange]
+    public function offsetGet($offset): mixed {
+        return $this->container[$offset] ?? null;
+    }
+
+    /**
+     * Stores $value at $offset; a null offset appends to the container.
+     *
+     * @param int|string|null $offset Offset
+     * @param mixed           $value  Value to be set
+     *
+     * @return void
+     */
+    public function offsetSet($offset, $value): void {
+        if ($offset === null) {
+            $this->container[] = $value;
+            return;
+        }
+
+        $this->container[$offset] = $value;
+    }
+
+    /**
+     * Removes the entry stored at $offset.
+     *
+     * @param int|string $offset Offset
+     *
+     * @return void
+     */
+    public function offsetUnset($offset): void {
+        unset($this->container[$offset]);
+    }
+
+    /**
+     * Hands json_encode() the sanitized form produced by ObjectSerializer.
+     * @link https://www.php.net/manual/en/jsonserializable.jsonserialize.php
+     *
+     * @return mixed data which can be serialized by json_encode()
+     */
+    #[\ReturnTypeWillChange]
+    public function jsonSerialize(): mixed {
+        return ObjectSerializer::sanitizeForSerialization($this);
+    }
+
+    /**
+     * Pretty-printed JSON representation of the model.
+     *
+     * @return string
+     */
+    public function __toString() {
+        $sanitized = ObjectSerializer::sanitizeForSerialization($this);
+
+        return json_encode($sanitized, JSON_PRETTY_PRINT);
+    }
+
+    /**
+     * Compact JSON representation of the model, for use in a header.
+     *
+     * @return string
+     */
+    public function toHeaderValue() {
+        $sanitized = ObjectSerializer::sanitizeForSerialization($this);
+
+        return json_encode($sanitized);
+    }
+}
diff --git a/lib/OcrProcessors/Remote/Client/ObjectSerializer.php b/lib/OcrProcessors/Remote/Client/ObjectSerializer.php new file mode 100644 index 0000000..bcfba15 --- /dev/null +++ b/lib/OcrProcessors/Remote/Client/ObjectSerializer.php @@ -0,0 +1,617 @@ + + * + * @license GNU AGPL version 3 or any later version + * + * This program is free software: you can redistribute it and/or modify + * it under the terms of the GNU Affero General Public License as + * published by the Free Software Foundation, either version 3 of the + * License, or (at your option) any later version.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ * GNU Affero General Public License for more details.
+ *
+ * You should have received a copy of the GNU Affero General Public License
+ * along with this program. If not, see <http://www.gnu.org/licenses/>.
+ */
+
+/**
+ * NOTE: This class is auto generated by OpenAPI Generator (https://openapi-generator.tech).
+ * https://openapi-generator.tech
+ * Do not edit the class manually.
+ */
+
+namespace OCA\WorkflowOcr\OcrProcessors\Remote\Client;
+
+use GuzzleHttp\Psr7\Utils;
+use OCA\WorkflowOcr\OcrProcessors\Remote\Client\Model\ModelInterface;
+
+/**
+ * Static helpers that convert between PHP values and the representations the
+ * API client puts on the wire: JSON bodies, query strings, path segments,
+ * headers and form fields.
+ *
+ * @category Class
+ * @package  OCA\WorkflowOcr\OcrProcessors\Remote\Client
+ * @author   OpenAPI Generator team
+ * @link     https://openapi-generator.tech
+ */
+class ObjectSerializer {
+    /**
+     * Type names that are handled directly instead of being treated as a
+     * model or enum class.
+     */
+    private const BUILTIN_TYPES = [
+        '\DateTime', '\SplFileObject', 'array', 'bool', 'boolean', 'byte', 'float',
+        'int', 'integer', 'mixed', 'number', 'object', 'string', 'void',
+    ];
+
+    /** @var string */
+    private static $dateTimeFormat = \DateTime::ATOM;
+
+    /**
+     * Change the date format
+     *
+     * @param string $format the new date format to use
+     */
+    public static function setDateTimeFormat($format) {
+        self::$dateTimeFormat = $format;
+    }
+
+    /**
+     * Converts a value into something json_encode() can handle.
+     *
+     * Scalars and null pass through, dates are formatted, arrays are converted
+     * element by element, models are walked via their getters, other objects
+     * via their iterable properties. Anything else is cast to string.
+     *
+     * @param mixed  $data   the data to serialize
+     * @param string $type   the OpenAPIToolsType of the data
+     * @param string $format the format of the OpenAPITools type of the data
+     *
+     * @return scalar|object|array|null serialized form of $data
+     * @throws \InvalidArgumentException when a model property holds a value its enum type does not allow
+     */
+    public static function sanitizeForSerialization($data, $type = null, $format = null) {
+        if (is_scalar($data) || $data === null) {
+            return $data;
+        }
+
+        // Any date object works here, not only the mutable \DateTime.
+        if ($data instanceof \DateTimeInterface) {
+            return ($format === 'date') ? $data->format('Y-m-d') : $data->format(self::$dateTimeFormat);
+        }
+
+        if (is_array($data)) {
+            foreach ($data as $property => $value) {
+                $data[$property] = self::sanitizeForSerialization($value);
+            }
+            return $data;
+        }
+
+        if (!is_object($data)) {
+            return (string)$data;
+        }
+
+        $values = [];
+        if ($data instanceof ModelInterface) {
+            // These maps do not change while iterating, so fetch them once.
+            $formats = $data::openAPIFormats();
+            $getters = $data::getters();
+            $attributeMap = $data::attributeMap();
+
+            foreach ($data::openAPITypes() as $property => $openAPIType) {
+                $getter = $getters[$property];
+                $value = $data->$getter();
+
+                if ($value !== null && !in_array($openAPIType, self::BUILTIN_TYPES, true)) {
+                    // A non-builtin type may be an enum class; if so, validate the value.
+                    $callable = [$openAPIType, 'getAllowableEnumValues'];
+                    if (is_callable($callable)) {
+                        $allowedEnumTypes = $callable();
+                        if (!in_array($value, $allowedEnumTypes, true)) {
+                            $imploded = implode("', '", $allowedEnumTypes);
+                            throw new \InvalidArgumentException("Invalid value for enum '$openAPIType', must be one of: '$imploded'");
+                        }
+                    }
+                }
+
+                // Null is only emitted when the property is nullable and was set to null on purpose.
+                if (($data::isNullable($property) && $data->isNullableSetToNull($property)) || $value !== null) {
+                    $values[$attributeMap[$property]] = self::sanitizeForSerialization($value, $openAPIType, $formats[$property]);
+                }
+            }
+        } else {
+            foreach ($data as $property => $value) {
+                $values[$property] = self::sanitizeForSerialization($value);
+            }
+        }
+
+        return (object)$values;
+    }
+
+    /**
+     * Sanitize filename by removing path.
+     * e.g. ../../sun.gif becomes sun.gif
+     *
+     * @param string $filename filename to be sanitized
+     *
+     * @return string the sanitized filename
+     */
+    public static function sanitizeFilename($filename) {
+        if (preg_match("/.*[\/\\\\](.*)$/", $filename, $match)) {
+            return $match[1];
+        }
+
+        return $filename;
+    }
+
+    /**
+     * Shortens the fractional seconds of a timestamp to six digits.
+     *
+     * @param string $timestamp Original timestamp
+     *
+     * @return string the shortened timestamp; a non-string argument is returned unchanged
+     */
+    public static function sanitizeTimestamp($timestamp) {
+        if (!is_string($timestamp)) {
+            return $timestamp;
+        }
+
+        // The dot is escaped so only a real decimal point starts the fraction.
+        return preg_replace('/(:\d{2}\.\d{6})\d*/', '$1', $timestamp);
+    }
+
+    /**
+     * Take value and turn it into a string suitable for inclusion in
+     * the path, by url-encoding.
+     *
+     * @param string $value a string which will be part of the path
+     *
+     * @return string the serialized object
+     */
+    public static function toPathValue($value) {
+        return rawurlencode(self::toString($value));
+    }
+
+    /**
+     * Checks if a value is empty, based on its OpenAPI type.
+     *
+     * @param mixed  $value
+     * @param string $openApiType
+     *
+     * @return bool true if $value is empty
+     */
+    private static function isEmptyValue($value, string $openApiType): bool {
+        // If empty() returns false, it is not empty regardless of its type.
+        if (!empty($value)) {
+            return false;
+        }
+
+        // Null is always empty, as we cannot send a real "null" value in a query parameter.
+        if ($value === null) {
+            return true;
+        }
+
+        // From here on $value is one of PHP's "empty" values; decide per type
+        // whether it still carries meaning (0 for numbers, false for booleans).
+        return match ($openApiType) {
+            'int', 'integer' => $value !== 0,
+            'number', 'float' => $value !== 0 && $value !== 0.0,
+            'bool', 'boolean' => !in_array($value, [false, 0], true),
+            'string' => $value === '',
+            default => true,
+        };
+    }
+
+    /**
+     * Take query parameter properties and turn it into an array suitable for
+     * native http_build_query or GuzzleHttp\Psr7\Query::build.
+     *
+     * @param mixed  $value       Parameter value
+     * @param string $paramName   Parameter name
+     * @param string $openApiType OpenAPIType eg. array or object
+     * @param string $style       Parameter serialization style
+     * @param bool   $explode     Parameter explode option
+     * @param bool   $required    Whether query param is required or not
+     *
+     * @return array
+     */
+    public static function toQueryValue(
+        $value,
+        string $paramName,
+        string $openApiType = 'string',
+        string $style = 'form',
+        bool $explode = true,
+        bool $required = true,
+    ): array {
+        // An empty value is dropped when optional and sent as '' when required.
+        // What counts as empty depends on the OpenAPI type: 0 as "int" or
+        // false as "boolean" is NOT empty.
+        if (self::isEmptyValue($value, $openApiType)) {
+            return $required ? [$paramName => ''] : [];
+        }
+
+        // Handle date objects in query
+        if ($openApiType === '\\DateTime' && $value instanceof \DateTimeInterface) {
+            return [$paramName => $value->format(self::$dateTimeFormat)];
+        }
+
+        $query = [];
+        $value = (in_array($openApiType, ['object', 'array'], true)) ? (array)$value : $value;
+
+        // since \GuzzleHttp\Psr7\Query::build fails with nested arrays
+        // need to flatten array first
+        $flattenArray = function ($arr, $name, &$result = []) use (&$flattenArray, $style, $explode) {
+            if (!is_array($arr)) {
+                return $arr;
+            }
+
+            foreach ($arr as $k => $v) {
+                $prop = ($style === 'deepObject') ? "{$name}[{$k}]" : $k;
+
+                if (is_array($v)) {
+                    $flattenArray($v, $prop, $result);
+                } else {
+                    if ($style !== 'deepObject' && !$explode) {
+                        // push key itself
+                        $result[] = $prop;
+                    }
+                    $result[$prop] = $v;
+                }
+            }
+            return $result;
+        };
+
+        $value = $flattenArray($value, $paramName);
+
+        // https://github.com/OAI/OpenAPI-Specification/blob/main/versions/3.1.0.md#style-values
+        if ($openApiType === 'array' && $style === 'deepObject' && $explode) {
+            return $value;
+        }
+
+        if ($openApiType === 'object' && ($style === 'deepObject' || $explode)) {
+            return $value;
+        }
+
+        if ($openApiType === 'boolean' && is_bool($value)) {
+            $value = self::convertBoolToQueryStringFormat($value);
+        }
+
+        // handle style in serializeCollection
+        $query[$paramName] = ($explode) ? $value : self::serializeCollection((array)$value, $style);
+
+        return $query;
+    }
+
+    /**
+     * Convert boolean value to format for query string.
+     *
+     * @param bool $value Boolean value
+     *
+     * @return int|string 'true'/'false' or 1/0, depending on the default configuration
+     */
+    public static function convertBoolToQueryStringFormat(bool $value) {
+        if (Configuration::getDefaultConfiguration()->getBooleanFormatForQueryString() == Configuration::BOOLEAN_FORMAT_STRING) {
+            return $value ? 'true' : 'false';
+        }
+
+        return (int)$value;
+    }
+
+    /**
+     * Take value and turn it into a string suitable for inclusion in
+     * the header. A value that offers a callable toHeaderValue() is asked to
+     * render itself; everything else goes through toString().
+     *
+     * @param mixed $value a value which will be part of the header
+     *
+     * @return string the header string
+     */
+    public static function toHeaderValue($value) {
+        $callable = [$value, 'toHeaderValue'];
+        if (is_callable($callable)) {
+            return $callable();
+        }
+
+        return self::toString($value);
+    }
+
+    /**
+     * Take value and turn it into a string suitable for inclusion in
+     * the http body (form parameter). A file object yields its real path;
+     * everything else goes through toString().
+     *
+     * @param string|\SplFileObject $value the value of the form parameter
+     *
+     * @return string the form string
+     */
+    public static function toFormValue($value) {
+        if ($value instanceof \SplFileObject) {
+            return $value->getRealPath();
+        }
+
+        return self::toString($value);
+    }
+
+    /**
+     * Take value and turn it into a string suitable for inclusion in
+     * the parameter. If it's a string, pass through unchanged
+     * If it's a date object, format it with the configured date-time format
+     * If it's a boolean, convert it to "true" or "false".
+     *
+     * @param float|int|bool|string|\DateTimeInterface $value the value of the parameter
+     *
+     * @return string the parameter string
+     */
+    public static function toString($value) {
+        if ($value instanceof \DateTimeInterface) {
+            return $value->format(self::$dateTimeFormat);
+        }
+        if (is_bool($value)) {
+            return $value ? 'true' : 'false';
+        }
+
+        return (string)$value;
+    }
+
+    /**
+     * Serialize an array to a string.
+     *
+     * @param array  $collection                 collection to serialize to a string
+     * @param string $style                      the format use for serialization (csv,
+     *                                           ssv, tsv, pipes, multi)
+     * @param bool   $allowCollectionFormatMulti allow collection format to be a multidimensional array
+     *
+     * @return string
+     */
+    public static function serializeCollection(array $collection, $style, $allowCollectionFormatMulti = false) {
+        if ($allowCollectionFormatMulti && ($style === 'multi')) {
+            // http_build_query() almost does the job for us. We just
+            // need to fix the result of multidimensional arrays.
+            return preg_replace('/%5B[0-9]+%5D=/', '=', http_build_query($collection, '', '&'));
+        }
+        switch ($style) {
+            case 'pipeDelimited':
+            case 'pipes':
+                return implode('|', $collection);
+
+            case 'tsv':
+                return implode("\t", $collection);
+
+            case 'spaceDelimited':
+            case 'ssv':
+                return implode(' ', $collection);
+
+            case 'simple':
+            case 'csv':
+                // Deliberate fall through. CSV is default format.
+            default:
+                return implode(',', $collection);
+        }
+    }
+
+    /**
+     * Deserialize a JSON string (or already decoded data) into the requested type.
+     *
+     * @param mixed    $data        object or primitive to be deserialized
+     * @param string   $class       class name is passed as a string
+     * @param string[] $httpHeaders HTTP headers
+     *
+     * @return object|array|null a single or an array of $class instances
+     * @throws \InvalidArgumentException when list data is not an array or an enum value is not allowed
+     * @throws \RuntimeException when the target file for a \SplFileObject cannot be opened
+     */
+    public static function deserialize($data, $class, $httpHeaders = null) {
+        if ($data === null) {
+            return null;
+        }
+
+        // "Type[]": a list whose elements are deserialized one by one.
+        if (strcasecmp(substr($class, -2), '[]') === 0) {
+            $data = is_string($data) ? json_decode($data) : $data;
+
+            if (!is_array($data)) {
+                throw new \InvalidArgumentException("Invalid array '$class'");
+            }
+
+            $subClass = substr($class, 0, -2);
+            $values = [];
+            foreach ($data as $key => $value) {
+                $values[] = self::deserialize($value, $subClass, null);
+            }
+            return $values;
+        }
+
+        if (preg_match('/^(array<|map\[)/', $class)) { // for associative array e.g. array<string,int>
+            $data = is_string($data) ? json_decode($data) : $data;
+            settype($data, 'array');
+            // Offset 4 matches the "map[" prefix. For "array<" two prefix characters
+            // remain, but they sit in the key-type part, which is discarded below.
+            $inner = substr($class, 4, -1);
+            $deserialized = [];
+            if (strrpos($inner, ',') !== false) {
+                $subClass_array = explode(',', $inner, 2);
+                $subClass = $subClass_array[1];
+                foreach ($data as $key => $value) {
+                    $deserialized[$key] = self::deserialize($value, $subClass, null);
+                }
+            }
+            return $deserialized;
+        }
+
+        if ($class === 'object') {
+            settype($data, 'array');
+            return $data;
+        } elseif ($class === 'mixed') {
+            settype($data, gettype($data));
+            return $data;
+        }
+
+        if ($class === '\DateTime') {
+            // Some APIs return an invalid, empty string as a
+            // date-time property. DateTime::__construct() will return
+            // the current time for empty input which is probably not
+            // what is meant. The invalid empty string is probably to
+            // be interpreted as a missing field/value. Let's handle
+            // this graceful.
+            if (empty($data)) {
+                return null;
+            }
+
+            try {
+                return new \DateTime($data);
+            } catch (\Exception $exception) {
+                // Some APIs return a date-time with too high nanosecond
+                // precision for php's DateTime to handle.
+                // With provided regexp 6 digits of microseconds saved
+                return new \DateTime(self::sanitizeTimestamp($data));
+            }
+        }
+
+        if ($class === '\SplFileObject') {
+            $data = Utils::streamFor($data);
+
+            /** @var \Psr\Http\Message\StreamInterface $data */
+
+            // determine file name
+            $tempFolder = Configuration::getDefaultConfiguration()->getTempFolderPath();
+            if (
+                is_array($httpHeaders)
+                && array_key_exists('Content-Disposition', $httpHeaders)
+                && preg_match('/inline; filename=[\'"]?([^\'"\s]+)[\'"]?$/i', $httpHeaders['Content-Disposition'], $match)
+            ) {
+                $filename = $tempFolder . DIRECTORY_SEPARATOR . self::sanitizeFilename($match[1]);
+            } else {
+                $filename = tempnam($tempFolder, '');
+            }
+
+            $file = ($filename === false) ? false : fopen($filename, 'w');
+            if ($file === false) {
+                throw new \RuntimeException('Unable to open a file for the downloaded content');
+            }
+
+            try {
+                // Compare against '' explicitly: a chunk consisting of "0" is
+                // falsy and would otherwise end the copy too early.
+                while (($chunk = $data->read(200)) !== '') {
+                    fwrite($file, $chunk);
+                }
+            } finally {
+                fclose($file);
+            }
+
+            return new \SplFileObject($filename, 'r');
+        }
+
+        /** @psalm-suppress ParadoxicalCondition */
+        if (in_array($class, self::BUILTIN_TYPES, true)) {
+            settype($data, $class);
+            return $data;
+        }
+
+        if (method_exists($class, 'getAllowableEnumValues')) {
+            $allowed = $class::getAllowableEnumValues();
+            if (!in_array($data, $allowed, true)) {
+                $imploded = implode("', '", $allowed);
+                throw new \InvalidArgumentException("Invalid value for enum '$class', must be one of: '$imploded'");
+            }
+            return $data;
+        }
+
+        $data = is_string($data) ? json_decode($data) : $data;
+
+        if (is_array($data)) {
+            $data = (object)$data;
+        }
+
+        // If a discriminator is defined and points to a valid subclass, use it.
+        $discriminator = $class::DISCRIMINATOR;
+        if (!empty($discriminator) && isset($data->{$discriminator}) && is_string($data->{$discriminator})) {
+            $subclass = '\OCA\WorkflowOcr\OcrProcessors\Remote\Client\Model\\' . $data->{$discriminator};
+            if (is_subclass_of($subclass, $class)) {
+                $class = $subclass;
+            }
+        }
+
+        /** @var ModelInterface $instance */
+        $instance = new $class();
+        $setters = $instance::setters();
+        $attributeMap = $instance::attributeMap();
+
+        foreach ($instance::openAPITypes() as $property => $type) {
+            // A property without a registered setter cannot be populated.
+            $propertySetter = $setters[$property] ?? null;
+            if ($propertySetter === null) {
+                continue;
+            }
+
+            $wireName = $attributeMap[$property];
+            if (!isset($data->{$wireName})) {
+                // Missing or null on the wire: only nullable properties are set (to null).
+                if ($instance::isNullable($property)) {
+                    $instance->$propertySetter(null);
+                }
+                continue;
+            }
+
+            $instance->$propertySetter(self::deserialize($data->{$wireName}, $type, null));
+        }
+
+        return $instance;
+    }
+
+    /**
+     * Build a query string from an array of key value pairs.
+     *
+     * This function can use the return value of `parse()` to build a query
+     * string. This function does not modify the provided keys when an array is
+     * encountered (like `http_build_query()` would).
+     *
+     * The function is copied from https://github.com/guzzle/psr7/blob/a243f80a1ca7fe8ceed4deee17f12c1930efe662/src/Query.php#L59-L112
+     * with a modification which is described in https://github.com/guzzle/psr7/pull/603
+     *
+     * @param array     $params   Query string parameters.
+     * @param int|false $encoding Set to false to not encode, PHP_QUERY_RFC3986
+     *                            to encode using RFC3986, or PHP_QUERY_RFC1738
+     *                            to encode using RFC1738.
+     *
+     * @throws \InvalidArgumentException when $encoding is none of the supported values
+     */
+    public static function buildQuery(array $params, $encoding = PHP_QUERY_RFC3986): string {
+        if (!$params) {
+            return '';
+        }
+
+        if ($encoding === false) {
+            $encoder = function (string $str): string {
+                return $str;
+            };
+        } elseif ($encoding === PHP_QUERY_RFC3986) {
+            $encoder = 'rawurlencode';
+        } elseif ($encoding === PHP_QUERY_RFC1738) {
+            $encoder = 'urlencode';
+        } else {
+            throw new \InvalidArgumentException('Invalid type');
+        }
+
+        $castBool = Configuration::getDefaultConfiguration()->getBooleanFormatForQueryString() == Configuration::BOOLEAN_FORMAT_INT
+            ? function ($v) { return (int)$v; }
+            : function ($v) { return $v ? 'true' : 'false'; };
+
+        $pairs = [];
+        foreach ($params as $k => $v) {
+            $k = $encoder((string)$k);
+            // A scalar is handled like a one-element list; an array repeats the key per element.
+            foreach (is_array($v) ? $v : [$v] as $item) {
+                $item = is_bool($item) ? $castBool($item) : $item;
+                // A null value yields the bare key without "=".
+                $pairs[] = ($item !== null) ? $k . '=' . $encoder((string)$item) : $k;
+            }
+        }
+
+        return implode('&', $pairs);
+    }
+}
diff --git a/lib/OcrProcessors/Remote/Client/generate-client.sh b/lib/OcrProcessors/Remote/Client/generate-client.sh new file mode 100755 index 0000000..dfc7da4 --- /dev/null +++ b/lib/OcrProcessors/Remote/Client/generate-client.sh @@ -0,0 +1,3 @@ +# Note: both NodeJS (+npm) and Java are required to run this script +# Update openapi-spec.json according to the Workflow OCR Backend API (/openapi.json) and run this script to generate Models +npx -y @openapitools/openapi-generator-cli generate \ No newline at end of file diff --git a/lib/OcrProcessors/Remote/Client/openapi-spec.json b/lib/OcrProcessors/Remote/Client/openapi-spec.json new file mode 100644 index 0000000..f0eafb1 --- /dev/null +++ b/lib/OcrProcessors/Remote/Client/openapi-spec.json @@ -0,0 +1,266 @@ +{ + "openapi": "3.1.0", + "info": { + "title": "FastAPI", + "version": "0.1.0" + }, + "paths": { + "/process_ocr": { + "post": { + "summary": "Process Ocr", + "description": "Processes an OCR request.\nThis endpoint accepts a file upload and optional OCR parameters to process the file using OCR (Optical Character Recognition).", + "operationId": "process_ocr_process_ocr_post", + "requestBody": { + "content": { + "multipart/form-data": { + "schema": { + "$ref": "#/components/schemas/Body_process_ocr_process_ocr_post" + } + } + }, + "required": true + }, + "responses": { + "200": { + "description": "Successful Response", + "content": { + "application/json": { + "schema": { + "$ref": "#/components/schemas/OcrResult" + } + } + } + }, + "500": { + "description": "Internal Server Error", + "content": { + "application/json": { + "schema": { + "$ref": "#/components/schemas/ErrorResult" + } + } + } + }, + "422": { + "description": "Validation Error", + "content": { + "application/json": { + "schema": { + "$ref": "#/components/schemas/HTTPValidationError" + } + } + } + } + } + } + }, + "/installed_languages": { + "get": { + "summary": "Installed Languages", + "description":
"Retrieves the list of installed Tesseract languages - relevant for OCRmyPDF.", + "operationId": "installed_languages_installed_languages_get", + "responses": { + "200": { + "description": "Successful Response", + "content": { + "application/json": { + "schema": { + "items": { + "type": "string" + }, + "type": "array", + "title": "Response Installed Languages Installed Languages Get" + } + } + } + } + } + } + }, + "/enabled": { + "put": { + "summary": "Enabled Callback", + "operationId": "enabled_callback_enabled_put", + "parameters": [ + { + "name": "enabled", + "in": "query", + "required": true, + "schema": { + "type": "boolean", + "title": "Enabled" + } + } + ], + "responses": { + "200": { + "description": "Successful Response", + "content": { + "application/json": { + "schema": {} + } + } + }, + "422": { + "description": "Validation Error", + "content": { + "application/json": { + "schema": { + "$ref": "#/components/schemas/HTTPValidationError" + } + } + } + } + } + } + }, + "/heartbeat": { + "get": { + "summary": "Heartbeat Callback", + "operationId": "heartbeat_callback_heartbeat_get", + "responses": { + "200": { + "description": "Successful Response", + "content": { + "application/json": { + "schema": {} + } + } + } + } + } + }, + "/init": { + "post": { + "summary": "Init Callback", + "operationId": "init_callback_init_post", + "responses": { + "200": { + "description": "Successful Response", + "content": { + "application/json": { + "schema": {} + } + } + } + } + } + } + }, + "components": { + "schemas": { + "Body_process_ocr_process_ocr_post": { + "properties": { + "file": { + "type": "string", + "format": "binary", + "title": "File", + "description": "The file to be processed using OCR." + }, + "ocrmypdf_parameters": { + "type": "string", + "title": "Ocrmypdf Parameters", + "description": "Additional parameters for the OCRmyPdf process (see https://ocrmypdf.readthedocs.io/en/latest/cookbook.html#basic-examples)." 
+ } + }, + "type": "object", + "required": [ + "file" + ], + "title": "Body_process_ocr_process_ocr_post" + }, + "ErrorResult": { + "properties": { + "message": { + "type": "string", + "title": "Message", + "description": "Error message" + } + }, + "type": "object", + "required": [ + "message" + ], + "title": "ErrorResult" + }, + "HTTPValidationError": { + "properties": { + "detail": { + "items": { + "$ref": "#/components/schemas/ValidationError" + }, + "type": "array", + "title": "Detail" + } + }, + "type": "object", + "title": "HTTPValidationError" + }, + "OcrResult": { + "properties": { + "filename": { + "type": "string", + "title": "Filename", + "description": "Name of the file" + }, + "contentType": { + "type": "string", + "title": "Contenttype", + "description": "Content type of the file. For example: application/pdf" + }, + "recognizedText": { + "type": "string", + "title": "Recognizedtext", + "description": "Recognized text from the file" + }, + "fileContent": { + "type": "string", + "title": "Filecontent", + "description": "Base64 encoded file content" + } + }, + "type": "object", + "required": [ + "filename", + "contentType", + "recognizedText", + "fileContent" + ], + "title": "OcrResult" + }, + "ValidationError": { + "properties": { + "loc": { + "items": { + "anyOf": [ + { + "type": "string" + }, + { + "type": "integer" + } + ] + }, + "type": "array", + "title": "Location" + }, + "msg": { + "type": "string", + "title": "Message" + }, + "type": { + "type": "string", + "title": "Error Type" + } + }, + "type": "object", + "required": [ + "loc", + "msg", + "type" + ], + "title": "ValidationError" + } + } + } +} \ No newline at end of file diff --git a/lib/OcrProcessors/Remote/Client/openapitools.json b/lib/OcrProcessors/Remote/Client/openapitools.json new file mode 100644 index 0000000..be8630b --- /dev/null +++ b/lib/OcrProcessors/Remote/Client/openapitools.json @@ -0,0 +1,20 @@ +{ + "$schema": 
"./node_modules/@openapitools/openapi-generator-cli/config.schema.json", + "spaces": 2, + "generator-cli": { + "version": "7.10.0", + "generators": { + "php": { + "generatorName": "php", + "output": "#{cwd}", + "glob": "openapi-spec.json", + "additionalProperties": { + "invokerPackage": "OCA\\\\WorkflowOcr\\\\OcrProcessors\\\\Remote\\\\Client", + "variableNamingConvention": "camelCase", + "srcBasePath": "." + }, + "templateDir": "templates" + } + } + } +} \ No newline at end of file diff --git a/lib/OcrProcessors/Remote/Client/templates/model.mustache b/lib/OcrProcessors/Remote/Client/templates/model.mustache new file mode 100644 index 0000000..c5c669d --- /dev/null +++ b/lib/OcrProcessors/Remote/Client/templates/model.mustache @@ -0,0 +1,37 @@ +partial_header}} +/** + * NOTE: This class is auto generated by OpenAPI Generator (https://openapi-generator.tech). + * https://openapi-generator.tech + * Do not edit the class manually. + */ + +namespace {{modelPackage}}; +{{^isEnum}} +{{^parentSchema}} + +use \ArrayAccess; +{{/parentSchema}} +{{/isEnum}} +use \{{invokerPackage}}\ObjectSerializer; + +/** + * {{classname}} Class Doc Comment + * + * @category Class +{{#description}} + * @description {{.}} +{{/description}} + * @package {{invokerPackage}} + * @author OpenAPI Generator team + * @link https://openapi-generator.tech +{{^isEnum}} + * @implements \ArrayAccess +{{/isEnum}} + */ +{{#isEnum}}{{>model_enum}}{{/isEnum}}{{^isEnum}}{{>model_generic}}{{/isEnum}} +{{/model}}{{/models}} diff --git a/lib/OcrProcessors/Remote/Client/templates/model_generic.mustache b/lib/OcrProcessors/Remote/Client/templates/model_generic.mustache new file mode 100644 index 0000000..9725390 --- /dev/null +++ b/lib/OcrProcessors/Remote/Client/templates/model_generic.mustache @@ -0,0 +1,560 @@ +class {{classname}} {{#parentSchema}}extends {{{parent}}}{{/parentSchema}}{{^parentSchema}}implements ModelInterface, ArrayAccess, \JsonSerializable{{/parentSchema}} +{ + public const DISCRIMINATOR = 
{{#discriminator}}'{{discriminatorName}}'{{/discriminator}}{{^discriminator}}null{{/discriminator}}; + + /** + * The original name of the model. + * + * @var string + */ + protected static $openAPIModelName = '{{name}}'; + + /** + * Array of property to type mappings. Used for (de)serialization + * + * @var string[] + */ + protected static $openAPITypes = [ + {{#vars}}'{{name}}' => '{{{dataType}}}'{{^-last}}, + {{/-last}}{{/vars}} + ]; + + /** + * Array of property to format mappings. Used for (de)serialization + * + * @var string[] + * @phpstan-var array + * @psalm-var array + */ + protected static $openAPIFormats = [ + {{#vars}}'{{name}}' => {{#dataFormat}}'{{{.}}}'{{/dataFormat}}{{^dataFormat}}null{{/dataFormat}}{{^-last}}, + {{/-last}}{{/vars}} + ]; + + /** + * Array of nullable properties. Used for (de)serialization + * + * @var boolean[] + */ + protected static array $openAPINullables = [ + {{#vars}}'{{name}}' => {{#isNullable}}true{{/isNullable}}{{^isNullable}}false{{/isNullable}}{{^-last}}, + {{/-last}}{{/vars}} + ]; + + /** + * If a nullable field gets set to null, insert it here + * + * @var boolean[] + */ + protected array $openAPINullablesSetToNull = []; + + /** + * Array of property to type mappings. Used for (de)serialization + * + * @return array + */ + public static function openAPITypes() + { + return self::$openAPITypes{{#parentSchema}} + parent::openAPITypes(){{/parentSchema}}; + } + + /** + * Array of property to format mappings. 
Used for (de)serialization + * + * @return array + */ + public static function openAPIFormats() + { + return self::$openAPIFormats{{#parentSchema}} + parent::openAPIFormats(){{/parentSchema}}; + } + + /** + * Array of nullable properties + * + * @return array + */ + protected static function openAPINullables(): array + { + return self::$openAPINullables{{#parentSchema}} + parent::openAPINullables(){{/parentSchema}}; + } + + /** + * Array of nullable field names deliberately set to null + * + * @return boolean[] + */ + private function getOpenAPINullablesSetToNull(): array + { + return $this->openAPINullablesSetToNull; + } + + /** + * Setter - Array of nullable field names deliberately set to null + * + * @param boolean[] $openAPINullablesSetToNull + */ + private function setOpenAPINullablesSetToNull(array $openAPINullablesSetToNull): void + { + $this->openAPINullablesSetToNull = $openAPINullablesSetToNull; + } + + /** + * Checks if a property is nullable + * + * @param string $property + * @return bool + */ + public static function isNullable(string $property): bool + { + return self::openAPINullables()[$property] ?? false; + } + + /** + * Checks if a nullable property is set to null. 
+ * + * @param string $property + * @return bool + */ + public function isNullableSetToNull(string $property): bool + { + return in_array($property, $this->getOpenAPINullablesSetToNull(), true); + } + + /** + * Array of attributes where the key is the local name, + * and the value is the original name + * + * @var string[] + */ + protected static $attributeMap = [ + {{#vars}}'{{name}}' => '{{baseName}}'{{^-last}}, + {{/-last}}{{/vars}} + ]; + + /** + * Array of attributes to setter functions (for deserialization of responses) + * + * @var string[] + */ + protected static $setters = [ + {{#vars}}'{{name}}' => '{{setter}}'{{^-last}}, + {{/-last}}{{/vars}} + ]; + + /** + * Array of attributes to getter functions (for serialization of requests) + * + * @var string[] + */ + protected static $getters = [ + {{#vars}}'{{name}}' => '{{getter}}'{{^-last}}, + {{/-last}}{{/vars}} + ]; + + /** + * Array of attributes where the key is the local name, + * and the value is the original name + * + * @return array + */ + public static function attributeMap() + { + return {{#parentSchema}}parent::attributeMap() + {{/parentSchema}}self::$attributeMap; + } + + /** + * Array of attributes to setter functions (for deserialization of responses) + * + * @return array + */ + public static function setters() + { + return {{#parentSchema}}parent::setters() + {{/parentSchema}}self::$setters; + } + + /** + * Array of attributes to getter functions (for serialization of requests) + * + * @return array + */ + public static function getters() + { + return {{#parentSchema}}parent::getters() + {{/parentSchema}}self::$getters; + } + + /** + * The original name of the model. 
+ * + * @return string + */ + public function getModelName() + { + return self::$openAPIModelName; + } + + {{#vars}} + {{#isEnum}} + {{#allowableValues}} + {{#enumVars}} + public const {{enumName}}_{{{name}}} = {{{value}}}; + {{/enumVars}} + {{/allowableValues}} + {{/isEnum}} + {{/vars}} + + {{#vars}} + {{#isEnum}} + /** + * Gets allowable values of the enum + * + * @return string[] + */ + public function {{getter}}AllowableValues() + { + return [ + {{#allowableValues}}{{#enumVars}}self::{{enumName}}_{{{name}}},{{^-last}} + {{/-last}}{{/enumVars}}{{/allowableValues}} + ]; + } + + {{/isEnum}} + {{/vars}} + {{^parentSchema}} + /** + * Associative array for storing property values + * + * @var mixed[] + */ + protected $container = []; + {{/parentSchema}} + + /** + * Constructor + * + * @param mixed[] $data Associated array of property values + * initializing the model + */ + public function __construct(array $data = null) + { + {{#parentSchema}} + parent::__construct($data); + + {{/parentSchema}} + {{#vars}} + $this->setIfExists('{{name}}', $data ?? [], {{#defaultValue}}{{{defaultValue}}}{{/defaultValue}}{{^defaultValue}}null{{/defaultValue}}); + {{/vars}} + {{#discriminator}} + + // Initialize discriminator property with the model name. 
+ $this->container['{{discriminatorName}}'] = static::$openAPIModelName; + {{/discriminator}} + } + + /** + * Sets $this->container[$variableName] to the given data or to the given default Value; if $variableName + * is nullable and its value is set to null in the $fields array, then mark it as "set to null" in the + * $this->openAPINullablesSetToNull array + * + * @param string $variableName + * @param array $fields + * @param mixed $defaultValue + */ + private function setIfExists(string $variableName, array $fields, $defaultValue): void + { + if (self::isNullable($variableName) && array_key_exists($variableName, $fields) && is_null($fields[$variableName])) { + $this->openAPINullablesSetToNull[] = $variableName; + } + + $this->container[$variableName] = $fields[$variableName] ?? $defaultValue; + } + + /** + * Show all the invalid properties with reasons. + * + * @return array invalid properties with reasons + */ + public function listInvalidProperties() + { + {{#parentSchema}} + $invalidProperties = parent::listInvalidProperties(); + {{/parentSchema}} + {{^parentSchema}} + $invalidProperties = []; + {{/parentSchema}} + + {{#vars}} + {{#required}} + if ($this->container['{{name}}'] === null) { + $invalidProperties[] = "'{{name}}' can't be null"; + } + {{/required}} + {{#isEnum}} + {{^isContainer}} + $allowedValues = $this->{{getter}}AllowableValues(); + if (!is_null($this->container['{{name}}']) && !in_array($this->container['{{name}}'], $allowedValues, true)) { + $invalidProperties[] = sprintf( + "invalid value '%s' for '{{name}}', must be one of '%s'", + $this->container['{{name}}'], + implode("', '", $allowedValues) + ); + } + + {{/isContainer}} + {{/isEnum}} + {{#hasValidation}} + {{#maxLength}} + if ({{^required}}!is_null($this->container['{{name}}']) && {{/required}}(mb_strlen($this->container['{{name}}']) > {{maxLength}})) { + $invalidProperties[] = "invalid value for '{{name}}', the character length must be smaller than or equal to {{{maxLength}}}."; + } + 
+ {{/maxLength}} + {{#minLength}} + if ({{^required}}!is_null($this->container['{{name}}']) && {{/required}}(mb_strlen($this->container['{{name}}']) < {{minLength}})) { + $invalidProperties[] = "invalid value for '{{name}}', the character length must be bigger than or equal to {{{minLength}}}."; + } + + {{/minLength}} + {{#maximum}} + if ({{^required}}!is_null($this->container['{{name}}']) && {{/required}}($this->container['{{name}}'] >{{#exclusiveMaximum}}={{/exclusiveMaximum}} {{maximum}})) { + $invalidProperties[] = "invalid value for '{{name}}', must be smaller than {{^exclusiveMaximum}}or equal to {{/exclusiveMaximum}}{{maximum}}."; + } + + {{/maximum}} + {{#minimum}} + if ({{^required}}!is_null($this->container['{{name}}']) && {{/required}}($this->container['{{name}}'] <{{#exclusiveMinimum}}={{/exclusiveMinimum}} {{minimum}})) { + $invalidProperties[] = "invalid value for '{{name}}', must be bigger than {{^exclusiveMinimum}}or equal to {{/exclusiveMinimum}}{{minimum}}."; + } + + {{/minimum}} + {{#pattern}} + if ({{^required}}!is_null($this->container['{{name}}']) && {{/required}}!preg_match("{{{pattern}}}", $this->container['{{name}}'])) { + $invalidProperties[] = "invalid value for '{{name}}', must be conform to the pattern {{{pattern}}}."; + } + + {{/pattern}} + {{#maxItems}} + if ({{^required}}!is_null($this->container['{{name}}']) && {{/required}}(count($this->container['{{name}}']) > {{maxItems}})) { + $invalidProperties[] = "invalid value for '{{name}}', number of items must be less than or equal to {{{maxItems}}}."; + } + + {{/maxItems}} + {{#minItems}} + if ({{^required}}!is_null($this->container['{{name}}']) && {{/required}}(count($this->container['{{name}}']) < {{minItems}})) { + $invalidProperties[] = "invalid value for '{{name}}', number of items must be greater than or equal to {{{minItems}}}."; + } + + {{/minItems}} + {{/hasValidation}} + {{/vars}} + return $invalidProperties; + } + + /** + * Validate all the properties in the model + * return 
true if all passed + * + * @return bool True if all properties are valid + */ + public function valid() + { + return count($this->listInvalidProperties()) === 0; + } + + {{#vars}} + + /** + * Gets {{name}} + * + * @return {{{dataType}}}{{^required}}|null{{/required}} + {{#deprecated}} + * @deprecated + {{/deprecated}} + */ + public function {{getter}}() + { + return $this->container['{{name}}']; + } + + /** + * Sets {{name}} + * + * @param {{{dataType}}}{{^required}}|null{{/required}} ${{name}}{{#description}} {{{.}}}{{/description}}{{^description}} {{{name}}}{{/description}} + * + * @return self + {{#deprecated}} + * @deprecated + {{/deprecated}} + */ + public function {{setter}}(${{name}}) + { + {{#isNullable}} + if (is_null(${{name}})) { + array_push($this->openAPINullablesSetToNull, '{{name}}'); + } else { + $nullablesSetToNull = $this->getOpenAPINullablesSetToNull(); + $index = array_search('{{name}}', $nullablesSetToNull); + if ($index !== FALSE) { + unset($nullablesSetToNull[$index]); + $this->setOpenAPINullablesSetToNull($nullablesSetToNull); + } + } + {{/isNullable}} + {{^isNullable}} + if (is_null(${{name}})) { + throw new \InvalidArgumentException('non-nullable {{name}} cannot be null'); + } + {{/isNullable}} + {{#isEnum}} + $allowedValues = $this->{{getter}}AllowableValues(); + {{^isContainer}} + if ({{#isNullable}}!is_null(${{name}}) && {{/isNullable}}!in_array(${{{name}}}, $allowedValues, true)) { + throw new \InvalidArgumentException( + sprintf( + "Invalid value '%s' for '{{name}}', must be one of '%s'", + ${{{name}}}, + implode("', '", $allowedValues) + ) + ); + } + {{/isContainer}} + {{#isContainer}} + if ({{#isNullable}}!is_null(${{name}}) && {{/isNullable}}array_diff(${{{name}}}, $allowedValues)) { + throw new \InvalidArgumentException( + sprintf( + "Invalid value for '{{name}}', must be one of '%s'", + implode("', '", $allowedValues) + ) + ); + } + {{/isContainer}} + {{/isEnum}} + {{#hasValidation}} + {{#maxLength}} + if 
({{#isNullable}}!is_null(${{name}}) && {{/isNullable}}(mb_strlen(${{name}}) > {{maxLength}})) { + throw new \InvalidArgumentException('invalid length for ${{name}} when calling {{classname}}.{{operationId}}, must be smaller than or equal to {{maxLength}}.'); + }{{/maxLength}} + {{#minLength}} + if ({{#isNullable}}!is_null(${{name}}) && {{/isNullable}}(mb_strlen(${{name}}) < {{minLength}})) { + throw new \InvalidArgumentException('invalid length for ${{name}} when calling {{classname}}.{{operationId}}, must be bigger than or equal to {{minLength}}.'); + } + {{/minLength}} + {{#maximum}} + if ({{#isNullable}}!is_null(${{name}}) && {{/isNullable}}(${{name}} >{{#exclusiveMaximum}}={{/exclusiveMaximum}} {{maximum}})) { + throw new \InvalidArgumentException('invalid value for ${{name}} when calling {{classname}}.{{operationId}}, must be smaller than {{^exclusiveMaximum}}or equal to {{/exclusiveMaximum}}{{maximum}}.'); + } + {{/maximum}} + {{#minimum}} + if ({{#isNullable}}!is_null(${{name}}) && {{/isNullable}}(${{name}} <{{#exclusiveMinimum}}={{/exclusiveMinimum}} {{minimum}})) { + throw new \InvalidArgumentException('invalid value for ${{name}} when calling {{classname}}.{{operationId}}, must be bigger than {{^exclusiveMinimum}}or equal to {{/exclusiveMinimum}}{{minimum}}.'); + } + {{/minimum}} + {{#pattern}} + if ({{#isNullable}}!is_null(${{name}}) && {{/isNullable}}(!preg_match("{{{pattern}}}", ObjectSerializer::toString(${{name}})))) { + throw new \InvalidArgumentException("invalid value for \${{name}} when calling {{classname}}.{{operationId}}, must conform to the pattern {{{pattern}}}."); + } + {{/pattern}} + {{#maxItems}} + if ({{#isNullable}}!is_null(${{name}}) && {{/isNullable}}(count(${{name}}) > {{maxItems}})) { + throw new \InvalidArgumentException('invalid value for ${{name}} when calling {{classname}}.{{operationId}}, number of items must be less than or equal to {{maxItems}}.'); + }{{/maxItems}} + {{#minItems}} + if ({{#isNullable}}!is_null(${{name}}) && 
{{/isNullable}}(count(${{name}}) < {{minItems}})) { + throw new \InvalidArgumentException('invalid length for ${{name}} when calling {{classname}}.{{operationId}}, number of items must be greater than or equal to {{minItems}}.'); + } + {{/minItems}} + {{/hasValidation}} + $this->container['{{name}}'] = ${{name}}; + + return $this; + } + {{/vars}} + /** + * Returns true if offset exists. False otherwise. + * + * @param integer $offset Offset + * + * @return boolean + */ + public function offsetExists($offset): bool + { + return isset($this->container[$offset]); + } + + /** + * Gets offset. + * + * @param integer $offset Offset + * + * @return mixed|null + */ + #[\ReturnTypeWillChange] + public function offsetGet($offset): mixed + { + return $this->container[$offset] ?? null; + } + + /** + * Sets value based on offset. + * + * @param int|null $offset Offset + * @param mixed $value Value to be set + * + * @return void + */ + public function offsetSet($offset, $value): void + { + if (is_null($offset)) { + $this->container[] = $value; + } else { + $this->container[$offset] = $value; + } + } + + /** + * Unsets offset. + * + * @param integer $offset Offset + * + * @return void + */ + public function offsetUnset($offset): void + { + unset($this->container[$offset]); + } + + /** + * Serializes the object to a value that can be serialized natively by json_encode(). + * @link https://www.php.net/manual/en/jsonserializable.jsonserialize.php + * + * @return mixed Returns data which can be serialized by json_encode(), which is a value + * of any type other than a resource. 
+ */ + #[\ReturnTypeWillChange] + public function jsonSerialize(): mixed + { + return ObjectSerializer::sanitizeForSerialization($this); + } + + /** + * Gets the string presentation of the object + * + * @return string + */ + public function __toString() + { + return json_encode( + ObjectSerializer::sanitizeForSerialization($this), + JSON_PRETTY_PRINT + ); + } + + /** + * Gets a header-safe presentation of the object + * + * @return string + */ + public function toHeaderValue() + { + return json_encode(ObjectSerializer::sanitizeForSerialization($this)); + } +} diff --git a/lib/OcrProcessors/Remote/Client/templates/partial_header.mustache b/lib/OcrProcessors/Remote/Client/templates/partial_header.mustache new file mode 100644 index 0000000..ec58f73 --- /dev/null +++ b/lib/OcrProcessors/Remote/Client/templates/partial_header.mustache @@ -0,0 +1,18 @@ +/** + * @copyright Copyright (c) 2025 Robin Windey + * + * @license GNU AGPL version 3 or any later version + * + * This program is free software: you can redistribute it and/or modify + * it under the terms of the GNU Affero General Public License as + * published by the Free Software Foundation, either version 3 of the + * License, or (at your option) any later version. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU Affero General Public License for more details. + * + * You should have received a copy of the GNU Affero General Public License + * along with this program. If not, see . 
+ */ diff --git a/lib/OcrProcessors/Remote/WorkflowOcrRemoteProcessor.php b/lib/OcrProcessors/Remote/WorkflowOcrRemoteProcessor.php new file mode 100644 index 0000000..7764af2 --- /dev/null +++ b/lib/OcrProcessors/Remote/WorkflowOcrRemoteProcessor.php @@ -0,0 +1,68 @@ + + * + * @license GNU AGPL version 3 or any later version + * + * This program is free software: you can redistribute it and/or modify + * it under the terms of the GNU Affero General Public License as + * published by the Free Software Foundation, either version 3 of the + * License, or (at your option) any later version. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU Affero General Public License for more details. + * + * You should have received a copy of the GNU Affero General Public License + * along with this program. If not, see . + */ + +namespace OCA\WorkflowOcr\OcrProcessors\Remote; + +use OCA\WorkflowOcr\Exception\OcrNotPossibleException; +use OCA\WorkflowOcr\Model\GlobalSettings; +use OCA\WorkflowOcr\Model\WorkflowSettings; +use OCA\WorkflowOcr\OcrProcessors\ICommandLineUtils; +use OCA\WorkflowOcr\OcrProcessors\IOcrProcessor; +use OCA\WorkflowOcr\OcrProcessors\OcrProcessorResult; +use OCA\WorkflowOcr\OcrProcessors\Remote\Client\IApiClient; +use OCA\WorkflowOcr\OcrProcessors\Remote\Client\Model\ErrorResult; +use OCP\Files\File; +use Psr\Log\LoggerInterface; + +/** + * OCR Processor which utilizes the Workflow OCR Backend remote service to perform OCR. 
+ */
+class WorkflowOcrRemoteProcessor implements IOcrProcessor {
+	public function __construct(
+		private IApiClient $apiClient,
+		private ICommandLineUtils $commandLineUtils,
+		private LoggerInterface $logger,
+	) {
+	}
+
+	/**
+	 * Sends the given file to the remote OCR backend and converts the answer
+	 * into an OcrProcessorResult.
+	 *
+	 * @param File $file file whose content is streamed to the backend
+	 * @param WorkflowSettings $settings workflow settings, translated into command line arguments
+	 * @param GlobalSettings $globalSettings global settings, translated into command line arguments
+	 * @return OcrProcessorResult decoded file content, file extension and recognized text
+	 * @throws OcrNotPossibleException if the file cannot be opened, the backend answers with an
+	 *                                 ErrorResult, or the returned file content is not valid base64
+	 */
+	public function ocrFile(File $file, WorkflowSettings $settings, GlobalSettings $globalSettings): OcrProcessorResult {
+		$ocrMyPdfParameters = $this->commandLineUtils->getCommandlineArgs($settings, $globalSettings);
+		$fileName = $file->getName();
+
+		$fileResource = $file->fopen('rb');
+		if ($fileResource === false) {
+			throw new OcrNotPossibleException('Could not open file "' . $fileName . '" for reading');
+		}
+
+		try {
+			$this->logger->debug('Sending OCR request to remote backend');
+			$apiResult = $this->apiClient->processOcr($fileResource, $fileName, $ocrMyPdfParameters);
+		} finally {
+			// The API client may already have closed the stream; only close what is still open.
+			if (is_resource($fileResource)) {
+				fclose($fileResource);
+			}
+		}
+
+		if ($apiResult instanceof ErrorResult) {
+			$this->logger->debug('OCR error result received', ['message' => $apiResult->getMessage()]);
+			throw new OcrNotPossibleException($apiResult->getMessage());
+		}
+
+		// Log metadata only: the result carries the complete base64 encoded document
+		// and its recognized text, neither of which belongs into the log.
+		$this->logger->debug('OCR result received', ['filename' => $apiResult->getFilename()]);
+
+		// Strict mode rejects characters outside the base64 alphabet instead of silently skipping them.
+		$fileContent = base64_decode($apiResult->getFileContent(), true);
+		if ($fileContent === false) {
+			throw new OcrNotPossibleException('Remote backend returned file content which is not valid base64');
+		}
+
+		return new OcrProcessorResult(
+			$fileContent,
+			pathinfo($apiResult->getFilename(), PATHINFO_EXTENSION),
+			$apiResult->getRecognizedText()
+		);
+	}
+}
diff --git a/lib/Service/GlobalSettingsService.php b/lib/Service/GlobalSettingsService.php index bb31d46..81b1144 100644 --- a/lib/Service/GlobalSettingsService.php +++ b/lib/Service/GlobalSettingsService.php @@ -28,7 +28,7 @@ use OCA\WorkflowOcr\AppInfo\Application; use OCA\WorkflowOcr\Model\GlobalSettings; -use OCP\IConfig; +use OCP\IAppConfig; use ReflectionClass; use ReflectionProperty; @@ -39,11 +39,10 @@ * @package OCA\WorkflowOcr\Service */ class GlobalSettingsService implements IGlobalSettingsService { - /** @var IConfig */ - private $config; - public function __construct(IConfig $config) { - $this->config = $config; + public function __construct( + private IAppConfig $config, + ) { } /** @@ -54,7 +53,7 @@ public function getGlobalSettings() : GlobalSettings { foreach ($this->getProperties($settings) as $prop) { $key = $prop->getName(); - $configValue = $this->config->getAppValue(Application::APP_NAME, $key); + $configValue = 
$this->config->getValueString(Application::APP_NAME, $key); $settings->$key = $configValue; } @@ -70,7 +69,7 @@ public function setGlobalSettings(GlobalSettings $settings) : void { foreach ($this->getProperties($settings) as $prop) { $key = $prop->getName(); $value = $settings->$key; - $this->config->setAppValue(Application::APP_NAME, $key, $value); + $this->config->setValueString(Application::APP_NAME, $key, $value); } } diff --git a/lib/Service/IOcrBackendInfoService.php b/lib/Service/IOcrBackendInfoService.php index 0c3ce7d..2dd8a05 100644 --- a/lib/Service/IOcrBackendInfoService.php +++ b/lib/Service/IOcrBackendInfoService.php @@ -37,4 +37,10 @@ interface IOcrBackendInfoService { * @throws CommandException */ public function getInstalledLanguages() : array; + + /** + * Returns whether the OCR backend is a remote backend. + * @return bool false if ocrMyPdf is used locally, true if workflow_ocr_backend is used. + */ + public function isRemoteBackend() : bool; } diff --git a/lib/Service/OcrBackendInfoService.php b/lib/Service/OcrBackendInfoService.php index a8ce8dc..621873e 100644 --- a/lib/Service/OcrBackendInfoService.php +++ b/lib/Service/OcrBackendInfoService.php @@ -26,23 +26,45 @@ namespace OCA\WorkflowOcr\Service; +use OCA\WorkflowOcr\AppInfo\Application; use OCA\WorkflowOcr\Exception\CommandException; +use OCA\WorkflowOcr\OcrProcessors\Remote\Client\IApiClient; +use OCA\WorkflowOcr\Wrapper\IAppApiWrapper; use OCA\WorkflowOcr\Wrapper\ICommand; +use OCP\App\IAppManager; +use Psr\Container\ContainerExceptionInterface; +use Psr\Container\NotFoundExceptionInterface; use Psr\Log\LoggerInterface; class OcrBackendInfoService implements IOcrBackendInfoService { - /** @var ICommand */ - private $command; + public function __construct( + private ICommand $command, + private IApiClient $apiClient, + private IAppManager $appManager, + private IAppApiWrapper $appApiWrapper, + private LoggerInterface $logger, + ) { + } - /** @var LoggerInterface */ - private $logger; + 
public function getInstalledLanguages() : array { + return $this->isRemoteBackend() ? $this->getInstalledLanguagesFromRemoteBackend() : $this->getInstalledLanguagesFromLocalCli(); + } - public function __construct(ICommand $command, LoggerInterface $logger) { - $this->command = $command; - $this->logger = $logger; + public function isRemoteBackend(): bool { + if (!$this->appManager->isEnabledForUser(Application::APP_API_APP_NAME)) { + return false; + } + try { + /** @var array */ + $backendApp = $this->appApiWrapper->getExApp(Application::APP_BACKEND_NAME); + } catch (ContainerExceptionInterface|NotFoundExceptionInterface $e) { + return false; + } + + return $backendApp !== null && isset($backendApp['enabled']) && boolval($backendApp['enabled']) === true; } - public function getInstalledLanguages() : array { + private function getInstalledLanguagesFromLocalCli() : array { $commandStr = 'tesseract --list-langs'; $this->command->setCommand($commandStr); @@ -75,4 +97,8 @@ public function getInstalledLanguages() : array { ); return array_values($arr); } + + private function getInstalledLanguagesFromRemoteBackend() : array { + return $this->apiClient->getLanguages(); + } } diff --git a/lib/Service/OcrService.php b/lib/Service/OcrService.php index cf94d9c..daf2b40 100644 --- a/lib/Service/OcrService.php +++ b/lib/Service/OcrService.php @@ -281,18 +281,20 @@ private function setFileVersionsLabel(File $file, string $uid, string $label): v $versions = $this->versionManager->getVersionsForFile($user, $file); foreach ($versions as $version) { + /* Resolve the revision id first: both skip branches below use it in their log context. */ $revisionId = $version->getRevisionId(); if (!$version instanceof IMetadataVersion) { $this->logger->debug('Skipping version with revision id {versionId} because "{versionClass}" is not an IMetadataVersion', ['versionId' => $revisionId, 'versionClass' => get_class($version)]); continue; } $versionBackend = $version->getBackend(); + if (!$versionBackend instanceof IMetadataVersionBackend) { $this->logger->debug('Skipping version with 
revision id {versionId} because its backend "{versionBackendClass}" does not implement IMetadataVersionBackend', ['versionId' => $revisionId, 'versionBackendClass' => get_class($versionBackend)]); continue; } $versionTimestamp = $version->getTimestamp(); $versionLabel = $version->getMetadataValue(self::FILE_VERSION_LABEL_KEY); diff --git a/lib/SetupChecks/OcrMyPdfCheck.php b/lib/SetupChecks/OcrMyPdfCheck.php index c8d8d73..c687348 100644 --- a/lib/SetupChecks/OcrMyPdfCheck.php +++ b/lib/SetupChecks/OcrMyPdfCheck.php @@ -26,6 +26,7 @@ namespace OCA\WorkflowOcr\SetupChecks; +use OCA\WorkflowOcr\Service\IOcrBackendInfoService; use OCA\WorkflowOcr\Wrapper\ICommand; use OCP\IL10N; use OCP\SetupCheck\ISetupCheck; @@ -35,6 +36,7 @@ class OcrMyPdfCheck implements ISetupCheck { public function __construct( private IL10N $l10n, private ICommand $command, + private IOcrBackendInfoService $ocrBackendInfoService, ) { } @@ -47,6 +49,9 @@ public function getName(): string { } public function run(): SetupResult { + if ($this->ocrBackendInfoService->isRemoteBackend()) { + return SetupResult::success($this->l10n->t('Workflow OCR Backend is installed.')); // TODO :: health check ? + } $this->command->setCommand('ocrmypdf --version')->execute(); if ($this->command->getExitCode() === 127) { return SetupResult::error($this->l10n->t('OCRmyPDF CLI is not installed.'), 'https://github.com/R0Wi-DEV/workflow_ocr?tab=readme-ov-file#backend'); diff --git a/lib/Wrapper/AppApiWrapper.php b/lib/Wrapper/AppApiWrapper.php new file mode 100644 index 0000000..5d98ee6 --- /dev/null +++ b/lib/Wrapper/AppApiWrapper.php @@ -0,0 +1,72 @@ + + * + * @license GNU AGPL version 3 or any later version + * + * This program is free software: you can redistribute it and/or modify + * it under the terms of the GNU Affero General Public License as + * published by the Free Software Foundation, either version 3 of the + * License, or (at your option) any later version. 
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ * GNU Affero General Public License for more details.
+ *
+ * You should have received a copy of the GNU Affero General Public License
+ * along with this program. If not, see <http://www.gnu.org/licenses/>.
+ */
+
+namespace OCA\WorkflowOcr\Wrapper;
+
+use OCA\AppAPI\PublicFunctions;
+use OCP\Http\Client\IResponse;
+use OCP\IRequest;
+use Psr\Container\ContainerExceptionInterface;
+use Psr\Container\ContainerInterface;
+use Psr\Container\NotFoundExceptionInterface;
+
+/**
+ * Adapter which forwards calls to OCA\AppAPI\PublicFunctions, resolving that
+ * service from the container each time a method is invoked.
+ */
+class AppApiWrapper implements IAppApiWrapper {
+	public function __construct(
+		private ContainerInterface $container,
+	) {
+	}
+
+	/**
+	 * Passes all arguments through to PublicFunctions::exAppRequest() and
+	 * returns its result untouched.
+	 *
+	 * @throws ContainerExceptionInterface
+	 * @throws NotFoundExceptionInterface
+	 */
+	public function exAppRequest(
+		string $appId,
+		string $route,
+		?string $userId = null,
+		string $method = 'POST',
+		array $params = [],
+		array $options = [],
+		?IRequest $request = null,
+	): IResponse|array {
+		return $this->publicFunctions()->exAppRequest($appId, $route, $userId, $method, $params, $options, $request);
+	}
+
+	/**
+	 * Passes the app name through to PublicFunctions::getExApp() and returns
+	 * its result untouched.
+	 *
+	 * @throws ContainerExceptionInterface
+	 * @throws NotFoundExceptionInterface
+	 */
+	public function getExApp(string $appName): ?array {
+		return $this->publicFunctions()->getExApp($appName);
+	}
+
+	/**
+	 * Fetches the PublicFunctions service from the container.
+	 * NOTE(review): presumably resolved lazily because the class is only
+	 * available while the AppAPI app is installed - confirm.
+	 *
+	 * @return PublicFunctions
+	 * @throws ContainerExceptionInterface
+	 * @throws NotFoundExceptionInterface
+	 */
+	private function publicFunctions() {
+		return $this->container->get(PublicFunctions::class);
+	}
+}
diff --git a/lib/Wrapper/IAppApiWrapper.php b/lib/Wrapper/IAppApiWrapper.php new file mode 100644 index 0000000..a4b1e04 --- /dev/null +++ b/lib/Wrapper/IAppApiWrapper.php @@ -0,0 +1,44 @@ + + * + * @license GNU AGPL version 3 or any later version + * + * This program is free software: you can redistribute it and/or modify + * it under the terms of the GNU Affero General Public License as + * published by the Free Software Foundation, either 
version 3 of the
+ * License, or (at your option) any later version.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ * GNU Affero General Public License for more details.
+ *
+ * You should have received a copy of the GNU Affero General Public License
+ * along with this program. If not, see <http://www.gnu.org/licenses/>.
+ */
+
+namespace OCA\WorkflowOcr\Wrapper;
+
+use OCP\Http\Client\IResponse;
+use OCP\IRequest;
+
+/**
+ * Consolidates all used calls to OCA\AppAPI\PublicFunctions
+ */
+interface IAppApiWrapper {
+	/**
+	 * Sends a request to a route of the given ExApp.
+	 *
+	 * The AppApiWrapper implementation passes every argument unchanged to
+	 * OCA\AppAPI\PublicFunctions::exAppRequest() and may throw PSR container
+	 * exceptions when that service cannot be resolved.
+	 *
+	 * @param string $appId id of the ExApp to call
+	 * @param string $route route on the ExApp
+	 * @param string|null $userId presumably the user on whose behalf the request is sent - confirm against AppAPI
+	 * @param string $method HTTP method, 'POST' by default
+	 * @param array $params request parameters; shape is defined by AppAPI
+	 * @param array $options request options; shape is defined by AppAPI
+	 * @param IRequest|null $request optional request object handed to AppAPI - TODO confirm its purpose
+	 * @return IResponse|array NOTE(review): the array variant presumably describes a failure - confirm against AppAPI
+	 */
+	public function exAppRequest(
+		string $appId,
+		string $route,
+		?string $userId = null,
+		string $method = 'POST',
+		array $params = [],
+		array $options = [],
+		?IRequest $request = null,
+	): IResponse|array ;
+
+	/**
+	 * Looks up an ExApp by name.
+	 *
+	 * The AppApiWrapper implementation returns whatever
+	 * OCA\AppAPI\PublicFunctions::getExApp() returns and may throw PSR
+	 * container exceptions when that service cannot be resolved.
+	 *
+	 * @param string $appName name of the ExApp
+	 * @return array|null ExApp information (OcrBackendInfoService reads its 'enabled' key);
+	 *                    null presumably when the ExApp is unknown - confirm against AppAPI
+	 */
+	public function getExApp(string $appName): ?array;
+}
diff --git a/phpunit.integration.xml b/phpunit.integration.xml index 4b72ad0..2b9cbce 100644 --- a/phpunit.integration.xml +++ b/phpunit.integration.xml @@ -22,6 +22,7 @@ ./vendor ./node_modules ./lib/Migration + ./lib/OcrProcessors/Remote/Client ./.php-cs-fixer.dist.php diff --git a/phpunit.xml b/phpunit.xml index 1821383..2d248c7 100644 --- a/phpunit.xml +++ b/phpunit.xml @@ -22,6 +22,7 @@ ./vendor ./node_modules ./lib/Migration + ./lib/OcrProcessors/Remote/Client ./.php-cs-fixer.dist.php diff --git a/sonar-project.properties b/sonar-project.properties index 291acc3..426e3eb 100644 --- a/sonar-project.properties +++ b/sonar-project.properties @@ -1,4 +1,10 @@ sonar.projectKey=R0Wi_workflow_ocr sonar.organization=r0wi sonar.php.coverage.reportPaths=./coverage/php-coverage.xml -sonar.javascript.lcov.reportPaths=./coverage/lcov.info \ No newline at end of file +sonar.javascript.lcov.reportPaths=./coverage/lcov.info +sonar.coverage.exclusions=\ + **/lib/OcrProcessors/Remote/Client/** +sonar.cpd.exclusions=\ + **/lib/OcrProcessors/Remote/Client/** 
+sonar.exclusions=\ + **/lib/OcrProcessors/Remote/Client/** \ No newline at end of file diff --git a/tests/Unit/OcrProcessors/ImageOcrProcessorTest.php b/tests/Unit/OcrProcessors/Local/ImageOcrProcessorTest.php similarity index 80% rename from tests/Unit/OcrProcessors/ImageOcrProcessorTest.php rename to tests/Unit/OcrProcessors/Local/ImageOcrProcessorTest.php index 2c166b0..d0a0567 100644 --- a/tests/Unit/OcrProcessors/ImageOcrProcessorTest.php +++ b/tests/Unit/OcrProcessors/Local/ImageOcrProcessorTest.php @@ -21,12 +21,13 @@ * along with this program. If not, see . */ -namespace OCA\WorkflowOcr\Tests\Unit\OcrProcessors; +namespace OCA\WorkflowOcr\Tests\Unit\OcrProcessors\Local; use OCA\WorkflowOcr\Helper\ISidecarFileAccessor; use OCA\WorkflowOcr\Model\GlobalSettings; use OCA\WorkflowOcr\Model\WorkflowSettings; -use OCA\WorkflowOcr\OcrProcessors\ImageOcrProcessor; +use OCA\WorkflowOcr\OcrProcessors\ICommandLineUtils; +use OCA\WorkflowOcr\OcrProcessors\Local\ImageOcrProcessor; use OCA\WorkflowOcr\Wrapper\ICommand; use OCP\Files\File; use PHPUnit\Framework\MockObject\MockObject; @@ -43,8 +44,12 @@ public function testOcrFileSetsImageDpi() { $file = $this->createMock(File::class); /** @var ISidecarFileAccessor|MockObject $fileReader */ $sidecarFileAccessor = $this->createMock(ISidecarFileAccessor::class); + /** @var ICommandLineUtils|MockObject $commandLineUtils */ + $commandLineUtils = $this->createMock(ICommandLineUtils::class); + $commandLineUtils->method('getCommandlineArgs') + ->willReturnCallback(fn ($settings, $globalSettings, $additionalCommandlineArgs) => implode(' ', $additionalCommandlineArgs)); - $processor = new ImageOcrProcessor($command, $logger, $sidecarFileAccessor); + $processor = new ImageOcrProcessor($command, $logger, $sidecarFileAccessor, $commandLineUtils); $file->expects($this->once()) ->method('getContent') diff --git a/tests/Unit/OcrProcessors/PdfOcrProcessorTest.php b/tests/Unit/OcrProcessors/Local/PdfOcrProcessorTest.php similarity index 
89% rename from tests/Unit/OcrProcessors/PdfOcrProcessorTest.php rename to tests/Unit/OcrProcessors/Local/PdfOcrProcessorTest.php index 909a071..c064527 100644 --- a/tests/Unit/OcrProcessors/PdfOcrProcessorTest.php +++ b/tests/Unit/OcrProcessors/Local/PdfOcrProcessorTest.php @@ -21,14 +21,17 @@ * along with this program. If not, see . */ -namespace OCA\WorkflowOcr\Tests\Unit\OcrProcessors; +namespace OCA\WorkflowOcr\Tests\Unit\OcrProcessors\Local; use OCA\WorkflowOcr\Exception\OcrNotPossibleException; use OCA\WorkflowOcr\Exception\OcrResultEmptyException; use OCA\WorkflowOcr\Helper\ISidecarFileAccessor; use OCA\WorkflowOcr\Model\GlobalSettings; use OCA\WorkflowOcr\Model\WorkflowSettings; -use OCA\WorkflowOcr\OcrProcessors\PdfOcrProcessor; +use OCA\WorkflowOcr\OcrProcessors\CommandLineUtils; +use OCA\WorkflowOcr\OcrProcessors\ICommandLineUtils; +use OCA\WorkflowOcr\OcrProcessors\Local\PdfOcrProcessor; +use OCA\WorkflowOcr\Service\IOcrBackendInfoService; use OCA\WorkflowOcr\Wrapper\ICommand; use OCP\Files\File; use PHPUnit\Framework\MockObject\MockObject; @@ -51,6 +54,10 @@ class PdfOcrProcessorTest extends TestCase { private $logger; /** @var ISidecarFileAccessor|MockObject */ private $sidecarFileAccessor; + /** @var ICommandLineUtils|MockObject */ + private $commandLineUtils; + /** @var IOcrBackendInfoService|MockObject */ + private $ocrBackendInfoService; /** @var WorkflowSettings */ private $defaultSettings; /** @var GlobalSettings */ @@ -62,6 +69,9 @@ protected function setUp(): void { $this->command = $this->createMock(ICommand::class); $this->logger = $this->createMock(LoggerInterface::class); $this->sidecarFileAccessor = $this->createMock(ISidecarFileAccessor::class); + $this->ocrBackendInfoService = $this->createMock(IOcrBackendInfoService::class); + $this->commandLineUtils = new CommandLineUtils($this->sidecarFileAccessor, $this->ocrBackendInfoService, $this->logger); + $this->defaultSettings = new WorkflowSettings(); $this->defaultGlobalSettings = new 
GlobalSettings(); $this->fileBefore = $this->createMock(File::class); @@ -79,6 +89,8 @@ protected function setUp(): void { ->will($this->returnCallback(function () { return $this->ocrMyPdfOutput !== self::FILE_CONTENT_AFTER ? $this->ocrMyPdfOutput : self::FILE_CONTENT_AFTER; })); + $this->ocrBackendInfoService->method('isRemoteBackend') + ->willReturn(false); } public function testCallsCommandInterface() { @@ -92,7 +104,7 @@ public function testCallsCommandInterface() { ->method('execute') ->willReturn(true); - $processor = new PdfOcrProcessor($this->command, $this->logger, $this->sidecarFileAccessor); + $processor = new PdfOcrProcessor($this->command, $this->logger, $this->sidecarFileAccessor, $this->commandLineUtils); $result = $processor->ocrFile($this->fileBefore, $this->defaultSettings, $this->defaultGlobalSettings); $this->assertEquals(self::FILE_CONTENT_AFTER, $result->getFileContent()); @@ -112,7 +124,7 @@ public function testThrowsOcrNotPossibleException() { $this->command->expects($this->once()) ->method('getExitCode'); - $processor = new PdfOcrProcessor($this->command, $this->logger, $this->sidecarFileAccessor); + $processor = new PdfOcrProcessor($this->command, $this->logger, $this->sidecarFileAccessor, $this->commandLineUtils); $thrown = false; try { @@ -151,7 +163,7 @@ public function testLogsWarningIfOcrMyPdfSucceedsWithWarningOutput() { $paramsArray['errorOutput'] === 'getErrorOutput'; })); - $processor = new PdfOcrProcessor($this->command, $this->logger, $this->sidecarFileAccessor); + $processor = new PdfOcrProcessor($this->command, $this->logger, $this->sidecarFileAccessor, $this->commandLineUtils); $processor->ocrFile($this->fileBefore, $this->defaultSettings, $this->defaultGlobalSettings); } @@ -171,7 +183,7 @@ public function testThrowsErrorIfOcrFileWasEmpty() { ->willReturn('/admin/files/somefile.pdf'); $thrown = false; - $processor = new PdfOcrProcessor($this->command, $this->logger, $this->sidecarFileAccessor); + $processor = new 
PdfOcrProcessor($this->command, $this->logger, $this->sidecarFileAccessor, $this->commandLineUtils); try { $processor->ocrFile($this->fileBefore, $this->defaultSettings, $this->defaultGlobalSettings); @@ -187,7 +199,7 @@ public function testThrowsErrorIfOcrFileWasEmpty() { public function testLanguageSettingsAreSetCorrectly() { $this->command->expects($this->once()) ->method('setCommand') - ->with('ocrmypdf -q --skip-text -l deu+eng - - || exit $? ; cat'); + ->with('ocrmypdf -q --skip-text --language deu+eng - - || exit $? ; cat'); $this->command->expects($this->once()) ->method('execute') ->willReturn(true); @@ -195,7 +207,7 @@ public function testLanguageSettingsAreSetCorrectly() { ->method('getOutput') ->willReturn('someOcrContent'); - $processor = new PdfOcrProcessor($this->command, $this->logger, $this->sidecarFileAccessor); + $processor = new PdfOcrProcessor($this->command, $this->logger, $this->sidecarFileAccessor, $this->commandLineUtils); $processor->ocrFile($this->fileBefore, new WorkflowSettings('{"languages": ["deu", "eng"] }'), $this->defaultGlobalSettings); } @@ -210,7 +222,7 @@ public function testRemoveBackgroundFlagIsSetCorrectly() { ->method('getOutput') ->willReturn('someOcrContent'); - $processor = new PdfOcrProcessor($this->command, $this->logger, $this->sidecarFileAccessor); + $processor = new PdfOcrProcessor($this->command, $this->logger, $this->sidecarFileAccessor, $this->commandLineUtils); $processor->ocrFile($this->fileBefore, new WorkflowSettings('{"removeBackground": true }'), $this->defaultGlobalSettings); } @@ -225,14 +237,14 @@ public function testProcessorCountIsNotSetIfGlobalSettingsDoesNotContainProcesso ->method('getOutput') ->willReturn('someOcrContent'); - $processor = new PdfOcrProcessor($this->command, $this->logger, $this->sidecarFileAccessor); + $processor = new PdfOcrProcessor($this->command, $this->logger, $this->sidecarFileAccessor, $this->commandLineUtils); $processor->ocrFile($this->fileBefore, $this->defaultSettings, 
$this->defaultGlobalSettings); } public function testProcessorCountIsSetCorrectlyFromGobalSettings() { $this->command->expects($this->once()) ->method('setCommand') - ->with('ocrmypdf -q --skip-text -j 42 - - || exit $? ; cat'); + ->with('ocrmypdf -q --skip-text --jobs 42 - - || exit $? ; cat'); $this->command->expects($this->once()) ->method('execute') ->willReturn(true); @@ -242,7 +254,7 @@ public function testProcessorCountIsSetCorrectlyFromGobalSettings() { $this->defaultGlobalSettings->processorCount = 42; - $processor = new PdfOcrProcessor($this->command, $this->logger, $this->sidecarFileAccessor); + $processor = new PdfOcrProcessor($this->command, $this->logger, $this->sidecarFileAccessor, $this->commandLineUtils); $processor->ocrFile($this->fileBefore, $this->defaultSettings, $this->defaultGlobalSettings); } @@ -263,7 +275,7 @@ public function testLogsInfoIfSidecarFileContentWasEmpty() { return strpos($message, 'Temporary sidecar file at') !== false && strpos($message, 'was empty') !== false; })); - $processor = new PdfOcrProcessor($this->command, $this->logger, $this->sidecarFileAccessor); + $processor = new PdfOcrProcessor($this->command, $this->logger, $this->sidecarFileAccessor, $this->commandLineUtils); $processor->ocrFile($this->fileBefore, $this->defaultSettings, $this->defaultGlobalSettings); } @@ -281,7 +293,7 @@ public function testDoesNotLogInfoIfSidecarFileContentWasNotEmpty() { $this->logger->expects($this->never()) ->method('info'); - $processor = new PdfOcrProcessor($this->command, $this->logger, $this->sidecarFileAccessor); + $processor = new PdfOcrProcessor($this->command, $this->logger, $this->sidecarFileAccessor, $this->commandLineUtils); $processor->ocrFile($this->fileBefore, $this->defaultSettings, $this->defaultGlobalSettings); } @@ -302,7 +314,7 @@ public function testAppliesSidecarParameterIfSidecarFileCanBeCreated() { ->method('getOrCreateSidecarFile') ->willReturn('/tmp/sidecar.txt'); - $processor = new 
PdfOcrProcessor($this->command, $this->logger, $this->sidecarFileAccessor); + $processor = new PdfOcrProcessor($this->command, $this->logger, $this->sidecarFileAccessor, $this->commandLineUtils); $processor->ocrFile($this->fileBefore, $this->defaultSettings, $this->defaultGlobalSettings); } @@ -326,7 +338,7 @@ public function testAppliesOcrModeParameter(int $simulatedOcrMode, string $expec ->method('getOrCreateSidecarFile') ->willReturn('/tmp/sidecar.txt'); - $processor = new PdfOcrProcessor($this->command, $this->logger, $this->sidecarFileAccessor); + $processor = new PdfOcrProcessor($this->command, $this->logger, $this->sidecarFileAccessor, $this->commandLineUtils); $processor->ocrFile($this->fileBefore, new WorkflowSettings('{"ocrMode": ' . $simulatedOcrMode . '}'), $this->defaultGlobalSettings); } @@ -352,7 +364,7 @@ public function testRemoveBackgroundIsNotAppliedIfOcrModeIsRedoOcr() { return strpos($message, '--remove-background is incompatible with --redo-ocr') !== false; })); - $processor = new PdfOcrProcessor($this->command, $this->logger, $this->sidecarFileAccessor); + $processor = new PdfOcrProcessor($this->command, $this->logger, $this->sidecarFileAccessor, $this->commandLineUtils); $processor->ocrFile($this->fileBefore, new WorkflowSettings('{"ocrMode": ' . WorkflowSettings::OCR_MODE_REDO_OCR . 
', "removeBackground": true}'), $this->defaultGlobalSettings); } @@ -374,7 +386,7 @@ public function testAppliesCustomCliArgsCorrectly() { ->willReturn('/tmp/sidecar.txt'); $workflowSettings = new WorkflowSettings('{"customCliArgs": "--output-type pdf"}'); - $processor = new PdfOcrProcessor($this->command, $this->logger, $this->sidecarFileAccessor); + $processor = new PdfOcrProcessor($this->command, $this->logger, $this->sidecarFileAccessor, $this->commandLineUtils); $processor->ocrFile($this->fileBefore, $workflowSettings, $this->defaultGlobalSettings); } diff --git a/tests/Unit/OcrProcessors/OcrProcessorFactoryTest.php b/tests/Unit/OcrProcessors/OcrProcessorFactoryTest.php index 47715b8..1c493ab 100644 --- a/tests/Unit/OcrProcessors/OcrProcessorFactoryTest.php +++ b/tests/Unit/OcrProcessors/OcrProcessorFactoryTest.php @@ -26,8 +26,11 @@ use OCA\WorkflowOcr\AppInfo\Application; use OCA\WorkflowOcr\Exception\OcrProcessorNotFoundException; use OCA\WorkflowOcr\OcrProcessors\IOcrProcessor; +use OCA\WorkflowOcr\OcrProcessors\Local\PdfOcrProcessor; use OCA\WorkflowOcr\OcrProcessors\OcrProcessorFactory; -use OCA\WorkflowOcr\OcrProcessors\PdfOcrProcessor; +use OCA\WorkflowOcr\OcrProcessors\Remote\WorkflowOcrRemoteProcessor; +use OCA\WorkflowOcr\Service\IOcrBackendInfoService; +use PHPUnit\Framework\MockObject\MockObject; use Psr\Container\ContainerInterface; use Test\TestCase; @@ -35,21 +38,32 @@ class OcrProcessorFactoryTest extends TestCase { /** @var ContainerInterface */ private $appContainer; + /** @var IOcrBackendInfoService|MockObject */ + private $ocrBackendInfoService; + protected function setUp() : void { parent::setUp(); $app = new Application(); $this->appContainer = $app->getContainer(); + $this->ocrBackendInfoService = $this->createMock(IOcrBackendInfoService::class); } - public function testReturnsPdfProcessor() { - $factory = new OcrProcessorFactory($this->appContainer); + public function testReturnsLocalPdfProcessor() { + $factory = new 
OcrProcessorFactory($this->appContainer, $this->ocrBackendInfoService); $processor = $factory->create('application/pdf'); $this->assertInstanceOf(PdfOcrProcessor::class, $processor); } + public function testReturnsRemotePdfProcessor() { + $this->ocrBackendInfoService->method('isRemoteBackend')->willReturn(true); + $factory = new OcrProcessorFactory($this->appContainer, $this->ocrBackendInfoService); + $processor = $factory->create('application/pdf'); + $this->assertInstanceOf(WorkflowOcrRemoteProcessor::class, $processor); + } + public function testThrowsNotFoundExceptionOnInvalidMimeType() { $this->expectException(OcrProcessorNotFoundException::class); - $factory = new OcrProcessorFactory($this->appContainer); + $factory = new OcrProcessorFactory($this->appContainer, $this->ocrBackendInfoService); $factory->create('no/mimetype'); } @@ -59,7 +73,7 @@ public function testThrowsNotFoundExceptionOnInvalidMimeType() { public function testOcrProcessorsAreNotCached($mimetype) { // Related to BUG #43 - $factory = new OcrProcessorFactory($this->appContainer); + $factory = new OcrProcessorFactory($this->appContainer, $this->ocrBackendInfoService); $processor1 = $factory->create($mimetype); $processor2 = $factory->create($mimetype); $this->assertFalse($processor1 === $processor2); @@ -71,7 +85,7 @@ public function testOcrProcessorsAreNotCached($mimetype) { public function testPdfCommandNotCached($mimetype) { // Related to BUG #43 - $factory = new OcrProcessorFactory($this->appContainer); + $factory = new OcrProcessorFactory($this->appContainer, $this->ocrBackendInfoService); $processor1 = $factory->create($mimetype); $processor2 = $factory->create($mimetype); $cmd1 = $this->getCommandObject($processor1); @@ -85,7 +99,7 @@ public function testPdfCommandNotCached($mimetype) { public function dataProvider_mimeTypes() { $mimetypes = []; - $mapping = $this->invokePrivate(OcrProcessorFactory::class, 'mapping'); + $mapping = $this->invokePrivate(OcrProcessorFactory::class, 
'localMapping'); foreach ($mapping as $mimetype => $className) { $mimetypes[] = [$mimetype]; } diff --git a/tests/Unit/OcrProcessors/Remote/WorkflowOcrRemoteProcessorTest.php b/tests/Unit/OcrProcessors/Remote/WorkflowOcrRemoteProcessorTest.php new file mode 100644 index 0000000..de6b228 --- /dev/null +++ b/tests/Unit/OcrProcessors/Remote/WorkflowOcrRemoteProcessorTest.php @@ -0,0 +1,109 @@ + + * + * @license GNU AGPL version 3 or any later version + * + * This program is free software: you can redistribute it and/or modify + * it under the terms of the GNU Affero General Public License as + * published by the Free Software Foundation, either version 3 of the + * License, or (at your option) any later version. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU Affero General Public License for more details. + * + * You should have received a copy of the GNU Affero General Public License + * along with this program. If not, see . 
+ */ +namespace OCA\WorkflowOcr\Tests\Unit\OcrProcessors\Remote; + +use OCA\WorkflowOcr\Exception\OcrNotPossibleException; +use OCA\WorkflowOcr\Model\GlobalSettings; +use OCA\WorkflowOcr\Model\WorkflowSettings; +use OCA\WorkflowOcr\OcrProcessors\ICommandLineUtils; +use OCA\WorkflowOcr\OcrProcessors\OcrProcessorResult; +use OCA\WorkflowOcr\OcrProcessors\Remote\Client\IApiClient; +use OCA\WorkflowOcr\OcrProcessors\Remote\Client\Model\ErrorResult; +use OCA\WorkflowOcr\OcrProcessors\Remote\Client\Model\OcrResult; +use OCA\WorkflowOcr\OcrProcessors\Remote\WorkflowOcrRemoteProcessor; +use OCP\Files\File; +use PHPUnit\Framework\MockObject\MockObject; +use PHPUnit\Framework\TestCase; +use Psr\Log\LoggerInterface; + +class WorkflowOcrRemoteProcessorTest extends TestCase { + /** @var IApiClient|MockObject */ + private $apiClient; + private $commandLineUtils; + private $logger; + private $file; + private $workflowSettings; + private $globalSettings; + private $processor; + + protected function setUp(): void { + $this->apiClient = $this->createMock(IApiClient::class); + $this->commandLineUtils = $this->createMock(ICommandLineUtils::class); + $this->logger = $this->createMock(LoggerInterface::class); + $this->file = $this->createMock(File::class); + $this->workflowSettings = $this->createMock(WorkflowSettings::class); + $this->globalSettings = $this->createMock(GlobalSettings::class); + + $this->processor = new WorkflowOcrRemoteProcessor( + $this->apiClient, + $this->commandLineUtils, + $this->logger + ); + } + + public function testOcrFileSuccess(): void { + $fileResource = fopen('php://memory', 'rb'); + $fileName = 'test.pdf'; + $ocrMyPdfParameters = 'ocrmypdfparam --param'; + $ocrResult = new OcrResult([ + 'filename' => 'result.pdf', + 'contentType' => 'application/pdf', + 'recognizedText' => 'recognized text', + 'fileContent' => base64_encode('file content') + ]); + + $this->file->method('fopen')->willReturn($fileResource); + 
$this->file->method('getName')->willReturn($fileName); + $this->commandLineUtils->method('getCommandlineArgs')->willReturn($ocrMyPdfParameters); + $this->apiClient->expects($this->once()) + ->method('processOcr') + ->with($fileResource, $fileName, $ocrMyPdfParameters) + ->willReturn($ocrResult); + + $result = $this->processor->ocrFile($this->file, $this->workflowSettings, $this->globalSettings); + + $this->assertInstanceOf(OcrProcessorResult::class, $result); + $this->assertEquals('file content', $result->getFileContent()); + $this->assertEquals('pdf', $result->getFileExtension()); + $this->assertEquals('recognized text', $result->getRecognizedText()); + } + + public function testOcrFileErrorResult(): void { + $fileResource = fopen('php://memory', 'rb'); + $fileName = 'test.pdf'; + $ocrMyPdfParameters = 'param1'; + $errorResult = $this->createMock(ErrorResult::class); + + $this->file->method('fopen')->willReturn($fileResource); + $this->file->method('getName')->willReturn($fileName); + $this->commandLineUtils->method('getCommandlineArgs')->willReturn($ocrMyPdfParameters); + $this->apiClient->method('processOcr')->willReturn($errorResult); + + $errorResult->method('getMessage')->willReturn('OCR failed'); + + $this->expectException(OcrNotPossibleException::class); + $this->expectExceptionMessage('OCR failed'); + + $this->processor->ocrFile($this->file, $this->workflowSettings, $this->globalSettings); + } +} diff --git a/tests/Unit/Service/GlobalSettingsServiceTest.php b/tests/Unit/Service/GlobalSettingsServiceTest.php index 0e26244..fb58cb6 100644 --- a/tests/Unit/Service/GlobalSettingsServiceTest.php +++ b/tests/Unit/Service/GlobalSettingsServiceTest.php @@ -26,12 +26,12 @@ use OCA\WorkflowOcr\AppInfo\Application; use OCA\WorkflowOcr\Model\GlobalSettings; use OCA\WorkflowOcr\Service\GlobalSettingsService; -use OCP\IConfig; +use OCP\IAppConfig; use PHPUnit\Framework\MockObject\MockObject; use Test\TestCase; class GlobalSettingsServiceTest extends TestCase { - /** 
@var IConfig|MockObject */ + /** @var IAppConfig|MockObject */ private $config; /** @var GlobalSettingsService */ @@ -39,14 +39,14 @@ class GlobalSettingsServiceTest extends TestCase { public function setUp() : void { parent::setUp(); - $this->config = $this->createMock(IConfig::class); + $this->config = $this->createMock(IAppConfig::class); $this->globalSettingsService = new GlobalSettingsService($this->config); } public function testGetSettings_ReturnsCorrectSettings() { $this->config->expects($this->once()) - ->method('getAppValue') - ->with(Application::APP_NAME, 'processorCount', '') + ->method('getValueString') + ->with(Application::APP_NAME, 'processorCount') ->willReturn('2'); $settings = $this->globalSettingsService->getGlobalSettings(); @@ -57,10 +57,10 @@ public function testGetSettings_ReturnsCorrectSettings() { public function testSetSettings_CallsConfigSetAppValue() { $settings = new GlobalSettings(); - $settings->processorCount = 2; + $settings->processorCount = '2'; $this->config->expects($this->once()) - ->method('setAppValue') + ->method('setValueString') ->with(Application::APP_NAME, 'processorCount', '2'); $this->globalSettingsService->setGlobalSettings($settings); diff --git a/tests/Unit/Service/OcrBackendInfoServiceTest.php b/tests/Unit/Service/OcrBackendInfoServiceTest.php index fcf6a44..58283c4 100644 --- a/tests/Unit/Service/OcrBackendInfoServiceTest.php +++ b/tests/Unit/Service/OcrBackendInfoServiceTest.php @@ -23,17 +23,28 @@ namespace OCA\WorkflowOcr\Tests\Unit\Service; +use Exception; use OCA\WorkflowOcr\Exception\CommandException; +use OCA\WorkflowOcr\OcrProcessors\Remote\Client\IApiClient; use OCA\WorkflowOcr\Service\OcrBackendInfoService; +use OCA\WorkflowOcr\Wrapper\IAppApiWrapper; use OCA\WorkflowOcr\Wrapper\ICommand; +use OCP\App\IAppManager; use PHPUnit\Framework\MockObject\MockObject; use PHPUnit\Framework\TestCase; +use Psr\Container\ContainerExceptionInterface; +use Psr\Container\NotFoundExceptionInterface; use 
Psr\Log\LoggerInterface; class OcrBackendInfoServiceTest extends TestCase { /** @var ICommand|MockObject */ private $command; - + /** @var IApiClient|MockObject */ + private $apiClient; + /** @var IAppManager|MockObject */ + private $appManager; + /** @var IAppApiWrapper|MockObject */ + private $appApiWrapper; /** @var LoggerInterface|MockObject */ private $logger; @@ -43,7 +54,10 @@ class OcrBackendInfoServiceTest extends TestCase { protected function setUp() : void { $this->command = $this->createMock(ICommand::class); $this->logger = $this->createMock(LoggerInterface::class); - $this->service = new OcrBackendInfoService($this->command, $this->logger); + $this->apiClient = $this->createMock(IApiClient::class); + $this->appManager = $this->createMock(IAppManager::class); + $this->appApiWrapper = $this->createMock(IAppApiWrapper::class); + $this->service = new OcrBackendInfoService($this->command, $this->apiClient, $this->appManager, $this->appApiWrapper, $this->logger); parent::setUp(); } @@ -140,6 +154,58 @@ public function testGetInstalledLanguagesThrowsIfCliDidNotProduceAnyOutput() : v $this->service->getInstalledLanguages(); } + public function testIsRemoteBackendReturnsTrueIfRemoteBackendIsInstalledViaAppApi() { + $this->appManager->expects($this->once()) + ->method('isEnabledForUser') + ->with('app_api') + ->willReturn(true); + $this->appApiWrapper->expects($this->once()) + ->method('getExApp') + ->with('workflow_ocr_backend') + ->willReturn(['enabled' => true]); + + $result = $this->service->isRemoteBackend(); + + $this->assertTrue($result); + } + + /** + * @dataProvider dataProviderDependencyInjectionExceptions + */ + public function testIsRemoteBackendReturnsFalseIfBackendAppIsNotInstalled(Exception $exception) { + $this->appManager->expects($this->once()) + ->method('isEnabledForUser') + ->with('app_api') + ->willReturn(true); + $this->appApiWrapper->expects($this->once()) + ->method('getExApp') + ->with('workflow_ocr_backend') + 
->willThrowException($exception); + + $result = $this->service->isRemoteBackend(); + + $this->assertFalse($result); + } + + public function testGetInstalledLanguagesFromRemoteBackend() { + $this->appManager->expects($this->once()) + ->method('isEnabledForUser') + ->with('app_api') + ->willReturn(true); + $this->appApiWrapper->expects($this->once()) + ->method('getExApp') + ->with('workflow_ocr_backend') + ->willReturn(['enabled' => true]); + + $this->apiClient->expects($this->once()) + ->method('getLanguages') + ->willReturn(['eng', 'deu', 'chi']); + + $result = $this->service->getInstalledLanguages(); + + $this->assertEquals(['eng', 'deu', 'chi'], $result); + } + public function dataProviderInstalledLangs() { return [ ["List of available languages (4):\neng\ndeu\nosd\nchi", ['eng','deu','chi']] @@ -152,4 +218,11 @@ public function dataProviderStdErrAndErrOutput() { ['', 'someErrorOutput'] ]; } + + public function dataProviderDependencyInjectionExceptions() { + return [ + [$this->createMock(ContainerExceptionInterface::class)], + [$this->createMock(NotFoundExceptionInterface::class)] + ]; + } } diff --git a/tests/Unit/Service/OcrServiceTest.php b/tests/Unit/Service/OcrServiceTest.php index a5bec7d..49d3938 100644 --- a/tests/Unit/Service/OcrServiceTest.php +++ b/tests/Unit/Service/OcrServiceTest.php @@ -27,6 +27,7 @@ use InvalidArgumentException; use OC\User\NoUserException; use OCA\Files_Versions\Versions\IVersionManager; +use OCA\Files_Versions\Versions\Version; use OCA\WorkflowOcr\Exception\OcrNotPossibleException; use OCA\WorkflowOcr\Exception\OcrProcessorNotFoundException; use OCA\WorkflowOcr\Exception\OcrResultEmptyException; @@ -676,6 +677,61 @@ public function testCreatesNewFileVersionWithSuffixIfNodeIsNotUpdateable() { $this->ocrService->runOcrProcess(42, 'usr', $settings); } + public function testSetsFileVersionsLabelIfKeepOriginalFileVersionIsTrue() { + $settings = new WorkflowSettings('{"keepOriginalFileVersion": true}'); + $mimeType = 
'application/pdf'; + $content = 'someFileContent'; + $ocrContent = 'someOcrProcessedFile'; + $ocrResult = new OcrProcessorResult($ocrContent, 'pdf', $ocrContent); + + $fileMock = $this->createValidFileMock($mimeType, $content); + $this->rootFolderGetById42ReturnValue = [$fileMock]; + + $this->ocrProcessor->expects($this->once()) + ->method('ocrFile') + ->willReturn($ocrResult); + + $viewMock = $this->createMock(IView::class); + $this->viewFactory->expects($this->once()) + ->method('create') + ->willReturn($viewMock); + + $fileMock->expects($this->once()) + ->method('getMTime') + ->willReturn(1234); + + // With PHPUnit 10 use + // https://docs.phpunit.de/en/10.5/test-doubles.html#createmockforintersectionofinterfaces + $versionMock = $this->createMock(Version::class); + $versionMock->expects($this->once()) + ->method('getRevisionId') + ->willReturn(1); + $versionMock->expects($this->once()) + ->method('getTimestamp') + ->willReturn(1234); + $versionMock->expects($this->once()) + ->method('getMetadataValue') + ->with('label') + ->willReturn(''); + + $versionBackendMock = $this->getMockBuilder(VersionBackendMock::class) + ->setConstructorArgs([fn ($className) => $this->createMock($className)]) + ->getMock(); + $versionMock->expects($this->once()) + ->method('getBackend') + ->willReturn($versionBackendMock); + + $versionBackendMock->expects($this->once()) + ->method('setMetadataValue') + ->with($fileMock, 1, 'label', 'Before OCR'); + + $this->versionManager->expects($this->once()) + ->method('getVersionsForFile') + ->willReturn([$versionMock]); + + $this->ocrService->runOcrProcess(42, 'usr', $settings); + } + public function dataProvider_InvalidNodes() { /** @var MockObject|Node */ $folderMock = $this->createMock(Node::class); @@ -717,7 +773,8 @@ public function dataProvider_InvalidArguments() { public function dataProvider_ExceptionsToBeCaught() { return [ [new OcrNotPossibleException('Ocr not possible')], - [new OcrProcessorNotFoundException('audio/mpeg')], + [new 
OcrProcessorNotFoundException('audio/mpeg', false)], + [new OcrProcessorNotFoundException('audio/mpeg', true)], [new OcrResultEmptyException('Ocr result was empty')], [new Exception('Some exception')] ]; diff --git a/tests/Unit/Service/VersionBackendMock.php b/tests/Unit/Service/VersionBackendMock.php new file mode 100644 index 0000000..d022c63 --- /dev/null +++ b/tests/Unit/Service/VersionBackendMock.php @@ -0,0 +1,59 @@ + + * + * @license GNU AGPL version 3 or any later version + * + * This program is free software: you can redistribute it and/or modify + * it under the terms of the GNU Affero General Public License as + * published by the Free Software Foundation, either version 3 of the + * License, or (at your option) any later version. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU Affero General Public License for more details. + * + * You should have received a copy of the GNU Affero General Public License + * along with this program. If not, see . 
+ */ + +namespace OCA\WorkflowOcr\Tests\Unit\Service; + +use OC\Files\Node\File; +use OCA\Files_Versions\Versions\IMetadataVersionBackend; +use OCA\Files_Versions\Versions\IVersion; +use OCA\Files_Versions\Versions\IVersionBackend; +use OCP\Files\FileInfo; +use OCP\Files\Node; +use OCP\Files\Storage\IStorage; +use OCP\IUser; + +// With PHPUnit 10 use +// https://docs.phpunit.de/en/10.5/test-doubles.html#createmockforintersectionofinterfaces +class VersionBackendMock implements IVersionBackend, IMetadataVersionBackend { + public function __construct( + private $createMock, + ) { + } + public function useBackendForStorage(IStorage $storage): bool { + return true; + } + public function getVersionsForFile(IUser $user, FileInfo $file): array { + return []; + } + public function createVersion(IUser $user, FileInfo $file) { + } + public function rollback(IVersion $version) { + } + public function read(IVersion $version) { + } + public function getVersionFile(IUser $user, FileInfo $sourceFile, $revision): File { + return ($this->createMock)(File::class); + } + public function setMetadataValue(Node $node, int $revision, string $key, string $value): void { + } +} diff --git a/tests/Unit/SetupChecks/OcrMyPdfCheckTest.php b/tests/Unit/SetupChecks/OcrMyPdfCheckTest.php index 9125ad1..214398d 100644 --- a/tests/Unit/SetupChecks/OcrMyPdfCheckTest.php +++ b/tests/Unit/SetupChecks/OcrMyPdfCheckTest.php @@ -26,6 +26,7 @@ namespace OCA\WorkflowOcr\Tests\Unit\SetupChecks; +use OCA\WorkflowOcr\Service\IOcrBackendInfoService; use OCA\WorkflowOcr\SetupChecks\OcrMyPdfCheck; use OCA\WorkflowOcr\Wrapper\ICommand; use OCP\IL10N; @@ -38,13 +39,16 @@ class OcrMyPdfCheckTest extends TestCase { private $l10n; /** @var ICommand|MockObject */ private $command; + /** @var IOcrBackendInfoService|MockObject */ + private $ocrBackendInfoService; /** @var OcrMyPdfCheck */ private $ocrMyPdfCheck; protected function setUp(): void { $this->l10n = $this->createMock(IL10N::class); $this->command = 
$this->createMock(ICommand::class); - $this->ocrMyPdfCheck = new OcrMyPdfCheck($this->l10n, $this->command); + $this->ocrBackendInfoService = $this->createMock(IOcrBackendInfoService::class); + $this->ocrMyPdfCheck = new OcrMyPdfCheck($this->l10n, $this->command, $this->ocrBackendInfoService); } public function testGetCategory(): void { @@ -100,4 +104,17 @@ public function testRunOcrMyPdfInstalled(): void { $this->assertEquals(SetupResult::SUCCESS, $result->getSeverity()); $this->assertEquals('OCRmyPDF is installed and has version 12.0.0.', $result->getDescription()); } + + public function testRunOcrMyPdfInstalledViaRemoteBackend(): void { + $this->ocrBackendInfoService->expects($this->once())->method('isRemoteBackend')->willReturn(true); + $this->l10n->expects($this->once())->method('t') + ->with('Workflow OCR Backend is installed.') + ->willReturn('Workflow OCR Backend is installed.'); + + $result = $this->ocrMyPdfCheck->run(); + + $this->assertInstanceOf(SetupResult::class, $result); + $this->assertEquals(SetupResult::SUCCESS, $result->getSeverity()); + $this->assertEquals('Workflow OCR Backend is installed.', $result->getDescription()); + } } diff --git a/tests/Unit/Wrapper/AppApiWrapperTest.php b/tests/Unit/Wrapper/AppApiWrapperTest.php new file mode 100644 index 0000000..547eb55 --- /dev/null +++ b/tests/Unit/Wrapper/AppApiWrapperTest.php @@ -0,0 +1,87 @@ + + * + * @license GNU AGPL version 3 or any later version + * + * This program is free software: you can redistribute it and/or modify + * it under the terms of the GNU Affero General Public License as + * published by the Free Software Foundation, either version 3 of the + * License, or (at your option) any later version. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU Affero General Public License for more details. 
+ * + * You should have received a copy of the GNU Affero General Public License + * along with this program. If not, see . + */ + +namespace OCA\WorkflowOcr\Tests\Unit\Wrapper; + +use OCA\AppAPI\PublicFunctions; +use OCA\WorkflowOcr\Wrapper\AppApiWrapper; +use OCP\Http\Client\IResponse; +use OCP\IRequest; +use PHPUnit\Framework\TestCase; +use Psr\Container\ContainerInterface; + +class AppApiWrapperTest extends TestCase { + private $container; + private $publicFunctions; + private $appApiWrapper; + + protected function setUp(): void { + $this->container = $this->createMock(ContainerInterface::class); + $this->publicFunctions = $this->createMock(PublicFunctions::class); + $this->container->method('get')->willReturn($this->publicFunctions); + $this->appApiWrapper = new AppApiWrapper($this->container); + } + + public function testExAppRequest(): void { + $response = $this->createMock(IResponse::class); + $this->publicFunctions->expects($this->once()) + ->method('exAppRequest') + ->with('appId', 'route') + ->willReturn($response); + + $result = $this->appApiWrapper->exAppRequest('appId', 'route'); + + $this->assertInstanceOf(IResponse::class, $result); + $this->assertEquals($response, $result); + } + + public function testExAppRequestWithParams(): void { + $response = $this->createMock(IResponse::class); + $params = ['key' => 'value']; + $options = ['option' => 'value']; + $request = $this->createMock(IRequest::class); + + $this->publicFunctions->expects($this->once()) + ->method('exAppRequest') + ->with('appId', 'route', 'userId', 'POST', $params, $options, $request) + ->willReturn($response); + + $result = $this->appApiWrapper->exAppRequest('appId', 'route', 'userId', 'POST', $params, $options, $request); + + $this->assertInstanceOf(IResponse::class, $result); + $this->assertEquals($response, $result); + } + + public function testGetExApp(): void { + $appData = ['name' => 'appName']; + $this->publicFunctions->expects($this->once()) + ->method('getExApp') + 
->with('appName') + ->willReturn($appData); + + $result = $this->appApiWrapper->getExApp('appName'); + + $this->assertIsArray($result); + $this->assertEquals($appData, $result); + } +} diff --git a/tests/psalm-baseline.xml b/tests/psalm-baseline.xml index 33944e2..19a1315 100644 --- a/tests/psalm-baseline.xml +++ b/tests/psalm-baseline.xml @@ -7,6 +7,45 @@ + + + getBody(), false, 512, JSON_THROW_ON_ERROR), $class)]]> + + + + + + getBody(), false, 512, JSON_THROW_ON_ERROR), $class)]]> + + + + + + + + + + openAPINullablesSetToNull]]> + + + + + openAPINullablesSetToNull]]> + + + + + + + + + + + + + + + rootFolder]]> @@ -35,6 +74,7 @@ + @@ -46,6 +86,12 @@ + + + + + +