diff --git a/src/Tesseract/Interop/BaseApi.cs b/src/Tesseract/Interop/BaseApi.cs
index 048d4ab9..58de769c 100644
--- a/src/Tesseract/Interop/BaseApi.cs
+++ b/src/Tesseract/Interop/BaseApi.cs
@@ -96,6 +96,9 @@ int BaseApiInit(HandleRef handle, string datapath, string language, int mode,
[RuntimeDllImport(Constants.TesseractDllName, CallingConvention = CallingConvention.Cdecl, EntryPoint = "TessBaseAPIMeanTextConf")]
int BaseAPIMeanTextConf(HandleRef handle);
+ // Binds the native "TessBaseAPIProcessPage" entry point (cdecl) exported by the Tesseract library.
+ // The caller in Page.Recognize(int, int) treats a return value of 1 as success, passes null for
+ // filename/retry_config and supplies a zero-pointer HandleRef as the renderer.
+ [RuntimeDllImport(Constants.TesseractDllName, CallingConvention = CallingConvention.Cdecl, EntryPoint = "TessBaseAPIProcessPage")]
+ int BaseAPIProcessPage(HandleRef handle, HandleRef pix, int page_index, string filename, string retry_config, int timeout_millisec, HandleRef renderer);
+
[RuntimeDllImport(Constants.TesseractDllName, CallingConvention = CallingConvention.Cdecl, EntryPoint = "TessBaseAPIRecognize")]
int BaseApiRecognize(HandleRef handle, HandleRef monitor);
diff --git a/src/Tesseract/Page.cs b/src/Tesseract/Page.cs
index 2fc388d1..369dac9f 100644
--- a/src/Tesseract/Page.cs
+++ b/src/Tesseract/Page.cs
@@ -122,6 +122,16 @@ public string GetText()
return Interop.TessApi.BaseAPIGetUTF8Text(Engine.Handle);
}
+ /// <summary>
+ /// Gets the page's content as plain text, running the timed recognition step first.
+ /// </summary>
+ /// <param name="timeout">Timeout handed to the recognition step, which forwards it to the native call as <c>timeout_millisec</c>.</param>
+ /// <returns>The recognised UTF-8 text, or <c>null</c> when the recognition step reports failure.</returns>
+ public string GetText(int timeout)
+ {
+     // Page index 0 is always used for the plain-text overload.
+     bool recognized = Recognize(0, timeout);
+
+     return recognized
+         ? Interop.TessApi.BaseAPIGetUTF8Text(Engine.Handle)
+         : null;
+ }
+
///
/// Gets the page's content as a HOCR text.
///
@@ -139,6 +149,22 @@ public string GetHOCRText(int pageNum, bool useXHtml = false)
return Interop.TessApi.BaseAPIGetHOCRText(Engine.Handle, pageNum);
}
+ /// <summary>
+ /// Gets the page's content as HOCR text, running the timed recognition step first.
+ /// </summary>
+ /// <param name="pageNum">Zero-based page number; must not be negative.</param>
+ /// <param name="timeout">Timeout handed to the recognition step, which forwards it to the native call as <c>timeout_millisec</c>.</param>
+ /// <param name="useXHtml">When <c>true</c> the <c>BaseAPIGetHOCRText2</c> variant is used; otherwise <c>BaseAPIGetHOCRText</c>.</param>
+ /// <returns>The HOCR text, or <c>null</c> when the recognition step reports failure.</returns>
+ public string GetHOCRText(int pageNum, int timeout, bool useXHtml = false)
+ {
+     // NOTE(review): nameof(pageNum) would be refactor-safe here if the project's language version allows it.
+     Guard.Require("pageNum", pageNum >= 0, "Page number must be greater than or equal to zero (0).");
+
+     bool recognized = Recognize(pageNum, timeout);
+     if (recognized)
+     {
+         return useXHtml
+             ? Interop.TessApi.BaseAPIGetHOCRText2(Engine.Handle, pageNum)
+             : Interop.TessApi.BaseAPIGetHOCRText(Engine.Handle, pageNum);
+     }
+
+     return null;
+ }
+
///
/// Get's the mean confidence that as a percentage of the recognized text.
///
@@ -295,6 +321,51 @@ internal void Recognize()
}
}
+ /// <summary>
+ /// Runs the recognition phase for this page through the native <c>TessBaseAPIProcessPage</c> call,
+ /// forwarding the supplied timeout.
+ /// </summary>
+ /// <param name="pageNum">Page index forwarded to the native call.</param>
+ /// <param name="timeout">Timeout forwarded to the native call as <c>timeout_millisec</c>.</param>
+ /// <returns>
+ /// <c>true</c> when the recognition phase has completed successfully, either during this call or
+ /// during an earlier one; <c>false</c> when the native call did not report success.
+ /// </returns>
+ private bool Recognize(int pageNum, int timeout)
+ {
+     Guard.Verify(PageSegmentMode != PageSegMode.OsdOnly, "Cannot OCR image when using OSD only page segmentation, please use DetectBestOrientation instead.");
+
+     // The flag is set once a recognition pass has succeeded. Report success in that case rather
+     // than skipping the native call and returning false, which made every repeated timed call
+     // on the same page yield null.
+     if (runRecognitionPhase)
+     {
+         return true;
+     }
+
+     // No output file, retry config or renderer is supplied; only the in-memory results are wanted.
+     int result = Interop.TessApi.Native.BaseAPIProcessPage(Engine.Handle, Image.Handle, pageNum, null, null, timeout, new HandleRef(this, IntPtr.Zero));
+     if (result != 1)
+     {
+         // Leave the flag untouched so a later call can try again.
+         return false;
+     }
+
+     runRecognitionPhase = true;
+
+     // now write out the thresholded image if required to do so
+     bool tesseditWriteImages;
+     if (Engine.TryGetBoolVariable("tessedit_write_images", out tesseditWriteImages) && tesseditWriteImages)
+     {
+         using (Pix thresholdedImage = GetThresholdedImage())
+         {
+             string filePath = Path.Combine(Environment.CurrentDirectory, "tessinput.tif");
+             try
+             {
+                 thresholdedImage.Save(filePath, ImageFormat.TiffG4);
+                 trace.TraceEvent(TraceEventType.Information, 2,
+                     "Successfully saved the thresholded image to '{0}'", filePath);
+             }
+             catch (Exception error)
+             {
+                 // Saving the debug image is best-effort: log the failure and keep the successful result.
+                 trace.TraceEvent(TraceEventType.Error, 2,
+                     "Failed to save the thresholded image to '{0}'.\nError: {1}", filePath, error.Message);
+             }
+         }
+     }
+
+     return true;
+ }
+
protected override void Dispose(bool disposing)
{
if (disposing) {