-
Notifications
You must be signed in to change notification settings - Fork 33
New issue
Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.
By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.
Already on GitHub? Sign in to your account
None of the traineddata works for me #14
Comments
These are for tesseract 4 or above to be used with --oem 1.
What version of Tesseract are you using?
…On Thu, Oct 17, 2019, 21:06 Arrrrny ***@***.***> wrote:
I am using the traineddata files from here and all of them crash. I am using
https://github.com/adaptech-cz/Tesseract4Android
I can use other custom trained datas without any issue. Am I missing a
setting or something?
Thanks!
E/Tesseract(native)( 6126): Could not initialize Tesseract API with
language=engrestricted_best!
F/libc ( 6126): Fatal signal 11 (SIGSEGV), code 1 (SEGV_MAPERR), fault
addr 0x8 in tid 6164 (Thread-2), pid 6126 (act_ocr_example)
------------------------------
Build fingerprint:
'google/sdk_gphone_x86/generic_x86:9/PSR1.180720.093/5456446:userdebug/dev-keys'
Revision: '0'
ABI: 'x86'
pid: 6126, tid: 6164, name: Thread-2 >>>
io.paratoner.tesseract_ocr_example <<<
signal 11 (SIGSEGV), code 1 (SEGV_MAPERR), fault addr 0x8
Cause: null pointer dereference
eax 00000000 ebx c7707f14 ecx c8475870 edx 00000000
edi c85bb740 esi c8454700
ebp c72ffa38 esp c72ff910 eip c73da039
backtrace:
#00 pc 000d9039
/data/app/io.paratoner.tesseract_ocr_example-tcEmsNHnRrF98KxA7MlFKQ==/lib/x86/libtesseract.so
(tesseract::Tesseract::recog_all_words(PAGE_RES*, ETEXT_DESC*, TBOX const*,
char const*, int)+217)
#1 <#1> pc
000bca80
/data/app/io.paratoner.tesseract_ocr_example-tcEmsNHnRrF98KxA7MlFKQ==/lib/x86/libtesseract.so
(tesseract::TessBaseAPI::Recognize(ETEXT_DESC*)+1152)
#2 <#2> pc
000bb0fc
/data/app/io.paratoner.tesseract_ocr_example-tcEmsNHnRrF98KxA7MlFKQ==/lib/x86/libtesseract.so
(tesseract::TessBaseAPI::GetUTF8Text()+76)
#3 <#3> pc
002d254a
/data/app/io.paratoner.tesseract_ocr_example-tcEmsNHnRrF98KxA7MlFKQ==/lib/x86/libtesseract.so
(Java_com_googlecode_tesseract_android_TessBaseAPI_nativeGetUTF8Text+74)
#4 <#4> pc
005f6b97 /system/lib/libart.so (art_quick_generic_jni_trampoline+71)
#5 <#5> pc
005f0b82 /system/lib/libart.so (art_quick_invoke_stub+338)
#6 <#6> pc
000a30ce /system/lib/libart.so (art::ArtMethod::Invoke(art::Thread*,
unsigned int*, unsigned int, art::JValue*, char const*)+222)
#7 <#7> pc
0029bca2 /system/lib/libart.so
(art::interpreter::ArtInterpreterToCompiledCodeBridge(art::Thread*,
art::ArtMethod*, art::ShadowFrame*, unsigned short, art::JValue*)+338)
#8 <#8> pc
00293e48 /system/lib/libart.so (bool art::interpreter::DoCall<false,
false>(art::ArtMethod*, art::Thread*, art::ShadowFrame&, art::Instruction
const*, unsigned short, art::JValue*)+1048)
#9 <#9> pc
005bda66 /system/lib/libart.so (MterpInvokeDirect+342)
#10 <#10> pc
005e2e21 /system/lib/libart.so (ExecuteMterpImpl+14497)
#11 <#11> pc
00015814 /dev/ashmem/dalvik-classes.dex extracted in memory from
/data/app/io.paratoner.tesseract_ocr_example-tcEmsNHnRrF98KxA7MlFKQ==/base.apk
(deleted) (com.googlecode.tesseract.android.TessBaseAPI.getUTF8Text+12)
#12 <#12> pc
00266216 /system/lib/libart.so
(_ZN3art11interpreterL7ExecuteEPNS_6ThreadERKNS_20CodeItemDataAccessorERNS_11ShadowFrameENS_6JValueEb.llvm.2093054539+598)
#13 <#13> pc
0026c79c /system/lib/libart.so
(art::interpreter::ArtInterpreterToInterpreterBridge(art::Thread*,
art::CodeItemDataAccessor const&, art::ShadowFrame*, art::JValue*)+220)
#14 <#14> pc
00293e2b /system/lib/libart.so (bool art::interpreter::DoCall<false,
false>(art::ArtMethod*, art::Thread*, art::ShadowFrame&, art::Instruction
const*, unsigned short, art::JValue*)+1019)
#15 pc 005bc493 /system/lib/libart.so (MterpInvokeVirtual+691)
#16 pc 005e2d21 /system/lib/libart.so (ExecuteMterpImpl+14241)
#17 pc 000301a2 /dev/ashmem/dalvik-classes.dex extracted in memory from
/data/app/io.paratoner.tesseract_ocr_example-tcEmsNHnRrF98KxA7MlFKQ==/base.apk
(deleted) (io.paratoner.tesseract_ocr.TesseractOcrPlugin$1.run+22)
#18 pc 00266216 /system/lib/libart.so
(_ZN3art11interpreterL7ExecuteEPNS_6ThreadERKNS_20CodeItemDataAccessorERNS_11ShadowFrameENS_6JValueEb.llvm.2093054539+598)
#19 pc 0026c79c /system/lib/libart.so
(art::interpreter::ArtInterpreterToInterpreterBridge(art::Thread*,
art::CodeItemDataAccessor const&, art::ShadowFrame*, art::JValue*)+220)
#20 pc 00293e2b /system/lib/libart.so (bool
art::interpreter::DoCall<false, false>(art::ArtMethod*, art::Thread*,
art::ShadowFrame&, art::Instruction const*, unsigned short,
art::JValue*)+1019)
#21 pc 005bd574 /system/lib/libart.so (MterpInvokeInterface+1444)
#22 pc 005e2f21 /system/lib/libart.so (ExecuteMterpImpl+14753)
#23 pc 000ca806 /system/framework/boot.vdex (java.lang.Thread.run+12)
#24 pc 00266216 /system/lib/libart.so
(_ZN3art11interpreterL7ExecuteEPNS_6ThreadERKNS_20CodeItemDataAccessorERNS_11ShadowFrameENS_6JValueEb.llvm.2093054539+598)
#25 pc 0026c68e /system/lib/libart.so
(art::interpreter::EnterInterpreterFromEntryPoint(art::Thread*,
art::CodeItemDataAccessor const&, art::ShadowFrame*)+126)
#26 pc 005a953d /system/lib/libart.so (artQuickToInterpreterBridge+1277)
#27 pc 005f6c6d /system/lib/libart.so (art_quick_to_interpreter_bridge+77)
#28 pc 005f0b82 /system/lib/libart.so (art_quick_invoke_stub+338)
#29 pc 000a30ce /system/lib/libart.so
(art::ArtMethod::Invoke(art::Thread*, unsigned int*, unsigned int,
art::JValue*, char const*)+222)
#30 pc 004d3349 /system/lib/libart.so (art::(anonymous
namespace)::InvokeWithArgArray(art::ScopedObjectAccessAlreadyRunnable
const&, art::ArtMethod*, art::(anonymous namespace)::ArgArray*,
art::JValue*, char const*)+89)
#31 pc 004d45f7 /system/lib/libart.so
(art::InvokeVirtualOrInterfaceWithJValues(art::ScopedObjectAccessAlreadyRunnable
const&, _jobject*, _jmethodID*, jvalue*)+471)
#32 pc 0050958c /system/lib/libart.so
(art::Thread::CreateCallback(void*)+1484)
#33 pc 0008f065 /system/lib/libc.so (__pthread_start(void*)+53)
#34 pc 0002485b /system/lib/libc.so (__start_thread+75)
Lost connection to device.
Exited (sigterm)
—
You are receiving this because you are subscribed to this thread.
Reply to this email directly, view it on GitHub
<#14?email_source=notifications&email_token=ABG37I3622W3E4ZRPAJUWGDQPCBALA5CNFSM4JB3PAX2YY3PNVWWK3TUL52HS4DFUVEXG43VMWVGG33NNVSW45C7NFSM4HSQAUQA>,
or unsubscribe
<https://github.com/notifications/unsubscribe-auth/ABG37I5JRBHFRGQQO3DZ4HTQPCBALANCNFSM4JB3PAXQ>
.
|
I am using Tesseract 4 with --oem 1. Both work fine, but none of the traineddata files in your library do. |
I have not tested them on Android. These are just proof of concept and
seemed to work when created.
If there is any particular one you are interested in, please let me know
and I will retest.
…On Fri, Oct 18, 2019, 15:43 Arrrrny ***@***.***> wrote:
I am using Tesseract 4 with oem 1,
https://github.com/tesseract-ocr/tessdata_fast/eng.traineddata
https://github.com/anuraghkp1/tessdata/blob/master/financial.traineddata
both work fine, but none of the traineddata files in your library do.
—
You are receiving this because you commented.
Reply to this email directly, view it on GitHub
<#14?email_source=notifications&email_token=ABG37I27KDQBXBS7L25U4HDQPGD5DA5CNFSM4JB3PAX2YY3PNVWWK3TUL52HS4DFVREXG43VMVBW63LNMVXHJKTDN5WW2ZLOORPWSZGOEBTXWSA#issuecomment-543652680>,
or unsubscribe
<https://github.com/notifications/unsubscribe-auth/ABG37I4HJTFCAZL4QM562ITQPGD5DANCNFSM4JB3PAXQ>
.
|
I am interested in digits_comma.traineddata |
Works fine for me. How did you download it?
…On Fri, Oct 18, 2019 at 6:48 PM Arrrrny ***@***.***> wrote:
I am interested in digits_comma.traineddata
—
You are receiving this because you commented.
Reply to this email directly, view it on GitHub
<#14?email_source=notifications&email_token=ABG37I3SH4EGBVROYFR3HJ3QPGZTNA5CNFSM4JB3PAX2YY3PNVWWK3TUL52HS4DFVREXG43VMVBW63LNMVXHJKTDN5WW2ZLOORPWSZGOEBUNG3Q#issuecomment-543740782>,
or unsubscribe
<https://github.com/notifications/unsubscribe-auth/ABG37I3HUAIARUDVKP6JT3DQPGZTNANCNFSM4JB3PAXQ>
.
--
____________________________________________________________
भजन - कीर्तन - आरती @ http://bhajans.ramparivar.com
|
|
Same problem (DotProductAVX can't be used on Android). I am using a fresh Emgu CV 4.1 x64 with .NET 4.8 and an AVX CPU. |
I had trained on an IBM power8 system. Didn't know that the files are not
portable across systems.
…On Tue, Dec 10, 2019, 05:13 Volodya ***@***.***> wrote:
Same problem (DotProductAVX can't be used on Android). I am using fresh
Emgucv 4.1 with net 4.8 and AVX CPU.
https://github.com/tesseract-ocr/tessdata_fast/eng.traineddata works fine
Had to train my own data ;)
—
You are receiving this because you commented.
Reply to this email directly, view it on GitHub
<#14?email_source=notifications&email_token=ABG37I6HFVP2NKZT2VSK6CLQX3J2PA5CNFSM4JB3PAX2YY3PNVWWK3TUL52HS4DFVREXG43VMVBW63LNMVXHJKTDN5WW2ZLOORPWSZGOEGLDHIA#issuecomment-563491744>,
or unsubscribe
<https://github.com/notifications/unsubscribe-auth/ABG37I35E7A7N5SF3RFAADDQX3J2PANCNFSM4JB3PAXQ>
.
|
@vadash can you share how you did it? I mean training. |
I am using 2 specific traineddata files and 1 generic one. For a specific one: generate a font (or pick a close match) -> upload it to http://ocr7.com/ -> use it. It's pretty simple. For the universal (slower) one you will need a Linux machine (a virtual one is fine). Follow this guide: https://github.com/tesseract-ocr/tesseract/wiki/TrainingTesseract-4.00 |
I am using the traineddata files from here and all of them crash. I am using https://github.com/adaptech-cz/Tesseract4Android
I can use other custom trained datas without any issue. Am I missing a setting or something?
Thanks!
E/Tesseract(native)( 6126): Could not initialize Tesseract API with language=engrestricted_best!
F/libc ( 6126): Fatal signal 11 (SIGSEGV), code 1 (SEGV_MAPERR), fault addr 0x8 in tid 6164 (Thread-2), pid 6126 (act_ocr_example)
Build fingerprint: 'google/sdk_gphone_x86/generic_x86:9/PSR1.180720.093/5456446:userdebug/dev-keys'
Revision: '0'
ABI: 'x86'
pid: 6126, tid: 6164, name: Thread-2 >>> io.paratoner.tesseract_ocr_example <<<
signal 11 (SIGSEGV), code 1 (SEGV_MAPERR), fault addr 0x8
Cause: null pointer dereference
eax 00000000 ebx c7707f14 ecx c8475870 edx 00000000
edi c85bb740 esi c8454700
ebp c72ffa38 esp c72ff910 eip c73da039
backtrace:
#00 pc 000d9039 /data/app/io.paratoner.tesseract_ocr_example-tcEmsNHnRrF98KxA7MlFKQ==/lib/x86/libtesseract.so (tesseract::Tesseract::recog_all_words(PAGE_RES*, ETEXT_DESC*, TBOX const*, char const*, int)+217)
#1 pc 000bca80 /data/app/io.paratoner.tesseract_ocr_example-tcEmsNHnRrF98KxA7MlFKQ==/lib/x86/libtesseract.so (tesseract::TessBaseAPI::Recognize(ETEXT_DESC*)+1152)
#2 pc 000bb0fc /data/app/io.paratoner.tesseract_ocr_example-tcEmsNHnRrF98KxA7MlFKQ==/lib/x86/libtesseract.so (tesseract::TessBaseAPI::GetUTF8Text()+76)
#3 pc 002d254a /data/app/io.paratoner.tesseract_ocr_example-tcEmsNHnRrF98KxA7MlFKQ==/lib/x86/libtesseract.so (Java_com_googlecode_tesseract_android_TessBaseAPI_nativeGetUTF8Text+74)
#4 pc 005f6b97 /system/lib/libart.so (art_quick_generic_jni_trampoline+71)
#5 pc 005f0b82 /system/lib/libart.so (art_quick_invoke_stub+338)
#6 pc 000a30ce /system/lib/libart.so (art::ArtMethod::Invoke(art::Thread*, unsigned int*, unsigned int, art::JValue*, char const*)+222)
#7 pc 0029bca2 /system/lib/libart.so (art::interpreter::ArtInterpreterToCompiledCodeBridge(art::Thread*, art::ArtMethod*, art::ShadowFrame*, unsigned short, art::JValue*)+338)
#8 pc 00293e48 /system/lib/libart.so (bool art::interpreter::DoCall<false, false>(art::ArtMethod*, art::Thread*, art::ShadowFrame&, art::Instruction const*, unsigned short, art::JValue*)+1048)
#9 pc 005bda66 /system/lib/libart.so (MterpInvokeDirect+342)
#10 pc 005e2e21 /system/lib/libart.so (ExecuteMterpImpl+14497)
#11 pc 00015814 /dev/ashmem/dalvik-classes.dex extracted in memory from /data/app/io.paratoner.tesseract_ocr_example-tcEmsNHnRrF98KxA7MlFKQ==/base.apk (deleted) (com.googlecode.tesseract.android.TessBaseAPI.getUTF8Text+12)
#12 pc 00266216 /system/lib/libart.so (_ZN3art11interpreterL7ExecuteEPNS_6ThreadERKNS_20CodeItemDataAccessorERNS_11ShadowFrameENS_6JValueEb.llvm.2093054539+598)
#13 pc 0026c79c /system/lib/libart.so (art::interpreter::ArtInterpreterToInterpreterBridge(art::Thread*, art::CodeItemDataAccessor const&, art::ShadowFrame*, art::JValue*)+220)
#14 pc 00293e2b /system/lib/libart.so (bool art::interpreter::DoCall<false, false>(art::ArtMethod*, art::Thread*, art::ShadowFrame&, art::Instruction const*, unsigned short, art::JValue*)+1019)
#15 pc 005bc493 /system/lib/libart.so (MterpInvokeVirtual+691)
#16 pc 005e2d21 /system/lib/libart.so (ExecuteMterpImpl+14241)
#17 pc 000301a2 /dev/ashmem/dalvik-classes.dex extracted in memory from /data/app/io.paratoner.tesseract_ocr_example-tcEmsNHnRrF98KxA7MlFKQ==/base.apk (deleted) (io.paratoner.tesseract_ocr.TesseractOcrPlugin$1.run+22)
#18 pc 00266216 /system/lib/libart.so (_ZN3art11interpreterL7ExecuteEPNS_6ThreadERKNS_20CodeItemDataAccessorERNS_11ShadowFrameENS_6JValueEb.llvm.2093054539+598)
#19 pc 0026c79c /system/lib/libart.so (art::interpreter::ArtInterpreterToInterpreterBridge(art::Thread*, art::CodeItemDataAccessor const&, art::ShadowFrame*, art::JValue*)+220)
#20 pc 00293e2b /system/lib/libart.so (bool art::interpreter::DoCall<false, false>(art::ArtMethod*, art::Thread*, art::ShadowFrame&, art::Instruction const*, unsigned short, art::JValue*)+1019)
#21 pc 005bd574 /system/lib/libart.so (MterpInvokeInterface+1444)
#22 pc 005e2f21 /system/lib/libart.so (ExecuteMterpImpl+14753)
#23 pc 000ca806 /system/framework/boot.vdex (java.lang.Thread.run+12)
#24 pc 00266216 /system/lib/libart.so (_ZN3art11interpreterL7ExecuteEPNS_6ThreadERKNS_20CodeItemDataAccessorERNS_11ShadowFrameENS_6JValueEb.llvm.2093054539+598)
#25 pc 0026c68e /system/lib/libart.so (art::interpreter::EnterInterpreterFromEntryPoint(art::Thread*, art::CodeItemDataAccessor const&, art::ShadowFrame*)+126)
#26 pc 005a953d /system/lib/libart.so (artQuickToInterpreterBridge+1277)
#27 pc 005f6c6d /system/lib/libart.so (art_quick_to_interpreter_bridge+77)
#28 pc 005f0b82 /system/lib/libart.so (art_quick_invoke_stub+338)
#29 pc 000a30ce /system/lib/libart.so (art::ArtMethod::Invoke(art::Thread*, unsigned int*, unsigned int, art::JValue*, char const*)+222)
#30 pc 004d3349 /system/lib/libart.so (art::(anonymous namespace)::InvokeWithArgArray(art::ScopedObjectAccessAlreadyRunnable const&, art::ArtMethod*, art::(anonymous namespace)::ArgArray*, art::JValue*, char const*)+89)
#31 pc 004d45f7 /system/lib/libart.so (art::InvokeVirtualOrInterfaceWithJValues(art::ScopedObjectAccessAlreadyRunnable const&, _jobject*, _jmethodID*, jvalue*)+471)
#32 pc 0050958c /system/lib/libart.so (art::Thread::CreateCallback(void*)+1484)
#33 pc 0008f065 /system/lib/libc.so (__pthread_start(void*)+53)
#34 pc 0002485b /system/lib/libc.so (__start_thread+75)
Lost connection to device.
Exited (sigterm)
The text was updated successfully, but these errors were encountered: