-
Notifications
You must be signed in to change notification settings - Fork 1
/
Copy pathlang.py
36 lines (29 loc) · 847 Bytes
/
lang.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
################################################################
### Language and script related data.
################################################################
### character properties
def size_category(c):
    """Return a one-character size-class label for a single character.

    The label is chosen by membership in fixed character sets:

      "x"  -- one of "acemnorsuvwxz"
      "k"  -- an ASCII capital letter, one of "bdfhklt", or "!" / "?"
      "y"  -- one of "gpqy"
      "."  -- "." or ","
      "'"  -- a single quote, double quote, or backtick
      None -- anything else (including digits and the empty string)

    NOTE(review): the labels presumably stand for glyph height classes
    (x-height, cap/ascender height, descender, baseline punctuation,
    raised punctuation) -- confirm against the callers.

    Raises:
        ValueError: if `c` is longer than one character.
    """
    if len(c) > 1:
        raise ValueError("isolated characters only")
    # The empty string is a substring of every string, so without this guard
    # the membership tests below would classify "" as "x".
    if not c:
        return None
    # "y" is intentionally absent here: it belongs to the "gpqy" group below,
    # and listing it in this earlier test would shadow that entry.
    if c in "acemnorsuvwxz":
        return "x"
    if c in "ABCDEFGHIJKLMNOPQRSTUVWXYZbdfhklt!?":
        return "k"
    if c in "gpqy":
        return "y"
    if c in ".,":
        return "."
    if c in "'\"`":
        return "'"
    return None
### groups of characters that OCR commonly confuses with one another
ocr_confusions_list = [
    ["c", "C"],
    ["l", "1", "I", "|", "/"],
    ["o", "O", "0"],
    ["s", "S"],
    ["u", "U"],
    ["v", "V"],
    ["w", "W"],
    ["x", "X"],
    ["z", "Z"],
    [",", "'", ".", "`"],
]

### lookup table: character -> the confusion group (list) that contains it;
### every member of a group maps to the same list object
ocr_confusions = {
    char: group
    for group in ocr_confusions_list
    for char in group
}