-
Notifications
You must be signed in to change notification settings - Fork 0
/
Copy pathmistranslate.py
131 lines (116 loc) · 6.46 KB
/
mistranslate.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
import google.cloud.translate_v2 as tl
import os,random,json
# Set up environment variables when imported.
# Directory of this module; a relative __file__ is resolved against the working
# directory in effect at import time.
__location__ = os.path.join(os.getcwd(), os.path.dirname(__file__))
gac_filepath = os.path.join(__location__, 'gac-key.json')
conf_filepath = os.path.join(__location__, 'config.json')
# Opening the key file makes a missing key fail at import time with
# FileNotFoundError. The handle is closed immediately instead of being leaked;
# the name `cf` stays bound (to the closed file) for backward compatibility.
with open(gac_filepath) as cf:
    pass
# Tell the Google client library where the service-account key lives.
os.environ['GOOGLE_APPLICATION_CREDENTIALS'] = gac_filepath
class mt_Client(tl.Client):
    """Translation client that can chain text through several languages.

    Extends the Google Cloud Translation v2 client with a cached table of
    supported languages (``langdata``) and ``chain_translate``.
    """

    # Default settings; keys found in config.json override these.
    config = {'max-tl-chain': 20}
    cfgfile = None
    try:
        with open(conf_filepath) as cfgfile:
            cfgjson = cfgfile.read()
        # Merge instead of replacing, so a config file that omits a key
        # still leaves the default for that key in place.
        config = {**config, **json.loads(cfgjson)}
    except FileNotFoundError:
        print("WARNING: config.json file not found; using defaults.")

    def __init__(self):
        """Create the client and cache the supported-language table."""
        # Run parent init
        super().__init__()
        self.langdata = self.get_languages()

    # Update language database
    def update_languages(self):
        """Re-fetch the supported-language table into ``self.langdata``."""
        self.langdata = self.get_languages()

    # Check whether or not a language code is valid.
    def get_langcode(self, langcode, name=False):
        """Resolve a language code or language name against ``self.langdata``.

        Matching is case-insensitive and accepts either the entry's
        'language' code or its 'name'.

        Args:
            langcode: Language code or language name to look up.
            name: If true, return the entry's name instead of its code.

        Returns:
            The matching entry's 'language' value, or its 'name' value when
            ``name`` is true.

        Raises:
            TypeError: If ``langcode`` is not a string.
            ValueError: If no entry matches.
        """
        if not isinstance(langcode, str):
            raise TypeError(f'Language code must be a string, received type {type(langcode)}, value: {langcode}')
        wanted = langcode.lower()
        for entry in self.langdata:
            if wanted == entry['language'].lower() or wanted == entry['name'].lower():
                return entry['name'] if name else entry['language']
        # If no matches occur, the code is invalid
        raise ValueError("Invalid language code '{0}'".format(langcode))

    @staticmethod
    def _translated_text(result):
        """Pull the translated text out of a translate() result (dict or list of dicts)."""
        if isinstance(result, list):
            return [item['translatedText'] for item in result]
        return result['translatedText']

    def _note_source_language(self, rval, result):
        """Store the detected source language in ``rval`` if none is recorded yet."""
        if rval['inputlang'] is not None:
            return
        first = result[0] if isinstance(result, list) else result
        rval['inputlang'] = first['detectedSourceLanguage']
        rval['inputlangname'] = self.get_langcode(rval['inputlang'], True)

    def chain_translate(self, inputstr, listmode, outputlang=None, iters=0, inputlang=None, langlist=None):
        """Run text through the translator several times, then into the output language.

        REQUIRED VALUES:
        inputstr  : The input text: a non-empty string, or a list/tuple of strings.
        listmode  : Specifies whether the translation languages will be restricted. 0 = None; all languages may be used. 1 = Blacklist; languages in langlist will not be used. 2 = Whitelist; only languages in langlist will be used. 3 = Queue; input will be translated with the languages in langlist, in the order given, and no final translation into outputlang is performed.
        OPTIONAL VALUES:
        outputlang: The language to translate the resultant text into. Defaults to config variable 'default-lang' ('en' if that key is absent).
        iters     : An integer defining the number of additional translations to be performed. For listmode 1 and 2 it must be between 1 and config 'max-tl-chain'. Default value is 0 (standard translation). Ignored for listmode 3, where the queue length is used.
        inputlang : The language code for input. If omitted, the language is detected automatically.
        langlist  : A list/tuple of language codes or names, or a string of comma-separated ones, used as blacklist, whitelist or queue as specified by listmode.

        Returns:
            A dict with keys 'input' (the original input), 'inputlang' (source
            language code, given or detected), 'inputlangname' (its name) and
            'iters': a list with one dict per translation performed, each
            holding 'language', 'langname' and 'result'.

        Raises:
            TypeError: For arguments of the wrong type, or a missing langlist
                when listmode is not 0.
            ValueError: For blank input, an out-of-range listmode or iters, an
                unknown language, an empty whitelist, or a blacklist that
                excludes every available language.
        """
        # Check parameter values
        if not isinstance(inputstr, (str, list, tuple)):
            raise TypeError(f'Input must be a string, list or tuple. Received value: {type(inputstr)} ({inputstr})')
        if not inputstr:
            raise ValueError('Input must not be blank.')
        if not isinstance(listmode, int):
            raise TypeError('listmode must be an integer.')
        if listmode < 0 or listmode > 3:
            raise ValueError('listmode must be between 0 and 3.')
        if listmode != 0:
            if isinstance(langlist, str):
                langlist = langlist.split(',')
            if isinstance(langlist, (list, tuple)):
                # Keep the normalized codes so later membership tests compare
                # like with like (callers may pass names or mixed case).
                langlist = [self.get_langcode(code) for code in langlist]
            elif langlist is None:
                raise TypeError('No language list was provided; the current flag(s) set require a language list.')
            else:
                raise TypeError('Language list must be a comma-separated list of language codes with no spaces or other symbols.')
        if listmode == 3:
            # Queue mode: one translation per queued language.
            iters = len(langlist)
        else:
            if not isinstance(iters, int):
                raise TypeError('Number of iterations must be a valid integer')
            if listmode in (1, 2):
                max_chain = self.config.get('max-tl-chain', 20)
                if iters < 1 or iters > max_chain:
                    raise ValueError(f"Number of iterations must be between 1 and {max_chain}.")
        if outputlang is None:
            outputlang = self.config.get('default-lang', 'en')
        outputlang = self.get_langcode(outputlang)
        if inputlang is not None:
            inputlang = self.get_langcode(inputlang)

        # Languages that random steps may pick from (unused in queue mode).
        pool = None
        if listmode == 2:
            if not langlist:
                raise ValueError('Language whitelist must not be empty.')
            pool = langlist
        elif listmode == 1:
            blocked = set(langlist)
            pool = [lang['language'] for lang in self.langdata if lang['language'] not in blocked]
            # Without this check a full blacklist would loop forever.
            if not pool:
                raise ValueError('Language blacklist excludes every available language.')
        elif listmode == 0:
            pool = [lang['language'] for lang in self.langdata]

        # Once all data is validated, proceed with translation
        rval = {}
        rval['input'] = inputstr
        rval['inputlang'] = inputlang
        if inputlang is not None:
            rval['inputlangname'] = self.get_langcode(inputlang, True)
        rval['iters'] = []
        curinput = inputstr
        for step in range(iters):
            # Take the next queued language, or pick a random permitted one
            thislang = langlist[step] if listmode == 3 else random.choice(pool)
            # Translate and save this iteration
            result = self.translate(curinput, target_language=thislang, format_='text', source_language=inputlang)
            curinput = self._translated_text(result)
            self._note_source_language(rval, result)
            # Discard the inputlang value for subsequent translations: from
            # here on the text is no longer in the caller's language.
            inputlang = None
            rval['iters'].append({'language': thislang, 'result': curinput, 'langname': self.get_langcode(thislang, True)})
        # Finally, translate the result to the target language (unless listmode is 3)
        if listmode != 3:
            # inputlang is still the caller's value only when no iteration ran,
            # so a plain translation honours the requested source language.
            result = self.translate(curinput, target_language=outputlang, format_='text', source_language=inputlang)
            curinput = self._translated_text(result)
            self._note_source_language(rval, result)
            rval['iters'].append({'language': outputlang, 'result': curinput, 'langname': self.get_langcode(outputlang, True)})
        return rval