-
Notifications
You must be signed in to change notification settings - Fork 0
/
Copy pathno_wait.py
93 lines (66 loc) · 2.71 KB
/
no_wait.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
import re
import json
import threading
from collections import Counter
from tqdm import tqdm
from deep_translator import GoogleTranslator
from googletrans import Translator
from nltk.corpus import wordnet
def is_english_word(word):
    """Return True when WordNet has at least one synset for *word*."""
    synsets = wordnet.synsets(word)
    return len(synsets) > 0
def count_word_occurrences(input_file, output_file):
    """Read *input_file*, count occurrences of each cleaned token, and hand
    the counts to removeWords() for English-filtering and translation.

    NOTE(review): *output_file* is accepted but unused here — the downstream
    translate() step hardcodes its own 'output.json' path. Kept for
    interface compatibility.
    """
    with open(input_file, 'r') as file:
        text = file.read()
    # Punctuation and digits to strip before tokenizing.
    # NOTE(review): the class contains character ranges (',-:' spans
    # , - . / 0-9 :) — presumably intentional; confirm against the
    # expected input text.
    characters_to_remove = r'[!?<>,-:->\d]'
    # BUG FIX: substitute a SPACE, not '' — the comment always promised
    # spaces, and replacing with '' fused punctuation-joined tokens
    # (e.g. "well-known" became the bogus word "wellknown").
    text_cleaned = re.sub(characters_to_remove, ' ', text)
    # split() on whitespace tokenizes and discards empty strings.
    words = text_cleaned.split()
    word_count = Counter(words)
    result = {
        "word_count": word_count
    }
    removeWords(result)
def removeWords(data):
    """Filter data['word_count'] down to English words (per WordNet),
    update the dict in place, and forward it to translate()."""
    counts = data.get("word_count", {})
    filtered = {}
    for token, occurrences in counts.items():
        if is_english_word(token):
            filtered[token] = occurrences
    data["word_count"] = filtered
    translate(data)
class TranslationEntry:
    """Record pairing a source word, its occurrence count, and its
    translated form."""

    def __init__(self, word, number, translation):
        # Original English word.
        self.word = word
        # Occurrence count of the word in the source text.
        self.number = number
        # Translated form of the word.
        self.translation = translation
def translate_word(word, number, translator, results):
    """Translate *word* (en -> uz) and append a TranslationEntry to *results*.

    Runs as a threading.Thread target. *translator* (the googletrans
    Translator built by translate()) is accepted for interface
    compatibility but unused — the work is done by deep_translator's
    GoogleTranslator.
    """
    try:
        translation = GoogleTranslator(source='en', target='uz').translate(word)
    except Exception:
        # ROBUSTNESS FIX: an exception in a thread target previously made
        # the word vanish from the output with only a traceback on stderr.
        # Record the word with no translation instead of dropping it.
        translation = None
    entry = TranslationEntry(word, number, translation)
    results.append(entry)
def translate(data):
    """Translate every counted word concurrently and write the results to
    'output.json' as a list of {'word', 'number', 'translate'} records.

    Spawns one thread per word (unbounded fan-out — acceptable for small
    word lists; a ThreadPoolExecutor would be safer at scale).
    """
    translator = Translator()  # passed through to translate_word (unused there)
    print('Translating...')
    translated_entries = []
    threads = []
    total_words = len(data['word_count'])
    with tqdm(total=total_words) as pbar:
        # BUG FIX: the original duplicated this spawn loop verbatim, so
        # every word was translated TWICE and every record appeared twice
        # in output.json. Each word now gets exactly one thread.
        for word, number in data['word_count'].items():
            thread = threading.Thread(
                target=translate_word,
                args=(word, number, translator, translated_entries))
            threads.append(thread)
            thread.start()
        # Advance the bar as each worker actually finishes (the original
        # bumped it at spawn time, contradicting its own comment).
        for thread in threads:
            thread.join()
            pbar.update(1)
    # list.append from worker threads is atomic under the GIL, so
    # translated_entries is complete once all joins return.
    output_data = [{'word': entry.word, 'number': entry.number, 'translate': entry.translation} for entry in
                   translated_entries]
    with open('output.json', 'w', encoding='utf-8') as json_file:
        json.dump(output_data, json_file, ensure_ascii=False, indent=4)
if __name__ == '__main__':
    # Entry point: count and translate the words in words.txt. The
    # translation pipeline writes output.json itself (the second argument
    # is unused downstream).
    source_path = 'words.txt'
    destination_path = 'output.json'
    count_word_occurrences(source_path, destination_path)