forked from tazeek/BullyDetect
-
Notifications
You must be signed in to change notification settings - Fork 0
/
Copy pathsoundex_list.py
34 lines (23 loc) · 895 Bytes
/
soundex_list.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
import pickle
import os
# Clear the console before printing progress (`cls` is the Windows command).
os.system('cls')

# Input pickle: a dict mapping each word to its soundex code.
FILE = "Word Dictionaries/soundex_dict.pk"

# NOTE(security): pickle.load can execute arbitrary code while unpickling.
# Only run this script against a dictionary file you produced yourself.
with open(FILE, "rb") as source:
    soundex_word_dict = pickle.load(source)

# Distinct soundex codes; kept so the progress message can report the total.
unique_soundex = list(set(soundex_word_dict.values()))

# Invert the mapping: soundex code -> list of every word that encodes to it.
# A single pass over the dictionary replaces the previous approach of
# rescanning the entire dictionary once per unique code (quadratic work).
# Words inside each list keep the iteration order of soundex_word_dict;
# keys are inserted in the order each code is first encountered.
soundex_words_list = {}
for word, soundex in soundex_word_dict.items():
    if soundex not in soundex_words_list:
        # First time this code is seen: report progress every 100 new codes.
        index = len(soundex_words_list)
        if index % 100 == 0:
            print("%i out of %i done " % (index, len(unique_soundex)))
        soundex_words_list[soundex] = []
    soundex_words_list[soundex].append(word)

# Save the inverted mapping. The context manager guarantees the file is
# flushed and closed even if pickling raises.
FILE = "Word Dictionaries/soundex_words_list.pk"
with open(FILE, "wb") as target:
    pickle.dump(soundex_words_list, target)