-
Notifications
You must be signed in to change notification settings - Fork 3
/
Copy pathcreateDics.py
33 lines (27 loc) · 1.07 KB
/
createDics.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
import csv
import glob
import os
# before usage:
# * create UMLS dictionaries with JuFiT
# * create Gene dictionary with JULIELab/gene-name-mapping: https://zenodo.org/record/3874895#.XxG0Zh0aRhE
# * adapt path names of your dictionaries
def create_global_dict(dic_path: str, delim: str) -> str:
    """Merge the first column of every dictionary file in a directory.

    Each regular file directly inside ``dic_path`` is parsed with
    ``csv.reader`` using ``delim`` as the field delimiter.  For every
    non-empty row, one output line ``<first field>\\t<label>\\n`` is produced,
    where ``<label>`` is the file's base name with the substrings ``.txt``,
    ``.dict``, ``2019AB-`` and ``-GER`` removed.

    Args:
        dic_path: Directory containing the dictionary files.
        delim: Single-character field delimiter used inside those files.

    Returns:
        All generated lines concatenated into one string (empty if the
        directory contains no matching files).
    """
    lines = []
    # glob returns entries in arbitrary (filesystem-dependent) order; sorting
    # makes the generated dictionary reproducible across runs and machines.
    for dic in sorted(glob.glob(dic_path + '/*')):
        # Sub-directories also match the pattern but cannot be opened as files.
        if not os.path.isfile(dic):
            continue
        # Derive the label from the base name so it does not depend on how
        # dic_path is spelled (trailing slash, platform path separator).
        name = (
            os.path.basename(dic)
            .replace('.txt', '')
            .replace('.dict', '')
            .replace('2019AB-', '')
            .replace('-GER', '')
        )
        # newline='' lets the csv module do its own line-ending handling.
        with open(dic, newline='') as tsvfile:
            reader = csv.reader(tsvfile, delimiter=delim)
            for row in reader:
                # csv.reader yields [] for blank lines; there is no term to
                # take from such a row, so skip it instead of raising.
                if not row:
                    continue
                lines.append(row[0] + '\t' + name + '\n')
    return ''.join(lines)
# TODO: adapt this base path before usage.
path = '/the/name/of/the/path/with/dictionary/files'

# Files under <path>/UMLS-semantic-group are read with '|' as the delimiter.
dic_path_umls = path + '/UMLS-semantic-group'
global_dict_umls = create_global_dict(dic_path_umls, '|')

# Files under <path>/gene are read with a tab as the delimiter.
dic_path_gene = path + '/gene'
global_dict_gene = create_global_dict(dic_path_gene, '\t')

# Write both parts (UMLS first, then gene) into one file in the current
# working directory.  The `with` block guarantees the file is flushed and
# closed even if one of the writes raises.
# NOTE(review): the doubled '.txt.txt' extension looks accidental; it is kept
# unchanged here because downstream consumers may rely on this exact name.
with open('global_dictionary.txt.txt', 'w') as global_dict_file:
    global_dict_file.write(global_dict_umls)
    global_dict_file.write(global_dict_gene)