-
Notifications
You must be signed in to change notification settings - Fork 3
Commit
This commit does not belong to any branch on this repository, and may belong to a fork outside of the repository.
Update project with pipelines and sample data.
Our code for the submission of AMIA 2020.
- Loading branch information
Showing
10 changed files
with
82 additions
and
1 deletion.
There are no files selected for viewing
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,3 @@ | ||
.project | ||
jcore-pipelines/detectStopWords/* | ||
jcore-pipelines/detectUMLSentries/* |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,30 @@ | ||
import glob | ||
import csv | ||
|
||
# Status message printed once at start-up, before any dictionary is read.
print('merge different UMLS dics')
|
||
def create_big_dic(dic_path, delim):
    """Merge all dictionary files of one directory into a single string.

    Every file directly inside ``dic_path`` is parsed with ``csv.reader``
    using ``delim`` as the field delimiter.  For each non-empty row, the
    first field is emitted together with a label derived from the file
    name, as one ``<first field>\\t<label>\\n`` line.

    The label is the file's path with the ``dic_path + '/'`` prefix and the
    substrings ``.txt``, ``.dict``, ``2019AB-`` and ``-GER`` removed.

    Args:
        dic_path: Directory containing the dictionary files (no trailing
            slash expected, since ``'/*'`` is appended for globbing).
        delim: Single-character field delimiter of the dictionary files.

    Returns:
        The concatenated lines as one string; ``''`` if the directory
        contains no files or only empty ones.
    """
    merged_lines = []
    # glob returns entries in filesystem order; sort so that repeated runs
    # produce the same output.
    for dic in sorted(glob.glob(dic_path + '/*')):
        name = (
            dic.replace(dic_path + '/', '')
            .replace('.txt', '')
            .replace('.dict', '')
            .replace('2019AB-', '')
            .replace('-GER', '')
        )
        # newline='' lets the csv module do its own line-ending handling.
        with open(dic, newline='') as tsvfile:
            for row in csv.reader(tsvfile, delimiter=delim):
                # csv.reader yields [] for blank lines; row[0] would raise
                # IndexError on those, so skip them.
                if not row:
                    continue
                merged_lines.append(row[0] + '\t' + name + '\n')
    # Join once instead of growing a string with += inside the loop.
    return ''.join(merged_lines)
|
||
# Root directory holding the dictionary sub-directories; placeholder value
# that has to be adapted before running the script.
path = '/the/name/of/the/path/with/dictionary/files'

# Files in the UMLS semantic-group directory are parsed with '|' as delimiter.
dic_path_umls = path + '/UMLS-semantic-group'
big_dic_umls = create_big_dic(dic_path_umls, '|')

# Files in the gene directory are parsed as tab-separated.
dic_path_gene = path + '/gene'
big_dic_gene = create_big_dic(dic_path_gene, '\t')

# NOTE(review): the original also wrote `big_dic_redlist`, which is not
# defined anywhere in this script and made it stop with a NameError after
# the UMLS part had been written. Build it with create_big_dic() before
# adding it back here.
# The context manager closes the output file even if a write fails.
with open('bic_dic.txt', 'w') as big_dic_file:
    big_dic_file.write(big_dic_umls)
    big_dic_file.write(big_dic_gene)
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,24 @@ | ||
# Runs JenaUmlsFilter on MRCONSO.RRF / MRSTY.RRF once per semantic group and
# once without a semantic-group filter, redirecting each run's standard
# output into a file below dic/.

# Every redirect below targets dic/; create it so the redirects cannot fail
# because the directory is missing.
mkdir -p dic

#Only the following semantic group names are supported:
for group in ACTI ANAT CHEM CONC DEVI \
             DISO GENE GEOG LIVB OBJC \
             OCCU ORGA PHEN PHYS PROC
do
    java -jar JenaUmlsFilter-1.1-jar-with-dependencies.jar MRCONSO.RRF MRSTY.RRF GER --grounded --semanticGroup="${group}" > "dic/UMLS-2019AB-${group}-GER.txt"
done

# Same export without --semanticGroup.
java -jar JenaUmlsFilter-1.1-jar-with-dependencies.jar MRCONSO.RRF MRSTY.RRF GER --grounded > dic/UMLS-2019AB-GER.txt
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,2 @@ | ||
/detectStopWords/ | ||
/detectUMLSentries/ |
Binary file not shown.
Binary file not shown.
Binary file added
BIN
+17.2 MB
jcore-pipelines/jcore-pipeline-builder-cli-0.4.1-SNAPSHOT-jar-with-dependencies.jar
Binary file not shown.
Binary file added
BIN
+61.7 MB
jcore-pipelines/jcore-pipeline-runner-base-0.4.1-SNAPSHOT-cli-assembly.jar
Binary file not shown.
Binary file added
BIN
+14.5 MB
jcore-pipelines/jcore-pipeline-runner-cpe-0.4.1-SNAPSHOT-jar-with-dependencies.jar
Binary file not shown.