-
-
Notifications
You must be signed in to change notification settings - Fork 1
/
Copy pathcitation.py
99 lines (91 loc) · 8.24 KB
/
citation.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
# Citation lookup for software, keyed by tool name. Every entry is a dict with
# a single "publication" string. Long citations are wrapped using adjacent
# string literals, which Python joins into one string at compile time.
# NOTE(review): the "ncbi-genome-download" and "ncbi_datasets" entries hold a
# placeholder note / bare URL rather than a formal reference.
software = {
    "antismash": dict(
        publication=(
            "Kai Blin and others, antiSMASH 6.0: improving cluster detection and "
            "comparison capabilities, Nucleic Acids Research, Volume 49, Issue W1, "
            "2 July 2021, Pages W29-W35, 10.1093/nar/gkab335"
        ),
    ),
    "diamond": dict(
        publication=(
            "Buchfink, B., Reuter, K. & Drost, HG. Sensitive protein alignments at "
            "tree-of-life scale using DIAMOND. Nat Methods 18, 366-368 (2021). "
            "10.1038/s41592-021-01101-x"
        ),
    ),
    "hmmer": dict(
        publication=(
            "Eddy SR. Accelerated Profile HMM Searches. PLoS Comput Biol. "
            "2011 Oct;7(10):e1002195. doi: 10.1371/journal.pcbi.1002195"
        ),
    ),
    "mmseqs2": dict(
        publication=(
            "Steinegger, M., Söding, J. MMseqs2 enables sensitive protein sequence "
            "searching for the analysis of massive data sets. "
            "Nat Biotechnol 35, 1026-1028 (2017). 10.1038/nbt.3988"
        ),
    ),
    "ncbi-genome-download": dict(
        publication=(
            "Maybe cite the GitHub repo? Discussion here: "
            "https://github.com/kblin/ncbi-genome-download/issues/207"
        ),
    ),
    "ncbi_datasets": dict(publication="https://github.com/ncbi/datasets"),
    "neo4j": dict(
        publication=(
            "Robinson, I., Webber, J. & Eifrem, E. Graph Databases: "
            "New Opportunities for Connected Data. (O'Reilly Media, Inc., 2015)."
        ),
    ),
    "nf-core": dict(
        publication=(
            "Ewels PA, Peltzer A, Fillinger S, Patel H, Alneberg J, Wilm A, "
            "Garcia MU, Di Tommaso P, Nahnsen S. The nf-core framework for "
            "community-curated bioinformatics pipelines. Nat Biotechnol. "
            "2020 Mar;38(3):276-278. doi: 10.1038/s41587-020-0439-x"
        ),
    ),
}
# Citation lookup for databases, keyed by database name. Entries hold
# "publication", "license" and "website" fields. Long strings are wrapped using
# adjacent string literals, which Python joins into one string at compile time.
# NOTE(review): the schema is not uniform -- "goterms" stores a list of two
# publications, "prism" has no "website" key, and several "license"/"website"
# values are free-text notes rather than plain URLs. Consumers should not
# assume every key is present or that every value is a single URL.
databases = {
    "amrfinder": dict(
        publication=(
            "Feldgarden M, Brover V, Gonzalez-Escalona N, Frye JG, Haendiges J, "
            "Haft DH, Hoffmann M, Pettengill JB, Prasad AB, Tillman GE, Tyson GH, "
            "Klimke W. AMRFinderPlus and the Reference Gene Catalog facilitate "
            "examination of the genomic links among antimicrobial resistance, "
            "stress response, and virulence. Sci Rep. 2021 Jun 16;11(1):12728. "
            "doi: 10.1038/s41598-021-91456-0"
        ),
        license="https://github.com/ncbi/amr/wiki/Licenses",
        website="https://www.ncbi.nlm.nih.gov/pathogens/antimicrobial-resistance/AMRFinder/",
    ),
    "antismash": dict(
        publication=(
            "Kai Blin and others, antiSMASH 6.0: improving cluster detection and "
            "comparison capabilities, Nucleic Acids Research, Volume 49, Issue W1, "
            "2 July 2021, Pages W29-W35, 10.1093/nar/gkab335"
        ),
        license="https://github.com/antismash/antismash/blob/master/LICENSE.txt",
        website="https://antismash.secondarymetabolites.org/",
    ),
    "bigslice": dict(
        publication=(
            "Kautsar SA, van der Hooft JJJ, de Ridder D, Medema MH. BiG-SLiCE: "
            "A highly scalable tool maps the diversity of 1.2 million biosynthetic "
            "gene clusters. Gigascience. 2021 Jan 13;10(1):giaa154. "
            "doi: 10.1093/gigascience/giaa154"
        ),
        license="https://github.com/medema-group/bigslice/blob/master/LICENSE.txt",
        website="https://bigfam.bioinformatics.nl/home",
    ),
    "classiphage": dict(
        publication=(
            "Chibani CM, Farr A, Klama S, Dietrich S, Liesegang H. Classifying the "
            "Unclassified: A Phage Classification Method. Viruses. "
            "2019 Feb 24;11(2):195. doi: 10.3390/v11020195"
        ),
        license="No license found",
        website="http://appmibio.uni-goettingen.de/software/ClassiPhage/",
    ),
    "goterms": dict(
        # Two references: the original Gene Ontology paper and the 2023 update.
        # Both are kept on single lines to avoid mis-wrapping the author lists.
        publication=[
            "Ashburner M, Ball CA, Blake JA, Botstein D, Butler H, Cherry JM, Davis AP, Dolinski K, Dwight SS, Eppig JT, Harris MA, Hill DP, Issel-Tarver L, Kasarskis A, Lewis S, Matese JC, Richardson JE, Ringwald M, Rubin GM, Sherlock G. Gene ontology: tool for the unification of biology. The Gene Ontology Consortium. Nat Genet. 2000 May;25(1):25-9. doi: 10.1038/75556",
            "Gene Ontology Consortium; Aleksander SA, Balhoff J, Carbon S, Cherry JM, Drabkin HJ, Ebert D, Feuermann M, Gaudet P, Harris NL, Hill DP, Lee R, Mi H, Moxon S, Mungall CJ, Muruganugan A, Mushayahama T, Sternberg PW, Thomas PD, Van Auken K, Ramsey J, Siegele DA, Chisholm RL, Fey P, Aspromonte MC, Nugnes MV, Quaglia F, Tosatto S, Giglio M, Nadendla S, Antonazzo G, Attrill H, Dos Santos G, Marygold S, Strelets V, Tabone CJ, Thurmond J, Zhou P, Ahmed SH, Asanitthong P, Luna Buitrago D, Erdol MN, Gage MC, Ali Kadhum M, Li KYC, Long M, Michalak A, Pesala A, Pritazahra A, Saverimuttu SCC, Su R, Thurlow KE, Lovering RC, Logie C, Oliferenko S, Blake J, Christie K, Corbani L, Dolan ME, Drabkin HJ, Hill DP, Ni L, Sitnikov D, Smith C, Cuzick A, Seager J, Cooper L, Elser J, Jaiswal P, Gupta P, Jaiswal P, Naithani S, Lera-Ramirez M, Rutherford K, Wood V, De Pons JL, Dwinell MR, Hayman GT, Kaldunski ML, Kwitek AE, Laulederkind SJF, Tutaj MA, Vedi M, Wang SJ, D'Eustachio P, Aimo L, Axelsen K, Bridge A, Hyka-Nouspikel N, Morgat A, Aleksander SA, Cherry JM, Engel SR, Karra K, Miyasato SR, Nash RS, Skrzypek MS, Weng S, Wong ED, Bakker E, Berardini TZ, Reiser L, Auchincloss A, Axelsen K, Argoud-Puy G, Blatter MC, Boutet E, Breuza L, Bridge A, Casals-Casas C, Coudert E, Estreicher A, Livia Famiglietti M, Feuermann M, Gos A, Gruaz-Gumowski N, Hulo C, Hyka-Nouspikel N, Jungo F, Le Mercier P, Lieberherr D, Masson P, Morgat A, Pedruzzi I, Pourcel L, Poux S, Rivoire C, Sundaram S, Bateman A, Bowler-Barnett E, Bye-A-Jee H, Denny P, Ignatchenko A, Ishtiaq R, Lock A, Lussi Y, Magrane M, Martin MJ, Orchard S, Raposo P, Speretta E, Tyagi N, Warner K, Zaru R, Diehl AD, Lee R, Chan J, Diamantakis S, Raciti D, Zarowiecki M, Fisher M, James-Zorn C, Ponferrada V, Zorn A, Ramachandran S, Ruzicka L, Westerfield M. The Gene Ontology knowledgebase in 2023. Genetics. 2023 May 4;224(1):iyad031. doi: 10.1093/genetics/iyad031",
        ],
        license="http://geneontology.org/docs/go-citation-policy/",
        website="http://geneontology.org/",
    ),
    "ipresto": dict(
        publication=(
            "Louwen JJR, Kautsar SA, van der Burg S, Medema MH, van der Hooft JJJ. "
            "iPRESTO: Automated discovery of biosynthetic sub-clusters linked to "
            "specific natural product substructures. PLoS Comput Biol. "
            "2023 Feb 9;19(2):e1010462. doi: 10.1371/journal.pcbi.1010462"
        ),
        license="https://git.wageningenur.nl/bioinformatics/iPRESTO/-/blob/master/LICENSE",
        website="https://git.wageningenur.nl/bioinformatics/iPRESTO/",
    ),
    "pfam": dict(
        publication=(
            "Jaina Mistry and others, Pfam: The protein families database in 2021, "
            "Nucleic Acids Research, Volume 49, Issue D1, 8 January 2021, "
            "Pages D412-D419, 10.1093/nar/gkaa913"
        ),
        # Key order (website before license) is kept as in the original entry.
        website=(
            "'Pfam is freely available under the Creative Commons Zero (“CC0”) licence.'; "
            "https://www.ebi.ac.uk/interpro/entry/pfam"
        ),
        license="https://pfam-docs.readthedocs.io/en/latest/",
    ),
    "prism": dict(
        publication=(
            "Skinnider MA, Merwin NJ, Johnston CW, Magarvey NA. PRISM 3: expanded "
            "prediction of natural product chemical structures from microbial "
            "genomes. Nucleic Acids Res. 2017 Jul 3;45(W1):W49-W54. "
            "doi: 10.1093/nar/gkx320"
        ),
        license="No redistribution allowed",
    ),
    "resfams": dict(
        publication=(
            "Gibson MK, Forsberg KJ, Dantas G. Improved annotation of antibiotic "
            "resistance determinants reveals microbial resistomes cluster by "
            "ecology. ISME J. 2015 Jan;9(1):207-16. doi: 10.1038/ismej.2014.106. "
            "Epub 2014 Jul 8"
        ),
        license="http://www.dantaslab.org/s/LICENSE.txt",
        website="http://www.dantaslab.org/resfams",
    ),
    "tigrfam": dict(
        publication=(
            "Haft DH, Selengut JD, White O. The TIGRFAMs database of protein "
            "families. Nucleic Acids Res. 2003 Jan 1;31(1):371-3. "
            "doi: 10.1093/nar/gkg128."
        ),
        license="https://creativecommons.org/licenses/by-sa/4.0/legalcode",
        website=(
            "'TIGRFAMs data are made available under a Creative Commons "
            "Attribution-ShareAlike 4.0 license.'; "
            "https://www.ncbi.nlm.nih.gov/genome/annotation_prok/tigrfams/"
        ),
    ),
    "vogdb": dict(
        # Placeholder text, not a formal reference.
        publication="Note sure the reference; https://vogdb.org",
        license="No license found",
        website="https://vogdb.org/",
    ),
}
"The non-redundant set of proteins created by socialgene was clustered by MMseqs2"
"Using socialgene, the Diamond software was used to perform an all-vs-all BLASTp search of the non-redundant set of proteins."
"A graph database was created using Neo4j version..."
# NOTE(review): `versions_by_process` and `citations` are not defined in the
# visible part of this file -- confirm where they come from before running.
# Presumably `versions_by_process` maps each process to a mapping keyed by
# tool name; only the inner keys are used here.

# Collect every distinct inner key found across all processes.
to_cite = {
    tool_name
    for process_tools in versions_by_process.values()
    for tool_name in process_tools.keys()
}
# Re-bind to a {name: citation} dict, dropping names with no `citations` entry.
to_cite = {
    tool_name: citations[tool_name]
    for tool_name in to_cite
    if tool_name in citations
}