-
Notifications
You must be signed in to change notification settings - Fork 0
/
Copy pathparser.js
312 lines (288 loc) · 9.59 KB
/
parser.js
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
253
254
255
256
257
258
259
260
261
262
263
264
265
266
267
268
269
270
271
272
273
274
275
276
277
278
279
280
281
282
283
284
285
286
287
288
289
290
291
292
293
294
295
296
297
298
299
300
301
302
303
304
305
306
307
308
309
310
311
312
import yaml from "js-yaml";
import fs from "fs";
import * as utils from "./utils.js";
import { crossReferencesDict } from "./identifiers.js";
/**
 * Read a model YAML file and return its top-level sections together with a
 * few values derived from the metadata section.
 *
 * The parsed document must be iterable; its first five items are taken, in
 * order, as the metadata, metabolites, reactions, genes and compartments
 * sections.
 *
 * @param {string} yamlFile - Path of the YAML file to read (UTF-8).
 * @returns {Array} `[metadata, metabolites, reactions, genes, compartments,
 *   metadataSection, model, version, isHuman]`, where `model` is
 *   `utils.toLabelCase(short_name)`, `version` is `"V"` followed by the
 *   version with dots replaced by underscores, and `isHuman` is true only
 *   when `short_name` is exactly `"Human-GEM"`.
 * @throws {TypeError} If the document is not iterable or has no metadata
 *   section under the `metaData` / `metadata` key.
 */
const getInfoFromYaml = (yamlFile) => {
  // NOTE(review): with js-yaml 3.x, `load` can instantiate unsafe types;
  // confirm the installed major version is >= 4 or that model files are trusted.
  const parsed = yaml.load(fs.readFileSync(yamlFile, "utf8"));
  const [metadata, metabolites, reactions, genes, compartments] = parsed;

  // Both spellings of the metadata key are accepted.
  const metadataSection = metadata.metaData || metadata.metadata;
  const shortName = metadataSection.short_name;
  const model = utils.toLabelCase(shortName);

  // An unquoted version such as `1.5` is parsed by YAML as a number, which has
  // no `.replace`; coerce to a string before substituting the dots.
  const version = `V${String(metadataSection.version).replace(/\./g, "_")}`;
  const isHuman = shortName === "Human-GEM";

  return [
    metadata,
    metabolites,
    reactions,
    genes,
    compartments,
    metadataSection,
    model,
    version,
    isHuman,
  ];
};
/**
 * Read `<component>SVG.tsv` from the model directory and register the SVG
 * maps it lists.
 *
 * Each data line is tab-separated. For the `"custom"` component the columns
 * are `mapName, mapFilename`; for every other component they are
 * `componentName, mapName, mapFilename`. Lines starting with `#` or `@` and
 * empty lines are ignored.
 *
 * @param {Object} content - Model content; `content[component]` must be an
 *   array of objects with a `name` property whenever a line names a component.
 * @param {string} component - Component type, used for the file name, the
 *   lookup in `content` and the `<component>Id` key of the relations.
 * @param {Array<Object>} svgNodes - Output array; one
 *   `{ id, filename, customName }` entry is appended per accepted line.
 * @param {string} modelDir - Directory passed to `utils.getFile`.
 * @param {Object} metadataSection - Only `short_name` is read, for messages.
 * @returns {Array<Object>} Relations `{ <component>Id, svgMapId }`, one per
 *   line that names a component. Empty when the mapping file is not found
 *   (a warning is logged in that case).
 * @throws {Error} When a named component is not in the model, when a map file
 *   name is listed twice, or when a map file name does not match
 *   `/^[a-z0-9_]+[.]svg$/`.
 */
const getComponentSvgRel = (
  content,
  component,
  svgNodes,
  modelDir,
  metadataSection
) => {
  const filename = `${component}SVG.tsv`;
  const mappingFile = utils.getFile(modelDir, filename);
  const isCustom = component === "custom";
  const svgRels = [];

  if (!mappingFile) {
    console.log(
      `Warning: cannot find mappingfile ${filename} in path`,
      modelDir
    );
    return svgRels;
  }

  const lines = fs
    .readFileSync(mappingFile, { encoding: "utf8", flag: "r" })
    .split("\n")
    .filter(Boolean);

  const seenFilenames = new Set(); // check uniqueness of the map file names
  // Built on first use so that `content[component]` is only touched when a
  // line actually names a component (custom maps never do).
  let knownNames = null;

  for (const line of lines) {
    if (line[0] === "#" || line[0] === "@") {
      continue;
    }

    const columns = line.split("\t").map((e) => e.trim());
    // Custom map files have no leading component-name column.
    const [componentName, mapName, mapFilename] = isCustom
      ? [undefined, columns[0], columns[1]]
      : columns;

    if (componentName) {
      if (knownNames === null) {
        knownNames = new Set(content[component].map((e) => e.name));
      }
      if (!knownNames.has(componentName)) {
        throw new Error(
          `${component} "${componentName}" does not exist in the model "${metadataSection.short_name}"`
        );
      }
    }

    if (seenFilenames.has(mapFilename)) {
      throw new Error(
        `map ${mapFilename} can only be linked to one ${component} in the model "${metadataSection.short_name}"`
      );
    }
    seenFilenames.add(mapFilename);

    if (!/^[a-z0-9_]+[.]svg$/.test(mapFilename)) {
      throw new Error(
        `map "${mapFilename}" referenced by ${metadataSection.short_name}/${filename} is invalid`
      );
    }

    const svgMapId = mapFilename.split(".")[0];
    svgNodes.push({
      id: svgMapId,
      filename: mapFilename,
      customName: mapName,
    });

    if (componentName) {
      svgRels.push({
        [`${component}Id`]: utils.idfyString(componentName),
        svgMapId,
      });
    }
  }

  return svgRels;
};
/**
 * Collect the PubMed ids referenced by the reactions in `componentIdDict`.
 *
 * A reaction's `references` value is split on `;`. An entry counts as a
 * PubMed reference when, after trimming, it starts with `PMID` and what
 * remains after removing `PMID` and any following colons consists of digits
 * only.
 *
 * @param {Set<string>} PMIDSset - PubMed ids already known. Ids found here
 *   for the first time are added to it.
 * @param {Object} componentIdDict - Only `componentIdDict.reaction` is read,
 *   as a map from reaction id to an object with an optional `references`
 *   string. A missing `reaction` map yields empty results.
 * @returns {Array} `[PMIDs, reactionPMID]`: `PMIDs` lists the ids that were
 *   not yet in `PMIDSset`, in discovery order; `reactionPMID` holds one
 *   `{ reactionId, pubmedReferenceId }` entry per valid reference found.
 */
const getPMIDs = (PMIDSset, componentIdDict) => {
  const reactionPMID = [];
  const PMIDs = [];

  for (const [reactionId, reaction] of Object.entries(
    componentIdDict.reaction || {}
  )) {
    const references = reaction && reaction.references;
    // A non-string value (e.g. a bare number in the YAML) cannot be split
    // and cannot carry the `PMID` prefix anyway, so it is skipped.
    if (typeof references !== "string") {
      continue;
    }

    for (const entry of references.split(";")) {
      const trimmedEntry = entry.trim();
      if (!trimmedEntry.startsWith("PMID")) {
        continue;
      }

      const pubmedReferenceId = trimmedEntry.replace(/PMID:*/g, "").trim();
      if (!/^\d+$/.test(pubmedReferenceId)) {
        continue;
      }

      reactionPMID.push({ reactionId, pubmedReferenceId });
      if (!PMIDSset.has(pubmedReferenceId)) {
        PMIDs.push(pubmedReferenceId);
        PMIDSset.add(pubmedReferenceId);
      }
    }
  }

  return [PMIDs, reactionPMID];
};
/**
 * Copy gene annotations from the model's `genes.tsv` file onto the gene
 * entries of `componentIdDict`.
 *
 * For every data line whose first column is an own key of
 * `componentIdDict.gene`, the entry receives `name`, `alternateName`,
 * `synonyms` (columns 5, 7 and 8 of the line) and an empty `function`.
 * Lines starting with `#` or `@` and empty lines are ignored. When the file
 * cannot be found a warning is logged and nothing is changed.
 *
 * @param {Object} componentIdDict - `componentIdDict.gene` maps gene ids to
 *   gene objects; matching objects are mutated in place.
 * @param {string} modelDir - Directory passed to `utils.getFile`.
 * @returns {undefined}
 */
const getGeneAnnotation = (componentIdDict, modelDir) => {
  const geneAnnoFile = utils.getFile(modelDir, /genes[.]tsv$/);
  if (!geneAnnoFile) {
    console.log(
      "Warning: cannot find gene annotation file genes.tsv in path",
      modelDir
    );
    return;
  }

  // TODO use one of the csv parsing lib (sync)
  const lines = fs
    .readFileSync(geneAnnoFile, { encoding: "utf8", flag: "r" })
    .split("\n")
    .filter(Boolean);

  for (const line of lines) {
    if (line[0] === "#" || line[0] === "@") {
      continue;
    }

    // Column order: geneId, geneENSTID, geneENSPID, geneUniProtID, name,
    // geneEntrezID, alternateName, synonyms. Only four of them are stored.
    const [geneId, , , , name, , alternateName, synonyms] = line
      .split("\t")
      .map((e) => utils.trim(e, '"'));

    // Own-property check: with `in`, an id such as "toString" would match an
    // inherited member and the assignment below would hit a built-in function.
    if (!Object.prototype.hasOwnProperty.call(componentIdDict.gene, geneId)) {
      continue; // only keep the genes that are in the model
    }

    // `function` is not part of the TSV format, so it is stored as empty.
    Object.assign(componentIdDict.gene[geneId], {
      name,
      alternateName,
      synonyms,
      function: "",
    }); // other props are not in the db design, TODO remove them?
  }
};
/**
 * Read `<component>s.tsv` from the model directory and collect the external
 * database identifiers of the components that are part of the model.
 *
 * The first line that does not start with `#` is the header; every later
 * non-comment line is a data row whose first column is the component id.
 * For genes, reactions and metabolites only columns whose header matches
 * `gene.*ID$`, `rxn.*ID$` or `met.*ID$` respectively are used; for any other
 * component type every column is used. For genes, two extra columns
 * (`geneEnsemblID` and `geneProteinAtlasID`) are synthesised per row with the
 * gene id as value. A cell may hold several ids separated by `;`.
 *
 * @param {Array<Object>} externalIdNodes - Output array; every newly created
 *   `{ id, dbName, externalId, url }` node is appended.
 * @param {Object} externalIdDBMap - Cache from `dbName + externalId + url` to
 *   node, shared across calls so nodes are reused; new nodes are added.
 * @param {number} extNodeIdTracker - Id to give to the next new node.
 * @param {string} component - Component type; `"metabolite"` is stored under
 *   `"compartmentalizedMetabolite"`.
 * @param {Object} componentIdDict - `componentIdDict[fcomponent]` maps the
 *   ids of the model's components; rows with other ids are skipped with a
 *   warning.
 * @param {string} modelDir - Directory passed to `utils.getFile`.
 * @returns {Array} `[nextNodeId, fcomponent, externalIdDBComponentRel]` where
 *   the last item lists `{ id, externalDbId }` relations. When the file is
 *   not found a warning is logged and the relations are empty.
 */
const getComponentExternalDb = (
  externalIdNodes,
  externalIdDBMap,
  extNodeIdTracker,
  component,
  componentIdDict,
  modelDir
) => {
  const externalIdDBComponentRel = [];
  const filename = `${component}s.tsv`;
  const extIDFile = utils.getFile(modelDir, filename);
  const fcomponent =
    component === "metabolite" ? "compartmentalizedMetabolite" : component;
  let nextNodeId = extNodeIdTracker;

  if (!extIDFile) {
    console.log(
      `Warning: cannot find external ID file ${filename} in path`,
      modelDir
    );
    return [nextNodeId, fcomponent, externalIdDBComponentRel];
  }

  const hasOwn = (obj, key) => Object.prototype.hasOwnProperty.call(obj, key);

  // Per component type: which header names hold external ids, and which
  // property of a cross-reference entry holds the URL suffix.
  let headerPattern = null; // null: every column is used
  let suffixKey = "compoundSuffix";
  if (fcomponent === "gene") {
    headerPattern = /gene.*ID$/;
    suffixKey = "geneSuffix";
  } else if (fcomponent === "reaction") {
    headerPattern = /rxn.*ID$/;
    suffixKey = "reactionSuffix";
  } else if (fcomponent === "compartmentalizedMetabolite") {
    headerPattern = /met.*ID$/;
  }

  // Header -> cross-reference entry, resolved once per distinct header.
  const crossReferences = Object.values(crossReferencesDict);
  const referenceByHeader = new Map();
  const findReference = (header) => {
    if (!referenceByHeader.has(header)) {
      referenceByHeader.set(
        header,
        crossReferences.find((item) => item.headers.includes(header))
      );
    }
    return referenceByHeader.get(header);
  };

  // TODO use one of the csv parsing lib (sync)
  const lines = fs
    .readFileSync(extIDFile, { encoding: "utf8", flag: "r" })
    .split("\n")
    .filter(Boolean);

  let headerArr = null;
  for (const line of lines) {
    if (line[0] === "#") {
      continue;
    }
    if (headerArr === null) {
      // The header is the first non-comment line, so leading comments do not
      // cause it to be treated as data.
      headerArr = line.split("\t").map((e) => e.trim());
      continue;
    }

    const contentArr = line.split("\t").map((e) => utils.trim(e, '"'));
    const id = contentArr[0];
    if (!hasOwn(componentIdDict[fcomponent], id)) {
      // only keep the ones in the model
      console.log(
        `Warning: id ${id} not in  componentIdDict[${fcomponent}]`
      );
      continue;
    }

    // Pair each cell with its header. Cells without a header and headers
    // without a cell are left out, so a ragged row cannot shift the pairing.
    const columns = [];
    for (let j = 1; j < headerArr.length && j < contentArr.length; j++) {
      columns.push([headerArr[j], contentArr[j]]);
    }
    if (fcomponent === "gene") {
      // Ensembl and Protein Atlas are not columns of the file; for both the
      // external id is equal to the gene id. They are added per row instead
      // of being appended to the shared header array.
      columns.push(["geneEnsemblID", id], ["geneProteinAtlasID", id]);
    }

    for (const [header, cell] of columns) {
      if (headerPattern !== null && !headerPattern.test(header)) {
        continue;
      }

      // A header with no cross-reference entry falls back to empty values
      // for all three properties (no database name, no URL).
      const referenceData = findReference(header);
      const dbName = referenceData ? referenceData.db : "";
      const dbPrefix = referenceData ? referenceData.dbPrefix : "";
      const suffix = (referenceData && referenceData[suffixKey]) || "";

      const rawExternalId = utils.cleanExternalId(cell, dbName);
      if (!rawExternalId) {
        // ignore the record without any valid externalId
        continue;
      }

      // There might be multiple ids in one externalId item
      for (const piece of rawExternalId.split(";")) {
        const bareId = piece.trim();
        if (bareId === "") {
          continue; // e.g. produced by a trailing ";"
        }

        const url = dbPrefix
          ? `https://identifiers.org/${dbPrefix}${suffix}:${bareId}`
          : "";
        // The key uses the id without the ChEBI prefix; diff url leads to new nodes!
        const externalDbEntryKey = `${dbName}${bareId}${url}`;
        const externalId = dbName === "ChEBI" ? `CHEBI:${bareId}` : bareId;

        let node;
        if (hasOwn(externalIdDBMap, externalDbEntryKey)) {
          node = externalIdDBMap[externalDbEntryKey]; // reuse the node and id
        } else {
          node = { id: nextNodeId, dbName, externalId, url };
          externalIdDBMap[externalDbEntryKey] = node;
          nextNodeId += 1;
          // save the node for externalDBs.csv
          externalIdNodes.push(node);
        }

        // save the relationship between the node and the current component
        // id (reaction, gene, etc), e.g. geneId, externalDbId
        externalIdDBComponentRel.push({ id, externalDbId: node.id });
      }
    }
  }

  return [nextNodeId, fcomponent, externalIdDBComponentRel];
};
export {
getInfoFromYaml,
getComponentSvgRel,
getPMIDs,
getGeneAnnotation,
getComponentExternalDb,
};