Skip to content

Commit

Permalink
refactor(api): Group by affiliations into API
Browse files Browse the repository at this point in the history
  • Loading branch information
annelhote committed Nov 19, 2023
1 parent 81297a5 commit d38a05b
Show file tree
Hide file tree
Showing 4 changed files with 102 additions and 88 deletions.
60 changes: 2 additions & 58 deletions client/src/pages/index.jsx
Original file line number Diff line number Diff line change
Expand Up @@ -19,7 +19,6 @@ import Filters from './filters';
import PublicationsTab from './publicationsTab';
import {
getAllIdsHtmlField,
getAffiliationRor,
getAffiliationsHtmlField,
getAffiliationsTooltipField,
getAuthorsHtmlField,
Expand Down Expand Up @@ -54,57 +53,6 @@ export default function Home() {
refetch();
};

/**
 * Build a comparison key from a name: lower-cased, diacritics stripped and
 * every character that is not a letter or a digit removed.
 *
 * Unicode property escapes are used so that letters and digits of every
 * script are kept; names written in non-Latin scripts therefore keep a
 * distinct key instead of all collapsing to the empty string.
 *
 * @param {string} name - Raw name to normalize.
 * @returns {string} Normalized key.
 */
const normalizedName = (name) => name
  .toLowerCase()
  // NFD splits accented letters into base letter + combining mark; the marks
  // are neither \p{L} nor \p{N}, so the replace below drops them.
  .normalize('NFD')
  .replace(/[^\p{L}\p{N}]/gu, '');

/**
 * Recompute the list of grouped affiliations and store it with
 * setAllAffiliations.
 *
 * Affiliations of the works whose status is still "to be decided" are grouped
 * by normalized name; affiliations that already carry another status in
 * allAffiliations are then merged back so their decision is kept.
 * Each resulting entry gets a string id (its index), a de-duplicated list of
 * work ids and the matching worksNumber.
 */
const groupByAffiliations = () => {
  // Save already decided affiliations
  const decidedAffiliations = Object.values(allAffiliations).filter((affiliation) => affiliation.status !== status.tobedecided.id);
  // A Map is used instead of a plain object so that a normalized name such as
  // "constructor" cannot collide with a member inherited from Object.prototype.
  const affiliationsByName = new Map();
  // Compute distinct affiliations of the undecided works
  [...allDatasets, ...allPublications]
    .filter((work) => work.status === status.tobedecided.id)
    .forEach((work) => {
      (work?.affiliations ?? [])
        .filter((affiliation) => affiliation?.name)
        .forEach((affiliation) => {
          const key = normalizedName(affiliation.name);
          if (!affiliationsByName.has(key)) {
            const ror = getAffiliationRor(affiliation);
            // Distinct (normalized) strings matched by the query in the affiliation name
            const matches = new Set((`${affiliation.name}`.match(regexp) ?? []).map((match) => normalizedName(match)));
            affiliationsByName.set(key, {
              matches: matches.size,
              name: affiliation.name,
              nameHtml: affiliation.name.replace(regexp, '<b>$&</b>'),
              ror,
              rorHtml: ror?.replace(regexp, '<b>$&</b>'),
              status: status.tobedecided.id,
              works: [],
            });
          }
          affiliationsByName.get(key).works.push(work.id);
        });
    });

  // Re-apply the decisions already taken
  decidedAffiliations.forEach((affiliation) => {
    const key = normalizedName(affiliation.name);
    const existing = affiliationsByName.get(key);
    if (existing) {
      existing.status = affiliation.status;
    } else {
      affiliationsByName.set(key, affiliation);
    }
  });

  setAllAffiliations([...affiliationsByName.values()].map((affiliation, index) => {
    // De-duplicate once and reuse the result for both fields
    const works = [...new Set(affiliation.works)];
    return { ...affiliation, id: index.toString(), works, worksNumber: works.length };
  }));
};

useEffect(() => {
const regexpTmp = new RegExp(`(${(options?.affiliations ?? [])
.map((affiliationQuery) => affiliationQuery
Expand Down Expand Up @@ -138,6 +86,7 @@ export default function Home() {
status: status.tobedecided.id,
}));
allPublicationsTmp = data.publications
.filter((publication) => !!publication?.affiliations)
.map((publication) => ({
...publication,
affiliationsHtml: getAffiliationsHtmlField(publication, regexp),
Expand All @@ -152,11 +101,6 @@ export default function Home() {
setAllPublications(allPublicationsTmp);
}, [data, regexp]);

useEffect(() => {
groupByAffiliations();
// eslint-disable-next-line react-hooks/exhaustive-deps
}, [allDatasets, allPublications, regexp]);

const tagPublications = (publications, action) => {
const allPublicationsTmp = [...allPublications];
const publicationsIds = publications.map((publication) => publication.id);
Expand Down Expand Up @@ -209,7 +153,7 @@ export default function Home() {
setAllPublications={setAllPublications}
tagAffiliations={tagAffiliations}
/>
{allAffiliations.length > 0 && (
{data?.length && (
<Tabs defaultActiveTab={0}>
<Tab label="Grouped affiliations of works">
<AffiliationsTab
Expand Down
8 changes: 0 additions & 8 deletions client/src/utils/templates.jsx
Original file line number Diff line number Diff line change
Expand Up @@ -26,13 +26,6 @@ const authorsTemplate = (rowData) => (
</>
);

/**
 * Return the ROR identifier(s) of an affiliation as full URL(s).
 *
 * @param {object} affiliation - Affiliation whose `ror` is a string or an array of strings.
 * @returns {string|undefined} The URL, several URLs joined by a space when
 *   `ror` is an array, or undefined when there is no `ror`.
 */
const getAffiliationRor = (affiliation) => {
  const ror = affiliation?.ror;
  if (!ror) return undefined;
  // Values that do not already start with "https" are treated as bare ROR ids
  const toUrl = (value) => (value.startsWith('https') ? value : `https://ror.org/${value}`);
  return Array.isArray(ror) ? ror.map((value) => toUrl(value)).join(' ') : toUrl(ror);
};

const getAffiliationsHtmlField = (rowData, regexp) => {
let affiliations = (rowData?.affiliations ?? [])
.filter((affiliation) => Object.keys(affiliation).length && affiliation?.name)
Expand Down Expand Up @@ -94,7 +87,6 @@ export {
affiliationsTemplate,
allIdsTemplate,
authorsTemplate,
getAffiliationRor,
getAffiliationsHtmlField,
getAffiliationsTooltipField,
getAllIdsHtmlField,
Expand Down
12 changes: 6 additions & 6 deletions server/src/routes/works.routes.js
Original file line number Diff line number Diff line change
Expand Up @@ -3,6 +3,7 @@ import express from 'express';
import {
getBsoWorks,
getOpenAlexPublications,
groupByAffiliations,
mergePublications,
} from '../utils';

Expand All @@ -20,12 +21,9 @@ router.route('/works')
getOpenAlexPublications(options),
getBsoWorks({ options, index: process.env.VITE_BSO_DATASETS_INDEX, filter: 'q=genre:dataset' }),
]);
const data = { datasets: [], publications: [], total: {} };
results.slice(0, 2).forEach((publication) => {
data.publications = [...data.publications, ...publication.results];
});
data.datasets = [...data.datasets, ...results[2].results];

const data = {};
data.publications = [...results[0].results, ...results[1].results];
data.datasets = results[2].results;
// Deduplicate publications by DOI or by hal_id
const deduplicatedPublications = {};
data.publications.forEach((publication) => {
Expand All @@ -37,6 +35,8 @@ router.route('/works')
}
});
data.publications = Object.values(deduplicatedPublications);
// Group by affiliations
data.affiliations = groupByAffiliations({ ...data, options });
res.status(200).json(data);
}
} catch (err) {
Expand Down
110 changes: 94 additions & 16 deletions server/src/utils.js
Original file line number Diff line number Diff line change
@@ -1,5 +1,28 @@
import { status } from 'client/src/config';

const VITE_OPENALEX_MAX_PAGE = Math.floor(process.env.VITE_OPENALEX_SIZE / process.env.VITE_OPENALEX_PER_PAGE);

/**
 * Extract a flat list of affiliations from a publication.
 *
 * When the publication has `authorships`, each author contributes one
 * `{ name }` entry per raw affiliation string; the ROR of the author's first
 * institution is attached only when the author has exactly one raw
 * affiliation string. Otherwise the publication's own `affiliations` are
 * returned unchanged.
 *
 * @param {object} publication - Publication record.
 * @returns {Array<object>|undefined} List of affiliations.
 */
const getAffilitionsFromOpenAlex = (publication) => {
  if (!publication?.authorships) return publication.affiliations;
  return publication.authorships.flatMap((author) => {
    const rawNames = author.raw_affiliation_strings;
    if (rawNames.length !== 1) return rawNames.map((name) => ({ name }));
    const ror = author?.institutions?.[0]?.ror;
    return ror ? { name: rawNames[0], ror } : { name: rawNames[0] };
  });
};

/**
 * Format the `ror` of an affiliation as one or several https://ror.org URLs.
 *
 * @param {object} affiliation - Affiliation whose `ror` is a string or an array of strings.
 * @returns {string|undefined} Undefined when `ror` is missing or empty, a
 *   single URL for a string, or the URLs joined by a space for an array.
 */
const getAffiliationRor = (affiliation) => {
  if (!affiliation?.ror) return undefined;
  // Prefix bare identifiers; values already starting with "https" are kept as is
  const withPrefix = (value) => {
    if (value.startsWith('https')) return value;
    return `https://ror.org/${value}`;
  };
  if (!Array.isArray(affiliation.ror)) return withPrefix(affiliation.ror);
  return affiliation.ror.map((value) => withPrefix(value)).join(' ');
};

const getBsoQuery = (options, pit, searchAfter) => {
const query = { size: process.env.VITE_BSO_SIZE, query: { bool: { filter: [], must: [], must_not: [], should: [] } } };
const affiliationsFields = [
Expand Down Expand Up @@ -89,7 +112,6 @@ const getBsoWorks = async ({
}
return ({
datasource: 'bso',
total: response?.hits?.total?.value ?? 0,
results: allResults,
});
});
Expand All @@ -105,20 +127,6 @@ const getIdValue = (id) => (
: null
);

/**
 * Collect the affiliations of a publication as a flat array.
 *
 * With `authorships`, every raw affiliation string of every author becomes a
 * `{ name }` entry; an author with exactly one raw affiliation string also
 * gets the ROR of their first institution when there is one. Without
 * `authorships`, `publication.affiliations` is returned as is.
 *
 * @param {object} publication - Publication record.
 * @returns {Array<object>|undefined} List of affiliations.
 */
const getAffilitionsFromOpenAlex = (publication) => {
  if (publication?.authorships) {
    const collected = [];
    publication.authorships.forEach((author) => {
      const rawNames = author.raw_affiliation_strings;
      if (rawNames.length === 1) {
        const single = { name: rawNames[0] };
        const ror = author?.institutions?.[0]?.ror;
        if (ror) single.ror = ror;
        collected.push(single);
      } else {
        rawNames.forEach((name) => {
          collected.push({ name });
        });
      }
    });
    return collected;
  }
  return publication.affiliations;
};

const getTypeFromOpenAlex = (type) => {
let newType = type;
// eslint-disable-next-line default-case
Expand Down Expand Up @@ -197,7 +205,7 @@ const getOpenAlexPublications = (options, page = '1', previousResponse = []) =>
if (Number(response.results.length) === Number(process.env.VITE_OPENALEX_PER_PAGE) && nextPage <= VITE_OPENALEX_MAX_PAGE) {
return getOpenAlexPublications(options, nextPage, results);
}
return ({ total: response.meta.count, results });
return ({ results });
})
.then((response) => ({
datasource: 'openalex',
Expand All @@ -217,6 +225,75 @@ const getOpenAlexPublications = (options, page = '1', previousResponse = []) =>
}));
};

/**
 * Build the global, case-insensitive regular expression that finds the
 * affiliation queries of `options.affiliations` inside a text.
 *
 * Each query is matched literally, except that unaccented and accented
 * variants of a letter (and the ligatures æ / œ and their two-letter
 * spellings) are treated as interchangeable.
 *
 * @param {object} options - Request options; `options.affiliations` is the list of query strings.
 * @returns {RegExp} Regular expression with the `g` and `i` flags. When there
 *   is no usable query it never matches.
 */
const getRegexpFromOptions = (options) => {
  const patterns = (options?.affiliations ?? [])
    // An empty alternative would match the empty string at every position
    .filter((affiliationQuery) => typeof affiliationQuery === 'string' && affiliationQuery.trim().length > 0)
    .map((affiliationQuery) => affiliationQuery
      // Escape regex metacharacters first so that user input such as "(" or "+"
      // is matched literally and cannot produce an invalid pattern.
      .replace(/[.*+?^${}()|[\]\\]/g, '\\$&')
      .replaceAll(/(a|à|á|â|ã|ä|å)/g, '(a|à|á|â|ã|ä|å)')
      .replaceAll(/(e|è|é|ê|ë)/g, '(e|è|é|ê|ë)')
      .replaceAll(/(i|ì|í|î|ï)/g, '(i|ì|í|î|ï)')
      .replaceAll(/(o|ò|ó|ô|õ|ö|ø)/g, '(o|ò|ó|ô|õ|ö|ø)')
      .replaceAll(/(u|ù|ú|û|ü)/g, '(u|ù|ú|û|ü)')
      .replaceAll(/(y|ý|ÿ)/g, '(y|ý|ÿ)')
      .replaceAll(/(n|ñ)/g, '(n|ñ)')
      .replaceAll(/(c|ç)/g, '(c|ç)')
      .replaceAll(/æ/g, '(æ|ae)')
      .replaceAll(/œ/g, '(œ|oe)'));
  // (?!) is an empty negative lookahead: it can never match, so callers get
  // no matches and unchanged text instead of empty matches everywhere.
  if (patterns.length === 0) return /(?!)/gi;
  return new RegExp(`(${patterns.join('|')})`, 'gi');
};

/**
 * Build a comparison key from a name: lower-cased, diacritics stripped and
 * every character that is not a letter or a digit removed.
 *
 * Unicode property escapes are used so that letters and digits of every
 * script are kept; names written in non-Latin scripts therefore keep a
 * distinct key instead of all collapsing to the empty string.
 *
 * @param {string} name - Raw name to normalize.
 * @returns {string} Normalized key.
 */
const normalizedName = (name) => name
  .toLowerCase()
  // NFD splits accented letters into base letter + combining mark; the marks
  // are neither \p{L} nor \p{N}, so the replace below drops them.
  .normalize('NFD')
  .replace(/[^\p{L}\p{N}]/gu, '');

/**
 * Group the affiliations of the undecided works by normalized name.
 *
 * Only works whose status equals `status.tobedecided.id` are considered. Each
 * distinct affiliation (same normalized name) yields one entry holding the
 * number of distinct query matches found in its name, the name and ROR with
 * the matches wrapped in <b> tags, and the ids of the works that mention it.
 *
 * @param {object} params
 * @param {Array<object>} params.datasets - Dataset works.
 * @param {object} params.options - Request options, used to build the matching regexp.
 * @param {Array<object>} params.publications - Publication works.
 * @returns {Array<object>} One entry per affiliation with a string `id` (its
 *   index), de-duplicated `works` and `worksNumber`.
 */
const groupByAffiliations = ({ datasets, options, publications }) => {
  const regexp = getRegexpFromOptions(options);
  // TODO: the merge of already decided affiliations (statuses other than
  // "to be decided") is not performed here — presumably because that state
  // is not available at this point; confirm where it should be applied.

  // A Map is used instead of a plain object so that a normalized name such as
  // "constructor" cannot collide with a member inherited from Object.prototype.
  const affiliationsByName = new Map();
  // Compute distinct affiliations of the undecided works
  [...datasets, ...publications]
    .filter((work) => work.status === status.tobedecided.id)
    .forEach((work) => {
      (work?.affiliations ?? [])
        .filter((affiliation) => affiliation?.name)
        .forEach((affiliation) => {
          const key = normalizedName(affiliation.name);
          if (!affiliationsByName.has(key)) {
            const ror = getAffiliationRor(affiliation);
            // Distinct (normalized) strings matched by the query in the affiliation name
            const matches = new Set((`${affiliation.name}`.match(regexp) ?? []).map((match) => normalizedName(match)));
            affiliationsByName.set(key, {
              matches: matches.size,
              name: affiliation.name,
              // NOTE(review): the name is inserted into HTML unescaped — confirm how nameHtml is rendered
              nameHtml: affiliation.name.replace(regexp, '<b>$&</b>'),
              ror,
              rorHtml: ror?.replace(regexp, '<b>$&</b>'),
              status: status.tobedecided.id,
              works: new Set(),
            });
          }
          // The Set keeps each work id once per affiliation
          affiliationsByName.get(key).works.add(work.id);
        });
    });

  return [...affiliationsByName.values()].map((affiliation, index) => {
    const works = [...affiliation.works];
    return { ...affiliation, id: index.toString(), works, worksNumber: works.length };
  });
};

const mergePublications = (publi1, publi2) => {
const priorityPublication = [publi1, publi2].some((publi) => publi.datasource === 'bso')
? [publi1, publi2].find((publi) => publi.datasource === 'bso')
Expand All @@ -236,5 +313,6 @@ export {
getBsoQuery,
getBsoWorks,
getOpenAlexPublications,
groupByAffiliations,
mergePublications,
};

0 comments on commit d38a05b

Please sign in to comment.