Skip to content

Commit

Permalink
Merge remote-tracking branch 'origin/staging'
Browse files Browse the repository at this point in the history
  • Loading branch information
Mihoub2 committed Nov 13, 2024
2 parents 4e4a51d + e4693f4 commit 0ad52ce
Show file tree
Hide file tree
Showing 23 changed files with 668 additions and 258 deletions.
3 changes: 2 additions & 1 deletion client/.env.production
Original file line number Diff line number Diff line change
@@ -1,4 +1,5 @@
VITE_API_URL="/api"
VITE_MISTRAL_URL="/mistral"
VITE_APP_MATOMO_BASE_URL="https://piwik.enseignementsup-recherche.pro"
VITE_APP_MATOMO_SITE_ID="36"
VITE_APP_MATOMO_SITE_ID="36"
VITE_APP_ENV="production"
3 changes: 2 additions & 1 deletion client/.env.staging
Original file line number Diff line number Diff line change
@@ -1,4 +1,5 @@
VITE_API_URL="/api"
VITE_MISTRAL_URL="/mistral"
VITE_APP_MATOMO_BASE_URL="https://matomo.staging.dataesr.ovh"
VITE_APP_MATOMO_SITE_ID="4"
VITE_APP_MATOMO_SITE_ID="4"
VITE_APP_ENV="staging"
59 changes: 33 additions & 26 deletions client/src/api/networks/network/communities.ts
Original file line number Diff line number Diff line change
Expand Up @@ -14,6 +14,12 @@ import { nodeGetId } from "./network"
// Calendar year, evaluated once when this module is loaded.
const CURRENT_YEAR = new Date().getFullYear()
// Previous year and current year, in that order.
const RECENT_YEARS = [CURRENT_YEAR - 1, CURRENT_YEAR]

/**
 * Total number of citations over all years.
 *
 * @param citationsByYear - Citation counts keyed by year. A missing (falsy) map counts as zero.
 * @returns The sum of every yearly count, or 0 when no map is given.
 */
const nodeGetCitationsCount = (citationsByYear: Record<string, number>): number => {
  if (!citationsByYear) return 0

  let total = 0
  for (const yearlyCount of Object.values(citationsByYear)) {
    total += yearlyCount
  }
  return total
}

/**
 * Citations received during the previous and the current calendar year.
 *
 * @param citationsByYear - Citation counts keyed by year. A missing (falsy) map counts as zero.
 * @returns Count for `CURRENT_YEAR - 1` plus count for `CURRENT_YEAR`; a year with no entry contributes 0.
 */
const nodeGetCitationsRecent = (citationsByYear: Record<string, number>): number => {
  if (!citationsByYear) return 0

  const previousYearCount = citationsByYear[CURRENT_YEAR - 1] || 0
  const currentYearCount = citationsByYear[CURRENT_YEAR] || 0
  return previousYearCount + currentYearCount
}

const communityGetAttribute = (graph: Graph, community: number, name: string): Array<string> | Array<number> =>
graph.reduceNodes(
(acc, _, attr) => (attr.community === community && attr?.[name] ? (acc = arrayPush(acc, attr[name])) : acc),
Expand Down Expand Up @@ -44,9 +50,7 @@ const communityGetNodes = (graph: Graph, community: number): Array<{ id: string;
return nodes.sort((a, b) => b.weight - a.weight)
}

const communityGetPublicationsCount = (aggs: ElasticAggregations): number =>
aggs?.publicationsByYear?.buckets.reduce((acc, bucket) => acc + bucket.doc_count, 0) +
aggs?.publicationsByYear?.sum_other_doc_count
/**
 * Publication count read from the `publicationsCount` aggregation.
 *
 * @param aggs - Aggregations returned by the search; may be missing.
 * @returns `aggs.publicationsCount.value`, or 0 when it is absent or falsy.
 */
const communityGetPublicationsCount = (aggs: ElasticAggregations): number => {
  const count = aggs?.publicationsCount?.value
  return count ? count : 0
}

/**
 * Build a `{ year: publication count }` map from the `publicationsByYear` aggregation.
 *
 * @param aggs - Aggregations returned by the search; may be missing.
 * @returns One entry per bucket (`bucket.key` -> `bucket.doc_count`), or `undefined` when
 *          `aggs` or `aggs.publicationsByYear` is missing.
 */
const communityGetPublicationsByYear = (aggs: ElasticAggregations): Record<string, number> =>
  aggs?.publicationsByYear?.buckets.reduce((countsByYear, bucket) => {
    // Fill a single accumulator object, one key per bucket.
    countsByYear[bucket.key] = bucket.doc_count
    return countsByYear
  }, {})
Expand Down Expand Up @@ -77,30 +81,33 @@ const communityGetOaPercent = (aggs: ElasticAggregations): number => {
return (isOa / (isOa + isNotOa || 1)) * 100
}

const communityGetPublications = (hits: ElasticHits): Array<Record<string, string>> =>
const communityGetPublications = (hits: ElasticHits): Array<Record<string, string | number>> =>
hits.map((hit) => ({
id: hit.id,
title: hit.title.default,
citationsCount: nodeGetCitationsCount(hit?.cited_by_counts_by_year),
citationsRecent: nodeGetCitationsRecent(hit?.cited_by_counts_by_year),
}))

const communityGetNodesInfos = (hits: ElasticHits, model: string): any =>
hits.reduce((acc, hit) => {
const field = CONFIG[model].field.split(".")[0]
const citationsByYear = hit?.["counts_by_year"]?.reduce(
(acc, citations) => ({
...acc,
[citations.year]: citations.cited_by_count,
}),
{}
)
hit?.[field].forEach((node) => {
const citationsByYear = hit?.cited_by_counts_by_year
hit?.[field]?.forEach((node) => {
const key = node[CONFIG[model].field.split(".")[1]]
if (!key) return
const id = nodeGetId(key)
acc[id] = {
...acc?.[id],
publicationsCount: acc?.[id]?.publicationsCount ? acc[id].publicationsCount + 1 : 1,
citationsByYear: citationsByYear,
citationsByYear: {
...citationsByYear,
...(acc?.[id]?.citationsByYear &&
Object.entries(acc[id].citationsByYear).reduce(
(obj, [key, value]: [string, number]) => ({ ...obj, [key]: value + (citationsByYear?.[key] || 0) }),
{}
)),
},
}
})
return acc
Expand Down Expand Up @@ -128,26 +135,25 @@ export default async function communitiesCreate(graph: Graph, computeClusters: b
const hits = await networkSearchHits({ model, query, filters, links: communityGetLinks(graph, index) })
const aggs = await networkSearchAggs({ model, query, filters, links: communityGetLinks(graph, index) })

// Add info to nodes
if (hits) {
const nodesInfos = communityGetNodesInfos(hits, model)
graph.forEachNode((key) => {
communityGetIds(graph, index).forEach((key) => {
if (!Object.keys(nodesInfos).includes(key)) return
const nodeInfos = nodesInfos[key]
const publicationsCount = nodeInfos.publicationsCount
const citationsCount = nodeInfos?.citationsByYear
? Object.values(nodeInfos?.citationsByYear).reduce((acc: number, value: number) => acc + value, 0)
: 0
const citationsRecent = nodeInfos?.citationsByYear
? (nodeInfos.citationsByYear?.[CURRENT_YEAR - 1] || 0) + (nodeInfos.citationsByYear?.[CURRENT_YEAR] || 0)
: 0
const citationsScore = citationsRecent / (publicationsCount || 1)
graph.setNodeAttribute(key, "publicationsCount", publicationsCount)
graph.setNodeAttribute(key, "citationsCount", citationsCount)
graph.setNodeAttribute(key, "citationsRecent", citationsRecent)
graph.setNodeAttribute(key, "citationsScore", citationsScore)
const nodeCitationsByYear = nodeInfos?.citationsByYear
const nodePublicationsCount = nodeInfos.publicationsCount
const nodeCitationsCount = nodeGetCitationsCount(nodeCitationsByYear)
const nodeCitationsRecent = nodeGetCitationsRecent(nodeCitationsByYear)
const nodeCitationsScore = nodeCitationsRecent / (nodePublicationsCount || 1) || 0
graph.setNodeAttribute(key, "publicationsCount", nodePublicationsCount)
graph.setNodeAttribute(key, "citationsCount", nodeCitationsCount)
graph.setNodeAttribute(key, "citationsRecent", nodeCitationsRecent)
graph.setNodeAttribute(key, "citationsScore", nodeCitationsScore)
})
}

// Add info to communities
const community = {
cluster: index + 1,
label: COLORS?.[index] ? GetColorName(COLORS[index]) : `Unnamed ${index + 1}`,
Expand All @@ -167,6 +173,7 @@ export default async function communitiesCreate(graph: Graph, computeClusters: b
}),
...(hits && {
publications: communityGetPublications(hits),
publicationsCount: hits.length,
}),
}
return community
Expand Down
4 changes: 2 additions & 2 deletions client/src/api/networks/network/config.ts
Original file line number Diff line number Diff line change
Expand Up @@ -67,8 +67,8 @@ export const CONFIG = {
},
},
software: {
field: "software.id_name",
aggregation: "software.id_name.keyword",
field: "softwares.id_name",
aggregation: "softwares.id_name.keyword",
co_aggregation: "co_software.keyword",
url: (_: string, label: string) => `/search/publications?q="${label.replace(/ /g, "+")}"`,
terminology: {
Expand Down
42 changes: 33 additions & 9 deletions client/src/api/networks/network/mistralai.ts
Original file line number Diff line number Diff line change
Expand Up @@ -4,6 +4,28 @@ const { VITE_MISTRAL_URL: MISTRAL_URL, VITE_MISTRAL_KEY: MISTRAL_KEY } = import.
// Bearer authorization header, only present when MISTRAL_KEY is set; otherwise no extra headers.
const headers = MISTRAL_KEY ? { Authorization: `Bearer ${MISTRAL_KEY}` } : {}
// Same headers plus a JSON content type.
const postHeaders = { ...headers, "Content-Type": "application/json" }

/**
 * Normalise the labels returned by Mistral into a flat list of display names.
 *
 * - A label given as an array is reduced to its first element.
 * - A repeated label gets a numeric suffix: `"Physics"`, `"Physics (2)"`, `"Physics (3)"`, ...
 * - Empty or missing labels come back as `""` and are never suffixed, so a caller can still
 *   detect an unlabelled entry with a plain truthiness check.
 *
 * @param mistralLabels - Parsed JSON object whose values are the generated names.
 * @returns The cleaned labels in `Object.values` order; an empty array when nothing was provided.
 */
function cleanMistralLabels(mistralLabels: any): Array<string> {
  if (!mistralLabels) return []

  // Collapse one raw value into a string: first array element, "" for null/undefined.
  const toLabel = (raw: unknown): string => {
    const value: unknown = Array.isArray(raw) ? raw[0] : raw
    if (typeof value === "string") return value
    return value == null ? "" : String(value)
  }

  // A Map rather than a plain object: labels such as "constructor" or "toString" would
  // otherwise collide with inherited Object.prototype members and yield "constructor (NaN)".
  const occurrencesByLabel = new Map<string, number>()

  return Object.values(mistralLabels).map((raw) => {
    const label = toLabel(raw)
    // Leave empty labels untouched; suffixing them would produce a truthy " (2)".
    if (!label) return label

    const occurrences = (occurrencesByLabel.get(label) ?? 0) + 1
    occurrencesByLabel.set(label, occurrences)
    return occurrences === 1 ? label : `${label} (${occurrences})`
  })
}

async function mistralLabelsFromDomains(domains: string): Promise<string> {
const chatBody = {
messages: [
Expand All @@ -13,14 +35,15 @@ async function mistralLabelsFromDomains(domains: string): Promise<string> {
You have been tasked with naming distinct fields of study for several communities of research publications.
Below are lists of topics and their weights representing each community.
Your goal is to provide a unique and descriptive name for each field of study that best encapsulates the essence of the topics within that community.
Each name should be unique and as short as possible.
Output as JSON object with the list number and the single generated name.
Each should be unique and as short as possible.
If the list of topic is empty, output a empty string.
Output as JSON object with the list number and the single unique generated name.
${domains}`,
},
],
model: "open-mistral-7b",
temperature: 0.3,
model: "open-mistral-nemo",
temperature: 0.4,
response_format: { type: "json_object" },
random_seed: 42,
}
Expand Down Expand Up @@ -53,17 +76,18 @@ export async function openAiLabeledClusters(clusters: NetworkCommunities): Promi

if (!domains) return clusters

const mistral_labels = await mistralLabelsFromDomains(domains).then(
const mistralLabels = await mistralLabelsFromDomains(domains).then(
(response) => JSON.parse(response),
(err) => console.error(err)
)
if (!mistral_labels || mistral_labels.constructor != Object) {
if (!mistralLabels || mistralLabels.constructor != Object) {
return clusters
}

Object.entries(mistral_labels).forEach((entries, index) => {
const value = entries[1]
clusters[index].label = Array.isArray(value) ? value[0] : value
const cleanLabels = cleanMistralLabels(mistralLabels)

cleanLabels.forEach((label, index) => {
clusters[index].label = label ? label : clusters[index].label + " (Unlabelled)"
})

return clusters
Expand Down
9 changes: 7 additions & 2 deletions client/src/api/networks/network/network.ts
Original file line number Diff line number Diff line change
Expand Up @@ -83,8 +83,12 @@ export default async function networkCreate(
y: attr.y,
label: attr.label,
cluster: attr?.community + 1,
weights: { Weight: attr.weight, Degree: graph.degree(key) },
scores: { "Last activity": attr?.maxYear },
weights: {
Weight: attr.weight,
Degree: graph.degree(key),
...(attr?.citationsCount !== undefined && { Citations: attr.citationsCount || 0 }),
},
scores: { "Last publication": attr?.maxYear },
page: configGetItemUrl(model, key, attr.label),
...(attr?.publicationsCount !== undefined && { publicationsCount: attr?.publicationsCount }),
...(attr?.citationsCount !== undefined && { citationsCount: attr?.citationsCount }),
Expand All @@ -99,5 +103,6 @@ export default async function networkCreate(
clusters: communities,
}

console.log("network", network)
return network
}
5 changes: 4 additions & 1 deletion client/src/api/networks/search/search.ts
Original file line number Diff line number Diff line change
Expand Up @@ -17,7 +17,7 @@ const DEFAULT_YEARS = Array.from({ length: (2010 - CURRENT_YEAR) / -1 + 1 }, (_,

const DEFAULT_SIZE = 2000
const SEARCH_FIELDS = ["title.*^3", "authors.fullName^3", "summary.*^2", "domains.label.*^2"]
const HIT_FIELDS = ["id", "title.default", "year", "productionType", "isOa", "domains", "counts_by_year"]
const HIT_FIELDS = ["id", "title.default", "year", "productionType", "isOa", "domains", "cited_by_counts_by_year"]

const networkSearchBody = (model: string, query?: string | unknown): NetworkSearchBody => ({
size: 0,
Expand Down Expand Up @@ -133,6 +133,9 @@ export async function networkSearchAggs({
},
},
aggs: {
publicationsCount: {
value_count: { field: "id.keyword" },
},
publicationsByYear: {
terms: { field: "year", include: DEFAULT_YEARS, size: DEFAULT_YEARS.length },
},
Expand Down
30 changes: 23 additions & 7 deletions client/src/api/patents/[id]/index.ts
Original file line number Diff line number Diff line change
Expand Up @@ -32,6 +32,7 @@ export async function getPatentById(id: string): Promise<Patent> {
if (!patent) throw new Error("404");
return { ...patent, _id: data?.hits?.hits?.[0]._id };
}

export async function getCpcAggregation(value: string): Promise<Patent> {
const body: any = {
size: 10000,
Expand All @@ -49,9 +50,17 @@ export async function getCpcAggregation(value: string): Promise<Patent> {
aggs: {
byCpc: {
terms: {
field: "cpc.ss_classe.code.keyword",
field: "cpc.classe.code.keyword",
size: 10000,
},
aggs: {
bySectionLabel: {
terms: {
field: "cpc.section.label.keyword",
size: 1,
},
},
},
},
},
};
Expand All @@ -68,7 +77,7 @@ export async function getCpcAggregation(value: string): Promise<Patent> {
const hits = data?.hits?.hits;

const labelsByCode = hits.reduce((acc: any, hit: any) => {
const cpcGroups = hit._source.cpc?.ss_classe ?? [];
const cpcGroups = hit._source.cpc?.classe ?? [];
cpcGroups.forEach((cpc: any) => {
if (!acc[cpc.code]) {
acc[cpc.code] = cpc.label;
Expand All @@ -77,11 +86,18 @@ export async function getCpcAggregation(value: string): Promise<Patent> {
return acc;
}, {});

const patent = buckets.map((bucket: any) => ({
code: bucket.key,
doc_count: bucket.doc_count,
label: labelsByCode[bucket.key] || "Label non trouvé",
}));
const patent = buckets.map((bucket: any) => {
console.log(bucket);
const sectionLabel =
bucket.bySectionLabel?.buckets?.[0]?.key || "Label de section non trouvé";

return {
code: bucket.key,
doc_count: bucket.doc_count,
label: labelsByCode[bucket.key] || "Label non trouvé",
sectionLabel,
};
});

return patent;
}
Loading

0 comments on commit 0ad52ce

Please sign in to comment.