From 9b0fac3d8ee92f614cf7a3abdeb8ca1c486fbfa6 Mon Sep 17 00:00:00 2001 From: johnfouf Date: Thu, 21 Nov 2024 16:06:14 +0200 Subject: [PATCH 1/2] Update communitiesextract.sql - add NBFC mining --- .../oozie_app/lib/scripts/communitiesextract.sql | 8 ++++++++ 1 file changed, 8 insertions(+) diff --git a/iis-wf/iis-wf-referenceextraction/src/main/resources/eu/dnetlib/iis/wf/referenceextraction/community/main_sqlite/oozie_app/lib/scripts/communitiesextract.sql b/iis-wf/iis-wf-referenceextraction/src/main/resources/eu/dnetlib/iis/wf/referenceextraction/community/main_sqlite/oozie_app/lib/scripts/communitiesextract.sql index e38e17bbb..2c00ec9e9 100644 --- a/iis-wf/iis-wf-referenceextraction/src/main/resources/eu/dnetlib/iis/wf/referenceextraction/community/main_sqlite/oozie_app/lib/scripts/communitiesextract.sql +++ b/iis-wf/iis-wf-referenceextraction/src/main/resources/eu/dnetlib/iis/wf/referenceextraction/community/main_sqlite/oozie_app/lib/scripts/communitiesextract.sql @@ -73,6 +73,14 @@ setschema 'docid,prev,middle,next' select c1, textwindow2s(keywords(filterstopwo ), grants where conceptLabel="INSPIRED RIs" and regexprmatches("\bMIS|INSPIRED", context) ) group by docid +-- NBFC +union all +select jdict('documentId',docid, 'conceptId', 'nbfc', 'confidenceLevel', 0.8,'textsnippet',context) as c1 from ( + select docid, prev||" "||middle||" "||next as context + from ( + setschema 'docid,prev,middle,next' select c1, textwindow2s(keywords(lower(c2)),7,4,3, 'national biodiversity future cent') from pubs where c2 is not null + ) + ) group by docid union all From f4e4e60d386d463a29c85333fbdf53865d59eaf3 Mon Sep 17 00:00:00 2001 From: johnfouf Date: Tue, 26 Nov 2024 19:04:50 +0200 Subject: [PATCH 2/2] Update communitiesextract.sql - add label --- .../oozie_app/lib/scripts/communitiesextract.sql | 7 +++---- 1 file changed, 3 insertions(+), 4 deletions(-) diff --git a/iis-wf/iis-wf-referenceextraction/src/main/resources/eu/dnetlib/iis/wf/referenceextraction/community/main_sqlite/oozie_app/lib/scripts/communitiesextract.sql b/iis-wf/iis-wf-referenceextraction/src/main/resources/eu/dnetlib/iis/wf/referenceextraction/community/main_sqlite/oozie_app/lib/scripts/communitiesextract.sql index 2c00ec9e9..4d2dab937 100644 --- a/iis-wf/iis-wf-referenceextraction/src/main/resources/eu/dnetlib/iis/wf/referenceextraction/community/main_sqlite/oozie_app/lib/scripts/communitiesextract.sql +++ b/iis-wf/iis-wf-referenceextraction/src/main/resources/eu/dnetlib/iis/wf/referenceextraction/community/main_sqlite/oozie_app/lib/scripts/communitiesextract.sql @@ -75,13 +75,12 @@ setschema 'docid,prev,middle,next' select c1, textwindow2s(keywords(filterstopwo -- NBFC union all -select jdict('documentId',docid, 'conceptId', 'nbfc', 'confidenceLevel', 0.8,'textsnippet',context) as c1 from ( - select docid, prev||" "||middle||" "||next as context +select jdict('documentId',docid, 'conceptId', conceptId, 'confidenceLevel', 0.8,'textsnippet',context) as c1 from ( + select docid, conceptId, conceptLabel, prev||" "||middle||" "||next as context from ( setschema 'docid,prev,middle,next' select c1, textwindow2s(keywords(lower(c2)),7,4,3, 'national biodiversity future cent') from pubs where c2 is not null - ) + ),grants where conceptLabel="Italian National Biodiversity Future Center" ) group by docid - union all -- DARIAH