From 39a7f284f46dfbd9d2fdb07969ade6e1228af32e Mon Sep 17 00:00:00 2001 From: yannisfoufoulas <93664091+yannisfoufoulas@users.noreply.github.com> Date: Fri, 13 Sep 2024 17:19:59 +0300 Subject: [PATCH 1/2] Update pilot6.sql - modifications after curation of first results --- .../main/oozie_app/lib/scripts/pilot6.sql | 221 +++++++++++++++++- 1 file changed, 212 insertions(+), 9 deletions(-) diff --git a/iis-wf/iis-wf-referenceextraction/src/main/resources/eu/dnetlib/iis/wf/referenceextraction/ukrn/pilot6/main/oozie_app/lib/scripts/pilot6.sql b/iis-wf/iis-wf-referenceextraction/src/main/resources/eu/dnetlib/iis/wf/referenceextraction/ukrn/pilot6/main/oozie_app/lib/scripts/pilot6.sql index 3bfbaa526..1e8c2cb31 100644 --- a/iis-wf/iis-wf-referenceextraction/src/main/resources/eu/dnetlib/iis/wf/referenceextraction/ukrn/pilot6/main/oozie_app/lib/scripts/pilot6.sql +++ b/iis-wf/iis-wf-referenceextraction/src/main/resources/eu/dnetlib/iis/wf/referenceextraction/ukrn/pilot6/main/oozie_app/lib/scripts/pilot6.sql @@ -1,5 +1,6 @@ --For testing... 
--- attach database "../06.Biomedical06/mydata.db" as d1; +-- cd /storage/eleni/openAIRE/UKNR +-- attach database "/storage/eleni/openAIRE/06.Biomedical06/mydata.db" as d1; -- create table mydata as select * from (setschema 'docid,text' select * from mydata where -- docid ='PMC2931525' or docid ='PMC2933899' or -- docid ='PMC3737084' or docid ='PMC3737070' ); @@ -13,19 +14,221 @@ create temp table mydata as select * from (setschema 'docid,text' select jsonpath(c1,'$.id', '$.text') from stdinput()); -hidden var 'urls' from "www\.animalstudyregistry\.org|aspredicted\.org|www\.anzctr\.org\.au|ensaiosclinicos\.gov\.br|www\.chictr\.org\.cn|cris\.nih\.go\.kr\cris|euclinicaltrials\.eu|ctri\.nic\.in|clinicaltrials\.gov|rpcec\.sld\.cu|www\.onderzoekmetmensen\.nl|www\.clinicaltrialsregister\.eu|drks\.de|trialsearch\.who\.int|inplasy\.com|www\.crd\.york\.ac\.uk\/prospero|www\.isrctn\.com|itmctr\.ccebtcm\.org\.cn|www\.irct\.ir|rctportal\.niph\.go\.jp|www\.clinicaltrials\.jp|rctportal\.niph\.go\.jp|lbctr\.moph\.gov\.lb|osf\.io/search\?resourceType=Registration|pactr\.samrc\.ac\.za|ensayosclinicos-repec\.ins\.gob\.pe|preclinicaltrials\.eu|www\.researchregistry\.com|www\.slctr\.lk|thaiclinicaltrials\.org|ww\.umin\.ac\.jp"; +hidden var 'ASR' from "www\.animalstudyregistry\.org|\b10\.17590\/asr\.\d+\b"; +hidden var 'AsPredicted' from "aspredicted\.org|\bAsPredicted\s{0,1}#\d+\b"; +hidden var 'ANZCTR' from "www\.anzctr\.org\.au|\bACTRN:{0,1}\s{0,1}\d+p{0,1}\b"; +hidden var 'ReBec' from "ensaiosclinicos\.gov\.br|\bRBR-\d+[a-z0-9]+\b"; +hidden var 'ChiCTR' from "www\.chictr\.org\.cn|\bChiCTR\s{0,1}-{0,1}(?:TRC){0,1}-{0,1}\d+\b"; +hidden var 'CRiS' from "cris\.nih\.go\.kr\cris|\bKCT\s{0,1}\d{7}\b"; +hidden var 'CTIS' from "euclinicaltrials\.eu|\bEUCT\s{0,1}\d{4}-\d+-\d{2}-\d{2}\b"; +hidden var 'CTRI' from "ctri\.nic\.in|\bCTRI/\d{4}/\d{2}/\d+\b"; +hidden var 'CT_gov' from "clinicaltrials\.gov|\bNCT\s{0,1}\d+\b"; +hidden var 'RPCEC' from "rpcec\.sld\.cu|\bRPCEC\s{0,1}\d+\b"; +hidden var 
'LTR' from "www\.onderzoekmetmensen\.nl|\bNL\s{0,1}\d+ ; NTR\s{0,1}\d+\b"; +hidden var 'EU_CTR' from "www\.clinicaltrialsregister\.eu"; +hidden var 'DRKS' from "drks\.de|\bDRKS\s{0,1}\d+\b"; +hidden var 'ICTRP' from "trialsearch\.who\.int|\bU\d{4}-\d{4}-\d{4}\b"; +hidden var 'INPLASY' from "inplasy\.com|\bINPLASY\s{0,1}\d+\b"; +hidden var 'PROSPERO' from "www\.crd\.york\.ac\.uk\/prospero|\bCRD\s{0,1}\d+\b"; +hidden var 'ISRCTN' from "www\.isrctn\.com|\bISRCTN\s{0,1}\d+\b"; +hidden var 'ITMCTR' from "itmctr\.ccebtcm\.org\.cn|\bITMCTR\s{0,1}\d+\b"; +hidden var 'IRCT' from "www\.irct\.ir|\bIRCT\s{0,1}\d+\b"; +hidden var 'JMACCT' from "rctportal\.niph\.go\.jp|\bJMA-IIA\s{0,1}\d+\b"; +hidden var 'JAPIC' from "www\.clinicaltrials\.jp|\bJapicCTI\s{0,1}-{0,1}\d+\b"; +hidden var 'jRCT' from "rctportal\.niph\.go\.jp|\bjRCTs{0,1}\s{0,1}\d+\b"; +hidden var 'LBCTR' from "lbctr\.moph\.gov\.lb|\bLBCTR\s{0,1}\d+\b"; +hidden var 'OSF' from "osf\.io/search\?resourceType=Registration"; +hidden var 'PACTR' from "pactr\.samrc\.ac\.za|\bPACTR\s{0,1}\d+\b"; +hidden var 'REPEC' from "ensayosclinicos-repec\.ins\.gob\.pe|\bPER-\d+-\d+\b"; +hidden var 'PCT' from "preclinicaltrials\.eu|\bPCTE\s{0,1}\d+\b"; +hidden var 'ResearchRegistry' from "www\.researchregistry\.com|\bresearchregistry\s{0,1}\d+\b"; +hidden var 'SLCTR' from "www\.slctr\.lk|\bSLCTR/\d{4}/\d+\b"; +hidden var 'TCTR' from "thaiclinicaltrials\.org|\bTCTR\s{0,1}\d+\b"; +hidden var 'UMIN' from "ww\.umin\.ac\.jp|\bUMIN\s{0,1}\d+\b"; -hidden var 'regexstatements' from -"10\.17590\/asr\.\d+|AsPredicted\s{0,1}#\d+|ACTRN:{0,1}\s{0,1}\d+p{0,1}|RBR-\d+[a-z0-9]+|ChiCTR\s{0,1}-{0,1}(?:TRC){0,1}-{0,1}\d+|KCT\s{0,1}\d{7}|EUCT\s{0,1}\d{4}-\d+-\d{2}-\d{2}|CTRI/\d{4}/\d{2}/\d+|NCT\s{0,1}\d+|RPCEC\s{0,1}\d+|NL\s{0,1}\d+ ; 
NTR\s{0,1}\d+|\d{4}[-–]\d+-\d+|DRKS\s{0,1}\d+|U\d{4}-\d{4}-\d{4}|INPLASY\s{0,1}\d+|CRD\s{0,1}\d+|ISRCTN\s{0,1}\d+|ITMCTR\s{0,1}\d+|IRCT\s{0,1}\d+|JMA-IIA\s{0,1}\d+|JapicCTI\s{0,1}-{0,1}\d+|jRCTs{0,1}\s{0,1}\d+|LBCTR\s{0,1}\d+|PACTR\s{0,1}\d+|PER-\d+-\d+|PCTE\s{0,1}\d+|researchregistry\s{0,1}\d+|SLCTR/\d{4}/\d+|TCTR\s{0,1}\d+|UMIN\s{0,1}\d+"; -select jdict('query', 'a', 'documentId', docid, 'prev', prev, 'middle', middle, 'next', next) +select jdict('query', 'ASR', 'documentId', docid, 'prev', prev, 'middle', middle, 'next', next) from ( select docid, prev, middle, next - from (setschema 'docid,prev,middle,next' select docid, textwindow2s(regexpr("\n",text," "), 10, 1, 10, var('urls')) + from (setschema 'docid,prev,middle,next' select docid, textwindow2s(regexpr("\n",text," "), 10, 1, 10, var('ASR')) from (select docid, text from mydata)) ) union all -select jdict('query', 'b', 'documentId', docid, 'prev', prev, 'middle', middle, 'next', next) +select jdict('query', 'AsPredicted', 'documentId', docid, 'prev', prev, 'middle', middle, 'next', next) from ( select docid, prev, middle, next - from (setschema 'docid,prev,middle,next' select docid, textwindow2s(regexpr("\n",text," "), 10, 1, 10, var('regexstatements')) + from (setschema 'docid,prev,middle,next' select docid, textwindow2s(regexpr("\n",text," "), 10, 1, 10, var('AsPredicted')) from (select docid, text from mydata)) -); \ No newline at end of file +) +union all +select jdict('query', 'ANZCTR', 'documentId', docid, 'prev', prev, 'middle', middle, 'next', next) +from ( select docid, prev, middle, next + from (setschema 'docid,prev,middle,next' select docid, textwindow2s(regexpr("\n",text," "), 10, 1, 10, var('ANZCTR')) + from (select docid, text from mydata)) +) +union all +select jdict('query', 'ReBec', 'documentId', docid, 'prev', prev, 'middle', middle, 'next', next) +from ( select docid, prev, middle, next + from (setschema 'docid,prev,middle,next' select docid, textwindow2s(regexpr("\n",text," "), 10, 1, 
10, var('ReBec')) + from (select docid, text from mydata)) +) +union all +select jdict('query', 'ChiCTR', 'documentId', docid, 'prev', prev, 'middle', middle, 'next', next) +from ( select docid, prev, middle, next + from (setschema 'docid,prev,middle,next' select docid, textwindow2s(regexpr("\n",text," "), 10, 1, 10, var('ChiCTR')) + from (select docid, text from mydata)) +) +union all +select jdict('query', 'CRiS', 'documentId', docid, 'prev', prev, 'middle', middle, 'next', next) +from ( select docid, prev, middle, next + from (setschema 'docid,prev,middle,next' select docid, textwindow2s(regexpr("\n",text," "), 10, 1, 10, var('CRiS')) + from (select docid, text from mydata)) +) +union all +select jdict('query', 'CTIS', 'documentId', docid, 'prev', prev, 'middle', middle, 'next', next) +from ( select docid, prev, middle, next + from (setschema 'docid,prev,middle,next' select docid, textwindow2s(regexpr("\n",text," "), 10, 1, 10, var('CTIS')) + from (select docid, text from mydata)) +) +union all +select jdict('query', 'CTRI', 'documentId', docid, 'prev', prev, 'middle', middle, 'next', next) +from ( select docid, prev, middle, next + from (setschema 'docid,prev,middle,next' select docid, textwindow2s(regexpr("\n",text," "), 10, 1, 10, var('CTRI')) + from (select docid, text from mydata)) +) +union all +select jdict('query', 'CT_gov', 'documentId', docid, 'prev', prev, 'middle', middle, 'next', next) +from ( select docid, prev, middle, next + from (setschema 'docid,prev,middle,next' select docid, textwindow2s(regexpr("\n",text," "), 10, 1, 10, var('CT_gov')) + from (select docid, text from mydata)) +) +union all +select jdict('query', 'RPCEC', 'documentId', docid, 'prev', prev, 'middle', middle, 'next', next) +from ( select docid, prev, middle, next + from (setschema 'docid,prev,middle,next' select docid, textwindow2s(regexpr("\n",text," "), 10, 1, 10, var('RPCEC')) + from (select docid, text from mydata)) +) +union all +select jdict('query', 'LTR', 'documentId', 
docid, 'prev', prev, 'middle', middle, 'next', next) +from ( select docid, prev, middle, next + from (setschema 'docid,prev,middle,next' select docid, textwindow2s(regexpr("\n",text," "), 10, 1, 10, var('LTR')) + from (select docid, text from mydata)) +) +union all +select jdict('query', 'EU_CTR', 'documentId', docid, 'prev', prev, 'middle', middle, 'next', next) +from ( select docid, prev, middle, next + from (setschema 'docid,prev,middle,next' select docid, textwindow2s(regexpr("\n",text," "), 10, 1, 10, var('EU_CTR')) + from (select docid, text from mydata)) +) +union all +select jdict('query', 'DRKS', 'documentId', docid, 'prev', prev, 'middle', middle, 'next', next) +from ( select docid, prev, middle, next + from (setschema 'docid,prev,middle,next' select docid, textwindow2s(regexpr("\n",text," "), 10, 1, 10, var('DRKS')) + from (select docid, text from mydata)) +) +union all +select jdict('query', 'ICTRP', 'documentId', docid, 'prev', prev, 'middle', middle, 'next', next) +from ( select docid, prev, middle, next + from (setschema 'docid,prev,middle,next' select docid, textwindow2s(regexpr("\n",text," "), 10, 1, 10, var('ICTRP')) + from (select docid, text from mydata)) +) +union all +select jdict('query', 'INPLASY', 'documentId', docid, 'prev', prev, 'middle', middle, 'next', next) +from ( select docid, prev, middle, next + from (setschema 'docid,prev,middle,next' select docid, textwindow2s(regexpr("\n",text," "), 10, 1, 10, var('INPLASY')) + from (select docid, text from mydata)) +) +union all +select jdict('query', 'PROSPERO', 'documentId', docid, 'prev', prev, 'middle', middle, 'next', next) +from ( select docid, prev, middle, next + from (setschema 'docid,prev,middle,next' select docid, textwindow2s(regexpr("\n",text," "), 10, 1, 10, var('PROSPERO')) + from (select docid, text from mydata)) +) +union all +select jdict('query', 'ISRCTN', 'documentId', docid, 'prev', prev, 'middle', middle, 'next', next) +from ( select docid, prev, middle, next + from 
(setschema 'docid,prev,middle,next' select docid, textwindow2s(regexpr("\n",text," "), 10, 1, 10, var('ISRCTN')) + from (select docid, text from mydata)) +) +union all +select jdict('query', 'ITMCTR', 'documentId', docid, 'prev', prev, 'middle', middle, 'next', next) +from ( select docid, prev, middle, next + from (setschema 'docid,prev,middle,next' select docid, textwindow2s(regexpr("\n",text," "), 10, 1, 10, var('ITMCTR')) + from (select docid, text from mydata)) +) +union all +select jdict('query', 'IRCT', 'documentId', docid, 'prev', prev, 'middle', middle, 'next', next) +from ( select docid, prev, middle, next + from (setschema 'docid,prev,middle,next' select docid, textwindow2s(regexpr("\n",text," "), 10, 1, 10, var('IRCT')) + from (select docid, text from mydata)) +) +union all +select jdict('query', 'JMACCT', 'documentId', docid, 'prev', prev, 'middle', middle, 'next', next) +from ( select docid, prev, middle, next + from (setschema 'docid,prev,middle,next' select docid, textwindow2s(regexpr("\n",text," "), 10, 1, 10, var('JMACCT')) + from (select docid, text from mydata)) +) +union all +select jdict('query', 'JAPIC', 'documentId', docid, 'prev', prev, 'middle', middle, 'next', next) +from ( select docid, prev, middle, next + from (setschema 'docid,prev,middle,next' select docid, textwindow2s(regexpr("\n",text," "), 10, 1, 10, var('JAPIC')) + from (select docid, text from mydata)) +) +union all +select jdict('query', 'jRCT', 'documentId', docid, 'prev', prev, 'middle', middle, 'next', next) +from ( select docid, prev, middle, next + from (setschema 'docid,prev,middle,next' select docid, textwindow2s(regexpr("\n",text," "), 10, 1, 10, var('jRCT')) + from (select docid, text from mydata)) +) +union all +select jdict('query', 'LBCTR', 'documentId', docid, 'prev', prev, 'middle', middle, 'next', next) +from ( select docid, prev, middle, next + from (setschema 'docid,prev,middle,next' select docid, textwindow2s(regexpr("\n",text," "), 10, 1, 10, var('LBCTR')) + 
from (select docid, text from mydata)) +) +union all +select jdict('query', 'OSF', 'documentId', docid, 'prev', prev, 'middle', middle, 'next', next) +from ( select docid, prev, middle, next + from (setschema 'docid,prev,middle,next' select docid, textwindow2s(regexpr("\n",text," "), 10, 1, 10, var('OSF')) + from (select docid, text from mydata)) +) +union all +select jdict('query', 'PACTR', 'documentId', docid, 'prev', prev, 'middle', middle, 'next', next) +from ( select docid, prev, middle, next + from (setschema 'docid,prev,middle,next' select docid, textwindow2s(regexpr("\n",text," "), 10, 1, 10, var('PACTR')) + from (select docid, text from mydata)) +) +union all +select jdict('query', 'REPEC', 'documentId', docid, 'prev', prev, 'middle', middle, 'next', next) +from ( select docid, prev, middle, next + from (setschema 'docid,prev,middle,next' select docid, textwindow2s(regexpr("\n",text," "), 10, 1, 10, var('REPEC')) + from (select docid, text from mydata)) +) +union all +select jdict('query', 'PCT', 'documentId', docid, 'prev', prev, 'middle', middle, 'next', next) +from ( select docid, prev, middle, next + from (setschema 'docid,prev,middle,next' select docid, textwindow2s(regexpr("\n",text," "), 10, 1, 10, var('PCT')) + from (select docid, text from mydata)) +) +union all +select jdict('query', 'ResearchRegistry', 'documentId', docid, 'prev', prev, 'middle', middle, 'next', next) +from ( select docid, prev, middle, next + from (setschema 'docid,prev,middle,next' select docid, textwindow2s(regexpr("\n",text," "), 10, 1, 10, var('ResearchRegistry')) + from (select docid, text from mydata)) +) +union all +select jdict('query', 'SLCTR', 'documentId', docid, 'prev', prev, 'middle', middle, 'next', next) +from ( select docid, prev, middle, next + from (setschema 'docid,prev,middle,next' select docid, textwindow2s(regexpr("\n",text," "), 10, 1, 10, var('SLCTR')) + from (select docid, text from mydata)) +) +union all +select jdict('query', 'TCTR', 'documentId', 
docid, 'prev', prev, 'middle', middle, 'next', next) +from ( select docid, prev, middle, next + from (setschema 'docid,prev,middle,next' select docid, textwindow2s(regexpr("\n",text," "), 10, 1, 10, var('TCTR')) + from (select docid, text from mydata)) +) +union all +select jdict('query', 'UMIN', 'documentId', docid, 'prev', prev, 'middle', middle, 'next', next) +from ( select docid, prev, middle, next + from (setschema 'docid,prev,middle,next' select docid, textwindow2s(regexpr("\n",text," "), 10, 1, 10, var('UMIN')) + from (select docid, text from mydata)) +); From 40a3a08e6b7dd5aa0d420d784cb54d0f9382d177 Mon Sep 17 00:00:00 2001 From: yannisfoufoulas <93664091+yannisfoufoulas@users.noreply.github.com> Date: Thu, 31 Oct 2024 16:28:41 +0200 Subject: [PATCH 2/2] pilot6 version 4 --- .../main/oozie_app/lib/scripts/pilot6.sql | 316 ++++++++++++------ 1 file changed, 210 insertions(+), 106 deletions(-) diff --git a/iis-wf/iis-wf-referenceextraction/src/main/resources/eu/dnetlib/iis/wf/referenceextraction/ukrn/pilot6/main/oozie_app/lib/scripts/pilot6.sql b/iis-wf/iis-wf-referenceextraction/src/main/resources/eu/dnetlib/iis/wf/referenceextraction/ukrn/pilot6/main/oozie_app/lib/scripts/pilot6.sql index 1e8c2cb31..578d1f044 100644 --- a/iis-wf/iis-wf-referenceextraction/src/main/resources/eu/dnetlib/iis/wf/referenceextraction/ukrn/pilot6/main/oozie_app/lib/scripts/pilot6.sql +++ b/iis-wf/iis-wf-referenceextraction/src/main/resources/eu/dnetlib/iis/wf/referenceextraction/ukrn/pilot6/main/oozie_app/lib/scripts/pilot6.sql @@ -7,227 +7,331 @@ -- output 'pubs.txt' select jdict('id', docid, 'text', text) from mydata; -- --cp pubs.txt pubs.json ---cat pubs.json | python ~/Desktop/openAIRE/madis2/src/mexec.py -f pilot6.sql -d test01.db > results_v1.json +--cat pubs.json | python ~/Desktop/openAIRE/madis2/src/mexec.py -f pilot6_v4.sql -d test01.db > results_v4.json --cat pubs_empty.json | python ~/Desktop/openAIRE/madis2/src/mexec.py -f pilot6.sql -d test01.db > results_v2.json 
create temp table mydata as select * from (setschema 'docid,text' select jsonpath(c1,'$.id', '$.text') from stdinput());
-hidden var 'ASR' from "www\.animalstudyregistry\.org|\b10\.17590\/asr\.\d+\b";
-hidden var 'AsPredicted' from "aspredicted\.org|\bAsPredicted\s{0,1}#\d+\b";
-hidden var 'ANZCTR' from "www\.anzctr\.org\.au|\bACTRN:{0,1}\s{0,1}\d+p{0,1}\b";
-hidden var 'ReBec' from "ensaiosclinicos\.gov\.br|\bRBR-\d+[a-z0-9]+\b";
-hidden var 'ChiCTR' from "www\.chictr\.org\.cn|\bChiCTR\s{0,1}-{0,1}(?:TRC){0,1}-{0,1}\d+\b";
-hidden var 'CRiS' from "cris\.nih\.go\.kr\cris|\bKCT\s{0,1}\d{7}\b";
-hidden var 'CTIS' from "euclinicaltrials\.eu|\bEUCT\s{0,1}\d{4}-\d+-\d{2}-\d{2}\b";
-hidden var 'CTRI' from "ctri\.nic\.in|\bCTRI/\d{4}/\d{2}/\d+\b";
-hidden var 'CT_gov' from "clinicaltrials\.gov|\bNCT\s{0,1}\d+\b";
-hidden var 'RPCEC' from "rpcec\.sld\.cu|\bRPCEC\s{0,1}\d+\b";
-hidden var 'LTR' from "www\.onderzoekmetmensen\.nl|\bNL\s{0,1}\d+ ; NTR\s{0,1}\d+\b";
-hidden var 'EU_CTR' from "www\.clinicaltrialsregister\.eu";
-hidden var 'DRKS' from "drks\.de|\bDRKS\s{0,1}\d+\b";
-hidden var 'ICTRP' from "trialsearch\.who\.int|\bU\d{4}-\d{4}-\d{4}\b";
-hidden var 'INPLASY' from "inplasy\.com|\bINPLASY\s{0,1}\d+\b";
-hidden var 'PROSPERO' from "www\.crd\.york\.ac\.uk\/prospero|\bCRD\s{0,1}\d+\b";
-hidden var 'ISRCTN' from "www\.isrctn\.com|\bISRCTN\s{0,1}\d+\b";
-hidden var 'ITMCTR' from "itmctr\.ccebtcm\.org\.cn|\bITMCTR\s{0,1}\d+\b";
-hidden var 'IRCT' from "www\.irct\.ir|\bIRCT\s{0,1}\d+\b";
-hidden var 'JMACCT' from "rctportal\.niph\.go\.jp|\bJMA-IIA\s{0,1}\d+\b";
-hidden var 'JAPIC' from "www\.clinicaltrials\.jp|\bJapicCTI\s{0,1}-{0,1}\d+\b";
-hidden var 'jRCT' from "rctportal\.niph\.go\.jp|\bjRCTs{0,1}\s{0,1}\d+\b";
-hidden var 'LBCTR' from "lbctr\.moph\.gov\.lb|\bLBCTR\s{0,1}\d+\b";
-hidden var 'OSF' from "osf\.io/search\?resourceType=Registration";
+hidden var 'ASR' from "\b10\.17590\/asr\.\d+\b";
+hidden var 'ASR1' from "asr\.\d+\b";
+hidden var 'AsPredicted1' from "\bAsPredicted\s?#\d+\b|aspredicted\.org";
+hidden var 'AsPredicted2' from "\bAsPredicted\s?#\d+\b";
+hidden var 'AsPredicted3' from "aspredicted\.org\/blind\.php\?x ?=\/? ?[a-zA-Z0-9]{3}_?[a-zA-Z0-9]{3}|aspredicted\.org\/[a-z0-9]{5}\.pdf|aspredicted\.org\/[A-Z0-9]{3}_?[A-Z0-9]{3}";
+hidden var 'ANZCTR1' from "\bACTRN:{0,1}\s{0,1}\d+p{0,1}\b|anzctr\.org\.au";
+hidden var 'ANZCTR2' from "\bACTRN:{0,1}\s{0,1}\d+p{0,1}\b";
+hidden var 'ANZCTR3' from "anzctr\.org\.au\/Trial\/Registration\/TrialReview\.aspx\?id= ?\d{6}";
+hidden var 'ReBec' from "\bRBR-\d+[a-z0-9]+\b";
+-- Suffix convention for AsPredicted, ANZCTR, ChiCTR, CRiS and CTRI: <name>1 is the pattern scanned by textwindow2s, <name>2 the bare registry id, <name>3 the registry URL form.
+hidden var 'ChiCTR1' from "\bChiCTR\s{0,1}-{0,1}(?:TRC){0,1}-{0,1}\d+\b|chictr\.org\.cn";
+hidden var 'ChiCTR2' from "\bChiCTR\s{0,1}-{0,1}(?:TRC){0,1}-{0,1}\d+\b";
+hidden var 'ChiCTR3' from "chictr\.org\.cn\/showproj\.html\?proj=\d+|chictr\.org\.cn\/showproj\.aspx\?proj=\d+|chictr\.org\.cn\/showprojen\.aspx\?proj=\d+|chictr\.org\.cn\/edit\.aspx\?pid=\d+|chictr\.org\.cn\/hvshowproject\.aspx\?id=\d+|chictr\.org\.cn\/hvshowprojectEN\.html\?id=\d+|chictr\.org\.cn\/showprojEN\.html\?proj=\d+|chictr\.org\.cn\/bin\/project\/edit\?pid=\d+";
+
+hidden var 'CRiS1' from "cris\.nih\.go\.kr|\bKCT\s{0,1}\d{7}\b";
+hidden var 'CRiS2' from "\bKCT\s{0,1}\d{7}\b";
+hidden var 'CRiS3' from "cris\.nih\.go\.kr\/cris\/search\/detailSearch\.do\?seq=\d+";
+-- I did not find any result with the URL
+
+hidden var 'CTIS' from "\bEU\s{0,1}CT\s{0,1}\d{4}-\d+-\d{2}-\d{2}\b|\bEU\s{0,1}CT\s{0,1}number\s{0,1}\d{4}-\d+-\d{2}-\d{2}\b";
+-- I did not find any result with EUCT
+
+hidden var 'CTRI1' from "ctri\.nic\.in|\bCTRI/\d{4}/\d{2}/\d+\b";
+hidden var 'CTRI2' from "\bCTRI/\d{4}/\d{2}/\d+\b";
+hidden var 'CTRI3' from "ctri\.nic\.in\/Clinicaltrials\/pmaindet2\.php\?trialid=\d+|ctri\.nic\.in\/Clinicaltrials\/pdf_generate\.php\?trialid=\d+|ctri\.nic\.in\/Clinicaltrials\/rmaindet\.php\?trialid=\d+|ctri\.nic\.in\/Clinicaltrials\/showallp\.php\?mid1=\d+";
+hidden var 'CT_gov' from "NCT\s{0,1}\d{6,}";
+hidden var 'RPCEC' from "\bRPCEC\s{0,1}\d+\b";
+hidden var 'LTR' from "onderzoekmetmensen\.nl\/[a-z]{2}\/trial\/\d+|\bNL\s{0,1}\d+\b|\bNTR\s{0,1}\d+\b";
+-- I had this wrong the previous time
+hidden var 'LTR_positivewords' from 'onderzoekmetmensen|dutch trial';
+hidden var 'EU_CTR' from "\d{4}[-–]\d+-\d+";
+hidden var 'EU_CTR_positivewords' from 'eudract|ctr-search';
+hidden var 'DRKS' from "\bDRKS\s{0,1}\d+\b";
+hidden var 'ICTRP' from "\bU\d{4}-\d{4}-\d{4}\b";
+hidden var 'INPLASY' from "\binplasy\s{0,1}\d+\b|inplasy-\d{4}-\d{2}-\d+";
+hidden var 'PROSPERO' from "www\.crd\.york\.ac\.uk\/prospero|\bCRD\s{0,1}\d{6,}\b";
+hidden var 'PROSPERO1' from "www\.crd\.york\.ac\.uk\/prospero\/display_record\.php\?RecordID=\d+";
+hidden var 'PROSPERO1b' from "www\.crd\.york\.ac\.uk\/prospero\/display_record\.php\?RecordID=(\d+)";
+hidden var 'PROSPERO2' from "\bCRD\s{0,1}\d{6,}\b";
+
+hidden var 'ISRCTN' from "\bISRCTN\s{0,1}\d+\b";
+hidden var 'ITMCTR' from "\bITMCTR\s{0,1}\d+\b|ccebtcm\.org\.cn\/[a-z]{2}-[A-Z]{2}\/Home\/ProjectView\?pid=[a-z0-9-]+";
+hidden var 'IRCT' from "\bIRCT\s{0,1}\d+N{0,1}\d+\b";
+hidden var 'JMACCT' from "\bJMA-IIA\s{0,1}\d+\b";
+hidden var 'JAPIC' from "\bJapicCTI\s{0,1}-{0,1}\d+\b";
+hidden var 'jRCT' from "\bjRCTs{0,1}\s{0,1}\d+\b";
+hidden var 'LBCTR' from "lbctr\.moph\.gov\.lb\/Trials\/Details\/\d+|\bLBCTR\s{0,1}\d+\b";
+hidden var 'OSF' from "osf\.io\/\s{0,1}\w{5}\b";
+hidden var 'OSF1' from "osf\.io\/\s{0,1}(\w{5})\b";
 hidden var 'PACTR' from "pactr\.samrc\.ac\.za|\bPACTR\s{0,1}\d+\b";
-hidden var 'REPEC' from "ensayosclinicos-repec\.ins\.gob\.pe|\bPER-\d+-\d+\b";
-hidden var 'PCT' from "preclinicaltrials\.eu|\bPCTE\s{0,1}\d+\b";
-hidden var 'ResearchRegistry' from "www\.researchregistry\.com|\bresearchregistry\s{0,1}\d+\b";
-hidden var 'SLCTR' from "www\.slctr\.lk|\bSLCTR/\d{4}/\d+\b";
-hidden var 'TCTR' from "thaiclinicaltrials\.org|\bTCTR\s{0,1}\d+\b";
-hidden var 'UMIN' from "ww\.umin\.ac\.jp|\bUMIN\s{0,1}\d+\b";
+hidden var 'PACTR2' from "\bPACTR\s{0,1}\d+\b";
+hidden var 'PACTR3' from "\bpactr\.samrc\.ac\.za\/TrialDisplay\.aspx\?TrialID=\d+\b|\bpactr\.samrc\.ac\.za\/Researcher\/TrialRegister\.aspx\?TrialID=\d+\b"; +hidden var 'REPEC' from "\brepec\b|\bREPEC\b"; +hidden var 'PCT' from "\bPCTE\s{0,1}\d{3,}\b"; +-- I changed from \d+ to \d{3,} +hidden var 'ResearchRegistry' from "www\.researchregistry\.com|\bresearchregistry\s{0,1}\d+\b"; +hidden var 'ResearchRegistry1' from "www\.researchregistry\.com\/browse-the-registry#registryofsystematicreviewsmeta-analyses\/registryofsystematicreviewsmeta-analysesdetails\/[a-z0-9]+\/|www\.researchregistry\.com\/browse-the-registry#home\/registrationdetails\/[a-z0-9]+\/|www\.researchregistry\.com\/browse-the-registry#registryofsystematicreviewsmetaanalyses\/registryofsystematicreviewsmeta-analysesdetails\/[a-z0-9]+\/"; +hidden var 'ResearchRegistry2' from "\bresearchregistry\s{0,1}\d+\b"; +hidden var 'SLCTR' from "\bslctr/\d{4}/\d+\b"; +hidden var 'TCTR' from "\bTCTR\s{0,1}\d+\b"; +hidden var 'UMIN' from "\b(?:JPRN-)?UMIN\s{0,1}\d+\b"; -select jdict('query', 'ASR', 'documentId', docid, 'prev', prev, 'middle', middle, 'next', next) +select jdict('query', 'ASR', 'documentId', docid, 'prev', prev, 'middle', middle, 'next', next, 'id', regexpr("("||var('ASR1')||")", middle)) from ( select docid, prev, middle, next from (setschema 'docid,prev,middle,next' select docid, textwindow2s(regexpr("\n",text," "), 10, 1, 10, var('ASR')) from (select docid, text from mydata)) ) union all -select jdict('query', 'AsPredicted', 'documentId', docid, 'prev', prev, 'middle', middle, 'next', next) -from ( select docid, prev, middle, next - from (setschema 'docid,prev,middle,next' select docid, textwindow2s(regexpr("\n",text," "), 10, 1, 10, var('AsPredicted')) - from (select docid, text from mydata)) +select jdict('query', 'AsPredicted', 'documentId', docid, 'prev', prev, 'middle', middle, 'next', next, 'id', id) +from ( + select docid, prev, middle, next, + case when regexprmatches(var('AsPredicted2'), 
middle) + then regexpr("("||var('AsPredicted2')||")", middle) + else regexpr("("||var('AsPredicted3')||")", middle||next) + end as id + from ( + select docid, prev, middle, next + from (setschema 'docid,prev,middle,next' select docid, textwindow2s(regexpr("\n",text," "), 10, 1, 10, var('AsPredicted1')) + from (select docid, text from mydata)) + ) + where regexprmatches(var('AsPredicted2'), middle) = 1 or + regexprmatches(var('AsPredicted3'), middle||next) = 1 ) union all -select jdict('query', 'ANZCTR', 'documentId', docid, 'prev', prev, 'middle', middle, 'next', next) -from ( select docid, prev, middle, next - from (setschema 'docid,prev,middle,next' select docid, textwindow2s(regexpr("\n",text," "), 10, 1, 10, var('ANZCTR')) - from (select docid, text from mydata)) +select jdict('query', 'ANZCTR', 'documentId', docid, 'prev', prev, 'middle', middle, 'next', next, 'id', id) +from ( + select docid, prev, middle, next, + case when regexprmatches(var('ANZCTR2'), middle) then regexpr("("||var('ANZCTR2')||")", middle) + else regexpr("("||var('ANZCTR3')||")", middle||next) + end as id + from ( select docid, prev, middle, next + from (setschema 'docid,prev,middle,next' select docid, textwindow2s(regexpr("\n",text," "), 10, 1, 10, var('ANZCTR1')) + from (select docid, text from mydata)) + ) + where regexprmatches(var('ANZCTR2'), middle) = 1 or + regexprmatches(var('ANZCTR3'),middle||next) = 1 ) union all -select jdict('query', 'ReBec', 'documentId', docid, 'prev', prev, 'middle', middle, 'next', next) +select jdict('query', 'ReBec', 'documentId', docid, 'prev', prev, 'middle', middle, 'next', next, 'id', regexpr("("||var('ReBec')||")", middle)) from ( select docid, prev, middle, next from (setschema 'docid,prev,middle,next' select docid, textwindow2s(regexpr("\n",text," "), 10, 1, 10, var('ReBec')) from (select docid, text from mydata)) ) union all -select jdict('query', 'ChiCTR', 'documentId', docid, 'prev', prev, 'middle', middle, 'next', next) -from ( select docid, 
prev, middle, next - from (setschema 'docid,prev,middle,next' select docid, textwindow2s(regexpr("\n",text," "), 10, 1, 10, var('ChiCTR')) - from (select docid, text from mydata)) +select jdict('query', 'ChiCTR', 'documentId', docid, 'prev', prev, 'middle', middle, 'next', next, 'id', id) +from ( + select docid, prev, middle, next, + case when regexprmatches(var('ChiCTR2'), middle) then regexpr("("||var('ChiCTR2')||")", middle) + else regexpr("("||var('ChiCTR3')||")", middle||next) + end as id + from ( select docid, prev, middle, next + from (setschema 'docid,prev,middle,next' select docid, textwindow2s(regexpr("\n",text," "), 10, 1, 10, var('ChiCTR1')) + from (select docid, text from mydata)) + ) + where + regexprmatches(var('ChiCTR2'), middle) = 1 or + regexprmatches(var('ChiCTR3'),middle||next) = 1 ) union all -select jdict('query', 'CRiS', 'documentId', docid, 'prev', prev, 'middle', middle, 'next', next) -from ( select docid, prev, middle, next - from (setschema 'docid,prev,middle,next' select docid, textwindow2s(regexpr("\n",text," "), 10, 1, 10, var('CRiS')) - from (select docid, text from mydata)) +select jdict('query', 'CRiS', 'documentId', docid, 'prev', prev, 'middle', middle, 'next', next, 'id', id) +from ( + select docid, prev, middle, next, + case when regexprmatches(var('CRiS2'), middle) then regexpr("("||var('CRiS2')||")", middle) + else regexpr("("||var('CRiS3')||")", middle||next) + end as id + from ( select docid, prev, middle, next + from (setschema 'docid,prev,middle,next' select docid, textwindow2s(regexpr("\n",text," "), 10, 1, 10, var('CRiS1')) + from (select docid, text from mydata)) + ) + where + regexprmatches(var('CRiS2'), middle) = 1 or + regexprmatches(var('CRiS3'),middle||next) = 1 ) union all -select jdict('query', 'CTIS', 'documentId', docid, 'prev', prev, 'middle', middle, 'next', next) +select jdict('query', 'CTIS', 'documentId', docid, 'prev', prev, 'middle', middle, 'next', next, 'id', regexpr("("||var('CTIS')||")", middle)) 
from ( select docid, prev, middle, next from (setschema 'docid,prev,middle,next' select docid, textwindow2s(regexpr("\n",text," "), 10, 1, 10, var('CTIS')) from (select docid, text from mydata)) ) union all -select jdict('query', 'CTRI', 'documentId', docid, 'prev', prev, 'middle', middle, 'next', next) -from ( select docid, prev, middle, next - from (setschema 'docid,prev,middle,next' select docid, textwindow2s(regexpr("\n",text," "), 10, 1, 10, var('CTRI')) - from (select docid, text from mydata)) -) -union all -select jdict('query', 'CT_gov', 'documentId', docid, 'prev', prev, 'middle', middle, 'next', next) -from ( select docid, prev, middle, next - from (setschema 'docid,prev,middle,next' select docid, textwindow2s(regexpr("\n",text," "), 10, 1, 10, var('CT_gov')) - from (select docid, text from mydata)) +select jdict('query', 'CTRI', 'documentId', docid, 'prev', prev, 'middle', middle, 'next', next, 'id', id) +from ( + select docid, prev, middle, next, + case when regexprmatches(var('CTRI2'), middle) then regexpr("("||var('CTRI2')||")", middle) + else regexpr("("||var('CTRI3')||")", middle||next) + end as id + from ( select docid, prev, middle, next + from (setschema 'docid,prev,middle,next' select docid, textwindow2s(regexpr("\n",text," "), 10, 1, 10, var('CTRI1')) + from (select docid, text from mydata)) + ) + where + regexprmatches(var('CTRI2'), middle) = 1 or + regexprmatches(var('CTRI3'),middle||next) = 1 ) union all -select jdict('query', 'RPCEC', 'documentId', docid, 'prev', prev, 'middle', middle, 'next', next) +select jdict('query', 'RPCEC', 'documentId', docid, 'prev', prev, 'middle', middle, 'next', next, 'id', regexpr("("||var('RPCEC')||")", middle)) from ( select docid, prev, middle, next from (setschema 'docid,prev,middle,next' select docid, textwindow2s(regexpr("\n",text," "), 10, 1, 10, var('RPCEC')) from (select docid, text from mydata)) ) union all -select jdict('query', 'LTR', 'documentId', docid, 'prev', prev, 'middle', middle, 'next', 
next) +select jdict('query', 'LTR', 'documentId', docid, 'prev', prev, 'middle', middle, 'next', next, 'id', regexpr("("||var('LTR')||")", middle)) from ( select docid, prev, middle, next from (setschema 'docid,prev,middle,next' select docid, textwindow2s(regexpr("\n",text," "), 10, 1, 10, var('LTR')) from (select docid, text from mydata)) + where regexprmatches(var('LTR_positivewords'), lower(prev||middle||next)) = 1 ) union all -select jdict('query', 'EU_CTR', 'documentId', docid, 'prev', prev, 'middle', middle, 'next', next) +select jdict('query', 'EU_CTR', 'documentId', docid, 'prev', prev, 'middle', middle, 'next', next,'id', regexpr("("||var('EU_CTR')||")", middle)) from ( select docid, prev, middle, next from (setschema 'docid,prev,middle,next' select docid, textwindow2s(regexpr("\n",text," "), 10, 1, 10, var('EU_CTR')) from (select docid, text from mydata)) + where regexprmatches(var('EU_CTR_positivewords'), prev||middle||next) = 1 ) union all -select jdict('query', 'DRKS', 'documentId', docid, 'prev', prev, 'middle', middle, 'next', next) +select jdict('query', 'CT_gov', 'documentId', docid, 'prev', prev, 'middle', middle, 'next', next, 'id', regexpr("("||var('CT_gov')||")", middle)) from ( select docid, prev, middle, next - from (setschema 'docid,prev,middle,next' select docid, textwindow2s(regexpr("\n",text," "), 10, 1, 10, var('DRKS')) - from (select docid, text from mydata)) + from (setschema 'docid,prev,middle,next' select docid, textwindow2s(regexpr("\n",text," "), 10, 1, 10, var('CT_gov')) from mydata) + where regexprmatches('[A-Z1-9]NCT\s{0,1}\d{6,}',middle) = 0 and length(regexpr("(NCT\s{0,1}\d+)", middle))>=7 ) union all -select jdict('query', 'ICTRP', 'documentId', docid, 'prev', prev, 'middle', middle, 'next', next) +select jdict('query', 'DRKS', 'documentId', docid, 'prev', prev, 'middle', middle, 'next', next, 'id', regexpr("("||var('DRKS')||")", middle) ) from ( select docid, prev, middle, next - from (setschema 'docid,prev,middle,next' 
select docid, textwindow2s(regexpr("\n",text," "), 10, 1, 10, var('ICTRP')) + from (setschema 'docid,prev,middle,next' select docid, textwindow2s(regexpr("\n",text," "), 10, 1, 10, var('DRKS')) from (select docid, text from mydata)) ) union all -select jdict('query', 'INPLASY', 'documentId', docid, 'prev', prev, 'middle', middle, 'next', next) +select jdict('query', 'ICTRP', 'documentId', docid, 'prev', prev, 'middle', middle, 'next', next,'id', regexpr("("||var('ICTRP')||")", middle)) from ( select docid, prev, middle, next - from (setschema 'docid,prev,middle,next' select docid, textwindow2s(regexpr("\n",text," "), 10, 1, 10, var('INPLASY')) + from (setschema 'docid,prev,middle,next' select docid, textwindow2s(regexpr("\n",text," "), 10, 1, 10, var('ICTRP')) from (select docid, text from mydata)) ) union all -select jdict('query', 'PROSPERO', 'documentId', docid, 'prev', prev, 'middle', middle, 'next', next) +select jdict('query', 'INPLASY', 'documentId', docid, 'prev', prev, 'middle', middle, 'next', next,'id', regexpr("("||var('INPLASY')||")", middle)) from ( select docid, prev, middle, next - from (setschema 'docid,prev,middle,next' select docid, textwindow2s(regexpr("\n",text," "), 10, 1, 10, var('PROSPERO')) - from (select docid, text from mydata)) + from (setschema 'docid,prev,middle,next' select docid, textwindow2s(regexpr("\n",text," "), 10, 1, 10, var('INPLASY')) + from (select docid, lower(text) as text from mydata)) +) +union all +select jdict('query', 'PROSPERO', 'documentId', docid, 'prev', prev, 'middle', middle, 'next', next, 'id', id) +from ( + select docid, prev, middle, next, + case when regexprmatches(var('PROSPERO1'), middle||next) then regexpr(var('PROSPERO1b'), middle||next) + else regexpr("("||var('PROSPERO2')||")", middle) + end as id + from ( select docid, prev, middle, next + from (setschema 'docid,prev,middle,next' select docid, textwindow2s(regexpr("\n",text," "), 10, 1, 10, var('PROSPERO')) + from (select docid, text from mydata)) + ) 
+ where ( regexprmatches('prospero\/display_record', middle) = 1 and regexprmatches(var('PROSPERO1'), middle||next) = 1 ) + or ( regexprmatches('prospero\/display_record', middle) = 0 and length(regexpr("\bCRD\s{0,1}(\d{6,})\b", middle))>= 8 ) + or ( regexprmatches('prospero\/display_record', middle) = 0 and regexprmatches('grant',lower(prev||middle||next)) = 0 and length(regexpr("\bCRD\s{0,1}(\d{6,})\b", middle))>=6 and length(regexpr("\bCRD\s{0,1}(\d{6,})\b", middle)) <=7) ) union all -select jdict('query', 'ISRCTN', 'documentId', docid, 'prev', prev, 'middle', middle, 'next', next) +select jdict('query', 'ISRCTN', 'documentId', docid, 'prev', prev, 'middle', middle, 'next', next, 'id', regexpr("("||var('ISRCTN')||")", middle)) from ( select docid, prev, middle, next - from (setschema 'docid,prev,middle,next' select docid, textwindow2s(regexpr("\n",text," "), 10, 1, 10, var('ISRCTN')) - from (select docid, text from mydata)) + from (setschema 'docid,prev,middle,next' select docid, textwindow2s(regexpr("\n",text," "), 10, 1, 10, var('ISRCTN')) + from (select docid, text from mydata)) ) union all -select jdict('query', 'ITMCTR', 'documentId', docid, 'prev', prev, 'middle', middle, 'next', next) +select jdict('query', 'ITMCTR', 'documentId', docid, 'prev', prev, 'middle', middle, 'next', next, 'id', regexpr("("||var('ITMCTR')||")", middle)) from ( select docid, prev, middle, next - from (setschema 'docid,prev,middle,next' select docid, textwindow2s(regexpr("\n",text," "), 10, 1, 10, var('ITMCTR')) + from (setschema 'docid,prev,middle,next' select docid, textwindow2s(regexpr("\n",text," "), 10, 1, 10, var('ITMCTR')) from (select docid, text from mydata)) ) union all -select jdict('query', 'IRCT', 'documentId', docid, 'prev', prev, 'middle', middle, 'next', next) +select jdict('query', 'IRCT', 'documentId', docid, 'prev', prev, 'middle', middle, 'next', next, 'id', regexpr("("||var('IRCT')||")", middle)) from ( select docid, prev, middle, next - from (setschema 
'docid,prev,middle,next' select docid, textwindow2s(regexpr("\n",text," "), 10, 1, 10, var('IRCT')) + from (setschema 'docid,prev,middle,next' select docid, textwindow2s(regexpr("\n",text," "), 10, 1, 10, var('IRCT')) from (select docid, text from mydata)) ) union all -select jdict('query', 'JMACCT', 'documentId', docid, 'prev', prev, 'middle', middle, 'next', next) +select jdict('query', 'JMACCT', 'documentId', docid, 'prev', prev, 'middle', middle, 'next', next,'id', regexpr("("||var('JMACCT')||")", middle)) from ( select docid, prev, middle, next - from (setschema 'docid,prev,middle,next' select docid, textwindow2s(regexpr("\n",text," "), 10, 1, 10, var('JMACCT')) + from (setschema 'docid,prev,middle,next' select docid, textwindow2s(regexpr("\n",text," "), 10, 1, 10, var('JMACCT')) from (select docid, text from mydata)) ) union all -select jdict('query', 'JAPIC', 'documentId', docid, 'prev', prev, 'middle', middle, 'next', next) +select jdict('query', 'JAPIC', 'documentId', docid, 'prev', prev, 'middle', middle, 'next', next, 'id', regexpr("("||var('JAPIC')||")", middle)) from ( select docid, prev, middle, next - from (setschema 'docid,prev,middle,next' select docid, textwindow2s(regexpr("\n",text," "), 10, 1, 10, var('JAPIC')) + from (setschema 'docid,prev,middle,next' select docid, textwindow2s(regexpr("\n",text," "), 10, 1, 10, var('JAPIC')) from (select docid, text from mydata)) ) union all -select jdict('query', 'jRCT', 'documentId', docid, 'prev', prev, 'middle', middle, 'next', next) +select jdict('query', 'jRCT', 'documentId', docid, 'prev', prev, 'middle', middle, 'next', next, 'id', regexpr("("||var('jRCT')||")", middle)) from ( select docid, prev, middle, next from (setschema 'docid,prev,middle,next' select docid, textwindow2s(regexpr("\n",text," "), 10, 1, 10, var('jRCT')) from (select docid, text from mydata)) ) union all -select jdict('query', 'LBCTR', 'documentId', docid, 'prev', prev, 'middle', middle, 'next', next) +select jdict('query', 
'LBCTR', 'documentId', docid, 'prev', prev, 'middle', middle, 'next', next, 'id', regexpr("("||var('LBCTR')||")", middle)) from ( select docid, prev, middle, next from (setschema 'docid,prev,middle,next' select docid, textwindow2s(regexpr("\n",text," "), 10, 1, 10, var('LBCTR')) - from (select docid, text from mydata)) + from (select docid, text from mydata)) ) union all -select jdict('query', 'OSF', 'documentId', docid, 'prev', prev, 'middle', middle, 'next', next) +select jdict('query', 'OSF', 'documentId', docid, 'prev', prev, 'middle', middle, 'next', next, 'id', regexpr(var('OSF1'), middle)) from ( select docid, prev, middle, next from (setschema 'docid,prev,middle,next' select docid, textwindow2s(regexpr("\n",text," "), 10, 1, 10, var('OSF')) - from (select docid, text from mydata)) + from (select docid, lower(text) as text from mydata)) ) union all -select jdict('query', 'PACTR', 'documentId', docid, 'prev', prev, 'middle', middle, 'next', next) -from ( select docid, prev, middle, next - from (setschema 'docid,prev,middle,next' select docid, textwindow2s(regexpr("\n",text," "), 10, 1, 10, var('PACTR')) - from (select docid, text from mydata)) +select jdict('query', 'PACTR', 'documentId', docid, 'prev', prev, 'middle', middle, 'next', next, 'id', id) +from ( + select docid, prev, middle, next, + case when regexprmatches(var('PACTR2'), middle) + then regexpr("("||var('PACTR2')||")", middle) + else regexpr("("||var('PACTR3')||")", middle||next) + end as id + from ( select docid, prev, middle, next + from (setschema 'docid,prev,middle,next' select docid, textwindow2s(regexpr("\n",text," "), 10, 1, 10, var('PACTR')) + from (select docid, text from mydata)) + ) + where regexprmatches(var('PACTR2'), middle) = 1 or + regexprmatches(var('PACTR3'), middle||next) = 1 ) union all -select jdict('query', 'REPEC', 'documentId', docid, 'prev', prev, 'middle', middle, 'next', next) +select jdict('query', 'REPEC', 'documentId', docid, 'prev', prev, 'middle', middle, 'next', 
next, 'id' , middle) from ( select docid, prev, middle, next from (setschema 'docid,prev,middle,next' select docid, textwindow2s(regexpr("\n",text," "), 10, 1, 10, var('REPEC')) from (select docid, text from mydata)) ) union all -select jdict('query', 'PCT', 'documentId', docid, 'prev', prev, 'middle', middle, 'next', next) +select jdict('query', 'PCT', 'documentId', docid, 'prev', prev, 'middle', middle, 'next', next, 'id', regexpr("("||var('PCT')||")", middle)) from ( select docid, prev, middle, next - from (setschema 'docid,prev,middle,next' select docid, textwindow2s(regexpr("\n",text," "), 10, 1, 10, var('PCT')) - from (select docid, text from mydata)) + from (setschema 'docid,prev,middle,next' select docid, textwindow2s(regexpr("\n",text," "), 10, 1, 10, var('PCT')) + from (select docid, text from mydata)) ) union all -select jdict('query', 'ResearchRegistry', 'documentId', docid, 'prev', prev, 'middle', middle, 'next', next) -from ( select docid, prev, middle, next - from (setschema 'docid,prev,middle,next' select docid, textwindow2s(regexpr("\n",text," "), 10, 1, 10, var('ResearchRegistry')) - from (select docid, text from mydata)) +select jdict('query', 'ResearchRegistry', 'documentId', docid, 'prev', prev, 'middle', middle, 'next', next, 'id', id) +from ( + select docid, prev, middle, next, + case when regexprmatches(var('ResearchRegistry2'), middle) + then regexpr("("||var('ResearchRegistry2')||")", middle) + else regexpr("("||var('ResearchRegistry1')||")", middle||next) + end as id + from ( select docid, prev, middle, next + from (setschema 'docid,prev,middle,next' select docid, textwindow2s(regexpr("\n",text," "), 10, 1, 10, var('ResearchRegistry')) + from (select docid, text from mydata)) + ) + where regexprmatches(var('ResearchRegistry2'), middle) = 1 or + regexprmatches(var('ResearchRegistry1'), middle||next) = 1 ) union all -select jdict('query', 'SLCTR', 'documentId', docid, 'prev', prev, 'middle', middle, 'next', next) +select jdict('query', 
'SLCTR', 'documentId', docid, 'prev', prev, 'middle', middle, 'next', next, 'id', regexpr("("||var('SLCTR')||")", middle)) from ( select docid, prev, middle, next - from (setschema 'docid,prev,middle,next' select docid, textwindow2s(regexpr("\n",text," "), 10, 1, 10, var('SLCTR')) - from (select docid, text from mydata)) + from (setschema 'docid,prev,middle,next' select docid, textwindow2s(regexpr("\n",text," "), 10, 1, 10, var('SLCTR')) + from (select docid, lower(text) as text from mydata)) ) union all -select jdict('query', 'TCTR', 'documentId', docid, 'prev', prev, 'middle', middle, 'next', next) +select jdict('query', 'TCTR', 'documentId', docid, 'prev', prev, 'middle', middle, 'next', next , 'id', regexpr("("||var('TCTR')||")", middle)) from ( select docid, prev, middle, next from (setschema 'docid,prev,middle,next' select docid, textwindow2s(regexpr("\n",text," "), 10, 1, 10, var('TCTR')) from (select docid, text from mydata)) ) union all -select jdict('query', 'UMIN', 'documentId', docid, 'prev', prev, 'middle', middle, 'next', next) +select jdict('query', 'UMIN', 'documentId', docid, 'prev', prev, 'middle', middle, 'next', next, 'id', regexpr("("||var('UMIN')||")", middle)) from ( select docid, prev, middle, next from (setschema 'docid,prev,middle,next' select docid, textwindow2s(regexpr("\n",text," "), 10, 1, 10, var('UMIN')) from (select docid, text from mydata))