From 9e66745bda547f0d363dc8d36cfcb92807d3f427 Mon Sep 17 00:00:00 2001 From: Fabian Steeg Date: Fri, 28 Jan 2022 14:29:14 +0100 Subject: [PATCH] Update to current metafix (#137) And add some now required `set_array` statements, see https://github.com/metafacture/metafacture-fix/pull/110 --- build.gradle | 2 +- data/production/duepublico/duepublico-to-oersi.flux | 2 +- data/production/openRub/openRub.fix | 11 ++++++----- 3 files changed, 8 insertions(+), 7 deletions(-) diff --git a/build.gradle b/build.gradle index 7c2cee6..9223287 100644 --- a/build.gradle +++ b/build.gradle @@ -44,7 +44,7 @@ dependencies { implementation 'org.metafacture:metafacture-elasticsearch:5.3.0-rc2' implementation 'org.metafacture:metafacture-xml:5.3.0-rc2' implementation 'org.metafacture:metafacture-fix:0.2.0-rc2' - implementation 'org.metafacture:metafix:0.2.0-SNAPSHOT' + implementation 'org.metafacture:metafix:0.2.0-rc9' implementation('org.metafacture:metafacture-biblio:5.3.0-rc2') { exclude group: 'xml-apis', module: 'xml-apis' } diff --git a/data/production/duepublico/duepublico-to-oersi.flux b/data/production/duepublico/duepublico-to-oersi.flux index 98bc9c6..ba22a96 100644 --- a/data/production/duepublico/duepublico-to-oersi.flux +++ b/data/production/duepublico/duepublico-to-oersi.flux @@ -15,7 +15,7 @@ XML_FILE | open-file | decode-xml | handle-generic-xml(emitNamespace="true") -| fix(FLUX_DIR + "duepublico.fix", *) +| metafix(FLUX_DIR + "duepublico.fix", *) | encode-json(prettyPrinting="true") | oersi.JsonValidator(output_schema, writeValid=metadata_valid, writeInvalid=metadata_invalid) | oersi.OersiWriter(backend_api, user=backend_user, pass=backend_pass, log=metadata_responses) diff --git a/data/production/openRub/openRub.fix b/data/production/openRub/openRub.fix index bbb335d..fbfe482 100644 --- a/data/production/openRub/openRub.fix +++ b/data/production/openRub/openRub.fix @@ -26,12 +26,14 @@ replace_all("description", ' ', " ") trim("description") # ---------sourceOrganization--------- +set_array("sourceOrganization[]") add_field("sourceOrganization[].$append.name", "Ruhr-Universität Bochum") add_field("sourceOrganization[].$last.type","Organization") # ------------learningResourceType----- lookup("learningResourceType", "data/maps/openRubLearningResourceTypes.tsv", "sep_char":"\t") lookup("learningResourceType", "data/maps/hcrt-de-labels-to-uri.tsv", "sep_char":"\t") +set_array("learningResourceType[]") copy_field("learningResourceType", "learningResourceType[].$append.id") copy_field("learningResourceType", "learningResourceTypeDE") move_field("learningResourceType", "learningResourceTypeEN") @@ -41,17 +43,16 @@ move_field("learningResourceTypeDE", "learningResourceType[].$last.prefLabel.de" move_field("learningResourceTypeEN", "learningResourceType[].$last.prefLabel.en") # ------about---- -# sometimes the field is not an array. This needs to be settled: -move_field("fields", "fields[].$append") -lookup("fields[]", "data/maps/destatisLabels-to-uri.tsv","sep_char":"\t") -do list(path: "fields[]", "var": f) +lookup("fields", "data/maps/destatisLabels-to-uri.tsv","sep_char":"\t") +do list(path: "fields", "var": f) copy_field("f", "about[].$append.id") lookup("f", "data/maps/subject-labels.tsv","sep_char":"\t") copy_field("f", "about[].$last.prefLabel.de") end -remove_field('fields[]') +remove_field('fields') # ---------mainEntityOfPage---------------- +set_array("mainEntityOfPage[]") copy_field("id", "mainEntityOfPage[].$append.id") add_field("mainEntityOfPage[].$last.provider.id","$[service_id]") add_field("mainEntityOfPage[].$last.provider.type","Service")