From a5a3c3bd699bb5840a77798d6c563dafa163723e Mon Sep 17 00:00:00 2001 From: eblondel Date: Tue, 25 Jun 2024 11:59:47 +0200 Subject: [PATCH] #390 test feeding metadataeditr from CSW, consolidate CSW entity handler, metadataeditr project creator --- inst/actions/metadataeditr_create_project.R | 112 +++++++++++------- .../config_metadataeditr_from_csw.json | 51 ++++++++ inst/extdata/workflows/workflows.csv | 23 ++-- inst/metadata/entity/entity_handler_csw.R | 19 ++- .../test_config_metadataeditr_from_csw.R | 42 +++++++ .../test_config_metadataeditr_from_gsheets.R | 8 +- 6 files changed, 196 insertions(+), 59 deletions(-) create mode 100644 inst/extdata/workflows/config_metadataeditr_from_csw.json create mode 100644 tests/testthat/test_config_metadataeditr_from_csw.R diff --git a/inst/actions/metadataeditr_create_project.R b/inst/actions/metadataeditr_create_project.R index 9fd7b7ef..4886287a 100644 --- a/inst/actions/metadataeditr_create_project.R +++ b/inst/actions/metadataeditr_create_project.R @@ -1,5 +1,7 @@ function(action, entity, config){ + skipEnrichWithData = if(!is.null(config$profile$options$skipEnrichWithData)) config$profile$options$skipEnrichWithData else FALSE + if(!requireNamespace("metadataeditr", quietly = TRUE)){ stop("The 'metadataeditr-create-project' action requires the 'metadataeditr' package") } @@ -69,23 +71,23 @@ function(action, entity, config){ if(is.null(x$firstName)) x$firstName = NA if(is.null(x$lastName)) x$lastName = NA if(!is.na(x$firstName) && !is.na(x$lastName)) md_contact$individualName = paste(x$firstName, x$lastName) - if(!is.na(x$organizationName)) md_contact$organisationName = x$organizationName - if(!is.na(x$positionName)) md_contact$positionName = x$positionName - if(!is.na(x$role)) md_contact$role = x$role + if(!is.null(x$organizationName)) if(!is.na(x$organizationName)) md_contact$organisationName = x$organizationName + if(!is.null(x$positionName)) if(!is.na(x$positionName)) md_contact$positionName = x$positionName + 
if(!is.null(x$role)) if(!is.na(x$role)) md_contact$role = x$role md_contact$contactInfo = list() md_contact$contactInfo$address = list() - if(!is.na(x$email)) md_contact$contactInfo$address$electronicMailAddress = x$email - if(!is.na(x$postalAddress)) md_contact$contactInfo$address$deliveryPoint = x$postalAddress - if(!is.na(x$city)) md_contact$contactInfo$address$city = x$city - if(!is.na(x$postalCode)) md_contact$contactInfo$address$postalCode = x$postalCode - if(!is.na(x$country)) md_contact$contactInfo$address$country = x$country + if(!is.null(x$email)) if(!is.na(x$email)) md_contact$contactInfo$address$electronicMailAddress = x$email + if(!is.null(x$postalAddress)) if(!is.na(x$postalAddress)) md_contact$contactInfo$address$deliveryPoint = x$postalAddress + if(!is.null(x$city)) if(!is.na(x$city)) md_contact$contactInfo$address$city = x$city + if(!is.null(x$postalCode)) if(!is.na(x$postalCode)) md_contact$contactInfo$address$postalCode = x$postalCode + if(!is.null(x$country)) if(!is.na(x$country)) md_contact$contactInfo$address$country = x$country md_contact$contactInfo$phone = list() - if(!is.na(x$voice)) md_contact$contactInfo$phone$voice = x$voice - if(!is.na(x$facsimile)) md_contact$contactInfo$phone$facsimile = x$facsimile + if(!is.null(x$voice)) if(!is.na(x$voice)) md_contact$contactInfo$phone$voice = x$voice + if(!is.null(x$facsimile)) if(!is.na(x$facsimile)) md_contact$contactInfo$phone$facsimile = x$facsimile md_contact$contactInfo$onlineResource = list() - if(!is.na(x$websiteUrl)) md_contact$contactInfo$onlineResource$linkage = x$websiteUrl - if(!is.na(x$websiteName)) md_contact$contactInfo$onlineResource$name = x$websiteName + if(!is.null(x$websiteUrl)) if(!is.na(x$websiteUrl)) md_contact$contactInfo$onlineResource$linkage = x$websiteUrl + if(!is.null(x$websiteName)) if(!is.na(x$websiteName)) md_contact$contactInfo$onlineResource$name = x$websiteName return(md_contact) } @@ -123,6 +125,10 @@ function(action, entity, config){ production_date = 
Sys.Date() #metadata_information + edition = "" + if(!is.null(entity$descriptions$edition)) if(!is.na(entity$descriptions$edition)){ + edition = entity$descriptions$edition + } project$metadata_information = list( title = entity$titles[["title"]], producers = lapply(producers, function(x){ @@ -132,7 +138,7 @@ function(action, entity, config){ list(name = name) }), production_date = production_date, - version = entity$descriptions$edition + version = edition ) #description (~ ISO 19115) @@ -242,7 +248,7 @@ function(action, entity, config){ date = lapply(entity$dates, function(x){ list(date = x$value, type = x$key) }), - edition = entity$descriptions$edition, + edition = edition, editionDate = if(any(sapply(entity$dates, function(x){x$key == "edition"}))){ entity$dates[sapply(entity$dates, function(x){x$key == "edition"})][[1]]$value }else "", @@ -264,30 +270,40 @@ function(action, entity, config){ if(!is.null(thumbnail$description)) th$fileDescription = thumbnail$description return(th) }) else list(), - resourceFormat = lapply(entity$formats[sapply(entity$formats, function(x){x$key == "resource"})], function(resourceFormat){ - rf = list(name = resourceFormat$name) - if(!is.null(resourceFormat$description)) rf$specification = resourceFormat$description - return(rf) - }), - descriptiveKeywords = do.call(c, lapply(entity$subjects[sapply(entity$subjects, function(x){return(x$key != "topic")})], function(subject){ - lapply(subject$keywords, function(kwd){ - out_kwd = list(type = subject$key, keyword = kwd$name) - if(!is.null(subject$name)) out_kwd$thesaurusName = subject$name - return(out_kwd) + resourceFormat = if(length(entity$formats)>0){ + lapply(entity$formats[sapply(entity$formats, function(x){x$key == "resource"})], function(resourceFormat){ + rf = list(name = resourceFormat$name) + if(!is.null(resourceFormat$description)) rf$specification = resourceFormat$description + return(rf) }) - })), + }else{ list() }, + descriptiveKeywords = if(length(entity$subjects)>0){ 
+ do.call(c, lapply(entity$subjects[sapply(entity$subjects, function(x){return(x$key != "topic")})], function(subject){ + lapply(subject$keywords, function(kwd){ + out_kwd = list(type = subject$key, keyword = kwd$name) + if(!is.null(subject$name)) out_kwd$thesaurusName = subject$name + return(out_kwd) + }) + })) + }else{list()}, resourceConstraints = list( list( legalConstraints = list( - useLimitation = lapply(entity$rights[sapply(entity$rights, function(x){tolower(x$key) == "uselimitation"})], function(cons){ - cons$values[[1]] - }), - accessConstraints = lapply(entity$rights[sapply(entity$rights, function(x){tolower(x$key) == "accessconstraint"})], function(cons){ - cons$values[[1]] - }), - useConstraints = lapply(entity$rights[sapply(entity$rights, function(x){tolower(x$key) == "useconstraint"})], function(cons){ - cons$values[[1]] - }) + useLimitation = if(length(entity$rights)>0){ + lapply(entity$rights[sapply(entity$rights, function(x){tolower(x$key) == "uselimitation"})], function(cons){ + cons$values[[1]] + }) + }else{list()}, + accessConstraints = if(length(entity$rights)>0){ + lapply(entity$rights[sapply(entity$rights, function(x){tolower(x$key) == "accessconstraint"})], function(cons){ + cons$values[[1]] + }) + }else{list()}, + useConstraints = if(length(entity$rights)>0){ + lapply(entity$rights[sapply(entity$rights, function(x){tolower(x$key) == "useconstraint"})], function(cons){ + cons$values[[1]] + }) + }else{list()} ) ) ), @@ -297,10 +313,10 @@ function(action, entity, config){ geographicElement = list( list( geographicBoundingBox = list( - southBoundLatitude = entity$spatial_bbox$ymin, - westBoundLongitude = entity$spatial_bbox$xmin, - northBoundLatitude = entity$spatial_bbox$ymax, - eastBoundLongitude = entity$spatial_bbox$xmax + southBoundLatitude = entity$geo_bbox$ymin, + westBoundLongitude = entity$geo_bbox$xmin, + northBoundLatitude = entity$geo_bbox$ymax, + eastBoundLongitude = entity$geo_bbox$xmax ) ) ), @@ -312,7 +328,11 @@ 
function(action, entity, config){ } ) ), - spatialRepresentationType = entity$data$spatialRepresentationType, + spatialRepresentationType = if(!is.null(entity$data$spatialRepresentationType)){ + entity$data$spatialRepresentationType + }else{ + + }, language = list(entity$language), characterSet = list( list(codeListValue = "utf8") @@ -322,11 +342,13 @@ function(action, entity, config){ #description/distributionInfo project$description$distributionInfo = list( - distributionFormat = lapply(entity$formats[sapply(entity$formats, function(x){x$key == "distribution"})], function(distFormat){ - df = list(name = distFormat$name) - if(!is.null(distFormat$description)) df$specification = distFormat$description - return(df) - }), + distributionFormat = if(length(entity$formats)>0){ + lapply(entity$formats[sapply(entity$formats, function(x){x$key == "distribution"})], function(distFormat){ + df = list(name = distFormat$name) + if(!is.null(distFormat$description)) df$specification = distFormat$description + return(df) + }) + }else{list()}, distributor = lapply(distributors, produce_md_contact) ) @@ -336,7 +358,7 @@ function(action, entity, config){ project$description$dataQualityInfo = list( list( lineage = list( - statement = entity$provenance$statement, + statement = if(!is.null(entity$provenance$statement)) entity$provenance$statement else "", processStep = lapply(entity$provenance$processes, function(process){ list( description = process$description, diff --git a/inst/extdata/workflows/config_metadataeditr_from_csw.json b/inst/extdata/workflows/config_metadataeditr_from_csw.json new file mode 100644 index 00000000..8a00616c --- /dev/null +++ b/inst/extdata/workflows/config_metadataeditr_from_csw.json @@ -0,0 +1,51 @@ +{ + "profile": { + "id": "my-csw-workflow", + "name": "My CSW workflow", + "project": "Test geoflow project", + "organization": "My organization", + "logos": [ + "https://via.placeholder.com/300x150.png/09f/fff?text=geoflow" + ], + "mode": "entity" + }, + 
"metadata": { + "entities": [ + { + "handler": "ogc_csw", + "source": "dc:title like '%fishery purpose%'" + } + ] + }, + "software": [ + { + "id": "csw", + "type": "input", + "software_type": "csw", + "parameters": { + "url": "https://www.fao.org/fishery/geonetwork/srv/eng/csw", + "serviceVersion": "2.0.2", + "logger": "DEBUG" + } + }, + { + "id": "wb-metadataeditr", + "type": "output", + "software_type": "metadataeditr", + "parameters": { + "api_url": "{{ WB_METADATAEDITOR_API_URL }}", + "api_key": "{{ WB_METADATAEDITOR_API_KEY }}", + "verbose": true + }, + "properties": { + "collection_names": "geoflow" + } + } + ], + "actions": [ + { + "id": "metadataeditr-create-project", + "run": true + } + ] +} diff --git a/inst/extdata/workflows/workflows.csv b/inst/extdata/workflows/workflows.csv index 800294eb..42900ffa 100644 --- a/inst/extdata/workflows/workflows.csv +++ b/inst/extdata/workflows/workflows.csv @@ -1,22 +1,29 @@ config_file,description +config_metadata_csw.json,Load metadata entities from an OGC Catalogue Service for the Web (CSW) query +config_metadata_dataverse.json,Load metadata entities/contacts from Dataverse records +config_metadata_dbi.json,Load metadata entities from a database config_metadata_gsheets.json,Load metadata entities/contacts from Google Spreadsheets +config_metadata_gsheets_cloud_upload.json, Load metadata entities/contacts from Google spreadsheets and upload into OCS cloud config_metadata_gsheets_eml.json, Load metadata entities/contacts from Google Spreadsheets and genrate EML metadata config_metadata_gsheets_iso19115.json,Load metadata entities/contacts from Google Spreadsheets and generate ISO 19115/19139 metadata config_metadata_gsheets_iso19115_geonetwork.json,Load metadata entities/contacts from Google Spreadsheet; generate ISO 19115/19139 metadata and publish them in GeoNetwork (using the GeoNetwork API) +config_metadata_gsheets_iso19115_geonetwork_datacite.json,Load metadata entities/contacts from Google Spreadsheet; generate 
ISO 19115/19139 metadata and publish them in GeoNetwork (using the GeoNetwork API) with DOI publication to DataCite config_metadata_gsheets_iso19115_inspire.json,Load metadata entities/contacts from Google Spreadsheets and generate ISO 19115/19139 metadata (with INSPIRE validation) config_metadata_gsheets_iso19115_inspire_geonetwork.json,Load metadata entities/contacts from Google Spreadsheet; generate ISO 19115/19139 metadata (with INSPIRE validation) and publish them in GeoNetwork (using the GeoNetwork API) config_metadata_gsheets_iso19115_inspire_geonetwork_with_uuid.json,Load metadata entities/contacts from Google Spreadsheet; generate ISO 19115/19139 metadata (with INSPIRE validation) and publish them in GeoNetwork (using the GeoNetwork API). Metadata identification with UUIDs. +config_metadata_gsheets_sdi_geonode_shp.json,Load metadata/contacts from Google spreadsheet and publish in GeoNode +config_metadata_gsheets_sdi_geoserver_geotiff.json,Load metadata entities/contacts from Google Spreadsheets and publish GeoTIFF based layer on GeoServer +config_metadata_gsheets_sdi_geoserver_geotiff_directory.json,Load metadata entities/contacts from Google Spreadsheets and publish multiple GeoTIFF based layers on GeoServer for a single entity +config_metadata_gsheets_sdi_geoserver_geotiff_view.json,Load metadata entities/contacts from Google Spreadsheets and publish GeoTIFF based view layer on GeoServer +config_metadata_gsheets_sdi_geoserver_shp.json,Load metadata entities/contacts from Google Spreadsheets and publish ESRI shapefile based layer on GeoServer +config_metadata_gsheets_sdi_postgis_dbi.json,Load metadata entities/contacts from Google spreadsheets and upload a shapefile into a Postgis database config_metadata_gsheets_zenodo.json,Load metadata entities/contacts from Google Spreadsheets and deposit records on Zenodo config_metadata_gsheets_zenodo_clean_options.json,Load metadata entities/contacts from Google Spreadsheets and deposit records on Zenodo, with 
deposit cleaning options config_metadata_gsheets_zenodo_full.json,Load metadata entities/contacts from Google Spreadsheets, deposit records on Zenodo and publish them -config_metadata_gsheets_sdi_postgis_dbi.json,Load metadata entities/contacts from Google spreadsheets and upload a shapefile into a Postgis database -config_metadata_gsheets_sdi_geoserver_shp.json,Load metadata entities/contacts from Google Spreadsheets and publish ESRI shapefile based layer on GeoServer -config_metadata_gsheets_sdi_geoserver_geotiff.json,Load metadata entities/contacts from Google Spreadsheets and publish GeoTIFF based layer on GeoServer -config_metadata_gsheets_sdi_geoserver_geotiff_view.json,Load metadata entities/contacts from Google Spreadsheets and publish GeoTIFF based view layer on GeoServer -config_metadata_gsheets_sdi_geoserver_geotiff_directory.json,Load metadata entities/contacts from Google Spreadsheets and publish multiple GeoTIFF based layers on GeoServer for a single entity +config_metadata_ncml.json,Load a metadata entity/contacts from a NCML source config_metadata_ocs.json,Load metadata entities/contacts/dictionary from CSV files hosted on a cloud supporting OCS API (eg. 
nextcloud/owncloud) -config_metadata_dataverse.json,Load metadata entities/contacts from Dataverse records +config_metadata_thredds_noaa.json,Load metadata entities/contact from a Thredds data server (NOAA example) config_metadata_zenodo.json,Load metadata entities/contacts from Zenodo deposits/records config_metadata_zenodo_with_options.json,Load metadata entities/contacts from Zenodo deposits/records (with options declared to fetch from public records and by DOI) -config_metadata_ncml.json,Load a metadata entity/contacts from a NCML source -config_metadata_thredds_noaa.json,Load metadata entities/contact from a Thredds data server (NOAA example) \ No newline at end of file +config_metadataeditr_from_csw.json,Load metadata entities from an OGC Catalogue Service for the Web (CSW) query and publish into the World Bank Metadata editor +config_metadataeditr_from_gsheets.json,Load metadata entities/contacts from Google Spreadsheets and publish into the World Bank Metadata editor diff --git a/inst/metadata/entity/entity_handler_csw.R b/inst/metadata/entity/entity_handler_csw.R index aa8e6e47..377df6b2 100644 --- a/inst/metadata/entity/entity_handler_csw.R +++ b/inst/metadata/entity/entity_handler_csw.R @@ -56,7 +56,13 @@ handle_entities_csw <- function(handler, source, config, handle = TRUE){ #type if(length(rec$hierarchyLevel)>0) entity$setType(key = "generic", type = rec$hierarchyLevel[[1]]$attrs$codeListValue) #language - entity$setLanguage(rec$language$attrs$codeListValue) + lang = rec$language #handled as a list carrying a codeListValue attribute, or as a plain character value + if(is.list(lang)){ + if(!is.null(lang$attrs$codeListValue)) entity$setLanguage(lang$attrs$codeListValue) + }else if(is.character(lang)){ + entity$setLanguage(lang) + } + #srid if(length(rec$referenceSystemInfo)>0){ code = rec$referenceSystemInfo[[1]]$referenceSystemIdentifier$code @@ -64,7 +70,7 @@ handle_entities_csw <- function(handler, source, config, handle = TRUE){ code = code_parts[length(code_parts)] code_parts = unlist(strsplit(code, ":")) code = 
code_parts[length(code_parts)] - if(code == "WGS 84") code = 4326 + if(startsWith(code,"WGS 84")||startsWith(code,"WGS84")) code = 4326 entity$setSrid(suppressWarnings(as.integer(code))) } @@ -365,6 +371,15 @@ handle_entities_csw <- function(handler, source, config, handle = TRUE){ } } + #spatialRepresentationInfo + if(length(rec$spatialRepresentationInfo)>0){ + if(is.null(entity$data)) entity$data = geoflow_data$new() + entity$data$spatialRepresentationType = switch(class(rec$spatialRepresentationInfo[[1]])[1], + "ISOVectorSpatialRepresentation" = "vector", + "ISOGridSpatialRepresentation" = "grid" + ) + } + return(entity) }) diff --git a/tests/testthat/test_config_metadataeditr_from_csw.R b/tests/testthat/test_config_metadataeditr_from_csw.R new file mode 100644 index 00000000..54f88e84 --- /dev/null +++ b/tests/testthat/test_config_metadataeditr_from_csw.R @@ -0,0 +1,42 @@ +# test_config_metadataeditr_from_csw.R +# Author: Emmanuel Blondel +# +# Description: Integration tests for config_metadataeditr_from_csw.json workflow +#======================= +require(geoflow, quietly = TRUE) +require(testthat) + +cfg_file = system.file("extdata/workflows/config_metadataeditr_from_csw.json", package = "geoflow") + +#init +test_that("init",{ + CFG <- geoflow::initWorkflow(cfg_file) + expect_is(CFG$metadata$content, "list") + expect_equal(length(CFG$metadata$content), 1L) + expect_equal(names(CFG$metadata$content), c("entities")) + expect_equal(length(CFG$metadata$content$contacts), 0L) + expect_equal(length(CFG$getContacts()), 0L) + expect_equal(length(CFG$metadata$content$entities), 1L) + expect_equal(length(CFG$getEntities()), 1L) + expect_equal(length(CFG$actions), 1L) + expect_equal(length(CFG$software), 2L) + expect_equal(names(CFG$software), c("input", "output")) + expect_equal(length(CFG$software$input), 2L) + expect_equal(length(CFG$software$output), 2L) +}) + +#debug +test_that("debug",{ + DEBUG <- geoflow::debugWorkflow(cfg_file) + expect_equal(names(DEBUG), c("config", 
"entity")) + expect_is(DEBUG$config, "list") + expect_is(DEBUG$entity, "geoflow_entity") + expect_equal(DEBUG$entity$identifiers[["id"]], "ac02a460-da52-11dc-9d70-0017f293bd28") + +}) + +#execute +test_that("execute",{ + EXEC <- geoflow::executeWorkflow(cfg_file, dir = ".") + expect_true(dir.exists(EXEC)) +}) diff --git a/tests/testthat/test_config_metadataeditr_from_gsheets.R b/tests/testthat/test_config_metadataeditr_from_gsheets.R index a04e76bb..6bf40dd4 100644 --- a/tests/testthat/test_config_metadataeditr_from_gsheets.R +++ b/tests/testthat/test_config_metadataeditr_from_gsheets.R @@ -1,7 +1,7 @@ # test_config_metadata_gsheets.R # Author: Emmanuel Blondel # -# Description: Integration tests for config_metadata_gsheets.json workflow +# Description: Integration tests for config_metadataeditr_from_gsheets.json workflow #======================= require(geoflow, quietly = TRUE) require(testthat) @@ -12,13 +12,13 @@ cfg_file = system.file("extdata/workflows/config_metadataeditr_from_gsheets.json test_that("init",{ CFG <- geoflow::initWorkflow(cfg_file) expect_is(CFG$metadata$content, "list") - expect_equal(length(CFG$metadata$content), 2L) - expect_equal(names(CFG$metadata$content), c("contacts", "entities")) + expect_equal(length(CFG$metadata$content), 3L) + expect_equal(names(CFG$metadata$content), c("dictionary","contacts", "entities")) expect_equal(length(CFG$metadata$content$contacts), 4L) expect_equal(length(CFG$getContacts()), 4L) expect_equal(length(CFG$metadata$content$entities), 1L) expect_equal(length(CFG$getEntities()), 1L) - expect_equal(length(CFG$actions), 1L) + expect_equal(length(CFG$actions), 3L) expect_equal(length(CFG$software), 2L) expect_equal(names(CFG$software), c("input", "output")) expect_equal(length(CFG$software$input), 0L)