Skip to content

Commit

Permalink
#390 test feeding metadataeditr from CSW, consolidate CSW entity hand…
Browse files Browse the repository at this point in the history
…ler, metadataeditr project creator
  • Loading branch information
eblondel committed Jun 25, 2024
1 parent 98bb0eb commit a5a3c3b
Show file tree
Hide file tree
Showing 6 changed files with 196 additions and 59 deletions.
112 changes: 67 additions & 45 deletions inst/actions/metadataeditr_create_project.R
Original file line number Diff line number Diff line change
@@ -1,5 +1,7 @@
function(action, entity, config){

skipEnrichWithData = if(!is.null(config$profile$options$skipEnrichWithData)) config$profile$options$skipEnrichWithData else FALSE

if(!requireNamespace("metadataeditr", quietly = TRUE)){
stop("The 'metadataeditr-create-project' action requires the 'metadataeditr' package")
}
Expand Down Expand Up @@ -69,23 +71,23 @@ function(action, entity, config){
if(is.null(x$firstName)) x$firstName = NA
if(is.null(x$lastName)) x$lastName = NA
if(!is.na(x$firstName) && !is.na(x$lastName)) md_contact$individualName = paste(x$firstName, x$lastName)
if(!is.na(x$organizationName)) md_contact$organisationName = x$organizationName
if(!is.na(x$positionName)) md_contact$positionName = x$positionName
if(!is.na(x$role)) md_contact$role = x$role
if(!is.null(x$organizationName)) if(!is.na(x$organizationName)) md_contact$organisationName = x$organizationName
if(!is.null(x$positionName)) if(!is.na(x$positionName)) md_contact$positionName = x$positionName
if(!is.null(x$role)) if(!is.na(x$role)) md_contact$role = x$role

md_contact$contactInfo = list()
md_contact$contactInfo$address = list()
if(!is.na(x$email)) md_contact$contactInfo$address$electronicMailAddress = x$email
if(!is.na(x$postalAddress)) md_contact$contactInfo$address$deliveryPoint = x$postalAddress
if(!is.na(x$city)) md_contact$contactInfo$address$city = x$city
if(!is.na(x$postalCode)) md_contact$contactInfo$address$postalCode = x$postalCode
if(!is.na(x$country)) md_contact$contactInfo$address$country = x$country
if(!is.null(x$email)) if(!is.na(x$email)) md_contact$contactInfo$address$electronicMailAddress = x$email
if(!is.null(x$postalAddress)) if(!is.na(x$postalAddress)) md_contact$contactInfo$address$deliveryPoint = x$postalAddress
if(!is.null(x$city)) if(!is.na(x$city)) md_contact$contactInfo$address$city = x$city
if(!is.null(x$postalCode)) if(!is.na(x$postalCode)) md_contact$contactInfo$address$postalCode = x$postalCode
if(!is.null(x$country)) if(!is.na(x$country)) md_contact$contactInfo$address$country = x$country
md_contact$contactInfo$phone = list()
if(!is.na(x$voice)) md_contact$contactInfo$phone$voice = x$voice
if(!is.na(x$facsimile)) md_contact$contactInfo$phone$facsimile = x$facsimile
#voice may be missing (NULL) or NA: only copy it over when it is actually set
if(!is.null(x$voice)) if(!is.na(x$voice)) md_contact$contactInfo$phone$voice = x$voice
if(!is.null(x$facsimile)) if(!is.na(x$facsimile)) md_contact$contactInfo$phone$facsimile = x$facsimile
md_contact$contactInfo$onlineResource = list()
if(!is.na(x$websiteUrl)) md_contact$contactInfo$onlineResource$linkage = x$websiteUrl
if(!is.na(x$websiteName)) md_contact$contactInfo$onlineResource$name = x$websiteName
if(!is.null(x$websiteUrl)) if(!is.na(x$websiteUrl)) md_contact$contactInfo$onlineResource$linkage = x$websiteUrl
if(!is.null(x$websiteName)) if(!is.na(x$websiteName)) md_contact$contactInfo$onlineResource$name = x$websiteName

return(md_contact)
}
Expand Down Expand Up @@ -123,6 +125,10 @@ function(action, entity, config){
production_date = Sys.Date()

#metadata_information
edition = ""
if(!is.null(entity$descriptions$edition)) if(!is.na(entity$descriptions$edition)){
edition = entity$descriptions$edition
}
project$metadata_information = list(
title = entity$titles[["title"]],
producers = lapply(producers, function(x){
Expand All @@ -132,7 +138,7 @@ function(action, entity, config){
list(name = name)
}),
production_date = production_date,
version = entity$descriptions$edition
version = edition
)

#description (~ ISO 19115)
Expand Down Expand Up @@ -242,7 +248,7 @@ function(action, entity, config){
date = lapply(entity$dates, function(x){
list(date = x$value, type = x$key)
}),
edition = entity$descriptions$edition,
edition = edition,
editionDate = if(any(sapply(entity$dates, function(x){x$key == "edition"}))){
entity$dates[sapply(entity$dates, function(x){x$key == "edition"})][[1]]$value
}else "",
Expand All @@ -264,30 +270,40 @@ function(action, entity, config){
if(!is.null(thumbnail$description)) th$fileDescription = thumbnail$description
return(th)
}) else list(),
resourceFormat = lapply(entity$formats[sapply(entity$formats, function(x){x$key == "resource"})], function(resourceFormat){
rf = list(name = resourceFormat$name)
if(!is.null(resourceFormat$description)) rf$specification = resourceFormat$description
return(rf)
}),
descriptiveKeywords = do.call(c, lapply(entity$subjects[sapply(entity$subjects, function(x){return(x$key != "topic")})], function(subject){
lapply(subject$keywords, function(kwd){
out_kwd = list(type = subject$key, keyword = kwd$name)
if(!is.null(subject$name)) out_kwd$thesaurusName = subject$name
return(out_kwd)
resourceFormat = if(length(entity$formats)>0){
lapply(entity$formats[sapply(entity$formats, function(x){x$key == "resource"})], function(resourceFormat){
rf = list(name = resourceFormat$name)
if(!is.null(resourceFormat$description)) rf$specification = resourceFormat$description
return(rf)
})
})),
}else{ list() },
descriptiveKeywords = if(length(entity$subjects)>0){
do.call(c, lapply(entity$subjects[sapply(entity$subjects, function(x){return(x$key != "topic")})], function(subject){
lapply(subject$keywords, function(kwd){
out_kwd = list(type = subject$key, keyword = kwd$name)
if(!is.null(subject$name)) out_kwd$thesaurusName = subject$name
return(out_kwd)
})
}))
}else{list()},
resourceConstraints = list(
list(
legalConstraints = list(
useLimitation = lapply(entity$rights[sapply(entity$rights, function(x){tolower(x$key) == "uselimitation"})], function(cons){
cons$values[[1]]
}),
accessConstraints = lapply(entity$rights[sapply(entity$rights, function(x){tolower(x$key) == "accessconstraint"})], function(cons){
cons$values[[1]]
}),
useConstraints = lapply(entity$rights[sapply(entity$rights, function(x){tolower(x$key) == "useconstraint"})], function(cons){
cons$values[[1]]
})
useLimitation = if(length(entity$rights)>0){
lapply(entity$rights[sapply(entity$rights, function(x){tolower(x$key) == "uselimitation"})], function(cons){
cons$values[[1]]
})
}else{list()},
accessConstraints = if(length(entity$rights)>0){
lapply(entity$rights[sapply(entity$rights, function(x){tolower(x$key) == "accessconstraint"})], function(cons){
cons$values[[1]]
})
}else{list()},
useConstraints = if(length(entity$rights)>0){
lapply(entity$rights[sapply(entity$rights, function(x){tolower(x$key) == "useconstraint"})], function(cons){
cons$values[[1]]
})
}else{list()}
)
)
),
Expand All @@ -297,10 +313,10 @@ function(action, entity, config){
geographicElement = list(
list(
geographicBoundingBox = list(
southBoundLatitude = entity$spatial_bbox$ymin,
westBoundLongitude = entity$spatial_bbox$xmin,
northBoundLatitude = entity$spatial_bbox$ymax,
eastBoundLongitude = entity$spatial_bbox$xmax
southBoundLatitude = entity$geo_bbox$ymin,
westBoundLongitude = entity$geo_bbox$xmin,
northBoundLatitude = entity$geo_bbox$ymax,
eastBoundLongitude = entity$geo_bbox$xmax
)
)
),
Expand All @@ -312,7 +328,11 @@ function(action, entity, config){
}
)
),
spatialRepresentationType = entity$data$spatialRepresentationType,
spatialRepresentationType = if(!is.null(entity$data$spatialRepresentationType)){
entity$data$spatialRepresentationType
}else{

},
language = list(entity$language),
characterSet = list(
list(codeListValue = "utf8")
Expand All @@ -322,11 +342,13 @@ function(action, entity, config){

#description/distributionInfo
project$description$distributionInfo = list(
distributionFormat = lapply(entity$formats[sapply(entity$formats, function(x){x$key == "distribution"})], function(distFormat){
df = list(name = distFormat$name)
if(!is.null(distFormat$description)) df$specification = distFormat$description
return(df)
}),
distributionFormat = if(length(entity$formats)>0){
lapply(entity$formats[sapply(entity$formats, function(x){x$key == "distribution"})], function(distFormat){
df = list(name = distFormat$name)
if(!is.null(distFormat$description)) df$specification = distFormat$description
return(df)
})
}else{list()},
distributor = lapply(distributors, produce_md_contact)
)

Expand All @@ -336,7 +358,7 @@ function(action, entity, config){
project$description$dataQualityInfo = list(
list(
lineage = list(
statement = entity$provenance$statement,
statement = if(!is.null(entity$provenance$statement)) entity$provenance$statement else "",
processStep = lapply(entity$provenance$processes, function(process){
list(
description = process$description,
Expand Down
51 changes: 51 additions & 0 deletions inst/extdata/workflows/config_metadataeditr_from_csw.json
Original file line number Diff line number Diff line change
@@ -0,0 +1,51 @@
{
"profile": {
"id": "my-csw-workflow",
"name": "My CSW workflow",
"project": "Test geoflow project",
"organization": "My organization",
"logos": [
"https://via.placeholder.com/300x150.png/09f/fff?text=geoflow"
],
"mode": "entity"
},
"metadata": {
"entities": [
{
"handler": "ogc_csw",
"source": "dc:title like '%fishery purpose%'"
}
]
},
"software": [
{
"id": "csw",
"type": "input",
"software_type": "csw",
"parameters": {
"url": "https://www.fao.org/fishery/geonetwork/srv/eng/csw",
"serviceVersion": "2.0.2",
"logger": "DEBUG"
}
},
{
"id": "wb-metadataeditr",
"type": "output",
"software_type": "metadataeditr",
"parameters": {
"api_url": "{{ WB_METADATAEDITOR_API_URL }}",
"api_key": "{{ WB_METADATAEDITOR_API_KEY }}",
"verbose": true
},
"properties": {
"collection_names": "geoflow"
}
}
],
"actions": [
{
"id": "metadataeditr-create-project",
"run": true
}
]
}
23 changes: 15 additions & 8 deletions inst/extdata/workflows/workflows.csv
Original file line number Diff line number Diff line change
@@ -1,22 +1,29 @@
config_file,description
config_metadata_csw.json,Load metadata entities from an OGC Catalogue Service for the Web (CSW) query
config_metadata_dataverse.json,Load metadata entities/contacts from Dataverse records
config_metadata_dbi.json,Load metadata entities from a database
config_metadata_gsheets.json,Load metadata entities/contacts from Google Spreadsheets
config_metadata_gsheets_cloud_upload.json, Load metadata entities/contacts from Google spreadsheets and upload into OCS cloud
config_metadata_gsheets_eml.json, Load metadata entities/contacts from Google Spreadsheets and generate EML metadata
config_metadata_gsheets_iso19115.json,Load metadata entities/contacts from Google Spreadsheets and generate ISO 19115/19139 metadata
config_metadata_gsheets_iso19115_geonetwork.json,Load metadata entities/contacts from Google Spreadsheet; generate ISO 19115/19139 metadata and publish them in GeoNetwork (using the GeoNetwork API)
config_metadata_gsheets_iso19115_geonetwork_datacite.json,Load metadata entities/contacts from Google Spreadsheet; generate ISO 19115/19139 metadata and publish them in GeoNetwork (using the GeoNetwork API) with DOI publication to DataCite
config_metadata_gsheets_iso19115_inspire.json,Load metadata entities/contacts from Google Spreadsheets and generate ISO 19115/19139 metadata (with INSPIRE validation)
config_metadata_gsheets_iso19115_inspire_geonetwork.json,Load metadata entities/contacts from Google Spreadsheet; generate ISO 19115/19139 metadata (with INSPIRE validation) and publish them in GeoNetwork (using the GeoNetwork API)
config_metadata_gsheets_iso19115_inspire_geonetwork_with_uuid.json,Load metadata entities/contacts from Google Spreadsheet; generate ISO 19115/19139 metadata (with INSPIRE validation) and publish them in GeoNetwork (using the GeoNetwork API). Metadata identification with UUIDs.
config_metadata_gsheets_sdi_geonode_shp.json,Load metadata/contacts from Google spreadsheet and publish in GeoNode
config_metadata_gsheets_sdi_geoserver_geotiff.json,Load metadata entities/contacts from Google Spreadsheets and publish GeoTIFF based layer on GeoServer
config_metadata_gsheets_sdi_geoserver_geotiff_directory.json,Load metadata entities/contacts from Google Spreadsheets and publish multiple GeoTIFF based layers on GeoServer for a single entity
config_metadata_gsheets_sdi_geoserver_geotiff_view.json,Load metadata entities/contacts from Google Spreadsheets and publish GeoTIFF based view layer on GeoServer
config_metadata_gsheets_sdi_geoserver_shp.json,Load metadata entities/contacts from Google Spreadsheets and publish ESRI shapefile based layer on GeoServer
config_metadata_gsheets_sdi_postgis_dbi.json,Load metadata entities/contacts from Google spreadsheets and upload a shapefile into a Postgis database
config_metadata_gsheets_zenodo.json,Load metadata entities/contacts from Google Spreadsheets and deposit records on Zenodo
config_metadata_gsheets_zenodo_clean_options.json,Load metadata entities/contacts from Google Spreadsheets and deposit records on Zenodo, with deposit cleaning options
config_metadata_gsheets_zenodo_full.json,Load metadata entities/contacts from Google Spreadsheets, deposit records on Zenodo and publish them
config_metadata_gsheets_sdi_postgis_dbi.json,Load metadata entities/contacts from Google spreadsheets and upload a shapefile into a Postgis database
config_metadata_gsheets_sdi_geoserver_shp.json,Load metadata entities/contacts from Google Spreadsheets and publish ESRI shapefile based layer on GeoServer
config_metadata_gsheets_sdi_geoserver_geotiff.json,Load metadata entities/contacts from Google Spreadsheets and publish GeoTIFF based layer on GeoServer
config_metadata_gsheets_sdi_geoserver_geotiff_view.json,Load metadata entities/contacts from Google Spreadsheets and publish GeoTIFF based view layer on GeoServer
config_metadata_gsheets_sdi_geoserver_geotiff_directory.json,Load metadata entities/contacts from Google Spreadsheets and publish multiple GeoTIFF based layers on GeoServer for a single entity
config_metadata_ncml.json,Load a metadata entity/contacts from a NCML source
config_metadata_ocs.json,Load metadata entities/contacts/dictionary from CSV files hosted on a cloud supporting OCS API (eg. nextcloud/owncloud)
config_metadata_dataverse.json,Load metadata entities/contacts from Dataverse records
config_metadata_thredds_noaa.json,Load metadata entities/contact from a Thredds data server (NOAA example)
config_metadata_zenodo.json,Load metadata entities/contacts from Zenodo deposits/records
config_metadata_zenodo_with_options.json,Load metadata entities/contacts from Zenodo deposits/records (with options declared to fetch from public records and by DOI)
config_metadata_ncml.json,Load a metadata entity/contacts from a NCML source
config_metadata_thredds_noaa.json,Load metadata entities/contact from a Thredds data server (NOAA example)
config_metadataeditr_from_csw.json,Load metadata entities from an OGC Catalogue Service for the Web (CSW) query and publish into the World Bank Metadata editor
config_metadataeditr_from_gsheets.json,Load metadata entities/contacts from Google Spreadsheets and publish into the World Bank Metadata editor
19 changes: 17 additions & 2 deletions inst/metadata/entity/entity_handler_csw.R
Original file line number Diff line number Diff line change
Expand Up @@ -56,15 +56,21 @@ handle_entities_csw <- function(handler, source, config, handle = TRUE){
#type
if(length(rec$hierarchyLevel)>0) entity$setType(key = "generic", type = rec$hierarchyLevel[[1]]$attrs$codeListValue)
#language
entity$setLanguage(rec$language$attrs$codeListValue)
#the record language is either a structure carrying a codelist value, or a plain
#character string. Braces are required on the outer 'if': without them the 'else'
#binds to the inner 'if' and the character case is never reached.
lang = rec$language
if(is.list(lang)){
  if(!is.null(lang$attrs$codeListValue)){
    entity$setLanguage(lang$attrs$codeListValue)
  }
}else if(is.character(lang)){
  entity$setLanguage(lang)
}

#srid
if(length(rec$referenceSystemInfo)>0){
code = rec$referenceSystemInfo[[1]]$referenceSystemIdentifier$code
code_parts = unlist(strsplit(code, "/"))
code = code_parts[length(code_parts)]
code_parts = unlist(strsplit(code, ":"))
code = code_parts[length(code_parts)]
if(code == "WGS 84") code = 4326
if(startsWith(code,"WGS 84")|startsWith(code,"WGS84")) code = 4326
entity$setSrid(suppressWarnings(as.integer(code)))
}

Expand Down Expand Up @@ -365,6 +371,15 @@ handle_entities_csw <- function(handler, source, config, handle = TRUE){
}
}

#spatialRepresentationInfo
if(length(rec$spatialRepresentationInfo)>0){
if(is.null(entity$data)) entity$data = geoflow_data$new()
entity$data$spatialRepresentationType = switch(class(rec$spatialRepresentationInfo[[1]])[1],
"ISOVectorSpatialRepresentation" = "vector",
"ISOGridSpatialRepresentation" = "grid"
)
}

return(entity)
})

Expand Down
42 changes: 42 additions & 0 deletions tests/testthat/test_config_metadataeditr_from_csw.R
Original file line number Diff line number Diff line change
@@ -0,0 +1,42 @@
# test_config_metadataeditr_from_csw.R
# Author: Emmanuel Blondel <[email protected]>
#
# Description: Integration tests for config_metadataeditr_from_csw.json workflow
#=======================
require(geoflow, quietly = TRUE)
require(testthat)

cfg_file = system.file("extdata/workflows/config_metadataeditr_from_csw.json", package = "geoflow")

#init - load the configuration and check what it declares: entities only
#(no contacts), a single action, and both input and output software
test_that("init",{
  cfg <- geoflow::initWorkflow(cfg_file)
  md_content <- cfg$metadata$content

  #metadata content
  expect_is(md_content, "list")
  expect_length(md_content, 1L)
  expect_equal(names(md_content), "entities")
  expect_length(md_content$contacts, 0L)
  expect_length(cfg$getContacts(), 0L)
  expect_length(md_content$entities, 1L)
  expect_length(cfg$getEntities(), 1L)

  #actions
  expect_length(cfg$actions, 1L)

  #software
  sw <- cfg$software
  expect_length(sw, 2L)
  expect_equal(names(sw), c("input", "output"))
  expect_length(sw$input, 2L)
  expect_length(sw$output, 2L)
})

#debug - run the workflow in debug mode and inspect the config/entity pair it returns
test_that("debug",{
  dbg <- geoflow::debugWorkflow(cfg_file)
  expect_equal(names(dbg), c("config", "entity"))
  expect_is(dbg$config, "list")

  dbg_entity <- dbg$entity
  expect_is(dbg_entity, "geoflow_entity")
  expect_equal(dbg_entity$identifiers[["id"]], "ac02a460-da52-11dc-9d70-0017f293bd28")
})

#execute - run the whole workflow from the current directory; the value
#returned by executeWorkflow must be an existing directory
test_that("execute",{
  job_dir <- geoflow::executeWorkflow(cfg_file, dir = ".")
  expect_true(dir.exists(job_dir))
})
8 changes: 4 additions & 4 deletions tests/testthat/test_config_metadataeditr_from_gsheets.R
Original file line number Diff line number Diff line change
@@ -1,7 +1,7 @@
# test_config_metadataeditr_from_gsheets.R
# Author: Emmanuel Blondel <[email protected]>
#
# Description: Integration tests for config_metadata_gsheets.json workflow
# Description: Integration tests for config_metadataeditr_from_gsheets.json workflow
#=======================
require(geoflow, quietly = TRUE)
require(testthat)
Expand All @@ -12,13 +12,13 @@ cfg_file = system.file("extdata/workflows/config_metadataeditr_from_gsheets.json
test_that("init",{
CFG <- geoflow::initWorkflow(cfg_file)
expect_is(CFG$metadata$content, "list")
expect_equal(length(CFG$metadata$content), 2L)
expect_equal(names(CFG$metadata$content), c("contacts", "entities"))
expect_equal(length(CFG$metadata$content), 3L)
expect_equal(names(CFG$metadata$content), c("dictionary","contacts", "entities"))
expect_equal(length(CFG$metadata$content$contacts), 4L)
expect_equal(length(CFG$getContacts()), 4L)
expect_equal(length(CFG$metadata$content$entities), 1L)
expect_equal(length(CFG$getEntities()), 1L)
expect_equal(length(CFG$actions), 1L)
expect_equal(length(CFG$actions), 3L)
expect_equal(length(CFG$software), 2L)
expect_equal(names(CFG$software), c("input", "output"))
expect_equal(length(CFG$software$input), 0L)
Expand Down

0 comments on commit a5a3c3b

Please sign in to comment.