Skip to content

Commit

Permalink
Schema 4.6 (#193)
Browse files Browse the repository at this point in the history
* New 4.6 xsd files

* Add support for new attributes for schema 4.6

* Update the kernel-4 files and test to check updated changes

* Copied the XML from Schema to bolognese

* Revert "Copied the XML from Schema to bolognese"

This reverts commit 15a47cd.

* Implement writing Schema 4.6 values to Schema.org

* Adds support for Schema 4.6 changes in Citeproc and Schema.org reading/writing

* Support for reading Schema 4.6 values from crossref

* Fix for multiple objects in person_group

* Test for Schema 4.6 values in datacite writer

* Updated bolognese gem version

---------

Co-authored-by: Suzanne Vogt <[email protected]>
Co-authored-by: codycooperross <[email protected]>
  • Loading branch information
3 people authored Nov 14, 2024
1 parent b3721a2 commit ddddadf
Show file tree
Hide file tree
Showing 48 changed files with 2,628 additions and 28 deletions.
2 changes: 1 addition & 1 deletion Gemfile.lock
Original file line number Diff line number Diff line change
@@ -1,7 +1,7 @@
PATH
remote: .
specs:
bolognese (2.2.0)
bolognese (2.3.0)
activesupport (>= 4.2.5)
benchmark_methods (~> 0.7)
bibtex-ruby (>= 5.1.0)
Expand Down
1 change: 1 addition & 0 deletions lib/bolognese/metadata_utils.rb
Original file line number Diff line number Diff line change
Expand Up @@ -153,6 +153,7 @@ def citeproc_hsh
"language" => language,
"author" => author,
"contributor" => to_citeproc(contributors),
"translator" => contributors ? to_citeproc(contributors.select { |c| c["contributorType"] == "Translator" }) : nil,
"issued" => get_date(dates, "Issued") ? get_date_parts(get_date(dates, "Issued")) : get_date_parts(publication_year.to_s),
"submitted" => Array.wrap(dates).find { |d| d["dateType"] == "Submitted" }.to_h.fetch("__content__", nil),
"abstract" => parse_attributes(descriptions, content: "description", first: true),
Expand Down
6 changes: 6 additions & 0 deletions lib/bolognese/readers/citeproc_reader.rb
Original file line number Diff line number Diff line change
Expand Up @@ -52,6 +52,12 @@ def read_citeproc(string: nil, **options)
[{ "nameType" => "Organizational", "name" => ":(unav)" }]
end
contributors = get_authors(from_citeproc(Array.wrap(meta.fetch("editor", nil))))
translators = get_authors(from_citeproc(Array.wrap(meta.fetch("translator", nil))))
translators.each do |translator|
translator["contributorType"] = "Translator"
end
contributors += translators

dates = if date = get_date_from_date_parts(meta.fetch("issued", nil))
if Date.edtf(date).present?
[{ "date" => date,
Expand Down
40 changes: 37 additions & 3 deletions lib/bolognese/readers/crossref_reader.rb
Original file line number Diff line number Diff line change
Expand Up @@ -4,6 +4,12 @@ module Bolognese
module Readers
module CrossrefReader
# CrossRef types from https://api.crossref.org/types

# Maps a Crossref "contributor_role" value to the DataCite "contributorType"
# written for that person. Roles missing from the table look up as nil.
# Frozen so the shared lookup table cannot be mutated at runtime.
CR_TO_DC_CONTRIBUTOR_TYPES = {
  "editor" => "Editor",
  "translator" => "Translator"
}.freeze

def get_crossref(id: nil, **options)
return { "string" => nil, "state" => "not_found" } unless id.present?

Expand Down Expand Up @@ -138,7 +144,7 @@ def read_crossref(string: nil, **options)

state = meta.present? || read_options.present? ? "findable" : "not_found"

related_identifiers = Array.wrap(crossref_is_part_of(journal_metadata)) + Array.wrap(crossref_references(bibliographic_metadata))
related_identifiers = Array.wrap(crossref_is_part_of(journal_metadata)) + Array.wrap(crossref_references(bibliographic_metadata)) + Array.wrap(crossref_has_translation(program_metadata)) + Array.wrap(crossref_is_translation_of(program_metadata))

container = if journal_metadata.present?
issn = normalize_issn(journal_metadata.to_h.fetch("issn", nil))
Expand Down Expand Up @@ -187,7 +193,7 @@ def read_crossref(string: nil, **options)
"titles" => titles,
"identifiers" => identifiers,
"creators" => crossref_people(bibliographic_metadata, "author"),
"contributors" => crossref_people(bibliographic_metadata, "editor"),
"contributors" => crossref_people(bibliographic_metadata, "editor") + crossref_people(bibliographic_metadata, "translator"),
"funding_references" => crossref_funding_reference(program_metadata),
"publisher" => publisher,
"container" => container,
Expand Down Expand Up @@ -276,13 +282,15 @@ def crossref_people(bibliographic_metadata, contributor_role)
end
end.compact

contributor_type = CR_TO_DC_CONTRIBUTOR_TYPES[a["contributor_role"]]

{ "nameType" => "Personal",
"nameIdentifiers" => name_identifiers,
"name" => [family_name, given_name].compact.join(", "),
"givenName" => given_name,
"familyName" => family_name,
"affiliation" => affiliation.presence,
"contributorType" => contributor_role == "editor" ? "Editor" : nil }.compact
"contributorType" => contributor_type }.compact
else
{ "nameType" => "Organizational",
"name" => a["name"] || a["__content__"] }
Expand Down Expand Up @@ -362,6 +370,32 @@ def crossref_references(bibliographic_metadata)
end
end.compact.unwrap
end

# Collects the DOIs of translations of this work from Crossref relation metadata.
#
# Reads the "related_item" entries of +program_metadata+ and keeps those whose
# "intra_work_relation" has relationship_type "hasTranslation" and
# identifier_type "doi". Entries with no "intra_work_relation", a non-DOI
# identifier, or no identifier value are skipped rather than raising
# NoMethodError on nil.
#
# @param program_metadata [Hash, nil] parsed Crossref program element; any
#   non-Hash value yields no relations
# @return the related-identifier hashes ("relatedIdentifier" downcased,
#   "relationType" => "HasTranslation", "relatedIdentifierType" => "DOI")
#   after Array#unwrap (an Array extension defined outside this view)
def crossref_has_translation(program_metadata)
  refs = program_metadata.dig("related_item") if program_metadata.is_a?(Hash)

  Array.wrap(refs).map do |item|
    # A related_item is not guaranteed to carry an intra_work_relation
    # (Crossref also defines inter_work_relation), so look it up defensively.
    relation = item["intra_work_relation"] if item.is_a?(Hash)
    next unless relation.is_a?(Hash)
    next unless relation["relationship_type"] == "hasTranslation"
    next unless relation["identifier_type"] == "doi"

    related_identifier = parse_attributes(relation)
    next if related_identifier.nil?

    { "relatedIdentifier" => related_identifier.downcase,
      "relationType" => "HasTranslation",
      "relatedIdentifierType" => "DOI" }
  end.compact.unwrap
end

# Collects the DOIs of works this work is a translation of, from Crossref
# relation metadata.
#
# Reads the "related_item" entries of +program_metadata+ and keeps those whose
# "intra_work_relation" has relationship_type "isTranslationOf" and
# identifier_type "doi". Entries with no "intra_work_relation", a non-DOI
# identifier, or no identifier value are skipped rather than raising
# NoMethodError on nil.
#
# @param program_metadata [Hash, nil] parsed Crossref program element; any
#   non-Hash value yields no relations
# @return the related-identifier hashes ("relatedIdentifier" downcased,
#   "relationType" => "IsTranslationOf", "relatedIdentifierType" => "DOI")
#   after Array#unwrap (an Array extension defined outside this view)
def crossref_is_translation_of(program_metadata)
  refs = program_metadata.dig("related_item") if program_metadata.is_a?(Hash)

  Array.wrap(refs).map do |item|
    # A related_item is not guaranteed to carry an intra_work_relation
    # (Crossref also defines inter_work_relation), so look it up defensively.
    relation = item["intra_work_relation"] if item.is_a?(Hash)
    next unless relation.is_a?(Hash)
    next unless relation["relationship_type"] == "isTranslationOf"
    next unless relation["identifier_type"] == "doi"

    related_identifier = parse_attributes(relation)
    next if related_identifier.nil?

    { "relatedIdentifier" => related_identifier.downcase,
      "relationType" => "IsTranslationOf",
      "relatedIdentifierType" => "DOI" }
  end.compact.unwrap
end
end
end
end
23 changes: 21 additions & 2 deletions lib/bolognese/readers/schema_org_reader.rb
Original file line number Diff line number Diff line change
Expand Up @@ -10,7 +10,9 @@ module SchemaOrgReader
"isPartOf" => "IsPartOf",
"hasPart" => "HasPart",
"isPredecessor" => "IsPreviousVersionOf",
"isSuccessor" => "IsNewVersionOf"
"isSuccessor" => "IsNewVersionOf",
"workTranslation" => "HasTranslation",
"translationOfWork" => "IsTranslationOf"
}

SO_TO_DC_REVERSE_RELATION_TYPES = {
Expand Down Expand Up @@ -74,6 +76,12 @@ def read_schema_org(string: nil, **options)
creators = get_authors(from_schema_org_creators(Array.wrap(authors)))
end
contributors = get_authors(from_schema_org_contributors(Array.wrap(meta.fetch("editor", nil))))
translators = get_authors(from_schema_org_contributors(Array.wrap(meta.fetch("translator", nil))))
translators.map! do |translator|
translator["contributorType"] = "Translator"
translator
end
contributors += translators

publisher = {
"name" => parse_attributes(meta.fetch("publisher", nil), content: "name", first: true),
Expand Down Expand Up @@ -106,7 +114,9 @@ def read_schema_org(string: nil, **options)
Array.wrap(schema_org_references(meta)) +
Array.wrap(schema_org_is_referenced_by(meta)) +
Array.wrap(schema_org_is_supplement_to(meta)) +
Array.wrap(schema_org_is_supplemented_by(meta))
Array.wrap(schema_org_is_supplemented_by(meta)) +
Array.wrap(schema_org_has_translation(meta)) +
Array.wrap(schema_org_is_translation_of(meta))

rights_list = Array.wrap(meta.fetch("license", nil)).compact.map do |rl|
hsh_to_spdx("__content__" => rl["name"], "rightsURI" => rl["id"])
Expand All @@ -127,6 +137,7 @@ def read_schema_org(string: nil, **options)
dates << { "date" => meta.fetch("datePublished"), "dateType" => "Issued" } if Date.edtf(meta.fetch("datePublished", nil)).present?
dates << { "date" => meta.fetch("dateCreated"), "dateType" => "Created" } if Date.edtf(meta.fetch("dateCreated", nil)).present?
dates << { "date" => meta.fetch("dateModified"), "dateType" => "Updated" } if Date.edtf(meta.fetch("dateModified", nil)).present?
dates << { "date" => meta.fetch("temporalCoverage"), "dateType" => "Coverage" } if Date.edtf(meta.fetch("temporalCoverage", nil)).present?
publication_year = meta.fetch("datePublished")[0..3] if meta.fetch("datePublished", nil).present?

if meta.fetch("inLanguage", nil).is_a?(String)
Expand Down Expand Up @@ -240,6 +251,14 @@ def schema_org_is_supplemented_by(meta)
schema_org_related_identifier(meta, relation_type: "isBasedOn")
end

# Related identifiers read from the schema.org "workTranslation" property.
# Delegates to schema_org_related_identifier with that relation type.
def schema_org_has_translation(meta)
  schema_org_related_identifier(meta, relation_type: "workTranslation")
end

# Related identifiers read from the schema.org "translationOfWork" property.
# Delegates to schema_org_related_identifier with that relation type.
def schema_org_is_translation_of(meta)
schema_org_related_identifier(meta, relation_type: "translationOfWork")
end

end
end
end
19 changes: 12 additions & 7 deletions lib/bolognese/utils.rb
Original file line number Diff line number Diff line change
Expand Up @@ -78,7 +78,9 @@ class << self
"Other" => "CreativeWork",
# not part of DataCite schema, but used internally
"Periodical" => "Periodical",
"DataCatalog" => "DataCatalog"
"DataCatalog" => "DataCatalog",
"Award" => "Grant",
"Project" => "Project"
}

DC_TO_CP_TRANSLATIONS = {
Expand Down Expand Up @@ -825,7 +827,8 @@ def to_schema_org_creators(element)

def to_schema_org_contributors(element)
element = Array.wrap(element).map do |c|
c["affiliation"] = Array.wrap(c["affiliation"]).map do |a|
transformed_c = c.dup
transformed_c["affiliation"] = Array.wrap(c["affiliation"]).map do |a|
if a.is_a?(String)
name = a
affiliation_identifier = nil
Expand All @@ -839,10 +842,10 @@ def to_schema_org_contributors(element)
"@id" => affiliation_identifier,
"name" => name }.compact
end.unwrap
c["@type"] = c["nameType"].present? ? c["nameType"][0..-3] : nil
c["@id"] = Array.wrap(c["nameIdentifiers"]).first.to_h.fetch("nameIdentifier", nil)
c["name"] = c["familyName"].present? ? [c["givenName"], c["familyName"]].join(" ") : c["name"]
c.except("nameIdentifiers", "nameType").compact
transformed_c["@type"] = c["nameType"].present? ? c["nameType"][0..-3] : nil
transformed_c["@id"] = Array.wrap(c["nameIdentifiers"]).first.to_h.fetch("nameIdentifier", nil)
transformed_c["name"] = c["familyName"].present? ? [c["givenName"], c["familyName"]].join(" ") : c["name"]
transformed_c.except("nameIdentifiers", "nameType").compact
end.unwrap
end

Expand Down Expand Up @@ -1234,7 +1237,9 @@ def get_identifier_type(identifier_type)
"urn" => "URN",
"md5" => "md5",
"minid" => "minid",
"dataguid" => "dataguid"
"dataguid" => "dataguid",
"cstr" => "CSTR",
"rrid" => "RRID"
}

identifierTypes[identifier_type.downcase] || identifier_type
Expand Down
2 changes: 1 addition & 1 deletion lib/bolognese/version.rb
Original file line number Diff line number Diff line change
@@ -1,3 +1,3 @@
module Bolognese
VERSION = "2.2.0"
VERSION = "2.3.0"
end
4 changes: 4 additions & 0 deletions lib/bolognese/writers/schema_org_writer.rb
Original file line number Diff line number Diff line change
Expand Up @@ -13,6 +13,7 @@ def schema_hsh
"name" => parse_attributes(titles, content: "title", first: true),
"author" => to_schema_org_creators(creators),
"editor" => to_schema_org_contributors(contributors),
"translator" => contributors ? to_schema_org_contributors(contributors.select { |c| c["contributorType"] == "Translator" }) : nil,
"description" => parse_attributes(abstract_description, content: "description", first: true),
"license" => Array.wrap(rights_list).map { |l| l["rightsUri"] }.compact.unwrap,
"version" => version_info,
Expand All @@ -23,6 +24,7 @@ def schema_hsh
"dateCreated" => get_date(dates, "Created"),
"datePublished" => get_date(dates, "Issued") || publication_year,
"dateModified" => get_date(dates, "Updated"),
"temporalCoverage" => get_date(dates, "Coverage"),
"pageStart" => container.to_h["firstPage"],
"pageEnd" => container.to_h["lastPage"],
"spatialCoverage" => to_schema_org_spatial_coverage(geo_locations),
Expand All @@ -32,6 +34,8 @@ def schema_hsh
"predecessor_of" => to_schema_org_relation(related_identifiers: related_identifiers, relation_type: "IsPreviousVersionOf"),
"successor_of" => to_schema_org_relation(related_identifiers: related_identifiers, relation_type: "IsNewVersionOf"),
"citation" => to_schema_org_relation(related_identifiers: related_identifiers, relation_type: "References"),
"workTranslation" => to_schema_org_relation(related_identifiers: related_identifiers, relation_type: "HasTranslation"),
"translationOfWork" => to_schema_org_relation(related_identifiers: related_identifiers, relation_type: "IsTranslationOf"),
"@reverse" => reverse.presence,
"contentUrl" => Array.wrap(content_url).unwrap,
"schemaVersion" => schema_version,
Expand Down
37 changes: 37 additions & 0 deletions resources/kernel-4.6/include/datacite-contributorType-v4.xsd
Original file line number Diff line number Diff line change
@@ -0,0 +1,37 @@
<?xml version="1.0" encoding="UTF-8"?>
<!-- Version 1.0 - Created 2011-01-13 - FZ, TIB, Germany
2013-05 v3.0: Addition of ID to simpleType element, added values "ResearchGroup" & "Other"
2014-08-20 v3.1: Addition of value "DataCurator"
2015-05-14 v4.0 dropped value "Funder", use new "funderReference"
2024-12-31 v4.6: Addition of value "Translator" -->
<xs:schema xmlns:xs="http://www.w3.org/2001/XMLSchema" xmlns="http://datacite.org/schema/kernel-4" targetNamespace="http://datacite.org/schema/kernel-4" elementFormDefault="qualified">
<!-- Closed vocabulary for contributorType in the kernel-4 namespace: a string
     restricted to exactly the values enumerated below. -->
<xs:simpleType name="contributorType" id="contributorType">
<xs:annotation>
<xs:documentation>The type of contributor of the resource.</xs:documentation>
</xs:annotation>
<xs:restriction base="xs:string">
<xs:enumeration value="ContactPerson" />
<xs:enumeration value="DataCollector" />
<xs:enumeration value="DataCurator" />
<xs:enumeration value="DataManager" />
<xs:enumeration value="Distributor" />
<xs:enumeration value="Editor" />
<xs:enumeration value="HostingInstitution" />
<xs:enumeration value="Other" />
<xs:enumeration value="Producer" />
<xs:enumeration value="ProjectLeader" />
<xs:enumeration value="ProjectManager" />
<xs:enumeration value="ProjectMember" />
<xs:enumeration value="RegistrationAgency" />
<xs:enumeration value="RegistrationAuthority" />
<xs:enumeration value="RelatedPerson" />
<xs:enumeration value="ResearchGroup" />
<xs:enumeration value="RightsHolder" />
<xs:enumeration value="Researcher" />
<xs:enumeration value="Sponsor" />
<xs:enumeration value="Supervisor" />
<!-- "Translator" is the value added in v4.6 (see the changelog at the top of this file). -->
<xs:enumeration value="Translator" />
<xs:enumeration value="WorkPackageLeader" />
</xs:restriction>
</xs:simpleType>
</xs:schema>
27 changes: 27 additions & 0 deletions resources/kernel-4.6/include/datacite-dateType-v4.xsd
Original file line number Diff line number Diff line change
@@ -0,0 +1,27 @@
<?xml version="1.0" encoding="UTF-8"?>
<!-- Version 1.0 - Created 2011-01-13 - FZ, TIB, Germany
2013-05 v3.0: Addition of ID to simpleType element; addition of value "Collected"; deleted "StartDate" & "EndDate"
2017-10-23 v4.1: Addition of value "Other"
2019-02-14 v4.2: Addition of value "Withdrawn"
2024-12-31 v4.6: Addition of value "Coverage"-->
<xs:schema xmlns:xs="http://www.w3.org/2001/XMLSchema" xmlns="http://datacite.org/schema/kernel-4" targetNamespace="http://datacite.org/schema/kernel-4" elementFormDefault="qualified">
<!-- Closed vocabulary for dateType in the kernel-4 namespace: a string
     restricted to exactly the values enumerated below. -->
<xs:simpleType name="dateType" id="dateType">
<xs:annotation>
<xs:documentation>The type of date. Use RKMS‐ISO8601 standard for depicting date ranges.To indicate the end of an embargo period, use Available. To indicate the start of an embargo period, use Submitted or Accepted, as appropriate.</xs:documentation>
</xs:annotation>
<xs:restriction base="xs:string">
<xs:enumeration value="Accepted" />
<xs:enumeration value="Available" />
<xs:enumeration value="Collected" />
<xs:enumeration value="Copyrighted" />
<!-- "Coverage" is the value added in v4.6 (see the changelog at the top of this file). -->
<xs:enumeration value="Coverage" />
<xs:enumeration value="Created" />
<xs:enumeration value="Issued" />
<xs:enumeration value="Other" />
<xs:enumeration value="Submitted" />
<xs:enumeration value="Updated" />
<xs:enumeration value="Valid" />
<xs:enumeration value="Withdrawn" />
</xs:restriction>
</xs:simpleType>
</xs:schema>
19 changes: 19 additions & 0 deletions resources/kernel-4.6/include/datacite-descriptionType-v4.xsd
Original file line number Diff line number Diff line change
@@ -0,0 +1,19 @@
<?xml version="1.0" encoding="UTF-8"?>
<!-- Version 1.0 - Created 2011-01-13 - FZ, TIB, Germany
2013-05 v3.0: Addition of ID to simpleType element, addition of value "Methods"
2015-02-12 v4.0: Addition of value "TechnicalInfo"-->
<xs:schema xmlns:xs="http://www.w3.org/2001/XMLSchema" xmlns="http://datacite.org/schema/kernel-4" targetNamespace="http://datacite.org/schema/kernel-4" elementFormDefault="qualified">
<!-- Closed vocabulary for descriptionType in the kernel-4 namespace: a string
     restricted to exactly the values enumerated below. The changelog above
     records no value changes after v4.0. -->
<xs:simpleType name="descriptionType" id="descriptionType">
<xs:annotation>
<xs:documentation>The type of the description.</xs:documentation>
</xs:annotation>
<xs:restriction base="xs:string">
<xs:enumeration value="Abstract" />
<xs:enumeration value="Methods" />
<xs:enumeration value="SeriesInformation" />
<xs:enumeration value="TableOfContents" />
<xs:enumeration value="TechnicalInfo" />
<xs:enumeration value="Other" />
</xs:restriction>
</xs:simpleType>
</xs:schema>
16 changes: 16 additions & 0 deletions resources/kernel-4.6/include/datacite-funderIdentifierType-v4.xsd
Original file line number Diff line number Diff line change
@@ -0,0 +1,16 @@
<?xml version="1.0" encoding="UTF-8"?>
<!-- Version 1.0 - Created 2016-05-14 -->
<xs:schema xmlns:xs="http://www.w3.org/2001/XMLSchema" xmlns="http://datacite.org/schema/kernel-4" targetNamespace="http://datacite.org/schema/kernel-4" elementFormDefault="qualified">
<!-- Closed vocabulary for funderIdentifierType in the kernel-4 namespace: a
     string restricted to exactly the values enumerated below. Note that
     "Crossref Funder ID" contains spaces and must be written exactly so. -->
<xs:simpleType name="funderIdentifierType" id="funderIdentifierType">
<xs:annotation>
<xs:documentation>The type of the funderIdentifier.</xs:documentation>
</xs:annotation>
<xs:restriction base="xs:string">
<xs:enumeration value="ISNI" />
<xs:enumeration value="GRID" />
<xs:enumeration value="ROR" />
<xs:enumeration value="Crossref Funder ID" />
<xs:enumeration value="Other" />
</xs:restriction>
</xs:simpleType>
</xs:schema>
10 changes: 10 additions & 0 deletions resources/kernel-4.6/include/datacite-nameType-v4.xsd
Original file line number Diff line number Diff line change
@@ -0,0 +1,10 @@
<?xml version="1.0" encoding="UTF-8"?>
<!-- Version 4.1 - Created 2017-10-23 -->
<xs:schema xmlns:xs="http://www.w3.org/2001/XMLSchema" xmlns="http://datacite.org/schema/kernel-4" targetNamespace="http://datacite.org/schema/kernel-4" elementFormDefault="qualified">
<!-- Closed vocabulary for nameType in the kernel-4 namespace: a string
     restricted to either "Organizational" or "Personal". -->
<xs:simpleType name="nameType" id="nameType">
<xs:restriction base="xs:string">
<xs:enumeration value="Organizational" />
<xs:enumeration value="Personal" />
</xs:restriction>
</xs:simpleType>
</xs:schema>
12 changes: 12 additions & 0 deletions resources/kernel-4.6/include/datacite-numberType-v4.xsd
Original file line number Diff line number Diff line change
@@ -0,0 +1,12 @@
<?xml version="1.0" encoding="UTF-8"?>
<!-- Version 4.4 - Created 2021-03-05 -->
<xs:schema xmlns:xs="http://www.w3.org/2001/XMLSchema" xmlns="http://datacite.org/schema/kernel-4" targetNamespace="http://datacite.org/schema/kernel-4" elementFormDefault="qualified">
<!-- Closed vocabulary for numberType in the kernel-4 namespace: a string
     restricted to exactly the values enumerated below. -->
<xs:simpleType name="numberType" id="numberType">
<xs:restriction base="xs:string">
<xs:enumeration value="Article" />
<xs:enumeration value="Chapter" />
<xs:enumeration value="Report" />
<xs:enumeration value="Other" />
</xs:restriction>
</xs:simpleType>
</xs:schema>
Loading

0 comments on commit ddddadf

Please sign in to comment.