Skip to content

Commit

Permalink
add optional language attribute to title and description
Browse files: browse the repository at this point in the history
  • Loading branch information
mfenner committed Jan 31, 2024
1 parent d3fc72a commit 2734afe
Show file tree
Hide file tree
Showing 14 changed files with 555 additions and 135 deletions.
9 changes: 5 additions & 4 deletions Gemfile.lock
Original file line number Diff line number Diff line change
@@ -1,7 +1,7 @@
PATH
remote: .
specs:
commonmeta-ruby (3.11.0)
commonmeta-ruby (3.12.0)
activesupport (>= 4.2.5, < 8.0)
addressable (~> 2.8.1, < 2.8.2)
base32-url (>= 0.7.0, < 1)
Expand Down Expand Up @@ -58,15 +58,16 @@ GEM
rubocop (~> 1.0)
concurrent-ruby (1.2.3)
connection_pool (2.4.1)
crack (0.4.5)
crack (0.4.6)
bigdecimal
rexml
crass (1.0.6)
csl (2.0.0)
namae (~> 1.0)
rexml
csl-styles (2.0.1)
csl (~> 2.0)
diff-lcs (1.5.0)
diff-lcs (1.5.1)
docile (1.4.0)
domain_name (0.6.20240107)
drb (2.2.0)
Expand Down Expand Up @@ -154,7 +155,7 @@ GEM
iniparser (>= 0.1.0)
public_suffix (4.0.7)
racc (1.7.3)
rack (3.0.8)
rack (3.0.9)
rack-test (2.1.0)
rack (>= 1.3)
rainbow (3.1.1)
Expand Down
15 changes: 9 additions & 6 deletions lib/commonmeta/author_utils.rb
Original file line number Diff line number Diff line change
Expand Up @@ -25,8 +25,8 @@ def datacite_contributor_roles = {
"Researcher" => "Other",
"Sponsor" => "Other",
"Supervisor" => "Supervision",
"WorkPackageLeader" => "Other"
}
"WorkPackageLeader" => "Other",
}

def get_one_author(author)
# basic sanity checks
Expand Down Expand Up @@ -55,20 +55,20 @@ def get_one_author(author)
parse_attributes(author.fetch("identifier", nil), first: true) ||
parse_attributes(author.fetch("sameAs", nil), first: true)
id = normalize_orcid(id) || normalize_ror(id) if id.present?

# DataCite metadata
if id.nil? && author["nameIdentifiers"].present?
id = Array.wrap(author.dig("nameIdentifiers")).find do |ni|
normalize_name_identifier(ni).present?
end
id = normalize_name_identifier(id) if id.present?
# Crossref metadata
# Crossref metadata
elsif id.nil? && author["ORCID"].present?
id = author.fetch("ORCID")
id = normalize_orcid(id)
# JSON Feed metadata
# JSON Feed metadata
elsif id.nil? && author["url"].present?
id = author.fetch("url")
id = author.fetch("url")
end

# parse author type, i.e. "Person", "Organization" or not specified
Expand Down Expand Up @@ -168,6 +168,9 @@ def is_personal_name?(name: nil)
# check if a name has only one word, e.g. "FamousOrganization", not including commas
return false if name.to_s.split(" ").size == 1 && name.to_s.exclude?(",")

# check if name contains words known to be used in organization names
return false if %w[University College Institute School Center Department Laboratory Library Museum Foundation Society Association Company Corporation Collaboration Consortium Incorporated Inc. Institut Research Science].any? { |word| name.to_s.include?(word) }

# check for suffixes, e.g. "John Smith, MD"
return true if name && %w[MD PhD].include?(name.split(", ").last)

Expand Down
228 changes: 117 additions & 111 deletions lib/commonmeta/readers/datacite_reader.rb
Original file line number Diff line number Diff line change
Expand Up @@ -4,170 +4,176 @@ module Commonmeta
module Readers
module DataciteReader
def get_datacite(id: nil, **options)
return { 'string' => nil, 'state' => 'not_found' } unless id.present?
return { "string" => nil, "state" => "not_found" } unless id.present?

api_url = datacite_api_url(id, options)
response = HTTP.get(api_url)
return { 'string' => nil, 'state' => 'not_found' } unless response.status.success?
return { "string" => nil, "state" => "not_found" } unless response.status.success?

body = JSON.parse(response.body)
client = Array.wrap(body.fetch('included', nil)).find do |m|
m['type'] == 'clients'
client = Array.wrap(body.fetch("included", nil)).find do |m|
m["type"] == "clients"
end
client_id = client.to_h.fetch('id', nil)
provider_id = Array.wrap(client.to_h.fetch('relationships', nil)).find do |m|
m['provider'].present?
end.to_h.dig('provider', 'data', 'id')

{ 'string' => response.body.to_s,
'provider_id' => provider_id,
'client_id' => client_id }
client_id = client.to_h.fetch("id", nil)
provider_id = Array.wrap(client.to_h.fetch("relationships", nil)).find do |m|
m["provider"].present?
end.to_h.dig("provider", "data", "id")

{ "string" => response.body.to_s,
"provider_id" => provider_id,
"client_id" => client_id }
end

def read_datacite(string: nil, **_options)
errors = jsonlint(string)
return { 'errors' => errors } if errors.present?
return { "errors" => errors } if errors.present?

read_options = ActiveSupport::HashWithIndifferentAccess.new(_options.except(:doi, :id, :url,
:sandbox, :validate, :ra))

meta = string.present? ? JSON.parse(string) : {}

# optionally strip out the message wrapper from API
meta = meta.dig('data', 'attributes') if meta.dig('data').present?
meta = meta.dig("data", "attributes") if meta.dig("data").present?

meta.transform_keys!(&:underscore)

id = normalize_doi(meta.fetch('doi', nil))
id = normalize_doi(meta.fetch("doi", nil))

resource_type_general = meta.dig('types', 'resourceTypeGeneral')
resource_type = meta.dig('types', 'resourceType')
resource_type_general = meta.dig("types", "resourceTypeGeneral")
resource_type = meta.dig("types", "resourceType")
# if resource_type is one of the new resource_type_general types introduced in schema 4.3, use it
type = Commonmeta::Utils::DC_TO_CM_TRANSLATIONS.fetch(resource_type, nil) ||
Commonmeta::Utils::DC_TO_CM_TRANSLATIONS.fetch(resource_type_general, 'Other')
Commonmeta::Utils::DC_TO_CM_TRANSLATIONS.fetch(resource_type_general, "Other")

alternate_identifiers = Array.wrap(meta.fetch('alternate_identifiers', nil)).map do |i|
alternate_identifiers = Array.wrap(meta.fetch("alternate_identifiers", nil)).map do |i|
i.transform_keys! { |k| k.camelize(:lower) }
end
url = meta.fetch('url', nil)
titles = Array.wrap(meta.fetch('titles', nil)).map do |title|
title.compact
url = meta.fetch("url", nil)
titles = Array.wrap(meta.fetch("titles", nil)).map do |title|
{ "title" => title.fetch("title", nil),
"type" => title.fetch("titleType", nil),
"language" => title.fetch("lang", nil) }.compact
end
contributors = get_authors(from_datacite(meta.fetch('creators', nil)))
contributors += get_authors(from_datacite(meta.fetch('contributors', nil)))
if meta.fetch('publisher', nil).is_a?(Hash)
publisher = { 'name' => meta.fetch('publisher', nil).fetch('name', nil) }
elsif meta.fetch('publisher', nil).is_a?(String)
publisher = { 'name' => meta.fetch('publisher', nil) }
contributors = get_authors(from_datacite(meta.fetch("creators", nil)))
contributors += get_authors(from_datacite(meta.fetch("contributors", nil)))
if meta.fetch("publisher", nil).is_a?(Hash)
publisher = { "name" => meta.fetch("publisher", nil).fetch("name", nil) }
elsif meta.fetch("publisher", nil).is_a?(String)
publisher = { "name" => meta.fetch("publisher", nil) }
else
publisher = nil
end

container = meta.fetch('container', nil)
funding_references = meta.fetch('funding_references', nil)
container = meta.fetch("container", nil)
funding_references = meta.fetch("funding_references", nil)

date = {}
date['created'] =
get_iso8601_date(meta.dig('created')) || get_date(meta.dig('dates'), 'Created')
date['published'] =
get_iso8601_date(meta.dig('published')) || get_date(meta.dig('dates'),
'Issued') || get_iso8601_date(meta.dig('publication_year'))
date['registered'] = get_iso8601_date(meta.dig('registered'))
date['updated'] =
get_iso8601_date(meta.dig('updated')) || get_date(meta.dig('dates'), 'Updated')

descriptions = Array.wrap(meta.fetch('descriptions', nil)).map do |description|
description.compact
date["created"] =
get_iso8601_date(meta.dig("created")) || get_date(meta.dig("dates"), "Created")
date["published"] =
get_iso8601_date(meta.dig("published")) || get_date(meta.dig("dates"),
"Issued") || get_iso8601_date(meta.dig("publication_year"))
date["registered"] = get_iso8601_date(meta.dig("registered"))
date["updated"] =
get_iso8601_date(meta.dig("updated")) || get_date(meta.dig("dates"), "Updated")

descriptions = Array.wrap(meta.fetch("descriptions", nil)).map do |description|
description_type = description.fetch("descriptionType", nil)
description_type = "Other" unless %w[Abstract Methods TechnicalInfo].include?(description_type)
{ "description" => description.fetch("description", nil),
"type" => description_type,
"language" => description.fetch("lang", nil) }.compact
end
license = Array.wrap(meta.fetch('rights_list', nil)).find do |r|
r['rightsUri'].present?
license = Array.wrap(meta.fetch("rights_list", nil)).find do |r|
r["rightsUri"].present?
end
license = hsh_to_spdx('rightsURI' => license['rightsUri']) if license.present?
version = meta.fetch('version', nil)
subjects = meta.fetch('subjects', nil)
language = meta.fetch('language', nil)
geo_locations = meta.fetch('geo_locations', nil)
references = (Array.wrap(meta.fetch('related_identifiers',
nil)) + Array.wrap(meta.fetch('related_items',
license = hsh_to_spdx("rightsURI" => license["rightsUri"]) if license.present?
version = meta.fetch("version", nil)
subjects = meta.fetch("subjects", nil)
language = meta.fetch("language", nil)
geo_locations = meta.fetch("geo_locations", nil)
references = (Array.wrap(meta.fetch("related_identifiers",
nil)) + Array.wrap(meta.fetch("related_items",
nil))).select do |r|
%w[References Cites IsSupplementedBy].include?(r['relationType'])
end.map do |reference|
%w[References Cites IsSupplementedBy].include?(r["relationType"])
end.map do |reference|
get_datacite_reference(reference)
end
files = Array.wrap(meta.fetch("content_url", nil)).map { |file| { "url" => file } }
formats = meta.fetch('formats', nil)
sizes = meta.fetch('sizes', nil)
schema_version = meta.fetch('schema_version', nil) || 'http://datacite.org/schema/kernel-4'
state = id.present? || read_options.present? ? 'findable' : 'not_found'

{ 'id' => id,
'type' => type,
'additional_type' => resource_type == type ? nil : resource_type,
'url' => url,
'titles' => titles,
'contributors' => contributors,
'container' => container,
'publisher' => publisher,
'provider' => 'DataCite',
'alternate_identifiers' => alternate_identifiers.presence,
'references' => references,
'funding_references' => funding_references,
'files' => files.presence,
'date' => date.compact,
'descriptions' => descriptions,
'license' => license,
'version' => version,
'subjects' => subjects,
'language' => language,
'geo_locations' => geo_locations,
'formats' => formats,
'sizes' => sizes,
'state' => state }.compact # .merge(read_options)
files = Array.wrap(meta.fetch("content_url", nil)).map { |file| { "url" => file } }
formats = meta.fetch("formats", nil)
sizes = meta.fetch("sizes", nil)
schema_version = meta.fetch("schema_version", nil) || "http://datacite.org/schema/kernel-4"
state = id.present? || read_options.present? ? "findable" : "not_found"

{ "id" => id,
"type" => type,
"additional_type" => resource_type == type ? nil : resource_type,
"url" => url,
"titles" => titles,
"contributors" => contributors,
"container" => container,
"publisher" => publisher,
"provider" => "DataCite",
"alternate_identifiers" => alternate_identifiers.presence,
"references" => references,
"funding_references" => funding_references,
"files" => files.presence,
"date" => date.compact,
"descriptions" => descriptions,
"license" => license,
"version" => version,
"subjects" => subjects,
"language" => language,
"geo_locations" => geo_locations,
"formats" => formats,
"sizes" => sizes,
"state" => state }.compact # .merge(read_options)
end

def format_contributor(contributor)
type = contributor.fetch('nameType', nil)

{ 'name' => type == 'Person' ? nil : contributor.fetch('name', nil),
'type' => type,
'givenName' => contributor.fetch('givenName', nil),
'familyName' => contributor.fetch('familyName', nil),
'nameIdentifiers' => contributor.fetch('nameIdentifiers', nil).presence,
'affiliations' => contributor.fetch('affiliations', nil).presence,
'contributorType' => contributor.fetch('contributorType', nil) }.compact
type = contributor.fetch("nameType", nil)

{ "name" => type == "Person" ? nil : contributor.fetch("name", nil),
"type" => type,
"givenName" => contributor.fetch("givenName", nil),
"familyName" => contributor.fetch("familyName", nil),
"nameIdentifiers" => contributor.fetch("nameIdentifiers", nil).presence,
"affiliations" => contributor.fetch("affiliations", nil).presence,
"contributorType" => contributor.fetch("contributorType", nil) }.compact
end

def get_datacite_reference(reference)
return nil unless reference.present? || !reference.is_a?(Hash)

key = reference['relatedIdentifier']
key = reference["relatedIdentifier"]
doi = nil
url = nil

case reference['relatedIdentifierType']
when 'DOI'
doi = normalize_doi(reference['relatedIdentifier'])
when 'URL'
url = reference['relatedIdentifier']
case reference["relatedIdentifierType"]
when "DOI"
doi = normalize_doi(reference["relatedIdentifier"])
when "URL"
url = reference["relatedIdentifier"]
else
url = reference['relatedIdentifier']
url = reference["relatedIdentifier"]
end

{
'key' => key,
'doi' => doi,
'url' => url,
'contributor' => reference.dig('author'),
'title' => reference.dig('article-title'),
'publisher' => reference.dig('publisher'),
'publicationYear' => reference.dig('year'),
'volume' => reference.dig('volume'),
'issue' => reference.dig('issue'),
'firstPage' => reference.dig('first-page'),
'lastPage' => reference.dig('last-page'),
'containerTitle' => reference.dig('journal-title'),
'edition' => nil,
'unstructured' => doi.nil? ? reference.dig('unstructured') : nil
"key" => key,
"doi" => doi,
"url" => url,
"contributor" => reference.dig("author"),
"title" => reference.dig("article-title"),
"publisher" => reference.dig("publisher"),
"publicationYear" => reference.dig("year"),
"volume" => reference.dig("volume"),
"issue" => reference.dig("issue"),
"firstPage" => reference.dig("first-page"),
"lastPage" => reference.dig("last-page"),
"containerTitle" => reference.dig("journal-title"),
"edition" => nil,
"unstructured" => doi.nil? ? reference.dig("unstructured") : nil,
}.compact
end
end
Expand Down
2 changes: 1 addition & 1 deletion lib/commonmeta/schema_utils.rb
Original file line number Diff line number Diff line change
Expand Up @@ -5,7 +5,7 @@

module Commonmeta
module SchemaUtils
COMMONMETA = File.read(File.expand_path("../../resources/commonmeta_v0.10.6.json",
COMMONMETA = File.read(File.expand_path("../../resources/commonmeta_v0.10.7.json",
__dir__))

def json_schema_errors
Expand Down
2 changes: 1 addition & 1 deletion lib/commonmeta/version.rb
Original file line number Diff line number Diff line change
@@ -1,5 +1,5 @@
# frozen_string_literal: true

module Commonmeta
VERSION = '3.11.0'
VERSION = '3.12.0'
end
2 changes: 1 addition & 1 deletion lib/commonmeta/writers/commonmeta_writer.rb
Original file line number Diff line number Diff line change
Expand Up @@ -8,4 +8,4 @@ def commonmeta
end
end
end
end
end
Loading

0 comments on commit 2734afe

Please sign in to comment.