Skip to content

Commit

Permalink
Merge branch 'master' into support-rightsidentifier
Browse files Browse the repository at this point in the history
  • Loading branch information
Martin Fenner authored Jun 2, 2020
2 parents 5e4d3f2 + a0672da commit b0c8136
Show file tree
Hide file tree
Showing 35 changed files with 2,205 additions and 54 deletions.
15 changes: 9 additions & 6 deletions Gemfile.lock
Original file line number Diff line number Diff line change
@@ -1,7 +1,7 @@
PATH
remote: .
specs:
bolognese (1.6.1)
bolognese (1.6.3)
activesupport (>= 4.2.5)
benchmark_methods (~> 0.7)
bibtex-ruby (>= 5.1.0)
Expand Down Expand Up @@ -123,25 +123,28 @@ GEM
rack-test (0.8.3)
rack (>= 1.0, < 3)
rake (12.3.3)
rdf (3.1.1)
rdf (3.1.2)
hamster (~> 3.0)
link_header (~> 0.0, >= 0.0.8)
rdf-aggregate-repo (3.1.0)
rdf (~> 3.1)
rdf-rdfa (3.1.0)
rdf-rdfa (3.1.1)
haml (~> 5.1)
htmlentities (~> 4.3)
rdf (~> 3.1)
rdf (~> 3.1, >= 3.1.2)
rdf-aggregate-repo (~> 3.1)
rdf-vocab (~> 3.1, >= 3.1.5)
rdf-xsd (~> 3.1)
rdf-rdfxml (3.1.0)
htmlentities (~> 4.3)
rdf (~> 3.1)
rdf-rdfa (~> 3.1)
rdf-xsd (~> 3.1)
rdf-turtle (3.1.0)
rdf-turtle (3.1.1)
ebnf (~> 1.2)
rdf (~> 3.1)
rdf (~> 3.1, >= 3.1.2)
rdf-vocab (3.1.5)
rdf (~> 3.1, >= 3.1.2)
rdf-xsd (3.1.0)
rdf (~> 3.1)
rspec (3.9.0)
Expand Down
4 changes: 2 additions & 2 deletions lib/bolognese/datacite_utils.rb
Original file line number Diff line number Diff line change
Expand Up @@ -172,9 +172,9 @@ def insert_subjects(xml)
end

def insert_version(xml)
return xml unless version.present?
return xml unless version_info.present?

xml.version(version)
xml.version(version_info)
end


Expand Down
11 changes: 4 additions & 7 deletions lib/bolognese/metadata.rb
Original file line number Diff line number Diff line change
Expand Up @@ -9,7 +9,7 @@ class Metadata
attr_accessor :string, :from, :sandbox, :meta, :regenerate, :issue, :show_errors
attr_reader :doc, :page_start, :page_end
attr_writer :id, :provider_id, :client_id, :doi, :identifiers, :creators, :contributors, :titles, :publisher,
:rights_list, :dates, :publication_year, :volume, :url, :version,
:rights_list, :dates, :publication_year, :volume, :url, :version_info,
:subjects, :contributor, :descriptions, :language, :sizes,
:formats, :schema_version, :meta, :container, :agency,
:format, :funding_references, :state, :geo_locations,
Expand Down Expand Up @@ -106,7 +106,7 @@ def initialize(input: nil, from: nil, **options)
:publication_year,
:descriptions,
:rights_list,
:version,
:version_info,
:subjects,
:language,
:geo_locations,
Expand Down Expand Up @@ -189,13 +189,10 @@ def url
@url ||= meta.fetch("url", nil)
end

def version
@version ||= meta.fetch("version", nil)
def version_info
@version_info ||= meta.fetch("version_info", nil) || meta.fetch("version", nil)
end

# for backwards compatibility
alias_attribute :version_info, :version

def publication_year
@publication_year ||= meta.fetch("publication_year", nil)
end
Expand Down
4 changes: 2 additions & 2 deletions lib/bolognese/metadata_utils.rb
Original file line number Diff line number Diff line change
Expand Up @@ -159,7 +159,7 @@ def citeproc_hsh
"title" => parse_attributes(titles, content: "title", first: true),
"URL" => url,
"copyright" => Array.wrap(rights_list).map { |l| l["rights"] }.first,
"version" => version,
"version" => version_info
}.compact.symbolize_keys
end

Expand All @@ -181,7 +181,7 @@ def crosscite_hsh
"identifiers" => identifiers,
"sizes" => sizes,
"formats" => formats,
"version" => version,
"version" => version_info,
"rights_list" => rights_list,
"descriptions" => descriptions,
"geo_locations" => geo_locations,
Expand Down
8 changes: 5 additions & 3 deletions lib/bolognese/readers/citeproc_reader.rb
Original file line number Diff line number Diff line change
Expand Up @@ -100,8 +100,10 @@ def read_citeproc(string: nil, **options)
doi = Array.wrap(identifiers).find { |r| r["identifierType"] == "DOI" }.to_h.fetch("identifier", nil)

state = id.present? || read_options.present? ? "findable" : "not_found"
subjects = Array.wrap(meta.fetch("categories", nil)).map do |s|
{ "subject" => s }
subjects = Array.wrap(meta.fetch("categories", nil)).reduce([]) do |sum, subject|
sum += name_to_fos(subject)

sum
end

{ "id" => id,
Expand All @@ -119,7 +121,7 @@ def read_citeproc(string: nil, **options)
"publication_year" => publication_year,
"descriptions" => meta.fetch("abstract", nil).present? ? [{ "description" => sanitize(meta.fetch("abstract")), "descriptionType" => "Abstract" }] : [],
"rights_list" => rights_list,
"version" => meta.fetch("version", nil),
"version_info" => meta.fetch("version", nil),
"subjects" => subjects,
"state" => state
}.merge(read_options)
Expand Down
8 changes: 5 additions & 3 deletions lib/bolognese/readers/codemeta_reader.rb
Original file line number Diff line number Diff line change
Expand Up @@ -57,8 +57,10 @@ def read_codemeta(string: nil, **options)
"bibtex" => Bolognese::Utils::SO_TO_BIB_TRANSLATIONS[schema_org] || "misc",
"ris" => Bolognese::Utils::SO_TO_RIS_TRANSLATIONS[schema_org] || "GEN"
}.compact
subjects = Array.wrap(meta.fetch("tags", nil)).map do |s|
{ "subject" => s }
subjects = Array.wrap(meta.fetch("tags", nil)).reduce([]) do |sum, subject|
sum += name_to_fos(subject)

sum
end

has_title = meta.fetch("title", nil)
Expand All @@ -79,7 +81,7 @@ def read_codemeta(string: nil, **options)
"publication_year" => publication_year,
"descriptions" => meta.fetch("description", nil).present? ? [{ "description" => sanitize(meta.fetch("description")), "descriptionType" => "Abstract" }] : nil,
"rights_list" => rights_list,
"version" => meta.fetch("version", nil),
"version_info" => meta.fetch("version", nil),
"subjects" => subjects,
"state" => state
}.merge(read_options)
Expand Down
2 changes: 1 addition & 1 deletion lib/bolognese/readers/crossref_reader.rb
Original file line number Diff line number Diff line change
Expand Up @@ -172,7 +172,7 @@ def read_crossref(string: nil, **options)
"publication_year" => publication_year,
"descriptions" => crossref_description(bibliographic_metadata),
"rights_list" => crossref_license(program_metadata),
"version" => nil,
"version_info" => nil,
"subjects" => nil,
"language" => nil,
"sizes" => nil,
Expand Down
20 changes: 11 additions & 9 deletions lib/bolognese/readers/datacite_reader.rb
Original file line number Diff line number Diff line change
Expand Up @@ -122,15 +122,17 @@ def read_datacite(string: nil, **options)
hsh_to_spdx(r)
end
end.compact
subjects = Array.wrap(meta.dig("subjects", "subject")).map do |k|
if k.blank?
nil
elsif k.is_a?(String)
{ "subject" => sanitize(k) }
elsif k.is_a?(Hash)
{ "subject" => sanitize(k["__content__"]), "subjectScheme" => k["subjectScheme"], "schemeUri" => k["schemeURI"], "valueUri" => k["valueURI"], "lang" => k["lang"] }.compact

subjects = Array.wrap(meta.dig("subjects", "subject")).reduce([]) do |sum, subject|
if subject.is_a?(String)
sum += name_to_fos(subject)
elsif subject.is_a?(Hash)
sum += hsh_to_fos(subject)
end
end.compact

sum
end.uniq

dates = Array.wrap(meta.dig("dates", "date")).map do |r|
if r.is_a?(Hash) && date = sanitize(r["__content__"]).presence
if Date.edtf(date).present? || Bolognese::Utils::UNKNOWN_INFORMATION.key?(date)
Expand Down Expand Up @@ -233,7 +235,7 @@ def read_datacite(string: nil, **options)
"publication_year" => parse_attributes(meta.fetch("publicationYear", nil), first: true).to_s.strip.presence,
"descriptions" => descriptions,
"rights_list" => Array.wrap(rights_list),
"version" => meta.fetch("version", nil).to_s.presence,
"version_info" => meta.fetch("version", nil).to_s.presence,
"subjects" => subjects,
"language" => parse_attributes(meta.fetch("language", nil), first: true).to_s.strip.presence,
"geo_locations" => geo_locations,
Expand Down
2 changes: 1 addition & 1 deletion lib/bolognese/readers/npm_reader.rb
Original file line number Diff line number Diff line change
Expand Up @@ -105,7 +105,7 @@ def read_npm(string: nil, **options)
#"publication_year" => publication_year,
"descriptions" => meta.fetch("description", nil).present? ? [{ "description" => sanitize(meta.fetch("description")), "descriptionType" => "Abstract" }] : [],
"rights_list" => rights_list,
"version" => meta.fetch("version", nil),
"version_info" => meta.fetch("version", nil),
"subjects" => subjects
#"state" => state
}.merge(read_options)
Expand Down
6 changes: 4 additions & 2 deletions lib/bolognese/readers/ris_reader.rb
Original file line number Diff line number Diff line change
Expand Up @@ -82,8 +82,10 @@ def read_ris(string: nil, **options)
nil
end
state = meta.fetch("DO", nil).present? || read_options.present? ? "findable" : "not_found"
subjects = Array.wrap(meta.fetch("KW", nil)).map do |s|
{ "subject" => s }
subjects = Array.wrap(meta.fetch("KW", nil)).reduce([]) do |sum, subject|
sum += name_to_fos(subject)

sum
end

{ "id" => id,
Expand Down
11 changes: 8 additions & 3 deletions lib/bolognese/readers/schema_org_reader.rb
Original file line number Diff line number Diff line change
Expand Up @@ -149,8 +149,13 @@ def read_schema_org(string: nil, **options)
"geoLocationBox" => geo_location_box
}.compact
end
subjects = Array.wrap(meta.fetch("keywords", nil).to_s.split(", ")).map do |s|
{ "subject" => s }

# handle keywords as array and as comma-separated string
subjects = meta.fetch("keywords", nil)
subjects = subjects.to_s.split(", ") if subjects.is_a?(String)
subjects = Array.wrap(subjects).reduce([]) do |sum, subject|
sum += name_to_fos(subject)
sum
end

{ "id" => id,
Expand All @@ -172,7 +177,7 @@ def read_schema_org(string: nil, **options)
"dates" => dates,
"descriptions" => meta.fetch("description", nil).present? ? [{ "description" => sanitize(meta.fetch("description")), "descriptionType" => "Abstract" }] : nil,
"rights_list" => rights_list,
"version" => meta.fetch("version", nil).to_s.presence,
"version_info" => meta.fetch("version", nil).to_s.presence,
"subjects" => subjects,
"state" => state,
"schema_version" => meta.fetch("schemaVersion", nil).to_s.presence,
Expand Down
94 changes: 94 additions & 0 deletions lib/bolognese/utils.rb
Original file line number Diff line number Diff line change
Expand Up @@ -1133,6 +1133,100 @@ def hsh_to_spdx(hsh)
"rightsIdentifierScheme" => hsh["rightsIdentifierScheme"],
"schemeUri" => hsh["schemeUri"],
"lang" => hsh["lang"] }.compact

# Builds the subject entries for a plain subject name.
#
# The name is first compared against the "fosLabel" of every entry under
# "fosFields" in resources/oecd/fos-mappings.json (OECD Fields of Science),
# with or without a leading "FOS: " prefix. Only if that fails is
# resources/oecd/for-mappings.json read and the name compared against the
# "forLabel" of its "forFields" and then its "forDisciplines" entries
# (Fields of Research), whose "fosLabel" gives the matching Field of Science.
#
# @param name [String] subject name as found in the source metadata
# @return [Array<Hash>] the sanitized subject on its own when nothing matched;
#   otherwise the sanitized subject followed by an extra "FOS: ..." entry
def name_to_fos(name)
  fos_path = File.expand_path('../../../resources/oecd/fos-mappings.json', __FILE__)
  fos_fields = JSON.load(File.read(fos_path)).fetch("fosFields")

  match = fos_fields.find do |field|
    field["fosLabel"] == name || "FOS: " + field["fosLabel"] == name
  end

  # the Fields of Research mappings are only loaded when the direct lookup failed
  unless match
    for_path = File.expand_path('../../../resources/oecd/for-mappings.json', __FILE__)
    mappings = JSON.load(File.read(for_path))
    research_fields = mappings.fetch("forFields")
    research_disciplines = mappings.fetch("forDisciplines")

    match = research_fields.find { |entry| entry["forLabel"] == name } ||
            research_disciplines.find { |entry| entry["forLabel"] == name }
  end

  entries = [{ "subject" => sanitize(name) }]

  if match
    # add an extra entry carrying the Fields of Science classification
    entries << {
      "subject" => "FOS: " + match["fosLabel"],
      "subjectScheme" => "Fields of Science and Technology (FOS)",
      "schemeUri" => "http://www.oecd.org/science/inno/38235147.pdf"
    }
  end

  entries
end

# Builds the subject entries for a subject given as a hash.
#
# Reads the "__content__", "subjectScheme", "schemeURI", "valueURI" and "lang"
# keys of the hash. The content is first compared against the "fosLabel" of
# every entry under "fosFields" in resources/oecd/fos-mappings.json (OECD
# Fields of Science), with or without a leading "FOS: " prefix. If that fails,
# resources/oecd/for-mappings.json (Fields of Research) is consulted:
# by "forId" when subjectScheme is "FOR", by "forLabel" otherwise.
#
# @param hsh [Hash] subject hash as described above
# @return [Array<Hash>] the original subject (nil attributes dropped) on its
#   own when nothing matched; otherwise followed by an extra "FOS: ..." entry
def hsh_to_fos(hsh)
  content = hsh["__content__"]

  # first find subject in Fields of Science (OECD)
  fos = JSON.load(File.read(File.expand_path('../../../resources/oecd/fos-mappings.json', __FILE__))).fetch("fosFields")
  subject = fos.find { |l| l["fosLabel"] == content || "FOS: " + l["fosLabel"] == content }

  unless subject
    # if not found, look in Fields of Research (Australian and New Zealand Standard Research Classification)
    # and map to Fields of Science. Add an extra entry for the latter
    fores = JSON.load(File.read(File.expand_path('../../../resources/oecd/for-mappings.json', __FILE__)))
    for_fields = fores.fetch("forFields")
    for_disciplines = fores.fetch("forDisciplines")

    if hsh["subjectScheme"] == "FOR"
      # try to extract forId from the first whitespace-separated token.
      # to_s guards against missing content (nil) and blank content (no tokens),
      # both of which previously raised NoMethodError.
      # NOTE(review): a bare 4-digit id is padded to "00dddd", so its first four
      # characters never equal the original id; confirm whether discipline-level
      # ids are expected as input here.
      for_id = content.to_s.split(" ").first.to_s.rjust(6, "0")

      subject = for_fields.find { |l| l["forId"] == for_id } ||
                for_disciplines.find { |l| l["forId"] == for_id[0..3] }
    else
      subject = for_fields.find { |l| l["forLabel"] == content } ||
                for_disciplines.find { |l| l["forLabel"] == content }
    end
  end

  # the subject as provided; sanitize is assumed to tolerate nil content,
  # as the non-FOR path already passed it through unguarded
  original = {
    "subject" => sanitize(content),
    "subjectScheme" => hsh["subjectScheme"],
    "schemeUri" => hsh["schemeURI"],
    "valueUri" => hsh["valueURI"],
    "lang" => hsh["lang"] }.compact

  return [original] unless subject

  [original,
   {
     "subject" => "FOS: " + subject["fosLabel"],
     "subjectScheme" => "Fields of Science and Technology (FOS)",
     "schemeUri" => "http://www.oecd.org/science/inno/38235147.pdf"
   }]
end
end
Expand Down
2 changes: 1 addition & 1 deletion lib/bolognese/version.rb
Original file line number Diff line number Diff line change
@@ -1,3 +1,3 @@
module Bolognese
VERSION = "1.6.1"
VERSION = "1.6.3"
end
2 changes: 1 addition & 1 deletion lib/bolognese/writers/codemeta_writer.rb
Original file line number Diff line number Diff line change
Expand Up @@ -15,7 +15,7 @@ def codemeta
"name" => parse_attributes(titles, content: "title", first: true),
"authors" => creators,
"description" => parse_attributes(descriptions, content: "description", first: true),
"version" => version,
"version" => version_info,
"tags" => subjects.present? ? Array.wrap(subjects).map { |k| parse_attributes(k, content: "subject", first: true) } : nil,
"datePublished" => get_date(dates, "Issued"),
"dateModified" => get_date(dates, "Updated"),
Expand Down
4 changes: 2 additions & 2 deletions lib/bolognese/writers/jats_writer.rb
Original file line number Diff line number Diff line change
Expand Up @@ -22,7 +22,7 @@ def insert_citation(xml)
insert_issue(xml) if container.to_h["issue"].present?
insert_fpage(xml) if container.to_h["firstPage"].present?
insert_lpage(xml) if container.to_h["lastPage"].present?
insert_version(xml) if version.present?
insert_version(xml) if version_info.present?
insert_pub_id(xml)
end

Expand Down Expand Up @@ -112,7 +112,7 @@ def insert_lpage(xml)
end

def insert_version(xml)
xml.version(version)
xml.version(version_info)
end

def insert_pub_id(xml)
Expand Down
2 changes: 1 addition & 1 deletion lib/bolognese/writers/schema_org_writer.rb
Original file line number Diff line number Diff line change
Expand Up @@ -15,7 +15,7 @@ def schema_hsh
"editor" => to_schema_org_contributors(contributors),
"description" => parse_attributes(descriptions, content: "description", first: true),
"license" => Array.wrap(rights_list).map { |l| l["rightsUri"] }.compact.unwrap,
"version" => version,
"version" => version_info,
"keywords" => subjects.present? ? Array.wrap(subjects).map { |k| parse_attributes(k, content: "subject", first: true) }.join(", ") : nil,
"inLanguage" => language,
"contentSize" => Array.wrap(sizes).unwrap,
Expand Down
Loading

0 comments on commit b0c8136

Please sign in to comment.