Skip to content

Commit

Permalink
add support for files in multiple formats in json_feed and crossref_xml writer
Browse files Browse the repository at this point in the history
  • Loading branch information
mfenner committed Jan 9, 2024
1 parent af4deee commit d960ccf
Show file tree
Hide file tree
Showing 9 changed files with 179 additions and 90 deletions.
23 changes: 12 additions & 11 deletions Gemfile.lock
Original file line number Diff line number Diff line change
@@ -1,7 +1,7 @@
PATH
remote: .
specs:
commonmeta-ruby (3.7.3)
commonmeta-ruby (3.8.0)
activesupport (>= 4.2.5, < 8.0)
addressable (~> 2.8.1, < 2.8.2)
base32-url (>= 0.7.0, < 1)
Expand Down Expand Up @@ -44,8 +44,9 @@ GEM
uuidtools (~> 2.1, >= 2.1.5)
base64 (0.2.0)
bcp47_spec (0.2.1)
bibtex-ruby (6.0.0)
bibtex-ruby (6.1.0)
latex-decode (~> 0.0)
racc (~> 1.7)
bigdecimal (3.1.5)
builder (3.2.4)
citeproc (1.0.10)
Expand All @@ -67,7 +68,7 @@ GEM
csl (~> 2.0)
diff-lcs (1.5.0)
docile (1.4.0)
domain_name (0.6.20231109)
domain_name (0.6.20240107)
drb (2.2.0)
ruby2_keywords
ebnf (2.5.0)
Expand Down Expand Up @@ -135,13 +136,13 @@ GEM
multi_json (1.15.0)
mutex_m (0.2.0)
namae (1.1.1)
nokogiri (1.15.5-arm64-darwin)
nokogiri (1.16.0-arm64-darwin)
racc (~> 1.4)
oj (3.16.3)
bigdecimal (>= 3.0)
optimist (3.1.0)
parallel (1.24.0)
parser (3.2.2.4)
parser (3.3.0.2)
ast (~> 2.4.1)
racc
postrank-uri (1.1)
Expand Down Expand Up @@ -171,7 +172,7 @@ GEM
rdf-xsd (3.3.0)
rdf (~> 3.3)
rexml (~> 3.2)
regexp_parser (2.8.3)
regexp_parser (2.9.0)
rexml (3.2.6)
rspec (3.12.0)
rspec-core (~> 3.12.0)
Expand Down Expand Up @@ -202,16 +203,16 @@ GEM
unicode-display_width (>= 2.4.0, < 3.0)
rubocop-ast (1.30.0)
parser (>= 3.2.1.0)
rubocop-capybara (2.19.0)
rubocop-capybara (2.20.0)
rubocop (~> 1.41)
rubocop-factory_bot (2.24.0)
rubocop (~> 1.33)
rubocop-performance (1.20.1)
rubocop-factory_bot (2.25.1)
rubocop (~> 1.41)
rubocop-performance (1.20.2)
rubocop (>= 1.48.1, < 2.0)
rubocop-ast (>= 1.30.0, < 2.0)
rubocop-rake (0.6.0)
rubocop (~> 1.0)
rubocop-rspec (2.25.0)
rubocop-rspec (2.26.1)
rubocop (~> 1.40)
rubocop-capybara (~> 2.17)
rubocop-factory_bot (~> 2.22)
Expand Down
8 changes: 7 additions & 1 deletion lib/commonmeta/crossref_utils.rb
Original file line number Diff line number Diff line change
Expand Up @@ -342,11 +342,17 @@ def insert_doi_data(xml)
return xml if doi_from_url(id).blank? || url.blank?

xml.doi_data do
xml.doi(doi_from_url(id).downcase)
doi = doi_from_url(id).downcase
xml.doi(doi)
xml.resource(url)
xml.collection("property" => "text-mining") do
xml.item do
xml.resource(url, "mime_type" => "text/html")
if is_rogue_scholar_doi?(doi)
Array.wrap(files).each do |file|
xml.resource(file["url"], "mime_type" => file["mimeType"])
end
end
end
end
end
Expand Down
5 changes: 5 additions & 0 deletions lib/commonmeta/doi_utils.rb
Original file line number Diff line number Diff line change
Expand Up @@ -77,5 +77,10 @@ def get_crossref_member(member_id)

{ 'id' => "https://api.crossref.org/members/#{member_id}", 'name' => name }
end

# Reports whether the prefix extracted from +doi+ (via validate_prefix) is one
# of the DOI prefixes listed below.
# NOTE(review): going by the method name, these are presumably the prefixes
# used by Rogue Scholar — confirm the list is complete and current.
#
# @param doi the DOI handed to validate_prefix to obtain its prefix
# @return [Boolean] true when the extracted prefix is in the list
def is_rogue_scholar_doi?(doi)
  rogue_scholar_prefixes = %w[10.34732 10.53731 10.54900 10.57689 10.59348 10.59349 10.59350]
  rogue_scholar_prefixes.include?(validate_prefix(doi))
end
end
end
128 changes: 66 additions & 62 deletions lib/commonmeta/metadata.rb
Original file line number Diff line number Diff line change
@@ -1,6 +1,6 @@
# frozen_string_literal: true

require_relative 'metadata_utils'
require_relative "metadata_utils"

module Commonmeta
class Metadata
Expand All @@ -10,7 +10,11 @@ class Metadata
:email, :registrant
attr_reader :doc, :page_start, :page_end
attr_writer :id, :provider_id, :client_id, :doi, :alternate_identifiers, :contributors,
:titles, :publisher, :license, :date, :volume, :url, :version, :subjects, :descriptions, :language, :sizes, :formats, :schema_version, :meta, :container, :provider, :format, :funding_references, :state, :geo_locations, :type, :additional_type, :files, :references, :related_identifiers, :related_items, :style, :locale, :archive_locations
:titles, :publisher, :license, :date, :volume, :url, :version, :subjects,
:descriptions, :language, :sizes, :formats, :schema_version, :meta, :container,
:provider, :format, :funding_references, :state, :geo_locations, :type,
:additional_type, :files, :references, :related_identifiers, :related_items,
:style, :locale, :archive_locations

def initialize(options = {})
options.symbolize_keys!
Expand All @@ -23,29 +27,29 @@ def initialize(options = {})

# mEDRA, KISTI, JaLC and OP DOIs are found in the Crossref index
case @from
when 'medra'
ra = 'mEDRA'
when 'kisti'
ra = 'KISTI'
when 'jalc'
ra = 'JaLC'
when 'op'
ra = 'OP'
when "medra"
ra = "mEDRA"
when "kisti"
ra = "KISTI"
when "jalc"
ra = "JaLC"
when "op"
ra = "OP"
end

# generate name for method to call dynamically
hsh = @from.present? ? send("get_#{@from}", id: id, **options) : {}
string = hsh.fetch('string', nil)
string = hsh.fetch("string", nil)
elsif options[:input].present? && File.exist?(options[:input])
filename = File.basename(options[:input])
ext = File.extname(options[:input])
if %w[.bib .ris .xml .json .cff].include?(ext)
hsh = {
'url' => options[:url],
'state' => options[:state],
'provider_id' => options[:provider_id],
'client_id' => options[:client_id],
'files' => options[:files]
"url" => options[:url],
"state" => options[:state],
"provider_id" => options[:provider_id],
"client_id" => options[:client_id],
"files" => options[:files],
}
string = File.read(options[:input])
@from = options[:from] || find_from_format(string: string, ext: ext)
Expand All @@ -55,22 +59,22 @@ def initialize(options = {})
end
else
hsh = {
'url' => options[:url],
'state' => options[:state],
'provider_id' => options[:provider_id],
'client_id' => options[:client_id],
'files' => options[:files],
'contributors' => options[:contributors],
'titles' => options[:titles],
'publisher' => options[:publisher]
"url" => options[:url],
"state" => options[:state],
"provider_id" => options[:provider_id],
"client_id" => options[:client_id],
"files" => options[:files],
"contributors" => options[:contributors],
"titles" => options[:titles],
"publisher" => options[:publisher],
}
string = options[:input]
@from = options[:from] || find_from_format(string: string)
end

# make sure input is encoded as utf8
if string.present? && string.is_a?(String)
dup_string = string.dup.force_encoding('UTF-8').encode!
dup_string = string.dup.force_encoding("UTF-8").encode!
end
@string = dup_string

Expand All @@ -81,11 +85,11 @@ def initialize(options = {})
@sandbox = options[:sandbox]

# options that come from the datacite database
@url = hsh.to_h['url'].presence || options[:url].presence
@state = hsh.to_h['state'].presence
@provider_id = hsh.to_h['provider_id'].presence
@client_id = hsh.to_h['client_id'].presence
@files = hsh.to_h['files'].presence
@url = hsh.to_h["url"].presence || options[:url].presence
@state = hsh.to_h["state"].presence
@provider_id = hsh.to_h["provider_id"].presence
@client_id = hsh.to_h["client_id"].presence
@files = hsh.to_h["files"].presence

# options that come from the cli, needed
# for crossref doi registration
Expand Down Expand Up @@ -125,23 +129,23 @@ def initialize(options = {})
end

def id
@id ||= meta.fetch('id', nil)
@id ||= meta.fetch("id", nil)
end

def doi
@doi ||= meta.fetch('doi', nil)
@doi ||= meta.fetch("doi", nil)
end

def provider_id
@provider_id ||= meta.fetch('provider_id', nil)
@provider_id ||= meta.fetch("provider_id", nil)
end

def client_id
@client_id ||= meta.fetch('client_id', nil)
@client_id ||= meta.fetch("client_id", nil)
end

def exists?
(@state || meta.fetch('state', nil)) != 'not_found'
(@state || meta.fetch("state", nil)) != "not_found"
end

def valid?
Expand All @@ -151,111 +155,111 @@ def valid?
# Catch errors in the reader
# Then validate against JSON schema for Commonmeta
def errors
meta.fetch('errors', nil) || json_schema_errors
meta.fetch("errors", nil) || json_schema_errors
end

def descriptions
@descriptions ||= meta.fetch('descriptions', nil)
@descriptions ||= meta.fetch("descriptions", nil)
end

def license
@license ||= meta.fetch('license', nil)
@license ||= meta.fetch("license", nil)
end

def subjects
@subjects ||= meta.fetch('subjects', nil)
@subjects ||= meta.fetch("subjects", nil)
end

def language
@language ||= meta.fetch('language', nil)
@language ||= meta.fetch("language", nil)
end

def sizes
@sizes ||= meta.fetch('sizes', nil)
@sizes ||= meta.fetch("sizes", nil)
end

def formats
@formats ||= meta.fetch('formats', nil)
@formats ||= meta.fetch("formats", nil)
end

def schema_version
@schema_version ||= meta.fetch('schema_version', nil)
@schema_version ||= meta.fetch("schema_version", nil)
end

def funding_references
@funding_references ||= meta.fetch('funding_references', nil)
@funding_references ||= meta.fetch("funding_references", nil)
end

def references
@references ||= meta.fetch('references', nil)
@references ||= meta.fetch("references", nil)
end

def related_identifiers
@related_identifiers ||= meta.fetch('related_identifiers', nil)
@related_identifiers ||= meta.fetch("related_identifiers", nil)
end

def related_items
@related_items ||= meta.fetch('related_items', nil)
@related_items ||= meta.fetch("related_items", nil)
end

def url
@url ||= meta.fetch('url', nil)
@url ||= meta.fetch("url", nil)
end

def version
@version ||= meta.fetch('version', nil)
@version ||= meta.fetch("version", nil)
end

def container
@container ||= meta.fetch('container', nil)
@container ||= meta.fetch("container", nil)
end

def geo_locations
@geo_locations ||= meta.fetch('geo_locations', nil)
@geo_locations ||= meta.fetch("geo_locations", nil)
end

def date
@date ||= meta.fetch('date', nil)
@date ||= meta.fetch("date", nil)
end

def publisher
@publisher ||= meta.fetch('publisher', nil)
@publisher ||= meta.fetch("publisher", nil)
end

def alternate_identifiers
@alternate_identifiers ||= meta.fetch('alternate_identifiers', nil)
@alternate_identifiers ||= meta.fetch("alternate_identifiers", nil)
end

def files
@files ||= meta.fetch('files', nil)
@files ||= meta.fetch("files", nil)
end

def provider
@provider ||= meta.fetch('provider', nil)
@provider ||= meta.fetch("provider", nil)
end

def state
@state ||= meta.fetch('state', nil)
@state ||= meta.fetch("state", nil)
end

def type
@type ||= meta.fetch('type', nil)
@type ||= meta.fetch("type", nil)
end

def additional_type
@additional_type ||= meta.fetch('additional_type', nil)
@additional_type ||= meta.fetch("additional_type", nil)
end

def titles
@titles ||= meta.fetch('titles', nil)
@titles ||= meta.fetch("titles", nil)
end

def contributors
@contributors ||= meta.fetch('contributors', nil)
@contributors ||= meta.fetch("contributors", nil)
end

def archive_locations
@archive_locations ||= meta.fetch('archive_locations', nil)
@archive_locations ||= meta.fetch("archive_locations", nil)
end
end
end
Loading

0 comments on commit d960ccf

Please sign in to comment.