Skip to content

Commit

Permalink
include medra dois in dois index. #303
Browse files Browse the repository at this point in the history
  • Loading branch information
Martin Fenner committed Jul 5, 2019
1 parent 982522e commit 097ba72
Show file tree
Hide file tree
Showing 9 changed files with 71 additions and 14 deletions.
2 changes: 1 addition & 1 deletion Gemfile.lock
Original file line number Diff line number Diff line change
Expand Up @@ -106,7 +106,7 @@ GEM
latex-decode (~> 0.0)
binding_of_caller (0.8.0)
debug_inspector (>= 0.0.1)
bolognese (1.2.3)
bolognese (1.2.4)
activesupport (>= 4.2.5, < 6)
benchmark_methods (~> 0.7)
bibtex-ruby (~> 4.1)
Expand Down
2 changes: 1 addition & 1 deletion app/controllers/dois_controller.rb
Original file line number Diff line number Diff line change
Expand Up @@ -397,7 +397,7 @@ def random
def get_url
authorize! :get_url, @doi

if !@doi.is_registered_or_findable? || %w(europ crossref).include?(@doi.provider_id) || %w(Crossref).include?(@doi.agency)
if !@doi.is_registered_or_findable? || %w(europ crossref medra).include?(@doi.provider_id) || %w(Crossref mEDRA).include?(@doi.agency)
url = @doi.url
head :no_content and return unless url.present?
else
Expand Down
28 changes: 24 additions & 4 deletions app/jobs/crossref_doi_by_id_job.rb
Original file line number Diff line number Diff line change
Expand Up @@ -19,8 +19,14 @@ def perform(id, options={})
return {} unless result.blank?
end

# otherwise store Crossref metadata with DataCite
# using client crossref.citations and DataCite XML
# otherwise store DOI metadata with DataCite
# check DOI registration agency as Crossref also indexes DOIs from other RAs
# using client crossref.citations, medra.citations, etc. and DataCite XML
# get_doi_ra returns nil when the DOI has no valid prefix or the lookup yields no RA,
# so guard against nil before downcasing; otherwise the early return below is never reached
ra = get_doi_ra(id)&.downcase
return {} unless ra.present?

# ra is already lowercase here, e.g. "crossref.citations" or "medra.citations"
client_id = "#{ra}.citations"

xml = Base64.strict_encode64(id)
attributes = {
"xml" => xml,
Expand All @@ -35,7 +41,7 @@ def perform(id, options={})
"client" => {
"data" => {
"type" => "clients",
"id" => "crossref.citations"
"id" => client_id
}
}
}
Expand All @@ -54,7 +60,7 @@ def perform(id, options={})
elsif response.status == 200
logger.info "DOI #{doi} record updated."
else
logger.error "[Error parsing Crossref DOI #{doi}]: " + response.body["errors"].inspect
logger.error "[Error parsing #{ra} DOI #{doi}]: " + response.body["errors"].inspect
end
end

Expand All @@ -64,4 +70,18 @@ def doi_from_url(url)
uri.path.gsub(/^\//, '').downcase
end
end

# Look up the registration agency (RA) responsible for a DOI.
#
# The DOI prefix is extracted with validate_prefix and sent to
# https://doi.org/ra/<prefix>; the "RA" value of the first entry under "data"
# in the response body is returned.
#
# Returns nil when no prefix can be extracted or when the response has no such
# entry, so callers must handle nil before calling String methods on the result.
def get_doi_ra(doi)
  doi_prefix = validate_prefix(doi)
  return if doi_prefix.blank?

  lookup = Maremma.get("https://doi.org/ra/#{doi_prefix}")
  lookup.body.dig("data", 0, "RA")
end

# Extract the DOI prefix (e.g. "10.5555") from a bare DOI, a "doi:"-prefixed DOI,
# or an http(s) URL on doi.org, dx.doi.org or handle.test.datacite.org.
#
# Returns the prefix as a String, or nil when the input is nil or does not
# start with a recognizable DOI.
def validate_prefix(doi)
  # Dots in the host names are escaped so they match only a literal ".";
  # the single capture group is the prefix itself.
  match = /\A(?:https?:\/\/?(?:dx\.)?(?:doi\.org|handle\.test\.datacite\.org)\/)?(?:doi:)?(10\.\d{4,5}).*\z/.match(doi)
  match && match[1]
end
end
4 changes: 2 additions & 2 deletions app/jobs/url_job.rb
Original file line number Diff line number Diff line change
Expand Up @@ -14,13 +14,13 @@ def perform(doi_id)
response = Doi.get_doi(doi: doi.doi, agency: doi.agency)
url = response.body.dig('data', 'values', 0, 'data', 'value')
if url.present?
if (doi.is_registered_or_findable? || %w(europ crossref).include?(doi.provider_id)) && doi.minted.blank?
if (doi.is_registered_or_findable? || %w(europ crossref medra).include?(doi.provider_id)) && doi.minted.blank?
doi.update_attributes(url: url, minted: Time.zone.now)
else
doi.update_attributes(url: url)
end

doi.update_attributes(aasm_state: "findable") if %w(europ crossref).include?(doi.provider_id)
doi.update_attributes(aasm_state: "findable") if %w(europ crossref medra).include?(doi.provider_id)

doi.__elasticsearch__.index_document

Expand Down
4 changes: 2 additions & 2 deletions app/models/ability.rb
Original file line number Diff line number Diff line change
Expand Up @@ -12,7 +12,7 @@ def initialize(user)
if user.role_id == "staff_admin"
can :manage, :all
cannot [:new, :create], Doi do |doi|
doi.client.blank? || !(doi.client.prefixes.where(prefix: doi.prefix).first || doi.client.symbol.downcase.start_with?("crossref."))
# forbid creation when the DOI has no client, or when the client neither owns the
# DOI's prefix nor is a "crossref."/"medra." client
doi.client.blank? || !(doi.client.prefixes.where(prefix: doi.prefix).first || doi.client.symbol.downcase.start_with?("crossref.", "medra."))
end
elsif user.role_id == "staff_user"
can :read, :all
Expand Down Expand Up @@ -65,7 +65,7 @@ def initialize(user)

can [:read, :destroy, :update, :register_url, :validate, :undo, :get_url, :get_urls, :read_landing_page_results], Doi, :client_id => user.client_id
can [:new, :create], Doi do |doi|
doi.client.prefixes.where(prefix: doi.prefix).present? || doi.client.symbol.downcase.start_with?("crossref.")
doi.client.prefixes.where(prefix: doi.prefix).present? || doi.client.symbol.downcase.start_with?("crossref.") || doi.client.symbol.downcase.start_with?("medra.")
end
can [:read], Doi do |doi|
doi.findable?
Expand Down
6 changes: 3 additions & 3 deletions app/models/concerns/indexable.rb
Original file line number Diff line number Diff line change
Expand Up @@ -9,7 +9,7 @@ module Indexable
IndexJob.perform_later(self)
if self.class.name == "Doi"
update_column(:indexed, Time.zone.now)
send_import_message(self.to_jsonapi) if aasm_state == "findable" unless (Rails.env.test? || client_id == "crossref.citations")
send_import_message(self.to_jsonapi) if aasm_state == "findable" unless (Rails.env.test? || client_id == "crossref.citations" || client_id == "medra.citations")
end
end

Expand Down Expand Up @@ -183,12 +183,12 @@ def query(query, options={})
must << { terms: { "software.raw" => options[:software].split(",") }} if options[:software].present?
must << { term: { repository_id: options[:repository_id].gsub("/", '\/') }} if options[:repository_id].present?
must_not << { exists: { field: "deleted_at" }} unless options[:include_deleted]
must_not << { terms: { provider_id: ["crossref"] }} if options[:exclude_registration_agencies]
must_not << { terms: { provider_id: ["crossref", "medra"] }} if options[:exclude_registration_agencies]
elsif self.name == "Doi"
must << { terms: { aasm_state: options[:state].to_s.split(",") }} if options[:state].present?
must << { range: { registered: { gte: "#{options[:registered].split(",").min}||/y", lte: "#{options[:registered].split(",").max}||/y", format: "yyyy" }}} if options[:registered].present?
must << { term: { "client.repository_id" => options[:repository_id].upcase.gsub("/", '\/') }} if options[:repository_id].present?
must_not << { terms: { provider_id: ["crossref"] }} if options[:exclude_registration_agencies]
must_not << { terms: { provider_id: ["crossref", "medra"] }} if options[:exclude_registration_agencies]
elsif self.name == "Event"
must << { term: { subj_id: options[:subj_id] }} if options[:subj_id].present?
must << { term: { obj_id: options[:obj_id] }} if options[:obj_id].present?
Expand Down
2 changes: 1 addition & 1 deletion app/models/doi.rb
Original file line number Diff line number Diff line change
Expand Up @@ -580,7 +580,7 @@ def registerable?
# end

# True when aasm_state is "registered" or "findable", or when provider_id is one
# of the listed registration-agency providers.
# NOTE(review): the two expression lines below appear to be the pre- and
# post-change versions from the diff view; only the last one determines the
# return value (it adds "medra" to the provider list).
def is_registered_or_findable?
%w(registered findable).include?(aasm_state) || %w(crossref).include?(provider_id)
%w(registered findable).include?(aasm_state) || %w(crossref medra).include?(provider_id)
end

def validatable?
Expand Down
28 changes: 28 additions & 0 deletions app/models/event.rb
Original file line number Diff line number Diff line change
Expand Up @@ -331,6 +331,34 @@ def self.update_datacite_crossref(options={})
response.results.total
end

# Queue DOI imports for all events with source "datacite-medra".
#
# options[:cursor] (default 0) is passed as the initial page cursor and
# options[:size] (default 1000) as the page size. For every non-empty page the
# unique obj_id values are handed to CrossrefDoiJob together with the unmodified
# options hash. Returns the total reported by the last query response.
def self.update_datacite_medra(options={})
  logger = Logger.new(STDOUT)
  source = "datacite-medra"

  cursor = (options[:cursor] || 0).to_i
  page_size = (options[:size] || 1000).to_i

  # a single-record query is enough to learn how many events match
  response = Event.query(nil, source_id: source, page: { size: 1, cursor: cursor })
  logger.info "[Update] #{response.results.total} events for source datacite-medra."

  # walk through results using cursor
  if response.results.total > 0 && response.results.results.length > 0
    loop do
      response = Event.query(nil, source_id: source, page: { size: page_size, cursor: cursor })
      hits = response.results.results
      break if hits.length.zero?

      logger.info "[Update] Updating #{hits.length} datacite-medra events starting with _id #{cursor + 1}."

      # the sort value of the last hit becomes the cursor for the next page
      cursor = response.results.to_a.last[:sort].first.to_i

      # use same jobs as for crossref dois
      CrossrefDoiJob.perform_later(hits.map(&:obj_id).uniq, options)
    end
  end

  response.results.total
end

def self.update_datacite_orcid_auto_update(options={})
logger = Logger.new(STDOUT)

Expand Down
9 changes: 9 additions & 0 deletions lib/tasks/event.rake
Original file line number Diff line number Diff line change
Expand Up @@ -41,6 +41,15 @@ namespace :datacite_crossref do
end
end

namespace :datacite_medra do
  desc 'Import medra dois for all events'
  task import_doi: :environment do
    # CURSOR overrides the starting point; otherwise start from the smallest event id
    start_cursor = ENV.fetch('CURSOR') { Event.minimum(:id) }.to_i
    import_options = { cursor: start_cursor, refresh: ENV['REFRESH'], size: ENV['SIZE'] }

    Event.update_datacite_medra(import_options)
  end
end

namespace :datacite_orcid_auto_update do
desc 'Import orcid ids for all events'
task :import_orcid => :environment do
Expand Down

0 comments on commit 097ba72

Please sign in to comment.