diff --git a/Gemfile.lock b/Gemfile.lock index 7b5684dfc..cd92acee5 100644 --- a/Gemfile.lock +++ b/Gemfile.lock @@ -106,7 +106,7 @@ GEM latex-decode (~> 0.0) binding_of_caller (0.8.0) debug_inspector (>= 0.0.1) - bolognese (1.2.3) + bolognese (1.2.4) activesupport (>= 4.2.5, < 6) benchmark_methods (~> 0.7) bibtex-ruby (~> 4.1) diff --git a/app/controllers/dois_controller.rb b/app/controllers/dois_controller.rb index d786ee203..d61946eef 100644 --- a/app/controllers/dois_controller.rb +++ b/app/controllers/dois_controller.rb @@ -397,7 +397,7 @@ def random def get_url authorize! :get_url, @doi - if !@doi.is_registered_or_findable? || %w(europ crossref).include?(@doi.provider_id) || %w(Crossref).include?(@doi.agency) + if !@doi.is_registered_or_findable? || %w(europ crossref medra).include?(@doi.provider_id) || %w(Crossref mEDRA).include?(@doi.agency) url = @doi.url head :no_content and return unless url.present? else diff --git a/app/jobs/crossref_doi_by_id_job.rb b/app/jobs/crossref_doi_by_id_job.rb index aa0c0af1a..75bd58ef3 100644 --- a/app/jobs/crossref_doi_by_id_job.rb +++ b/app/jobs/crossref_doi_by_id_job.rb @@ -19,8 +19,14 @@ def perform(id, options={}) return {} unless result.blank? end - # otherwise store Crossref metadata with DataCite - # using client crossref.citations and DataCite XML + # otherwise store DOI metadata with DataCite + # check DOI registration agency as Crossref also indexes DOIs from other RAs + # using client crossref.citations, medra.citations, etc. and DataCite XML + ra = get_doi_ra(id)&.downcase + return {} unless ra.present? + + client_id = ra.downcase + ".citations" + xml = Base64.strict_encode64(id) attributes = { "xml" => xml, @@ -35,7 +41,7 @@ def perform(id, options={}) "client" => { "data" => { "type" => "clients", - "id" => "crossref.citations" + "id" => client_id } } } @@ -54,7 +60,7 @@ def perform(id, options={}) elsif response.status == 200 logger.info "DOI #{doi} record updated." 
else - logger.error "[Error parsing Crossref DOI #{doi}]: " + response.body["errors"].inspect + logger.error "[Error parsing #{ra} DOI #{doi}]: " + response.body["errors"].inspect end end @@ -64,4 +70,18 @@ def doi_from_url(url) uri.path.gsub(/^\//, '').downcase end end + + def get_doi_ra(doi) + prefix = validate_prefix(doi) + return nil if prefix.blank? + + url = "https://doi.org/ra/#{prefix}" + result = Maremma.get(url) + + result.body.dig("data", 0, "RA") + end + + def validate_prefix(doi) + Array(/\A(?:(http|https):\/(\/)?(dx\.)?(doi.org|handle.test.datacite.org)\/)?(doi:)?(10\.\d{4,5}).*\z/.match(doi)).last + end end \ No newline at end of file diff --git a/app/jobs/url_job.rb b/app/jobs/url_job.rb index 555872c84..03bb95ca3 100644 --- a/app/jobs/url_job.rb +++ b/app/jobs/url_job.rb @@ -14,13 +14,13 @@ def perform(doi_id) response = Doi.get_doi(doi: doi.doi, agency: doi.agency) url = response.body.dig('data', 'values', 0, 'data', 'value') if url.present? - if (doi.is_registered_or_findable? || %w(europ crossref).include?(doi.provider_id)) && doi.minted.blank? + if (doi.is_registered_or_findable? || %w(europ crossref medra).include?(doi.provider_id)) && doi.minted.blank? doi.update_attributes(url: url, minted: Time.zone.now) else doi.update_attributes(url: url) end - doi.update_attributes(aasm_state: "findable") if %w(europ crossref).include?(doi.provider_id) + doi.update_attributes(aasm_state: "findable") if %w(europ crossref medra).include?(doi.provider_id) doi.__elasticsearch__.index_document diff --git a/app/models/ability.rb b/app/models/ability.rb index e0556d688..2e55aa4c7 100644 --- a/app/models/ability.rb +++ b/app/models/ability.rb @@ -12,7 +12,7 @@ def initialize(user) if user.role_id == "staff_admin" can :manage, :all cannot [:new, :create], Doi do |doi| - doi.client.blank? || !(doi.client.prefixes.where(prefix: doi.prefix).first || doi.client.symbol.downcase.start_with?("crossref.")) + doi.client.blank? 
|| !(doi.client.prefixes.where(prefix: doi.prefix).first || doi.client.symbol.downcase.start_with?("crossref.") || doi.client.symbol.downcase.start_with?("medra.")) end elsif user.role_id == "staff_user" can :read, :all @@ -65,7 +65,7 @@ def initialize(user) can [:read, :destroy, :update, :register_url, :validate, :undo, :get_url, :get_urls, :read_landing_page_results], Doi, :client_id => user.client_id can [:new, :create], Doi do |doi| - doi.client.prefixes.where(prefix: doi.prefix).present? || doi.client.symbol.downcase.start_with?("crossref.") + doi.client.prefixes.where(prefix: doi.prefix).present? || doi.client.symbol.downcase.start_with?("crossref.") || doi.client.symbol.downcase.start_with?("medra.") end can [:read], Doi do |doi| doi.findable? diff --git a/app/models/concerns/indexable.rb b/app/models/concerns/indexable.rb index 80c7bf3f8..4b9558eb7 100644 --- a/app/models/concerns/indexable.rb +++ b/app/models/concerns/indexable.rb @@ -9,7 +9,7 @@ module Indexable IndexJob.perform_later(self) if self.class.name == "Doi" update_column(:indexed, Time.zone.now) - send_import_message(self.to_jsonapi) if aasm_state == "findable" unless (Rails.env.test? || client_id == "crossref.citations") + send_import_message(self.to_jsonapi) if aasm_state == "findable" unless (Rails.env.test? || client_id == "crossref.citations" || client_id == "medra.citations") end end @@ -183,12 +183,12 @@ def query(query, options={}) must << { terms: { "software.raw" => options[:software].split(",") }} if options[:software].present? must << { term: { repository_id: options[:repository_id].gsub("/", '\/') }} if options[:repository_id].present? 
must_not << { exists: { field: "deleted_at" }} unless options[:include_deleted] - must_not << { terms: { provider_id: ["crossref"] }} if options[:exclude_registration_agencies] + must_not << { terms: { provider_id: ["crossref", "medra"] }} if options[:exclude_registration_agencies] elsif self.name == "Doi" must << { terms: { aasm_state: options[:state].to_s.split(",") }} if options[:state].present? must << { range: { registered: { gte: "#{options[:registered].split(",").min}||/y", lte: "#{options[:registered].split(",").max}||/y", format: "yyyy" }}} if options[:registered].present? must << { term: { "client.repository_id" => options[:repository_id].upcase.gsub("/", '\/') }} if options[:repository_id].present? - must_not << { terms: { provider_id: ["crossref"] }} if options[:exclude_registration_agencies] + must_not << { terms: { provider_id: ["crossref", "medra"] }} if options[:exclude_registration_agencies] elsif self.name == "Event" must << { term: { subj_id: options[:subj_id] }} if options[:subj_id].present? must << { term: { obj_id: options[:obj_id] }} if options[:obj_id].present? diff --git a/app/models/doi.rb b/app/models/doi.rb index 325500ec9..0beeb2e76 100644 --- a/app/models/doi.rb +++ b/app/models/doi.rb @@ -580,7 +580,7 @@ def registerable? # end def is_registered_or_findable? - %w(registered findable).include?(aasm_state) || %w(crossref).include?(provider_id) + %w(registered findable).include?(aasm_state) || %w(crossref medra).include?(provider_id) end def validatable? 
diff --git a/app/models/event.rb b/app/models/event.rb index 27ae1bc76..0425766c0 100644 --- a/app/models/event.rb +++ b/app/models/event.rb @@ -331,6 +331,34 @@ def self.update_datacite_crossref(options={}) response.results.total end + def self.update_datacite_medra(options={}) + logger = Logger.new(STDOUT) + + size = (options[:size] || 1000).to_i + cursor = (options[:cursor] || 0).to_i + + response = Event.query(nil, source_id: "datacite-medra", page: { size: 1, cursor: cursor }) + logger.info "[Update] #{response.results.total} events for source datacite-medra." + + # walk through results using cursor + if response.results.total > 0 + while response.results.results.length > 0 do + response = Event.query(nil, source_id: "datacite-medra", page: { size: size, cursor: cursor }) + break unless response.results.results.length > 0 + + logger.info "[Update] Updating #{response.results.results.length} datacite-medra events starting with _id #{cursor + 1}." + cursor = response.results.to_a.last[:sort].first.to_i + + dois = response.results.results.map(&:obj_id).uniq + + # use same jobs as for crossref dois + CrossrefDoiJob.perform_later(dois, options) + end + end + + response.results.total + end + def self.update_datacite_orcid_auto_update(options={}) logger = Logger.new(STDOUT) diff --git a/lib/tasks/event.rake b/lib/tasks/event.rake index c3ccee13a..f18992ad5 100644 --- a/lib/tasks/event.rake +++ b/lib/tasks/event.rake @@ -41,6 +41,15 @@ namespace :datacite_crossref do end end +namespace :datacite_medra do + desc 'Import medra dois for all events' + task :import_doi => :environment do + cursor = (ENV['CURSOR'] || Event.minimum(:id)).to_i + + Event.update_datacite_medra(cursor: cursor, refresh: ENV['REFRESH'], size: ENV['SIZE']) + end +end + namespace :datacite_orcid_auto_update do desc 'Import orcid ids for all events' task :import_orcid => :environment do