From d732568ab9d23c3608df7271dcb8a2a935438fad Mon Sep 17 00:00:00 2001 From: Martin Fenner Date: Thu, 4 Jul 2019 08:43:08 +0200 Subject: [PATCH] allow manual updates of crossref and orcid metadata stored with datacite. #301 --- Gemfile.lock | 2 +- app/jobs/crossref_doi_by_id_job.rb | 21 +++++++++++++-------- app/jobs/crossref_doi_job.rb | 4 ++-- app/jobs/orcid_auto_update_by_id_job.rb | 11 +++++++---- app/jobs/orcid_auto_update_job.rb | 4 ++-- app/models/event.rb | 4 ++-- lib/tasks/event.rake | 4 ++-- 7 files changed, 29 insertions(+), 21 deletions(-) diff --git a/Gemfile.lock b/Gemfile.lock index 65d39724f..22edd452a 100644 --- a/Gemfile.lock +++ b/Gemfile.lock @@ -106,7 +106,7 @@ GEM latex-decode (~> 0.0) binding_of_caller (0.8.0) debug_inspector (>= 0.0.1) - bolognese (1.2) + bolognese (1.2.2) activesupport (>= 4.2.5, < 6) benchmark_methods (~> 0.7) bibtex-ruby (~> 4.1) diff --git a/app/jobs/crossref_doi_by_id_job.rb b/app/jobs/crossref_doi_by_id_job.rb index 73ff468be..aa0c0af1a 100644 --- a/app/jobs/crossref_doi_by_id_job.rb +++ b/app/jobs/crossref_doi_by_id_job.rb @@ -6,15 +6,18 @@ class CrossrefDoiByIdJob < ActiveJob::Base # discard_on ActiveJob::DeserializationError - def perform(id) + def perform(id, options={}) logger = Logger.new(STDOUT) doi = doi_from_url(id) return {} unless doi.present? - # check whether DOI has been registered with DataCite already - result = Doi.find_by_id(doi).results.first - return {} unless result.blank? + # check whether DOI has been stored with DataCite already + # unless we want to refresh the metadata + unless options[:refresh] + result = Doi.find_by_id(doi).results.first + return {} unless result.blank? + end # otherwise store Crossref metadata with DataCite # using client crossref.citations and DataCite XML @@ -42,14 +45,16 @@ def perform(id) url = "http://localhost/dois/#{doi}" response = Maremma.put(url, accept: 'application/vnd.api+json', content_type: 'application/vnd.api+json', - data: data.to_json, + data: data.to_json, username: ENV["ADMIN_USERNAME"], password: ENV["ADMIN_PASSWORD"]) - if [200, 201].include?(response.status) - logger.info "DOI #{doi} created." + if response.status == 201 + logger.info "DOI #{doi} record created." + elsif response.status == 200 + logger.info "DOI #{doi} record updated." else - logger.warn "[Error for DOI #{doi}]: " + response.body["errors"].inspect + logger.error "[Error parsing Crossref DOI #{doi}]: " + response.body["errors"].inspect end end diff --git a/app/jobs/crossref_doi_job.rb b/app/jobs/crossref_doi_job.rb index caf5f6bac..1608c7d79 100644 --- a/app/jobs/crossref_doi_job.rb +++ b/app/jobs/crossref_doi_job.rb @@ -1,7 +1,7 @@ class CrossrefDoiJob < ActiveJob::Base queue_as :lupo_background - def perform(ids) - ids.each { |id| CrossrefDoiByIdJob.perform_later(id) } + def perform(ids, options={}) + ids.each { |id| CrossrefDoiByIdJob.perform_later(id, options) } end end diff --git a/app/jobs/orcid_auto_update_by_id_job.rb b/app/jobs/orcid_auto_update_by_id_job.rb index a7d9c3502..7635b432c 100644 --- a/app/jobs/orcid_auto_update_by_id_job.rb +++ b/app/jobs/orcid_auto_update_by_id_job.rb @@ -6,15 +6,18 @@ class OrcidAutoUpdateByIdJob < ActiveJob::Base # discard_on ActiveJob::DeserializationError - def perform(id) + def perform(id, options={}) logger = Logger.new(STDOUT) orcid = orcid_from_url(id) return {} unless orcid.present? - # check whether ORCID ID has been registered with DataCite already - result = Researcher.find_by_id(orcid).results.first - return {} unless result.blank? + # check whether ORCID ID has been stored with DataCite already + # unless we want to refresh the metadata + unless options[:refresh] + result = Researcher.find_by_id(orcid).results.first + return {} unless result.blank? + end # otherwise fetch basic ORCID metadata and store with DataCite url = "https://pub.orcid.org/v2.1/#{orcid}/person" diff --git a/app/jobs/orcid_auto_update_job.rb b/app/jobs/orcid_auto_update_job.rb index 40474ef03..08fbe7872 100644 --- a/app/jobs/orcid_auto_update_job.rb +++ b/app/jobs/orcid_auto_update_job.rb @@ -1,7 +1,7 @@ class OrcidAutoUpdateJob < ActiveJob::Base queue_as :lupo_background - def perform(ids) - ids.each { |id| OrcidAutoUpdateByIdJob.perform_later(id) } + def perform(ids, options={}) + ids.each { |id| OrcidAutoUpdateByIdJob.perform_later(id, options) } end end diff --git a/app/models/event.rb b/app/models/event.rb index 1f4cd3759..27ae1bc76 100644 --- a/app/models/event.rb +++ b/app/models/event.rb @@ -324,7 +324,7 @@ def self.update_datacite_crossref(options={}) cursor = response.results.to_a.last[:sort].first.to_i dois = response.results.results.map(&:obj_id).uniq - CrossrefDoiJob.perform_later(dois) + CrossrefDoiJob.perform_later(dois, options) end end @@ -350,7 +350,7 @@ def self.update_datacite_orcid_auto_update(options={}) cursor = response.results.to_a.last[:sort].first.to_i ids = response.results.results.map(&:obj_id).uniq - OrcidAutoUpdateJob.perform_later(ids) + OrcidAutoUpdateJob.perform_later(ids, options) end end diff --git a/lib/tasks/event.rake b/lib/tasks/event.rake index e9e206e05..c3ccee13a 100644 --- a/lib/tasks/event.rake +++ b/lib/tasks/event.rake @@ -37,7 +37,7 @@ namespace :datacite_crossref do task :import_doi => :environment do cursor = (ENV['CURSOR'] || Event.minimum(:id)).to_i - Event.update_datacite_crossref(cursor: cursor) + Event.update_datacite_crossref(cursor: cursor, refresh: ENV['REFRESH'], size: ENV['SIZE']) end end @@ -46,6 +46,6 @@ namespace :datacite_orcid_auto_update do task :import_orcid => :environment do cursor = (ENV['CURSOR'] || Event.minimum(:id)).to_i - Event.update_datacite_orcid_auto_update(cursor: cursor) + Event.update_datacite_orcid_auto_update(cursor: cursor, refresh: ENV['REFRESH'], size: ENV['SIZE']) end end