From 52c6e7adc8a948b9a60b82a9725610ba1e486283 Mon Sep 17 00:00:00 2001 From: Martin Fenner Date: Sun, 20 Sep 2020 10:09:18 +0200 Subject: [PATCH] rake tasks to re-import metadata based on Elasticsearch query #638 --- app/jobs/import_doi_job.rb | 7 +++++++ app/models/doi.rb | 11 +++++------ lib/tasks/doi.rake | 22 +++++++++++++++++++--- spec/jobs/import_doi_job_spec.rb | 16 ++++++++++++++++ spec/jobs/update_doi_job_spec.rb | 16 ++++++++++++++++ 5 files changed, 63 insertions(+), 9 deletions(-) create mode 100644 app/jobs/import_doi_job.rb create mode 100644 spec/jobs/import_doi_job_spec.rb create mode 100644 spec/jobs/update_doi_job_spec.rb diff --git a/app/jobs/import_doi_job.rb b/app/jobs/import_doi_job.rb new file mode 100644 index 000000000..ada847c17 --- /dev/null +++ b/app/jobs/import_doi_job.rb @@ -0,0 +1,7 @@ +class ImportDoiJob < ActiveJob::Base + queue_as :lupo_background + + def perform(doi_id, options={}) + Doi.import_one(doi_id: doi_id) + end +end diff --git a/app/models/doi.rb b/app/models/doi.rb index 9fa208905..d90f949f9 100644 --- a/app/models/doi.rb +++ b/app/models/doi.rb @@ -1224,8 +1224,8 @@ def self.index_one(doi_id: nil) def self.import_one(doi_id: nil) doi = Doi.where(doi: doi_id).first - if doi.nil? - Rails.logger.error "[MySQL] DOI #{doi_id} not found." + if doi.blank? + Rails.logger.error "[MySQL] Error importing DOI " + doi_id + ": not found" return nil end @@ -1238,17 +1238,16 @@ def self.import_one(doi_id: nil) meta = doi.read_datacite(string: string, sandbox: doi.sandbox) attrs = %w(creators contributors titles publisher publication_year types descriptions container sizes formats language dates identifiers related_identifiers funding_references geo_locations rights_list subjects content_url version_info).map do |a| [a.to_sym, meta[a]] - end.to_h.merge(schema_version: meta["schema_version"] || "http://datacite.org/schema/kernel-4", xml: string) + end.to_h.merge(schema_version: meta["schema_version"] || "http://datacite.org/schema/kernel-4", xml: string, version: doi.version.to_i + 1) # update_attributes will trigger validations and Elasticsearch indexing doi.update_attributes(attrs) - Rails.logger.warn "[MySQL] Imported metadata for DOI " + doi.doi + "." - "Imported DOI #{doi.doi}." + Rails.logger.info "[MySQL] Imported metadata for DOI " + doi.doi + "." rescue TypeError, NoMethodError, RuntimeError, ActiveRecord::StatementInvalid, ActiveRecord::LockWaitTimeout => e if doi.present? Rails.logger.error "[MySQL] Error importing metadata for " + doi.doi + ": " + e.message - "Imported DOI #{doi.doi}." else + Rails.logger.error "[MySQL] Error importing metadata: " + e.message Raven.capture_exception(e) end end diff --git a/lib/tasks/doi.rake b/lib/tasks/doi.rake index cb7122359..e38f45348 100644 --- a/lib/tasks/doi.rake +++ b/lib/tasks/doi.rake @@ -154,10 +154,10 @@ namespace :doi do Doi.loop_through_dois(options) end - desc "Trigger DOI Update based on query" + desc "Trigger DOI update based on query" task update_dois_by_query: :environment do - # Ensure we have to specify a query of some kind. - if ENV['QUERY'].nil? + # Ensure we have specified a query of some kind. + if ENV['QUERY'].blank? puts "ENV['QUERY'] is required" exit end @@ -171,6 +171,22 @@ namespace :doi do puts Doi.loop_through_dois(options) end + desc "Trigger DOI import based on query" + task import_dois_by_query: :environment do + # Ensure we have specified a query of some kind. + if ENV['QUERY'].blank? + puts "ENV['QUERY'] is required" + exit + end + + options = { + query: ENV["QUERY"], + label: "[ImportDoiByQuery]", + job_name: "ImportDoiJob", + cursor: ENV["CURSOR"].present? ? Base64.urlsafe_decode64(ENV["CURSOR"]).split(",", 2) : [], + } + puts Doi.loop_through_dois(options) + end # until all Crossref DOIs are indexed as otherDoi desc "Refresh metadata" diff --git a/spec/jobs/import_doi_job_spec.rb b/spec/jobs/import_doi_job_spec.rb new file mode 100644 index 000000000..f93969fe6 --- /dev/null +++ b/spec/jobs/import_doi_job_spec.rb @@ -0,0 +1,16 @@ +require "rails_helper" + +describe ImportDoiJob, type: :job do + let(:doi) { create(:doi) } + subject(:job) { ImportDoiJob.perform_later(doi.doi) } + + it "queues the job" do + expect { job }.to have_enqueued_job(ImportDoiJob) + .on_queue("test_lupo_background").at_least(1).times + end + + after do + clear_enqueued_jobs + clear_performed_jobs + end +end diff --git a/spec/jobs/update_doi_job_spec.rb b/spec/jobs/update_doi_job_spec.rb new file mode 100644 index 000000000..8b514fcd9 --- /dev/null +++ b/spec/jobs/update_doi_job_spec.rb @@ -0,0 +1,16 @@ +require "rails_helper" + +describe UpdateDoiJob, type: :job do + let(:doi) { create(:doi) } + subject(:job) { UpdateDoiJob.perform_later(doi.doi) } + + it "queues the job" do + expect { job }.to have_enqueued_job(UpdateDoiJob) + .on_queue("test_lupo_background").at_least(1).times + end + + after do + clear_enqueued_jobs + clear_performed_jobs + end +end