Skip to content

Commit

Permalink
rake tasks to re-import metadata based on Elasticsearch query #638
Browse files Browse the repository at this point in the history
  • Loading branch information
Martin Fenner committed Sep 20, 2020
1 parent d1cb8c0 commit 52c6e7a
Show file tree
Hide file tree
Showing 5 changed files with 63 additions and 9 deletions.
7 changes: 7 additions & 0 deletions app/jobs/import_doi_job.rb
Original file line number Diff line number Diff line change
@@ -0,0 +1,7 @@
# Active Job wrapper around Doi.import_one for a single DOI.
# Jobs of this class are placed on the :lupo_background queue.
class ImportDoiJob < ActiveJob::Base
  queue_as :lupo_background

  # doi_id  - identifier handed to Doi.import_one as its doi_id: keyword.
  # options - accepted so callers may pass an extra hash; not read here.
  def perform(doi_id, options = {})
    Doi.import_one(doi_id: doi_id)
  end
end
11 changes: 5 additions & 6 deletions app/models/doi.rb
Original file line number Diff line number Diff line change
Expand Up @@ -1224,8 +1224,8 @@ def self.index_one(doi_id: nil)

def self.import_one(doi_id: nil)
doi = Doi.where(doi: doi_id).first
if doi.nil?
Rails.logger.error "[MySQL] DOI #{doi_id} not found."
if doi.blank?
Rails.logger.error "[MySQL] Error importing DOI " + doi_id + ": not found"
return nil
end

Expand All @@ -1238,17 +1238,16 @@ def self.import_one(doi_id: nil)
meta = doi.read_datacite(string: string, sandbox: doi.sandbox)
attrs = %w(creators contributors titles publisher publication_year types descriptions container sizes formats language dates identifiers related_identifiers funding_references geo_locations rights_list subjects content_url version_info).map do |a|
[a.to_sym, meta[a]]
end.to_h.merge(schema_version: meta["schema_version"] || "http://datacite.org/schema/kernel-4", xml: string)
end.to_h.merge(schema_version: meta["schema_version"] || "http://datacite.org/schema/kernel-4", xml: string, version: doi.version.to_i + 1)

# update_attributes will trigger validations and Elasticsearch indexing
doi.update_attributes(attrs)
Rails.logger.warn "[MySQL] Imported metadata for DOI " + doi.doi + "."
"Imported DOI #{doi.doi}."
Rails.logger.info "[MySQL] Imported metadata for DOI " + doi.doi + "."
rescue TypeError, NoMethodError, RuntimeError, ActiveRecord::StatementInvalid, ActiveRecord::LockWaitTimeout => e
if doi.present?
Rails.logger.error "[MySQL] Error importing metadata for " + doi.doi + ": " + e.message
"Imported DOI #{doi.doi}."
else
Rails.logger.error "[MySQL] Error importing metadata: " + e.message
Raven.capture_exception(e)
end
end
Expand Down
22 changes: 19 additions & 3 deletions lib/tasks/doi.rake
Original file line number Diff line number Diff line change
Expand Up @@ -154,10 +154,10 @@ namespace :doi do
Doi.loop_through_dois(options)
end

desc "Trigger DOI Update based on query"
desc "Trigger DOI update based on query"
task update_dois_by_query: :environment do
# Ensure we have to specify a query of some kind.
if ENV['QUERY'].nil?
# Ensure we have specified a query of some kind.
if ENV['QUERY'].blank?
puts "ENV['QUERY'] is required"
exit
end
Expand All @@ -171,6 +171,22 @@ namespace :doi do
puts Doi.loop_through_dois(options)
end

desc "Trigger DOI import based on query"
task import_dois_by_query: :environment do
  query = ENV["QUERY"]

  # Refuse to run without a query: print the hint and stop the rake process.
  if query.blank?
    puts "ENV['QUERY'] is required"
    exit
  end

  # CURSOR, when given, is a URL-safe Base64 string; once decoded it is
  # split on the first comma into at most two parts. Without it the
  # cursor is an empty array.
  encoded_cursor = ENV["CURSOR"]
  cursor =
    if encoded_cursor.present?
      Base64.urlsafe_decode64(encoded_cursor).split(",", 2)
    else
      []
    end

  options = {
    query: query,
    label: "[ImportDoiByQuery]",
    job_name: "ImportDoiJob",
    cursor: cursor,
  }

  # Print whatever Doi.loop_through_dois returns for these options.
  puts Doi.loop_through_dois(options)
end

# until all Crossref DOIs are indexed as otherDoi
desc "Refresh metadata"
Expand Down
16 changes: 16 additions & 0 deletions spec/jobs/import_doi_job_spec.rb
Original file line number Diff line number Diff line change
@@ -0,0 +1,16 @@
require "rails_helper"

# Verifies that ImportDoiJob can be enqueued for a DOI built by the factory.
describe ImportDoiJob, type: :job do
  subject(:job) { described_class.perform_later(doi.doi) }

  let(:doi) { create(:doi) }

  # Clear the enqueued/performed job bookkeeping after every example.
  after do
    clear_enqueued_jobs
    clear_performed_jobs
  end

  it "queues the job" do
    expect { job }.to have_enqueued_job(described_class)
      .on_queue("test_lupo_background")
      .at_least(1).times
  end
end
16 changes: 16 additions & 0 deletions spec/jobs/update_doi_job_spec.rb
Original file line number Diff line number Diff line change
@@ -0,0 +1,16 @@
require "rails_helper"

# Verifies that UpdateDoiJob can be enqueued for a DOI built by the factory.
describe UpdateDoiJob, type: :job do
  subject(:job) { described_class.perform_later(doi.doi) }

  let(:doi) { create(:doi) }

  # Clear the enqueued/performed job bookkeeping after every example.
  after do
    clear_enqueued_jobs
    clear_performed_jobs
  end

  it "queues the job" do
    expect { job }.to have_enqueued_job(described_class)
      .on_queue("test_lupo_background")
      .at_least(1).times
  end
end

0 comments on commit 52c6e7a

Please sign in to comment.