From 7022102a2b1214b005a3f3ae582a7abf4521f67c Mon Sep 17 00:00:00 2001 From: Martin Fenner Date: Mon, 17 Dec 2018 13:37:28 +0100 Subject: [PATCH] improve import missing dois rake test --- app/jobs/doi_import_by_day_missing_job.rb | 7 +++++++ app/models/doi.rb | 25 ++++++++++++++++------- lib/tasks/doi.rake | 9 ++++++-- 3 files changed, 32 insertions(+), 9 deletions(-) create mode 100644 app/jobs/doi_import_by_day_missing_job.rb diff --git a/app/jobs/doi_import_by_day_missing_job.rb b/app/jobs/doi_import_by_day_missing_job.rb new file mode 100644 index 000000000..c7f8e0cc8 --- /dev/null +++ b/app/jobs/doi_import_by_day_missing_job.rb @@ -0,0 +1,7 @@ +class DoiImportByDayMissingJob < ActiveJob::Base + queue_as :lupo_background + + def perform(options={}) + Doi.import_by_day_missing(options) + end +end \ No newline at end of file diff --git a/app/models/doi.rb b/app/models/doi.rb index ab5a1acc6..446020a0e 100644 --- a/app/models/doi.rb +++ b/app/models/doi.rb @@ -367,6 +367,17 @@ def self.import_all(options={}) end end + def self.import_missing(options={}) + from_date = options[:from_date].present? ? Date.parse(options[:from_date]) : Date.current + until_date = options[:until_date].present? ? Date.parse(options[:until_date]) : Date.current + + # get every day between from_date and until_date + (from_date..until_date).each do |d| + DoiImportByDayMissingJob.perform_later(from_date: d.strftime("%F")) + puts "Queued importing for missing DOIs created on #{d.strftime("%F")}." + end + end + def self.import_by_day(options={}) return nil unless options[:from_date].present? from_date = Date.parse(options[:from_date]) @@ -397,15 +408,15 @@ def self.import_by_day(options={}) end end - def self.import_missing(options={}) - from_date = options[:from_date].present? ? Date.parse(options[:from_date]) : Date.current - until_date = options[:until_date].present? ? Date.parse(options[:until_date]) : Date.current + def self.import_by_day_missing(options={}) + return nil unless options[:from_date].present? + from_date = Date.parse(options[:from_date]) count = 0 logger = Logger.new(STDOUT) - Doi.where(schema_version: nil).where(created: from_date.midnight..until_date.end_of_day).find_each do |doi| + Doi.where(schema_version: nil).where(created: from_date.midnight..from_date.end_of_day).find_each do |doi| begin string = doi.current_metadata.present? ? doi.current_metadata.xml : nil meta = doi.read_datacite(string: string, sandbox: doi.sandbox) @@ -413,8 +424,8 @@ def self.import_missing(options={}) [a.to_sym, meta[a]] end.to_h.merge(schema_version: meta["schema_version"] || "http://datacite.org/schema/kernel-4", version_info: meta["version"], xml: string) - # update_attributes will trigger validations and Elasticsearch indexing - doi.update_attributes(attrs) + # update_columns will NOT trigger validations and Elasticsearch indexing + doi.update_columns(attrs) rescue TypeError, NoMethodError, ActiveRecord::LockWaitTimeout => error logger.error "[MySQL] Error importing metadata for " + doi.doi + ": " + error.message else @@ -423,7 +434,7 @@ def self.import_missing(options={}) end if count > 0 - logger.info "[MySQL] Imported metadata for #{count} DOIs created #{options[:from_date]} - #{options[:until_date]}." + logger.info "[MySQL] Imported metadata for #{count} DOIs created on #{options[:from_date]}." end end diff --git a/lib/tasks/doi.rake b/lib/tasks/doi.rake index 29d609d12..be3ce1cb1 100644 --- a/lib/tasks/doi.rake +++ b/lib/tasks/doi.rake @@ -29,8 +29,13 @@ namespace :doi do desc 'Import missing DOIs' task :import_missing => :environment do - from_date = ENV['FROM_DATE'] || Date.current.strftime("%F") - until_date = ENV['UNTIL_DATE'] || Date.current.strftime("%F") + if ENV['YEAR'].present? + from_date = "#{ENV['YEAR']}-01-01" + until_date = "#{ENV['YEAR']}-12-31" + else + from_date = ENV['FROM_DATE'] || Date.current.strftime("%F") + until_date = ENV['UNTIL_DATE'] || Date.current.strftime("%F") + end Doi.import_missing(from_date: from_date, until_date: until_date) end