Skip to content

Commit

Permalink
improve import missing dois rake test
Browse files Browse the repository at this point in the history
  • Loading branch information
Martin Fenner committed Dec 17, 2018
1 parent 29303fe commit 7022102
Show file tree
Hide file tree
Showing 3 changed files with 32 additions and 9 deletions.
7 changes: 7 additions & 0 deletions app/jobs/doi_import_by_day_missing_job.rb
Original file line number Diff line number Diff line change
@@ -0,0 +1,7 @@
# Background job, enqueued on the :lupo_background queue, that hands its
# options straight to Doi.import_by_day_missing.
class DoiImportByDayMissingJob < ActiveJob::Base
  queue_as :lupo_background

  # @param options [Hash] forwarded unchanged to Doi.import_by_day_missing
  def perform(options = {})
    Doi.import_by_day_missing(options)
  end
end
25 changes: 18 additions & 7 deletions app/models/doi.rb
Original file line number Diff line number Diff line change
Expand Up @@ -367,6 +367,17 @@ def self.import_all(options={})
end
end

# Queues one DoiImportByDayMissingJob for every calendar day between
# options[:from_date] and options[:until_date], both ends included.
#
# @param options [Hash]
#   :from_date  - first day, parsed with Date.parse; Date.current when blank
#   :until_date - last day, parsed with Date.parse; Date.current when blank
# @return [Range] the range of dates that was iterated
def self.import_missing(options={})
  # resolve both boundaries the same way, from_date first
  first_day, last_day = [:from_date, :until_date].map do |key|
    options[key].present? ? Date.parse(options[key]) : Date.current
  end

  # one job per day; an inverted range simply queues nothing
  (first_day..last_day).each do |day|
    day_label = day.strftime("%F")
    DoiImportByDayMissingJob.perform_later(from_date: day_label)
    puts "Queued importing for missing DOIs created on #{day_label}."
  end
end

def self.import_by_day(options={})
return nil unless options[:from_date].present?
from_date = Date.parse(options[:from_date])
Expand Down Expand Up @@ -397,24 +408,24 @@ def self.import_by_day(options={})
end
end

def self.import_missing(options={})
from_date = options[:from_date].present? ? Date.parse(options[:from_date]) : Date.current
until_date = options[:until_date].present? ? Date.parse(options[:until_date]) : Date.current
def self.import_by_day_missing(options={})
return nil unless options[:from_date].present?
from_date = Date.parse(options[:from_date])

count = 0

logger = Logger.new(STDOUT)

Doi.where(schema_version: nil).where(created: from_date.midnight..until_date.end_of_day).find_each do |doi|
Doi.where(schema_version: nil).where(created: from_date.midnight..from_date.end_of_day).find_each do |doi|
begin
string = doi.current_metadata.present? ? doi.current_metadata.xml : nil
meta = doi.read_datacite(string: string, sandbox: doi.sandbox)
attrs = %w(creators contributors titles publisher publication_year types descriptions container sizes formats language dates identifiers related_identifiers funding_references geo_locations rights_list subjects content_url).map do |a|
[a.to_sym, meta[a]]
end.to_h.merge(schema_version: meta["schema_version"] || "http://datacite.org/schema/kernel-4", version_info: meta["version"], xml: string)

# update_attributes will trigger validations and Elasticsearch indexing
doi.update_attributes(attrs)
# update_columns will NOT trigger validations and Elasticsearch indexing
doi.update_columns(attrs)
rescue TypeError, NoMethodError, ActiveRecord::LockWaitTimeout => error
logger.error "[MySQL] Error importing metadata for " + doi.doi + ": " + error.message
else
Expand All @@ -423,7 +434,7 @@ def self.import_missing(options={})
end

if count > 0
logger.info "[MySQL] Imported metadata for #{count} DOIs created #{options[:from_date]} - #{options[:until_date]}."
logger.info "[MySQL] Imported metadata for #{count} DOIs created on #{options[:from_date]}."
end
end

Expand Down
9 changes: 7 additions & 2 deletions lib/tasks/doi.rake
Original file line number Diff line number Diff line change
Expand Up @@ -29,8 +29,13 @@ namespace :doi do

desc 'Import missing DOIs'
# Queues import of missing DOIs for a date range taken from the environment:
#   YEAR       - when present, selects YEAR-01-01 through YEAR-12-31 and
#                takes precedence over FROM_DATE / UNTIL_DATE
#   FROM_DATE  - first day of the range; defaults to today
#   UNTIL_DATE - last day of the range; defaults to today
task :import_missing => :environment do
  if ENV['YEAR'].present?
    from_date = "#{ENV['YEAR']}-01-01"
    until_date = "#{ENV['YEAR']}-12-31"
  else
    # compute today's date once so both defaults agree
    today = Date.current.strftime("%F")
    from_date = ENV['FROM_DATE'] || today
    until_date = ENV['UNTIL_DATE'] || today
  end

  Doi.import_missing(from_date: from_date, until_date: until_date)
end
Expand Down

0 comments on commit 7022102

Please sign in to comment.