From e8be8d39501af4be6cf24e7cb7bc6ecee239637a Mon Sep 17 00:00:00 2001
From: Martin Fenner
Date: Sun, 3 Feb 2019 23:26:29 +0100
Subject: [PATCH] support import of missing metadata by client. #196

---
 app/models/client.rb            |  8 ++++++++
 app/models/doi.rb               | 20 ++++++++++++++------
 config/initializers/_version.rb |  2 +-
 lib/tasks/client.rake           | 27 +++++++++++++++++++--------
 4 files changed, 42 insertions(+), 15 deletions(-)

diff --git a/app/models/client.rb b/app/models/client.rb
index 4ca18e589..6ff5b342d 100644
--- a/app/models/client.rb
+++ b/app/models/client.rb
@@ -189,6 +189,14 @@ def index_all_dois
     Doi.index(from_date: "2011-01-01", client_id: id)
   end
 
+  def import_all_dois
+    Doi.import_all(from_date: "2011-01-01", client_id: id)
+  end
+
+  def import_missing_dois
+    Doi.import_missing(from_date: "2011-01-01", client_id: id)
+  end
+
   def cache_key
     "clients/#{uid}-#{updated.iso8601}"
   end
diff --git a/app/models/doi.rb b/app/models/doi.rb
index 3a68d935e..d9b4812d7 100644
--- a/app/models/doi.rb
+++ b/app/models/doi.rb
@@ -377,34 +377,38 @@ def self.import_one(doi_id: nil)
   def self.import_all(options={})
     from_date = options[:from_date].present? ? Date.parse(options[:from_date]) : Date.current
     until_date = options[:until_date].present? ? Date.parse(options[:until_date]) : Date.current
+    client_id = options[:client_id]
 
     # get every day between from_date and until_date
     (from_date..until_date).each do |d|
-      DoiImportByDayJob.perform_later(from_date: d.strftime("%F"))
-      puts "Queued importing for DOIs created on #{d.strftime("%F")}."
+      DoiImportByDayJob.perform_later(from_date: d.strftime("%F"), client_id: client_id)
     end
   end
 
   def self.import_missing(options={})
     from_date = options[:from_date].present? ? Date.parse(options[:from_date]) : Date.current
     until_date = options[:until_date].present? ? Date.parse(options[:until_date]) : Date.current
+    client_id = options[:client_id]
 
     # get every day between from_date and until_date
     (from_date..until_date).each do |d|
-      DoiImportByDayMissingJob.perform_later(from_date: d.strftime("%F"))
-      puts "Queued importing for missing DOIs created on #{d.strftime("%F")}."
+      DoiImportByDayMissingJob.perform_later(from_date: d.strftime("%F"), client_id: client_id)
     end
   end
 
   def self.import_by_day(options={})
     return nil unless options[:from_date].present?
     from_date = Date.parse(options[:from_date])
+    client_id = options[:client_id]
 
     count = 0
 
     logger = Logger.new(STDOUT)
 
-    Doi.where(created: from_date.midnight..from_date.end_of_day).find_each do |doi|
+    collection = Doi.where(created: from_date.midnight..from_date.end_of_day)
+    collection = collection.where(datacentre: client_id) if client_id.present?
+
+    collection.find_each do |doi|
       begin
         # ignore broken xml
         string = doi.current_metadata.present? ? doi.from_xml(doi.current_metadata.xml.to_s.force_encoding("UTF-8")) : nil
@@ -436,12 +440,16 @@ def self.import_by_day(options={})
   def self.import_by_day_missing(options={})
     return nil unless options[:from_date].present?
     from_date = Date.parse(options[:from_date])
+    client_id = options[:client_id]
 
     count = 0
 
     logger = Logger.new(STDOUT)
 
-    Doi.where(xml: nil).where(created: from_date.midnight..from_date.end_of_day).find_each do |doi|
+    collection = Doi.where(xml: nil).where(created: from_date.midnight..from_date.end_of_day)
+    collection = collection.where(datacentre: client_id) if client_id.present?
+
+    collection.find_each do |doi|
       begin
         string = doi.current_metadata.present? ? doi.clean_xml(doi.current_metadata.xml) : nil
         unless string.present?
diff --git a/config/initializers/_version.rb b/config/initializers/_version.rb
index 36d9c12d6..8fdb40fe8 100644
--- a/config/initializers/_version.rb
+++ b/config/initializers/_version.rb
@@ -1,5 +1,5 @@
 module Lupo
   class Application
-    VERSION = "2.3.19"
+    VERSION = "2.3.20"
   end
 end
\ No newline at end of file
diff --git a/lib/tasks/client.rake b/lib/tasks/client.rake
index 90853d858..bbece394e 100644
--- a/lib/tasks/client.rake
+++ b/lib/tasks/client.rake
@@ -51,15 +51,26 @@ namespace :client do
     end
 
     # import DOIs for client
-    puts "#{client.dois.length} DOIs will be imported."
-    client.dois.find_each do |doi|
-      begin
-        Doi.import_one(doi_id: doi.doi)
-        puts "DOI #{doi.doi} imported."
-      rescue TypeError, NoMethodError, RuntimeError, ActiveRecord::StatementInvalid, ActiveRecord::LockWaitTimeout, Elasticsearch::Transport::Transport::Errors::BadRequest => error
-        puts "[MySQL] Error importing metadata for " + doi.doi + ": " + error.message
-      end
+    # puts "#{client.dois.length} DOIs will be imported."
+    client.import_all_dois
+  end
+
+  desc 'Import missing DOIs by client'
+  task :import_missing_dois => :environment do
+    if ENV['CLIENT_ID'].nil?
+      puts "ENV['CLIENT_ID'] is required."
+      exit
+    end
+
+    client = Client.where(deleted_at: nil).where(symbol: ENV['CLIENT_ID']).first
+    if client.nil?
+      puts "Client not found for client ID #{ENV['CLIENT_ID']}."
+      exit
     end
+
+    # import DOIs for client
+    # puts "#{client.dois.length} DOIs will be imported."
+    client.import_missing_dois
   end
 
   desc 'Delete client transferred to other DOI registration agency'