From 86a68d28ab1684d5eecfa08df363f4808f359a02 Mon Sep 17 00:00:00 2001 From: Martin Fenner Date: Wed, 27 May 2020 15:01:36 +0200 Subject: [PATCH 1/2] import dois by client rake task --- app/models/doi.rb | 49 ++++++++++++++++++++++++++++++++++++------- lib/tasks/client.rake | 24 +++------------------ 2 files changed, 45 insertions(+), 28 deletions(-) diff --git a/app/models/doi.rb b/app/models/doi.rb index a5ac0b167..196cc101c 100644 --- a/app/models/doi.rb +++ b/app/models/doi.rb @@ -977,6 +977,47 @@ def self.import_by_ids(options={}) (from_id..until_id).to_a.length end + def self.import_by_client(client_id: nil) + client = ::Client.where(symbol: client_id).first + return nil if client.blank? + + index = if Rails.env.test? + "dois-test" + else + self.active_index + end + errors = 0 + count = 0 + + Doi.where(datacentre: client.id).find_in_batches(batch_size: 500) do |dois| + response = Doi.__elasticsearch__.client.bulk \ + index: index, + type: Doi.document_type, + body: dois.map { |doi| { index: { _id: doi.id, data: doi.as_indexed_json } } } + + # try to handle errors + response['items'].select { |k, v| k.values.first['error'].present? }.each do |item| + errors += 1 + Rails.logger.error "[Elasticsearch] " + item.inspect + doi_id = item.dig("index", "_id").to_i + import_one(doi_id: doi_id) if doi_id > 0 + end + + count += dois.length + end + + if errors > 1 + Rails.logger.error "[Elasticsearch] #{errors} errors importing #{count} DOIs for client #{client_id}." + elsif count > 0 + Rails.logger.warn "[Elasticsearch] Imported #{count} DOIs for client #{client_id}." + end + + count + + rescue Elasticsearch::Transport::Transport::Errors::RequestEntityTooLarge, Faraday::ConnectionFailed, ActiveRecord::LockWaitTimeout => error + Rails.logger.error "[Elasticsearch] Error #{error.message} importing DOIs for client #{client_id}." + end + def self.import_by_id(options={}) return nil if options[:id].blank? @@ -998,18 +1039,12 @@ def self.import_by_id(options={}) body: dois.map { |doi| { index: { _id: doi.id, data: doi.as_indexed_json } } } # try to handle errors - response['items'].select { |k, v| k.values.first['error'].present? }.each do |item| + errors += response['items'].select { |k, v| k.values.first['error'].present? }.each do |item| Rails.logger.error "[Elasticsearch] " + item.inspect doi_id = item.dig("index", "_id").to_i import_one(doi_id: doi_id) if doi_id > 0 end - # log errors - # errors += response['items'].map { |k, v| k.values.first['error'] }.compact.length - # response['items'].select { |k, v| k.values.first['error'].present? }.each do |err| - # Rails.logger.error "[Elasticsearch] " + err.inspect - # end - count += dois.length end diff --git a/lib/tasks/client.rake b/lib/tasks/client.rake index 8532c9a81..556c3b5bd 100644 --- a/lib/tasks/client.rake +++ b/lib/tasks/client.rake @@ -70,7 +70,7 @@ namespace :client do end desc 'Import DOIs by client' - task :import_all_dois => :environment do + task :import_dois => :environment do if ENV['CLIENT_ID'].nil? puts "ENV['CLIENT_ID'] is required." exit @@ -83,26 +83,8 @@ namespace :client do end # import DOIs for client - # puts "#{client.dois.length} DOIs will be imported." - client.import_all_dois - end - - desc 'Import missing DOIs by client' - task :import_missing_dois => :environment do - if ENV['CLIENT_ID'].nil? - puts "ENV['CLIENT_ID'] is required." - exit - end - - client = Client.where(deleted_at: nil).where(symbol: ENV['CLIENT_ID']).first - if client.nil? - puts "Client not found for client ID #{ENV['CLIENT_ID']}." - exit - end - - # import DOIs for client - # puts "#{client.dois.length} DOIs will be imported." - client.import_missing_dois + puts "#{client.dois.length} DOIs will be imported." + Doi.import_by_client(client_id: ENV['CLIENT_ID']) end desc 'Delete client transferred to other DOI registration agency' From 0bf8e2ae51632cefeeacbfffb6fe52cdc28fa49c Mon Sep 17 00:00:00 2001 From: Martin Fenner Date: Wed, 27 May 2020 15:27:08 +0200 Subject: [PATCH 2/2] fix errors --- app/models/doi.rb | 9 ++++++--- 1 file changed, 6 insertions(+), 3 deletions(-) diff --git a/app/models/doi.rb b/app/models/doi.rb index 196cc101c..bdad3e987 100644 --- a/app/models/doi.rb +++ b/app/models/doi.rb @@ -996,8 +996,9 @@ def self.import_by_client(client_id: nil) body: dois.map { |doi| { index: { _id: doi.id, data: doi.as_indexed_json } } } # try to handle errors - response['items'].select { |k, v| k.values.first['error'].present? }.each do |item| - errors += 1 + errors_in_response = response['items'].select { |k, v| k.values.first['error'].present? } + errors += errors_in_response.length + errors_in_response.each do |item| Rails.logger.error "[Elasticsearch] " + item.inspect doi_id = item.dig("index", "_id").to_i import_one(doi_id: doi_id) if doi_id > 0 @@ -1039,7 +1040,9 @@ def self.import_by_id(options={}) body: dois.map { |doi| { index: { _id: doi.id, data: doi.as_indexed_json } } } # try to handle errors - errors += response['items'].select { |k, v| k.values.first['error'].present? }.each do |item| + errors_in_response = response['items'].select { |k, v| k.values.first['error'].present? } + errors += errors_in_response.length + errors_in_response.each do |item| Rails.logger.error "[Elasticsearch] " + item.inspect doi_id = item.dig("index", "_id").to_i import_one(doi_id: doi_id) if doi_id > 0