Add Doi.import_by_client and the client:import_dois rake task.

Doi.import_by_client(client_id:) looks up the client by symbol, bulk-indexes
that client's DOIs into Elasticsearch in batches of 500, retries every item
the bulk response reports as failed through import_one, and returns the number
of DOIs processed (nil when the client is not found). Doi.import_by_id now
adds failed bulk items to its error counter. The client:import_all_dois and
client:import_missing_dois rake tasks are replaced by a single
client:import_dois task that calls Doi.import_by_client.

Review notes:
- The error summary is logged when errors > 0 (was > 1, which reported a run
  with exactly one failed item as a clean import).
- The rake task prints client.dois.count instead of .length so that an
  ActiveRecord association (presumably what client.dois is) is counted in SQL
  rather than loaded into memory.
- Leading indentation was reconstructed; the index hashes are those of the
  original change.

diff --git a/app/models/doi.rb b/app/models/doi.rb
index a5ac0b167..bdad3e987 100644
--- a/app/models/doi.rb
+++ b/app/models/doi.rb
@@ -977,6 +977,48 @@ def self.import_by_ids(options={})
     (from_id..until_id).to_a.length
   end
 
+  def self.import_by_client(client_id: nil)
+    client = ::Client.where(symbol: client_id).first
+    return nil if client.blank?
+
+    index = if Rails.env.test?
+      "dois-test"
+    else
+      self.active_index
+    end
+    errors = 0
+    count = 0
+
+    Doi.where(datacentre: client.id).find_in_batches(batch_size: 500) do |dois|
+      response = Doi.__elasticsearch__.client.bulk \
+        index: index,
+        type: Doi.document_type,
+        body: dois.map { |doi| { index: { _id: doi.id, data: doi.as_indexed_json } } }
+
+      # try to handle errors
+      errors_in_response = response['items'].select { |k, v| k.values.first['error'].present? }
+      errors += errors_in_response.length
+      errors_in_response.each do |item|
+        Rails.logger.error "[Elasticsearch] " + item.inspect
+        doi_id = item.dig("index", "_id").to_i
+        import_one(doi_id: doi_id) if doi_id > 0
+      end
+
+      count += dois.length
+    end
+
+    if errors > 0
+      Rails.logger.error "[Elasticsearch] #{errors} errors importing #{count} DOIs for client #{client_id}."
+    elsif count > 0
+      Rails.logger.warn "[Elasticsearch] Imported #{count} DOIs for client #{client_id}."
+    end
+
+    count
+
+  rescue Elasticsearch::Transport::Transport::Errors::RequestEntityTooLarge, Faraday::ConnectionFailed, ActiveRecord::LockWaitTimeout => error
+    Rails.logger.error "[Elasticsearch] Error #{error.message} importing DOIs for client #{client_id}."
+  end
+
   def self.import_by_id(options={})
     return nil if options[:id].blank?
 
@@ -998,18 +1040,14 @@ def self.import_by_id(options={})
         body: dois.map { |doi| { index: { _id: doi.id, data: doi.as_indexed_json } } }
 
       # try to handle errors
-      response['items'].select { |k, v| k.values.first['error'].present? }.each do |item|
+      errors_in_response = response['items'].select { |k, v| k.values.first['error'].present? }
+      errors += errors_in_response.length
+      errors_in_response.each do |item|
         Rails.logger.error "[Elasticsearch] " + item.inspect
         doi_id = item.dig("index", "_id").to_i
         import_one(doi_id: doi_id) if doi_id > 0
       end
 
-      # log errors
-      # errors += response['items'].map { |k, v| k.values.first['error'] }.compact.length
-      # response['items'].select { |k, v| k.values.first['error'].present? }.each do |err|
-      #   Rails.logger.error "[Elasticsearch] " + err.inspect
-      # end
-
       count += dois.length
     end
 
diff --git a/lib/tasks/client.rake b/lib/tasks/client.rake
index 8532c9a81..556c3b5bd 100644
--- a/lib/tasks/client.rake
+++ b/lib/tasks/client.rake
@@ -70,7 +70,7 @@ namespace :client do
   end
 
   desc 'Import DOIs by client'
-  task :import_all_dois => :environment do
+  task :import_dois => :environment do
     if ENV['CLIENT_ID'].nil?
       puts "ENV['CLIENT_ID'] is required."
       exit
@@ -83,26 +83,8 @@ namespace :client do
     end
 
     # import DOIs for client
-    # puts "#{client.dois.length} DOIs will be imported."
-    client.import_all_dois
-  end
-
-  desc 'Import missing DOIs by client'
-  task :import_missing_dois => :environment do
-    if ENV['CLIENT_ID'].nil?
-      puts "ENV['CLIENT_ID'] is required."
-      exit
-    end
-
-    client = Client.where(deleted_at: nil).where(symbol: ENV['CLIENT_ID']).first
-    if client.nil?
-      puts "Client not found for client ID #{ENV['CLIENT_ID']}."
-      exit
-    end
-
-    # import DOIs for client
-    # puts "#{client.dois.length} DOIs will be imported."
-    client.import_missing_dois
+    puts "#{client.dois.count} DOIs will be imported."
+    Doi.import_by_client(client_id: ENV['CLIENT_ID'])
   end
 
   desc 'Delete client transferred to other DOI registration agency'