Skip to content

Commit

Permalink
Merge pull request #536 from datacite/import-dois-by-client
Browse files Browse the repository at this point in the history
import dois by client rake task
  • Loading branch information
Martin Fenner authored May 27, 2020
2 parents acd411d + 0bf8e2a commit b88d3fa
Show file tree
Hide file tree
Showing 2 changed files with 48 additions and 28 deletions.
52 changes: 45 additions & 7 deletions app/models/doi.rb
Original file line number Diff line number Diff line change
Expand Up @@ -977,6 +977,48 @@ def self.import_by_ids(options={})
(from_id..until_id).to_a.length
end

# Imports every DOI registered by one client into Elasticsearch.
#
# DOIs are loaded in batches of 500 and sent through the bulk API. Any item
# that the bulk response flags with an error is logged and retried
# individually via import_one.
#
# @param client_id [String, nil] client symbol used to look up the Client record
# @return [Integer, nil] number of DOIs submitted to the bulk API, or nil when
#   no client matches client_id or one of the rescued errors aborts the import
def self.import_by_client(client_id: nil)
  client = ::Client.where(symbol: client_id).first
  return nil if client.blank?

  index = Rails.env.test? ? "dois-test" : active_index
  errors = 0
  count = 0

  Doi.where(datacentre: client.id).find_in_batches(batch_size: 500) do |dois|
    response = Doi.__elasticsearch__.client.bulk(
      index: index,
      type: Doi.document_type,
      body: dois.map { |doi| { index: { _id: doi.id, data: doi.as_indexed_json } } }
    )

    # Each response item is a one-entry hash keyed by the bulk action; the
    # entry carries an "error" value only when that document failed.
    failed_items = response['items'].select { |item| item.values.first['error'].present? }
    errors += failed_items.length
    failed_items.each do |item|
      Rails.logger.error "[Elasticsearch] #{item.inspect}"
      # Retry the failed document on its own; skip ids that do not parse to a positive integer.
      doi_id = item.dig("index", "_id").to_i
      import_one(doi_id: doi_id) if doi_id > 0
    end

    count += dois.length
  end

  # A single failed item must already be reported as an error, so the
  # threshold is "any error" rather than "more than one".
  if errors > 0
    Rails.logger.error "[Elasticsearch] #{errors} errors importing #{count} DOIs for client #{client_id}."
  elsif count > 0
    Rails.logger.warn "[Elasticsearch] Imported #{count} DOIs for client #{client_id}."
  end

  count
rescue Elasticsearch::Transport::Transport::Errors::RequestEntityTooLarge, Faraday::ConnectionFailed, ActiveRecord::LockWaitTimeout => error
  Rails.logger.error "[Elasticsearch] Error #{error.message} importing DOIs for client #{client_id}."
  # Return nil explicitly so an aborted import is not mistaken for a result
  # (the logger call's own return value would otherwise leak out).
  nil
end

def self.import_by_id(options={})
return nil if options[:id].blank?

Expand All @@ -998,18 +1040,14 @@ def self.import_by_id(options={})
body: dois.map { |doi| { index: { _id: doi.id, data: doi.as_indexed_json } } }

# try to handle errors
response['items'].select { |k, v| k.values.first['error'].present? }.each do |item|
errors_in_response = response['items'].select { |k, v| k.values.first['error'].present? }
errors += errors_in_response.length
errors_in_response.each do |item|
Rails.logger.error "[Elasticsearch] " + item.inspect
doi_id = item.dig("index", "_id").to_i
import_one(doi_id: doi_id) if doi_id > 0
end

# log errors
# errors += response['items'].map { |k, v| k.values.first['error'] }.compact.length
# response['items'].select { |k, v| k.values.first['error'].present? }.each do |err|
# Rails.logger.error "[Elasticsearch] " + err.inspect
# end

count += dois.length
end

Expand Down
24 changes: 3 additions & 21 deletions lib/tasks/client.rake
Original file line number Diff line number Diff line change
Expand Up @@ -70,7 +70,7 @@ namespace :client do
end

desc 'Import DOIs by client'
task :import_all_dois => :environment do
task :import_dois => :environment do
if ENV['CLIENT_ID'].nil?
puts "ENV['CLIENT_ID'] is required."
exit
Expand All @@ -83,26 +83,8 @@ namespace :client do
end

# import DOIs for client
# puts "#{client.dois.length} DOIs will be imported."
client.import_all_dois
end

desc 'Import missing DOIs by client'
task :import_missing_dois => :environment do
if ENV['CLIENT_ID'].nil?
puts "ENV['CLIENT_ID'] is required."
exit
end

client = Client.where(deleted_at: nil).where(symbol: ENV['CLIENT_ID']).first
if client.nil?
puts "Client not found for client ID #{ENV['CLIENT_ID']}."
exit
end

# import DOIs for client
# puts "#{client.dois.length} DOIs will be imported."
client.import_missing_dois
puts "#{client.dois.length} DOIs will be imported."
Doi.import_by_client(client_id: ENV['CLIENT_ID'])
end

desc 'Delete client transferred to other DOI registration agency'
Expand Down

0 comments on commit b88d3fa

Please sign in to comment.