Skip to content

Commit

Permalink
add import dois not indexed rake task datacite/datacite#965
Browse files Browse the repository at this point in the history
  • Loading branch information
Martin Fenner committed Dec 19, 2020
1 parent 44135af commit 7034e81
Show file tree
Hide file tree
Showing 3 changed files with 31 additions and 6 deletions.
20 changes: 20 additions & 0 deletions app/models/client.rb
Original file line number Diff line number Diff line change
Expand Up @@ -712,6 +712,26 @@ def self.export_doi_counts(query: nil)
csv.join("")
end

# Import DOIs into Elasticsearch for the repositories listed in the
# DOI-count export.
#
# The CSV produced by export_doi_counts is parsed with headers, and each
# row's "Repository ID" column is used to look up a non-deleted client by
# symbol. Repositories with more than 10,000 DOIs are ignored.
#
# @param query [String, nil] passed through unchanged to export_doi_counts
# @return [CSV::Table] the parsed export that was iterated
def self.import_dois_not_indexed(query: nil)
  table = CSV.parse(export_doi_counts(query: query), headers: true)

  # loop through the repositories listed in the export
  table.each do |row|
    repository_id = row["Repository ID"]
    client = ::Client.where(deleted_at: nil).where(symbol: repository_id).first

    # An unknown repository must not terminate the host process (rake task,
    # console, worker): report it and carry on with the remaining rows.
    if client.nil?
      puts "Client not found for client ID #{repository_id}."
      next
    end

    # Count once in the database instead of loading every DOI record into
    # memory just to measure the collection.
    doi_count = client.dois.count

    # import DOIs for client. Ignore repositories with more than 10K DOIs
    next if doi_count > 10_000

    puts "#{doi_count} DOIs for repository #{client.symbol} will be imported."
    Doi.import_by_client(client_id: client.symbol, total_count: doi_count)
  end
end

protected
def check_issn
Array.wrap(issn).each do |i|
Expand Down
12 changes: 6 additions & 6 deletions app/models/doi.rb
Original file line number Diff line number Diff line change
Expand Up @@ -1243,7 +1243,7 @@ def self.import_one(doi_id: nil)
message
end

def self.import_by_client(client_id: nil)
def self.import_by_client(client_id: nil, total_count: nil)
client = ::Client.where(symbol: client_id).first
return nil if client.blank?

Expand Down Expand Up @@ -1271,18 +1271,18 @@ def self.import_by_client(client_id: nil)
end

count += dois.length
Rails.logger.info "[Elasticsearch] Imported #{count} DOIs for client #{client_id}."
Rails.logger.info "[Elasticsearch] Imported #{count} DOIs for repository #{client_id}." if total_count > 500
end

if errors > 1
Rails.logger.error "[Elasticsearch] #{errors} errors importing #{count} DOIs for client #{client_id}."
Rails.logger.error "[Elasticsearch] #{errors} errors importing #{count} DOIs for repository #{client_id}."
elsif count > 0
Rails.logger.info "[Elasticsearch] Imported a total of #{count} DOIs for client #{client_id}."
Rails.logger.info "[Elasticsearch] Imported a total of #{count} DOIs for repository #{client_id}."
end

count
rescue Elasticsearch::Transport::Transport::Errors::RequestEntityTooLarge, Faraday::ConnectionFailed, ActiveRecord::LockWaitTimeout => e
Rails.logger.error "[Elasticsearch] Error #{e.message} importing DOIs for client #{client_id}."
Rails.logger.error "[Elasticsearch] Error #{e.message} importing DOIs for repository #{client_id}."
end

def self.index_by_id(options = {})
Expand Down Expand Up @@ -2095,7 +2095,7 @@ def self.loop_through_dois(options = {})
query = options[:query].presence

response = Doi.query(query, filter.merge(page: { size: 1, cursor: [] }))
message = "#{label} #{response.results.total} Dois with #{label}."
message = "#{label} #{response.results.total} Dois."

# walk through results using cursor
if response.results.total.positive?
Expand Down
5 changes: 5 additions & 0 deletions lib/tasks/client.rake
Original file line number Diff line number Diff line change
Expand Up @@ -102,6 +102,11 @@ namespace :client do
Doi.import_by_client(client_id: ENV["CLIENT_ID"])
end

desc "Import dois not indexed"
task import_dois_not_indexed: :environment do
  # Forward the optional QUERY environment variable to the model method
  # and print whatever it returns.
  query = ENV["QUERY"]
  puts Client.import_dois_not_indexed(query: query)
end

desc "Export doi counts"
task export_doi_counts: :environment do
puts Client.export_doi_counts(query: ENV["QUERY"])
Expand Down

0 comments on commit 7034e81

Please sign in to comment.