Skip to content

Commit

Permalink
speed up doi transfer. #195
Browse files (browse the repository at this point in the history)
  • Loading branch information
Martin Fenner committed Feb 3, 2019
1 parent c91a0d0 commit 10f2ea3
Show file tree
Hide file tree
Showing 5 changed files with 9 additions and 54 deletions.
39 changes: 4 additions & 35 deletions app/models/client.rb
Original file line number Diff line number Diff line change
Expand Up @@ -182,42 +182,11 @@ def target_id=(value)

target = c.records.first

errors = 0
count = 0

logger = Logger.new(STDOUT)

dois.find_in_batches(batch_size: 500) do |dois|
dois.each { |doi| doi.update_column(:datacentre, target.id) }

response = Doi.__elasticsearch__.client.bulk \
index: Doi.index_name,
type: Doi.document_type,
body: dois.map { |doi| { index: { _id: doi.id, data: doi.as_indexed_json } } }

errors += response['items'].map { |k, v| k.values.first['error'] }.compact.length
count += dois.length
dois.each { |doi| doi.update_column(:indexed, Time.zone.now) }
end

if errors > 1
logger.info "[Elasticsearch] #{errors} errors transferring #{count} DOIs to account #{value}."
elsif count > 1
logger.info "[Elasticsearch] Transferred #{count} DOIs to account #{value}."
end
rescue Elasticsearch::Transport::Transport::Errors::RequestEntityTooLarge, Faraday::ConnectionFailed => error
logger.info "[Elasticsearch] Error #{error.message} transferring DOIs to account #{value}."

count = 0
Doi.index(from_date: "2011-01-01", client_id: target.id)
end

dois.find_each do |doi|
doi.update_column(:datacentre, target.id)
IndexJob.perform_later(doi)
doi.update_column(:indexed, Time.zone.now)
count += 1
end

logger.info "[Elasticsearch] Transferred #{count} DOIs to account #{value}."
# Re-index the DOIs associated with this record.
#
# Delegates to Doi.index, passing a fixed start date of "2011-01-01" and
# this record's own id as the client_id filter. Returns whatever Doi.index
# returns.
def index_all_dois
  start_date = "2011-01-01"
  Doi.index(from_date: start_date, client_id: id)
end

def cache_key
Expand Down
3 changes: 1 addition & 2 deletions app/models/doi.rb
Original file line number Diff line number Diff line change
Expand Up @@ -478,7 +478,6 @@ def self.index(options={})
# get every day between from_date and until_date
(from_date..until_date).each do |d|
DoiIndexByDayJob.perform_later(from_date: d.strftime("%F"), index_time: index_time, client_id: client_id)
puts "Queued indexing for DOIs created on #{d.strftime("%F")}."
end
end

Expand All @@ -494,7 +493,7 @@ def self.index_by_day(options={})
logger = Logger.new(STDOUT)

collection = Doi.where(created: from_date.midnight..from_date.end_of_day).where("indexed < ?", index_time)
collection = collection.where(client_id: client_id) if client_id.present?
collection = collection.where(datacentre: client_id) if client_id.present?

collection.find_in_batches(batch_size: 250) do |dois|
response = Doi.__elasticsearch__.client.bulk \
Expand Down
17 changes: 2 additions & 15 deletions lib/tasks/client.rake
Original file line number Diff line number Diff line change
Expand Up @@ -29,10 +29,7 @@ namespace :client do

# index DOIs for client
puts "#{client.dois.length} DOIs will be indexed."
client.dois.find_each do |doi|
doi.__elasticsearch__.index_document
puts "DOI #{doi.doi} indexed."
end
client.index_all_dois
end

desc 'Import all clients'
Expand Down Expand Up @@ -154,17 +151,7 @@ namespace :client do

# update client for DOIs in batches
puts "#{client.dois.length} DOIs will be transferred."
client.dois.find_each do |doi|
doi.update_attributes(datacentre: target.id)
puts "DOI #{doi.doi} transferred to client #{target.symbol}."
end

if client.update_attributes(is_active: nil, deleted_at: Time.zone.now)
client.send_delete_email unless Rails.env.test?
puts "Client with client ID #{ENV['CLIENT_ID']} deleted."
else
puts client.errors.inspect
end
client.update_attributes(target_id: target.symbol)

prefixes.each do |prefix|
provider_prefix = ProviderPrefix.create(provider: target.provider.symbol, prefix: prefix)
Expand Down
2 changes: 1 addition & 1 deletion spec/lib/tasks/doi_rake_spec.rb
Original file line number Diff line number Diff line change
Expand Up @@ -8,7 +8,7 @@
ENV['UNTIL_DATE'] = "2018-08-05"

let!(:doi) { create_list(:doi, 10) }
let(:output) { "Queued indexing for DOIs created on 2018-01-04.\n" }
let(:output) { "" }

it "prerequisites should include environment" do
expect(subject.prerequisites).to include("environment")
Expand Down
2 changes: 1 addition & 1 deletion spec/models/client_spec.rb
Original file line number Diff line number Diff line change
Expand Up @@ -34,7 +34,7 @@
end

describe "doi transfer", elasticsearch: true do
let!(:dois) { create_list(:doi, 5, client: client) }
let!(:dois) { create_list(:doi, 5, client: client) }

# it "transfer all DOIs" do
# original_dois = Doi.where(client: client.symbol)
Expand Down

0 comments on commit 10f2ea3

Please sign in to comment.