Skip to content

Commit

Permalink
index in batches of 500
Browse files Browse the repository at this point in the history
  • Loading branch information
Martin Fenner committed Jan 4, 2019
1 parent 495b4af commit 8fba52b
Show file tree
Hide file tree
Showing 3 changed files with 11 additions and 11 deletions.
18 changes: 9 additions & 9 deletions app/models/doi.rb
Original file line number Diff line number Diff line change
Expand Up @@ -523,12 +523,12 @@ def self.index_by_day(options={})

# Enqueues one DoiIndexByIdJob per fixed-size step across a range of DOI IDs
# and prints a line to stdout for each job queued.
#
# NOTE(review): this listing is a diff rendered without +/- markers, so every
# changed statement appears twice -- presumably the removed 250-based line
# followed by its 500-based replacement. Confirm against the real file.
#
# options[:from_id]  - first ID of the range; falls back to 1 when absent.
# options[:until_id] - last ID of the range; falls back to from_id plus one
#                      step minus one (from_id + 249 / from_id + 499).
def self.index_by_ids(options={})
from_id = (options[:from_id] || 1).to_i
until_id = (options[:until_id] || from_id + 249).to_i
until_id = (options[:until_id] || from_id + 499).to_i

# walk from_id..until_id in fixed-size steps; each yielded id is passed to
# one job as its `id:` argument
(from_id..until_id).step(250).each do |id|
(from_id..until_id).step(500).each do |id|
DoiIndexByIdJob.perform_later(id: id)
puts "Queued indexing for DOIs with IDs #{from_id} - #{(until_id)}."
puts "Queued indexing for DOIs with IDs starting with #{id}."
end
end

Expand All @@ -541,7 +541,7 @@ def self.index_by_id(options={})

logger = Logger.new(STDOUT)

Doi.where(id: id..(id + 249)).find_in_batches(batch_size: 250) do |dois|
Doi.where(id: id..(id + 499)).find_in_batches(batch_size: 500) do |dois|
response = Doi.__elasticsearch__.client.bulk \
index: Doi.index_name,
type: Doi.document_type,
Expand All @@ -557,21 +557,21 @@ def self.index_by_id(options={})
end

if errors > 1
logger.error "[Elasticsearch] #{errors} errors indexing #{count} DOIs with IDs #{id} - #{(id + 249)}."
logger.error "[Elasticsearch] #{errors} errors indexing #{count} DOIs with IDs #{id} - #{(id + 499)}."
elsif count > 1
logger.info "[Elasticsearch] Indexed #{count} DOIs with IDs #{id} - #{(id + 249)}."
logger.info "[Elasticsearch] Indexed #{count} DOIs with IDs #{id} - #{(id + 499)}."
end
rescue Elasticsearch::Transport::Transport::Errors::RequestEntityTooLarge, Faraday::ConnectionFailed, ActiveRecord::LockWaitTimeout => error
logger.info "[Elasticsearch] Error #{error.message} indexing DOIs with IDs #{id} - #{(id + 249)}."
logger.info "[Elasticsearch] Error #{error.message} indexing DOIs with IDs #{id} - #{(id + 499)}."

count = 0

Doi.where(id: id..(id + 249)).find_each do |doi|
Doi.where(id: id..(id + 499)).find_each do |doi|
IndexJob.perform_later(doi)
count += 1
end

logger.info "[Elasticsearch] Indexed #{count} DOIs with IDs #{id} - #{(id + 249)}."
logger.info "[Elasticsearch] Indexed #{count} DOIs with IDs #{id} - #{(id + 499)}."
end

def uid
Expand Down
2 changes: 1 addition & 1 deletion config/initializers/_version.rb
Original file line number Diff line number Diff line change
@@ -1,5 +1,5 @@
# Defines the application's version string as Lupo::Application::VERSION.
module Lupo
class Application
# NOTE(review): two assignments are shown because this is a diff view without
# +/- markers -- presumably "2.2.4" is the removed line and "2.2.5" the added
# one. Confirm against the real file.
VERSION = "2.2.4"
VERSION = "2.2.5"
end
end
2 changes: 1 addition & 1 deletion lib/tasks/doi.rake
Original file line number Diff line number Diff line change
Expand Up @@ -77,7 +77,7 @@ namespace :doi do
# Rake task that reads an ID range from the FROM_ID and UNTIL_ID environment
# variables and passes it to Doi.index_by_ids.
#
# NOTE(review): the duplicated until_id assignment comes from a diff view
# without +/- markers -- presumably the + 249 line is the removed one and the
# + 499 line its replacement. Confirm against the real file.
desc 'Index DOIs by ID'
task :index_by_ids => :environment do
# FROM_ID falls back to 1 when the variable is unset
from_id = (ENV['FROM_ID'] || 1).to_i
# UNTIL_ID falls back to an offset from from_id when the variable is unset
until_id = (ENV['UNTIL_ID'] || from_id + 249).to_i
until_id = (ENV['UNTIL_ID'] || from_id + 499).to_i

Doi.index_by_ids(from_id: from_id, until_id: until_id)
end
Expand Down

0 comments on commit 8fba52b

Please sign in to comment.