From 8fba52b4a2045c67edde7520730cf0bafbb3f3f2 Mon Sep 17 00:00:00 2001 From: Martin Fenner Date: Fri, 4 Jan 2019 02:37:55 +0100 Subject: [PATCH] index in batches of 500 --- app/models/doi.rb | 18 +++++++++--------- config/initializers/_version.rb | 2 +- lib/tasks/doi.rake | 2 +- 3 files changed, 11 insertions(+), 11 deletions(-) diff --git a/app/models/doi.rb b/app/models/doi.rb index 90d736f92..e69ec7eec 100644 --- a/app/models/doi.rb +++ b/app/models/doi.rb @@ -523,12 +523,12 @@ def self.index_by_day(options={}) def self.index_by_ids(options={}) from_id = (options[:from_id] || 1).to_i - until_id = (options[:until_id] || from_id + 249).to_i + until_id = (options[:until_id] || from_id + 499).to_i # get every id between from_id and end_id - (from_id..until_id).step(250).each do |id| + (from_id..until_id).step(500).each do |id| DoiIndexByIdJob.perform_later(id: id) - puts "Queued indexing for DOIs with IDs #{from_id} - #{(until_id)}." + puts "Queued indexing for DOIs with IDs starting with #{id}." end end @@ -541,7 +541,7 @@ def self.index_by_id(options={}) logger = Logger.new(STDOUT) - Doi.where(id: id..(id + 249)).find_in_batches(batch_size: 250) do |dois| + Doi.where(id: id..(id + 499)).find_in_batches(batch_size: 500) do |dois| response = Doi.__elasticsearch__.client.bulk \ index: Doi.index_name, type: Doi.document_type, @@ -557,21 +557,21 @@ def self.index_by_id(options={}) end if errors > 1 - logger.error "[Elasticsearch] #{errors} errors indexing #{count} DOIs with IDs #{id} - #{(id + 249)}." + logger.error "[Elasticsearch] #{errors} errors indexing #{count} DOIs with IDs #{id} - #{(id + 499)}." elsif count > 1 - logger.info "[Elasticsearch] Indexed #{count} DOIs with IDs #{id} - #{(id + 249)}." + logger.info "[Elasticsearch] Indexed #{count} DOIs with IDs #{id} - #{(id + 499)}." 
end rescue Elasticsearch::Transport::Transport::Errors::RequestEntityTooLarge, Faraday::ConnectionFailed, ActiveRecord::LockWaitTimeout => error - logger.info "[Elasticsearch] Error #{error.message} indexing DOIs with IDs #{id} - #{(id + 499)}." + logger.info "[Elasticsearch] Error #{error.message} indexing DOIs with IDs #{id} - #{(id + 499)}." count = 0 - Doi.where(id: id..(id + 249)).find_each do |doi| + Doi.where(id: id..(id + 499)).find_each do |doi| IndexJob.perform_later(doi) count += 1 end - logger.info "[Elasticsearch] Indexed #{count} DOIs with IDs #{id} - #{(id + 249)}." + logger.info "[Elasticsearch] Indexed #{count} DOIs with IDs #{id} - #{(id + 499)}." end def uid diff --git a/config/initializers/_version.rb b/config/initializers/_version.rb index cb765f20b..4836da4c5 100644 --- a/config/initializers/_version.rb +++ b/config/initializers/_version.rb @@ -1,5 +1,5 @@ module Lupo class Application - VERSION = "2.2.4" + VERSION = "2.2.5" end end \ No newline at end of file diff --git a/lib/tasks/doi.rake b/lib/tasks/doi.rake index 5e6d05698..b9c9e6f9d 100644 --- a/lib/tasks/doi.rake +++ b/lib/tasks/doi.rake @@ -77,7 +77,7 @@ namespace :doi do desc 'Index DOIs by ID' task :index_by_ids => :environment do from_id = (ENV['FROM_ID'] || 1).to_i - until_id = (ENV['UNTIL_ID'] || from_id + 499).to_i + until_id = (ENV['UNTIL_ID'] || from_id + 499).to_i Doi.index_by_ids(from_id: from_id, until_id: until_id) end
# ---------------------------------------------------------------------------
# Review notes (commentary only; not part of the patch content above).
#
# Purpose: this patch raises the DOI indexing batch size from 250 to 500.
#   * app/models/doi.rb, Doi.index_by_ids: the default until_id becomes
#     from_id + 499 and the id range is walked with step(500), queueing one
#     DoiIndexByIdJob per step. The progress message now prints the starting
#     id of each queued batch instead of repeating the overall
#     from_id/until_id range on every iteration.
#   * app/models/doi.rb, Doi.index_by_id: the bulk-indexed range becomes
#     id..(id + 499) with batch_size: 500, and the log messages and the
#     rescue path use the same id + 499 upper bound. The inclusive range
#     id..(id + 499) covers 500 ids, matching the step(500) above.
#   * config/initializers/_version.rb: Lupo::Application::VERSION goes from
#     "2.2.4" to "2.2.5".
#   * lib/tasks/doi.rake, task doi:index_by_ids: see the NOTE(review) on the
#     rake hunk below.
#
# Rescue path shown in the third doi.rb hunk: on RequestEntityTooLarge,
# Faraday::ConnectionFailed or ActiveRecord::LockWaitTimeout the error is
# logged and one IndexJob is queued per DOI in the same id range.
#
# NOTE(review): in this copy the rake hunk's "-" and "+" lines both read
#   from_id + 499, while the diffstat counts one insertion and one deletion
#   for lib/tasks/doi.rake. Presumably the removed line read from_id + 249
#   -- verify against the original commit.
# NOTE(review): the unchanged context lines `if errors > 1` and
#   `elsif count > 1` mean a batch with exactly one error, or exactly one
#   indexed DOI, produces no log line. This looks like an off-by-one
#   (`> 0` presumably intended) -- confirm against how errors/count are
#   accumulated, which is outside the hunks shown here.
# NOTE(review): the rescue branch reports the caught error via logger.info,
#   whereas the bulk-error branch uses logger.error -- confirm the intended
#   severity.
# NOTE(review): the context comment says "end_id" but the variable is named
#   until_id.
# NOTE(review): the literals 499/500 are repeated across three call sites in
#   two files; a single shared batch-size constant would keep the step,
#   range width and batch_size from drifting apart.
# NOTE(review): larger bulk requests presumably make RequestEntityTooLarge
#   more likely; the per-DOI fallback above handles that case -- verify the
#   Elasticsearch request size limit in the target environment.
# NOTE(review): this copy of the patch appears to have lost its original
#   line breaks and indentation (headers, diffstat and hunks share two
#   physical lines), so it probably cannot be applied as-is. The patch text
#   above is kept byte-identical for that reason -- regenerate it from the
#   commit before use.
# ---------------------------------------------------------------------------