Skip to content

Commit

Permalink
support indexing by id
Browse files Browse the repository at this point in the history
  • Loading branch information
Martin Fenner committed Jan 3, 2019
1 parent fc3c484 commit 495b4af
Show file tree
Hide file tree
Showing 4 changed files with 69 additions and 1 deletion.
7 changes: 7 additions & 0 deletions app/jobs/doi_index_by_id_job.rb
Original file line number Diff line number Diff line change
@@ -0,0 +1,7 @@
# Background job that indexes one 250-id window of DOIs into Elasticsearch.
# Enqueued in batches by Doi.index_by_ids; delegates straight to Doi.index_by_id.
class DoiIndexByIdJob < ActiveJob::Base
  queue_as :lupo_background

  # options - Hash forwarded unchanged to Doi.index_by_id (expects :id).
  def perform(options = {})
    Doi.index_by_id(options)
  end
end
53 changes: 53 additions & 0 deletions app/models/doi.rb
Original file line number Diff line number Diff line change
Expand Up @@ -521,6 +521,59 @@ def self.index_by_day(options={})
logger.info "[Elasticsearch] Indexed #{count} DOIs created on #{options[:from_date]}."
end

# Queues one DoiIndexByIdJob per 250-id window between from_id and until_id.
#
# options[:from_id]  - Integer lower bound (defaults to 1).
# options[:until_id] - Integer upper bound (defaults to from_id + 249).
def self.index_by_ids(options={})
  from_id = (options[:from_id] || 1).to_i
  until_id = (options[:until_id] || from_id + 249).to_i

  # walk the range in 250-id steps; each job covers [id, id + 249]
  (from_id..until_id).step(250).each do |id|
    DoiIndexByIdJob.perform_later(id: id)
    # was interpolating from_id/until_id, repeating the identical full-range
    # message on every iteration instead of the window actually queued
    puts "Queued indexing for DOIs with IDs #{id} - #{id + 249}."
  end
end

# Bulk-indexes into Elasticsearch the (up to) 250 DOIs whose primary keys fall
# in [options[:id], options[:id] + 249].
#
# options[:id] - lower bound of the id window; required (returns nil if absent).
#
# Logs per-document bulk errors. If the bulk request itself fails (payload too
# large, connection failure, lock wait timeout), falls back to queuing one
# IndexJob per DOI in the window.
def self.index_by_id(options={})
  return nil unless options[:id].present?
  id = options[:id].to_i

  errors = 0
  count = 0

  logger = Logger.new(STDOUT)

  Doi.where(id: id..(id + 249)).find_in_batches(batch_size: 250) do |dois|
    response = Doi.__elasticsearch__.client.bulk \
      index: Doi.index_name,
      type: Doi.document_type,
      body: dois.map { |doi| { index: { _id: doi.id, data: doi.as_indexed_json } } }

    # log errors; each bulk item is a one-entry hash keyed by the action name,
    # so a single block param suffices (the original's |k, v| left v always nil)
    errors += response['items'].map { |item| item.values.first['error'] }.compact.length
    response['items'].select { |item| item.values.first['error'].present? }.each do |err|
      logger.error "[Elasticsearch] " + err.inspect
    end

    count += dois.length
  end

  # was `> 1` on both branches, which silently skipped logging when exactly
  # one error occurred or exactly one DOI was indexed
  if errors > 0
    logger.error "[Elasticsearch] #{errors} errors indexing #{count} DOIs with IDs #{id} - #{(id + 249)}."
  elsif count > 0
    logger.info "[Elasticsearch] Indexed #{count} DOIs with IDs #{id} - #{(id + 249)}."
  end
rescue Elasticsearch::Transport::Transport::Errors::RequestEntityTooLarge, Faraday::ConnectionFailed, ActiveRecord::LockWaitTimeout => error
  # bulk request failed as a whole: retry one document at a time via jobs
  logger.info "[Elasticsearch] Error #{error.message} indexing DOIs with IDs #{id} - #{(id + 249)}."

  count = 0

  Doi.where(id: id..(id + 249)).find_each do |doi|
    IndexJob.perform_later(doi)
    count += 1
  end

  logger.info "[Elasticsearch] Indexed #{count} DOIs with IDs #{id} - #{(id + 249)}."
end

# Canonical identifier for this record: the DOI name, lowercased.
def uid
  self.doi.downcase
end
Expand Down
2 changes: 1 addition & 1 deletion config/initializers/_version.rb
Original file line number Diff line number Diff line change
@@ -1,5 +1,5 @@
module Lupo
  class Application
    # Application version; bumped from 2.2.3 in this change. The scraped diff
    # rendered both sides of the hunk, leaving a duplicate assignment that
    # would emit an "already initialized constant" warning — keep only the
    # post-commit value.
    VERSION = "2.2.4"
  end
end
8 changes: 8 additions & 0 deletions lib/tasks/doi.rake
Original file line number Diff line number Diff line change
Expand Up @@ -74,6 +74,14 @@ namespace :doi do
puts "DOIs created on #{from_date} indexed."
end

desc 'Index DOIs by ID'
task :index_by_ids => :environment do
  # Defaults mirror Doi.index_by_ids: start at 1, cover a 250-id window.
  start_id = (ENV['FROM_ID'] || 1).to_i
  stop_id = (ENV['UNTIL_ID'] || start_id + 249).to_i

  Doi.index_by_ids(from_id: start_id, until_id: stop_id)
end

desc 'Set minted'
task :set_minted => :environment do
from_date = ENV['FROM_DATE'] || Time.zone.now - 1.day
Expand Down

0 comments on commit 495b4af

Please sign in to comment.