Skip to content

Commit

Permalink
bulk import elasticsearch in batches of 100. datacite/datacite#463
Browse files (browse the repository at this point in the history)
  • Loading branch information
Martin Fenner committed Sep 1, 2018
1 parent 570c9d9 commit b9811f6
Show file tree
Hide file tree
Showing 3 changed files with 9 additions and 5 deletions.
8 changes: 6 additions & 2 deletions app/models/doi.rb
Original file line number Diff line number Diff line change
Expand Up @@ -252,17 +252,21 @@ def self.index_by_day(options={})
from_date = options[:from_date].present? ? Date.parse(options[:from_date]) : Date.current
until_date = from_date + 1.day
errors = 0
count = 0

Doi.where("updated >= ?", from_date.strftime("%F") + " 00:00:00").where("updated <= ?", until_date.strftime("%F") + " 00:00:00").find_in_batches(batch_size: 1000) do |dois|
logger = Logger.new(STDOUT)

Doi.where("updated >= ?", from_date.strftime("%F") + " 00:00:00").where("updated <= ?", until_date.strftime("%F") + " 00:00:00").find_in_batches(batch_size: 100) do |dois|
response = Doi.__elasticsearch__.client.bulk \
index: Doi.index_name,
type: Doi.document_type,
body: dois.map { |doi| { index: { _id: doi.id, data: doi.as_indexed_json } } }

errors += response['items'].map { |k, v| k.values.first['error'] }.compact.length
count += dois.length
end

errors
logger.info "[Elasticsearch] #{errors} errors indexing #{count} DOIs updated on #{from_date.strftime("%F")}."
end

def uid
Expand Down
4 changes: 2 additions & 2 deletions lib/tasks/doi.rake
Original file line number Diff line number Diff line change
Expand Up @@ -20,8 +20,8 @@ namespace :doi do
task :index_by_day => :environment do
from_date = ENV['FROM_DATE'] || Date.current.strftime("%F")

count = Doi.index_by_day(from_date: from_date)
puts "DOIs updated on #{from_date} indexed with #{count} errors."
Doi.index_by_day(from_date: from_date)
puts "DOIs updated on #{from_date} indexed."
end

desc 'Set state'
Expand Down
2 changes: 1 addition & 1 deletion spec/lib/tasks/doi_rake_spec.rb
Original file line number Diff line number Diff line change
Expand Up @@ -31,7 +31,7 @@
include_context "rake"

let!(:doi) { create_list(:doi, 10) }
let(:output) { "DOIs updated on 2018-01-04 indexed with 0 errors.\n" }
let(:output) { "DOIs updated on 2018-01-04 indexed.\n" }

it "prerequisites should include environment" do
expect(subject.prerequisites).to include("environment")
Expand Down

0 comments on commit b9811f6

Please sign in to comment.