Skip to content

Commit

Permalink
bulk updates of index. #98
Browse files Browse the repository at this point in the history
  • Loading branch information
Martin Fenner committed Sep 1, 2018
1 parent 6778c66 commit 7efed60
Show file tree
Hide file tree
Showing 5 changed files with 38 additions and 28 deletions.
7 changes: 7 additions & 0 deletions app/jobs/doi_index_by_day_job.rb
Original file line number Diff line number Diff line change
@@ -0,0 +1,7 @@
# Background job that hands its options straight to Doi.index_by_day.
# Jobs are placed on the :lupo_background queue.
class DoiIndexByDayJob < ActiveJob::Base
  queue_as :lupo_background

  # @param options [Hash] passed through unchanged to Doi.index_by_day
  def perform(options = {})
    Doi.index_by_day(options)
  end
end
7 changes: 0 additions & 7 deletions app/jobs/doi_index_by_month_job.rb

This file was deleted.

25 changes: 18 additions & 7 deletions app/models/doi.rb
Original file line number Diff line number Diff line change
Expand Up @@ -182,6 +182,7 @@ def as_indexed_json(options={})
"suffix" => suffix,
"resource_type_id" => resource_type_id,
"resource_type_subtype" => resource_type_subtype,
"alternate_identifier" => alternate_identifier,
"b_version" => b_version,
"is_active" => is_active,
"last_landing_page_status" => last_landing_page_status,
Expand Down Expand Up @@ -235,23 +236,33 @@ def self.find_by_id(id, options={})
})
end

def self.index_by_month(options={})
# Queue one DoiIndexByDayJob for every day in the months spanned by the
# given dates.
#
# @param options [Hash]
#   :from_date  - string parseable by Date.parse; defaults to Date.current.
#                 Rounded down to the first day of its month.
#   :until_date - string parseable by Date.parse; defaults to Date.current.
#                 Rounded up to the last day of its month.
# @return [String] human-readable summary of the queued date range
def self.index(options={})
  from_date = (options[:from_date].present? ? Date.parse(options[:from_date]) : Date.current).beginning_of_month
  until_date = (options[:until_date].present? ? Date.parse(options[:until_date]) : Date.current).end_of_month

  # queue one indexing job per day between from_date and until_date (inclusive)
  (from_date..until_date).each do |d|
    DoiIndexByDayJob.perform_later(from_date: d.strftime("%F"))
  end

  "Queued indexing for DOIs updated from #{from_date.strftime("%F")} until #{until_date.strftime("%F")}."
end

def self.index(options={})
from_date = options[:from_date].present? ? Date.parse(options[:from_date]) : Date.current - 1.day
until_date = options[:until_date].present? ? Date.parse(options[:until_date]) : Date.current
# Send all DOIs updated on a single day to Elasticsearch via the bulk API.
#
# @param options [Hash]
#   :from_date - the day to index, as a string parseable by Date.parse;
#                defaults to Date.current.
# @return [Integer] number of bulk items for which the response carried an
#   'error' entry
def self.index_by_day(options={})
  from_date = options[:from_date].present? ? Date.parse(options[:from_date]) : Date.current
  until_date = from_date + 1.day
  errors = 0

  # Half-open interval [from_date 00:00:00, until_date 00:00:00): a record
  # stamped exactly at midnight belongs to one day only, so consecutive
  # per-day jobs do not index it twice.
  Doi.where("updated >= ?", from_date.strftime("%F") + " 00:00:00").where("updated < ?", until_date.strftime("%F") + " 00:00:00").find_in_batches(batch_size: 1000) do |dois|
    response = Doi.__elasticsearch__.client.bulk(
      index: Doi.index_name,
      type: Doi.document_type,
      body: dois.map { |doi| { index: { _id: doi.id, data: doi.as_indexed_json } } }
    )

    # each item is a one-entry hash keyed by the action name; count those with an error
    errors += response['items'].count { |item| item.values.first['error'] }
  end

  # The bulk requests above are the only indexing pass; no additional
  # Doi.import over the same range is performed.
  errors
end

def uid
Expand Down
17 changes: 8 additions & 9 deletions lib/tasks/doi.rake
Original file line number Diff line number Diff line change
Expand Up @@ -7,22 +7,21 @@ namespace :doi do
end
end

desc 'Index all DOIs by month'
task :index_by_month => :environment do
# Queues indexing jobs for a date range via Doi.index and prints its summary.
# FROM_DATE / UNTIL_DATE environment variables override the defaults of the
# first and last day of the current month.
desc 'Index all DOIs'
task :index => :environment do
  from_date = ENV['FROM_DATE'] || Date.current.beginning_of_month.strftime("%F")
  until_date = ENV['UNTIL_DATE'] || Date.current.end_of_month.strftime("%F")

  response = Doi.index(from_date: from_date, until_date: until_date)
  puts response
end

desc 'Index all DOIs'
task :index => :environment do
from_date = ENV['FROM_DATE'] || (Date.current - 1.day).strftime("%F")
until_date = ENV['UNTIL_DATE'] || Date.current.strftime("%F")
# Indexes the DOIs updated on one day via Doi.index_by_day and prints the
# error count it returns. FROM_DATE overrides the default of the current date.
desc 'Index DOIs per day'
task :index_by_day => :environment do
  from_date = ENV['FROM_DATE'] || Date.current.strftime("%F")

  count = Doi.index_by_day(from_date: from_date)
  puts "DOIs updated on #{from_date} indexed with #{count} errors."
end

desc 'Set state'
Expand Down
10 changes: 5 additions & 5 deletions spec/lib/tasks/doi_rake_spec.rb
Original file line number Diff line number Diff line change
@@ -1,6 +1,6 @@
require 'rails_helper'

describe "doi:index_by_month", elasticsearch: true do
describe "doi:index", elasticsearch: true do
include ActiveJob::TestHelper
include_context "rake"

Expand All @@ -21,17 +21,17 @@
it "should enqueue an DoiIndexByMonthJob" do
expect {
capture_stdout { subject.invoke }
}.to change(enqueued_jobs, :size).by(8)
expect(enqueued_jobs.last[:job]).to be(DoiIndexByMonthJob)
}.to change(enqueued_jobs, :size).by(243)
expect(enqueued_jobs.last[:job]).to be(DoiIndexByDayJob)
end
end

describe "doi:index", elasticsearch: true do
describe "doi:index_by_day", elasticsearch: true do
include ActiveJob::TestHelper
include_context "rake"

let!(:doi) { create_list(:doi, 10) }
let(:output) { "Queued indexing for DOIs updated from 2018-01-04 - 2018-08-05.\n" }
let(:output) { "DOIs updated on 2018-01-04 indexed with 0 errors.\n" }

it "prerequisites should include environment" do
expect(subject.prerequisites).to include("environment")
Expand Down

0 comments on commit 7efed60

Please sign in to comment.