Skip to content

Commit

Permalink
doi indexing in batches. #98
Browse files (browse the repository at this point in the history)
  • Loading branch information
Martin Fenner committed Sep 1, 2018
1 parent 593f32a commit 6778c66
Show file tree
Hide file tree
Showing 3 changed files with 11 additions and 16 deletions.
10 changes: 6 additions & 4 deletions app/models/doi.rb
Original file line number Diff line number Diff line change
Expand Up @@ -134,6 +134,10 @@ class Doi < ActiveRecord::Base
created: { type: :date },
updated: { type: :date }
}
indexes :alternate_identifier, type: :object, properties: {
type: { type: :keyword },
name: { type: :keyword }
}
indexes :resource_type_subtype, type: :keyword
indexes :version, type: :integer
indexes :is_active, type: :keyword
Expand Down Expand Up @@ -215,7 +219,7 @@ def self.query_aggregations
end

# Returns an array of field-name strings. The method name suggests these are
# the fields a search query runs against — confirm at the call site.
# Most entries carry a `^10` suffix, presumably a per-field boost in the
# search backend's field syntax — TODO confirm.
def self.query_fields
# Earlier version of the list, kept here because this view is a rendered diff.
# Read as plain Ruby, this literal is built and discarded: it is not the last
# expression of the method.
['doi^10', 'title_normalized^10', 'author_names^10', 'author_normalized.name^10', 'author_normalized.id^10', 'publisher^10', 'description_normalized^10', 'resource_type_id^10', 'resource_type_subtype^10', '_all']
# Current version: the same entries plus 'alternate_identifier' (without a
# `^10` suffix) inserted before '_all'. As the last expression, this array is
# the method's return value.
['doi^10', 'title_normalized^10', 'author_names^10', 'author_normalized.name^10', 'author_normalized.id^10', 'publisher^10', 'description_normalized^10', 'resource_type_id^10', 'resource_type_subtype^10', 'alternate_identifier', '_all']
end

def self.find_by_id(id, options={})
Expand Down Expand Up @@ -247,9 +251,7 @@ def self.index(options={})
from_date = options[:from_date].present? ? Date.parse(options[:from_date]) : Date.current - 1.day
until_date = options[:until_date].present? ? Date.parse(options[:until_date]) : Date.current

Doi.where("updated >= ?", from_date.strftime("%F")).where("updated <= ?", until_date.strftime("%F")).find_each do |doi|
IndexJob.set(queue: :lupo_background).perform_later(doi)
end
Doi.import query: -> { where("updated >= ?", from_date.strftime("%F")).where("updated <= ?", until_date.strftime("%F")) }, batch_size: 1000
end

def uid
Expand Down
4 changes: 2 additions & 2 deletions lib/tasks/doi.rake
Original file line number Diff line number Diff line change
Expand Up @@ -21,8 +21,8 @@ namespace :doi do
from_date = ENV['FROM_DATE'] || (Date.current - 1.day).strftime("%F")
until_date = ENV['UNTIL_DATE'] || Date.current.strftime("%F")

response = Doi.index(from_date: from_date, until_date: until_date)
puts "Queued indexing for #{response} DOIs updated from #{from_date} - #{until_date}."
Doi.index(from_date: from_date, until_date: until_date)
puts "Queued indexing for DOIs updated from #{from_date} - #{until_date}."
end

desc 'Set state'
Expand Down
13 changes: 3 additions & 10 deletions spec/lib/tasks/doi_rake_spec.rb
Original file line number Diff line number Diff line change
@@ -1,6 +1,6 @@
require 'rails_helper'

describe "doi:index_by_month" do
describe "doi:index_by_month", elasticsearch: true do
include ActiveJob::TestHelper
include_context "rake"

Expand All @@ -26,12 +26,12 @@
end
end

describe "doi:index" do
describe "doi:index", elasticsearch: true do
include ActiveJob::TestHelper
include_context "rake"

let!(:doi) { create_list(:doi, 10) }
let(:output) { "Queued indexing for DOIs updated from 2018-01-04 - 2018-08-05.\n" }
let(:output) { "Queued indexing for DOIs updated from 2018-01-04 - 2018-08-05.\n" }

it "prerequisites should include environment" do
expect(subject.prerequisites).to include("environment")
Expand All @@ -40,11 +40,4 @@
it "should run the rake task" do
expect(capture_stdout { subject.invoke }).to eq(output)
end

it "should enqueue an IndexJob" do
expect {
capture_stdout { subject.invoke }
}.to change(enqueued_jobs, :size).by(0)
expect(enqueued_jobs.last[:job]).to be(IndexJob)
end
end

0 comments on commit 6778c66

Please sign in to comment.