From 6778c66fd01961d5707405c122c5732ac28cc88d Mon Sep 17 00:00:00 2001 From: Martin Fenner Date: Sat, 1 Sep 2018 11:58:54 +0200 Subject: [PATCH] doi indexing in batches. #98 --- app/models/doi.rb | 10 ++++++---- lib/tasks/doi.rake | 4 ++-- spec/lib/tasks/doi_rake_spec.rb | 13 +++---------- 3 files changed, 11 insertions(+), 16 deletions(-) diff --git a/app/models/doi.rb b/app/models/doi.rb index 67edd23e1..e6b4f4b1c 100644 --- a/app/models/doi.rb +++ b/app/models/doi.rb @@ -134,6 +134,10 @@ class Doi < ActiveRecord::Base created: { type: :date }, updated: { type: :date } } + indexes :alternate_identifier, type: :object, properties: { + type: { type: :keyword }, + name: { type: :keyword } + } indexes :resource_type_subtype, type: :keyword indexes :version, type: :integer indexes :is_active, type: :keyword @@ -215,7 +219,7 @@ def self.query_aggregations end def self.query_fields - ['doi^10', 'title_normalized^10', 'author_names^10', 'author_normalized.name^10', 'author_normalized.id^10', 'publisher^10', 'description_normalized^10', 'resource_type_id^10', 'resource_type_subtype^10', '_all'] + ['doi^10', 'title_normalized^10', 'author_names^10', 'author_normalized.name^10', 'author_normalized.id^10', 'publisher^10', 'description_normalized^10', 'resource_type_id^10', 'resource_type_subtype^10', 'alternate_identifier', '_all'] end def self.find_by_id(id, options={}) @@ -247,9 +251,7 @@ def self.index(options={}) from_date = options[:from_date].present? ? Date.parse(options[:from_date]) : Date.current - 1.day until_date = options[:until_date].present? ? Date.parse(options[:until_date]) : Date.current - Doi.where("updated >= ?", from_date.strftime("%F")).where("updated <= ?", until_date.strftime("%F")).find_each do |doi| - IndexJob.set(queue: :lupo_background).perform_later(doi) - end + Doi.import query: -> { where("updated >= ?", from_date.strftime("%F")).where("updated <= ?", until_date.strftime("%F")) }, batch_size: 1000 end def uid diff --git a/lib/tasks/doi.rake b/lib/tasks/doi.rake index 54446a264..13ac5ecfe 100644 --- a/lib/tasks/doi.rake +++ b/lib/tasks/doi.rake @@ -21,8 +21,8 @@ namespace :doi do from_date = ENV['FROM_DATE'] || (Date.current - 1.day).strftime("%F") until_date = ENV['UNTIL_DATE'] || Date.current.strftime("%F") - response = Doi.index(from_date: from_date, until_date: until_date) - puts "Queued indexing for #{response} DOIs updated from #{from_date} - #{until_date}." + Doi.index(from_date: from_date, until_date: until_date) + puts "Queued indexing for DOIs updated from #{from_date} - #{until_date}." end desc 'Set state' diff --git a/spec/lib/tasks/doi_rake_spec.rb b/spec/lib/tasks/doi_rake_spec.rb index a3702c6df..38c74e44e 100644 --- a/spec/lib/tasks/doi_rake_spec.rb +++ b/spec/lib/tasks/doi_rake_spec.rb @@ -1,6 +1,6 @@ require 'rails_helper' -describe "doi:index_by_month" do +describe "doi:index_by_month", elasticsearch: true do include ActiveJob::TestHelper include_context "rake" @@ -26,12 +26,12 @@ end end -describe "doi:index" do +describe "doi:index", elasticsearch: true do include ActiveJob::TestHelper include_context "rake" let!(:doi) { create_list(:doi, 10) } - let(:output) { "Queued indexing for DOIs updated from 2018-01-04 - 2018-08-05.\n" } + let(:output) { "Queued indexing for DOIs updated from 2018-01-04 - 2018-08-05.\n" } it "prerequisites should include environment" do expect(subject.prerequisites).to include("environment") @@ -40,11 +40,4 @@ it "should run the rake task" do expect(capture_stdout { subject.invoke }).to eq(output) end - - it "should enqueue an IndexJob" do - expect { - capture_stdout { subject.invoke } - }.to change(enqueued_jobs, :size).by(0) - expect(enqueued_jobs.last[:job]).to be(IndexJob) - end end \ No newline at end of file