From c2f1d06b98eedb47e931d87910aae14e9fbef1c0 Mon Sep 17 00:00:00 2001 From: Martin Fenner Date: Wed, 24 Oct 2018 18:51:24 +0200 Subject: [PATCH] tweak background jobs --- app/models/doi.rb | 21 ++++++++++----------- lib/tasks/doi.rake | 7 +++---- spec/lib/tasks/doi_rake_spec.rb | 4 ++-- 3 files changed, 15 insertions(+), 17 deletions(-) diff --git a/app/models/doi.rb b/app/models/doi.rb index dcab58e08..1932d9b6f 100644 --- a/app/models/doi.rb +++ b/app/models/doi.rb @@ -235,20 +235,19 @@ def self.index(options={}) # get every day between from_date and until_date (from_date..until_date).each do |d| DoiIndexByDayJob.perform_later(from_date: d.strftime("%F")) - end - - "Queued indexing for DOIs created from #{from_date.strftime("%F")} until #{until_date.strftime("%F")}." + puts "Queued indexing for DOIs created on #{d.strftime("%F")}." + end end def self.index_by_day(options={}) - from_date = options[:from_date].present? ? Date.parse(options[:from_date]) : Date.current - until_date = from_date + 1.day + return nil unless options[:from_date].present? + errors = 0 count = 0 logger = Logger.new(STDOUT) - Doi.where("created >= ?", from_date.strftime("%F") + " 00:00:00").where("created < ?", until_date.strftime("%F") + " 00:00:00").where("updated > indexed").find_in_batches(batch_size: 500) do |dois| + Doi.where(created: [options[:from_date] + " 00:00:00", options[:from_date] + " 23:59:59"]).not_indexed.find_in_batches(batch_size: 500) do |dois| response = Doi.__elasticsearch__.client.bulk \ index: Doi.index_name, type: Doi.document_type, @@ -260,22 +259,22 @@ def self.index_by_day(options={}) end if errors > 1 - logger.info "[Elasticsearch] #{errors} errors indexing #{count} DOIs created on #{from_date.strftime("%F")}." + logger.info "[Elasticsearch] #{errors} errors indexing #{count} DOIs created on #{options[:from_date]}." elsif count > 1 - logger.info "[Elasticsearch] Indexed #{count} DOIs created on #{from_date.strftime("%F")}." + logger.info "[Elasticsearch] Indexed #{count} DOIs created on #{options[:from_date]}." end rescue Elasticsearch::Transport::Transport::Errors::RequestEntityTooLarge, Faraday::ConnectionFailed => error - logger.info "[Elasticsearch] Error #{error.message} indexing DOIs created on #{from_date.strftime("%F")}." + logger.info "[Elasticsearch] Error #{error.message} indexing DOIs created on #{options[:from_date]}." count = 0 - Doi.where("created >= ?", from_date.strftime("%F") + " 00:00:00").where("created < ?", until_date.strftime("%F") + " 00:00:00").where("updated > indexed").find_each do |doi| + Doi.where(created: [options[:from_date] + " 00:00:00", options[:from_date] + " 23:59:59"]).not_indexed.find_each do |doi| IndexJob.perform_later(doi) doi.update_column(:indexed, Time.zone.now) count += 1 end - logger.info "[Elasticsearch] Indexed #{count} DOIs created on #{from_date.strftime("%F")}." + logger.info "[Elasticsearch] Indexed #{count} DOIs created on #{options[:from_date]}." end def uid diff --git a/lib/tasks/doi.rake b/lib/tasks/doi.rake index 89ada32c6..a5748b394 100644 --- a/lib/tasks/doi.rake +++ b/lib/tasks/doi.rake @@ -12,12 +12,11 @@ namespace :doi do from_date = "#{ENV['YEAR']}-01-01" until_date = "#{ENV['YEAR']}-12-31" else - from_date = ENV['FROM_DATE'] || Date.current.beginning_of_month.strftime("%F") - until_date = ENV['UNTIL_DATE'] || Date.current.end_of_month.strftime("%F") + from_date = ENV['FROM_DATE'] || Date.current.strftime("%F") + until_date = ENV['UNTIL_DATE'] || Date.current.strftime("%F") end - response = Doi.index(from_date: from_date, until_date: until_date) - puts response + Doi.index(from_date: from_date, until_date: until_date) end desc 'Index DOIs per day' diff --git a/spec/lib/tasks/doi_rake_spec.rb b/spec/lib/tasks/doi_rake_spec.rb index 137ef8c8c..2e2cf0420 100644 --- a/spec/lib/tasks/doi_rake_spec.rb +++ b/spec/lib/tasks/doi_rake_spec.rb @@ -8,14 +8,14 @@ ENV['UNTIL_DATE'] = "2018-08-05" let!(:doi) { create_list(:doi, 10) } - let(:output) { "Queued indexing for DOIs created from 2018-01-04 until 2018-08-05.\n" } + let(:output) { "Queued indexing for DOIs created on 2018-01-04.\n" } it "prerequisites should include environment" do expect(subject.prerequisites).to include("environment") end it "should run the rake task" do - expect(capture_stdout { subject.invoke }).to eq(output) + expect(capture_stdout { subject.invoke }).to start_with(output) end it "should enqueue an DoiIndexByDayJob" do