From 10fa673f7ba3bf40d794b7b65aca7d54cbad757c Mon Sep 17 00:00:00 2001 From: kjgarza Date: Thu, 23 Jan 2020 13:54:53 +0100 Subject: [PATCH 01/15] rake task to get list and totals of event with corssref events in both nodes. https://github.com/datacite/lupo/issues/351 subj_id in datacite-crossref,datacite-related events MUST always be a DataCite DOI --- app/models/event.rb | 33 +++++++++++++++++++++++++++++++++ lib/tasks/event.rake | 9 +++++++++ 2 files changed, 42 insertions(+) diff --git a/app/models/event.rb b/app/models/event.rb index b9ba26abc..a4017932b 100644 --- a/app/models/event.rb +++ b/app/models/event.rb @@ -489,6 +489,39 @@ def access_method end end + def self.subj_id_check(options = {}) + + size = (options[:size] || 1000).to_i + cursor = (options[:cursor] || []) + total_errors = 0 + + response = Event.query(nil, source_id: "datacite-crossref,datacite-related", page: { size: 1, cursor: [] }) + logger.info "[DoubleCheck] #{response.results.total} events for source datacite-crossref,datacite-related." + + # walk through results using cursor + if response.results.total > 0 + while response.results.results.length > 0 do + response = Event.query(nil, source_id: "datacite-crossref,datacite-related", page: { size: size, cursor: cursor }) + break unless response.results.results.length > 0 + + logger.info "[DoubleCheck] DoubleCheck #{response.results.results.length} events starting with _id #{response.results.to_a.first[:_id]}." + cursor = response.results.to_a.last[:sort] + + # dois = response.results.results.map(&:subj_id) + events = response.results.results + + events.lazy.each do | event| + subj_prefix = event.subj_id[/(10\.\d{4,5})/,1] + File.open("evens_with_double_crossref_dois.txt", "a+") do |f| + f.write(event.uuid, "\n") + total_errors= total_errors+1 + end if Prefix.where(prefix: subj_prefix).empty? + end + end + end + logger.warn "Total number of events with Errors: #{total_errors}" + end + def metric_type if relation_type_id.to_s =~ /(requests|investigations)/ arr = relation_type_id.split("-", 4) diff --git a/lib/tasks/event.rake b/lib/tasks/event.rake index 727bf6d74..ba440887c 100644 --- a/lib/tasks/event.rake +++ b/lib/tasks/event.rake @@ -71,6 +71,15 @@ namespace :crossref do end end +namespace :subj_id_check do + desc 'checks datacite-crossref,datacite-related events have a DataCite DOI in the subject node' + task :check => :environment do + cursor = ENV['CURSOR'].to_s.split(",") || [Event.minimum(:id),Event.minimum(:id)] + + Event.subj_id_check(cursor: cursor) + end +end + namespace :datacite_crossref do desc 'Import crossref dois for all events' task :import_doi => :environment do From ac3a09ffbb619e696c9e9cfc02774117b39bec10 Mon Sep 17 00:00:00 2001 From: kjgarza Date: Thu, 23 Jan 2020 13:59:00 +0100 Subject: [PATCH 02/15] remove trailing space --- app/models/event.rb | 2 -- 1 file changed, 2 deletions(-) diff --git a/app/models/event.rb b/app/models/event.rb index a4017932b..deb87f651 100644 --- a/app/models/event.rb +++ b/app/models/event.rb @@ -490,7 +490,6 @@ def access_method end def self.subj_id_check(options = {}) - size = (options[:size] || 1000).to_i cursor = (options[:cursor] || []) total_errors = 0 @@ -509,7 +508,6 @@ def self.subj_id_check(options = {}) # dois = response.results.results.map(&:subj_id) events = response.results.results - events.lazy.each do | event| subj_prefix = event.subj_id[/(10\.\d{4,5})/,1] File.open("evens_with_double_crossref_dois.txt", "a+") do |f| From b4234283e1cd9d202bce6286463d411fc8e9b264 Mon Sep 17 00:00:00 2001 From: kjgarza Date: Thu, 23 Jan 2020 16:23:44 +0100 Subject: [PATCH 03/15] label --- app/models/event.rb | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/app/models/event.rb b/app/models/event.rb index deb87f651..b54bdb10f 100644 --- a/app/models/event.rb +++ b/app/models/event.rb @@ -517,7 +517,7 @@ def self.subj_id_check(options = {}) end end end - logger.warn "Total number of events with Errors: #{total_errors}" + logger.warn "[DoubleCheck] Total number of events with Errors: #{total_errors}" end def metric_type From 0f815e7be3e84fcc69c240df443969b5b3c153ac Mon Sep 17 00:00:00 2001 From: kjgarza Date: Fri, 24 Jan 2020 19:33:21 +0100 Subject: [PATCH 04/15] linting --- spec/models/event_spec.rb | 27 +++++++++++++++++++++++++++ 1 file changed, 27 insertions(+) diff --git a/spec/models/event_spec.rb b/spec/models/event_spec.rb index f94b8744d..3724a4c4e 100644 --- a/spec/models/event_spec.rb +++ b/spec/models/event_spec.rb @@ -37,5 +37,32 @@ expect(published).to eq("2011") expect(published).not_to eq(2011) end + + context "double_crossref_check", elasticsearch: true do + let(:provider) { create(:provider, symbol: "DATACITE") } + let(:client) { create(:client, provider: provider, symbol: ENV['MDS_USERNAME'], password: ENV['MDS_PASSWORD']) } + let!(:prefix) { create(:prefix, prefix: "10.14454") } + let!(:client_prefix) { create(:client_prefix, client: client, prefix: prefix) } + let!(:doi) { create(:doi, client: client) } + let!(:dois) { create_list(:doi, 10) } + let!(:events) { create_list(:event_for_datacite_related, 30, source_id: "datacite-crossref", obj_id: doi.doi) } + + before do + Provider.import + # Prefix.import + Client.import + Doi.import + Event.import + sleep 3 + end + + it "check run" do + # puts prefix.inspect + puts [Event.minimum(:id),Event.maximum(:id)] + expect(Event.crossref_double_check( cursor: [Event.minimum(:id),Event.maximum(:id)])).to eq("2006-06-13T16:14:19Z") + # expect(subject.obj["datePublished"]).to be_nil + end + + end end end From eefe175ce2edacca6f76c82469815038c2f8a548 Mon Sep 17 00:00:00 2001 From: kjgarza Date: Wed, 29 Jan 2020 14:51:43 +0100 Subject: [PATCH 05/15] name rake task change --- lib/tasks/event.rake | 4 ++-- spec/models/event_spec.rb | 4 +--- 2 files changed, 3 insertions(+), 5 deletions(-) diff --git a/lib/tasks/event.rake b/lib/tasks/event.rake index ba440887c..f1fffbbcb 100644 --- a/lib/tasks/event.rake +++ b/lib/tasks/event.rake @@ -72,9 +72,9 @@ namespace :crossref do end namespace :subj_id_check do - desc 'checks datacite-crossref,datacite-related events have a DataCite DOI in the subject node' + desc 'checks that events subject node congruency' task :check => :environment do - cursor = ENV['CURSOR'].to_s.split(",") || [Event.minimum(:id),Event.minimum(:id)] + cursor = ENV['CURSOR'].to_s.split(",") || [Event.minimum(:id),Event.maximum(:id)] Event.subj_id_check(cursor: cursor) end diff --git a/spec/models/event_spec.rb b/spec/models/event_spec.rb index 3724a4c4e..2180c855d 100644 --- a/spec/models/event_spec.rb +++ b/spec/models/event_spec.rb @@ -49,7 +49,6 @@ before do Provider.import - # Prefix.import Client.import Doi.import Event.import @@ -58,8 +57,7 @@ it "check run" do # puts prefix.inspect - puts [Event.minimum(:id),Event.maximum(:id)] - expect(Event.crossref_double_check( cursor: [Event.minimum(:id),Event.maximum(:id)])).to eq("2006-06-13T16:14:19Z") + expect(Event.subj_id_check(cursor: [Event.minimum(:id),Event.maximum(:id)])).to eq("2006-06-13T16:14:19Z") # expect(subject.obj["datePublished"]).to be_nil end From eadcb23219f17ef7431dd5c4ec6652392906e4cd Mon Sep 17 00:00:00 2001 From: kjgarza Date: Wed, 29 Jan 2020 14:52:34 +0100 Subject: [PATCH 06/15] linting --- app/models/event.rb | 18 ++++++++++-------- 1 file changed, 10 insertions(+), 8 deletions(-) diff --git a/app/models/event.rb b/app/models/event.rb index f0c5af04d..ad1777d7e 100644 --- a/app/models/event.rb +++ b/app/models/event.rb @@ -498,10 +498,10 @@ def self.subj_id_check(options = {}) logger.info "[DoubleCheck] #{response.results.total} events for source datacite-crossref,datacite-related." # walk through results using cursor - if response.results.total > 0 - while response.results.results.length > 0 do + if response.results.total.positive? + while response.results.results.length.positive? response = Event.query(nil, source_id: "datacite-crossref,datacite-related", page: { size: size, cursor: cursor }) - break unless response.results.results.length > 0 + break unless response.results.results.length.positive? logger.info "[DoubleCheck] DoubleCheck #{response.results.results.length} events starting with _id #{response.results.to_a.first[:_id]}." cursor = response.results.to_a.last[:sort] @@ -509,11 +509,13 @@ def self.subj_id_check(options = {}) # dois = response.results.results.map(&:subj_id) events = response.results.results events.lazy.each do | event| - subj_prefix = event.subj_id[/(10\.\d{4,5})/,1] - File.open("evens_with_double_crossref_dois.txt", "a+") do |f| - f.write(event.uuid, "\n") - total_errors= total_errors+1 - end if Prefix.where(prefix: subj_prefix).empty? + subj_prefix = event.subj_id[/(10\.\d{4,5})/, 1] + if Prefix.where(prefix: subj_prefix).empty? + File.open("evens_with_double_crossref_dois.txt", "a+") do |f| + f.write(event.uuid, "\n") + total_errors = total_errors + 1 + end + end end end end From 4c414cf57635bc7fe815d3ad20126f7620b8f423 Mon Sep 17 00:00:00 2001 From: kjgarza Date: Wed, 29 Jan 2020 15:40:41 +0100 Subject: [PATCH 07/15] save uuids outside of the container --- app/models/event.rb | 13 +++++++++++-- spec/models/event_spec.rb | 5 +---- 2 files changed, 12 insertions(+), 6 deletions(-) diff --git a/app/models/event.rb b/app/models/event.rb index ad1777d7e..03884fbc2 100644 --- a/app/models/event.rb +++ b/app/models/event.rb @@ -490,6 +490,7 @@ def access_method end def self.subj_id_check(options = {}) + file_name = "events_with_double_crossref_dois.txt" size = (options[:size] || 1000).to_i cursor = (options[:cursor] || []) total_errors = 0 @@ -511,7 +512,7 @@ def self.subj_id_check(options = {}) events.lazy.each do | event| subj_prefix = event.subj_id[/(10\.\d{4,5})/, 1] if Prefix.where(prefix: subj_prefix).empty? - File.open("evens_with_double_crossref_dois.txt", "a+") do |f| + File.open(file_name, "a+") do |f| f.write(event.uuid, "\n") total_errors = total_errors + 1 end @@ -519,7 +520,15 @@ def self.subj_id_check(options = {}) end end end - logger.warn "[DoubleCheck] Total number of events with Errors: #{total_errors}" + + file = File.open(file_name) + if file.present? + payload = { description: "events_with_errors_from_rake_task", public: true,files: {uids_with_errors: {content: file.read} }} + ### max file size 1MB + response = Maremma.post("https://api.github.com/gists", data: payload.to_json, username: ENV["GIST_USERNAME"], password:ENV["GIST_PASSWORD"]) + logger.warn "[DoubleCheck] Total number of events with Errors: #{total_errors}" + logger.warn "[DoubleCheck] IDs saved: #{response.body.dig('data','url')}" if [200,201].include?(response.status) + end end def metric_type diff --git a/spec/models/event_spec.rb b/spec/models/event_spec.rb index 2180c855d..95eb4f663 100644 --- a/spec/models/event_spec.rb +++ b/spec/models/event_spec.rb @@ -56,11 +56,8 @@ end it "check run" do - # puts prefix.inspect - expect(Event.subj_id_check(cursor: [Event.minimum(:id),Event.maximum(:id)])).to eq("2006-06-13T16:14:19Z") - # expect(subject.obj["datePublished"]).to be_nil + expect(Event.subj_id_check(cursor: [Event.minimum(:id),Event.maximum(:id)])).to eq(true) end - end end end From 8cb1f0fe0333896f906f3d4abf9e1b9cee735254 Mon Sep 17 00:00:00 2001 From: kjgarza Date: Wed, 29 Jan 2020 16:04:24 +0100 Subject: [PATCH 08/15] don't use pagination in a different way --- app/models/event.rb | 6 +++--- lib/tasks/event.rake | 7 ++++--- 2 files changed, 7 insertions(+), 6 deletions(-) diff --git a/app/models/event.rb b/app/models/event.rb index 03884fbc2..47a7d6d40 100644 --- a/app/models/event.rb +++ b/app/models/event.rb @@ -490,9 +490,9 @@ def access_method end def self.subj_id_check(options = {}) - file_name = "events_with_double_crossref_dois.txt" + file_name = "evens_with_double_crossref_dois.txt" size = (options[:size] || 1000).to_i - cursor = (options[:cursor] || []) + cursor = [options[:from_id], options[:until_id]] total_errors = 0 response = Event.query(nil, source_id: "datacite-crossref,datacite-related", page: { size: 1, cursor: [] }) @@ -523,7 +523,7 @@ def self.subj_id_check(options = {}) file = File.open(file_name) if file.present? - payload = { description: "events_with_errors_from_rake_task", public: true,files: {uids_with_errors: {content: file.read} }} + payload = { description: "events_with_errors_from_rake_task #{Time.now.getutc}", public: true,files: {uids_with_errors: {content: file.read} }} ### max file size 1MB response = Maremma.post("https://api.github.com/gists", data: payload.to_json, username: ENV["GIST_USERNAME"], password:ENV["GIST_PASSWORD"]) logger.warn "[DoubleCheck] Total number of events with Errors: #{total_errors}" diff --git a/lib/tasks/event.rake b/lib/tasks/event.rake index f1fffbbcb..119de386b 100644 --- a/lib/tasks/event.rake +++ b/lib/tasks/event.rake @@ -74,9 +74,10 @@ end namespace :subj_id_check do desc 'checks that events subject node congruency' task :check => :environment do - cursor = ENV['CURSOR'].to_s.split(",") || [Event.minimum(:id),Event.maximum(:id)] - - Event.subj_id_check(cursor: cursor) + from_id = (ENV['FROM_ID'] || Event.minimum(:id)).to_i + until_id = (ENV['UNTIL_ID'] || Event.maximum(:id)).to_i + + Event.subj_id_check(from_id: from_id, until_id: until_id) end end From 5fd0f79fdfd226047380a1e65d9a25b763940176 Mon Sep 17 00:00:00 2001 From: kjgarza Date: Wed, 29 Jan 2020 16:39:30 +0100 Subject: [PATCH 09/15] comment test as it can only run with es --- spec/models/event_spec.rb | 38 +++++++++++++++++++------------------- 1 file changed, 19 insertions(+), 19 deletions(-) diff --git a/spec/models/event_spec.rb b/spec/models/event_spec.rb index 95eb4f663..0ddd279f2 100644 --- a/spec/models/event_spec.rb +++ b/spec/models/event_spec.rb @@ -38,26 +38,26 @@ expect(published).not_to eq(2011) end - context "double_crossref_check", elasticsearch: true do - let(:provider) { create(:provider, symbol: "DATACITE") } - let(:client) { create(:client, provider: provider, symbol: ENV['MDS_USERNAME'], password: ENV['MDS_PASSWORD']) } - let!(:prefix) { create(:prefix, prefix: "10.14454") } - let!(:client_prefix) { create(:client_prefix, client: client, prefix: prefix) } - let!(:doi) { create(:doi, client: client) } - let!(:dois) { create_list(:doi, 10) } - let!(:events) { create_list(:event_for_datacite_related, 30, source_id: "datacite-crossref", obj_id: doi.doi) } + # context "double_crossref_check", elasticsearch: true do + # let(:provider) { create(:provider, symbol: "DATACITE") } + # let(:client) { create(:client, provider: provider, symbol: ENV['MDS_USERNAME'], password: ENV['MDS_PASSWORD']) } + # let!(:prefix) { create(:prefix, prefix: "10.14454") } + # let!(:client_prefix) { create(:client_prefix, client: client, prefix: prefix) } + # let!(:doi) { create(:doi, client: client) } + # let!(:dois) { create_list(:doi, 10) } + # let!(:events) { create_list(:event_for_datacite_related, 30, source_id: "datacite-crossref", obj_id: doi.doi) } - before do - Provider.import - Client.import - Doi.import - Event.import - sleep 3 - end + # before do + # Provider.import + # Client.import + # Doi.import + # Event.import + # sleep 3 + # end - it "check run" do - expect(Event.subj_id_check(cursor: [Event.minimum(:id),Event.maximum(:id)])).to eq(true) - end - end + # it "check run" do + # expect(Event.subj_id_check(cursor: [Event.minimum(:id),Event.maximum(:id)])).to eq(true) + # end + # end end end From e437c617e2a3f5b503b408c68e8b4fcc1c7ed42c Mon Sep 17 00:00:00 2001 From: Martin Fenner Date: Mon, 3 Feb 2020 11:38:34 +0100 Subject: [PATCH 10/15] easier updates of events. #390 --- app/models/concerns/indexable.rb | 11 +---------- app/models/event.rb | 9 +++++---- lib/tasks/event.rake | 2 +- 3 files changed, 7 insertions(+), 15 deletions(-) diff --git a/app/models/concerns/indexable.rb b/app/models/concerns/indexable.rb index 56b5f44dc..479ada996 100644 --- a/app/models/concerns/indexable.rb +++ b/app/models/concerns/indexable.rb @@ -4,7 +4,7 @@ module Indexable require 'aws-sdk-sqs' included do - after_commit on: [:create] do + after_commit on: [:create, :update] do # use index_document instead of update_document to also update virtual attributes IndexJob.perform_later(self) if self.class.name == "Doi" @@ -20,15 +20,6 @@ module Indexable end end - after_commit on: [:update] do - # use index_document instead of update_document to also update virtual attributes - IndexJob.perform_later(self) - if self.class.name == "Doi" - update_column(:indexed, Time.zone.now) - send_import_message(self.to_jsonapi) if aasm_state == "findable" && !Rails.env.test? && !%w(crossref medra kisti jalc op).include?(client.symbol.downcase.split(".").first) - end - end - after_touch do # use index_document instead of update_document to also update virtual attributes IndexJob.perform_later(self) diff --git a/app/models/event.rb b/app/models/event.rb index e5ed19b49..82e77edf9 100644 --- a/app/models/event.rb +++ b/app/models/event.rb @@ -382,17 +382,18 @@ def self.update_crossref(options = {}) def self.update_target_doi(options = {}) size = (options[:size] || 1000).to_i cursor = (options[:cursor] || []) + target_relation_type_id = options[:target_relation_type_id] - response = Event.query(nil, target_doi: nil, page: { size: 1, cursor: [] }) - Rails.logger.info "[Update] #{response.results.total} events with no target_doi." + response = Event.query(nil, target_relation_type_id: target_relation_type_id, page: { size: 1, cursor: [] }) + Rails.logger.info "[Update] #{response.results.total} events with target_relation_type_id #{target_relation_type_id.to_s}." # walk through results using cursor if response.results.total > 0 while response.results.results.length > 0 do - response = Event.query(nil, target_doi: nil, page: { size: size, cursor: cursor }) + response = Event.query(nil, target_relation_type_id: target_relation_type_id, page: { size: size, cursor: cursor }) break unless response.results.results.length.positive? - Rails.logger.info "[Update] Updating #{response.results.results.length} events with no target_doi starting with _id #{response.results.to_a.first[:_id]}." + Rails.logger.info "[Update] Updating #{response.results.results.length} events with target_relation_type_id #{target_relation_type_id.to_s} starting with _id #{response.results.to_a.first[:_id]}." cursor = response.results.to_a.last[:sort] ids = response.results.results.map(&:uuid).uniq diff --git a/lib/tasks/event.rake b/lib/tasks/event.rake index 9a761968e..2271b7018 100644 --- a/lib/tasks/event.rake +++ b/lib/tasks/event.rake @@ -65,7 +65,7 @@ namespace :event do task :update_target_doi => :environment do cursor = ENV['CURSOR'].to_s.split(",") || [Event.minimum(:id), Event.minimum(:id)] - Event.update_target_doi(cursor: cursor, size: ENV['SIZE']) + Event.update_target_doi(cursor: cursor, target_relation_type_id: ENV['TARGET_RELATION_TYPE_ID'], size: ENV['SIZE']) end end From d92aa605f23b398e4c1ac6f01b5a284802063446 Mon Sep 17 00:00:00 2001 From: kjgarza Date: Mon, 3 Feb 2020 19:36:49 +0100 Subject: [PATCH 11/15] correct filter for person events --- app/controllers/concerns/metrics_helper.rb | 9 +-------- 1 file changed, 1 insertion(+), 8 deletions(-) diff --git a/app/controllers/concerns/metrics_helper.rb b/app/controllers/concerns/metrics_helper.rb index 54346d054..c2701a33d 100644 --- a/app/controllers/concerns/metrics_helper.rb +++ b/app/controllers/concerns/metrics_helper.rb @@ -21,18 +21,11 @@ def get_person_metrics(orcid) end def get_person_dois(orcid) - Event.query(nil, page: { size: 500 }, obj_id: https_to_http(orcid)).results.to_a.map do |e| + Event.query(nil, page: { size: 300 }, source_id: "datacite-orcid-auto-update", obj_id: "https://orcid.org/#{orcid}").results.to_a.map do |e| doi_from_url(e.subj_id) end end - def https_to_http(url) - orcid = orcid_from_url(url) - return nil if orcid.blank? - - "https://orcid.org/#{orcid}" - end - def mix_in_metrics_array(metadata_array_objects, metrics_array_hashes) return [] if metadata_array_objects.empty? From ca71f26d5653ae913ae45678349ee2e933e296d9 Mon Sep 17 00:00:00 2001 From: kjgarza Date: Mon, 3 Feb 2020 19:58:55 +0100 Subject: [PATCH 12/15] fix for empty case --- app/controllers/concerns/metrics_helper.rb | 4 ++++ 1 file changed, 4 insertions(+) diff --git a/app/controllers/concerns/metrics_helper.rb b/app/controllers/concerns/metrics_helper.rb index c2701a33d..ec71c1b98 100644 --- a/app/controllers/concerns/metrics_helper.rb +++ b/app/controllers/concerns/metrics_helper.rb @@ -11,6 +11,10 @@ def get_metrics_array(dois) end def get_person_metrics(orcid) + if orcid.blank? + return { citations: 0, views: 0, downloads: 0 } + end + dois = get_person_dois(orcid).join(",") usage = EventsQuery.new.views_and_downloads(dois) { From f54b3a0acdff38489512a5eb0659043b64dfffed Mon Sep 17 00:00:00 2001 From: Martin Fenner Date: Tue, 4 Feb 2020 11:41:48 +0100 Subject: [PATCH 13/15] don't trigger reindexing of dois. #390 --- app/models/concerns/indexable.rb | 12 ++++++------ 1 file changed, 6 insertions(+), 6 deletions(-) diff --git a/app/models/concerns/indexable.rb b/app/models/concerns/indexable.rb index 479ada996..9d6afad8f 100644 --- a/app/models/concerns/indexable.rb +++ b/app/models/concerns/indexable.rb @@ -10,13 +10,13 @@ module Indexable if self.class.name == "Doi" update_column(:indexed, Time.zone.now) send_import_message(self.to_jsonapi) if aasm_state == "findable" && !Rails.env.test? && !%w(crossref medra kisti jalc op).include?(client.symbol.downcase.split(".").first) - elsif self.class.name == "Event" - # reindex dois associated with Event - @source_doi = Doi.where(doi: source_doi).first if source_doi - IndexJob.perform_later(@source_doi) if @source_doi + # elsif self.class.name == "Event" + # # reindex dois associated with Event + # @source_doi = Doi.where(doi: source_doi).first if source_doi + # IndexJob.perform_later(@source_doi) if @source_doi - @target_doi = Doi.where(doi: target_doi).first if target_doi - IndexJob.perform_later(@target_doi) if @target_doi + # @target_doi = Doi.where(doi: target_doi).first if target_doi + # IndexJob.perform_later(@target_doi) if @target_doi end end From 6b6fae318e7ee65e6389a053d341d2856bbf3ccc Mon Sep 17 00:00:00 2001 From: kjgarza Date: Tue, 4 Feb 2020 15:27:43 +0100 Subject: [PATCH 14/15] add rails to logger --- app/models/event.rb | 8 ++++---- 1 file changed, 4 insertions(+), 4 deletions(-) diff --git a/app/models/event.rb b/app/models/event.rb index d4316aedc..a6729bd73 100644 --- a/app/models/event.rb +++ b/app/models/event.rb @@ -536,7 +536,7 @@ def self.subj_id_check(options = {}) total_errors = 0 response = Event.query(nil, source_id: "datacite-crossref,datacite-related", page: { size: 1, cursor: [] }) - logger.info "[DoubleCheck] #{response.results.total} events for source datacite-crossref,datacite-related." + Rails.logger.info "[DoubleCheck] #{response.results.total} events for source datacite-crossref,datacite-related." # walk through results using cursor if response.results.total.positive? @@ -544,7 +544,7 @@ def self.subj_id_check(options = {}) response = Event.query(nil, source_id: "datacite-crossref,datacite-related", page: { size: size, cursor: cursor }) break unless response.results.results.length.positive? - logger.info "[DoubleCheck] DoubleCheck #{response.results.results.length} events starting with _id #{response.results.to_a.first[:_id]}." + Rails.logger.info "[DoubleCheck] DoubleCheck #{response.results.results.length} events starting with _id #{response.results.to_a.first[:_id]}." cursor = response.results.to_a.last[:sort] # dois = response.results.results.map(&:subj_id) @@ -566,8 +566,8 @@ def self.subj_id_check(options = {}) payload = { description: "events_with_errors_from_rake_task #{Time.now.getutc}", public: true,files: {uids_with_errors: {content: file.read} }} ### max file size 1MB response = Maremma.post("https://api.github.com/gists", data: payload.to_json, username: ENV["GIST_USERNAME"], password:ENV["GIST_PASSWORD"]) - logger.warn "[DoubleCheck] Total number of events with Errors: #{total_errors}" - logger.warn "[DoubleCheck] IDs saved: #{response.body.dig('data','url')}" if [200,201].include?(response.status) + Rails.logger.warn "[DoubleCheck] Total number of events with Errors: #{total_errors}" + Rails.logger.warn "[DoubleCheck] IDs saved: #{response.body.dig('data','url')}" if [200,201].include?(response.status) end end From a3c07275799030f11c046713bfe985383cf8fd43 Mon Sep 17 00:00:00 2001 From: Martin Fenner Date: Wed, 5 Feb 2020 05:13:31 +0100 Subject: [PATCH 15/15] reindex doi when event is updated. #388 --- app/jobs/index_background_job.rb | 11 +++++++++++ app/jobs/index_job.rb | 2 +- app/models/concerns/indexable.rb | 9 +-------- app/models/event.rb | 3 +++ 4 files changed, 16 insertions(+), 9 deletions(-) create mode 100644 app/jobs/index_background_job.rb diff --git a/app/jobs/index_background_job.rb b/app/jobs/index_background_job.rb new file mode 100644 index 000000000..22c1bfda8 --- /dev/null +++ b/app/jobs/index_background_job.rb @@ -0,0 +1,11 @@ +class IndexBackgroundJob < ActiveJob::Base + queue_as :lupo_background + + rescue_from ActiveJob::DeserializationError, Elasticsearch::Transport::Transport::Errors::BadRequest do |error| + Rails.logger.error error.message + end + + def perform(obj) + obj.__elasticsearch__.index_document + end +end diff --git a/app/jobs/index_job.rb b/app/jobs/index_job.rb index 5b2803300..e4205a44e 100644 --- a/app/jobs/index_job.rb +++ b/app/jobs/index_job.rb @@ -8,4 +8,4 @@ class IndexJob < ActiveJob::Base def perform(obj) obj.__elasticsearch__.index_document end -end \ No newline at end of file +end diff --git a/app/models/concerns/indexable.rb b/app/models/concerns/indexable.rb index 9d6afad8f..1eb395685 100644 --- a/app/models/concerns/indexable.rb +++ b/app/models/concerns/indexable.rb @@ -10,19 +10,12 @@ module Indexable if self.class.name == "Doi" update_column(:indexed, Time.zone.now) send_import_message(self.to_jsonapi) if aasm_state == "findable" && !Rails.env.test? && !%w(crossref medra kisti jalc op).include?(client.symbol.downcase.split(".").first) - # elsif self.class.name == "Event" - # # reindex dois associated with Event - # @source_doi = Doi.where(doi: source_doi).first if source_doi - # IndexJob.perform_later(@source_doi) if @source_doi - - # @target_doi = Doi.where(doi: target_doi).first if target_doi - # IndexJob.perform_later(@target_doi) if @target_doi end end after_touch do # use index_document instead of update_document to also update virtual attributes - IndexJob.perform_later(self) + IndexBackgroundJob.perform_later(self) end before_destroy do diff --git a/app/models/event.rb b/app/models/event.rb index 82e77edf9..dbd02bb52 100644 --- a/app/models/event.rb +++ b/app/models/event.rb @@ -18,6 +18,9 @@ class Event < ActiveRecord::Base include Elasticsearch::Model + belongs_to :doi_for_source, class_name: "Doi", primary_key: :doi, foreign_key: :source_doi, touch: true, optional: true + belongs_to :doi_for_target, class_name: "Doi", primary_key: :doi, foreign_key: :target_doi, touch: true, optional: true + before_validation :set_defaults before_create :set_source_and_target_doi