diff --git a/app/controllers/concerns/metrics_helper.rb b/app/controllers/concerns/metrics_helper.rb index 54346d054..ec71c1b98 100644 --- a/app/controllers/concerns/metrics_helper.rb +++ b/app/controllers/concerns/metrics_helper.rb @@ -11,6 +11,10 @@ def get_metrics_array(dois) end def get_person_metrics(orcid) + if orcid.blank? + return { citations: 0, views: 0, downloads: 0 } + end + dois = get_person_dois(orcid).join(",") usage = EventsQuery.new.views_and_downloads(dois) { @@ -21,18 +25,11 @@ def get_person_metrics(orcid) end def get_person_dois(orcid) - Event.query(nil, page: { size: 500 }, obj_id: https_to_http(orcid)).results.to_a.map do |e| + Event.query(nil, page: { size: 300 }, source_id: "datacite-orcid-auto-update", obj_id: "https://orcid.org/#{orcid}").results.to_a.map do |e| doi_from_url(e.subj_id) end end - def https_to_http(url) - orcid = orcid_from_url(url) - return nil if orcid.blank? - - "https://orcid.org/#{orcid}" - end - def mix_in_metrics_array(metadata_array_objects, metrics_array_hashes) return [] if metadata_array_objects.empty? diff --git a/app/jobs/index_background_job.rb b/app/jobs/index_background_job.rb new file mode 100644 index 000000000..22c1bfda8 --- /dev/null +++ b/app/jobs/index_background_job.rb @@ -0,0 +1,11 @@ +class IndexBackgroundJob < ActiveJob::Base + queue_as :lupo_background + + rescue_from ActiveJob::DeserializationError, Elasticsearch::Transport::Transport::Errors::BadRequest do |error| + Rails.logger.error error.message + end + + def perform(obj) + obj.__elasticsearch__.index_document + end +end diff --git a/app/jobs/index_job.rb b/app/jobs/index_job.rb index 5b2803300..e4205a44e 100644 --- a/app/jobs/index_job.rb +++ b/app/jobs/index_job.rb @@ -8,4 +8,4 @@ class IndexJob < ActiveJob::Base def perform(obj) obj.__elasticsearch__.index_document end -end \ No newline at end of file +end diff --git a/app/models/concerns/indexable.rb b/app/models/concerns/indexable.rb index 56b5f44dc..1eb395685 100644 --- a/app/models/concerns/indexable.rb +++ b/app/models/concerns/indexable.rb @@ -4,23 +4,7 @@ module Indexable require 'aws-sdk-sqs' included do - after_commit on: [:create] do - # use index_document instead of update_document to also update virtual attributes - IndexJob.perform_later(self) - if self.class.name == "Doi" - update_column(:indexed, Time.zone.now) - send_import_message(self.to_jsonapi) if aasm_state == "findable" && !Rails.env.test? && !%w(crossref medra kisti jalc op).include?(client.symbol.downcase.split(".").first) - elsif self.class.name == "Event" - # reindex dois associated with Event - @source_doi = Doi.where(doi: source_doi).first if source_doi - IndexJob.perform_later(@source_doi) if @source_doi - - @target_doi = Doi.where(doi: target_doi).first if target_doi - IndexJob.perform_later(@target_doi) if @target_doi - end - end - - after_commit on: [:update] do + after_commit on: [:create, :update] do # use index_document instead of update_document to also update virtual attributes IndexJob.perform_later(self) if self.class.name == "Doi" @@ -31,7 +15,7 @@ module Indexable after_touch do # use index_document instead of update_document to also update virtual attributes - IndexJob.perform_later(self) + IndexBackgroundJob.perform_later(self) end before_destroy do diff --git a/app/models/event.rb b/app/models/event.rb index e5ed19b49..6df5527de 100644 --- a/app/models/event.rb +++ b/app/models/event.rb @@ -18,6 +18,9 @@ class Event < ActiveRecord::Base include Elasticsearch::Model + belongs_to :doi_for_source, class_name: "Doi", primary_key: :doi, foreign_key: :source_doi, touch: true, optional: true + belongs_to :doi_for_target, class_name: "Doi", primary_key: :doi, foreign_key: :target_doi, touch: true, optional: true + before_validation :set_defaults before_create :set_source_and_target_doi @@ -382,17 +385,18 @@ def self.update_crossref(options = {}) def self.update_target_doi(options = {}) size = (options[:size] || 1000).to_i cursor = (options[:cursor] || []) + target_relation_type_id = options[:target_relation_type_id] - response = Event.query(nil, target_doi: nil, page: { size: 1, cursor: [] }) - Rails.logger.info "[Update] #{response.results.total} events with no target_doi." + response = Event.query(nil, target_relation_type_id: target_relation_type_id, page: { size: 1, cursor: [] }) + Rails.logger.info "[Update] #{response.results.total} events with target_relation_type_id #{target_relation_type_id.to_s}." # walk through results using cursor if response.results.total > 0 while response.results.results.length > 0 do - response = Event.query(nil, target_doi: nil, page: { size: size, cursor: cursor }) + response = Event.query(nil, target_relation_type_id: target_relation_type_id, page: { size: size, cursor: cursor }) break unless response.results.results.length.positive? - Rails.logger.info "[Update] Updating #{response.results.results.length} events with no target_doi starting with _id #{response.results.to_a.first[:_id]}." + Rails.logger.info "[Update] Updating #{response.results.results.length} events with target_relation_type_id #{target_relation_type_id.to_s} starting with _id #{response.results.to_a.first[:_id]}." cursor = response.results.to_a.last[:sort] ids = response.results.results.map(&:uuid).uniq @@ -528,6 +532,48 @@ def access_method end end + def self.subj_id_check(options = {}) + file_name = "evens_with_double_crossref_dois.txt" + size = (options[:size] || 1000).to_i + cursor = [options[:from_id], options[:until_id]] + total_errors = 0 + + response = Event.query(nil, source_id: "datacite-crossref,datacite-related", page: { size: 1, cursor: [] }) + Rails.logger.info "[DoubleCheck] #{response.results.total} events for source datacite-crossref,datacite-related." + + # walk through results using cursor + if response.results.total.positive? + while response.results.results.length.positive? + response = Event.query(nil, source_id: "datacite-crossref,datacite-related", page: { size: size, cursor: cursor }) + break unless response.results.results.length.positive? + + Rails.logger.info "[DoubleCheck] DoubleCheck #{response.results.results.length} events starting with _id #{response.results.to_a.first[:_id]}." + cursor = response.results.to_a.last[:sort] + + # dois = response.results.results.map(&:subj_id) + events = response.results.results + events.lazy.each do | event| + subj_prefix = event.subj_id[/(10\.\d{4,5})/, 1] + if Prefix.where(prefix: subj_prefix).empty? + File.open(file_name, "a+") do |f| + f.write(event.uuid, "\n") + total_errors = total_errors + 1 + end + end + end + end + end + + file = File.open(file_name) + if file.present? + payload = { description: "events_with_errors_from_rake_task #{Time.now.getutc}", public: true,files: {uids_with_errors: {content: file.read} }} + ### max file size 1MB + response = Maremma.post("https://api.github.com/gists", data: payload.to_json, username: ENV["GIST_USERNAME"], password:ENV["GIST_PASSWORD"]) + Rails.logger.warn "[DoubleCheck] Total number of events with Errors: #{total_errors}" + Rails.logger.warn "[DoubleCheck] IDs saved: #{response.body.dig('data','url')}" if [200,201].include?(response.status) + end + end + def metric_type if relation_type_id.to_s =~ /(requests|investigations)/ arr = relation_type_id.split("-", 4) diff --git a/lib/tasks/event.rake b/lib/tasks/event.rake index 9a761968e..63f052a04 100644 --- a/lib/tasks/event.rake +++ b/lib/tasks/event.rake @@ -65,7 +65,7 @@ namespace :event do task :update_target_doi => :environment do cursor = ENV['CURSOR'].to_s.split(",") || [Event.minimum(:id), Event.minimum(:id)] - Event.update_target_doi(cursor: cursor, size: ENV['SIZE']) + Event.update_target_doi(cursor: cursor, target_relation_type_id: ENV['TARGET_RELATION_TYPE_ID'], size: ENV['SIZE']) end end @@ -78,6 +78,16 @@ namespace :crossref do end end +namespace :subj_id_check do + desc 'checks that events subject node congruency' + task :check => :environment do + from_id = (ENV['FROM_ID'] || Event.minimum(:id)).to_i + until_id = (ENV['UNTIL_ID'] || Event.maximum(:id)).to_i + + Event.subj_id_check(from_id: from_id, until_id: until_id) + end +end + namespace :datacite_crossref do desc 'Import crossref dois for all events' task :import_doi => :environment do diff --git a/spec/models/event_spec.rb b/spec/models/event_spec.rb index f94b8744d..0ddd279f2 100644 --- a/spec/models/event_spec.rb +++ b/spec/models/event_spec.rb @@ -37,5 +37,27 @@ expect(published).to eq("2011") expect(published).not_to eq(2011) end + + # context "double_crossref_check", elasticsearch: true do + # let(:provider) { create(:provider, symbol: "DATACITE") } + # let(:client) { create(:client, provider: provider, symbol: ENV['MDS_USERNAME'], password: ENV['MDS_PASSWORD']) } + # let!(:prefix) { create(:prefix, prefix: "10.14454") } + # let!(:client_prefix) { create(:client_prefix, client: client, prefix: prefix) } + # let!(:doi) { create(:doi, client: client) } + # let!(:dois) { create_list(:doi, 10) } + # let!(:events) { create_list(:event_for_datacite_related, 30, source_id: "datacite-crossref", obj_id: doi.doi) } + + # before do + # Provider.import + # Client.import + # Doi.import + # Event.import + # sleep 3 + # end + + # it "check run" do + # expect(Event.subj_id_check(cursor: [Event.minimum(:id),Event.maximum(:id)])).to eq(true) + # end + # end end end