Skip to content

Commit

Permalink
Merge branch 'master' of github.com:datacite/lupo
Browse files Browse the repository at this point in the history
  • Loading branch information
richardhallett committed Feb 5, 2020
2 parents 42f62a8 + e63ae67 commit b8017cc
Show file tree
Hide file tree
Showing 7 changed files with 102 additions and 32 deletions.
13 changes: 5 additions & 8 deletions app/controllers/concerns/metrics_helper.rb
Original file line number Diff line number Diff line change
Expand Up @@ -11,6 +11,10 @@ def get_metrics_array(dois)
end

def get_person_metrics(orcid)
if orcid.blank?
return { citations: 0, views: 0, downloads: 0 }
end

dois = get_person_dois(orcid).join(",")
usage = EventsQuery.new.views_and_downloads(dois)
{
Expand All @@ -21,18 +25,11 @@ def get_person_metrics(orcid)
end

# Returns the DOIs found in events that reference the given ORCID: each
# matching event's subj_id is passed through doi_from_url.
# NOTE(review): two Event.query lines are shown here (page size 500 via
# https_to_http, and page size 300 restricted to the
# "datacite-orcid-auto-update" source). This looks like a diff view showing
# the old and the new query together -- confirm which one the file keeps.
def get_person_dois(orcid)
Event.query(nil, page: { size: 500 }, obj_id: https_to_http(orcid)).results.to_a.map do |e|
Event.query(nil, page: { size: 300 }, source_id: "datacite-orcid-auto-update", obj_id: "https://orcid.org/#{orcid}").results.to_a.map do |e|
doi_from_url(e.subj_id)
end
end

# Rebuilds an ORCID URL with the "https://orcid.org/" prefix from whatever
# orcid_from_url extracts out of +url+.
#
# Returns the rebuilt URL string, or nil when nothing could be extracted.
def https_to_http(url)
  identifier = orcid_from_url(url)
  identifier.blank? ? nil : "https://orcid.org/#{identifier}"
end

def mix_in_metrics_array(metadata_array_objects, metrics_array_hashes)
return [] if metadata_array_objects.empty?

Expand Down
11 changes: 11 additions & 0 deletions app/jobs/index_background_job.rb
Original file line number Diff line number Diff line change
@@ -0,0 +1,11 @@
# ActiveJob that indexes a single record through its __elasticsearch__ proxy,
# processed on the :lupo_background queue.
class IndexBackgroundJob < ActiveJob::Base
  queue_as :lupo_background

  # Deserialization failures and Elasticsearch "bad request" responses are
  # logged instead of being re-raised.
  rescue_from(ActiveJob::DeserializationError, Elasticsearch::Transport::Transport::Errors::BadRequest) do |exception|
    Rails.logger.error(exception.message)
  end

  # obj - the record to index; it must respond to __elasticsearch__.
  def perform(obj)
    search_proxy = obj.__elasticsearch__
    search_proxy.index_document
  end
end
2 changes: 1 addition & 1 deletion app/jobs/index_job.rb
Original file line number Diff line number Diff line change
Expand Up @@ -8,4 +8,4 @@ class IndexJob < ActiveJob::Base
# Indexes the given record by calling index_document on its
# __elasticsearch__ proxy; returns whatever index_document returns.
def perform(obj)
  search_proxy = obj.__elasticsearch__
  search_proxy.index_document
end
end
end
20 changes: 2 additions & 18 deletions app/models/concerns/indexable.rb
Original file line number Diff line number Diff line change
Expand Up @@ -4,23 +4,7 @@ module Indexable
require 'aws-sdk-sqs'

included do
after_commit on: [:create] do
  # Always go through index_document (not update_document) so that virtual
  # attributes end up in the index as well.
  IndexJob.perform_later(self)

  case self.class.name
  when "Doi"
    update_column(:indexed, Time.zone.now)

    # An import message is only sent for findable DOIs, never in the test
    # environment, and not when the first dot-separated segment of the
    # client symbol is one of the listed names.
    skipped_prefixes = %w(crossref medra kisti jalc op)
    if aasm_state == "findable" && !Rails.env.test? && !skipped_prefixes.include?(client.symbol.downcase.split(".").first)
      send_import_message(self.to_jsonapi)
    end
  when "Event"
    # reindex dois associated with Event
    @source_doi = Doi.where(doi: source_doi).first if source_doi
    IndexJob.perform_later(@source_doi) if @source_doi

    @target_doi = Doi.where(doi: target_doi).first if target_doi
    IndexJob.perform_later(@target_doi) if @target_doi
  end
end

after_commit on: [:update] do
after_commit on: [:create, :update] do
# use index_document instead of update_document to also update virtual attributes
IndexJob.perform_later(self)
if self.class.name == "Doi"
Expand All @@ -31,7 +15,7 @@ module Indexable

# Callback run after the record is touched: enqueues indexing jobs for it.
# NOTE(review): both an IndexJob and an IndexBackgroundJob enqueue line are
# shown; this looks like a diff view where the former is replaced by the
# latter -- confirm which one the file keeps.
after_touch do
# use index_document instead of update_document to also update virtual attributes
IndexJob.perform_later(self)
IndexBackgroundJob.perform_later(self)
end

before_destroy do
Expand Down
54 changes: 50 additions & 4 deletions app/models/event.rb
Original file line number Diff line number Diff line change
Expand Up @@ -18,6 +18,9 @@ class Event < ActiveRecord::Base

include Elasticsearch::Model

# Optional associations to the Doi records whose `doi` column matches this
# event's source_doi / target_doi. `touch: true` means saving the event also
# touches the associated Doi.
belongs_to :doi_for_source, class_name: "Doi", primary_key: :doi, foreign_key: :source_doi, touch: true, optional: true
belongs_to :doi_for_target, class_name: "Doi", primary_key: :doi, foreign_key: :target_doi, touch: true, optional: true

before_validation :set_defaults
before_create :set_source_and_target_doi

Expand Down Expand Up @@ -382,17 +385,18 @@ def self.update_crossref(options = {})
def self.update_target_doi(options = {})
size = (options[:size] || 1000).to_i
cursor = (options[:cursor] || [])
target_relation_type_id = options[:target_relation_type_id]

response = Event.query(nil, target_doi: nil, page: { size: 1, cursor: [] })
Rails.logger.info "[Update] #{response.results.total} events with no target_doi."
response = Event.query(nil, target_relation_type_id: target_relation_type_id, page: { size: 1, cursor: [] })
Rails.logger.info "[Update] #{response.results.total} events with target_relation_type_id #{target_relation_type_id.to_s}."

# walk through results using cursor
if response.results.total > 0
while response.results.results.length > 0 do
response = Event.query(nil, target_doi: nil, page: { size: size, cursor: cursor })
response = Event.query(nil, target_relation_type_id: target_relation_type_id, page: { size: size, cursor: cursor })
break unless response.results.results.length.positive?

Rails.logger.info "[Update] Updating #{response.results.results.length} events with no target_doi starting with _id #{response.results.to_a.first[:_id]}."
Rails.logger.info "[Update] Updating #{response.results.results.length} events with target_relation_type_id #{target_relation_type_id.to_s} starting with _id #{response.results.to_a.first[:_id]}."
cursor = response.results.to_a.last[:sort]

ids = response.results.results.map(&:uuid).uniq
Expand Down Expand Up @@ -528,6 +532,48 @@ def access_method
end
end

# Scans events from the "datacite-crossref" and "datacite-related" sources and
# records the uuid of every event whose subj_id carries a DOI prefix that has
# no matching Prefix record. Flagged uuids are appended, one per line, to a
# local text file; if that file exists afterwards its content is posted as a
# public GitHub gist (credentials come from GIST_USERNAME / GIST_PASSWORD).
#
# options - Hash of options:
#           :size     - page size for each query (default 1000)
#           :from_id  - first element of the initial paging cursor
#           :until_id - second element of the initial paging cursor
#
# Returns the result of the final log call, or nil when no file of flagged
# uuids exists (nothing is posted in that case).
def self.subj_id_check(options = {})
  # NOTE: the file name typo ("evens") is kept so that earlier output files
  # are still picked up.
  file_name = "evens_with_double_crossref_dois.txt"
  source_id = "datacite-crossref,datacite-related"
  size = (options[:size] || 1000).to_i
  cursor = [options[:from_id], options[:until_id]]
  total_errors = 0

  # Memoize the per-prefix lookup: many events share a prefix, so this avoids
  # one database query per event.
  unknown_prefix = Hash.new do |cache, prefix|
    cache[prefix] = Prefix.where(prefix: prefix).empty?
  end

  response = Event.query(nil, source_id: source_id, page: { size: 1, cursor: [] })
  Rails.logger.info "[DoubleCheck] #{response.results.total} events for source datacite-crossref,datacite-related."

  # walk through results using cursor
  if response.results.total.positive?
    loop do
      response = Event.query(nil, source_id: source_id, page: { size: size, cursor: cursor })
      events = response.results.results
      break unless events.length.positive?

      Rails.logger.info "[DoubleCheck] DoubleCheck #{events.length} events starting with _id #{response.results.to_a.first[:_id]}."
      cursor = response.results.to_a.last[:sort]

      # to_s guards against events without a subj_id; they yield a nil prefix
      # and are flagged like any other unknown prefix.
      flagged_uuids = events.
        select { |event| unknown_prefix[event.subj_id.to_s[/(10\.\d{4,5})/, 1]] }.
        map(&:uuid)
      next if flagged_uuids.empty?

      # Open the file once per page, and only when there is something to
      # write, so a clean run does not create an empty file.
      File.open(file_name, "a+") do |f|
        flagged_uuids.each { |uuid| f.puts(uuid) }
      end
      total_errors += flagged_uuids.length
    end
  end

  # Without this guard, a run that flagged nothing would raise Errno::ENOENT
  # when trying to read a file that was never created.
  return unless File.exist?(file_name)

  payload = {
    description: "events_with_errors_from_rake_task #{Time.now.getutc}",
    public: true,
    # File.read closes the file again, unlike a bare File.open.
    files: { uids_with_errors: { content: File.read(file_name) } },
  }
  ### max file size 1MB
  gist_response = Maremma.post("https://api.github.com/gists", data: payload.to_json, username: ENV["GIST_USERNAME"], password: ENV["GIST_PASSWORD"])
  Rails.logger.warn "[DoubleCheck] Total number of events with Errors: #{total_errors}"
  Rails.logger.warn "[DoubleCheck] IDs saved: #{gist_response.body.dig('data','url')}" if [200, 201].include?(gist_response.status)
end

def metric_type
if relation_type_id.to_s =~ /(requests|investigations)/
arr = relation_type_id.split("-", 4)
Expand Down
12 changes: 11 additions & 1 deletion lib/tasks/event.rake
Original file line number Diff line number Diff line change
Expand Up @@ -65,7 +65,7 @@ namespace :event do
# Rake task that calls Event.update_target_doi, taking its inputs from the
# environment: CURSOR (comma-separated), SIZE and TARGET_RELATION_TYPE_ID.
# NOTE(review): String#split always returns an Array, which is truthy, so the
# `|| [Event.minimum(:id), ...]` fallback on the next line is never used; an
# unset CURSOR yields [].
# NOTE(review): two Event.update_target_doi calls are shown; this looks like a
# diff view of the old and new call -- confirm which one the file keeps.
task :update_target_doi => :environment do
cursor = ENV['CURSOR'].to_s.split(",") || [Event.minimum(:id), Event.minimum(:id)]

Event.update_target_doi(cursor: cursor, size: ENV['SIZE'])
Event.update_target_doi(cursor: cursor, target_relation_type_id: ENV['TARGET_RELATION_TYPE_ID'], size: ENV['SIZE'])
end
end

Expand All @@ -78,6 +78,16 @@ namespace :crossref do
end
end

namespace :subj_id_check do
  desc "checks that events subject node congruency"
  task check: :environment do
    # FROM_ID / UNTIL_ID bound the check; when unset they default to the
    # smallest and largest event id in the database.
    lower_bound = ENV.fetch("FROM_ID") { Event.minimum(:id) }.to_i
    upper_bound = ENV.fetch("UNTIL_ID") { Event.maximum(:id) }.to_i

    Event.subj_id_check(from_id: lower_bound, until_id: upper_bound)
  end
end

namespace :datacite_crossref do
desc 'Import crossref dois for all events'
task :import_doi => :environment do
Expand Down
22 changes: 22 additions & 0 deletions spec/models/event_spec.rb
Original file line number Diff line number Diff line change
Expand Up @@ -37,5 +37,27 @@
expect(published).to eq("2011")
expect(published).not_to eq(2011)
end

# context "double_crossref_check", elasticsearch: true do
# let(:provider) { create(:provider, symbol: "DATACITE") }
# let(:client) { create(:client, provider: provider, symbol: ENV['MDS_USERNAME'], password: ENV['MDS_PASSWORD']) }
# let!(:prefix) { create(:prefix, prefix: "10.14454") }
# let!(:client_prefix) { create(:client_prefix, client: client, prefix: prefix) }
# let!(:doi) { create(:doi, client: client) }
# let!(:dois) { create_list(:doi, 10) }
# let!(:events) { create_list(:event_for_datacite_related, 30, source_id: "datacite-crossref", obj_id: doi.doi) }

# before do
# Provider.import
# Client.import
# Doi.import
# Event.import
# sleep 3
# end

# it "check run" do
# expect(Event.subj_id_check(cursor: [Event.minimum(:id),Event.maximum(:id)])).to eq(true)
# end
# end
end
end

0 comments on commit b8017cc

Please sign in to comment.