Skip to content

Commit

Permalink
updates registrant IDs for events that have not been updated.
Browse files Browse the repository at this point in the history
for events from crossref and to crossref
a rake task to be used only for this
  • Loading branch information
kjgarza committed Oct 4, 2019
1 parent 60f1ef8 commit 7c73641
Show file tree
Hide file tree
Showing 5 changed files with 137 additions and 0 deletions.
78 changes: 78 additions & 0 deletions app/jobs/event_registrant_update_by_id_job.rb
Original file line number Diff line number Diff line change
@@ -0,0 +1,78 @@
class EventRegistrantUpdateByIdJob < ActiveJob::Base
queue_as :lupo_background


rescue_from ActiveJob::DeserializationError, Elasticsearch::Transport::Transport::Errors::BadRequest do |error|
logger = Logger.new(STDOUT)
logger.error error.message
end

def perform(id, options={})
logger = Logger.new(STDOUT)

item = Event.where(uuid: id).first
return false unless item.present?
logger.info "djdjdj"
logger.info id
logger.info item.source_id


case item.source_id
when "datacite-crossref"
registrant_id = get_crossref_member_id(item.obj_id) if get_doi_ra(item.obj_id) == "Crossref"
logger.info registrant_id

obj = item.obj.merge("registrant_id" => registrant_id) unless registrant_id.nil?
logger.info obj
item.update_attributes(obj: obj) if obj.present?
when "crossref"
registrant_id = get_crossref_member_id(item.subj) if get_doi_ra(item.subj) == "Crossref"
logger.info registrant_id

subj = item.subj.merge("registrant_id" => registrant_id) unless registrant_id.nil?
logger.info subj
item.update_attributes(subj: subj) if subj.present?
end

logger.error item.errors.full_messages.map { |message| { title: message } } if item.errors.any?
logger.info "#{item.uuid} Updated" if item.errors.blank? && registrant_id
end

def get_crossref_member_id(id, options={})
logger = Logger.new(STDOUT)
doi = doi_from_url(id)
# return "crossref.citations" unless doi.present?

url = "https://api.crossref.org/works/#{Addressable::URI.encode(doi)}[email protected]"
sleep(0.01) # to avoid crossref rate limitting
response = Maremma.get(url, host: true)
logger.info "[Crossref Response] [#{response.status}] for DOI #{doi} metadata"
return "" if response.status == 404 ### for cases when DOI is not in the crossreaf api
return "crossref.citations" if response.status != 200 ### for cases any other errors

message = response.body.dig("data", "message")

"crossref.#{message["member"]}"
end

def get_doi_ra(doi)
prefix = validate_prefix(doi)
return nil if prefix.blank?

url = "https://doi.org/ra/#{prefix}"
result = Maremma.get(url)

result.body.dig("data", 0, "RA")
end

def validate_prefix(doi)
Array(/\A(?:(http|https):\/(\/)?(dx\.)?(doi.org|handle.test.datacite.org)\/)?(doi:)?(10\.\d{4,5}).*\z/.match(doi)).last
end

def doi_from_url(url)
if /\A(?:(http|https):\/\/(dx\.)?(doi.org|handle.test.datacite.org)\/)?(doi:)?(10\.\d{4,5}\/.+)\z/.match(url)
uri = Addressable::URI.parse(url)
uri.path.gsub(/^\//, '').downcase
end
end
end
7 changes: 7 additions & 0 deletions app/jobs/event_registrant_update_job.rb
Original file line number Diff line number Diff line change
@@ -0,0 +1,7 @@
class EventRegistrantUpdateJob < ActiveJob::Base
queue_as :lupo_background

def perform(ids, options={})
ids.each { |id| EventRegistrantUpdateByIdJob.perform_later(id, options) }
end
end
29 changes: 29 additions & 0 deletions app/models/event.rb
Original file line number Diff line number Diff line change
Expand Up @@ -447,6 +447,35 @@ def self.update_datacite_ra(options={})
response.results.total
end

def self.update_registrant(options={})
logger = Logger.new(STDOUT)

size = (options[:size] || 1000).to_i
cursor = (options[:cursor] || [])
# ra = options[:ra] || "crossref"
source_id = "datacite-crossref,crossref"

response = Event.query(nil, source_id: source_id, page: { size: 1, cursor: cursor })
logger.info "[Update] #{response.results.total} events for sources #{source_id}."

# walk through results using cursor
if response.results.total > 0
while response.results.results.length > 0 do
response = Event.query(nil, source_id: source_id, page: { size: size, cursor: cursor })
break unless response.results.results.length > 0

logger.info "[Update] Updating #{response.results.results.length} #{source_id} events starting with _id #{response.results.to_a.first[:_id]}."
cursor = response.results.to_a.last[:sort]

ids = response.results.results.map(&:uuid).uniq

EventRegistrantUpdateJob.perform_later(ids, options)
end
end

response.results.total
end

def self.update_datacite_orcid_auto_update(options={})
logger = Logger.new(STDOUT)

Expand Down
7 changes: 7 additions & 0 deletions lib/tasks/event.rake
Original file line number Diff line number Diff line change
Expand Up @@ -53,6 +53,13 @@ namespace :event do

Event.import_by_ids(from_id: from_id, until_id: until_id)
end

desc 'update registrant metadata'
task :update_registrant => :environment do
cursor = ENV['CURSOR'].to_s.split(",") || [Event.minimum(:id),Event.minimum(:id)]

Event.update_registrant(cursor: cursor, size: ENV['SIZE'])
end
end

namespace :crossref do
Expand Down
16 changes: 16 additions & 0 deletions spec/jobs/event_update_by_id_job.rb
Original file line number Diff line number Diff line change
@@ -0,0 +1,16 @@
require 'rails_helper'

describe EventUpdateByIdJob, type: :job do
let(:event) { create(:event) }
subject(:job) { EventUpdateByIdJob.perform_later(event.uuid) }

it 'queues the job' do
expect { job }.to have_enqueued_job(EventUpdateByIdJob)
.on_queue("test_lupo_background")
end

after do
clear_enqueued_jobs
clear_performed_jobs
end
end

0 comments on commit 7c73641

Please sign in to comment.