Skip to content

Commit

Permalink
Merge branch 'master' of https://github.com/datacite/lupo
Browse files Browse the repository at this point in the history
  • Loading branch information
Martin Fenner committed Oct 6, 2019
2 parents 53429b8 + 370c65a commit 59ad0c0
Show file tree
Hide file tree
Showing 5 changed files with 137 additions and 0 deletions.
78 changes: 78 additions & 0 deletions app/jobs/event_registrant_update_by_id_job.rb
Original file line number Diff line number Diff line change
@@ -0,0 +1,78 @@
# Background job that looks up the Crossref member responsible for the DOI
# referenced by a single event and stores it as "registrant_id" inside the
# event's `obj` (source "datacite-crossref") or `subj` (source "crossref") hash.
class EventRegistrantUpdateByIdJob < ActiveJob::Base
  queue_as :lupo_background

  # Log these errors instead of letting them propagate out of the job.
  rescue_from ActiveJob::DeserializationError, Elasticsearch::Transport::Transport::Errors::BadRequest do |error|
    logger = Logger.new(STDOUT)
    logger.error error.message
  end

  # Updates the registrant of the event with the given uuid.
  #
  # id      - uuid of the Event to update.
  # options - accepted for interface compatibility; not read by this method.
  #
  # Returns false when no event with that uuid exists; otherwise the result of
  # the final logging statement (callers should not rely on it).
  def perform(id, options = {})
    logger = Logger.new(STDOUT)

    item = Event.where(uuid: id).first
    return false unless item.present?

    logger.info id
    logger.info item.source_id

    case item.source_id
    when "datacite-crossref"
      # Only ask Crossref for the member when the DOI's registration agency is Crossref.
      registrant_id = get_crossref_member_id(item.obj_id) if get_doi_ra(item.obj_id) == "Crossref"
      logger.info registrant_id

      obj = item.obj.merge("registrant_id" => registrant_id) unless registrant_id.nil?
      logger.info obj
      item.update(obj: obj) if obj.present?
    when "crossref"
      # Use the identifier string, mirroring the obj_id usage above. Passing the
      # `subj` hash itself made the regexp match in validate_prefix raise a TypeError.
      # NOTE(review): assumes Event exposes `subj_id` alongside `obj_id` -- confirm.
      registrant_id = get_crossref_member_id(item.subj_id) if get_doi_ra(item.subj_id) == "Crossref"
      logger.info registrant_id

      subj = item.subj.merge("registrant_id" => registrant_id) unless registrant_id.nil?
      logger.info subj
      item.update(subj: subj) if subj.present?
    end

    logger.error item.errors.full_messages.map { |message| { title: message } } if item.errors.any?
    logger.info "#{item.uuid} Updated" if item.errors.blank? && registrant_id
  end

  # Fetches the Crossref works record for a DOI and builds a registrant id.
  #
  # id      - DOI, either bare or as a doi.org / handle URL.
  # options - accepted for interface compatibility; not read by this method.
  #
  # Returns "" when Crossref answers 404, "crossref.citations" for any other
  # non-200 status or when the 200 response carries no member, and
  # "crossref.<member>" otherwise.
  def get_crossref_member_id(id, options = {})
    logger = Logger.new(STDOUT)
    doi = doi_from_url(id)
    # return "crossref.citations" unless doi.present?

    # NOTE(review): the tail of this URL looks mangled (presumably it was a
    # `?mailto=<address>` query parameter) -- restore the real contact address.
    url = "https://api.crossref.org/works/#{Addressable::URI.encode(doi)}[email protected]"
    sleep(0.01) # to avoid Crossref rate limiting
    response = Maremma.get(url, host: true)
    logger.info "[Crossref Response] [#{response.status}] for DOI #{doi} metadata"
    return "" if response.status == 404 # DOI is not in the Crossref API
    return "crossref.citations" if response.status != 200 # any other error

    # dig all the way down so a missing "message" does not raise NoMethodError.
    member = response.body.dig("data", "message", "member")
    return "crossref.citations" if member.blank?

    "crossref.#{member}"
  end

  # Looks up the registration agency for a DOI's prefix via https://doi.org/ra/.
  #
  # Returns the "RA" value of the first entry in the response data, or nil when
  # no prefix can be extracted from the DOI.
  def get_doi_ra(doi)
    prefix = validate_prefix(doi)
    return nil if prefix.blank?

    url = "https://doi.org/ra/#{prefix}"
    result = Maremma.get(url)

    result.body.dig("data", 0, "RA")
  end

  # Extracts the DOI prefix (e.g. "10.1234") from a DOI or DOI URL.
  # Returns nil when the string does not match.
  def validate_prefix(doi)
    Array(/\A(?:(http|https):\/(\/)?(dx\.)?(doi.org|handle.test.datacite.org)\/)?(doi:)?(10\.\d{4,5}).*\z/.match(doi)).last
  end

  # Returns the lower-cased DOI taken from the path of a DOI or DOI URL, or nil
  # when the input does not look like a DOI.
  def doi_from_url(url)
    if /\A(?:(http|https):\/\/(dx\.)?(doi.org|handle.test.datacite.org)\/)?(doi:)?(10\.\d{4,5}\/.+)\z/.match(url)
      uri = Addressable::URI.parse(url)
      uri.path.gsub(/^\//, '').downcase
    end
  end
end
7 changes: 7 additions & 0 deletions app/jobs/event_registrant_update_job.rb
Original file line number Diff line number Diff line change
@@ -0,0 +1,7 @@
# Fans a batch of event ids out into one EventRegistrantUpdateByIdJob per id.
class EventRegistrantUpdateJob < ActiveJob::Base
  queue_as :lupo_background

  # ids     - collection of ids; each one becomes the first argument of a job.
  # options - forwarded unchanged to every enqueued job.
  def perform(ids, options = {})
    ids.each do |event_id|
      EventRegistrantUpdateByIdJob.perform_later(event_id, options)
    end
  end
end
29 changes: 29 additions & 0 deletions app/models/event.rb
Original file line number Diff line number Diff line change
Expand Up @@ -447,6 +447,35 @@ def self.update_datacite_ra(options={})
response.results.total
end

# Pages through all events of the "datacite-crossref" and "crossref" sources
# using a cursor and enqueues one EventRegistrantUpdateJob per page of uuids.
#
# options - :size   page size used for the paging queries (default 1000)
#           :cursor cursor handed to Event.query for the first page (default [])
#           The whole hash is also forwarded to every enqueued job.
#
# Returns the total reported by the last query that was run.
def self.update_registrant(options = {})
  logger = Logger.new(STDOUT)

  page_size = (options[:size] || 1000).to_i
  cursor = options[:cursor] || []
  source_id = "datacite-crossref,crossref"

  # Probe with a single-item page to learn the total before paging.
  response = Event.query(nil, source_id: source_id, page: { size: 1, cursor: cursor })
  logger.info "[Update] #{response.results.total} events for sources #{source_id}."

  # walk through results using cursor
  if response.results.total > 0 && response.results.results.length > 0
    loop do
      response = Event.query(nil, source_id: source_id, page: { size: page_size, cursor: cursor })
      batch = response.results.results
      break if batch.length.zero?

      logger.info "[Update] Updating #{batch.length} #{source_id} events starting with _id #{response.results.to_a.first[:_id]}."

      # The sort values of the last hit become the cursor for the next page.
      cursor = response.results.to_a.last[:sort]

      uuids = batch.map(&:uuid).uniq
      EventRegistrantUpdateJob.perform_later(uuids, options)
    end
  end

  response.results.total
end

def self.update_datacite_orcid_auto_update(options={})
logger = Logger.new(STDOUT)

Expand Down
7 changes: 7 additions & 0 deletions lib/tasks/event.rake
Original file line number Diff line number Diff line change
Expand Up @@ -53,6 +53,13 @@ namespace :event do

Event.import_by_ids(from_id: from_id, until_id: until_id)
end

desc 'update registrant metadata'
task :update_registrant => :environment do
  # CURSOR is an optional comma-separated list; when it is unset the cursor is
  # an empty array, the same default Event.update_registrant applies itself.
  # String#split always returns an array, so the former
  # `|| [Event.minimum(:id), Event.minimum(:id)]` fallback could never run and
  # has been removed rather than left as misleading dead code.
  cursor = ENV['CURSOR'].to_s.split(",")

  # SIZE may be nil; Event.update_registrant falls back to its own default then.
  Event.update_registrant(cursor: cursor, size: ENV['SIZE'])
end
end

namespace :crossref do
Expand Down
16 changes: 16 additions & 0 deletions spec/jobs/event_update_by_id_job.rb
Original file line number Diff line number Diff line change
@@ -0,0 +1,16 @@
require 'rails_helper'

# Verifies that enqueuing the job for an event uuid puts it on the expected queue.
describe EventUpdateByIdJob, type: :job do
  subject(:job) { described_class.perform_later(event.uuid) }

  let(:event) { create(:event) }

  # Reset the job queues so examples do not leak into each other.
  after do
    clear_enqueued_jobs
    clear_performed_jobs
  end

  it 'queues the job' do
    expect { job }.to have_enqueued_job(described_class).on_queue("test_lupo_background")
  end
end

0 comments on commit 59ad0c0

Please sign in to comment.