diff --git a/app/jobs/crossref_doi_job.rb b/app/jobs/crossref_doi_job.rb
new file mode 100644
index 000000000..35acaf960
--- /dev/null
+++ b/app/jobs/crossref_doi_job.rb
@@ -0,0 +1,76 @@
+# Stores a Crossref DOI with DataCite unless Doi.find_by_id already returns a
+# result for it, by sending a JSON:API "dois" document to the local API.
+class CrossrefDoiJob < ActiveJob::Base
+  queue_as :lupo_background
+
+  # retry_on ActiveRecord::Deadlocked, wait: 10.seconds, attempts: 3
+  # retry_on Faraday::TimeoutError, wait: 10.minutes, attempts: 3
+
+  # discard_on ActiveJob::DeserializationError
+
+  # @param id [String] bare DOI, "doi:" identifier or DOI URL (see #doi_from_url)
+  # @return [Object] {} when id yields no DOI or Doi.find_by_id returns a
+  #   result; otherwise the return value of the final logger call
+  def perform(id)
+    logger = Logger.new(STDOUT)
+
+    doi = doi_from_url(id)
+    return {} unless doi.present?
+
+    # check whether DOI has been registered with DataCite already
+    result = Doi.find_by_id(doi).results.first
+    return {} unless result.blank?
+
+    # otherwise store Crossref metadata with DataCite
+    # using client crossref.citations and DataCite XML
+    # NOTE(review): "xml" carries the Base64-encoded id, not an XML document;
+    # presumably the API resolves the metadata from it -- confirm.
+    xml = Base64.strict_encode64(id)
+    attributes = {
+      "xml" => xml,
+      "source" => "levriero",
+      "event" => "publish" }.compact
+
+    data = {
+      "data" => {
+        "type" => "dois",
+        "attributes" => attributes,
+        "relationships" => {
+          "client" => {
+            "data" => {
+              "type" => "clients",
+              "id" => "crossref.citations"
+            }
+          }
+        }
+      }
+    }
+
+    url = "http://localhost/dois/#{doi}"
+    response = Maremma.put(url, accept: 'application/vnd.api+json',
+                                content_type: 'application/vnd.api+json',
+                                data: data.to_json,
+                                username: ENV["ADMIN_USERNAME"],
+                                password: ENV["ADMIN_PASSWORD"])
+
+    if [200, 201].include?(response.status)
+      logger.info "DOI #{doi} created."
+    else
+      logger.warn response.body["errors"]
+    end
+  end
+
+  # Returns the lowercased path of url (leading slash removed) when url is a
+  # bare DOI, a "doi:" identifier or an http(s) URL on doi.org or
+  # handle.test.datacite.org (optionally prefixed with "dx.").
+  #
+  # @param url [String] candidate DOI or DOI URL
+  # @return [String, nil] nil when url does not match the pattern
+  def doi_from_url(url)
+    # host dots are escaped so they match only a literal "."
+    if /\A(?:(http|https):\/\/(dx\.)?(doi\.org|handle\.test\.datacite\.org)\/)?(doi:)?(10\.\d{4,5}\/.+)\z/.match(url)
+      uri = Addressable::URI.parse(url)
+      uri.path.gsub(/^\//, '').downcase
+    end
+  end
+end
diff --git a/app/jobs/url_job.rb b/app/jobs/url_job.rb
index 7013bade2..bbaed531c 100644
--- a/app/jobs/url_job.rb
+++ b/app/jobs/url_job.rb
@@ -14,13 +14,13 @@ def perform(doi_id)
       response = Doi.get_doi(doi: doi.doi)
       url = response.body.dig('data', 'values', 0, 'data', 'value')
       if url.present?
-        if (doi.is_registered_or_findable? || %w(europ ethz).include?(doi.provider_id)) && doi.minted.blank?
+        if (doi.is_registered_or_findable? || %w(europ ethz crossref).include?(doi.provider_id)) && doi.minted.blank?
           doi.update_attributes(url: url, minted: Time.zone.now)
         else
           doi.update_attributes(url: url)
         end
 
-        doi.update_attributes(aasm_state: "findable") if %w(europ ethz).include?(doi.provider_id)
+        doi.update_attributes(aasm_state: "findable") if %w(europ ethz crossref).include?(doi.provider_id)
 
         doi.__elasticsearch__.index_document
 
diff --git a/app/models/concerns/indexable.rb b/app/models/concerns/indexable.rb
index 87734a40c..7c8b6ec69 100644
--- a/app/models/concerns/indexable.rb
+++ b/app/models/concerns/indexable.rb
@@ -106,6 +106,10 @@ def query(query, options={})
         from = 0
         search_after = [options.dig(:page, :cursor)]
         sort = [{ _id: { order: 'asc' }}]
+      elsif self.name == "Event" && options.dig(:page, :cursor).present?
+        from = 0
+        search_after = [options.dig(:page, :cursor)]
+        sort = [{ _id: { order: 'asc' }}]
       elsif self.name == "Activity" && options.dig(:page, :cursor).present?
from = 0 search_after = [options.dig(:page, :cursor)] diff --git a/app/models/event.rb b/app/models/event.rb index 9b77b37ea..23e6a5068 100644 --- a/app/models/event.rb +++ b/app/models/event.rb @@ -5,6 +5,9 @@ class Event < ActiveRecord::Base # include event processing include Processable + # include helper methods for models + include Modelable + # include doi normalization include Identifiable @@ -340,6 +343,76 @@ def self.import_by_id(options={}) logger.info "[Elasticsearch] Imported #{count} events with IDs #{id} - #{(id + 499)}." end + def self.update_crossref(options={}) + logger = Logger.new(STDOUT) + + size = (options[:size] || 1000).to_i + + response = Event.query(nil, source_id: "crossref", page: { size: 1, cursor: 0 }) + logger.info "[Update] #{response.results.total} events for source crossref." + + if response.results.total > 0 + # walk through results using cursor + cursor = 0 + + while response.results.results.length > 0 do + response = Event.query(nil, source_id: "crossref", page: { size: size, cursor: cursor }) + break unless response.results.results.length > 0 + + logger.info "[Update] Updating #{response.results.results.length} crossref events starting with _id #{cursor + 1}." + cursor = response.results.to_a.last[:sort].first.to_i + + response.results.results.each do |e| + CrossrefDoiJob.perform_later(e.subj_id) + end + end + end + + response.results.total + end + + def self.update_datacite_crossref(options={}) + logger = Logger.new(STDOUT) + + size = (options[:size] || 1000).to_i + + response = Event.query(nil, source_id: "datacite_crossref", page: { size: 1, cursor: 0 }) + logger.info "[Update] #{response.results.total} events for source datacite_crossref." 
+ + if response.results.total > 0 + # walk through results using cursor + cursor = 0 + + while response.results.results.length > 0 do + response = Event.query(nil, source_id: "crossref", page: { size: size, cursor: cursor }) + break unless response.results.results.length > 0 + + logger.info "[Update] Updating #{response.results.results.length} datacite_crossref events starting with _id #{cursor + 1}." + cursor = response.results.to_a.last[:sort].first.to_i + + response.results.results.each do |e| + CrossrefDoiJob.perform_later(e.obj_id) + end + end + end + + response.results.total + end + + def self.get_crossref_metadata(id) + doi = doi_from_url(id) + return {} unless doi.present? + + # check whether DOI has been registered with DataCite already + result = Doi.find_by_id(doi).results.first + if result.blank? + # otherwise store Crossref metadata with DataCite + # using client crossref.citations and DataCite XML + xml = Base64.strict_encode64(id) + Doi.create({ xml: xml, source: "levriero", event: "publish", client_id: "crossref.citations" }, :without_protection => true) + end + end + def to_param # overridden, use uuid instead of id uuid end @@ -356,6 +429,7 @@ def send_callback "timestamp" => timestamp }} Maremma.post(callback, data: data.to_json, token: ENV['API_KEY']) end + def access_method if relation_type_id.to_s =~ /(requests|investigations)/ relation_type_id.split("-").last if relation_type_id.present? 
diff --git a/config/application.rb b/config/application.rb
index c1e5f8d66..e1f005e56 100644
--- a/config/application.rb
+++ b/config/application.rb
@@ -36,6 +36,7 @@
 ENV['SITE_TITLE'] ||= "DataCite REST API"
 ENV['LOG_LEVEL'] ||= "info"
 ENV['CONCURRENCY'] ||= "25"
+ENV['API_URL'] ||= "https://api.test.datacite.org"
 ENV['CDN_URL'] ||= "https://assets.datacite.org"
 ENV['BRACCO_URL'] ||= "https://doi.datacite.org"
 ENV['GITHUB_URL'] ||= "https://github.com/datacite/lupo"
diff --git a/lib/tasks/event.rake b/lib/tasks/event.rake
index d4b66417f..6fd84073f 100644
--- a/lib/tasks/event.rake
+++ b/lib/tasks/event.rake
@@ -22,3 +22,19 @@ namespace :event do
     Event.import_by_ids(from_id: from_id, until_id: until_id)
   end
 end
+
+namespace :crossref do
+  desc 'Queue CrossrefDoiJob for the subj_id of all crossref events'
+  task :import_doi => :environment do
+    # Event.update_crossref reads only options[:size]; SIZE sets the page size.
+    Event.update_crossref(size: ENV['SIZE'])
+  end
+end
+
+namespace :datacite_crossref do
+  desc 'Queue CrossrefDoiJob for the obj_id of all datacite_crossref events'
+  task :import_doi => :environment do
+    # Event.update_datacite_crossref reads only options[:size]; SIZE sets the page size.
+    Event.update_datacite_crossref(size: ENV['SIZE'])
+  end
+end