Skip to content

Commit

Permalink
more error reporting for doi import
Browse files Browse the repository at this point in the history
  • Loading branch information
Martin Fenner committed Dec 26, 2018
1 parent 382fbce commit c9a2640
Show file tree
Hide file tree
Showing 3 changed files with 27 additions and 7 deletions.
30 changes: 25 additions & 5 deletions app/models/doi.rb
Original file line number Diff line number Diff line change
Expand Up @@ -336,19 +336,29 @@ def self.find_by_id(id, options={})
})
end

def self.import_one(doi: nil)
doi = Doi.where(doi: doi).first
return nil unless doi.present?
def self.import_one(doi_id: nil)
logger = Logger.new(STDOUT)

doi = Doi.where(doi: doi_id).first
unless doi.present?
logger.error "[MySQL] DOI " + doi_id + " not found."
return nil
end

string = doi.current_metadata.present? && doi.current_metadata.xml.to_s.start_with?('<?xml version=') ? doi.current_metadata.xml.force_encoding("UTF-8") : nil
unless string.present?
logger.error "[MySQL] No metadata for DOI " + doi.doi + " found."
return nil
end

meta = doi.read_datacite(string: string, sandbox: doi.sandbox)
attrs = %w(creators contributors titles publisher publication_year types descriptions container sizes formats language dates identifiers related_identifiers funding_references geo_locations rights_list subjects content_url).map do |a|
[a.to_sym, meta[a]]
end.to_h.merge(schema_version: meta["schema_version"] || "http://datacite.org/schema/kernel-4", version_info: meta["version"], xml: string)

# update_attributes will trigger validations and Elasticsearch indexing
doi.update_attributes(attrs)
logger.info "[MySQL] Imported metadata for DOI " + doi.doi + "."
logger.info "[MySQL] Imported metadata for DOI " + doi.doi + "."
rescue TypeError, NoMethodError, RuntimeError, ActiveRecord::StatementInvalid, ActiveRecord::LockWaitTimeout => error
logger.error "[MySQL] Error importing metadata for " + doi.doi + ": " + error.message
Bugsnag.notify(error)
Expand Down Expand Up @@ -388,6 +398,11 @@ def self.import_by_day(options={})
begin
# ignore broken xml
string = doi.current_metadata.present? && doi.current_metadata.xml.to_s.start_with?('<?xml version=') ? doi.current_metadata.xml.force_encoding("UTF-8") : nil
unless string.present?
logger.error "[MySQL] No metadata for DOI " + doi.doi + " found."
return nil
end

meta = doi.read_datacite(string: string, sandbox: doi.sandbox)
attrs = %w(creators contributors titles publisher publication_year types descriptions container sizes formats language dates identifiers related_identifiers funding_references geo_locations rights_list subjects content_url).map do |a|
[a.to_sym, meta[a]]
Expand Down Expand Up @@ -419,6 +434,11 @@ def self.import_by_day_missing(options={})
Doi.where(schema_version: nil).where(created: from_date.midnight..from_date.end_of_day).find_each do |doi|
begin
string = doi.current_metadata.to_s.start_with?('<?xml version=') ? doi.current_metadata.xml.force_encoding("UTF-8") : nil
unless string.present?
logger.error "[MySQL] No metadata for DOI " + doi.doi + " found."
return nil
end

meta = doi.read_datacite(string: string, sandbox: doi.sandbox)
attrs = %w(creators contributors titles publisher publication_year types descriptions container sizes formats language dates identifiers related_identifiers funding_references geo_locations rights_list subjects content_url).map do |a|
[a.to_sym, meta[a]]
Expand All @@ -444,7 +464,7 @@ def self.index(options={})
until_date = options[:until_date].present? ? Date.parse(options[:until_date]) : Date.current
index_time = options[:index_time].presence || Time.zone.now.utc.iso8601
client_id = options[:client_id]

# get every day between from_date and until_date
(from_date..until_date).each do |d|
DoiIndexByDayJob.perform_later(from_date: d.strftime("%F"), index_time: index_time, client_id: client_id)
Expand Down
2 changes: 1 addition & 1 deletion lib/tasks/client.rake
Original file line number Diff line number Diff line change
Expand Up @@ -125,7 +125,7 @@ namespace :client do
puts "#{client.dois.length} DOIs will be imported."
client.dois.find_each do |doi|
begin
Doi.import_one(doi: doi.doi)
Doi.import_one(doi_id: doi.doi)
puts "DOI #{doi.doi} imported."
rescue TypeError, NoMethodError, RuntimeError, ActiveRecord::StatementInvalid, ActiveRecord::LockWaitTimeout, Elasticsearch::Transport::Transport::Errors::BadRequest => error
puts "[MySQL] Error importing metadata for " + doi.doi + ": " + error.message
Expand Down
2 changes: 1 addition & 1 deletion lib/tasks/doi.rake
Original file line number Diff line number Diff line change
Expand Up @@ -47,7 +47,7 @@ namespace :doi do
exit
end

Doi.import_one(doi: ENV['DOI'])
Doi.import_one(doi_id: ENV['DOI'])
end

desc 'Index all DOIs'
Expand Down

0 comments on commit c9a2640

Please sign in to comment.