Skip to content

Commit

Permalink
convert metadata when JSON is missing. #161
Browse files (browse the repository at this point in the history)
  • Loading branch information
Martin Fenner committed Dec 16, 2018
1 parent 72a25e3 commit d54e44d
Show file tree
Hide file tree
Showing 4 changed files with 44 additions and 1 deletion.
29 changes: 29 additions & 0 deletions app/models/doi.rb
Original file line number Diff line number Diff line change
Expand Up @@ -395,6 +395,35 @@ def self.import_by_day(options={})
end
end

# Backfills the metadata columns of DOI records whose schema_version is NULL
# and whose `created` timestamp lies between the start of from_date and the
# end of until_date.
#
# For each matching record the stored XML (if any) is passed to
# read_datacite, and the returned values are written back with update_columns.
# Records that fail are logged and skipped so one bad record does not stop
# the batch.
#
# @param options [Hash]
# @option options [String] :from_date  first creation day, parsed with
#   Date.parse; defaults to Date.current when blank
# @option options [String] :until_date last creation day, parsed with
#   Date.parse; defaults to Date.current when blank
# @return [true, nil] result of the summary log call, or nil when nothing
#   was imported
def self.import_missing(options={})
  from_date = options[:from_date].present? ? Date.parse(options[:from_date]) : Date.current
  until_date = options[:until_date].present? ? Date.parse(options[:until_date]) : Date.current

  count = 0

  logger = Logger.new(STDOUT)

  Doi.where(schema_version: nil).where(created: from_date.midnight..until_date.end_of_day).find_each do |doi|
    begin
      string = doi.current_metadata.present? ? doi.current_metadata.xml : nil
      # NOTE(review): read_datacite is defined elsewhere; it is assumed to
      # return a Hash keyed by the attribute names below -- confirm.
      meta = doi.read_datacite(string: string, sandbox: doi.sandbox)
      attrs = %w(creators contributors titles publisher publication_year types descriptions container sizes formats language dates identifiers related_identifiers funding_references geo_locations rights_list subjects content_url).map do |a|
        [a.to_sym, meta[a]]
      end.to_h.merge(
        # fall back to the kernel-4 namespace when no schema version was read
        schema_version: meta["schema_version"] || "http://datacite.org/schema/kernel-4",
        version_info: meta["version"],
        xml: string
      )

      doi.update_columns(attrs)
    rescue TypeError, NoMethodError, ActiveRecord::LockWaitTimeout => error
      # Interpolation instead of `+`: a nil doi.doi must not raise a new
      # TypeError from inside the handler and abort the whole batch.
      logger.error "[MySQL] Error importing metadata for #{doi.doi}: #{error.message}"
    else
      count += 1
    end
  end

  # Report the dates actually used for the query; the raw options are blank
  # when the Date.current defaults were applied.
  if count > 0
    logger.info "[MySQL] Imported metadata for #{count} DOIs created #{from_date.iso8601} - #{until_date.iso8601}."
  end
end

def self.index(options={})
from_date = options[:from_date].present? ? Date.parse(options[:from_date]) : Date.current
until_date = options[:until_date].present? ? Date.parse(options[:until_date]) : Date.current
Expand Down
5 changes: 5 additions & 0 deletions db/migrate/20181216071910_schema_version_index.rb
Original file line number Diff line number Diff line change
@@ -0,0 +1,5 @@
# Adds a single-column index on dataset.schema_version.
# Defined in `change` so the migration can be rolled back automatically.
class SchemaVersionIndex < ActiveRecord::Migration[5.2]
  def change
    add_index :dataset, :schema_version
  end
end
3 changes: 2 additions & 1 deletion db/schema.rb
Original file line number Diff line number Diff line change
Expand Up @@ -10,7 +10,7 @@
#
# It's strongly recommended that you check this file into your version control system.

ActiveRecord::Schema.define(version: 2018_12_09_231736) do
ActiveRecord::Schema.define(version: 2018_12_16_071910) do

create_table "active_storage_attachments", options: "ENGINE=InnoDB DEFAULT CHARSET=latin1", force: :cascade do |t|
t.string "name", limit: 191, null: false
Expand Down Expand Up @@ -162,6 +162,7 @@
t.index ["doi"], name: "doi", unique: true
t.index ["last_landing_page_content_type"], name: "index_dataset_on_last_landing_page_content_type"
t.index ["last_landing_page_status"], name: "index_dataset_on_last_landing_page_status"
t.index ["schema_version"], name: "index_dataset_on_schema_version"
t.index ["source"], name: "index_dataset_source"
t.index ["url"], name: "index_dataset_on_url", length: 100
end
Expand Down
8 changes: 8 additions & 0 deletions lib/tasks/doi.rake
Original file line number Diff line number Diff line change
Expand Up @@ -27,6 +27,14 @@ namespace :doi do
puts "DOIs created on #{from_date} imported."
end

# Calls Doi.import_missing for a range of creation dates.
# FROM_DATE and UNTIL_DATE are read from the environment; each falls back to
# the current date formatted with "%F" when the variable is not set.
desc 'Import missing DOIs'
task import_missing: :environment do
  date_range = {
    from_date: ENV.fetch('FROM_DATE') { Date.current.strftime("%F") },
    until_date: ENV.fetch('UNTIL_DATE') { Date.current.strftime("%F") }
  }

  Doi.import_missing(date_range)
end

desc 'Import one DOI'
task :import_one => :environment do
if ENV['DOI'].nil?
Expand Down

0 comments on commit d54e44d

Please sign in to comment.