# Backfills the parsed metadata columns of DOIs that have no schema_version.
#
# Selects DOIs whose `schema_version` is nil and whose `created` timestamp
# falls between the start of `from_date` and the end of `until_date`, parses
# each record's current metadata XML with `read_datacite`, and writes the
# result with `update_columns`. Records that raise one of the rescued errors
# are logged and skipped so one bad record does not stop the run.
#
# @param options [Hash]
# @option options [String] :from_date  first creation day, parsed with
#   Date.parse; defaults to Date.current when blank
# @option options [String] :until_date last creation day, parsed with
#   Date.parse; defaults to Date.current when blank
# @return [true, nil] result of the summary log call, or nil when nothing
#   was imported
def self.import_missing(options={})
  from_date = options[:from_date].present? ? Date.parse(options[:from_date]) : Date.current
  until_date = options[:until_date].present? ? Date.parse(options[:until_date]) : Date.current

  # Keys copied one-to-one from the parsed metadata into columns of the same
  # name. Loop-invariant, so built once instead of once per record.
  attribute_names = %w(
    creators contributors titles publisher publication_year types descriptions
    container sizes formats language dates identifiers related_identifiers
    funding_references geo_locations rights_list subjects content_url
  )

  count = 0
  logger = Logger.new(STDOUT)

  Doi.where(schema_version: nil).where(created: from_date.midnight..until_date.end_of_day).find_each do |doi|
    begin
      string = doi.current_metadata.present? ? doi.current_metadata.xml : nil
      meta = doi.read_datacite(string: string, sandbox: doi.sandbox)
      attrs = attribute_names.map { |a| [a.to_sym, meta[a]] }.to_h.merge(
        schema_version: meta["schema_version"] || "http://datacite.org/schema/kernel-4",
        version_info: meta["version"],
        xml: string
      )

      doi.update_columns(attrs)
    rescue TypeError, NoMethodError, ActiveRecord::LockWaitTimeout => error
      # Interpolate rather than concatenate: a nil doi would otherwise raise a
      # second TypeError from inside this handler and abort the whole import.
      logger.error "[MySQL] Error importing metadata for #{doi.doi}: #{error.message}"
    else
      count += 1
    end
  end

  if count > 0
    # Report the resolved dates so the message is still meaningful when the
    # caller relied on the defaults and passed no dates.
    logger.info "[MySQL] Imported metadata for #{count} DOIs created #{from_date} - #{until_date}."
  end
end
# db/migrate/20181216071910_schema_version_index.rb
#
# Adds an index on the `schema_version` column of the `dataset` table.
# NOTE(review): presumably this supports the `schema_version: nil` lookup in
# Doi.import_missing -- confirm against the query plan.
class SchemaVersionIndex < ActiveRecord::Migration[5.2]
  def change
    add_index :dataset, :schema_version
  end
end

# lib/tasks/doi.rake -- this task is declared inside `namespace :doi`.
#
# Runs Doi.import_missing for DOIs created between FROM_DATE and UNTIL_DATE
# (environment variables); each falls back to today's date formatted with "%F".
desc 'Import missing DOIs'
task import_missing: :environment do
  first_day = ENV.fetch('FROM_DATE') { Date.current.strftime("%F") }
  last_day = ENV.fetch('UNTIL_DATE') { Date.current.strftime("%F") }

  Doi.import_missing(from_date: first_day, until_date: last_day)
end