From 9f871cbf54feaa57ef2a2c7f2efa0b009583fde8 Mon Sep 17 00:00:00 2001
From: Martin Fenner
Date: Sun, 4 Aug 2019 09:10:46 +0200
Subject: [PATCH] convert containers with namespaces. datacite/bolognese#72

---
 app/jobs/doi_convert_container_by_id_job.rb | 16 +++++
 app/models/doi.rb                           | 73 +++++++++++++++++++++
 lib/tasks/doi.rake                          |  9 +++
 3 files changed, 98 insertions(+)
 create mode 100644 app/jobs/doi_convert_container_by_id_job.rb

diff --git a/app/jobs/doi_convert_container_by_id_job.rb b/app/jobs/doi_convert_container_by_id_job.rb
new file mode 100644
index 000000000..fdef9c408
--- /dev/null
+++ b/app/jobs/doi_convert_container_by_id_job.rb
@@ -0,0 +1,16 @@
+# Background job that converts the containers of one batch of DOIs by
+# delegating to Doi.convert_container_by_id.
+class DoiConvertContainerByIdJob < ActiveJob::Base
+  queue_as :lupo_background
+
+  # Log these errors to STDOUT instead of letting them propagate.
+  rescue_from ActiveJob::DeserializationError, Elasticsearch::Transport::Transport::Errors::BadRequest do |error|
+    logger = Logger.new(STDOUT)
+    logger.error error.message
+  end
+
+  # options is passed through unchanged to Doi.convert_container_by_id.
+  def perform(options={})
+    Doi.convert_container_by_id(options)
+  end
+end
diff --git a/app/models/doi.rb b/app/models/doi.rb
index ce3895419..88274ea49 100644
--- a/app/models/doi.rb
+++ b/app/models/doi.rb
@@ -667,6 +667,79 @@ def self.convert_affiliation_by_id(options={})
     count
   end
 
+  # Queue background jobs that convert containers, one job per batch of 500 ids.
+  #
+  # options[:from_id] and options[:until_id] bound the id range and default to
+  # Doi.minimum(:id) and Doi.maximum(:id). Returns the number of ids in the range.
+  def self.convert_containers(options={})
+    from_id = (options[:from_id] || Doi.minimum(:id)).to_i
+    until_id = (options[:until_id] || Doi.maximum(:id)).to_i
+
+    # queue one job for every 500th id between from_id and until_id
+    (from_id..until_id).step(500).each do |id|
+      DoiConvertContainerByIdJob.perform_later(options.merge(id: id))
+      puts "Queued converting containers for DOIs with IDs starting with #{id}." unless Rails.env.test?
+    end
+
+    # Range#size counts the ids without building an array of every id
+    (from_id..until_id).size
+  end
+
+  # Convert the containers of the DOIs with ids from options[:id] to
+  # options[:id] + 499.
+  #
+  # A nil container is replaced with an empty hash. A hash container whose
+  # title, volume or issue is itself a hash is rebuilt with those values
+  # passed through parse_attributes. A container that is not a hash is
+  # logged and left unchanged.
+  #
+  # Returns the number of DOIs updated, or nil when options[:id] is blank.
+  def self.convert_container_by_id(options={})
+    return nil unless options[:id].present?
+
+    id = options[:id].to_i
+    count = 0
+
+    logger = Logger.new(STDOUT)
+
+    Doi.where(id: id..(id + 499)).find_each do |doi|
+      should_update = false
+
+      if doi.container.nil?
+        should_update = true
+        container = {}
+      elsif !(doi.container.is_a?(Hash))
+        logger.error "[MySQL] container for DOI #{doi.doi} should be a hash."
+      elsif [doi.container["title"], doi.container["volume"], doi.container["issue"]].any? { |c| c.is_a?(Hash) }
+        should_update = true
+        # NOTE(review): parse_attributes is called on the Doi class itself here - confirm it is available at class level
+        container = {
+          "type" => doi.container["type"],
+          "identifier" => doi.container["identifier"],
+          "identifierType" => doi.container["identifierType"],
+          "title" => parse_attributes(doi.container["title"]),
+          "volume" => parse_attributes(doi.container["volume"]),
+          "issue" => parse_attributes(doi.container["issue"]),
+          "firstPage" => doi.container["firstPage"],
+          "lastPage" => doi.container["lastPage"] }.compact
+      end
+
+      if should_update
+        # update_columns writes directly to the database, skipping validations and callbacks
+        doi.update_columns(container: container)
+        count += 1
+      end
+    end
+
+    logger.info "[MySQL] Converted containers for #{count} DOIs with IDs #{id} - #{(id + 499)}." if count > 0
+
+    count
+  rescue TypeError, ActiveRecord::ActiveRecordError, ActiveRecord::LockWaitTimeout => error
+    # log the error message and return the count reached so far
+    logger.error "[MySQL] Error converting containers for DOIs with IDs #{id} - #{(id + 499)}: #{error.message}"
+    count
+  end
+
   def doi=(value)
     write_attribute(:doi, value.upcase) if value.present?
   end
diff --git a/lib/tasks/doi.rake b/lib/tasks/doi.rake
index 6acbab49c..2f08e8f85 100644
--- a/lib/tasks/doi.rake
+++ b/lib/tasks/doi.rake
@@ -85,6 +85,15 @@ namespace :doi do
     Doi.convert_affiliations(from_id: from_id, until_id: until_id)
   end
 
+  desc 'Convert containers to new format'
+  task :convert_containers => :environment do
+    # FROM_ID and UNTIL_ID bound the id range, defaulting to the minimum and maximum DOI id
+    from_id = (ENV['FROM_ID'] || Doi.minimum(:id)).to_i
+    until_id = (ENV['UNTIL_ID'] || Doi.maximum(:id)).to_i
+
+    Doi.convert_containers(from_id: from_id, until_id: until_id)
+  end
+
   desc 'Migrates landing page data handling camelCase changes at same time'
   task :migrate_landing_page => :environment do
     Doi.migrate_landing_page