From aab41e6670262e485be070bbc25059eabcb7722e Mon Sep 17 00:00:00 2001
From: Martin Fenner
Date: Mon, 15 Jul 2019 09:22:23 +0200
Subject: [PATCH] transition to new affiliations format

Affiliations of creators and contributors change from a plain string to a
hash of the form { "name" => ... } (schema 4.3).

- AffiliationJob rewrites the stored creators/contributors of one DOI in the
  new format and reindexes it
- Doi.convert_affiliations queries for DOIs whose creators are indexed in the
  old format and queues an AffiliationJob for each of them; it is exposed as
  the rake task doi:convert_affiliations
- Doi#as_indexed_json indexes creators/contributors in the new format

---
 app/jobs/affiliation_job.rb | 27 ++++++++++++++++++++
 app/models/doi.rb           | 49 +++++++++++++++++++++++++++++++++++--
 lib/tasks/doi.rake          |  5 ++++
 3 files changed, 79 insertions(+), 2 deletions(-)
 create mode 100644 app/jobs/affiliation_job.rb

diff --git a/app/jobs/affiliation_job.rb b/app/jobs/affiliation_job.rb
new file mode 100644
index 000000000..b68cf8131
--- /dev/null
+++ b/app/jobs/affiliation_job.rb
@@ -0,0 +1,27 @@
+# Converts the creator and contributor affiliations of a single DOI from the
+# old string format to the new hash format ({ "name" => ... }) and reindexes it.
+class AffiliationJob < ActiveJob::Base
+  queue_as :lupo_background
+
+  def perform(doi_id)
+    logger = Logger.new(STDOUT)
+    doi = Doi.where(doi: doi_id).first
+
+    if doi.present?
+      # read the attributes from the DOI record, the job has no creators/contributors of its own
+      new_creators = Array.wrap(doi.creators).map do |c|
+        c["affiliation"] = { "name" => c["affiliation"] } if c["affiliation"].is_a?(String)
+        c
+      end
+      new_contributors = Array.wrap(doi.contributors).map do |c|
+        c["affiliation"] = { "name" => c["affiliation"] } if c["affiliation"].is_a?(String)
+        c
+      end
+      doi.update_attributes(creators: new_creators, contributors: new_contributors)
+
+      doi.__elasticsearch__.index_document
+    else
+      logger.info "[Affiliation] Error updating DOI #{doi_id}: not found"
+    end
+  end
+end
diff --git a/app/models/doi.rb b/app/models/doi.rb
index cbbc03fad..6ff6072f5 100644
--- a/app/models/doi.rb
+++ b/app/models/doi.rb
@@ -309,8 +309,8 @@ def as_indexed_json(options={})
       "doi" => doi,
       "identifier" => identifier,
       "url" => url,
-      "creators" => creators,
-      "contributors" => contributors,
+      "creators" => creators_with_affiliations,
+      "contributors" => contributors_with_affiliations,
       "creator_names" => creator_names,
       "titles" => titles,
       "descriptions" => descriptions,
@@ -544,6 +544,26 @@ def creator_names
     end
   end
 
+  # use newer index with old database following schema 4.3 changes:
+  # string affiliations are wrapped as { "name" => ... } on copies, so that
+  # building the index document does not modify the record's own attributes
+  def creators_with_affiliations
+    Array.wrap(creators).map do |c|
+      next c unless c["affiliation"].is_a?(String)
+      c.merge("affiliation" => { "name" => c["affiliation"] })
+    end
+  end
+
+  def contributors_with_affiliations
+    Array.wrap(contributors).map do |c|
+      next c unless c["affiliation"].is_a?(String)
+      c.merge("affiliation" => { "name" => c["affiliation"] })
+    end
+  end
+
+  def convert_affiliation
+  end
+
   def doi=(value)
     write_attribute(:doi, value.upcase) if value.present?
   end
@@ -820,6 +840,31 @@ def set_defaults
     self.updated = Time.zone.now.utc.iso8601
   end
 
+  # convert affiliations from string to hash, following changes in schema 4.3
+  def self.convert_affiliations
+    logger = Logger.new(STDOUT)
+
+    response = Doi.query("creators.affiliation:* -creators.affiliation.name:*", page: { size: 1, cursor: [] })
+    logger.info "#{response.results.total} DOIs found that have the affiliation in the old format."
+
+    if response.results.total > 0
+      # walk through results using cursor
+      cursor = []
+
+      while response.results.results.length > 0 do
+        response = Doi.query("creators.affiliation:* -creators.affiliation.name:*", page: { size: 1000, cursor: cursor })
+        break unless response.results.results.length > 0
+
+        logger.info "[Affiliation] Updating #{response.results.results.length} DOIs starting with _id #{response.results.to_a.first[:_id]}."
+        cursor = response.results.to_a.last[:sort]
+
+        response.results.results.each do |d|
+          AffiliationJob.perform_later(d.doi)
+        end
+      end
+    end
+  end
+
   def self.migrate_landing_page(options={})
     logger = Logger.new(STDOUT)
     logger.info "Starting migration"
diff --git a/lib/tasks/doi.rake b/lib/tasks/doi.rake
index 89aaf64e4..2fe71ab05 100644
--- a/lib/tasks/doi.rake
+++ b/lib/tasks/doi.rake
@@ -77,6 +77,11 @@ namespace :doi do
     Doi.set_minted
   end
 
+  desc 'Convert affiliations to new format'
+  task :convert_affiliations => :environment do
+    Doi.convert_affiliations
+  end
+
   desc 'Migrates landing page data handling camelCase changes at same time'
   task :migrate_landing_page => :environment do
     Doi.migrate_landing_page