Skip to content

Commit

Permalink
convert affiliations in database to new format. #324 #326
Browse files Browse the repository at this point in the history
  • Loading branch information
Martin Fenner committed Aug 1, 2019
1 parent d64b46d commit 26b5362
Show file tree
Hide file tree
Showing 10 changed files with 117 additions and 49 deletions.
3 changes: 1 addition & 2 deletions app/controllers/dois_controller.rb
Original file line number Diff line number Diff line change
Expand Up @@ -467,8 +467,7 @@ def status
protected

# Loads the DOI identified by params[:id] into @doi.
#
# The record is looked up with a single Doi.where query on the doi column.
# An earlier additional lookup via Doi.find_by_id was dropped because its
# result was immediately overwritten.
#
# Raises ActiveRecord::RecordNotFound when no matching record is present.
def set_doi
  @doi = Doi.where(doi: params[:id]).first
  fail ActiveRecord::RecordNotFound unless @doi.present?
end

Expand Down
5 changes: 5 additions & 0 deletions app/jobs/crossref_doi_by_id_job.rb
Original file line number Diff line number Diff line change
Expand Up @@ -6,6 +6,11 @@ class CrossrefDoiByIdJob < ActiveJob::Base

# discard_on ActiveJob::DeserializationError

rescue_from ActiveJob::DeserializationError, Elasticsearch::Transport::Transport::Errors::BadRequest do |error|
logger = Logger.new(STDOUT)
logger.error error.message
end

def perform(id, options={})
logger = Logger.new(STDOUT)

Expand Down
12 changes: 12 additions & 0 deletions app/jobs/doi_convert_affiliation_by_id_job.rb
Original file line number Diff line number Diff line change
@@ -0,0 +1,12 @@
# Background job that delegates to Doi.convert_affiliation_by_id.
#
# Runs on the :lupo_background queue. Deserialization errors and
# Elasticsearch "bad request" errors are logged to STDOUT instead of
# propagating.
class DoiConvertAffiliationByIdJob < ActiveJob::Base
  queue_as :lupo_background

  rescue_from ActiveJob::DeserializationError, Elasticsearch::Transport::Transport::Errors::BadRequest do |exception|
    Logger.new(STDOUT).error(exception.message)
  end

  # options is passed through unchanged to Doi.convert_affiliation_by_id.
  def perform(options={})
    Doi.convert_affiliation_by_id(options)
  end
end
7 changes: 6 additions & 1 deletion app/jobs/doi_import_by_id_job.rb
Original file line number Diff line number Diff line change
@@ -1,7 +1,12 @@
# Background job that delegates to Doi.import_by_id.
#
# Runs on the :lupo_background queue. Deserialization errors and
# Elasticsearch "bad request" errors are logged to STDOUT instead of
# propagating.
class DoiImportByIdJob < ActiveJob::Base
  queue_as :lupo_background

  rescue_from ActiveJob::DeserializationError, Elasticsearch::Transport::Transport::Errors::BadRequest do |error|
    logger = Logger.new(STDOUT)
    logger.error error.message
  end

  # options is passed through unchanged to Doi.import_by_id.
  def perform(options={})
    Doi.import_by_id(options)
  end
end
2 changes: 1 addition & 1 deletion app/jobs/index_job.rb
Original file line number Diff line number Diff line change
@@ -1,7 +1,7 @@
class IndexJob < ActiveJob::Base
queue_as :lupo

rescue_from ActiveJob::DeserializationError do |error|
rescue_from ActiveJob::DeserializationError, Elasticsearch::Transport::Transport::Errors::BadRequest do |error|
logger = Logger.new(STDOUT)
logger.error error.message
end
Expand Down
2 changes: 1 addition & 1 deletion app/models/activity.rb
Original file line number Diff line number Diff line change
Expand Up @@ -109,7 +109,7 @@ def after_audit
awardTitle: { type: :keyword }
}},
dates: { type: :object, properties: {
date: { type: :date, format: "date_optional_time", ignore_malformed: true, fields: { raw: { type: :text }} },
date: { type: :text },
dateType: { type: :keyword }
}},
geo_locations: { type: :object, properties: {
Expand Down
94 changes: 53 additions & 41 deletions app/models/doi.rb
Original file line number Diff line number Diff line change
Expand Up @@ -207,7 +207,7 @@ class Doi < ActiveRecord::Base
awardTitle: { type: :keyword }
}
indexes :dates, type: :object, properties: {
date: { type: :date, format: "date_optional_time", ignore_malformed: true, fields: { raw: { type: :text }} },
date: { type: :text },
dateType: { type: :keyword }
}
indexes :geo_locations, type: :object, properties: {
Expand Down Expand Up @@ -572,7 +572,58 @@ def contributors_with_affiliations
end
end

def convert_affiliation
# Queues background jobs that convert affiliations for a range of DOI ids.
#
# options[:from_id]  - first id of the range (defaults to Doi.minimum(:id))
# options[:until_id] - last id of the range, inclusive (defaults to Doi.maximum(:id))
#
# One DoiConvertAffiliationByIdJob is queued per batch of 500 ids; each job
# receives the original options merged with the batch's starting :id.
#
# Returns the number of ids in the range (0 when from_id > until_id).
def self.convert_affiliations(options={})
  from_id = (options[:from_id] || Doi.minimum(:id)).to_i
  until_id = (options[:until_id] || Doi.maximum(:id)).to_i

  # walk the range from from_id to until_id in steps of 500
  (from_id..until_id).step(500).each do |id|
    DoiConvertAffiliationByIdJob.perform_later(options.merge(id: id))
    puts "Queued converting affiliations for DOIs with IDs starting with #{id}." unless Rails.env.test?
  end

  # Range#size counts without materializing every id into an array
  (from_id..until_id).size
end

# Converts string affiliations to hashes for one batch of 500 DOIs.
#
# options[:id] - first id of the batch; the batch covers id..(id + 499).
#
# For every DOI in the batch, each creator and contributor whose
# "affiliation" is a String gets it replaced by { "name" => <string> }.
# A DOI is only written back when at least one entry changed.
#
# Returns nil when options[:id] is blank, otherwise the number of DOIs
# updated. When one of the rescued errors occurs, the error is logged and
# the logger call's result is returned instead of a count.
def self.convert_affiliation_by_id(options={})
  return nil unless options[:id].present?

  id = options[:id].to_i
  last_id = id + 499
  count = 0

  logger = Logger.new(STDOUT)

  # Shared conversion for creators and contributors.
  # Returns [people, changed]; the hashes are modified in place.
  wrap_affiliations = lambda do |people|
    changed = false
    converted = Array.wrap(people).map do |person|
      if person["affiliation"].is_a?(String)
        person["affiliation"] = { "name" => person["affiliation"] }
        changed = true
      end

      person
    end

    [converted, changed]
  end

  Doi.where(id: id..last_id).find_each do |doi|
    creators, creators_changed = wrap_affiliations.call(doi.creators)
    contributors, contributors_changed = wrap_affiliations.call(doi.contributors)
    next unless creators_changed || contributors_changed

    Doi.non_audited_columns = [:creators, :contributors]
    # update is the non-deprecated name for update_attributes
    doi.update(creators: creators, contributors: contributors)
    count += 1
  end

  logger.info "[Elasticsearch] Converted affiliations for #{count} DOIs with IDs #{id} - #{last_id}." if count > 0

  count
rescue Elasticsearch::Transport::Transport::Errors::RequestEntityTooLarge, Faraday::ConnectionFailed, ActiveRecord::LockWaitTimeout => error
  logger.info "[Elasticsearch] Error #{error.message} converting affiliations for DOIs with IDs #{id} - #{last_id}."
end

def doi=(value)
Expand Down Expand Up @@ -851,45 +902,6 @@ def set_defaults
self.updated = Time.zone.now.utc.iso8601
end

# convert affiliations from string to hash, following changes in schema 4.3
#
# Searches for DOIs matching the query "creators.affiliation:*" and queues
# one AffiliationJob per matching DOI, paging through the results with a cursor.
# NOTE(review): this method takes no arguments, so calling it with an options
# hash would raise ArgumentError.
def self.convert_affiliations
logger = Logger.new(STDOUT)

# first request only fetches a single result in order to log the total
response = Doi.query("creators.affiliation:*", page: { size: 1, cursor: [] })
logger.info "#{response.results.total} DOIs found that have the affiliation in the old format."

if response.results.total > 0
# walk through results using cursor
cursor = []

while response.results.results.length > 0 do
# fetch the next page of up to 1000 results, starting after the cursor
response = Doi.query("creators.affiliation:*", page: { size: 1000, cursor: cursor })
break unless response.results.results.length > 0

logger.info "[Affiliation] Updating #{response.results.results.length} DOIs starting with _id #{response.results.to_a.first[:_id]}."
# the sort value of the last result becomes the cursor for the next page
cursor = response.results.to_a.last[:sort]

response.results.results.each do |d|
AffiliationJob.perform_later(d.doi)
end
end
end
end

# Returns the creators as an array in which every String affiliation has
# been replaced by a hash of the form { "name" => <string> }.
# The creator hashes are modified in place.
def creators_with_affiliations
  Array.wrap(creators).map do |creator|
    affiliation = creator["affiliation"]
    creator["affiliation"] = { "name" => affiliation } if affiliation.is_a?(String)
    creator
  end
end

# Returns the contributors as an array in which every String affiliation has
# been replaced by a hash of the form { "name" => <string> }.
# The contributor hashes are modified in place.
def contributors_with_affiliations
  Array.wrap(contributors).map do |contributor|
    affiliation = contributor["affiliation"]
    contributor["affiliation"] = { "name" => affiliation } if affiliation.is_a?(String)
    contributor
  end
end

def self.migrate_landing_page(options={})
logger = Logger.new(STDOUT)
logger.info "Starting migration"
Expand Down
5 changes: 4 additions & 1 deletion lib/tasks/doi.rake
Original file line number Diff line number Diff line change
Expand Up @@ -79,7 +79,10 @@ namespace :doi do

desc 'Convert affiliations to new format'
# Queues affiliation conversion for the id range given by the FROM_ID and
# UNTIL_ID environment variables, falling back to the smallest and largest
# Doi id. The conversion is queued exactly once, for that range only.
task :convert_affiliations => :environment do
  from_id = (ENV['FROM_ID'] || Doi.minimum(:id)).to_i
  until_id = (ENV['UNTIL_ID'] || Doi.maximum(:id)).to_i

  Doi.convert_affiliations(from_id: from_id, until_id: until_id)
end

desc 'Migrates landing page data handling camelCase changes at same time'
Expand Down

Some generated files are not rendered by default. Learn more about how customized files appear on GitHub.

4 changes: 2 additions & 2 deletions spec/requests/dois_spec.rb
Original file line number Diff line number Diff line change
Expand Up @@ -2726,8 +2726,8 @@
# The get-url request is expected to be rejected with a 403 error payload.
# Contradictory assertions for a 404 status were removed, since a single
# response cannot satisfy both.
it 'returns nil' do
  get "/dois/#{doi.doi}/get-url", nil, headers

  expect(last_response.status).to eq(403)
  expect(json).to eq("errors"=>[{"status"=>403, "title"=>"SERVER NOT RESPONSIBLE FOR HANDLE"}])
end
end
end
Expand Down

0 comments on commit 26b5362

Please sign in to comment.