From b5babb4dc754e6313ddf7209a50882cc3b5a93e9 Mon Sep 17 00:00:00 2001 From: Richard Hallett Date: Mon, 28 Sep 2020 11:52:44 +0200 Subject: [PATCH 1/2] Add rake task for fixing index type This rake task adds the type information either DataCite or OtherType based on agency. --- app/models/doi.rb | 43 +++++++++++++++++++++++++++++++++++++++---- lib/tasks/doi.rake | 9 +++++++++ 2 files changed, 48 insertions(+), 4 deletions(-) diff --git a/app/models/doi.rb b/app/models/doi.rb index c6d671c2c..55f997dc2 100644 --- a/app/models/doi.rb +++ b/app/models/doi.rb @@ -868,7 +868,7 @@ def self.gql_query(query, options={}) filter << { terms: { "client.certificate" => options[:certificate].split(",") }} if options[:certificate].present? filter << { term: { "creators.nameIdentifiers.nameIdentifier" => "https://orcid.org/#{orcid_from_url(options[:user_id])}" }} if options[:user_id].present? filter << { term: { "creators.nameIdentifiers.nameIdentifierScheme" => "ORCID" }} if options[:has_person].present? - + # match either one of has_affiliation, has_organization, has_funder or has_member if options[:has_organization].present? should << { term: { "creators.nameIdentifiers.nameIdentifierScheme" => "ROR" }} @@ -1067,7 +1067,7 @@ def self.query(query, options={}) filter << { terms: { "client.certificate" => options[:certificate].split(",") }} if options[:certificate].present? filter << { term: { "creators.nameIdentifiers.nameIdentifier" => "https://orcid.org/#{orcid_from_url(options[:user_id])}" }} if options[:user_id].present? filter << { term: { "creators.nameIdentifiers.nameIdentifierScheme" => "ORCID" }} if options[:has_person].present? - + # match either one of has_affiliation, has_organization, or has_funder if options[:has_organization].present? should << { term: { "creators.nameIdentifiers.nameIdentifierScheme" => "ROR" }} @@ -2133,7 +2133,7 @@ def set_defaults end def update_agency - if agency.blank? || agency.casecmp?("datacite") + if agency.blank? || agency.casecmp?("datacite") self.agency = "datacite" self.type = "DataciteDoi" elsif agency.casecmp?("crossref") @@ -2310,4 +2310,39 @@ def self.migrate_landing_page(options={}) "Finished migrating landing pages." end -end + + def self.add_index_type(options={}) + return nil unless options[:from_id].present? + + + from_id = options[:from_id].to_i + until_id = (options[:until_id] || (from_id + 499)).to_i + + # get every id between from_id and end_id + count = 0 + + Rails.logger.info "[migration_index_types] adding type information for DOIs with IDs #{from_id} - #{until_id}." + + Doi.where(id: from_id..until_id).where('type' => nil).find_each(batch_size: 500) do |doi| + begin + if doi.agency.casecmp?("datacite") + type = "DataciteDoi" + else + type = "OtherDoi" + end + + doi.update_columns("type": type) + + count += 1 + Rails.logger.info "Updated #{doi.doi} (#{doi.id})" + + rescue => error + Rails.logger.error "Error updating #{doi.doi} (#{doi.id}), #{error.message}" + end + end + + "Finished updating dois, total #{count}" + end + + +end \ No newline at end of file diff --git a/lib/tasks/doi.rake b/lib/tasks/doi.rake index e38f45348..4c24e5bb2 100644 --- a/lib/tasks/doi.rake +++ b/lib/tasks/doi.rake @@ -243,4 +243,13 @@ namespace :doi do count = Doi.delete_dois_by_prefix(ENV['PREFIX']) puts "#{count} DOIs with prefix #{ENV['PREFIX']} deleted." end + + desc 'Add type information to dois based on id range' + task :add_index_type => :environment do + options = { + from_id: ENV['FROM_ID'], + until_id: ENV['UNTIL_ID'] + } + puts Doi.add_index_type(options) + end end From eee13002bd80cac42d1e014dc432d0f812a09bca Mon Sep 17 00:00:00 2001 From: Richard Hallett Date: Mon, 28 Sep 2020 17:22:09 +0200 Subject: [PATCH 2/2] Fix unknown case for adding index type --- app/models/doi.rb | 6 ++++-- 1 file changed, 4 insertions(+), 2 deletions(-) diff --git a/app/models/doi.rb b/app/models/doi.rb index 55f997dc2..79829a879 100644 --- a/app/models/doi.rb +++ b/app/models/doi.rb @@ -2327,11 +2327,13 @@ def self.add_index_type(options={}) begin if doi.agency.casecmp?("datacite") type = "DataciteDoi" - else + elsif doi.agency.casecmp?("crossref") type = "OtherDoi" + else + type = "DataciteDoi" end - doi.update_columns("type": type) + doi.update_columns("type" => type) count += 1 Rails.logger.info "Updated #{doi.doi} (#{doi.id})"