Skip to content

Commit

Permalink
Merge pull request #645 from datacite/add_rake_fix_types
Browse files Browse the repository at this point in the history
Add rake task for fixing index type
  • Loading branch information
richardhallett authored Sep 28, 2020
2 parents a47573a + f739a00 commit ed61150
Show file tree
Hide file tree
Showing 2 changed files with 50 additions and 4 deletions.
45 changes: 41 additions & 4 deletions app/models/doi.rb
Original file line number Diff line number Diff line change
Expand Up @@ -868,7 +868,7 @@ def self.gql_query(query, options={})
filter << { terms: { "client.certificate" => options[:certificate].split(",") }} if options[:certificate].present?
filter << { term: { "creators.nameIdentifiers.nameIdentifier" => "https://orcid.org/#{orcid_from_url(options[:user_id])}" }} if options[:user_id].present?
filter << { term: { "creators.nameIdentifiers.nameIdentifierScheme" => "ORCID" }} if options[:has_person].present?

# match either one of has_affiliation, has_organization, has_funder or has_member
if options[:has_organization].present?
should << { term: { "creators.nameIdentifiers.nameIdentifierScheme" => "ROR" }}
Expand Down Expand Up @@ -1067,7 +1067,7 @@ def self.query(query, options={})
filter << { terms: { "client.certificate" => options[:certificate].split(",") }} if options[:certificate].present?
filter << { term: { "creators.nameIdentifiers.nameIdentifier" => "https://orcid.org/#{orcid_from_url(options[:user_id])}" }} if options[:user_id].present?
filter << { term: { "creators.nameIdentifiers.nameIdentifierScheme" => "ORCID" }} if options[:has_person].present?

# match either one of has_affiliation, has_organization, or has_funder
if options[:has_organization].present?
should << { term: { "creators.nameIdentifiers.nameIdentifierScheme" => "ROR" }}
Expand Down Expand Up @@ -2133,7 +2133,7 @@ def set_defaults
end

def update_agency
if agency.blank? || agency.casecmp?("datacite")
if agency.blank? || agency.casecmp?("datacite")
self.agency = "datacite"
self.type = "DataciteDoi"
elsif agency.casecmp?("crossref")
Expand Down Expand Up @@ -2310,4 +2310,41 @@ def self.migrate_landing_page(options={})

"Finished migrating landing pages."
end
end

# Backfills the `type` column for DOIs in an id range that have no type yet.
#
# @param options [Hash]
#   :from_id  first id of the range (required)
#   :until_id last id of the range, inclusive; defaults to from_id + 499
#             when missing or blank
# @return [String, nil] summary with the number of updated DOIs, or nil when
#   no :from_id was given
def self.add_index_type(options={})
  return nil unless options[:from_id].present?

  from_id = options[:from_id].to_i
  # A blank until_id (e.g. an empty UNTIL_ID env var) must fall back to the
  # default window; "".to_i would otherwise give 0 and an empty id range.
  until_id = options[:until_id].present? ? options[:until_id].to_i : from_id + 499

  count = 0

  Rails.logger.info "[migration_index_types] adding type information for DOIs with IDs #{from_id} - #{until_id}."

  # only visit DOIs between from_id and until_id whose type is still unset
  Doi.where(id: from_id..until_id).where("type" => nil).find_each(batch_size: 500) do |doi|
    begin
      # Crossref DOIs become OtherDoi; every other agency, including a missing
      # one, becomes DataciteDoi. Safe navigation keeps a nil agency from
      # raising, so those records are typed instead of being logged as errors.
      type = doi.agency&.casecmp?("crossref") ? "OtherDoi" : "DataciteDoi"

      doi.update_columns("type" => type)

      count += 1
      Rails.logger.info "Updated #{doi.doi} (#{doi.id})"
    rescue => error
      # best effort: log the failure and continue with the next DOI
      Rails.logger.error "Error updating #{doi.doi} (#{doi.id}), #{error.message}"
    end
  end

  "Finished updating dois, total #{count}"
end


end
9 changes: 9 additions & 0 deletions lib/tasks/doi.rake
Original file line number Diff line number Diff line change
Expand Up @@ -243,4 +243,13 @@ namespace :doi do
count = Doi.delete_dois_by_prefix(ENV['PREFIX'])
puts "#{count} DOIs with prefix #{ENV['PREFIX']} deleted."
end

desc 'Add type information to dois based on id range'
task add_index_type: :environment do
  # FROM_ID and UNTIL_ID are passed through exactly as read from the
  # environment; the result of Doi.add_index_type is printed as-is.
  id_range = { from_id: ENV['FROM_ID'], until_id: ENV['UNTIL_ID'] }
  puts Doi.add_index_type(id_range)
end
end

0 comments on commit ed61150

Please sign in to comment.