Skip to content

Commit

Permalink
fix delete by prefix rake task. #272
Browse files Browse the repository at this point in the history
  • Loading branch information
Martin Fenner committed May 22, 2019
1 parent 61b3fc3 commit 3312f47
Show file tree
Hide file tree
Showing 9 changed files with 101 additions and 44 deletions.
1 change: 1 addition & 0 deletions Gemfile
Original file line number Diff line number Diff line change
Expand Up @@ -48,6 +48,7 @@ gem 'rack-cors', '~> 1.0', '>= 1.0.2', :require => 'rack/cors'
gem 'strip_attributes', '~> 1.8'
gem 'slack-notifier', '~> 2.1'
gem 'mini_magick', '~> 4.8'
gem 'elasticsearch', '~> 6.8'
gem 'elasticsearch-model', '~> 6.0.0', require: 'elasticsearch/model'
gem 'elasticsearch-rails', '~> 6.0.0'
gem 'faraday_middleware-aws-sigv4', '~> 0.2.4'
Expand Down
15 changes: 8 additions & 7 deletions Gemfile.lock
Original file line number Diff line number Diff line change
Expand Up @@ -65,8 +65,8 @@ GEM
audited (4.8.0)
activerecord (>= 4.0, < 5.3)
aws-eventstream (1.0.3)
aws-partitions (1.165.0)
aws-sdk-core (3.53.0)
aws-partitions (1.166.0)
aws-sdk-core (3.53.1)
aws-eventstream (~> 1.0, >= 1.0.2)
aws-partitions (~> 1.0)
aws-sigv4 (~> 1.1)
Expand Down Expand Up @@ -188,10 +188,10 @@ GEM
sxp (~> 1.0)
edtf (3.0.4)
activesupport (>= 3.0, < 6.0)
elasticsearch (7.1.0)
elasticsearch-api (= 7.1.0)
elasticsearch-transport (= 7.1.0)
elasticsearch-api (7.1.0)
elasticsearch (6.8.0)
elasticsearch-api (= 6.8.0)
elasticsearch-transport (= 6.8.0)
elasticsearch-api (6.8.0)
multi_json
elasticsearch-extensions (0.0.31)
ansi
Expand All @@ -201,7 +201,7 @@ GEM
elasticsearch (> 1)
hashie
elasticsearch-rails (6.0.0)
elasticsearch-transport (7.1.0)
elasticsearch-transport (6.8.0)
faraday
multi_json
equivalent-xml (0.6.0)
Expand Down Expand Up @@ -554,6 +554,7 @@ DEPENDENCIES
database_cleaner
diffy (~> 3.2, >= 3.2.1)
dotenv
elasticsearch (~> 6.8)
elasticsearch-extensions (~> 0.0.29)
elasticsearch-model (~> 6.0.0)
elasticsearch-rails (~> 6.0.0)
Expand Down
15 changes: 15 additions & 0 deletions app/jobs/delete_job.rb
Original file line number Diff line number Diff line change
@@ -0,0 +1,15 @@
# Background job that deletes a single DOI record, looked up by its DOI name.
# Enqueued on the :lupo_background queue.
class DeleteJob < ActiveJob::Base
  queue_as :lupo_background

  # doi_id  - DOI name, matched against the `doi` column of Doi.
  # options - currently unused by this job.
  #
  # Logs the outcome to STDOUT; never raises for a missing record.
  def perform(doi_id, options={})
    logger = Logger.new(STDOUT)
    doi = Doi.where(doi: doi_id).first

    # Interpolation (instead of String#+) keeps logging safe when doi_id is nil.
    if doi.blank?
      logger.info "Error deleting DOI #{doi_id}: not found"
    elsif doi.destroy
      # destroy returns a falsy value when a callback halts the deletion,
      # so only report success when it actually went through.
      logger.info "Deleted DOI #{doi_id}."
    else
      logger.error "Error deleting DOI #{doi_id}: record could not be destroyed"
    end
  end
end
2 changes: 1 addition & 1 deletion app/jobs/transfer_job.rb
Original file line number Diff line number Diff line change
Expand Up @@ -22,4 +22,4 @@ def perform(doi_id, options={})
logger.info "[Transfer] Error transferring DOI " + doi_id + ": not found"
end
end
end
end
2 changes: 1 addition & 1 deletion app/models/concerns/indexable.rb
Original file line number Diff line number Diff line change
Expand Up @@ -142,7 +142,7 @@ def query(query, options={})
must << { term: { "types.resourceTypeGeneral": options[:resource_type_id].underscore.camelize }} if options[:resource_type_id].present?
must << { terms: { provider_id: options[:provider_id].split(",") }} if options[:provider_id].present?
must << { terms: { client_id: options[:client_id].to_s.split(",") }} if options[:client_id].present?
must << { term: { prefix: options[:prefix] }} if options[:prefix].present?
must << { terms: { prefix: options[:prefix].to_s.split(",") }} if options[:prefix].present?
must << { term: { uid: options[:uid] }} if options[:uid].present?
must << { term: { "author.id" => "https://orcid.org/#{options[:person_id]}" }} if options[:person_id].present?
must << { range: { created: { gte: "#{options[:created].split(",").min}||/y", lte: "#{options[:created].split(",").max}||/y", format: "yyyy" }}} if options[:created].present?
Expand Down
44 changes: 35 additions & 9 deletions app/models/doi.rb
Original file line number Diff line number Diff line change
Expand Up @@ -871,13 +871,39 @@ def self.delete_test_dois(from_date: nil)
end
end

#to be used after DOIS transfer to another RA
def self.delete_dois_by_prefix(prefix)
collection.where("doi LIKE ?", prefix).find_each do |d|
logger = Logger.new(STDOUT)
logger.info "Deleted #{d.doi}, last updated #{d.updated.iso8601}."
d.destroy
# To be used after DOIs were transferred to another DOI RA.
#
# Finds all DOIs with the given prefix via Doi.query and enqueues one
# DeleteJob per DOI. Deletion itself happens asynchronously in the jobs.
#
# prefix  - DOI prefix to delete; must not be blank.
# options - Hash; :size sets the page size per query (default 1000).
#
# Returns the total number of DOIs found for the prefix (i.e. the number
# queued for deletion), or nil when no prefix was provided.
def self.delete_dois_by_prefix(prefix, options={})
  logger = Logger.new(STDOUT)

  if prefix.blank?
    # Must be the logger instance: the Logger class has no .error method.
    logger.error "[Error] No prefix provided."
    return nil
  end

  size = (options[:size] || 1000).to_i

  # Cheap first query (one hit) just to learn the total number of matches.
  response = Doi.query(nil, prefix: prefix, page: { size: 1, cursor: 0 })
  total = response.results.total
  logger.info "#{total} DOIs found for prefix #{prefix}."
  return total unless total > 0

  # walk through results using cursor
  cursor = 0

  loop do
    response = Doi.query(nil, prefix: prefix, page: { size: size, cursor: cursor })
    hits = response.results.results
    break unless hits.length > 0

    logger.info "Deleting #{hits.length} DOIs starting with _id #{cursor + 1}."
    # The sort value of the last hit becomes the cursor for the next page.
    cursor = response.results.to_a.last[:sort].first.to_i

    hits.each do |d|
      DeleteJob.perform_later(d.doi)
    end
  end

  total
end

# register DOIs in the handle system that have not been registered yet
Expand Down Expand Up @@ -970,15 +996,15 @@ def self.transfer(options={})
query = options[:query] || "*"
size = (options[:size] || 1000).to_i

response = Doi.query(query, client_id: options[:client_id], page: { size: 1, cursor: 0 })
response = Doi.query(nil, client_id: options[:client_id], page: { size: 1, cursor: 0 })
logger.info "[Transfer] #{response.results.total} DOIs found for client #{options[:client_id]}."

if options[:client_id] && options[:target_id] && response.results.total > 0
# walk through results using cursor
cursor = 0

while response.results.results.length > 0 do
response = Doi.query(query, client_id: options[:client_id], page: { size: size, cursor: cursor })
response = Doi.query(nil, client_id: options[:client_id], page: { size: size, cursor: cursor })
break unless response.results.results.length > 0

logger.info "[Transfer] Transferring #{response.results.results.length} DOIs starting with _id #{cursor + 1}."
Expand Down Expand Up @@ -1024,7 +1050,7 @@ def self.migrate_landing_page(options={})
"download-latency" => "downloadLatency"
}
result = result.map {|k, v| [mappings[k] || k, v] }.to_h
# doi.update_columns("last_landing_page_status_result": result)
# doi.update_columns("last_landing_page_status_result": result)

# Do a fix of the stored download Latency
# Sometimes was floating point precision, we dont need this
Expand Down
24 changes: 4 additions & 20 deletions lib/tasks/client.rake
Original file line number Diff line number Diff line change
Expand Up @@ -91,28 +91,12 @@ namespace :client do

# delete all associated prefixes and DOIs
prefixes = client.prefixes.where.not('prefix IN (?)', prefixes_to_keep).pluck(:prefix)
prefix_ids = client.prefixes.where.not('prefix IN (?)', prefixes_to_keep).pluck(:id)

response = client.client_prefixes.destroy_all
puts "#{response.count} client prefixes deleted."

if prefix_ids.present?
response = ProviderPrefix.where('prefixes IN (?)', prefix_ids).destroy_all
puts "#{response.count} provider prefixes deleted."
end

if prefixes.present?
response = Prefix.where('prefix IN (?)', prefixes).destroy_all
puts "Prefixes #{prefixes.join(" and ")} deleted."
prefixes.each do |prefix|
ENV['PREFIX'] = prefix
Rake::Task["prefix:delete"].reenable
Rake::Task["prefix:delete"].invoke
end

# delete DOIs in batches
puts "#{client.dois.length} DOIs will be deleted."
client.dois.find_each do |doi|
doi.destroy
puts "DOI #{doi.doi} deleted."
end

if client.update_attributes(is_active: nil, deleted_at: Time.zone.now)
client.send_delete_email unless Rails.env.test?
puts "Client with client ID #{ENV['CLIENT_ID']} deleted."
Expand Down
6 changes: 0 additions & 6 deletions lib/tasks/doi.rake
Original file line number Diff line number Diff line change
Expand Up @@ -129,12 +129,6 @@ namespace :doi do
Doi.delete_test_dois(from_date: from_date)
end

desc 'Delete DOIs with by prefix'
task :delete_test_dois => :environment do
prefix = ENV['PREFIX_TO_DELETE']
Doi.delete_dois_by_prefix(prefix) if prefix.present?
end

desc 'Migrates landing page data handling camelCase changes at same time'
task :migrate_landing_page => :environment do
Doi.migrate_landing_page
Expand Down
36 changes: 36 additions & 0 deletions lib/tasks/prefix.rake
Original file line number Diff line number Diff line change
@@ -0,0 +1,36 @@
namespace :prefix do
  # Deletes the prefix named in ENV['PREFIX'] together with its client and
  # provider prefix associations, then hands the DOIs with that prefix to
  # Doi.delete_dois_by_prefix.
  #
  # Guard clauses use `next` rather than `exit`: `exit` terminates the whole
  # rake process, which would also abort any task that invokes this one
  # programmatically via Rake::Task["prefix:delete"].invoke.
  desc 'Delete prefix and associated DOIs'
  task :delete => :environment do
    # These prefixes are used by multiple clients and can't be deleted
    prefixes_to_keep = %w(10.5072 10.4124 10.4225 10.4226 10.4227)

    if ENV['PREFIX'].nil?
      puts "ENV['PREFIX'] is required."
      next
    end

    if prefixes_to_keep.include?(ENV['PREFIX'])
      puts "Prefix #{ENV['PREFIX']} can't be deleted."
      next
    end

    prefix = Prefix.where(prefix: ENV['PREFIX']).first
    if prefix.nil?
      puts "Prefix #{ENV['PREFIX']} not found."
      next
    end

    # Remove the join records first, then the prefix itself.
    ClientPrefix.where('prefixes = ?', prefix.id).destroy_all
    puts "Client prefix deleted."

    ProviderPrefix.where('prefixes = ?', prefix.id).destroy_all
    puts "Provider prefix deleted."

    prefix.destroy
    puts "Prefix #{ENV['PREFIX']} deleted."

    # delete DOIs
    # NOTE(review): count is whatever Doi.delete_dois_by_prefix returns;
    # confirm whether the DOIs are already deleted at this point or only
    # scheduled for deletion before relying on the wording of this message.
    count = Doi.delete_dois_by_prefix(ENV['PREFIX'])
    puts "#{count} DOIs with prefix #{ENV['PREFIX']} deleted."
  end
end

0 comments on commit 3312f47

Please sign in to comment.