diff --git a/Gemfile b/Gemfile
index 941211367..1b35ac451 100644
--- a/Gemfile
+++ b/Gemfile
@@ -48,6 +48,7 @@ gem 'rack-cors', '~> 1.0', '>= 1.0.2', :require => 'rack/cors'
 gem 'strip_attributes', '~> 1.8'
 gem 'slack-notifier', '~> 2.1'
 gem 'mini_magick', '~> 4.8'
+gem 'elasticsearch', '~> 6.8'
 gem 'elasticsearch-model', '~> 6.0.0', require: 'elasticsearch/model'
 gem 'elasticsearch-rails', '~> 6.0.0'
 gem 'faraday_middleware-aws-sigv4', '~> 0.2.4'
diff --git a/Gemfile.lock b/Gemfile.lock
index b10bef74e..0567afbcb 100644
--- a/Gemfile.lock
+++ b/Gemfile.lock
@@ -65,8 +65,8 @@ GEM
     audited (4.8.0)
       activerecord (>= 4.0, < 5.3)
     aws-eventstream (1.0.3)
-    aws-partitions (1.165.0)
-    aws-sdk-core (3.53.0)
+    aws-partitions (1.166.0)
+    aws-sdk-core (3.53.1)
       aws-eventstream (~> 1.0, >= 1.0.2)
       aws-partitions (~> 1.0)
       aws-sigv4 (~> 1.1)
@@ -188,10 +188,10 @@ GEM
       sxp (~> 1.0)
     edtf (3.0.4)
       activesupport (>= 3.0, < 6.0)
-    elasticsearch (7.1.0)
-      elasticsearch-api (= 7.1.0)
-      elasticsearch-transport (= 7.1.0)
-    elasticsearch-api (7.1.0)
+    elasticsearch (6.8.0)
+      elasticsearch-api (= 6.8.0)
+      elasticsearch-transport (= 6.8.0)
+    elasticsearch-api (6.8.0)
       multi_json
     elasticsearch-extensions (0.0.31)
       ansi
@@ -201,7 +201,7 @@ GEM
       elasticsearch (> 1)
       hashie
     elasticsearch-rails (6.0.0)
-    elasticsearch-transport (7.1.0)
+    elasticsearch-transport (6.8.0)
       faraday
       multi_json
     equivalent-xml (0.6.0)
@@ -554,6 +554,7 @@ DEPENDENCIES
   database_cleaner
   diffy (~> 3.2, >= 3.2.1)
   dotenv
+  elasticsearch (~> 6.8)
   elasticsearch-extensions (~> 0.0.29)
   elasticsearch-model (~> 6.0.0)
   elasticsearch-rails (~> 6.0.0)
diff --git a/app/jobs/delete_job.rb b/app/jobs/delete_job.rb
new file mode 100644
index 000000000..4c690eaa0
--- /dev/null
+++ b/app/jobs/delete_job.rb
@@ -0,0 +1,21 @@
+# Background job that destroys the DOI record matching a given DOI name.
+class DeleteJob < ActiveJob::Base
+  queue_as :lupo_background
+
+  # Looks the DOI up by name and destroys it, logging the outcome to STDOUT.
+  #
+  # @param doi_id [String] DOI name used to find the record
+  # @param options [Hash] unused; mirrors the TransferJob#perform signature
+  def perform(doi_id, options={})
+    logger = Logger.new(STDOUT)
+    doi = Doi.where(doi: doi_id).first
+
+    if doi.present?
+      doi.destroy
+      logger.info "Deleted DOI #{doi_id}."
+    else
+      # interpolation instead of String#+ so a nil or non-String id cannot raise TypeError
+      logger.info "Error deleting DOI #{doi_id}: not found"
+    end
+  end
+end
diff --git a/app/jobs/transfer_job.rb b/app/jobs/transfer_job.rb
index d23ae6e15..a2ffbfc4e 100644
--- a/app/jobs/transfer_job.rb
+++ b/app/jobs/transfer_job.rb
@@ -22,4 +22,4 @@ def perform(doi_id, options={})
       logger.info "[Transfer] Error transferring DOI " + doi_id + ": not found"
     end
   end
-end
\ No newline at end of file
+end
diff --git a/app/models/concerns/indexable.rb b/app/models/concerns/indexable.rb
index 766c24271..5989fa9b8 100644
--- a/app/models/concerns/indexable.rb
+++ b/app/models/concerns/indexable.rb
@@ -142,7 +142,7 @@ def query(query, options={})
       must << { term: { "types.resourceTypeGeneral": options[:resource_type_id].underscore.camelize }} if options[:resource_type_id].present?
       must << { terms: { provider_id: options[:provider_id].split(",") }} if options[:provider_id].present?
       must << { terms: { client_id: options[:client_id].to_s.split(",") }} if options[:client_id].present?
-      must << { term: { prefix: options[:prefix] }} if options[:prefix].present?
+      must << { terms: { prefix: options[:prefix].to_s.split(",") }} if options[:prefix].present?
       must << { term: { uid: options[:uid] }} if options[:uid].present?
       must << { term: { "author.id" => "https://orcid.org/#{options[:person_id]}" }} if options[:person_id].present?
       must << { range: { created: { gte: "#{options[:created].split(",").min}||/y", lte: "#{options[:created].split(",").max}||/y", format: "yyyy" }}} if options[:created].present?
diff --git a/app/models/doi.rb b/app/models/doi.rb
index b1a900459..69ea83ca1 100644
--- a/app/models/doi.rb
+++ b/app/models/doi.rb
@@ -871,13 +871,43 @@ def self.delete_test_dois(from_date: nil)
     end
   end
 
-  #to be used after DOIS transfer to another RA
-  def self.delete_dois_by_prefix(prefix)
-    collection.where("doi LIKE ?", prefix).find_each do |d|
-      logger = Logger.new(STDOUT)
-      logger.info "Deleted #{d.doi}, last updated #{d.updated.iso8601}."
-      d.destroy
+  # Queues a DeleteJob for every DOI returned by a query on the given prefix.
+  # To be used after DOIs were transferred to another DOI RA.
+  #
+  # @param prefix [String] DOI prefix to match
+  # @param options [Hash] :size is the page size per query (default 1000)
+  # @return [Integer, nil] total reported by the last query, or nil when prefix is blank
+  def self.delete_dois_by_prefix(prefix, options={})
+    logger = Logger.new(STDOUT)
+
+    if prefix.blank?
+      logger.error "[Error] No prefix provided."
+      return nil
     end
+
+    size = (options[:size] || 1000).to_i
+
+    response = Doi.query(nil, prefix: prefix, page: { size: 1, cursor: 0 })
+    logger.info "#{response.results.total} DOIs found for prefix #{prefix}."
+
+    if response.results.total > 0
+      # walk through results using cursor
+      cursor = 0
+
+      while response.results.results.length > 0 do
+        response = Doi.query(nil, prefix: prefix, page: { size: size, cursor: cursor })
+        break unless response.results.results.length > 0
+
+        logger.info "Deleting #{response.results.results.length} DOIs starting with _id #{cursor + 1}."
+        cursor = response.results.to_a.last[:sort].first.to_i
+
+        response.results.results.each do |d|
+          DeleteJob.perform_later(d.doi)
+        end
+      end
+    end
+
+    response.results.total
   end
 
   # register DOIs in the handle system that have not been registered yet
@@ -970,7 +1000,7 @@ def self.transfer(options={})
     query = options[:query] || "*"
     size = (options[:size] || 1000).to_i
 
-    response = Doi.query(query, client_id: options[:client_id], page: { size: 1, cursor: 0 })
+    response = Doi.query(nil, client_id: options[:client_id], page: { size: 1, cursor: 0 })
     logger.info "[Transfer] #{response.results.total} DOIs found for client #{options[:client_id]}."
 
     if options[:client_id] && options[:target_id] && response.results.total > 0
@@ -978,7 +1008,7 @@ def self.transfer(options={})
       cursor = 0
 
       while response.results.results.length > 0 do
-        response = Doi.query(query, client_id: options[:client_id], page: { size: size, cursor: cursor })
+        response = Doi.query(nil, client_id: options[:client_id], page: { size: size, cursor: cursor })
         break unless response.results.results.length > 0
 
         logger.info "[Transfer] Transferring #{response.results.results.length} DOIs starting with _id #{cursor + 1}."
@@ -1024,7 +1054,7 @@ def self.migrate_landing_page(options={})
           "download-latency" => "downloadLatency"
         }
         result = result.map {|k, v| [mappings[k] || k, v] }.to_h
-#        doi.update_columns("last_landing_page_status_result": result)
+        # doi.update_columns("last_landing_page_status_result": result)
 
         # Do a fix of the stored download Latency
         # Sometimes was floating point precision, we dont need this
diff --git a/lib/tasks/client.rake b/lib/tasks/client.rake
index bd714095c..cea9387ae 100644
--- a/lib/tasks/client.rake
+++ b/lib/tasks/client.rake
@@ -91,28 +91,12 @@ namespace :client do
 
     # delete all associated prefixes and DOIs
     prefixes = client.prefixes.where.not('prefix IN (?)', prefixes_to_keep).pluck(:prefix)
-    prefix_ids = client.prefixes.where.not('prefix IN (?)', prefixes_to_keep).pluck(:id)
-
-    response = client.client_prefixes.destroy_all
-    puts "#{response.count} client prefixes deleted."
-
-    if prefix_ids.present?
-      response = ProviderPrefix.where('prefixes IN (?)', prefix_ids).destroy_all
-      puts "#{response.count} provider prefixes deleted."
-    end
-
-    if prefixes.present?
-      response = Prefix.where('prefix IN (?)', prefixes).destroy_all
-      puts "Prefixes #{prefixes.join(" and ")} deleted."
+    prefixes.each do |prefix|
+      ENV['PREFIX'] = prefix
+      Rake::Task["prefix:delete"].reenable
+      Rake::Task["prefix:delete"].invoke
     end
 
-    # delete DOIs in batches
-    puts "#{client.dois.length} DOIs will be deleted."
-    client.dois.find_each do |doi|
-      doi.destroy
-      puts "DOI #{doi.doi} deleted."
-    end
-
     if client.update_attributes(is_active: nil, deleted_at: Time.zone.now)
       client.send_delete_email unless Rails.env.test?
       puts "Client with client ID #{ENV['CLIENT_ID']} deleted."
diff --git a/lib/tasks/doi.rake b/lib/tasks/doi.rake
index 5558539c2..25d71b6b8 100644
--- a/lib/tasks/doi.rake
+++ b/lib/tasks/doi.rake
@@ -129,12 +129,6 @@ namespace :doi do
     Doi.delete_test_dois(from_date: from_date)
   end
 
-  desc 'Delete DOIs with by prefix'
-  task :delete_test_dois => :environment do
-    prefix = ENV['PREFIX_TO_DELETE']
-    Doi.delete_dois_by_prefix(prefix) if prefix.present?
-  end
-
   desc 'Migrates landing page data handling camelCase changes at same time'
   task :migrate_landing_page => :environment do
     Doi.migrate_landing_page
diff --git a/lib/tasks/prefix.rake b/lib/tasks/prefix.rake
new file mode 100644
index 000000000..6164a372e
--- /dev/null
+++ b/lib/tasks/prefix.rake
@@ -0,0 +1,41 @@
+namespace :prefix do
+  desc 'Delete prefix and associated DOIs'
+  task :delete => :environment do
+    # Reads the prefix from ENV['PREFIX'], removes its client/provider
+    # assignments and the prefix record, then queues deletion of its DOIs.
+    # Guard clauses use `next` rather than `exit` so that a task invoking
+    # this one (e.g. via Rake::Task#invoke) is not terminated as well.
+
+    # These prefixes are used by multiple clients and can't be deleted
+    prefixes_to_keep = %w(10.5072 10.4124 10.4225 10.4226 10.4227)
+
+    if ENV['PREFIX'].blank?
+      puts "ENV['PREFIX'] is required."
+      next
+    end
+
+    if prefixes_to_keep.include?(ENV['PREFIX'])
+      puts "Prefix #{ENV['PREFIX']} can't be deleted."
+      next
+    end
+
+    prefix = Prefix.where(prefix: ENV['PREFIX']).first
+    if prefix.nil?
+      puts "Prefix #{ENV['PREFIX']} not found."
+      next
+    end
+
+    ClientPrefix.where('prefixes = ?', prefix.id).destroy_all
+    puts "Client prefix deleted."
+
+    ProviderPrefix.where('prefixes = ?', prefix.id).destroy_all
+    puts "Provider prefix deleted."
+
+    prefix.destroy
+    puts "Prefix #{ENV['PREFIX']} deleted."
+
+    # DOIs are deleted asynchronously by DeleteJob; count is the number found
+    count = Doi.delete_dois_by_prefix(ENV['PREFIX'])
+    puts "#{count} DOIs with prefix #{ENV['PREFIX']} queued for deletion."
+  end
+end