Skip to content

Commit

Permalink
Merge pull request #922 from datacite/doi-enrichment-rake
Browse files Browse the repository at this point in the history
Only enrich non-deleted clients/repositories
  • Loading branch information
jrhoads authored Feb 22, 2023
2 parents dd765cf + 2d6e74e commit aec861c
Showing 1 changed file with 30 additions and 5 deletions.
35 changes: 30 additions & 5 deletions lib/tasks/enrich.rake
Original file line number Diff line number Diff line change
Expand Up @@ -3,6 +3,26 @@
namespace :enrich do
desc "Enrich Clients with Subjects from re3data and converted to Field Of Science subjectScheme"
task client_subjects: :environment do
# Collects every Client record matching +query+ by walking all result pages.
#
# The first page is requested to learn the total hit count; the remaining
# pages are then requested one at a time and their records appended in
# page order.
#
# @param query [String, nil] search query handed through to Client.query
# @param page_size [Integer] number of records requested per page
# @return [Array] records from all pages, in page order
def all_clients_from_query(query: nil, page_size: 1_000)
  response = Client.query(query, page: { size: page_size, number: 1 })
  clients = response.records.to_a

  total = response.records.total
  # A non-positive page size would divide by zero; treat it as "no more pages".
  total_pages = page_size.positive? ? (total.to_f / page_size).ceil : 0

  # Page 1 is already loaded above, so continue from page 2.
  (2..total_pages).each do |page_num|
    # Must go through Client explicitly: this method is defined inside a rake
    # task, so `self` is not the Client class and `self.query` would raise.
    response = Client.query(query, page: { size: page_size, number: page_num })
    clients += response.records.to_a
  end
  clients
end

def enrich_client(client)
re3data = DataCatalog.find_by_id(client.re3data_id).fetch(:data, []).first
if re3data
Expand All @@ -19,11 +39,16 @@ namespace :enrich do
end

puts "Searching for disciplinary repositories with re3data_ids without subjects"
search_results = Client.search("re3data_id:* AND -subjects:* AND repository_type:disciplinary")
puts "Found #{search_results.records.count} repostitories. Enriching now..."
search_results.records.map do |c|
enrich_client(c)
clients = all_clients_from_query(query: "re3data_id:* AND -subjects:* AND -deleted_at:* AND repository_type:disciplinary")
puts "Found #{clients.count} repostitories."
if clients.count > 0
puts "Enriching now..."
clients.each do |c|
enrich_client(c)
end
puts "Enrichment complete"
else
puts "Skipping enrichment"
end
puts "Enrichment complete"
end
end

0 comments on commit aec861c

Please sign in to comment.