diff --git a/lib/tasks/enrich.rake b/lib/tasks/enrich.rake
index 97cde6b15..3bd087000 100644
--- a/lib/tasks/enrich.rake
+++ b/lib/tasks/enrich.rake
@@ -3,6 +3,27 @@
 namespace :enrich do
   desc "Enrich Clients with Subjects from re3data and converted to Field Of Science subjectScheme"
   task client_subjects: :environment do
+    # Collects every Client matching +query+ by fetching all result pages.
+    #
+    # @param query [String, nil] search query passed through to Client.query
+    # @return [Array] the records of every page, in page order
+    def all_clients_from_query(query: nil)
+      page_size = 1_000
+      response = Client.query(query, page: { size: page_size, number: 1 })
+      clients = response.records.to_a
+
+      # The total reported with the first page determines how many pages remain.
+      total_pages = (response.records.total.to_f / page_size).ceil
+
+      (2..total_pages).each do |page_number|
+        # Must be Client.query: this method is defined inside the rake task,
+        # so `self` is not Client and `self.query` raises NoMethodError.
+        response = Client.query(query, page: { size: page_size, number: page_number })
+        clients.concat(response.records.to_a)
+      end
+      clients
+    end
+
     def enrich_client(client)
       re3data = DataCatalog.find_by_id(client.re3data_id).fetch(:data, []).first
       if re3data
@@ -19,11 +40,15 @@ namespace :enrich do
     end
 
     puts "Searching for disciplinary repositories with re3data_ids without subjects"
-    search_results = Client.search("re3data_id:* AND -subjects:* AND -deleted_at:* AND repository_type:disciplinary")
-    puts "Found #{search_results.records.count} repostitories. Enriching now..."
-    search_results.records.map do |c|
-      enrich_client(c)
+    # Page through the full result set rather than a single search response.
+    clients = all_clients_from_query(query: "re3data_id:* AND -subjects:* AND -deleted_at:* AND repository_type:disciplinary")
+    puts "Found #{clients.count} repostitories."
+    if clients.empty?
+      puts "Skipping enrichment"
+    else
+      puts "Enriching now..."
+      clients.each { |c| enrich_client(c) }
+      puts "Enrichment complete"
     end
-    puts "Enrichment complete"
   end
 end