Skip to content

Commit

Permalink
Add rake task to back-fill disciplinary clients/repositories that hav…
Browse files Browse the repository at this point in the history
…e re3data subjects.
  • Loading branch information
jrhoads committed Feb 14, 2023
1 parent 38c3c61 commit 7b60d40
Showing 1 changed file with 29 additions and 0 deletions.
29 changes: 29 additions & 0 deletions lib/tasks/enrich.rake
Original file line number Diff line number Diff line change
@@ -0,0 +1,29 @@
# frozen_string_literal: true

namespace :enrich do
  # Back-fills `subjects` on clients returned by the search below, using the
  # DFG-scheme subjects of each client's re3data record converted through
  # Bolognese::Utils.dfg_ids_to_fos.
  #
  # Usage: rake enrich:client_subjects
  desc "Enrich Clients with Subjects from re3data and converted to Field Of Science subjectScheme"
  task client_subjects: :environment do
    # A lambda rather than a `def`: a method defined inside a task block ends
    # up as a private method on Object once the task runs, leaking a global
    # helper into the whole process.
    enrich_client = lambda do |client|
      re3data = DataCatalog.find_by_id(client.re3data_id).fetch(:data, []).first
      # No record in the lookup result: leave the client untouched.
      next unless re3data

      # The DFG id is taken to be the first whitespace-separated token of the
      # subject text (presumably "<id> <label>" -- confirm against re3data).
      # `to_s` + `compact` keep nil/blank texts from raising or leaking nils
      # into the conversion.
      dfg_ids = re3data.subjects.
        select { |subject| subject.scheme == "DFG" }.
        map { |subject| subject.text.to_s.split.first }.
        compact

      client.subjects = Bolognese::Utils.dfg_ids_to_fos(dfg_ids).uniq

      # Surface a failed save instead of silently dropping it.
      unless client.save
        warn "Could not save subjects for client with re3data_id #{client.re3data_id}"
      end
    end

    puts "Searching for disciplinary repositories with re3data_ids without subjects"
    search_results = Client.search("re3data_id:* AND -subjects:* AND repository_type:disciplinary")
    # NOTE(review): whether `records` covers every match or only the first
    # page of search hits depends on Client.search -- confirm.
    clients = search_results.records
    puts "Found #{clients.count} repositories. Enriching now..."
    # `each`, not `map`: the loop runs only for its side effects.
    clients.each { |client| enrich_client.call(client) }
    puts "Enrichment complete"
  end
end

0 comments on commit 7b60d40

Please sign in to comment.