From 7b60d407854883e48b740e52bcd60961fc580b19 Mon Sep 17 00:00:00 2001
From: jrhoads
Date: Tue, 14 Feb 2023 13:31:58 -0500
Subject: [PATCH] Add rake task to back-fill disciplinary clients/repositories that have re3data subjects.

---
 lib/tasks/enrich.rake | 38 ++++++++++++++++++++++++++++++++++++++
 1 file changed, 38 insertions(+)
 create mode 100644 lib/tasks/enrich.rake

diff --git a/lib/tasks/enrich.rake b/lib/tasks/enrich.rake
new file mode 100644
index 000000000..91ec75f4e
--- /dev/null
+++ b/lib/tasks/enrich.rake
@@ -0,0 +1,38 @@
+# frozen_string_literal: true
+
+namespace :enrich do
+  desc "Enrich Clients with Subjects from re3data and converted to Field Of Science subjectScheme"
+  task client_subjects: :environment do
+    # Looks up the client's re3data record, converts its DFG subject ids to
+    # Field Of Science subjects and saves them on the client. Clients without
+    # a re3data record are left untouched.
+    #
+    # A lambda rather than a nested `def`: `def` inside a task block defines the
+    # method globally on Object instead of scoping it to this task.
+    enrich_client = lambda do |client|
+      re3data = DataCatalog.find_by_id(client.re3data_id).fetch(:data, []).first
+      next unless re3data
+
+      # Array() and to_s guard against records with no subjects or no text.
+      dfg_subjects = Array(re3data.subjects).select { |subject| subject.scheme == "DFG" }
+      # The DFG id is taken to be the first whitespace-separated token of the text.
+      dfg_ids = dfg_subjects.map { |subject| subject.text.to_s.split.first }.compact
+
+      client.subjects = Bolognese::Utils.dfg_ids_to_fos(dfg_ids).uniq
+      next if client.save
+
+      # Assumes ActiveRecord-style `save` (falsy on failure) and `errors`;
+      # report the failure instead of silently dropping it.
+      errors = client.errors.full_messages.join(", ")
+      puts "Could not save subjects for re3data_id #{client.re3data_id}: #{errors}"
+    end
+
+    puts "Searching for disciplinary repositories with re3data_ids without subjects"
+    # NOTE(review): if Client.search is a paginated search it may return only the
+    # first page of hits -- confirm every matching repository is covered.
+    clients = Client.search("re3data_id:* AND -subjects:* AND repository_type:disciplinary").records
+    puts "Found #{clients.count} repositories. Enriching now..."
+    clients.each { |client| enrich_client.call(client) }
+    puts "Enrichment complete"
+  end
+end