From 2dd72e2213863f86d80fe9be81be1fceccb00786 Mon Sep 17 00:00:00 2001 From: jrhoads Date: Wed, 29 Mar 2023 10:32:40 -0400 Subject: [PATCH] Revert "Revert "Doi-enrichment Redo"" This reverts commit 3cedaaebfaf309f76251f6732a16c7c57368ec5d. --- app/graphql/schema.graphql | 272 ++++++++++++++++++ app/graphql/types/doi_item.rb | 34 ++- app/graphql/types/query_type.rb | 39 ++- .../types/work_connection_with_total_type.rb | 18 ++ app/models/client.rb | 28 +- app/models/datacite_doi.rb | 3 +- app/models/doi.rb | 36 +++ app/models/schemas/client/subjects.json | 26 +- db/seeds/development/base.seeds.rb | 10 +- lib/tasks/datacite_doi.rake | 10 +- lib/tasks/enrich.rake | 54 ++++ spec/factories/client.rb | 15 + spec/graphql/types/doi_item_spec.rb | 3 + spec/graphql/types/work_type_spec.rb | 83 ++++++ spec/models/client_spec.rb | 12 +- spec/requests/repositories_spec.rb | 1 + 16 files changed, 611 insertions(+), 33 deletions(-) create mode 100644 lib/tasks/enrich.rake diff --git a/app/graphql/schema.graphql b/app/graphql/schema.graphql index 29fb47266..48a6bce8c 100644 --- a/app/graphql/schema.graphql +++ b/app/graphql/schema.graphql @@ -186,6 +186,16 @@ type Audiovisual implements DoiItem { """ fieldsOfScience: [FieldOfScience!] + """ + OECD Fields of Science of the resource and containing repository + """ + fieldsOfScienceCombined: [FieldOfScience!] + + """ + OECD Fields of Science of the containing repository + """ + fieldsOfScienceRepository: [FieldOfScience!] + """ Technical format of the resource """ @@ -495,6 +505,16 @@ type Book implements DoiItem { """ fieldsOfScience: [FieldOfScience!] + """ + OECD Fields of Science of the resource and containing repository + """ + fieldsOfScienceCombined: [FieldOfScience!] + + """ + OECD Fields of Science of the containing repository + """ + fieldsOfScienceRepository: [FieldOfScience!] + """ Technical format of the resource """ @@ -759,6 +779,16 @@ type BookChapter implements DoiItem { """ fieldsOfScience: [FieldOfScience!] 
+ """ + OECD Fields of Science of the resource and containing repository + """ + fieldsOfScienceCombined: [FieldOfScience!] + + """ + OECD Fields of Science of the containing repository + """ + fieldsOfScienceRepository: [FieldOfScience!] + """ Technical format of the resource """ @@ -1153,6 +1183,16 @@ type Collection implements DoiItem { """ fieldsOfScience: [FieldOfScience!] + """ + OECD Fields of Science of the resource and containing repository + """ + fieldsOfScienceCombined: [FieldOfScience!] + + """ + OECD Fields of Science of the containing repository + """ + fieldsOfScienceRepository: [FieldOfScience!] + """ Technical format of the resource """ @@ -1461,6 +1501,16 @@ type ConferencePaper implements DoiItem { """ fieldsOfScience: [FieldOfScience!] + """ + OECD Fields of Science of the resource and containing repository + """ + fieldsOfScienceCombined: [FieldOfScience!] + + """ + OECD Fields of Science of the containing repository + """ + fieldsOfScienceRepository: [FieldOfScience!] + """ Technical format of the resource """ @@ -2062,6 +2112,16 @@ type DataManagementPlan implements DoiItem { """ fieldsOfScience: [FieldOfScience!] + """ + OECD Fields of Science of the resource and containing repository + """ + fieldsOfScienceCombined: [FieldOfScience!] + + """ + OECD Fields of Science of the containing repository + """ + fieldsOfScienceRepository: [FieldOfScience!] + """ Technical format of the resource """ @@ -2371,6 +2431,16 @@ type DataPaper implements DoiItem { """ fieldsOfScience: [FieldOfScience!] + """ + OECD Fields of Science of the resource and containing repository + """ + fieldsOfScienceCombined: [FieldOfScience!] + + """ + OECD Fields of Science of the containing repository + """ + fieldsOfScienceRepository: [FieldOfScience!] + """ Technical format of the resource """ @@ -2679,6 +2749,16 @@ type Dataset implements DoiItem { """ fieldsOfScience: [FieldOfScience!] 
+ """ + OECD Fields of Science of the resource and containing repository + """ + fieldsOfScienceCombined: [FieldOfScience!] + + """ + OECD Fields of Science of the containing repository + """ + fieldsOfScienceRepository: [FieldOfScience!] + """ Technical format of the resource """ @@ -3106,6 +3186,16 @@ type Dissertation implements DoiItem { """ fieldsOfScience: [FieldOfScience!] + """ + OECD Fields of Science of the resource and containing repository + """ + fieldsOfScienceCombined: [FieldOfScience!] + + """ + OECD Fields of Science of the containing repository + """ + fieldsOfScienceRepository: [FieldOfScience!] + """ Technical format of the resource """ @@ -3418,6 +3508,16 @@ interface DoiItem { """ fieldsOfScience: [FieldOfScience!] + """ + OECD Fields of Science of the resource and containing repository + """ + fieldsOfScienceCombined: [FieldOfScience!] + + """ + OECD Fields of Science of the containing repository + """ + fieldsOfScienceRepository: [FieldOfScience!] + """ Technical format of the resource """ @@ -3732,6 +3832,16 @@ type Event implements DoiItem { """ fieldsOfScience: [FieldOfScience!] + """ + OECD Fields of Science of the resource and containing repository + """ + fieldsOfScienceCombined: [FieldOfScience!] + + """ + OECD Fields of Science of the containing repository + """ + fieldsOfScienceRepository: [FieldOfScience!] + """ Technical format of the resource """ @@ -4025,6 +4135,16 @@ type EventData implements DoiItem { """ fieldsOfScience: [FieldOfScience!] + """ + OECD Fields of Science of the resource and containing repository + """ + fieldsOfScienceCombined: [FieldOfScience!] + + """ + OECD Fields of Science of the containing repository + """ + fieldsOfScienceRepository: [FieldOfScience!] + """ Technical format of the resource """ @@ -4614,6 +4734,16 @@ type Image implements DoiItem { """ fieldsOfScience: [FieldOfScience!] 
+ """ + OECD Fields of Science of the resource and containing repository + """ + fieldsOfScienceCombined: [FieldOfScience!] + + """ + OECD Fields of Science of the containing repository + """ + fieldsOfScienceRepository: [FieldOfScience!] + """ Technical format of the resource """ @@ -4921,6 +5051,16 @@ type Instrument implements DoiItem { """ fieldsOfScience: [FieldOfScience!] + """ + OECD Fields of Science of the resource and containing repository + """ + fieldsOfScienceCombined: [FieldOfScience!] + + """ + OECD Fields of Science of the containing repository + """ + fieldsOfScienceRepository: [FieldOfScience!] + """ Technical format of the resource """ @@ -5228,6 +5368,16 @@ type InteractiveResource implements DoiItem { """ fieldsOfScience: [FieldOfScience!] + """ + OECD Fields of Science of the resource and containing repository + """ + fieldsOfScienceCombined: [FieldOfScience!] + + """ + OECD Fields of Science of the containing repository + """ + fieldsOfScienceRepository: [FieldOfScience!] + """ Technical format of the resource """ @@ -5541,6 +5691,16 @@ type JournalArticle implements DoiItem { """ fieldsOfScience: [FieldOfScience!] + """ + OECD Fields of Science of the resource and containing repository + """ + fieldsOfScienceCombined: [FieldOfScience!] + + """ + OECD Fields of Science of the containing repository + """ + fieldsOfScienceRepository: [FieldOfScience!] + """ Technical format of the resource """ @@ -6145,6 +6305,16 @@ type Model implements DoiItem { """ fieldsOfScience: [FieldOfScience!] + """ + OECD Fields of Science of the resource and containing repository + """ + fieldsOfScienceCombined: [FieldOfScience!] + + """ + OECD Fields of Science of the containing repository + """ + fieldsOfScienceRepository: [FieldOfScience!] + """ Technical format of the resource """ @@ -6625,6 +6795,16 @@ type Other implements DoiItem { """ fieldsOfScience: [FieldOfScience!] 
+ """ + OECD Fields of Science of the resource and containing repository + """ + fieldsOfScienceCombined: [FieldOfScience!] + + """ + OECD Fields of Science of the containing repository + """ + fieldsOfScienceRepository: [FieldOfScience!] + """ Technical format of the resource """ @@ -6958,6 +7138,16 @@ type PeerReview implements DoiItem { """ fieldsOfScience: [FieldOfScience!] + """ + OECD Fields of Science of the resource and containing repository + """ + fieldsOfScienceCombined: [FieldOfScience!] + + """ + OECD Fields of Science of the containing repository + """ + fieldsOfScienceRepository: [FieldOfScience!] + """ Technical format of the resource """ @@ -7403,6 +7593,16 @@ type PhysicalObject implements DoiItem { """ fieldsOfScience: [FieldOfScience!] + """ + OECD Fields of Science of the resource and containing repository + """ + fieldsOfScienceCombined: [FieldOfScience!] + + """ + OECD Fields of Science of the containing repository + """ + fieldsOfScienceRepository: [FieldOfScience!] + """ Technical format of the resource """ @@ -7764,6 +7964,16 @@ type Preprint implements DoiItem { """ fieldsOfScience: [FieldOfScience!] + """ + OECD Fields of Science of the resource and containing repository + """ + fieldsOfScienceCombined: [FieldOfScience!] + + """ + OECD Fields of Science of the containing repository + """ + fieldsOfScienceRepository: [FieldOfScience!] + """ Technical format of the resource """ @@ -8072,6 +8282,16 @@ type Publication implements DoiItem { """ fieldsOfScience: [FieldOfScience!] + """ + OECD Fields of Science of the resource and containing repository + """ + fieldsOfScienceCombined: [FieldOfScience!] + + """ + OECD Fields of Science of the containing repository + """ + fieldsOfScienceRepository: [FieldOfScience!] + """ Technical format of the resource """ @@ -9025,6 +9245,16 @@ type Service implements DoiItem { """ fieldsOfScience: [FieldOfScience!] 
+ """ + OECD Fields of Science of the resource and containing repository + """ + fieldsOfScienceCombined: [FieldOfScience!] + + """ + OECD Fields of Science of the containing repository + """ + fieldsOfScienceRepository: [FieldOfScience!] + """ Technical format of the resource """ @@ -9334,6 +9564,16 @@ type Software implements DoiItem { """ fieldsOfScience: [FieldOfScience!] + """ + OECD Fields of Science of the resource and containing repository + """ + fieldsOfScienceCombined: [FieldOfScience!] + + """ + OECD Fields of Science of the containing repository + """ + fieldsOfScienceRepository: [FieldOfScience!] + """ Technical format of the resource """ @@ -9673,6 +9913,16 @@ type Sound implements DoiItem { """ fieldsOfScience: [FieldOfScience!] + """ + OECD Fields of Science of the resource and containing repository + """ + fieldsOfScienceCombined: [FieldOfScience!] + + """ + OECD Fields of Science of the containing repository + """ + fieldsOfScienceRepository: [FieldOfScience!] + """ Technical format of the resource """ @@ -10192,6 +10442,16 @@ type Work implements DoiItem { """ fieldsOfScience: [FieldOfScience!] + """ + OECD Fields of Science of the resource and containing repository + """ + fieldsOfScienceCombined: [FieldOfScience!] + + """ + OECD Fields of Science of the containing repository + """ + fieldsOfScienceRepository: [FieldOfScience!] + """ Technical format of the resource """ @@ -10391,6 +10651,8 @@ type WorkConnectionWithTotal { """ edges: [WorkEdge] fieldsOfScience: [Facet!] + fieldsOfScienceCombined: [Facet!] + fieldsOfScienceRepository: [Facet!] languages: [Facet!] licenses: [Facet!] @@ -10506,6 +10768,16 @@ type Workflow implements DoiItem { """ fieldsOfScience: [FieldOfScience!] + """ + OECD Fields of Science of the resource and containing repository + """ + fieldsOfScienceCombined: [FieldOfScience!] + + """ + OECD Fields of Science of the containing repository + """ + fieldsOfScienceRepository: [FieldOfScience!] 
+ """ Technical format of the resource """ diff --git a/app/graphql/types/doi_item.rb b/app/graphql/types/doi_item.rb index b4134356e..a96c36d11 100644 --- a/app/graphql/types/doi_item.rb +++ b/app/graphql/types/doi_item.rb @@ -66,6 +66,15 @@ module DoiItem field :fields_of_science, [FieldOfScienceType], null: true, description: "OECD Fields of Science of the resource" + + field :fields_of_science_combined, + [FieldOfScienceType], + null: true, description: "OECD Fields of Science of the resource and containing repository" + + field :fields_of_science_repository, + [FieldOfScienceType], + null: true, description: "OECD Fields of Science of the containing repository" + field :dates, [DateType], null: true, description: "Different dates relevant to the work" @@ -418,14 +427,33 @@ def registration_agency { id: object.agency, name: REGISTRATION_AGENCIES[object.agency] }.compact end - def fields_of_science + def _fos_to_facet(fos_list) + Array.wrap(fos_list).map do |name| + { "id" => name.parameterize(separator: "_"), "name" => name } + end.uniq + end + + def fields_of_science_repository + if object.client.blank? 
+ return [] + end + _fos_to_facet(object.fields_of_science_repository) + end + + def fields_of_science_combined + _fos_to_facet(object.fields_of_science_combined) + end + + def _fos_temp Array.wrap(object.subjects).select do |s| s["subjectScheme"] == "Fields of Science and Technology (FOS)" end.map do |s| - name = s["subject"].gsub("FOS: ", "") - { "id" => name.parameterize(separator: "_"), "name" => name } + s["subject"].gsub("FOS: ", "") end.uniq end + def fields_of_science + _fos_to_facet(_fos_temp) + end def creators(**args) Array.wrap(object.creators)[0...args[:first]].map do |c| diff --git a/app/graphql/types/query_type.rb b/app/graphql/types/query_type.rb index 22f2d2d76..3fe3124b7 100644 --- a/app/graphql/types/query_type.rb +++ b/app/graphql/types/query_type.rb @@ -277,7 +277,12 @@ def actor(id:) end def works(**args) - ElasticsearchModelResponseConnection.new(response(args), context: context, first: args[:first], after: args[:after]) + ElasticsearchModelResponseConnection.new( + response(args), { + context: context, + first: args[:first], + after: args[:after] + }) end field :work, WorkType, null: false do @@ -1253,7 +1258,37 @@ def usage_report(id:) end def response(**args) - Doi.gql_query(args[:query], ids: args[:ids], user_id: args[:user_id], client_id: args[:repository_id], provider_id: args[:member_id], resource_type_id: args[:resource_type_id], resource_type: args[:resource_type], published: args[:published], agency: args[:registration_agency], language: args[:language], license: args[:license], has_person: args[:has_person], has_funder: args[:has_funder], has_organization: args[:has_organization], has_affiliation: args[:has_affiliation], has_member: args[:has_member], has_citations: args[:has_citations], has_parts: args[:has_parts], has_versions: args[:has_versions], has_views: args[:has_views], has_downloads: args[:has_downloads], field_of_science: args[:field_of_science], facet_count: args[:facet_count], pid_entity: args[:pid_entity], state: 
"findable", page: { cursor: args[:after].present? ? Base64.urlsafe_decode64(args[:after]) : [], size: args[:first] }) + Doi.gql_query( + args[:query], + ids: args[:ids], + user_id: args[:user_id], + client_id: args[:repository_id], + provider_id: args[:member_id], + resource_type_id: args[:resource_type_id], + resource_type: args[:resource_type], + published: args[:published], + agency: args[:registration_agency], + language: args[:language], + license: args[:license], + has_person: args[:has_person], + has_funder: args[:has_funder], + has_organization: args[:has_organization], + has_affiliation: args[:has_affiliation], + has_member: args[:has_member], + has_citations: args[:has_citations], + has_parts: args[:has_parts], + has_versions: args[:has_versions], + has_views: args[:has_views], + has_downloads: args[:has_downloads], + field_of_science: args[:field_of_science], + facet_count: args[:facet_count], + pid_entity: args[:pid_entity], + state: "findable", + page: { + cursor: args[:after].present? ? 
Base64.urlsafe_decode64(args[:after]) : [], + size: args[:first] + } + ) end def set_doi(id) diff --git a/app/graphql/types/work_connection_with_total_type.rb b/app/graphql/types/work_connection_with_total_type.rb index b0c5b58d4..7fb8d4436 100644 --- a/app/graphql/types/work_connection_with_total_type.rb +++ b/app/graphql/types/work_connection_with_total_type.rb @@ -17,6 +17,8 @@ class WorkConnectionWithTotalType < BaseConnection field :affiliations, [FacetType], null: true, cache: true field :authors, [FacetType], null: true, cache: true field :fields_of_science, [FacetType], null: true, cache: true + field :fields_of_science_combined, [FacetType], null: true, cache: true + field :fields_of_science_repository, [FacetType], null: true, cache: true field :licenses, [FacetType], null: true, cache: true field :languages, [FacetType], null: true, cache: true @@ -106,6 +108,22 @@ def fields_of_science end end + def fields_of_science_combined + if object.aggregations.fields_of_science_combined + facet_by_fos(object.aggregations.fields_of_science_combined.buckets) + else + [] + end + end + + def fields_of_science_repository + if object.aggregations.fields_of_science_repository + facet_by_fos(object.aggregations.fields_of_science_repository.buckets) + else + [] + end + end + def languages if object.aggregations.languages facet_by_language(object.aggregations.languages.buckets) diff --git a/app/models/client.rb b/app/models/client.rb index 5f3fe9667..f8afe729a 100644 --- a/app/models/client.rb +++ b/app/models/client.rb @@ -52,6 +52,7 @@ class Client < ApplicationRecord attr_accessor :password_input, :target_id attr_reader :from_salesforce + validate :subjects_only_for_disciplinary_repos validates :subjects, if: :subjects?, json: { message: ->(errors) { errors }, @@ -96,6 +97,7 @@ class Client < ApplicationRecord has_many :activities, as: :auditable, dependent: :destroy before_validation :set_defaults + before_validation :convert_subject_hashes_to_camelcase before_create { 
self.created = Time.zone.now.utc.iso8601 } before_save { self.updated = Time.zone.now.utc.iso8601 } after_create_commit :assign_prefix @@ -399,6 +401,7 @@ def as_indexed_json(options = {}) end, "analytics_dashboard_url" => analytics_dashboard_url, "analytics_tracking_id" => analytics_tracking_id, + "subjects" => Array.wrap(subjects), } end @@ -507,7 +510,7 @@ def re3data=(value) end def subjects=(value) - write_attribute(:subjects, Array.wrap(value)) + write_attribute(:subjects, Array.wrap(value).uniq) end def opendoar=(value) @@ -913,6 +916,15 @@ def freeze_symbol errors.add(:symbol, "cannot be changed") if symbol_changed? end + def subjects_only_for_disciplinary_repos + if Array.wrap(subjects).any? && Array.wrap(repository_type).exclude?("disciplinary") + errors.add( + :subjects, + "Subjects are only allowed for disciplinary repositories. This repository_type is: #{repository_type}", + ) + end + end + def check_id if symbol && symbol.split(".").first != provider.symbol errors.add( @@ -958,7 +970,7 @@ def assign_prefix ClientPrefix.create( client_id: symbol, provider_prefix_id: provider_prefix.uid, - prefix_id: provider_prefix.prefix.uid, + prefix_id: provider_prefix.prefix.uid ) end end @@ -983,6 +995,18 @@ def set_defaults self.doi_quota_allowed = -1 unless doi_quota_allowed.to_i > 0 end + def convert_subject_hashes_to_camelcase + if self.subjects? + self.subjects = Array.wrap(self.subjects).map { |subject| + subject.transform_keys! do |key| + key.to_s.camelcase(:lower) + end + } + else + [] + end + end + def create_reference_repository ReferenceRepository.create_from_client(self) end diff --git a/app/models/datacite_doi.rb b/app/models/datacite_doi.rb index ab00cfe48..cdd69023d 100644 --- a/app/models/datacite_doi.rb +++ b/app/models/datacite_doi.rb @@ -39,12 +39,13 @@ def self.import_by_ids(options = {}) DataciteDoi.where(type: "DataciteDoi").maximum(:id) ). 
to_i + batch_size = options[:batch_size] || 50 count = 0 # TODO remove query for type once STI is enabled # SQS message size limit is 256 kB, up to 2 GB with S3 DataciteDoi.where(type: "DataciteDoi").where(id: from_id..until_id). - find_in_batches(batch_size: 50) do |dois| + find_in_batches(batch_size: batch_size) do |dois| ids = dois.pluck(:id) DataciteDoiImportInBulkJob.perform_later(ids, index: index) count += ids.length diff --git a/app/models/doi.rb b/app/models/doi.rb index 60c136feb..b0ccd9357 100644 --- a/app/models/doi.rb +++ b/app/models/doi.rb @@ -141,6 +141,8 @@ class Doi < ApplicationRecord before_save :set_defaults, :save_metadata before_create { self.created = Time.zone.now.utc.iso8601 } + FIELD_OF_SCIENCE_SCHEME = "Fields of Science and Technology (FOS)" + scope :q, ->(query) { where("dataset.doi = ?", query) } # use different index for testing @@ -408,6 +410,14 @@ class Doi < ApplicationRecord updated: { type: :date }, deleted_at: { type: :date }, cumulative_years: { type: :integer, index: "false" }, + subjects: { type: :object, properties: { + subjectScheme: { type: :keyword }, + subject: { type: :keyword }, + schemeUri: { type: :keyword }, + valueUri: { type: :keyword }, + lang: { type: :keyword }, + classificationCode: { type: :keyword }, + } } } indexes :provider, type: :object, properties: { id: { type: :keyword }, @@ -512,6 +522,9 @@ class Doi < ApplicationRecord titleType: { type: :keyword }, lang: { type: :keyword }, } + indexes :fields_of_science, type: :keyword + indexes :fields_of_science_combined, type: :keyword + indexes :fields_of_science_repository, type: :keyword end end @@ -567,6 +580,9 @@ def as_indexed_json(_options = {}) "sizes" => Array.wrap(sizes), "language" => language, "subjects" => Array.wrap(subjects), + "fields_of_science" => fields_of_science, + "fields_of_science_repository" => fields_of_science_repository, + "fields_of_science_combined" => fields_of_science_combined, "xml" => xml, "is_active" => is_active, 
"landing_page" => landing_page, @@ -1712,6 +1728,26 @@ def client_id client.symbol.downcase if client.present? end + def _fos_filter(subject_array) + Array.wrap(subject_array).select { |sub| + sub.dig("subjectScheme") == FIELD_OF_SCIENCE_SCHEME + }.map do |fos| + fos["subject"].gsub("FOS: ", "") + end + end + + def fields_of_science + _fos_filter(subjects).uniq + end + + def fields_of_science_repository + _fos_filter(client&.subjects).uniq + end + + def fields_of_science_combined + fields_of_science | fields_of_science_repository + end + def client_id_and_name "#{client_id}:#{client.name}" if client.present? end diff --git a/app/models/schemas/client/subjects.json b/app/models/schemas/client/subjects.json index 7179c2243..8ece1f194 100644 --- a/app/models/schemas/client/subjects.json +++ b/app/models/schemas/client/subjects.json @@ -5,34 +5,18 @@ "items": { "type": "object", "properties": { - "classification_code": { "type": "string" }, "classificationCode": { "type": "string" }, - "scheme_uri": { "type": "string" }, "schemeUri": { "type": "string" }, - "value_uri": { "type": "string" }, "valueUri": { "type": "string" }, "lang": { "type": "string" }, "subject": { "type": "string" }, - "subject_scheme": { "type": "string" }, "subjectScheme": { "type": "string" } }, - "oneOf": [ - { - "required": [ - "classification_code", - "scheme_uri", - "subject", - "subject_scheme" - ] - }, - { - "required": [ - "classificationCode", - "schemeUri", - "subject", - "subjectScheme" - ] - } + "required": [ + "classificationCode", + "schemeUri", + "subject", + "subjectScheme" ], "additionalProperties": false } diff --git a/db/seeds/development/base.seeds.rb b/db/seeds/development/base.seeds.rb index 4a32778aa..f9725fcfe 100644 --- a/db/seeds/development/base.seeds.rb +++ b/db/seeds/development/base.seeds.rb @@ -24,11 +24,19 @@ client = Client.where(symbol: "DATACITE.TEST").first || FactoryBot.create( - :client, + :client_with_fos, provider: provider, symbol: ENV["MDS_USERNAME"], 
password_input: ENV["MDS_PASSWORD"], ) +if Prefix.where(uid: "10.14454").blank? + prefix = FactoryBot.create(:prefix, uid: "10.14454") + ### This creates both the client_prefix and the provider association + FactoryBot.create( + :client_prefix, + client_id: client.symbol, prefix_id: prefix.uid, + ) +end dois = FactoryBot.create_list(:doi, 10, client: client, state: "findable") FactoryBot.create_list(:event_for_datacite_related, 3, obj_id: dois.first.doi) FactoryBot.create_list(:event_for_datacite_usage, 2, obj_id: dois.first.doi) diff --git a/lib/tasks/datacite_doi.rake b/lib/tasks/datacite_doi.rake index 4191231f2..40fbf98c7 100644 --- a/lib/tasks/datacite_doi.rake +++ b/lib/tasks/datacite_doi.rake @@ -65,8 +65,14 @@ namespace :datacite_doi do task import: :environment do from_id = (ENV["FROM_ID"] || DataciteDoi.minimum(:id)).to_i until_id = (ENV["UNTIL_ID"] || DataciteDoi.maximum(:id)).to_i - - DataciteDoi.import_by_ids(from_id: from_id, until_id: until_id, index: ENV["INDEX"] || DataciteDoi.inactive_index) + batch_size = ENV["BATCH_SIZE"].to_i.positive? ? ENV["BATCH_SIZE"].to_i : 50 # unset, blank, non-numeric or non-positive BATCH_SIZE falls back to 50 + + DataciteDoi.import_by_ids( + from_id: from_id, + until_id: until_id, + batch_size: batch_size, + index: ENV["INDEX"] || DataciteDoi.inactive_index + ) end desc "Import one datacite DOI" diff --git a/lib/tasks/enrich.rake b/lib/tasks/enrich.rake new file mode 100644 index 000000000..3bd087000 --- /dev/null +++ b/lib/tasks/enrich.rake @@ -0,0 +1,54 @@ +# frozen_string_literal: true + +namespace :enrich do + desc "Enrich Clients with Subjects from re3data and converted to Field Of Science subjectScheme" + task client_subjects: :environment do + def all_clients_from_query(query: nil) + # Loop through all clients + page = { size: 1_000, number: 1 } + response = Client.query(query, page: page) + clients = response.records.to_a + + total = response.records.total + total_pages = page[:size] > 0 ? 
(total.to_f / page[:size]).ceil : 0 + + # keep going for all pages; query Client explicitly, self is not Client inside this Rake task + page_num = 2 + while page_num <= total_pages + page = { size: 1_000, number: page_num } + response = Client.query(query, page: page) + clients = clients + response.records.to_a + page_num += 1 + end + clients + end + + def enrich_client(client) + re3data = DataCatalog.find_by_id(client.re3data_id).fetch(:data, []).first + if re3data + subs = re3data.subjects + dfg_ids = subs.select { |subject| + subject.scheme == "DFG" + }.map { |subject| + subject.text.split.first + } + fos_subjects = Bolognese::Utils.dfg_ids_to_fos(dfg_ids) + client.subjects = fos_subjects.uniq + client.save + end + end + + puts "Searching for disciplinary repositories with re3data_ids without subjects" + clients = all_clients_from_query(query: "re3data_id:* AND -subjects:* AND -deleted_at:* AND repository_type:disciplinary") + puts "Found #{clients.count} repositories." + if clients.count > 0 + puts "Enriching now..." + clients.each do |c| + enrich_client(c) + end + puts "Enrichment complete" + else + puts "Skipping enrichment" + end + end +end diff --git a/spec/factories/client.rb b/spec/factories/client.rb index 16b82fe80..c5a40efe6 100644 --- a/spec/factories/client.rb +++ b/spec/factories/client.rb @@ -19,6 +19,21 @@ password_input { "12345" } is_active { true } + factory :client_with_fos do + repository_type { "disciplinary" } + subjects do + [ + { + subject: "Physical sciences", + valueUri: "", + schemeUri: "http://www.oecd.org/science/inno/38235147.pdf", + subjectScheme: "Fields of Science and Technology (FOS)", + classificationCode: "1001", + }, + ] + end + end + initialize_with { Client.where(symbol: symbol).first_or_initialize } end end diff --git a/spec/graphql/types/doi_item_spec.rb b/spec/graphql/types/doi_item_spec.rb index 2db00a772..e88ef3ff3 100644 --- a/spec/graphql/types/doi_item_spec.rb +++ b/spec/graphql/types/doi_item_spec.rb @@ -13,6 +13,9 @@ it { is_expected.to have_field(:publicationYear).of_type("Int") 
} it { is_expected.to have_field(:publisher).of_type("String") } it { is_expected.to have_field(:subjects).of_type("[Subject!]") } + it { is_expected.to have_field(:fieldsOfScience).of_type("[FieldOfScience!]") } + it { is_expected.to have_field(:fieldsOfScienceRepository).of_type("[FieldOfScience!]") } + it { is_expected.to have_field(:fieldsOfScienceCombined).of_type("[FieldOfScience!]") } it { is_expected.to have_field(:dates).of_type("[Date!]") } it { is_expected.to have_field(:registered).of_type("ISO8601DateTime") } it { is_expected.to have_field(:language).of_type("Language") } diff --git a/spec/graphql/types/work_type_spec.rb b/spec/graphql/types/work_type_spec.rb index 30e5b63f2..6f5473628 100644 --- a/spec/graphql/types/work_type_spec.rb +++ b/spec/graphql/types/work_type_spec.rb @@ -1201,4 +1201,87 @@ ) end end + + describe "query works with repository subjects" do + before :all do + SLEEP_TIME = 2 + WORK_COUNT = 10 + + DataciteDoi.import(force: true) + Client.import(force: true) + Prefix.import(force: true) + ClientPrefix.import(force: true) + ReferenceRepository.import(force: true) + Event.import(force: true) + + search_query = ' + fragment facetFields on Facet { + id + title + count + } + query{ + works(query:"*"){ + totalCount + fieldsOfScience { ...facetFields } + fieldsOfScienceRepository { ...facetFields } + fieldsOfScienceCombined{ ...facetFields } + } + } + ' + + create(:prefix) + client = create(:client_with_fos) + create_list(:doi, WORK_COUNT, + aasm_state: "findable", + client: client + ) + Doi.import + sleep SLEEP_TIME + @facet_response = LupoSchema.execute(search_query).as_json + Rails.logger.level = :fatal + DataciteDoi.destroy_all + ReferenceRepository.destroy_all + Client.destroy_all + Provider.destroy_all + Prefix.destroy_all + ClientPrefix.destroy_all + ProviderPrefix.destroy_all + Event.destroy_all + end + + let (:fos_facet) do + { + "id" => "physical_sciences", + "title" => "Physical sciences", + "count" => WORK_COUNT + } + end + + 
it "has all dois in the search results" do + response = @facet_response + expect(response.dig("data", "works", "totalCount")).to eq(WORK_COUNT) + end + + it "returns Field of Science Facets" do + response = @facet_response + expect( + response.dig("data", "works", "fieldsOfScience") + ).to match_array([]) + end + + it "returns Field of Science Facets from the repository" do + response = @facet_response + expect( + response.dig("data", "works", "fieldsOfScienceRepository") + ).to match_array([ fos_facet ]) + end + + it "returns combined Field of Science Facets" do + response = @facet_response + expect( + response.dig("data", "works", "fieldsOfScienceCombined") + ).to match_array([ fos_facet ]) + end + end end diff --git a/spec/models/client_spec.rb b/spec/models/client_spec.rb index 921389037..e9872b420 100644 --- a/spec/models/client_spec.rb +++ b/spec/models/client_spec.rb @@ -15,7 +15,11 @@ end describe "subjects" do - let(:client) { build(:client, provider: provider) } + let(:client) { build(:client, + provider: provider, + repository_type: "disciplinary" + ) + } let(:valid_subjects) do { classificationCode: "6.1", @@ -25,6 +29,12 @@ } end + it "are invalid if the repository_type is not disciplinary" do + client.repository_type = nil + client.subjects = [valid_subjects] + expect(client).to_not be_valid + end + it "valid hash" do client.subjects = [valid_subjects] expect(client).to be_valid diff --git a/spec/requests/repositories_spec.rb b/spec/requests/repositories_spec.rb index 3f2512696..374841395 100644 --- a/spec/requests/repositories_spec.rb +++ b/spec/requests/repositories_spec.rb @@ -413,6 +413,7 @@ "type" => "repositories", "attributes" => { "subjects" => subjects, + "repositoryType" => "disciplinary", }, }, }