# From PATCH 1/3 "Add person_id to doi model" (app/models/doi.rb).

# ORCID-scheme name identifiers of all creators and contributors.
#
# Every matching "nameIdentifier" is passed through orcid_from_url and then
# orcid_as_url (both defined elsewhere in the model; presumably they
# normalise the value to a canonical ORCID URL) before de-duplication.
#
# @return [Array] unique values in first-seen order; [] when nothing matches
def person_id
  people = Array.wrap(creators) + Array.wrap(contributors)

  name_identifiers_with_scheme(people, "ORCID")
    .map { |identifier| orcid_as_url(orcid_from_url(identifier)) }
    .uniq
end

# ROR-scheme name identifiers of all creators and contributors, each passed
# through ror_from_url (defined elsewhere in the model).
#
# @return [Array] unique values in first-seen order; [] when nothing matches
def organization_id
  people = Array.wrap(creators) + Array.wrap(contributors)

  name_identifiers_with_scheme(people, "ROR")
    .map { |identifier| ror_from_url(identifier) }
    .uniq
end

# Same as #organization_id, but contributors are limited to whatever
# sponsor_contributors returns (defined elsewhere in the model).
#
# @return [Array] unique values in first-seen order; [] when nothing matches
def fair_organization_id
  people = Array.wrap(creators) + sponsor_contributors

  name_identifiers_with_scheme(people, "ROR")
    .map { |identifier| ror_from_url(identifier) }
    .uniq
end

# Collects the raw "nameIdentifier" values that carry the given
# "nameIdentifierScheme".
#
# Entries that are not Hashes (at either level) and blank identifiers are
# skipped instead of raising, so one malformed creator cannot break the
# whole collection.
#
# @param people [Array] creator / contributor entries
# @param scheme [String] value to match against "nameIdentifierScheme"
# @return [Array] identifiers in document order, duplicates included
private def name_identifiers_with_scheme(people, scheme)
  people.flat_map do |person|
    next [] unless person.is_a?(Hash)

    Array.wrap(person["nameIdentifiers"])
      .select do |name_identifier|
        name_identifier.is_a?(Hash) &&
          name_identifier["nameIdentifierScheme"] == scheme &&
          name_identifier["nameIdentifier"].present?
      end
      .map { |name_identifier| name_identifier["nameIdentifier"] }
  end
end
# From PATCH 1/3 and 2/3 (spec/models/doi_spec.rb).
# Doi#person_id gathers ORCID name identifiers from creators and contributors.
describe "person_ids" do
  let(:creators) do
    [
      {
        "familyName" => "Garza",
        "givenName" => "Kristian",
        "name" => "Garza, Kristian",
        "nameIdentifiers" => [
          {
            "nameIdentifier" => "https://orcid.org/0000-0003-3484-6875",
            "nameIdentifierScheme" => "ORCID",
            "schemeUri" => "https://orcid.org",
          },
        ],
        "nameType" => "Personal",
        "affiliation" => [
          {
            "name" => "University of Cambridge",
            "affiliationIdentifier" => "https://ror.org/013meh722",
            "affiliationIdentifierScheme" => "ROR",
          },
        ],
      },
    ]
  end

  let(:contributors) do
    [
      {
        "contributorType" => "Sponsor",
        "familyName" => "Bob",
        "givenName" => "Jones",
        "name" => "Jones, Bob",
        "nameIdentifiers" => [
          {
            "nameIdentifier" => "https://orcid.org/0000-0003-3484-0000",
            "nameIdentifierScheme" => "ORCID",
            "schemeUri" => "https://orcid.org",
          },
        ],
        "nameType" => "Personal",
        "affiliation" => [
          {
            "name" => "University of Examples",
            "affiliationIdentifier" => "https://ror.org/013meh8888",
            "affiliationIdentifierScheme" => "ROR",
          },
        ],
      },
    ]
  end

  let(:doi) { build(:doi, creators: creators, contributors: contributors) }

  it "from creators and contributors" do
    expect(doi).to be_valid
    # Creator first, then contributor: the order in which the model walks them.
    expect(doi.person_id).to eq(
      [
        "https://orcid.org/0000-0003-3484-6875",
        "https://orcid.org/0000-0003-3484-0000",
      ]
    )
  end
end
# frozen_string_literal: true

# From PATCH 3/3 (app/models/doi/indexer/related_doi_indexer.rb).
module Doi::Indexer
  # Turns the DOI-typed entries of a record's related identifiers into the
  # list of hashes returned by #as_indexed_json: one hash per related DOI that
  # Doi.where can find, built by RelatedIdentifierDenormalizer and extended
  # with a "relation_type" list.
  class RelatedDoiIndexer
    # @param related_identifiers [Array<Hash>, Hash, nil] raw related
    #   identifier entries; nil is treated as "none" so callers can pass the
    #   attribute through unchecked.
    def initialize(related_identifiers)
      @related_identifiers = Array.wrap(related_identifiers)
    end

    # @return [Array<Hash>] entries whose "relatedIdentifierType" is "DOI"
    #   and that actually carry a "relatedIdentifier"
    def related_dois
      @related_dois ||= @related_identifiers.select do |r|
        r.is_a?(Hash) && r["relatedIdentifierType"] == "DOI" && r["relatedIdentifier"].present?
      end
    end

    # @return [Hash{String => Array<Hash>}] entries keyed by downcased identifier
    def related_grouped_by_id
      @related_grouped_by_id ||= related_dois.group_by { |r| r["relatedIdentifier"].to_s.downcase }
    end

    # @return [Hash{String => Array<String>}] unique underscored relation
    #   types per downcased identifier; entries without a "relationType"
    #   contribute nothing
    def relation_types_grouped_by_id
      related_grouped_by_id.transform_values do |values|
        values.map { |val| val["relationType"]&.underscore }.compact.uniq
      end
    end
    # Original (misspelled) name, kept so existing callers continue to work.
    alias_method :relation_types_gouped_by_id, :relation_types_grouped_by_id

    # @return [Array<String>] downcased identifiers of all related DOIs
    def related_doi_ids
      related_grouped_by_id.keys
    end

    # Records for the related DOIs.
    # NOTE(review): the ids are downcased before the lookup, so this relies on
    # the doi column comparing case-insensitively - confirm.
    def dois
      Doi.where(doi: related_doi_ids)
    end

    # @return [Array<Hash>] one denormalised hash per record found by #dois
    def indexed_dois
      # Nothing to resolve: skip the lookup entirely.
      return [] if related_doi_ids.empty?

      dois.map { |d| RelatedIdentifierDenormalizer.new(d).to_hash }
    end

    # @return [Array<Hash>] #indexed_dois, each with a "relation_type" entry
    #   listing every relation this record declares towards that DOI
    def as_indexed_json
      rtypes = relation_types_grouped_by_id
      indexed_dois.map do |indexed_doi|
        indexed_doi.merge("relation_type" => rtypes[indexed_doi["doi"]])
      end
    end
  end
end
# frozen_string_literal: true

# From PATCH 3/3 (spec/models/doi_verified_related_identifiers_spec.rb).

require "rails_helper"

# Doi#related_dois must describe the related DOI using that DOI's own record,
# not the values claimed inside the related identifier entry.
describe Doi, type: :model, vcr: true, elasticsearch: true do
  describe "related_doi" do
    let(:client) { create(:client) }

    # The DOI being pointed at; its own resource type is Dataset.
    let(:target_doi) do
      create(:doi,
        client: client,
        aasm_state: "findable",
        types: { "resourceTypeGeneral" => "Dataset" }
      )
    end

    # Points at target_doi twice with different relation types, each time
    # claiming a resource type that differs from the target's own.
    # String keys are used because that is what the indexer reads.
    let(:doi) do
      create(:doi,
        client: client,
        aasm_state: "findable",
        related_identifiers: [
          {
            "relatedIdentifier" => target_doi.doi,
            "relatedIdentifierType" => "DOI",
            "relationType" => "HasPart",
            "resourceTypeGeneral" => "OutputManagementPlan",
          },
          {
            "relatedIdentifier" => target_doi.doi,
            "relatedIdentifierType" => "DOI",
            "relationType" => "Cites",
            "resourceTypeGeneral" => "Text",
          },
        ]
      )
    end

    let(:related_doi) { doi.related_dois.first }

    it "indexes related_dois" do
      expect(related_doi["doi"]).to eq(target_doi.doi.downcase)
    end

    it "indexes related doi's client_id" do
      expect(related_doi["client_id"]).to eq(target_doi.client_id)
    end

    it "indexes related doi's person_id" do
      expect(related_doi["person_id"]).to eq(target_doi.person_id)
    end

    it "does not index related doi's claimed resource_type_id" do
      expect(related_doi["resource_type_id"]).not_to eq("output_management_plan")
    end

    it "indexes related doi's true resource_type_id" do
      expect(related_doi["resource_type_id"]).to eq("dataset")
    end

    it "indexes all relations to the related doi" do
      expect(related_doi["relation_type"]).to eq(["has_part", "cites"])
    end
  end
end