From 1a6578ad4dbf1c89b7c7e0d7123bafb420d8c96c Mon Sep 17 00:00:00 2001
From: jrhoads <jrhoads@datacite.org>
Date: Thu, 30 Nov 2023 15:59:13 -0500
Subject: [PATCH 1/3] Add person_id to doi model

---
 app/models/doi.rb       | 33 ++++++++++++++++------
 spec/models/doi_spec.rb | 61 +++++++++++++++++++++++++++++++++++++++++
 2 files changed, 86 insertions(+), 8 deletions(-)

diff --git a/app/models/doi.rb b/app/models/doi.rb
index 599af8e39..63109f61d 100644
--- a/app/models/doi.rb
+++ b/app/models/doi.rb
@@ -1649,21 +1649,38 @@ def sponsor_contributors
   end
 
 
+  # ORCID name identifiers of all creators and contributors, normalized via orcid_from_url/orcid_as_url.
+  def person_id
+    people = Array.wrap(creators) + Array.wrap(contributors)
+    people.reduce([]) do |orcids, person|
+      Array.wrap(person.fetch("nameIdentifiers", nil)).each do |identifier|
+        next unless identifier.is_a?(Hash) && identifier.fetch("nameIdentifierScheme", nil) == "ORCID"
+        raw_id = identifier.fetch("nameIdentifier", nil)
+        orcids << orcid_as_url(orcid_from_url(raw_id)) if raw_id.present?
+      end
+      orcids.uniq
+    end
+  end
+
   def organization_id
     (Array.wrap(creators) + Array.wrap(contributors)).reduce([]) do |sum, c|
       Array.wrap(c.fetch("nameIdentifiers", nil)).each do |name_identifier|
-        sum << ror_from_url(name_identifier.fetch("nameIdentifier", nil)) if name_identifier.is_a?(Hash) && name_identifier.fetch("nameIdentifierScheme", nil) == "ROR" && name_identifier.fetch("nameIdentifier", nil).present?
+        if name_identifier.is_a?(Hash) && name_identifier.fetch("nameIdentifierScheme", nil) == "ROR" && name_identifier.fetch("nameIdentifier", nil).present?
+          sum << ror_from_url(name_identifier.fetch("nameIdentifier", nil))
+        end
       end
-      sum
+      sum.uniq
     end
   end
 
   def fair_organization_id
     (Array.wrap(creators) + sponsor_contributors).reduce([]) do |sum, c|
       Array.wrap(c.fetch("nameIdentifiers", nil)).each do |name_identifier|
-        sum << ror_from_url(name_identifier.fetch("nameIdentifier", nil)) if name_identifier.is_a?(Hash) && name_identifier.fetch("nameIdentifierScheme", nil) == "ROR" && name_identifier.fetch("nameIdentifier", nil).present?
+        if name_identifier.is_a?(Hash) && name_identifier.fetch("nameIdentifierScheme", nil) == "ROR" && name_identifier.fetch("nameIdentifier", nil).present?
+          sum << ror_from_url(name_identifier.fetch("nameIdentifier", nil))
+        end
       end
-      sum
+      sum.uniq
     end
   end
 
@@ -1672,7 +1689,7 @@ def affiliation_id
       Array.wrap(c.fetch("affiliation", nil)).each do |affiliation|
         sum << ror_from_url(affiliation.fetch("affiliationIdentifier", nil)) if affiliation.is_a?(Hash) && affiliation.fetch("affiliationIdentifierScheme", nil) == "ROR" && affiliation.fetch("affiliationIdentifier", nil).present?
       end
-      sum
+      sum.uniq
     end
   end
 
@@ -1681,7 +1698,7 @@ def fair_affiliation_id
       Array.wrap(c.fetch("affiliation", nil)).each do |affiliation|
         sum << ror_from_url(affiliation.fetch("affiliationIdentifier", nil)) if affiliation.is_a?(Hash) && affiliation.fetch("affiliationIdentifierScheme", nil) == "ROR" && affiliation.fetch("affiliationIdentifier", nil).present?
       end
-      sum
+      sum.uniq
     end
   end
 
@@ -1690,7 +1707,7 @@ def affiliation_id_and_name
       Array.wrap(c.fetch("affiliation", nil)).each do |affiliation|
         sum << "#{ror_from_url(affiliation.fetch('affiliationIdentifier', nil))}:#{affiliation.fetch('name', nil)}" if affiliation.is_a?(Hash) && affiliation.fetch("affiliationIdentifierScheme", nil) == "ROR" && affiliation.fetch("affiliationIdentifier", nil).present?
       end
-      sum
+      sum.uniq
     end
   end
 
@@ -1699,7 +1716,7 @@ def fair_affiliation_id_and_name
       Array.wrap(c.fetch("affiliation", nil)).each do |affiliation|
         sum << "#{ror_from_url(affiliation.fetch('affiliationIdentifier', nil))}:#{affiliation.fetch('name', nil)}" if affiliation.is_a?(Hash) && affiliation.fetch("affiliationIdentifierScheme", nil) == "ROR" && affiliation.fetch("affiliationIdentifier", nil).present?
       end
-      sum
+      sum.uniq
     end
   end
 
diff --git a/spec/models/doi_spec.rb b/spec/models/doi_spec.rb
index c74323ae6..ccd77160d 100644
--- a/spec/models/doi_spec.rb
+++ b/spec/models/doi_spec.rb
@@ -1081,6 +1081,67 @@
     end
   end
 
+  describe "person_ids" do
+    it "from creators and contributors" do
+      subject = build(
+        :doi,
+        creators: [
+          {
+            "familyName" => "Garza",
+            "givenName" => "Kristian",
+            "name" => "Garza, Kristian",
+            "nameIdentifiers" => [
+              {
+                "nameIdentifier" => "https://orcid.org/0000-0003-3484-6875",
+                "nameIdentifierScheme" => "ORCID",
+                "schemeUri" => "https://orcid.org",
+              },
+            ],
+            "nameType" => "Personal",
+            "affiliation" => [
+              {
+                "name" => "University of Cambridge",
+                "affiliationIdentifier" => "https://ror.org/013meh722",
+                "affiliationIdentifierScheme" => "ROR",
+              },
+           ],
+          },
+        ],
+        contributors: [
+          {
+            "contributorType" => "Sponsor",
+            "familyName" => "Bob",
+            "givenName" => "Jones",
+            "name" => "Jones, Bob",
+            "nameIdentifiers" => [
+              {
+                "nameIdentifier" => "https://orcid.org/0000-0003-3484-0000",
+                "nameIdentifierScheme" => "ORCID",
+                "schemeUri" => "https://orcid.org",
+              },
+            ],
+            "nameType" => "Personal",
+            "affiliation" => [
+              {
+                "name" => "University of Examples",
+                "affiliationIdentifier" => "https://ror.org/013meh8888",
+                "affiliationIdentifierScheme" => "ROR",
+              },
+           ],
+          },
+        ]
+      )
+      expect(subject).to be_valid
+      expect(subject.person_id).to eq(
+        [
+          "https://orcid.org/0000-0003-3484-6875",
+          "https://orcid.org/0000-0003-3484-0000",
+        ]
+      )
+
+    end
+  end
+
   describe "related_identifiers" do
     it "has part" do
       subject = build(:doi, related_identifiers: [

From 7abc2cc116ef4913f77f57201a69f14be70e6719 Mon Sep 17 00:00:00 2001
From: jrhoads <jrhoads@datacite.org>
Date: Sun, 3 Dec 2023 21:09:19 -0500
Subject: [PATCH 2/3] Appease Rubocop

---
 spec/models/doi_spec.rb | 1 -
 1 file changed, 1 deletion(-)

diff --git a/spec/models/doi_spec.rb b/spec/models/doi_spec.rb
index ccd77160d..95f558e5a 100644
--- a/spec/models/doi_spec.rb
+++ b/spec/models/doi_spec.rb
@@ -1138,7 +1138,6 @@
           "https://orcid.org/0000-0003-3484-0000",
         ]
       )
-
     end
   end
 

From 9467a6f4b5ba0d1ecb214bc1f2347f900edf9948 Mon Sep 17 00:00:00 2001
From: Joseph Rhoads <jrhoads@datacite.org>
Date: Mon, 4 Dec 2023 05:25:16 -0500
Subject: [PATCH 3/3] Index person_id (#1050)

* Index person_id

* Add related_doi indexing (#1051)

* Add related_doi indexing

* Add relation_type to the hash

* Add more fields to the related_dois indexed info

* Add tests for related_dois

* Add related_dois to elasticsearch

* Appease Rubocop
---
 app/models/doi.rb                             | 13 ++++
 app/models/doi/indexer/related_doi_indexer.rb | 45 ++++++++++++++
 .../related_identifier_denormalizer.rb        | 32 ++++++++++
 .../doi_verified_related_identifiers_spec.rb  | 60 +++++++++++++++++++
 4 files changed, 150 insertions(+)
 create mode 100644 app/models/doi/indexer/related_doi_indexer.rb
 create mode 100644 app/models/doi/indexer/related_identifier_denormalizer.rb
 create mode 100644 spec/models/doi_verified_related_identifiers_spec.rb

diff --git a/app/models/doi.rb b/app/models/doi.rb
index 63109f61d..1f41250a2 100644
--- a/app/models/doi.rb
+++ b/app/models/doi.rb
@@ -244,6 +244,7 @@ class Doi < ApplicationRecord
       indexes :provider_id,                    type: :keyword
       indexes :consortium_id,                  type: :keyword
       indexes :resource_type_id,               type: :keyword
+      indexes :person_id,                      type: :keyword
       indexes :affiliation_id,                 type: :keyword
       indexes :fair_affiliation_id,            type: :keyword
       indexes :organization_id,                type: :keyword
@@ -549,6 +550,14 @@ class Doi < ApplicationRecord
       indexes :fields_of_science, type: :keyword
       indexes :fields_of_science_combined, type: :keyword
       indexes :fields_of_science_repository, type: :keyword
+      indexes :related_doi, type: :object, properties: {
+        client_id: { type: :keyword },
+        doi: { type: :keyword },
+        organization_id: { type: :keyword },
+        person_id: { type: :keyword },
+        resource_type_id: { type: :keyword },
+        resource_type_id_and_name: { type: :keyword },
+      }
     end
   end
 
@@ -1619,6 +1628,10 @@ def consortium_id
     client.provider.consortium_id.downcase if client.present? && client.provider.consortium_id.present?
   end
 
+  def related_dois
+    Doi::Indexer::RelatedDoiIndexer.new(related_identifiers).as_indexed_json
+  end
+
   def related_dmp_ids
     Array.wrap(related_identifiers).select { |related_identifier|
       related_identifier["relatedIdentifierType"] == "DOI"
diff --git a/app/models/doi/indexer/related_doi_indexer.rb b/app/models/doi/indexer/related_doi_indexer.rb
new file mode 100644
index 000000000..eb97419d3
--- /dev/null
+++ b/app/models/doi/indexer/related_doi_indexer.rb
@@ -0,0 +1,45 @@
+# frozen_string_literal: true
+
+module Doi::Indexer
+  # Builds the denormalized related-DOI index data from a DOI's related identifiers (nil counts as none).
+  class RelatedDoiIndexer
+    def initialize(related_identifiers)
+      @related_identifiers = Array.wrap(related_identifiers)
+    end
+
+    # Related identifiers of type "DOI"; entries that are not hashes or have a blank identifier are skipped.
+    def related_dois
+      @related_dois ||= @related_identifiers.select do |r|
+        r.is_a?(Hash) && r["relatedIdentifierType"] == "DOI" && r["relatedIdentifier"].present?
+      end
+    end
+
+    # Groups the related DOI entries by their downcased DOI name.
+    def related_grouped_by_id
+      related_dois.group_by { |r| r["relatedIdentifier"].downcase }
+    end
+
+    # Maps each downcased DOI name to its unique, underscored relation types; a missing relationType is ignored.
+    def relation_types_gouped_by_id
+      related_grouped_by_id.transform_values { |values| values.map { |val| val["relationType"]&.underscore }.compact.uniq }
+    end
+
+    def related_doi_ids
+      related_grouped_by_id.keys
+    end
+
+    def dois
+      Doi.where(doi: related_doi_ids)
+    end
+
+    def indexed_dois
+      dois.map { |d| RelatedIdentifierDenormalizer.new(d).to_hash }
+    end
+
+    # One hash per related DOI returned by the Doi lookup, extended with its "relation_type" list.
+    def as_indexed_json
+      rtypes = relation_types_gouped_by_id
+      indexed_dois.map { |indexed_doi| indexed_doi.merge("relation_type" => rtypes[indexed_doi["doi"]]) }
+    end
+  end
+end
diff --git a/app/models/doi/indexer/related_identifier_denormalizer.rb b/app/models/doi/indexer/related_identifier_denormalizer.rb
new file mode 100644
index 000000000..9188015f8
--- /dev/null
+++ b/app/models/doi/indexer/related_identifier_denormalizer.rb
@@ -0,0 +1,32 @@
+# frozen_string_literal: true
+
+module Doi::Indexer
+  # Flattens one related DOI record into a plain hash (consumed by RelatedDoiIndexer#indexed_dois).
+  class RelatedIdentifierDenormalizer
+    attr_reader :related_doi
+
+    # These values are read straight from the wrapped record.
+    delegate :client_id, :organization_id, :person_id,
+             :resource_type_id, :resource_type_id_and_name,
+             to: :related_doi
+
+    # doi is the record to denormalize; RelatedDoiIndexer passes in Doi model instances.
+    def initialize(doi)
+      @related_doi = doi
+    end
+
+    # The wrapped record's DOI name, downcased.
+    def doi
+      related_doi.doi.downcase
+    end
+
+    # Returns a string-keyed hash holding the result of each listed method, in this order.
+    def to_hash
+      field_names = %w[
+        client_id doi organization_id
+        person_id resource_type_id resource_type_id_and_name
+      ]
+      field_names.index_with { |field_name| send(field_name) }
+    end
+  end
+end
diff --git a/spec/models/doi_verified_related_identifiers_spec.rb b/spec/models/doi_verified_related_identifiers_spec.rb
new file mode 100644
index 000000000..04c237fc7
--- /dev/null
+++ b/spec/models/doi_verified_related_identifiers_spec.rb
@@ -0,0 +1,60 @@
+
+# frozen_string_literal: true
+
+require "rails_helper"
+
+describe Doi, type: :model, vcr: true, elasticsearch: true do
+  describe "related_doi" do
+    let(:client) { create(:client) }
+    let(:target_doi) do
+      create(:doi,
+        client: client,
+        aasm_state: "findable",
+        types: { "resourceTypeGeneral" => "Dataset" }
+      )
+    end
+    let(:doi) do
+      create(:doi,
+        client: client,
+        aasm_state: "findable",
+        related_identifiers: [
+          {
+            "relatedIdentifier": target_doi.doi,
+            "relatedIdentifierType": "DOI",
+            "relationType": "HasPart",
+            "resourceTypeGeneral": "OutputManagementPlan",
+          },
+          {
+            "relatedIdentifier": target_doi.doi,
+            "relatedIdentifierType": "DOI",
+            "relationType": "Cites",
+            "resourceTypeGeneral": "Text",
+          }
+        ])
+    end
+
+    it "indexes related_dois" do
+      expect(doi.related_dois.first["doi"]).to eq(target_doi.doi.downcase)
+    end
+
+    it "indexes related doi's client_id" do
+      expect(doi.related_dois.first["client_id"]).to eq(target_doi.client_id)
+    end
+
+    it "indexes related doi's person_id" do
+      expect(doi.related_dois.first["person_id"]).to eq(target_doi.person_id)
+    end
+
+    it "does not index related doi's claimed resource_type_id" do
+      expect(doi.related_dois.first["resource_type_id"]).not_to eq("output_management_plan")
+    end
+
+    it "indexes related doi's true resource_type_id" do
+      expect(doi.related_dois.first["resource_type_id"]).to eq("dataset")
+    end
+
+    it "indexes all relations to the related doi" do
+      expect(doi.related_dois.first["relation_type"]).to eq(["has_part", "cites"])
+    end
+  end
+end