From 7b3e8cd408cd141903765fb4904d33c5bba5ac42 Mon Sep 17 00:00:00 2001
From: Ashwini Sukale <ashwini.sukale@datacite.org>
Date: Thu, 5 Oct 2023 15:45:31 +0530
Subject: [PATCH] =?UTF-8?q?Issue-1602=20Added=20test=20case=20for=20normal?=
 =?UTF-8?q?izing=20ORCID=20ids=20with=20and=20withou=20=E2=80=A6=20(#164)?=
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

* Issue-1602 Added test case for normalizing ORCID ids with and without schemeURI

* issues-1602 Remove leading and trailing spaces from ORCID id before normalization

* issues-1602 Update gem version

* Revert "issues-1602 Update gem version"

This reverts commit c03b47aa420dec3996d92b44e8a09ab230d560fd.
---
 lib/bolognese/author_utils.rb                 |  1 +
 spec/author_utils_spec.rb                     | 27 +++++++++++++++++--
 .../datacite-example-ROR-nameIdentifiers.xml  | 12 +++++++--
 3 files changed, 36 insertions(+), 4 deletions(-)

diff --git a/lib/bolognese/author_utils.rb b/lib/bolognese/author_utils.rb
index 82520d47..0a3ce219 100644
--- a/lib/bolognese/author_utils.rb
+++ b/lib/bolognese/author_utils.rb
@@ -30,6 +30,7 @@ def get_one_author(author)
       name_type = parse_attributes(author.fetch("creatorName", nil), content: "nameType", first: true) || parse_attributes(author.fetch("contributorName", nil), content: "nameType", first: true)
 
       name_identifiers = Array.wrap(author.fetch("nameIdentifier", nil)).map do |ni|
+        ni["__content__"] = ni["__content__"].strip
         if ni["nameIdentifierScheme"] == "ORCID"
           {
             "nameIdentifier" => normalize_orcid(ni["__content__"]),
diff --git a/spec/author_utils_spec.rb b/spec/author_utils_spec.rb
index 3ae05ba9..e95acc35 100644
--- a/spec/author_utils_spec.rb
+++ b/spec/author_utils_spec.rb
@@ -173,9 +173,10 @@
   end
 
   context "affiliationIdentifier" do
+    let(:input) { fixture_path + 'datacite-example-ROR-nameIdentifiers.xml' }
+    subject { Bolognese::Metadata.new(input: input, from: "datacite") }
+
     it "should normalize ROR affiliationIdentifier with and without URL" do
-      input = fixture_path + 'datacite-example-ROR-nameIdentifiers.xml'
-      subject = Bolognese::Metadata.new(input: input, from: "datacite")
       # without URL inside affiliationIdentifier="05bp8ka77"
       ror_affiliater0 = subject.creators[0]["affiliation"].select { |r| r["affiliationIdentifierScheme"] == "ROR" }
       expect(ror_affiliater0[0]["affiliationIdentifier"]).to eq("https://ror.org/05bp8ka77")
@@ -184,6 +185,28 @@
       expect(ror_affiliater1[0]["affiliationIdentifier"]).to eq("https://ror.org/05bp8ka05")
     end
 
+    it "should normalize the valid ORCID nameIdentifier to URL with schemeURI" do
+      # with "schemeURI"
+      # ORCID normalization  0000-0001-9998-0117 => https://orcid.org/0000-0001-9998-0117
+      expect(subject.creators[0]["nameIdentifiers"]).to eq([{"nameIdentifier"=>"https://orcid.org/0000-0001-9998-0117", "schemeUri"=>"https://orcid.org", "nameIdentifierScheme"=>"ORCID"}])
+    end
+
+    it "should normalize the valid ORCID nameIdentifier to URL without schemeURI" do
+      # without "schemeURI"
+      # ORCID normalization  0000-0001-9998-0117 => https://orcid.org/0000-0001-9998-0117
+      expect(subject.creators[7]["nameIdentifiers"]).to eq([{"nameIdentifier"=>"https://orcid.org/0000-0001-9998-0117", "schemeUri"=>"https://orcid.org", "nameIdentifierScheme"=>"ORCID"}])
+    end
+
+    it "should keep nameIdentifier URL after normalization" do
+      # ORCID normalization  https://orcid.org/0000-0001-9998-0114 => https://orcid.org/0000-0001-9998-0114
+      expect(subject.creators[1]["nameIdentifiers"]).to eq([{"nameIdentifier"=>"https://orcid.org/0000-0001-9998-0114", "schemeUri"=>"https://orcid.org", "nameIdentifierScheme"=>"ORCID"}])
+    end
+
+    it "should sanitize valid ORCID id/URL before normalization" do
+      #"  0000-0001-9998-0118  ",  # Valid ORCID with leading/trailing spaces
+      expect(subject.creators[8]["nameIdentifiers"]).to eq([{"nameIdentifier"=>"https://orcid.org/0000-0001-9998-0118", "schemeUri"=>"https://orcid.org", "nameIdentifierScheme"=>"ORCID"}])
+    end
+
     it "should parse non ROR schema's without normalizing them" do
       input = fixture_path + 'datacite-example-ROR-nameIdentifiers.xml'
       subject = Bolognese::Metadata.new(input: input, from: "datacite")
diff --git a/spec/fixtures/datacite-example-ROR-nameIdentifiers.xml b/spec/fixtures/datacite-example-ROR-nameIdentifiers.xml
index 91764c3d..8a1f68b7 100644
--- a/spec/fixtures/datacite-example-ROR-nameIdentifiers.xml
+++ b/spec/fixtures/datacite-example-ROR-nameIdentifiers.xml
@@ -5,13 +5,13 @@
     <creators>
         <creator>
             <creatorName nameType="Personal">Ashwini Sukale</creatorName>
-            <nameIdentifier schemeURI="https://orcid.org/" nameIdentifierScheme="ORCID"> https://orcid.org/0000-0001-9998-0117 </nameIdentifier>
+            <nameIdentifier schemeURI="https://orcid.org/" nameIdentifierScheme="ORCID">0000-0001-9998-0117</nameIdentifier>
             <affiliation affiliationIdentifier="05bp8ka77" affiliationIdentifierScheme="ROR"> Metadata Game Changers </affiliation>
             <affiliation affiliationIdentifier="grid.268117.b" affiliationIdentifierScheme="GRID">Wesleyan University</affiliation>
         </creator>
         <creator>
             <creatorName nameType="Personal">Erin Robinson</creatorName>
-            <nameIdentifier schemeURI="https://orcid.org/" nameIdentifierScheme="ORCID"> https://orcid.org/0000-0001-9998-0114 </nameIdentifier>
+            <nameIdentifier schemeURI="https://orcid.org/" nameIdentifierScheme="ORCID">https://orcid.org/0000-0001-9998-0114</nameIdentifier>
             <affiliation schemeURI="https://ror.org" affiliationIdentifier="https://ror.org/05bp8ka05" affiliationIdentifierScheme="ROR"> Metadata Game Changers </affiliation>
             <affiliation affiliationIdentifier="grid.268117.b" affiliationIdentifierScheme="GRID" schemeURI="https://grid.ac/institutes/">Wesleyan University</affiliation>
         </creator>
@@ -35,6 +35,14 @@
             <creatorName nameType="Organizational">جامعة زاخۆ</creatorName>
             <nameIdentifier nameIdentifierScheme="RORS" schemeURI="https://ror.org">05sd1pz50</nameIdentifier>
         </creator>
+        <creator>
+            <creatorName nameType="Personal">Ashwini S</creatorName>
+            <nameIdentifier nameIdentifierScheme="ORCID">0000-0001-9998-0117</nameIdentifier>
+        </creator>
+         <creator>
+            <creatorName nameType="Personal">Mike B</creatorName>
+            <nameIdentifier nameIdentifierScheme="ORCID"> 0000-0001-9998-0118 </nameIdentifier>
+        </creator>
     </creators>
     <titles>
         <title xml:lang="en-US">Genomic Standards Consortium (GSC) Island Sampling Day: Moorea Reef to Ridges Genomic Transect</title>