From 7b3e8cd408cd141903765fb4904d33c5bba5ac42 Mon Sep 17 00:00:00 2001 From: Ashwini Sukale <ashwini.sukale@datacite.org> Date: Thu, 5 Oct 2023 15:45:31 +0530 Subject: [PATCH] =?UTF-8?q?Issue-1602=20Added=20test=20case=20for=20normal?= =?UTF-8?q?izing=20ORCID=20ids=20with=20and=20withou=20=E2=80=A6=20(#164)?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit * Issue-1602 Added test case for normalizing ORCID ids with and without schemeURI * issues-1602 Remove leading and trailing spaces from ORCID id before normalization * issues-1602 Update gem version * Revert "issues-1602 Update gem version" This reverts commit c03b47aa420dec3996d92b44e8a09ab230d560fd. --- lib/bolognese/author_utils.rb | 1 + spec/author_utils_spec.rb | 27 +++++++++++++++++-- .../datacite-example-ROR-nameIdentifiers.xml | 12 +++++++-- 3 files changed, 36 insertions(+), 4 deletions(-) diff --git a/lib/bolognese/author_utils.rb b/lib/bolognese/author_utils.rb index 82520d47..0a3ce219 100644 --- a/lib/bolognese/author_utils.rb +++ b/lib/bolognese/author_utils.rb @@ -30,6 +30,7 @@ def get_one_author(author) name_type = parse_attributes(author.fetch("creatorName", nil), content: "nameType", first: true) || parse_attributes(author.fetch("contributorName", nil), content: "nameType", first: true) name_identifiers = Array.wrap(author.fetch("nameIdentifier", nil)).map do |ni| + ni["__content__"] = ni["__content__"].strip if ni["nameIdentifierScheme"] == "ORCID" { "nameIdentifier" => normalize_orcid(ni["__content__"]), diff --git a/spec/author_utils_spec.rb b/spec/author_utils_spec.rb index 3ae05ba9..e95acc35 100644 --- a/spec/author_utils_spec.rb +++ b/spec/author_utils_spec.rb @@ -173,9 +173,10 @@ end context "affiliationIdentifier" do + let(:input) { fixture_path + 'datacite-example-ROR-nameIdentifiers.xml' } + subject { Bolognese::Metadata.new(input: input, from: "datacite") } + it "should normalize ROR affiliationIdentifier with and without URL" do - input = 
fixture_path + 'datacite-example-ROR-nameIdentifiers.xml' - subject = Bolognese::Metadata.new(input: input, from: "datacite") # without URL inside affiliationIdentifier="05bp8ka77" ror_affiliater0 = subject.creators[0]["affiliation"].select { |r| r["affiliationIdentifierScheme"] == "ROR" } expect(ror_affiliater0[0]["affiliationIdentifier"]).to eq("https://ror.org/05bp8ka77") @@ -184,6 +185,28 @@ expect(ror_affiliater1[0]["affiliationIdentifier"]).to eq("https://ror.org/05bp8ka05") end + it "should normalize the valid ORCID nameIdentifier to URL with schemeURI" do + # with "schemeURI" + # ORICD normalization 0000-0001-9998-0117 => https://orcid.org/0000-0001-9998-0117 + expect(subject.creators[0]["nameIdentifiers"]).to eq([{"nameIdentifier"=>"https://orcid.org/0000-0001-9998-0117", "schemeUri"=>"https://orcid.org", "nameIdentifierScheme"=>"ORCID"}]) + end + + it "should normalize the valid ORCID nameIdentifier to URL without schemeURI" do + # without "schemeURI" + # ORICD normalization 0000-0001-9998-0117 => https://orcid.org/0000-0001-9998-0117 + expect(subject.creators[7]["nameIdentifiers"]).to eq([{"nameIdentifier"=>"https://orcid.org/0000-0001-9998-0117", "schemeUri"=>"https://orcid.org", "nameIdentifierScheme"=>"ORCID"}]) + end + + it "should keep nameIdentifier URL after normalization" do + # ORICD normalization https://orcid.org/0000-0001-9998-0114 => https://orcid.org/0000-0001-9998-0114 + expect(subject.creators[1]["nameIdentifiers"]).to eq([{"nameIdentifier"=>"https://orcid.org/0000-0001-9998-0114", "schemeUri"=>"https://orcid.org", "nameIdentifierScheme"=>"ORCID"}]) + end + + it "should sanitize valid ORCID id/URL before normalization" do + #" 0000-0001-9998-0118 ", # Valid ORCID with leading/trailing spaces + expect(subject.creators[8]["nameIdentifiers"]).to eq([{"nameIdentifier"=>"https://orcid.org/0000-0001-9998-0118", "schemeUri"=>"https://orcid.org", "nameIdentifierScheme"=>"ORCID"}]) + end + it "should parse non ROR schema's without normalizing 
them" do input = fixture_path + 'datacite-example-ROR-nameIdentifiers.xml' subject = Bolognese::Metadata.new(input: input, from: "datacite") diff --git a/spec/fixtures/datacite-example-ROR-nameIdentifiers.xml b/spec/fixtures/datacite-example-ROR-nameIdentifiers.xml index 91764c3d..8a1f68b7 100644 --- a/spec/fixtures/datacite-example-ROR-nameIdentifiers.xml +++ b/spec/fixtures/datacite-example-ROR-nameIdentifiers.xml @@ -5,13 +5,13 @@ <creators> <creator> <creatorName nameType="Personal">Ashwini Sukale</creatorName> - <nameIdentifier schemeURI="https://orcid.org/" nameIdentifierScheme="ORCID"> https://orcid.org/0000-0001-9998-0117 </nameIdentifier> + <nameIdentifier schemeURI="https://orcid.org/" nameIdentifierScheme="ORCID">0000-0001-9998-0117</nameIdentifier> <affiliation affiliationIdentifier="05bp8ka77" affiliationIdentifierScheme="ROR"> Metadata Game Changers </affiliation> <affiliation affiliationIdentifier="grid.268117.b" affiliationIdentifierScheme="GRID">Wesleyan University</affiliation> </creator> <creator> <creatorName nameType="Personal">Erin Robinson</creatorName> - <nameIdentifier schemeURI="https://orcid.org/" nameIdentifierScheme="ORCID"> https://orcid.org/0000-0001-9998-0114 </nameIdentifier> + <nameIdentifier schemeURI="https://orcid.org/" nameIdentifierScheme="ORCID">https://orcid.org/0000-0001-9998-0114</nameIdentifier> <affiliation schemeURI="https://ror.org" affiliationIdentifier="https://ror.org/05bp8ka05" affiliationIdentifierScheme="ROR"> Metadata Game Changers </affiliation> <affiliation affiliationIdentifier="grid.268117.b" affiliationIdentifierScheme="GRID" schemeURI="https://grid.ac/institutes/">Wesleyan University</affiliation> </creator> @@ -35,6 +35,14 @@ <creatorName nameType="Organizational">جامعة زاخۆ</creatorName> <nameIdentifier nameIdentifierScheme="RORS" schemeURI="https://ror.org">05sd1pz50</nameIdentifier> </creator> + <creator> + <creatorName nameType="Personal">Ashwini S</creatorName> + <nameIdentifier 
nameIdentifierScheme="ORCID">0000-0001-9998-0117</nameIdentifier> + </creator> + <creator> + <creatorName nameType="Personal">Mike B</creatorName> + <nameIdentifier nameIdentifierScheme="ORCID"> 0000-0001-9998-0118 </nameIdentifier> + </creator> </creators> <titles> <title xml:lang="en-US">Genomic Standards Consortium (GSC) Island Sampling Day: Moorea Reef to Ridges Genomic Transect</title>