From 2ec7b047d3637c13bfdbbad86ec7865c55a21336 Mon Sep 17 00:00:00 2001 From: Cody Ross Date: Fri, 17 Jun 2022 14:01:40 -0400 Subject: [PATCH 1/4] Addresses error where author aggs with multiple nameIdentifiers resulted in array index error --- app/controllers/concerns/facetable.rb | 24 +- spec/concerns/facetable_spec.rb | 59 +++ ...s_aggs_with_multiple_name_identifiers.json | 393 ++++++++++++++++++ 3 files changed, 464 insertions(+), 12 deletions(-) create mode 100644 spec/fixtures/files/authors_aggs_with_multiple_name_identifiers.json diff --git a/app/controllers/concerns/facetable.rb b/app/controllers/concerns/facetable.rb index 07d12e5da..12334b1e0 100644 --- a/app/controllers/concerns/facetable.rb +++ b/app/controllers/concerns/facetable.rb @@ -408,9 +408,9 @@ def facet_by_fos(arr) end def facet_by_authors(arr) - arr.map do |hsh| + arr.map { |hsh| orcid_id = hsh["key"] - + # The aggregation query should only return 1 hit, so hence the index # into first element creators = hsh.dig("authors", "hits", "hits")[0].dig("_source", "creators") @@ -418,20 +418,20 @@ def facet_by_authors(arr) # Filter through creators to find creator that matches the key matched_creator = creators.select do |creator| if creator.key?("nameIdentifiers") - creator["nameIdentifiers"].each do |ni| - break ni["nameIdentifier"] == orcid_id - end + creator["nameIdentifiers"].any? { |ni| ni["nameIdentifier"] == orcid_id } end end - title = matched_creator[0]["name"] + if matched_creator.any? 
+ title = matched_creator[0]["name"] - { - "id" => orcid_id, - "title" => title, - "count" => hsh["doc_count"], - } - end + { + "id" => orcid_id, + "title" => title, + "count" => hsh["doc_count"], + } + end + }.compact end end end diff --git a/spec/concerns/facetable_spec.rb b/spec/concerns/facetable_spec.rb index 8ebae8950..ff1acb3da 100644 --- a/spec/concerns/facetable_spec.rb +++ b/spec/concerns/facetable_spec.rb @@ -4,6 +4,7 @@ describe "Facetable", type: :controller do let(:author_aggs) { JSON.parse(file_fixture("authors_aggs.json").read) } + let(:author_aggs_with_multiple_name_identifiers) { JSON.parse(file_fixture("authors_aggs_with_multiple_name_identifiers.json").read) } let(:model) { DataciteDoisController.new } it "facet by author" do authors = model.facet_by_authors(author_aggs) @@ -11,6 +12,64 @@ expected_result = [{ "id" => "https://orcid.org/0000-0003-1419-2405", "title" => "Fenner, Martin", "count" => 244 }, { "id" => "https://orcid.org/0000-0001-9570-8121", "title" => "Lambert, Simon", "count" => 23 }] expect(authors).to eq (expected_result) end + + it "facet by author where author may have multiple nameIdentifiers" do + authors = model.facet_by_authors(author_aggs_with_multiple_name_identifiers) + + expected_result = [ + { + "id" => "https://orcid.org/0000-0002-0429-5446", + "title" => "Nam, Hyung-song", + "count" => 28, + }, + { + "id" => "https://orcid.org/0000-0003-4973-3128", + "title" => "Casares, Ramón", + "count" => 12, + }, + { + "id" => "https://orcid.org/0000-0002-3776-4755", + "title" => "Gomeseria, Ronald", + "count" => 4, + }, + { + "id" => "https://orcid.org/0000-0002-6014-2161", + "title" => "Kartha, Sivan", + "count" => 4, + }, + { + "id" => "https://orcid.org/0000-0003-1026-5865", + "title" => "Willemen, Louise", + "count" => 4, + }, + { + "id" => "https://orcid.org/0000-0003-4624-488X", + "title" => "Schwarz, Nina", + "count" => 4, + }, + { + "id" => "https://osf.io/8kzbu/", + "title" => "Gomeseria, Ronald", + "count" => 4, + }, 
+ { + "id" => "https://orcid.org/0000-0002-2149-9897", + "title" => "A, Subaveerapandiyan", + "count" => 3, + }, + { + "id" => "https://orcid.org/0000-0002-4541-7294", + "title" => "Puntiroli, Michael", + "count" => 3, + }, + { + "id" => "https://osf.io/cjhmz/", + "title" => "Puntiroli, Michael", + "count" => 3, + } + ] + expect(authors).to eq (expected_result) + end end diff --git a/spec/fixtures/files/authors_aggs_with_multiple_name_identifiers.json b/spec/fixtures/files/authors_aggs_with_multiple_name_identifiers.json new file mode 100644 index 000000000..6e7a60547 --- /dev/null +++ b/spec/fixtures/files/authors_aggs_with_multiple_name_identifiers.json @@ -0,0 +1,393 @@ +[ + { + "key": "https://orcid.org/0000-0002-0429-5446", + "doc_count": 28, + "authors": { + "hits": { + "total": { + "value": 28, + "relation": "eq" + }, + "max_score": 13.617611, + "hits": [ + { + "_index": "dois_v1", + "_type": "_doc", + "_id": "42248746", + "_score": 13.617611, + "_source": { + "creators": [ + { + "name": "Nam, Hyung-song", + "nameIdentifiers": [ + { + "nameIdentifier": "https://orcid.org/0000-0002-0429-5446" + } + ] + }, + { + "name": "Capecchi, Mario" + } + ] + } + } + ] + } + } + }, + { + "key": "https://orcid.org/0000-0003-4973-3128", + "doc_count": 12, + "authors": { + "hits": { + "total": { + "value": 12, + "relation": "eq" + }, + "max_score": 27.93923, + "hits": [ + { + "_index": "dois_v1", + "_type": "_doc", + "_id": "10910532", + "_score": 27.93923, + "_source": { + "creators": [ + { + "name": "Casares, Ramón", + "nameIdentifiers": [ + { + "nameIdentifier": "https://orcid.org/0000-0003-4973-3128" + } + ] + } + ] + } + } + ] + } + } + }, + { + "key": "https://orcid.org/0000-0002-3776-4755", + "doc_count": 4, + "authors": { + "hits": { + "total": { + "value": 4, + "relation": "eq" + }, + "max_score": 35.18596, + "hits": [ + { + "_index": "dois_v1", + "_type": "_doc", + "_id": "24003460", + "_score": 35.18596, + "_source": { + "creators": [ + { + "name": "Gomeseria, 
Ronald", + "nameIdentifiers": [ + { + "nameIdentifier": "https://osf.io/8kzbu/" + }, + { + "nameIdentifier": "https://orcid.org/0000-0002-3776-4755" + } + ] + } + ] + } + } + ] + } + } + }, + { + "key": "https://orcid.org/0000-0002-6014-2161", + "doc_count": 4, + "authors": { + "hits": { + "total": { + "value": 4, + "relation": "eq" + }, + "max_score": 21.385294, + "hits": [ + { + "_index": "dois_v1", + "_type": "_doc", + "_id": "42716009", + "_score": 21.385294, + "_source": { + "creators": [ + { + "name": "Kartha, Sivan", + "nameIdentifiers": [ + { + "nameIdentifier": "https://orcid.org/0000-0002-6014-2161" + } + ] + }, + { + "name": "Holz, Christian", + "nameIdentifiers": [ + { + "nameIdentifier": "https://orcid.org/0000-0003-0722-1044" + } + ] + }, + { + "name": "Athanasiou, Tom" + } + ] + } + } + ] + } + } + }, + { + "key": "https://orcid.org/0000-0003-1026-5865", + "doc_count": 4, + "authors": { + "hits": { + "total": { + "value": 4, + "relation": "eq" + }, + "max_score": 23.381191, + "hits": [ + { + "_index": "dois_v1", + "_type": "_doc", + "_id": "65907285", + "_score": 23.381191, + "_source": { + "creators": [ + { + "name": "Schwarz, Nina", + "nameIdentifiers": [ + { + "nameIdentifier": "https://orcid.org/0000-0003-4624-488X" + } + ] + }, + { + "name": "Martinez, Javier", + "nameIdentifiers": [ + { + "nameIdentifier": "https://orcid.org/0000-0001-9634-3849" + } + ] + }, + { + "name": "Willemen, Louise", + "nameIdentifiers": [ + { + "nameIdentifier": "https://orcid.org/0000-0003-1026-5865" + } + ] + } + ] + } + } + ] + } + } + }, + { + "key": "https://orcid.org/0000-0003-4624-488X", + "doc_count": 4, + "authors": { + "hits": { + "total": { + "value": 4, + "relation": "eq" + }, + "max_score": 23.381191, + "hits": [ + { + "_index": "dois_v1", + "_type": "_doc", + "_id": "65907285", + "_score": 23.381191, + "_source": { + "creators": [ + { + "name": "Schwarz, Nina", + "nameIdentifiers": [ + { + "nameIdentifier": "https://orcid.org/0000-0003-4624-488X" + } + ] 
+ }, + { + "name": "Martinez, Javier", + "nameIdentifiers": [ + { + "nameIdentifier": "https://orcid.org/0000-0001-9634-3849" + } + ] + }, + { + "name": "Willemen, Louise", + "nameIdentifiers": [ + { + "nameIdentifier": "https://orcid.org/0000-0003-1026-5865" + } + ] + } + ] + } + } + ] + } + } + }, + { + "key": "https://osf.io/8kzbu/", + "doc_count": 4, + "authors": { + "hits": { + "total": { + "value": 4, + "relation": "eq" + }, + "max_score": 35.18596, + "hits": [ + { + "_index": "dois_v1", + "_type": "_doc", + "_id": "24003460", + "_score": 35.18596, + "_source": { + "creators": [ + { + "name": "Gomeseria, Ronald", + "nameIdentifiers": [ + { + "nameIdentifier": "https://osf.io/8kzbu/" + }, + { + "nameIdentifier": "https://orcid.org/0000-0002-3776-4755" + } + ] + } + ] + } + } + ] + } + } + }, + { + "key": "https://orcid.org/0000-0002-2149-9897", + "doc_count": 3, + "authors": { + "hits": { + "total": { + "value": 3, + "relation": "eq" + }, + "max_score": 27.425684, + "hits": [ + { + "_index": "dois_v1", + "_type": "_doc", + "_id": "42238927", + "_score": 27.425684, + "_source": { + "creators": [ + { + "name": "A, Subaveerapandiyan", + "nameIdentifiers": [ + { + "nameIdentifier": "https://orcid.org/0000-0002-2149-9897" + } + ] + }, + { + "name": "Sinha, Priyanka" + } + ] + } + } + ] + } + } + }, + { + "key": "https://orcid.org/0000-0002-4541-7294", + "doc_count": 3, + "authors": { + "hits": { + "total": { + "value": 3, + "relation": "eq" + }, + "max_score": 23.62671, + "hits": [ + { + "_index": "dois_v1", + "_type": "_doc", + "_id": "41304522", + "_score": 23.62671, + "_source": { + "creators": [ + { + "name": "Puntiroli, Michael", + "nameIdentifiers": [ + { + "nameIdentifier": "https://osf.io/cjhmz/" + }, + { + "nameIdentifier": "https://orcid.org/0000-0002-4541-7294" + } + ] + } + ] + } + } + ] + } + } + }, + { + "key": "https://osf.io/cjhmz/", + "doc_count": 3, + "authors": { + "hits": { + "total": { + "value": 3, + "relation": "eq" + }, + "max_score": 
23.62671, + "hits": [ + { + "_index": "dois_v1", + "_type": "_doc", + "_id": "41304522", + "_score": 23.62671, + "_source": { + "creators": [ + { + "name": "Puntiroli, Michael", + "nameIdentifiers": [ + { + "nameIdentifier": "https://osf.io/cjhmz/" + }, + { + "nameIdentifier": "https://orcid.org/0000-0002-4541-7294" + } + ] + } + ] + } + } + ] + } + } + } +] \ No newline at end of file From 8f309b83c225156e6f9d12c2b744e467afe49a1f Mon Sep 17 00:00:00 2001 From: Cody Ross Date: Fri, 17 Jun 2022 15:09:34 -0400 Subject: [PATCH 2/4] Per akita tooltips, only include ORCIDs in authors facets --- app/controllers/concerns/facetable.rb | 4 ++-- spec/concerns/facetable_spec.rb | 10 ---------- 2 files changed, 2 insertions(+), 12 deletions(-) diff --git a/app/controllers/concerns/facetable.rb b/app/controllers/concerns/facetable.rb index 12334b1e0..ba438f67e 100644 --- a/app/controllers/concerns/facetable.rb +++ b/app/controllers/concerns/facetable.rb @@ -409,8 +409,8 @@ def facet_by_fos(arr) def facet_by_authors(arr) arr.map { |hsh| - orcid_id = hsh["key"] - + orcid_id = %r{\A(?:(http|https)://(orcid.org)/)(.+)\z}.match?(hsh["key"]) && hsh["key"] + # The aggregation query should only return 1 hit, so hence the index # into first element creators = hsh.dig("authors", "hits", "hits")[0].dig("_source", "creators") diff --git a/spec/concerns/facetable_spec.rb b/spec/concerns/facetable_spec.rb index ff1acb3da..3e6313ff4 100644 --- a/spec/concerns/facetable_spec.rb +++ b/spec/concerns/facetable_spec.rb @@ -47,11 +47,6 @@ "title" => "Schwarz, Nina", "count" => 4, }, - { - "id" => "https://osf.io/8kzbu/", - "title" => "Gomeseria, Ronald", - "count" => 4, - }, { "id" => "https://orcid.org/0000-0002-2149-9897", "title" => "A, Subaveerapandiyan", @@ -62,11 +57,6 @@ "title" => "Puntiroli, Michael", "count" => 3, }, - { - "id" => "https://osf.io/cjhmz/", - "title" => "Puntiroli, Michael", - "count" => 3, - } ] expect(authors).to eq (expected_result) end From 
1c289f1a6128c575da53fe76365eee04da9ee3d0 Mon Sep 17 00:00:00 2001 From: Cody Ross Date: Fri, 1 Jul 2022 12:12:34 -0400 Subject: [PATCH 3/4] Addresses error where author aggregation in GraphQL response sometimes returns nil when creator has multiple nameIdentifiers --- app/controllers/concerns/facetable.rb | 4 + app/graphql/types/base_connection.rb | 28 ++--- spec/graphql/types/work_type_spec.rb | 150 ++++++++++++++++++++++++++ 3 files changed, 170 insertions(+), 12 deletions(-) diff --git a/app/controllers/concerns/facetable.rb b/app/controllers/concerns/facetable.rb index ba438f67e..ca81fb1ae 100644 --- a/app/controllers/concerns/facetable.rb +++ b/app/controllers/concerns/facetable.rb @@ -411,6 +411,10 @@ def facet_by_authors(arr) arr.map { |hsh| orcid_id = %r{\A(?:(http|https)://(orcid.org)/)(.+)\z}.match?(hsh["key"]) && hsh["key"] + if orcid_id.nil? + next + end + # The aggregation query should only return 1 hit, so hence the index # into first element creators = hsh.dig("authors", "hits", "hits")[0].dig("_source", "creators") diff --git a/app/graphql/types/base_connection.rb b/app/graphql/types/base_connection.rb index a5dc5ee21..9ab191daa 100644 --- a/app/graphql/types/base_connection.rb +++ b/app/graphql/types/base_connection.rb @@ -198,9 +198,13 @@ def facet_by_language(arr) end def facet_by_authors(arr) - arr.map do |hsh| - orcid_id = hsh["key"] + arr.map { |hsh| + orcid_id = %r{\A(?:(http|https)://(orcid.org)/)(.+)\z}.match?(hsh["key"]) && hsh["key"] + if orcid_id.nil? 
+ next + end + # The aggregation query should only return 1 hit, so hence the index # into first element creators = hsh.dig("authors", "hits", "hits")[0].dig("_source", "creators") @@ -208,19 +212,19 @@ def facet_by_authors(arr) # Filter through creators to find creator that matches the key matched_creator = creators.select do |creator| if creator.key?("nameIdentifiers") - creator["nameIdentifiers"].each do |ni| - break ni["nameIdentifier"] == orcid_id - end + creator["nameIdentifiers"].any? { |ni| ni["nameIdentifier"] == orcid_id } end end - title = matched_creator[0]["name"] + if matched_creator.any? + title = matched_creator[0]["name"] - { - "id" => orcid_id, - "title" => title, - "count" => hsh["doc_count"], - } - end + { + "id" => orcid_id, + "title" => title, + "count" => hsh["doc_count"], + } + end + }.compact end end diff --git a/spec/graphql/types/work_type_spec.rb b/spec/graphql/types/work_type_spec.rb index cbc1cc91f..30e5b63f2 100644 --- a/spec/graphql/types/work_type_spec.rb +++ b/spec/graphql/types/work_type_spec.rb @@ -1051,4 +1051,154 @@ ) end end + + describe "get author aggregations when creators have multiple nameIdentifiers", elasticsearch: true do + let!(:work_one) do + create( + :doi, + aasm_state: "findable", + creators: [ + { + "name" => "Garza, Kristian", + "nameType" => "Personal", + "nameIdentifiers" => [ + { + "nameIdentifier" => "https://orcid.org/0000-0003-3484-6875", + "nameIdentifierScheme" => "ORCID", + "schemeUri" => "https://orcid.org", + }, + { + "nameIdentifier" => "http://id.loc.gov/authorities/names/n90722093", + "nameIdentifierScheme" => "LCNAF", + "schemeUri" => "http://id.loc.gov/authorities/names", + }, + ], + }, + { + "familyName" => "Ross", + "givenName" => "Cody", + "name" => "Ross, Cody", + "nameIdentifiers" => [ + { + "nameIdentifier" => "http://id.loc.gov/authorities/names/no90016802", + "nameIdentifierScheme" => "LCNAF", + "schemeUri" => "http://id.loc.gov/authorities/names", + }, + { + "nameIdentifier" => 
"https://orcid.org/0000-0002-4684-9769", + "nameIdentifierScheme" => "ORCID", + "schemeUri" => "https://orcid.org", + }, + ], + }, + { + "name" => "Cody Ross", + "nameType" => "Personal", + }, + ], + ) + end + + let!(:work_two) do + create( + :doi, + aasm_state: "findable", + creators: [ + { + "name" => "Garza, Kristian", + "nameType" => "Personal", + "nameIdentifiers" => [ + { + "nameIdentifier" => "http://id.loc.gov/authorities/names/n90722093", + "nameIdentifierScheme" => "LCNAF", + "schemeUri" => "http://id.loc.gov/authorities/names", + }, + { + "nameIdentifier" => "https://orcid.org/0000-0003-3484-6875", + "nameIdentifierScheme" => "ORCID", + "schemeUri" => "https://orcid.org", + }, + ], + }, + { + "familyName" => "Ross", + "givenName" => "Cody", + "name" => "Ross, Cody", + "nameIdentifiers" => [ + { + "nameIdentifier" => "https://orcid.org/0000-0002-4684-9769", + "nameIdentifierScheme" => "ORCID", + "schemeUri" => "https://orcid.org", + }, + ], + }, + { + "name" => "Cody Ross", + "nameType" => "Personal", + }, + { + "name" => "Department of Psychoceramics, University of Cambridge", + "nameIdentifiers" => [ + { + "nameIdentifier" => "https://ror.org/013meh722", + "nameIdentifierScheme" => "ROR", + "schemeUri" => "https://ror.org", + }, + ], + "nameType" => "Organizational", + }, + ], + ) + end + + before do + Doi.import + sleep 2 + end + + let(:query_works) do + 'query { + works(query:"") { + authors { + count + id + title + } + nodes { + creators { + id + name + givenName + familyName + affiliation { + id + name + } + } + } + } + }' + end + + it "returns author aggregation that is an array of authors with ORCID nameIdentifiers" do + response = LupoSchema.execute(query_works).as_json + + expect(response.dig("data", "works", "authors").count).to eq(2) + + expect(response.dig("data", "works", "authors")).to eq( + [ + { + "count" => 2, + "id" => "https://orcid.org/0000-0002-4684-9769", + "title" => "Ross, Cody" + }, + { + "count" => 2, + "id" => 
"https://orcid.org/0000-0003-3484-6875", + "title" => "Garza, Kristian" + }, + ] + ) + end + end end From 78be741112e35196e7d1408162867663ead83d80 Mon Sep 17 00:00:00 2001 From: Cody Ross Date: Fri, 1 Jul 2022 12:17:50 -0400 Subject: [PATCH 4/4] Fix linting errors --- app/graphql/types/base_connection.rb | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/app/graphql/types/base_connection.rb b/app/graphql/types/base_connection.rb index 9ab191daa..ee44dc442 100644 --- a/app/graphql/types/base_connection.rb +++ b/app/graphql/types/base_connection.rb @@ -204,7 +204,7 @@ def facet_by_authors(arr) if orcid_id.nil? next end - + # The aggregation query should only return 1 hit, so hence the index # into first element creators = hsh.dig("authors", "hits", "hits")[0].dig("_source", "creators")