diff --git a/app/controllers/concerns/facetable.rb b/app/controllers/concerns/facetable.rb index 07d12e5da..ca81fb1ae 100644 --- a/app/controllers/concerns/facetable.rb +++ b/app/controllers/concerns/facetable.rb @@ -408,8 +408,12 @@ def facet_by_fos(arr) end def facet_by_authors(arr) - arr.map do |hsh| - orcid_id = hsh["key"] + arr.map { |hsh| + orcid_id = %r{\A(?:(http|https)://(orcid.org)/)(.+)\z}.match?(hsh["key"]) && hsh["key"] + + if orcid_id.nil? + next + end # The aggregation query should only return 1 hit, so hence the index # into first element @@ -418,20 +422,20 @@ def facet_by_authors(arr) # Filter through creators to find creator that matches the key matched_creator = creators.select do |creator| if creator.key?("nameIdentifiers") - creator["nameIdentifiers"].each do |ni| - break ni["nameIdentifier"] == orcid_id - end + creator["nameIdentifiers"].any? { |ni| ni["nameIdentifier"] == orcid_id } end end - title = matched_creator[0]["name"] + if matched_creator.any? + title = matched_creator[0]["name"] - { - "id" => orcid_id, - "title" => title, - "count" => hsh["doc_count"], - } - end + { + "id" => orcid_id, + "title" => title, + "count" => hsh["doc_count"], + } + end + }.compact end end end diff --git a/app/graphql/types/base_connection.rb b/app/graphql/types/base_connection.rb index a5dc5ee21..ee44dc442 100644 --- a/app/graphql/types/base_connection.rb +++ b/app/graphql/types/base_connection.rb @@ -198,8 +198,12 @@ def facet_by_language(arr) end def facet_by_authors(arr) - arr.map do |hsh| - orcid_id = hsh["key"] + arr.map { |hsh| + orcid_id = %r{\A(?:(http|https)://(orcid.org)/)(.+)\z}.match?(hsh["key"]) && hsh["key"] + + if orcid_id.nil? + next + end # The aggregation query should only return 1 hit, so hence the index # into first element @@ -208,19 +212,19 @@ def facet_by_authors(arr) # Filter through creators to find creator that matches the key matched_creator = creators.select do |creator| if creator.key?("nameIdentifiers") - creator["nameIdentifiers"].each do |ni| - break ni["nameIdentifier"] == orcid_id - end + creator["nameIdentifiers"].any? { |ni| ni["nameIdentifier"] == orcid_id } end end - title = matched_creator[0]["name"] + if matched_creator.any? + title = matched_creator[0]["name"] - { - "id" => orcid_id, - "title" => title, - "count" => hsh["doc_count"], - } - end + { + "id" => orcid_id, + "title" => title, + "count" => hsh["doc_count"], + } + end + }.compact end end diff --git a/spec/concerns/facetable_spec.rb b/spec/concerns/facetable_spec.rb index 8ebae8950..3e6313ff4 100644 --- a/spec/concerns/facetable_spec.rb +++ b/spec/concerns/facetable_spec.rb @@ -4,6 +4,7 @@ describe "Facetable", type: :controller do let(:author_aggs) { JSON.parse(file_fixture("authors_aggs.json").read) } + let(:author_aggs_with_multiple_name_identifiers) { JSON.parse(file_fixture("authors_aggs_with_multiple_name_identifiers.json").read) } let(:model) { DataciteDoisController.new } it "facet by author" do authors = model.facet_by_authors(author_aggs) @@ -11,6 +12,54 @@ expected_result = [{ "id" => "https://orcid.org/0000-0003-1419-2405", "title" => "Fenner, Martin", "count" => 244 }, { "id" => "https://orcid.org/0000-0001-9570-8121", "title" => "Lambert, Simon", "count" => 23 }] expect(authors).to eq (expected_result) end + + it "facet by author where author may have multiple nameIdentifiers" do + authors = model.facet_by_authors(author_aggs_with_multiple_name_identifiers) + + expected_result = [ + { + "id" => "https://orcid.org/0000-0002-0429-5446", + "title" => "Nam, Hyung-song", + "count" => 28, + }, + { + "id" => "https://orcid.org/0000-0003-4973-3128", + "title" => "Casares, Ramón", + "count" => 12, + }, + { + "id" => "https://orcid.org/0000-0002-3776-4755", + "title" => "Gomeseria, Ronald", + "count" => 4, + }, + { + "id" => "https://orcid.org/0000-0002-6014-2161", + "title" => "Kartha, Sivan", + "count" => 4, + }, + { + "id" => "https://orcid.org/0000-0003-1026-5865", + "title" => "Willemen, Louise", + "count" => 4, + }, + { + "id" => "https://orcid.org/0000-0003-4624-488X", + "title" => "Schwarz, Nina", + "count" => 4, + }, + { + "id" => "https://orcid.org/0000-0002-2149-9897", + "title" => "A, Subaveerapandiyan", + "count" => 3, + }, + { + "id" => "https://orcid.org/0000-0002-4541-7294", + "title" => "Puntiroli, Michael", + "count" => 3, + }, + ] + expect(authors).to eq (expected_result) + end end diff --git a/spec/fixtures/files/authors_aggs_with_multiple_name_identifiers.json b/spec/fixtures/files/authors_aggs_with_multiple_name_identifiers.json new file mode 100644 index 000000000..6e7a60547 --- /dev/null +++ b/spec/fixtures/files/authors_aggs_with_multiple_name_identifiers.json @@ -0,0 +1,393 @@ +[ + { + "key": "https://orcid.org/0000-0002-0429-5446", + "doc_count": 28, + "authors": { + "hits": { + "total": { + "value": 28, + "relation": "eq" + }, + "max_score": 13.617611, + "hits": [ + { + "_index": "dois_v1", + "_type": "_doc", + "_id": "42248746", + "_score": 13.617611, + "_source": { + "creators": [ + { + "name": "Nam, Hyung-song", + "nameIdentifiers": [ + { + "nameIdentifier": "https://orcid.org/0000-0002-0429-5446" + } + ] + }, + { + "name": "Capecchi, Mario" + } + ] + } + } + ] + } + } + }, + { + "key": "https://orcid.org/0000-0003-4973-3128", + "doc_count": 12, + "authors": { + "hits": { + "total": { + "value": 12, + "relation": "eq" + }, + "max_score": 27.93923, + "hits": [ + { + "_index": "dois_v1", + "_type": "_doc", + "_id": "10910532", + "_score": 27.93923, + "_source": { + "creators": [ + { + "name": "Casares, Ramón", + "nameIdentifiers": [ + { + "nameIdentifier": "https://orcid.org/0000-0003-4973-3128" + } + ] + } + ] + } + } + ] + } + } + }, + { + "key": "https://orcid.org/0000-0002-3776-4755", + "doc_count": 4, + "authors": { + "hits": { + "total": { + "value": 4, + "relation": "eq" + }, + "max_score": 35.18596, + "hits": [ + { + "_index": "dois_v1", + "_type": "_doc", + "_id": "24003460", + "_score": 35.18596, + "_source": { + "creators": [ + { + "name": "Gomeseria, Ronald", + "nameIdentifiers": [ + { + "nameIdentifier": "https://osf.io/8kzbu/" + }, + { + "nameIdentifier": "https://orcid.org/0000-0002-3776-4755" + } + ] + } + ] + } + } + ] + } + } + }, + { + "key": "https://orcid.org/0000-0002-6014-2161", + "doc_count": 4, + "authors": { + "hits": { + "total": { + "value": 4, + "relation": "eq" + }, + "max_score": 21.385294, + "hits": [ + { + "_index": "dois_v1", + "_type": "_doc", + "_id": "42716009", + "_score": 21.385294, + "_source": { + "creators": [ + { + "name": "Kartha, Sivan", + "nameIdentifiers": [ + { + "nameIdentifier": "https://orcid.org/0000-0002-6014-2161" + } + ] + }, + { + "name": "Holz, Christian", + "nameIdentifiers": [ + { + "nameIdentifier": "https://orcid.org/0000-0003-0722-1044" + } + ] + }, + { + "name": "Athanasiou, Tom" + } + ] + } + } + ] + } + } + }, + { + "key": "https://orcid.org/0000-0003-1026-5865", + "doc_count": 4, + "authors": { + "hits": { + "total": { + "value": 4, + "relation": "eq" + }, + "max_score": 23.381191, + "hits": [ + { + "_index": "dois_v1", + "_type": "_doc", + "_id": "65907285", + "_score": 23.381191, + "_source": { + "creators": [ + { + "name": "Schwarz, Nina", + "nameIdentifiers": [ + { + "nameIdentifier": "https://orcid.org/0000-0003-4624-488X" + } + ] + }, + { + "name": "Martinez, Javier", + "nameIdentifiers": [ + { + "nameIdentifier": "https://orcid.org/0000-0001-9634-3849" + } + ] + }, + { + "name": "Willemen, Louise", + "nameIdentifiers": [ + { + "nameIdentifier": "https://orcid.org/0000-0003-1026-5865" + } + ] + } + ] + } + } + ] + } + } + }, + { + "key": "https://orcid.org/0000-0003-4624-488X", + "doc_count": 4, + "authors": { + "hits": { + "total": { + "value": 4, + "relation": "eq" + }, + "max_score": 23.381191, + "hits": [ + { + "_index": "dois_v1", + "_type": "_doc", + "_id": "65907285", + "_score": 23.381191, + "_source": { + "creators": [ + { + "name": "Schwarz, Nina", + "nameIdentifiers": [ + { + "nameIdentifier": "https://orcid.org/0000-0003-4624-488X" + } + ] + }, + { + "name": "Martinez, Javier", + "nameIdentifiers": [ + { + "nameIdentifier": "https://orcid.org/0000-0001-9634-3849" + } + ] + }, + { + "name": "Willemen, Louise", + "nameIdentifiers": [ + { + "nameIdentifier": "https://orcid.org/0000-0003-1026-5865" + } + ] + } + ] + } + } + ] + } + } + }, + { + "key": "https://osf.io/8kzbu/", + "doc_count": 4, + "authors": { + "hits": { + "total": { + "value": 4, + "relation": "eq" + }, + "max_score": 35.18596, + "hits": [ + { + "_index": "dois_v1", + "_type": "_doc", + "_id": "24003460", + "_score": 35.18596, + "_source": { + "creators": [ + { + "name": "Gomeseria, Ronald", + "nameIdentifiers": [ + { + "nameIdentifier": "https://osf.io/8kzbu/" + }, + { + "nameIdentifier": "https://orcid.org/0000-0002-3776-4755" + } + ] + } + ] + } + } + ] + } + } + }, + { + "key": "https://orcid.org/0000-0002-2149-9897", + "doc_count": 3, + "authors": { + "hits": { + "total": { + "value": 3, + "relation": "eq" + }, + "max_score": 27.425684, + "hits": [ + { + "_index": "dois_v1", + "_type": "_doc", + "_id": "42238927", + "_score": 27.425684, + "_source": { + "creators": [ + { + "name": "A, Subaveerapandiyan", + "nameIdentifiers": [ + { + "nameIdentifier": "https://orcid.org/0000-0002-2149-9897" + } + ] + }, + { + "name": "Sinha, Priyanka" + } + ] + } + } + ] + } + } + }, + { + "key": "https://orcid.org/0000-0002-4541-7294", + "doc_count": 3, + "authors": { + "hits": { + "total": { + "value": 3, + "relation": "eq" + }, + "max_score": 23.62671, + "hits": [ + { + "_index": "dois_v1", + "_type": "_doc", + "_id": "41304522", + "_score": 23.62671, + "_source": { + "creators": [ + { + "name": "Puntiroli, Michael", + "nameIdentifiers": [ + { + "nameIdentifier": "https://osf.io/cjhmz/" + }, + { + "nameIdentifier": "https://orcid.org/0000-0002-4541-7294" + } + ] + } + ] + } + } + ] + } + } + }, + { + "key": "https://osf.io/cjhmz/", + "doc_count": 3, + "authors": { + "hits": { + "total": { + "value": 3, + "relation": "eq" + }, + "max_score": 23.62671, + "hits": [ + { + "_index": "dois_v1", + "_type": "_doc", + "_id": "41304522", + "_score": 23.62671, + "_source": { + "creators": [ + { + "name": "Puntiroli, Michael", + "nameIdentifiers": [ + { + "nameIdentifier": "https://osf.io/cjhmz/" + }, + { + "nameIdentifier": "https://orcid.org/0000-0002-4541-7294" + } + ] + } + ] + } + } + ] + } + } + } +] \ No newline at end of file diff --git a/spec/graphql/types/work_type_spec.rb b/spec/graphql/types/work_type_spec.rb index cbc1cc91f..30e5b63f2 100644 --- a/spec/graphql/types/work_type_spec.rb +++ b/spec/graphql/types/work_type_spec.rb @@ -1051,4 +1051,154 @@ ) end end + + describe "get author aggregations when creators have multiple nameIdentifiers", elasticsearch: true do + let!(:work_one) do + create( + :doi, + aasm_state: "findable", + creators: [ + { + "name" => "Garza, Kristian", + "nameType" => "Personal", + "nameIdentifiers" => [ + { + "nameIdentifier" => "https://orcid.org/0000-0003-3484-6875", + "nameIdentifierScheme" => "ORCID", + "schemeUri" => "https://orcid.org", + }, + { + "nameIdentifier" => "http://id.loc.gov/authorities/names/n90722093", + "nameIdentifierScheme" => "LCNAF", + "schemeUri" => "http://id.loc.gov/authorities/names", + }, + ], + }, + { + "familyName" => "Ross", + "givenName" => "Cody", + "name" => "Ross, Cody", + "nameIdentifiers" => [ + { + "nameIdentifier" => "http://id.loc.gov/authorities/names/no90016802", + "nameIdentifierScheme" => "LCNAF", + "schemeUri" => "http://id.loc.gov/authorities/names", + }, + { + "nameIdentifier" => "https://orcid.org/0000-0002-4684-9769", + "nameIdentifierScheme" => "ORCID", + "schemeUri" => "https://orcid.org", + }, + ], + }, + { + "name" => "Cody Ross", + "nameType" => "Personal", + }, + ], + ) + end + + let!(:work_two) do + create( + :doi, + aasm_state: "findable", + creators: [ + { + "name" => "Garza, Kristian", + "nameType" => "Personal", + "nameIdentifiers" => [ + { + "nameIdentifier" => "http://id.loc.gov/authorities/names/n90722093", + "nameIdentifierScheme" => "LCNAF", + "schemeUri" => "http://id.loc.gov/authorities/names", + }, + { + "nameIdentifier" => "https://orcid.org/0000-0003-3484-6875", + "nameIdentifierScheme" => "ORCID", + "schemeUri" => "https://orcid.org", + }, + ], + }, + { + "familyName" => "Ross", + "givenName" => "Cody", + "name" => "Ross, Cody", + "nameIdentifiers" => [ + { + "nameIdentifier" => "https://orcid.org/0000-0002-4684-9769", + "nameIdentifierScheme" => "ORCID", + "schemeUri" => "https://orcid.org", + }, + ], + }, + { + "name" => "Cody Ross", + "nameType" => "Personal", + }, + { + "name" => "Department of Psychoceramics, University of Cambridge", + "nameIdentifiers" => [ + { + "nameIdentifier" => "https://ror.org/013meh722", + "nameIdentifierScheme" => "ROR", + "schemeUri" => "https://ror.org", + }, + ], + "nameType" => "Organizational", + }, + ], + ) + end + + before do + Doi.import + sleep 2 + end + + let(:query_works) do + 'query { + works(query:"") { + authors { + count + id + title + } + nodes { + creators { + id + name + givenName + familyName + affiliation { + id + name + } + } + } + } + }' + end + + it "returns author aggregation that is an array of authors with ORCID nameIdentifiers" do + response = LupoSchema.execute(query_works).as_json + + expect(response.dig("data", "works", "authors").count).to eq(2) + + expect(response.dig("data", "works", "authors")).to eq( + [ + { + "count" => 2, + "id" => "https://orcid.org/0000-0002-4684-9769", + "title" => "Ross, Cody" + }, + { + "count" => 2, + "id" => "https://orcid.org/0000-0003-3484-6875", + "title" => "Garza, Kristian" + }, + ] + ) + end + end end