From cb7ba117fd27ac097699c8825e5ab2a23bb04691 Mon Sep 17 00:00:00 2001 From: jrhoads Date: Tue, 19 Nov 2024 13:49:27 +0100 Subject: [PATCH 1/8] Add more specs to the query builder for filters --- spec/models/doi/graphql_query_builder_spec.rb | 192 +++++++++++++++++- 1 file changed, 184 insertions(+), 8 deletions(-) diff --git a/spec/models/doi/graphql_query_builder_spec.rb b/spec/models/doi/graphql_query_builder_spec.rb index c506a9017..da466fb4a 100644 --- a/spec/models/doi/graphql_query_builder_spec.rb +++ b/spec/models/doi/graphql_query_builder_spec.rb @@ -58,18 +58,194 @@ end end - describe "filters" do - it "is an empty array if not set" do - expect(described_class.new("", {}).filters).to eq([]) - expect(described_class.new(nil, {}).filters).to eq([]) +describe "#filters" do + let(:query) { "" } + let(:options) { {} } + let(:builder) { described_class.new(query, options) } + + context "with basic filters" do + context "when filtering by DOI ids" do + let(:options) { { ids: "10.5438/0012,10.5438/0013" } } + + it "includes DOI terms filter" do + expect(builder.filters).to include( + { terms: { doi: ["10.5438/0012", "10.5438/0013"].map(&:upcase) } } + ) + end + end + + context "when filtering by resource type" do + let(:options) { { resource_type: "dataset,text" } } + + it "includes resource type terms filter" do + expect(builder.filters).to include( + { terms: { "types.resourceType": ["dataset", "text"] } } + ) + end end - it "can filter for ids" do - expect(described_class.new("foo", { ids: ["bar"] }).filters).to eq([{ terms: { doi: ["BAR"] } }]) + context "when filtering by language" do + let(:options) { { language: "en,de" } } + + it "includes language terms filter" do + expect(builder.filters).to include( + { terms: { language: ["en", "de"].map(&:downcase) } } + ) + end end + end + + context "with date range filters" do + let(:options) { { published: "2020,2022" } } - it "can filter for ids as single string" do - expect(described_class.new("foo", { ids: 
"bar" }).filters).to eq([{ terms: { doi: ["BAR"] } }]) + it "handles publication year range" do + expect(builder.filters).to include( + { range: { publication_year: { gte: "2020||/y", lte: "2022||/y", format: "yyyy" } } } + ) + end + + context "when filtering by created date" do + let(:options) { { created: "2021,2023" } } + + it "handles created date range" do + expect(builder.filters).to include( + { range: { created: { gte: "2021||/y", lte: "2023||/y", format: "yyyy" } } } + ) + end + end end + + context "with count-based filters" do + let(:options) { { has_views: "10" } } + + it "handles view count threshold" do + expect(builder.filters).to include( + { range: { view_count: { gte: 10 } } } + ) + end + + context "when filtering by citations" do + let(:options) { { has_citations: "5" } } + + it "handles citation count threshold" do + expect(builder.filters).to include( + { range: { citation_count: { gte: 5 } } } + ) + end + end + end + + context "with subject-based filters" do + let(:options) { { pid_entity: "dataset,software" } } + + it "handles pid entity filters" do + expect(builder.filters).to include( + { term: { "subjects.subjectScheme": "PidEntity" } }, + { terms: { "subjects.subject": ["Dataset", "Software"] } } + ) + end + + context "when filtering by field of science" do + let(:options) { { field_of_science: "computer_science,mathematics" } } + + it "handles field of science filters" do + expect(builder.filters).to include( + { term: { "subjects.subjectScheme": "Fields of Science and Technology (FOS)" } }, + { terms: { "subjects.subject": ["FOS: Computer science", "FOS: Mathematics"] } } + ) + end + end + end + + context "with landing page filters" do + let(:options) { { link_check_status: "200" } } + + it "handles landing page status" do + expect(builder.filters).to include( + { term: { "landing_page.status": "200" } } + ) + end + + context "with schema.org check" do + let(:options) { { link_check_has_schema_org: true } } + + it "handles schema.org 
presence check" do + expect(builder.filters).to include( + { term: { "landing_page.hasSchemaOrg": true } } + ) + end + end + end + + context "with identifier filters" do + context "with ids" do + it "can filter for ids" do + expect(described_class.new("foo", { ids: ["bar"] }).filters).to eq([{ terms: { doi: ["BAR"] } }]) + end + + it "can filter for ids as single string" do + expect(described_class.new("foo", { ids: "bar" }).filters).to eq([{ terms: { doi: ["BAR"] } }]) + end + end + + context "with certificate" do + let(:options) { { certificate: "CoreTrustSeal,CLARIN" } } + + it "handles client certificate" do + expect(builder.filters).to include( + { terms: { "client.certificate" => ["CoreTrustSeal", "CLARIN"] } } + ) + end + end + + context "with ORCID" do + let(:options) { { user_id: "https://orcid.org/0000-0003-1419-2405" } } + + it "handles user ORCID" do + expect(builder.filters).to include( + { terms: { "creators.nameIdentifiers.nameIdentifier" => ["https://orcid.org/0000-0003-1419-2405"] } } + ) + end + end + end + + context "with multiple filters" do + it "combines different filter types" do + options = { + resource_type: "dataset", + published: "2020,2022", + has_citations: "5", + language: "en" + } + + builder = described_class.new(query, options) + filters = builder.filters + + expect(filters).to include( + { terms: { "types.resourceType": ["dataset"] } }, + { range: { publication_year: { gte: "2020||/y", lte: "2022||/y", format: "yyyy" } } }, + { range: { citation_count: { gte: 5 } } }, + { terms: { language: ["en"] } } + ) + expect(filters.length).to eq(4) + end + end + + context "with empty or invalid filters" do + it "handles empty options" do + builder = described_class.new(query, {}) + expect(builder.filters).to be_empty + end + + it "handles nil values" do + options = { resource_type: nil, language: nil } + builder = described_class.new(query, options) + expect(builder.filters).to be_empty + end + end + end + + describe "filters" do + end end 
From c7a938c29dc36bb57e51db53b3b28eca8b4685e4 Mon Sep 17 00:00:00 2001 From: jrhoads Date: Wed, 20 Nov 2024 12:25:50 +0100 Subject: [PATCH 2/8] Remove unnecessary context blocks. Refactor inline --- spec/models/doi/graphql_query_builder_spec.rb | 137 ++++++++---------- 1 file changed, 57 insertions(+), 80 deletions(-) diff --git a/spec/models/doi/graphql_query_builder_spec.rb b/spec/models/doi/graphql_query_builder_spec.rb index da466fb4a..cd25db1b4 100644 --- a/spec/models/doi/graphql_query_builder_spec.rb +++ b/spec/models/doi/graphql_query_builder_spec.rb @@ -4,9 +4,9 @@ require "rails_helper" RSpec.describe Doi::GraphqlQuery::Builder do + let(:query) { "" } + let(:options) { {} } describe "page size" do - let(:query) { "" } - let(:options) { {} } let(:builder) { described_class.new(query, options) } it "is DEFAULT_PAGE_SIZE with no options" do @@ -23,8 +23,6 @@ end describe "cursor" do - let(:query) { "" } - let(:options) { {} } let(:builder) { described_class.new(query, options) } it "is DEFAULT_CURSOR with no options" do @@ -58,129 +56,114 @@ end end -describe "#filters" do - let(:query) { "" } - let(:options) { {} } - let(:builder) { described_class.new(query, options) } +describe "filters" do context "with basic filters" do - context "when filtering by DOI ids" do - let(:options) { { ids: "10.5438/0012,10.5438/0013" } } - - it "includes DOI terms filter" do + it "handles DOI ids" do + options = { ids: "10.5438/0012,10.5438/0013" } + builder = described_class.new(query, options) expect(builder.filters).to include( { terms: { doi: ["10.5438/0012", "10.5438/0013"].map(&:upcase) } } ) end - end - context "when filtering by resource type" do - let(:options) { { resource_type: "dataset,text" } } - - it "includes resource type terms filter" do + it "handles resource type" do + options = { resource_type: "dataset,text" } + builder = described_class.new(query, options) expect(builder.filters).to include( { terms: { "types.resourceType": ["dataset", "text"] } } ) end 
- end - - context "when filtering by language" do - let(:options) { { language: "en,de" } } - it "includes language terms filter" do + it "handles language" do + options = { language: "en,de" } + builder = described_class.new(query, options) expect(builder.filters).to include( { terms: { language: ["en", "de"].map(&:downcase) } } ) end - end end context "with date range filters" do - let(:options) { { published: "2020,2022" } } - it "handles publication year range" do + options = { published: "2020,2022" } + builder = described_class.new(query, options) expect(builder.filters).to include( { range: { publication_year: { gte: "2020||/y", lte: "2022||/y", format: "yyyy" } } } ) end - context "when filtering by created date" do - let(:options) { { created: "2021,2023" } } - - it "handles created date range" do - expect(builder.filters).to include( - { range: { created: { gte: "2021||/y", lte: "2023||/y", format: "yyyy" } } } - ) - end + it "handles created date range" do + options = { created: "2021,2023" } + builder = described_class.new(query, options) + expect(builder.filters).to include( + { range: { created: { gte: "2021||/y", lte: "2023||/y", format: "yyyy" } } } + ) end end context "with count-based filters" do - let(:options) { { has_views: "10" } } - it "handles view count threshold" do + options = { has_views: "10" } + builder = described_class.new(query, options) expect(builder.filters).to include( { range: { view_count: { gte: 10 } } } ) end - context "when filtering by citations" do - let(:options) { { has_citations: "5" } } - - it "handles citation count threshold" do - expect(builder.filters).to include( - { range: { citation_count: { gte: 5 } } } - ) - end + it "handles citation count threshold" do + options = { has_citations: "5" } + builder = described_class.new(query, options) + expect(builder.filters).to include( + { range: { citation_count: { gte: 5 } } } + ) end end context "with subject-based filters" do - let(:options) { { pid_entity: 
"dataset,software" } } - it "handles pid entity filters" do + options = { pid_entity: "dataset,software" } + builder = described_class.new(query, options) expect(builder.filters).to include( { term: { "subjects.subjectScheme": "PidEntity" } }, { terms: { "subjects.subject": ["Dataset", "Software"] } } ) end - context "when filtering by field of science" do - let(:options) { { field_of_science: "computer_science,mathematics" } } - - it "handles field of science filters" do - expect(builder.filters).to include( - { term: { "subjects.subjectScheme": "Fields of Science and Technology (FOS)" } }, - { terms: { "subjects.subject": ["FOS: Computer science", "FOS: Mathematics"] } } - ) - end + it "handles field of science filters" do + options = { field_of_science: "computer_science,mathematics" } + builder = described_class.new(query, options) + expect(builder.filters).to include( + { term: { "subjects.subjectScheme": "Fields of Science and Technology (FOS)" } }, + { terms: { "subjects.subject": ["FOS: Computer science", "FOS: Mathematics"] } } + ) end end context "with landing page filters" do - let(:options) { { link_check_status: "200" } } - it "handles landing page status" do + options = { link_check_status: "200" } + builder = described_class.new(query, options) expect(builder.filters).to include( { term: { "landing_page.status": "200" } } ) end - context "with schema.org check" do - let(:options) { { link_check_has_schema_org: true } } - - it "handles schema.org presence check" do - expect(builder.filters).to include( - { term: { "landing_page.hasSchemaOrg": true } } - ) - end + it "handles schema.org presence check" do + options = { link_check_has_schema_org: true } + builder = described_class.new(query, options) + expect(builder.filters).to include( + { term: { "landing_page.hasSchemaOrg": true } } + ) end end context "with identifier filters" do context "with ids" do it "can filter for ids" do - expect(described_class.new("foo", { ids: ["bar"] }).filters).to eq([{ 
terms: { doi: ["BAR"] } }]) + expect(described_class.new("foo", { ids: ["bar"] }).filters).to eq( + [{ terms: { doi: ["BAR"] } }] + ) end it "can filter for ids as single string" do @@ -188,25 +171,18 @@ end end - context "with certificate" do - let(:options) { { certificate: "CoreTrustSeal,CLARIN" } } - - it "handles client certificate" do - expect(builder.filters).to include( - { terms: { "client.certificate" => ["CoreTrustSeal", "CLARIN"] } } - ) - end + it "handles client certificate" do + builder = described_class.new(query, { certificate: "CoreTrustSeal,CLARIN" }) + expect(builder.filters).to include( + { terms: { "client.certificate" => ["CoreTrustSeal", "CLARIN"] } } + ) end - context "with ORCID" do - let(:options) { { user_id: "https://orcid.org/0000-0003-1419-2405" } } - it "handles user ORCID" do - expect(builder.filters).to include( + expect(described_class.new(query, { user_id: "https://orcid.org/0000-0003-1419-2405" }).filters).to include( { terms: { "creators.nameIdentifiers.nameIdentifier" => ["https://orcid.org/0000-0003-1419-2405"] } } ) end - end end context "with multiple filters" do @@ -245,7 +221,8 @@ end end - describe "filters" do + describe "sorting" do + + end - end end From 39699dc9cf00f3a9b7af415cbcaef0eda238df37 Mon Sep 17 00:00:00 2001 From: jrhoads Date: Wed, 20 Nov 2024 12:47:09 +0100 Subject: [PATCH 3/8] Set and spec default sorting --- app/models/doi/graphql_query.rb | 3 +- spec/models/doi/graphql_query_builder_spec.rb | 64 +++++++++++-------- 2 files changed, 41 insertions(+), 26 deletions(-) diff --git a/app/models/doi/graphql_query.rb b/app/models/doi/graphql_query.rb index 660de86ab..e7e899e21 100644 --- a/app/models/doi/graphql_query.rb +++ b/app/models/doi/graphql_query.rb @@ -7,6 +7,7 @@ class Builder DEFAULT_CURSOR = [0, ""] DEFAULT_PAGE_SIZE = 0 DEFAULT_FACET_COUNT = 10 + DEFAULT_SORT = [{ created: "asc", uid: "asc" }] def initialize(query, options) @query = query @@ -29,7 +30,7 @@ def size end def sort - [{ created: 
"asc", uid: "asc" }] + DEFAULT_SORT end def query_fields diff --git a/spec/models/doi/graphql_query_builder_spec.rb b/spec/models/doi/graphql_query_builder_spec.rb index cd25db1b4..74ea60656 100644 --- a/spec/models/doi/graphql_query_builder_spec.rb +++ b/spec/models/doi/graphql_query_builder_spec.rb @@ -56,33 +56,32 @@ end end -describe "filters" do - - context "with basic filters" do - it "handles DOI ids" do - options = { ids: "10.5438/0012,10.5438/0013" } - builder = described_class.new(query, options) - expect(builder.filters).to include( - { terms: { doi: ["10.5438/0012", "10.5438/0013"].map(&:upcase) } } - ) - end + describe "filters" do + context "with basic filters" do + it "handles DOI ids" do + options = { ids: "10.5438/0012,10.5438/0013" } + builder = described_class.new(query, options) + expect(builder.filters).to include( + { terms: { doi: ["10.5438/0012", "10.5438/0013"].map(&:upcase) } } + ) + end - it "handles resource type" do - options = { resource_type: "dataset,text" } - builder = described_class.new(query, options) - expect(builder.filters).to include( - { terms: { "types.resourceType": ["dataset", "text"] } } - ) - end + it "handles resource type" do + options = { resource_type: "dataset,text" } + builder = described_class.new(query, options) + expect(builder.filters).to include( + { terms: { "types.resourceType": ["dataset", "text"] } } + ) + end - it "handles language" do - options = { language: "en,de" } - builder = described_class.new(query, options) - expect(builder.filters).to include( - { terms: { language: ["en", "de"].map(&:downcase) } } - ) - end - end + it "handles language" do + options = { language: "en,de" } + builder = described_class.new(query, options) + expect(builder.filters).to include( + { terms: { language: ["en", "de"].map(&:downcase) } } + ) + end + end context "with date range filters" do it "handles publication year range" do @@ -222,6 +221,21 @@ end describe "sorting" do + let(:builder) { described_class.new(query, 
options) } + + context "with no sort options" do + it "uses default sort" do + expect(builder.sort).to eq(described_class::DEFAULT_SORT) + end + end + + context "with sort options" do + let(:options) { { sort: "relevance" } } + it "ignores any sort options and returns the default" do + expect(builder.sort).to eq(described_class::DEFAULT_SORT) + + end + end end From b10d89d35f9b6af991820acb505b8ba2cfe0a1ac Mon Sep 17 00:00:00 2001 From: jrhoads Date: Fri, 22 Nov 2024 11:09:01 +0100 Subject: [PATCH 4/8] Add more specs for filters --- spec/models/doi/graphql_query_builder_spec.rb | 102 ++++++++++++++++-- 1 file changed, 93 insertions(+), 9 deletions(-) diff --git a/spec/models/doi/graphql_query_builder_spec.rb b/spec/models/doi/graphql_query_builder_spec.rb index 74ea60656..3134a6e3b 100644 --- a/spec/models/doi/graphql_query_builder_spec.rb +++ b/spec/models/doi/graphql_query_builder_spec.rb @@ -66,6 +66,15 @@ ) end + it "handles resource_type_id" do + options = { resource_type_id: "Journal_Article" } + builder = described_class.new(query, options) + expect(builder.filters).to include( + { term: { resource_type_id: "journal-article" } } + ) + + end + it "handles resource type" do options = { resource_type: "dataset,text" } builder = described_class.new(query, options) @@ -74,6 +83,23 @@ ) end + + it "handles agency" do + options = {agency: "crossref"} + builder = described_class.new(query, options) + expect(builder.filters).to include( + { terms: { agency: ["crossref"].map(&:downcase) } } + ) + end + + it "handles prefix" do + options = {prefix: "10.5438"} + builder = described_class.new(query, options) + expect(builder.filters).to include( + { terms: { prefix: ["10.5438"].map(&:downcase) } } + ) + end + it "handles language" do options = { language: "en,de" } builder = described_class.new(query, options) @@ -81,6 +107,14 @@ { terms: { language: ["en", "de"].map(&:downcase) } } ) end + + it "handles uid" do + options = { uid: "10.5438/0012" } + builder = 
described_class.new(query, options) + expect(builder.filters).to include( + { term: { uid: "10.5438/0012" } } + ) + end end context "with date range filters" do @@ -102,11 +136,11 @@ end context "with count-based filters" do - it "handles view count threshold" do - options = { has_views: "10" } + it "handles reference count threshold" do + options = { has_references: "5" } builder = described_class.new(query, options) expect(builder.filters).to include( - { range: { view_count: { gte: 10 } } } + { range: { reference_count: { gte: 5 } } } ) end @@ -117,6 +151,54 @@ { range: { citation_count: { gte: 5 } } } ) end + + it "handles part count threshold" do + options = { has_parts: "10" } + builder = described_class.new(query, options) + expect(builder.filters).to include( + { range: { part_count: { gte: 10 } } } + ) + end + + it "handles part of count threshold" do + options = { has_part_of: "10" } + builder = described_class.new(query, options) + expect(builder.filters).to include( + { range: { part_of_count: { gte: 10 } } } + ) + end + + it "handles version count threshold" do + options = { has_versions: "10" } + builder = described_class.new(query, options) + expect(builder.filters).to include( + { range: { version_count: { gte: 10 } } } + ) + end + + it "handles version of count threshold" do + options = { has_version_of: "10" } + builder = described_class.new(query, options) + expect(builder.filters).to include( + { range: { version_of_count: { gte: 10 } } } + ) + end + + it "handles view count threshold" do + options = { has_views: "10" } + builder = described_class.new(query, options) + expect(builder.filters).to include( + { range: { view_count: { gte: 10 } } } + ) + end + + it "handles download count threshold" do + options = { has_downloads: "10" } + builder = described_class.new(query, options) + expect(builder.filters).to include( + { range: { download_count: { gte: 10 } } } + ) + end end context "with subject-based filters" do @@ -177,11 +259,11 @@ ) end - 
it "handles user ORCID" do - expect(described_class.new(query, { user_id: "https://orcid.org/0000-0003-1419-2405" }).filters).to include( - { terms: { "creators.nameIdentifiers.nameIdentifier" => ["https://orcid.org/0000-0003-1419-2405"] } } - ) - end + it "handles user ORCID" do + expect(described_class.new(query, { user_id: "https://orcid.org/0000-0003-1419-2405" }).filters).to include( + { terms: { "creators.nameIdentifiers.nameIdentifier" => ["https://orcid.org/0000-0003-1419-2405"] } } + ) + end end context "with multiple filters" do @@ -218,6 +300,9 @@ expect(builder.filters).to be_empty end end + + + end describe "sorting" do @@ -236,7 +321,6 @@ end end - end end From 787b74f2318ac4fbcb620483bec55d4959166a27 Mon Sep 17 00:00:00 2001 From: jrhoads Date: Fri, 29 Nov 2024 15:48:31 +0100 Subject: [PATCH 5/8] Add more specs for filters --- spec/models/doi/graphql_query_builder_spec.rb | 52 +++++++++++++++++++ 1 file changed, 52 insertions(+) diff --git a/spec/models/doi/graphql_query_builder_spec.rb b/spec/models/doi/graphql_query_builder_spec.rb index 3134a6e3b..d08a8b63e 100644 --- a/spec/models/doi/graphql_query_builder_spec.rb +++ b/spec/models/doi/graphql_query_builder_spec.rb @@ -115,6 +115,58 @@ { term: { uid: "10.5438/0012" } } ) end + + it "handles state" do + options = { state: "findable,registered" } + builder = described_class.new(query, options) + expect(builder.filters).to include( + { terms: { aasm_state: ["findable", "registered"] } } + ) + end + + it "handles consortium_id" do + options = { consortium_id: "dc" } + builder = described_class.new(query, options) + expect(builder.filters).to include( + { term: { consortium_id: { :case_insensitive => true, :value => "dc" } } } + ) + end + + it "handles registered" do + options = { registered: "2021,2023" } + builder = described_class.new(query, options) + expect(builder.filters).to include( + { range: { registered: { gte: "2021||/y", lte: "2023||/y", format: "yyyy" } } } + ) + end + + end + + context 
"filters based on client metadata" do + it "handles re3data_id" do + options = { re3data_id: "10.17616/r31njmjx" } + builder = described_class.new(query, options) + expect(builder.filters).to include( + { term: { "client.re3data_id" => "10.17616/r31njmjx" } } + ) + end + + it "handles opendoar_id" do + options = { opendoar_id: "123456" } + builder = described_class.new(query, options) + expect(builder.filters).to include( + { term: { "client.opendoar_id" => "123456" } } + ) + end + + it "handles certificates" do + options = { certificate: "CoreTrustSeal,WDS" } + builder = described_class.new(query, options) + expect(builder.filters).to include( + { terms: { "client.certificate" => ["CoreTrustSeal", "WDS"] } } + ) + end + end context "with date range filters" do From 1ad7b27749d96fd44194cbcf3b8c895e7fc4c0c4 Mon Sep 17 00:00:00 2001 From: jrhoads Date: Fri, 29 Nov 2024 16:26:39 +0100 Subject: [PATCH 6/8] Refactor filters specs into their own file --- .../doi/graphql_query_builder_filters_spec.rb | 311 +++++++++++++++++ spec/models/doi/graphql_query_builder_spec.rb | 319 +----------------- 2 files changed, 320 insertions(+), 310 deletions(-) create mode 100644 spec/models/doi/graphql_query_builder_filters_spec.rb diff --git a/spec/models/doi/graphql_query_builder_filters_spec.rb b/spec/models/doi/graphql_query_builder_filters_spec.rb new file mode 100644 index 000000000..84aa88316 --- /dev/null +++ b/spec/models/doi/graphql_query_builder_filters_spec.rb @@ -0,0 +1,311 @@ + + +# frozen_string_literal: true + +require "rails_helper" + +RSpec.describe Doi::GraphqlQuery::Builder do + let(:query) { "" } + let(:options) { {} } + + describe "filters" do + context "with basic filters" do + it "handles DOI ids" do + options = { ids: "10.5438/0012,10.5438/0013" } + builder = described_class.new(query, options) + expect(builder.filters).to include( + { terms: { doi: ["10.5438/0012", "10.5438/0013"].map(&:upcase) } } + ) + end + + it "handles resource_type_id" do + options = { 
resource_type_id: "Journal_Article" } + builder = described_class.new(query, options) + expect(builder.filters).to include( + { term: { resource_type_id: "journal-article" } } + ) + + end + + it "handles resource type" do + options = { resource_type: "dataset,text" } + builder = described_class.new(query, options) + expect(builder.filters).to include( + { terms: { "types.resourceType": ["dataset", "text"] } } + ) + end + + + it "handles agency" do + options = {agency: "crossref"} + builder = described_class.new(query, options) + expect(builder.filters).to include( + { terms: { agency: ["crossref"].map(&:downcase) } } + ) + end + + it "handles prefix" do + options = {prefix: "10.5438"} + builder = described_class.new(query, options) + expect(builder.filters).to include( + { terms: { prefix: ["10.5438"].map(&:downcase) } } + ) + end + + it "handles language" do + options = { language: "en,de" } + builder = described_class.new(query, options) + expect(builder.filters).to include( + { terms: { language: ["en", "de"].map(&:downcase) } } + ) + end + + it "handles uid" do + options = { uid: "10.5438/0012" } + builder = described_class.new(query, options) + expect(builder.filters).to include( + { term: { uid: "10.5438/0012" } } + ) + end + + it "handles state" do + options = { state: "findable,registered" } + builder = described_class.new(query, options) + expect(builder.filters).to include( + { terms: { aasm_state: ["findable", "registered"] } } + ) + end + + it "handles consortium_id" do + options = { consortium_id: "dc" } + builder = described_class.new(query, options) + expect(builder.filters).to include( + { term: { consortium_id: { :case_insensitive => true, :value => "dc" } } } + ) + end + + it "handles registered" do + options = { registered: "2021,2023" } + builder = described_class.new(query, options) + expect(builder.filters).to include( + { range: { registered: { gte: "2021||/y", lte: "2023||/y", format: "yyyy" } } } + ) + end + + end + + context "filters based 
on client metadata" do + it "handles re3data_id" do + options = { re3data_id: "10.17616/r31njmjx" } + builder = described_class.new(query, options) + expect(builder.filters).to include( + { term: { "client.re3data_id" => "10.17616/r31njmjx" } } + ) + end + + it "handles opendoar_id" do + options = { opendoar_id: "123456" } + builder = described_class.new(query, options) + expect(builder.filters).to include( + { term: { "client.opendoar_id" => "123456" } } + ) + end + + it "handles certificates" do + options = { certificate: "CoreTrustSeal,WDS" } + builder = described_class.new(query, options) + expect(builder.filters).to include( + { terms: { "client.certificate" => ["CoreTrustSeal", "WDS"] } } + ) + end + + end + + context "with date range filters" do + it "handles publication year range" do + options = { published: "2020,2022" } + builder = described_class.new(query, options) + expect(builder.filters).to include( + { range: { publication_year: { gte: "2020||/y", lte: "2022||/y", format: "yyyy" } } } + ) + end + + it "handles created date range" do + options = { created: "2021,2023" } + builder = described_class.new(query, options) + expect(builder.filters).to include( + { range: { created: { gte: "2021||/y", lte: "2023||/y", format: "yyyy" } } } + ) + end + end + + context "with count-based filters" do + it "handles reference count threshold" do + options = { has_references: "5" } + builder = described_class.new(query, options) + expect(builder.filters).to include( + { range: { reference_count: { gte: 5 } } } + ) + end + + it "handles citation count threshold" do + options = { has_citations: "5" } + builder = described_class.new(query, options) + expect(builder.filters).to include( + { range: { citation_count: { gte: 5 } } } + ) + end + + it "handles part count threshold" do + options = { has_parts: "10" } + builder = described_class.new(query, options) + expect(builder.filters).to include( + { range: { part_count: { gte: 10 } } } + ) + end + + it "handles part 
of count threshold" do + options = { has_part_of: "10" } + builder = described_class.new(query, options) + expect(builder.filters).to include( + { range: { part_of_count: { gte: 10 } } } + ) + end + + it "handles version count threshold" do + options = { has_versions: "10" } + builder = described_class.new(query, options) + expect(builder.filters).to include( + { range: { version_count: { gte: 10 } } } + ) + end + + it "handles version of count threshold" do + options = { has_version_of: "10" } + builder = described_class.new(query, options) + expect(builder.filters).to include( + { range: { version_of_count: { gte: 10 } } } + ) + end + + it "handles view count threshold" do + options = { has_views: "10" } + builder = described_class.new(query, options) + expect(builder.filters).to include( + { range: { view_count: { gte: 10 } } } + ) + end + + it "handles download count threshold" do + options = { has_downloads: "10" } + builder = described_class.new(query, options) + expect(builder.filters).to include( + { range: { download_count: { gte: 10 } } } + ) + end + end + + context "with subject-based filters" do + it "handles pid entity filters" do + options = { pid_entity: "dataset,software" } + builder = described_class.new(query, options) + expect(builder.filters).to include( + { term: { "subjects.subjectScheme": "PidEntity" } }, + { terms: { "subjects.subject": ["Dataset", "Software"] } } + ) + end + + it "handles field of science filters" do + options = { field_of_science: "computer_science,mathematics" } + builder = described_class.new(query, options) + expect(builder.filters).to include( + { term: { "subjects.subjectScheme": "Fields of Science and Technology (FOS)" } }, + { terms: { "subjects.subject": ["FOS: Computer science", "FOS: Mathematics"] } } + ) + end + end + + context "with landing page filters" do + it "handles landing page status" do + options = { link_check_status: "200" } + builder = described_class.new(query, options) + expect(builder.filters).to 
include( + { term: { "landing_page.status": "200" } } + ) + end + + it "handles schema.org presence check" do + options = { link_check_has_schema_org: true } + builder = described_class.new(query, options) + expect(builder.filters).to include( + { term: { "landing_page.hasSchemaOrg": true } } + ) + end + end + + context "with identifier filters" do + context "with ids" do + it "can filter for ids" do + expect(described_class.new("foo", { ids: ["bar"] }).filters).to eq( + [{ terms: { doi: ["BAR"] } }] + ) + end + + it "can filter for ids as single string" do + expect(described_class.new("foo", { ids: "bar" }).filters).to eq([{ terms: { doi: ["BAR"] } }]) + end + end + + it "handles client certificate" do + builder = described_class.new(query, { certificate: "CoreTrustSeal,CLARIN" }) + expect(builder.filters).to include( + { terms: { "client.certificate" => ["CoreTrustSeal", "CLARIN"] } } + ) + end + + it "handles user ORCID" do + expect(described_class.new(query, { user_id: "https://orcid.org/0000-0003-1419-2405" }).filters).to include( + { terms: { "creators.nameIdentifiers.nameIdentifier" => ["https://orcid.org/0000-0003-1419-2405"] } } + ) + end + end + + context "with multiple filters" do + it "combines different filter types" do + options = { + resource_type: "dataset", + published: "2020,2022", + has_citations: "5", + language: "en" + } + + builder = described_class.new(query, options) + filters = builder.filters + + expect(filters).to include( + { terms: { "types.resourceType": ["dataset"] } }, + { range: { publication_year: { gte: "2020||/y", lte: "2022||/y", format: "yyyy" } } }, + { range: { citation_count: { gte: 5 } } }, + { terms: { language: ["en"] } } + ) + expect(filters.length).to eq(4) + end + end + + context "with empty or invalid filters" do + it "handles empty options" do + builder = described_class.new(query, {}) + expect(builder.filters).to be_empty + end + + it "handles nil values" do + options = { resource_type: nil, language: nil } + 
builder = described_class.new(query, options) + expect(builder.filters).to be_empty + end + end + + + + end +end diff --git a/spec/models/doi/graphql_query_builder_spec.rb b/spec/models/doi/graphql_query_builder_spec.rb index d08a8b63e..683eb74a6 100644 --- a/spec/models/doi/graphql_query_builder_spec.rb +++ b/spec/models/doi/graphql_query_builder_spec.rb @@ -56,323 +56,22 @@ end end - describe "filters" do - context "with basic filters" do - it "handles DOI ids" do - options = { ids: "10.5438/0012,10.5438/0013" } - builder = described_class.new(query, options) - expect(builder.filters).to include( - { terms: { doi: ["10.5438/0012", "10.5438/0013"].map(&:upcase) } } - ) - end - - it "handles resource_type_id" do - options = { resource_type_id: "Journal_Article" } - builder = described_class.new(query, options) - expect(builder.filters).to include( - { term: { resource_type_id: "journal-article" } } - ) - - end - - it "handles resource type" do - options = { resource_type: "dataset,text" } - builder = described_class.new(query, options) - expect(builder.filters).to include( - { terms: { "types.resourceType": ["dataset", "text"] } } - ) - end - - - it "handles agency" do - options = {agency: "crossref"} - builder = described_class.new(query, options) - expect(builder.filters).to include( - { terms: { agency: ["crossref"].map(&:downcase) } } - ) - end - - it "handles prefix" do - options = {prefix: "10.5438"} - builder = described_class.new(query, options) - expect(builder.filters).to include( - { terms: { prefix: ["10.5438"].map(&:downcase) } } - ) - end - - it "handles language" do - options = { language: "en,de" } - builder = described_class.new(query, options) - expect(builder.filters).to include( - { terms: { language: ["en", "de"].map(&:downcase) } } - ) - end - - it "handles uid" do - options = { uid: "10.5438/0012" } - builder = described_class.new(query, options) - expect(builder.filters).to include( - { term: { uid: "10.5438/0012" } } - ) - end - - it 
"handles state" do - options = { state: "findable,registered" } - builder = described_class.new(query, options) - expect(builder.filters).to include( - { terms: { aasm_state: ["findable", "registered"] } } - ) - end - - it "handles consortium_id" do - options = { consortium_id: "dc" } - builder = described_class.new(query, options) - expect(builder.filters).to include( - { term: { consortium_id: { :case_insensitive => true, :value => "dc" } } } - ) - end - - it "handles registered" do - options = { registered: "2021,2023" } - builder = described_class.new(query, options) - expect(builder.filters).to include( - { range: { registered: { gte: "2021||/y", lte: "2023||/y", format: "yyyy" } } } - ) - end - - end - - context "filters based on client metadata" do - it "handles re3data_id" do - options = { re3data_id: "10.17616/r31njmjx" } - builder = described_class.new(query, options) - expect(builder.filters).to include( - { term: { "client.re3data_id" => "10.17616/r31njmjx" } } - ) - end - - it "handles opendoar_id" do - options = { opendoar_id: "123456" } - builder = described_class.new(query, options) - expect(builder.filters).to include( - { term: { "client.opendoar_id" => "123456" } } - ) - end - - it "handles certificates" do - options = { certificate: "CoreTrustSeal,WDS" } - builder = described_class.new(query, options) - expect(builder.filters).to include( - { terms: { "client.certificate" => ["CoreTrustSeal", "WDS"] } } - ) - end - - end - - context "with date range filters" do - it "handles publication year range" do - options = { published: "2020,2022" } - builder = described_class.new(query, options) - expect(builder.filters).to include( - { range: { publication_year: { gte: "2020||/y", lte: "2022||/y", format: "yyyy" } } } - ) - end - - it "handles created date range" do - options = { created: "2021,2023" } - builder = described_class.new(query, options) - expect(builder.filters).to include( - { range: { created: { gte: "2021||/y", lte: "2023||/y", format: 
"yyyy" } } } - ) - end - end - - context "with count-based filters" do - it "handles reference count threshold" do - options = { has_references: "5" } - builder = described_class.new(query, options) - expect(builder.filters).to include( - { range: { reference_count: { gte: 5 } } } - ) - end - - it "handles citation count threshold" do - options = { has_citations: "5" } - builder = described_class.new(query, options) - expect(builder.filters).to include( - { range: { citation_count: { gte: 5 } } } - ) - end - - it "handles part count threshold" do - options = { has_parts: "10" } - builder = described_class.new(query, options) - expect(builder.filters).to include( - { range: { part_count: { gte: 10 } } } - ) - end - - it "handles part of count threshold" do - options = { has_part_of: "10" } - builder = described_class.new(query, options) - expect(builder.filters).to include( - { range: { part_of_count: { gte: 10 } } } - ) - end - - it "handles version count threshold" do - options = { has_versions: "10" } - builder = described_class.new(query, options) - expect(builder.filters).to include( - { range: { version_count: { gte: 10 } } } - ) - end - - it "handles version of count threshold" do - options = { has_version_of: "10" } - builder = described_class.new(query, options) - expect(builder.filters).to include( - { range: { version_of_count: { gte: 10 } } } - ) - end - - it "handles view count threshold" do - options = { has_views: "10" } - builder = described_class.new(query, options) - expect(builder.filters).to include( - { range: { view_count: { gte: 10 } } } - ) - end - - it "handles download count threshold" do - options = { has_downloads: "10" } - builder = described_class.new(query, options) - expect(builder.filters).to include( - { range: { download_count: { gte: 10 } } } - ) - end - end - - context "with subject-based filters" do - it "handles pid entity filters" do - options = { pid_entity: "dataset,software" } - builder = described_class.new(query, options) 
- expect(builder.filters).to include( - { term: { "subjects.subjectScheme": "PidEntity" } }, - { terms: { "subjects.subject": ["Dataset", "Software"] } } - ) - end - - it "handles field of science filters" do - options = { field_of_science: "computer_science,mathematics" } - builder = described_class.new(query, options) - expect(builder.filters).to include( - { term: { "subjects.subjectScheme": "Fields of Science and Technology (FOS)" } }, - { terms: { "subjects.subject": ["FOS: Computer science", "FOS: Mathematics"] } } - ) - end - end - - context "with landing page filters" do - it "handles landing page status" do - options = { link_check_status: "200" } - builder = described_class.new(query, options) - expect(builder.filters).to include( - { term: { "landing_page.status": "200" } } - ) - end - - it "handles schema.org presence check" do - options = { link_check_has_schema_org: true } - builder = described_class.new(query, options) - expect(builder.filters).to include( - { term: { "landing_page.hasSchemaOrg": true } } - ) - end - end - - context "with identifier filters" do - context "with ids" do - it "can filter for ids" do - expect(described_class.new("foo", { ids: ["bar"] }).filters).to eq( - [{ terms: { doi: ["BAR"] } }] - ) - end - - it "can filter for ids as single string" do - expect(described_class.new("foo", { ids: "bar" }).filters).to eq([{ terms: { doi: ["BAR"] } }]) - end - end - - it "handles client certificate" do - builder = described_class.new(query, { certificate: "CoreTrustSeal,CLARIN" }) - expect(builder.filters).to include( - { terms: { "client.certificate" => ["CoreTrustSeal", "CLARIN"] } } - ) - end + describe "sorting" do + let(:builder) { described_class.new(query, options) } - it "handles user ORCID" do - expect(described_class.new(query, { user_id: "https://orcid.org/0000-0003-1419-2405" }).filters).to include( - { terms: { "creators.nameIdentifiers.nameIdentifier" => ["https://orcid.org/0000-0003-1419-2405"] } } - ) + context "with no 
sort options" do + it "uses default sort" do + expect(builder.sort).to eq(described_class::DEFAULT_SORT) end end - context "with multiple filters" do - it "combines different filter types" do - options = { - resource_type: "dataset", - published: "2020,2022", - has_citations: "5", - language: "en" - } - - builder = described_class.new(query, options) - filters = builder.filters + context "with sort options" do + let(:options) { { sort: "relevance" } } + it "ignores any sort options and returns the default" do + expect(builder.sort).to eq(described_class::DEFAULT_SORT) - expect(filters).to include( - { terms: { "types.resourceType": ["dataset"] } }, - { range: { publication_year: { gte: "2020||/y", lte: "2022||/y", format: "yyyy" } } }, - { range: { citation_count: { gte: 5 } } }, - { terms: { language: ["en"] } } - ) - expect(filters.length).to eq(4) end end - - context "with empty or invalid filters" do - it "handles empty options" do - builder = described_class.new(query, {}) - expect(builder.filters).to be_empty - end - - it "handles nil values" do - options = { resource_type: nil, language: nil } - builder = described_class.new(query, options) - expect(builder.filters).to be_empty - end - end - - - end - describe "sorting" do - let(:builder) { described_class.new(query, options) } - - context "with no sort options" do - it "uses default sort" do - expect(builder.sort).to eq(described_class::DEFAULT_SORT) - end - end - - context "with sort options" do - let(:options) { { sort: "relevance" } } - it "ignores any sort options and returns the default" do - expect(builder.sort).to eq(described_class::DEFAULT_SORT) - - end - end - end - end From eafbeaea32656559258bc6675dd87a79e82e3b9a Mon Sep 17 00:00:00 2001 From: jrhoads Date: Fri, 29 Nov 2024 16:49:04 +0100 Subject: [PATCH 7/8] Add spec for aggregates --- .../graphql_query_builder_aggregates_spec.rb | 212 ++++++++++++++++++ 1 file changed, 212 insertions(+) create mode 100644 
spec/models/doi/graphql_query_builder_aggregates_spec.rb diff --git a/spec/models/doi/graphql_query_builder_aggregates_spec.rb b/spec/models/doi/graphql_query_builder_aggregates_spec.rb new file mode 100644 index 000000000..d3853eebe --- /dev/null +++ b/spec/models/doi/graphql_query_builder_aggregates_spec.rb @@ -0,0 +1,212 @@ +# frozen_string_literal: true + +require "rails_helper" + +RSpec.describe Doi::GraphqlQuery::Builder do + let(:query) { "" } + let(:options) { {} } + + describe "aggregations" do + it "by default all aggregations are enabled" do + builder = described_class.new(query, options) + expect(builder.aggregations).to eq( + { + :affiliations=>{:terms=>{:field=>"affiliation_id_and_name", :min_doc_count=>1, :missing=>"__missing__", :size=>10}}, + :authors=>{ + :aggs=>{:authors=>{ + :top_hits=>{:_source=>{ + :includes=>["creators.name", "creators.nameIdentifiers.nameIdentifier"], + }, :size=>1}, + }}, + :terms=>{:field=>"creators.nameIdentifiers.nameIdentifier", :include=>"https?://orcid.org/.*", :min_doc_count=>1, :size=>10}, + }, + :citation_count=>{ + :sum=>{:field=>"citation_count"}, + }, + :client_types=>{ + :terms=>{:field=>"client.client_type", :min_doc_count=>1, :size=>10}, + }, + :clients=>{ + :terms=>{:field=>"client_id_and_name", :min_doc_count=>1, :size=>10}, + }, + :content_url_count=>{ + :value_count=>{:field=>"content_url"}, + }, + :creators_and_contributors=>{ + :aggs=>{ + :creators_and_contributors=>{ + :top_hits=>{:_source=>{ + :includes=>["creators_and_contributors.name", "creators_and_contributors.nameIdentifiers.nameIdentifier"], + }, :size=>1}, + }, + :work_types=>{:terms=>{:field=>"resource_type_id_and_name", :min_doc_count=>1}}, + }, + :terms=>{ + :field=>"creators_and_contributors.nameIdentifiers.nameIdentifier", + :include=>"https?://orcid.org/.*", + :min_doc_count=>1, + :size=>10, + }, + }, + :download_count=>{ + :sum=>{:field=>"download_count"}, + }, + :fields_of_science=>{ + :aggs=>{ + :subject=>{ + :terms=>{ + 
:field=>"subjects.subject", + :include=>"FOS:.*", + :min_doc_count=>1, + :size=>10, + }, + }, + }, + :filter=>{ + :term=>{:"subjects.subjectScheme"=>"Fields of Science and Technology (FOS)"}, + }, + }, + :fields_of_science_combined=>{ + :terms=>{ + :field=>"fields_of_science_combined", + :min_doc_count=>1, + :size=>10, + }, + }, + :fields_of_science_repository=>{ + :terms=>{ + :field=>"fields_of_science_repository", + :min_doc_count=>1, + :size=>10, + }, + }, + :funders=>{ + :aggs=>{:funders=>{:top_hits=>{:_source=>{:includes=>["funding_references.funderName", "funding_references.funderIdentifier"]}, :size=>1}}}, + :terms=>{ + :field=>"funding_references.funderIdentifier", + :min_doc_count=>1, + :size=>10, + }, + }, + :languages=>{ + :terms=>{ + :field=>"language", + :min_doc_count=>1, + :size=>10, + }, + }, + :licenses=>{ + :terms=>{ + :field=>"rights_list.rightsIdentifier", + :min_doc_count=>1, + :missing=>"__missing__", + :size=>10, + }, + }, + :open_licenses=>{ + :aggs=>{ + :resource_types=>{ + :terms=>{ + :field=>"resource_type_id_and_name", + :min_doc_count=>1, + :size=>10, + }, + }, + }, + :filter=>{ + :terms=>{:"rights_list.rightsIdentifier"=>[ + "cc-by-1.0", + "cc-by-2.0", + "cc-by-2.5", + "cc-by-3.0", + "cc-by-3.0-at", + "cc-by-3.0-us", + "cc-by-4.0", + "cc-pddc", + "cc0-1.0", + "cc-pdm-1.0", + ]}, + }, + }, + :pid_entities=>{ + :aggs=>{ + :subject=>{ + :terms=>{ + :field=>"subjects.subject", + :include=>[ + "Dataset", + "Publication", + "Software", + "Organization", + "Funder", + "Person", + "Grant", + "Sample", + "Instrument", + "Repository", + "Project", + ], + :min_doc_count=>1, + :size=>10, + }, + }, + }, + :filter=>{:term=>{:"subjects.subjectScheme"=>"PidEntity"}}, + }, + :published=>{ + :date_histogram=>{ + :field=>"publication_year", + :format=>"year", + :interval=>"year", + :min_doc_count=>1, + :order=>{:_key=>"desc"}, + }, + }, + :registration_agencies=>{ + :terms=>{:field=>"agency", :min_doc_count=>1, :size=>10}, + }, + 
:resource_types=>{:terms=>{ + :field=>"resource_type_id_and_name", + :min_doc_count=>1, + :missing=>"__missing__", + :size=>10, + }}, + :view_count=>{ + :sum=>{:field=>"view_count"}, + }, + } + ) + end + + it "has keys for all aggregates" do + expected_keys = %i[ + affiliations + authors + citation_count + client_types + clients + content_url_count + creators_and_contributors + download_count + fields_of_science + fields_of_science_combined + fields_of_science_repository + funders + languages + licenses + open_licenses + pid_entities + published + registration_agencies + resource_types + view_count + ] + + builder = described_class.new(query, options) + expect(builder.aggregations.keys).to match_array(expected_keys) + end + end + + + +end From 39d6ece26efb0b4ba005731333b61c8c55854699 Mon Sep 17 00:00:00 2001 From: jrhoads Date: Thu, 12 Dec 2024 12:05:15 +0100 Subject: [PATCH 8/8] Appease rubocop --- app/models/doi/graphql_query.rb | 2 +- .../graphql_query_builder_aggregates_spec.rb | 223 +++++++++--------- .../doi/graphql_query_builder_filters_spec.rb | 188 +++++++-------- spec/models/doi/graphql_query_builder_spec.rb | 2 - 4 files changed, 202 insertions(+), 213 deletions(-) diff --git a/app/models/doi/graphql_query.rb b/app/models/doi/graphql_query.rb index e7e899e21..46611b534 100644 --- a/app/models/doi/graphql_query.rb +++ b/app/models/doi/graphql_query.rb @@ -30,7 +30,7 @@ def size end def sort - DEFAULT_SORT + DEFAULT_SORT end def query_fields diff --git a/spec/models/doi/graphql_query_builder_aggregates_spec.rb b/spec/models/doi/graphql_query_builder_aggregates_spec.rb index d3853eebe..9c10e0911 100644 --- a/spec/models/doi/graphql_query_builder_aggregates_spec.rb +++ b/spec/models/doi/graphql_query_builder_aggregates_spec.rb @@ -11,110 +11,110 @@ builder = described_class.new(query, options) expect(builder.aggregations).to eq( { - :affiliations=>{:terms=>{:field=>"affiliation_id_and_name", :min_doc_count=>1, :missing=>"__missing__", :size=>10}}, - 
:authors=>{ - :aggs=>{:authors=>{ - :top_hits=>{:_source=>{ - :includes=>["creators.name", "creators.nameIdentifiers.nameIdentifier"], - }, :size=>1}, - }}, - :terms=>{:field=>"creators.nameIdentifiers.nameIdentifier", :include=>"https?://orcid.org/.*", :min_doc_count=>1, :size=>10}, - }, - :citation_count=>{ - :sum=>{:field=>"citation_count"}, - }, - :client_types=>{ - :terms=>{:field=>"client.client_type", :min_doc_count=>1, :size=>10}, - }, - :clients=>{ - :terms=>{:field=>"client_id_and_name", :min_doc_count=>1, :size=>10}, - }, - :content_url_count=>{ - :value_count=>{:field=>"content_url"}, - }, - :creators_and_contributors=>{ - :aggs=>{ - :creators_and_contributors=>{ - :top_hits=>{:_source=>{ - :includes=>["creators_and_contributors.name", "creators_and_contributors.nameIdentifiers.nameIdentifier"], - }, :size=>1}, + affiliations: { terms: { field: "affiliation_id_and_name", min_doc_count: 1, missing: "__missing__", size: 10 } }, + authors: { + aggs: { authors: { + top_hits: { _source: { + includes: ["creators.name", "creators.nameIdentifiers.nameIdentifier"], + }, size: 1 }, + } }, + terms: { field: "creators.nameIdentifiers.nameIdentifier", include: "https?://orcid.org/.*", min_doc_count: 1, size: 10 }, + }, + citation_count: { + sum: { field: "citation_count" }, + }, + client_types: { + terms: { field: "client.client_type", min_doc_count: 1, size: 10 }, + }, + clients: { + terms: { field: "client_id_and_name", min_doc_count: 1, size: 10 }, + }, + content_url_count: { + value_count: { field: "content_url" }, + }, + creators_and_contributors: { + aggs: { + creators_and_contributors: { + top_hits: { _source: { + includes: ["creators_and_contributors.name", "creators_and_contributors.nameIdentifiers.nameIdentifier"], + }, size: 1 }, }, - :work_types=>{:terms=>{:field=>"resource_type_id_and_name", :min_doc_count=>1}}, + work_types: { terms: { field: "resource_type_id_and_name", min_doc_count: 1 } }, }, - :terms=>{ - 
:field=>"creators_and_contributors.nameIdentifiers.nameIdentifier", - :include=>"https?://orcid.org/.*", - :min_doc_count=>1, - :size=>10, + terms: { + field: "creators_and_contributors.nameIdentifiers.nameIdentifier", + include: "https?://orcid.org/.*", + min_doc_count: 1, + size: 10, }, }, - :download_count=>{ - :sum=>{:field=>"download_count"}, - }, - :fields_of_science=>{ - :aggs=>{ - :subject=>{ - :terms=>{ - :field=>"subjects.subject", - :include=>"FOS:.*", - :min_doc_count=>1, - :size=>10, + download_count: { + sum: { field: "download_count" }, + }, + fields_of_science: { + aggs: { + subject: { + terms: { + field: "subjects.subject", + include: "FOS:.*", + min_doc_count: 1, + size: 10, }, }, }, - :filter=>{ - :term=>{:"subjects.subjectScheme"=>"Fields of Science and Technology (FOS)"}, + filter: { + term: { "subjects.subjectScheme": "Fields of Science and Technology (FOS)" }, }, }, - :fields_of_science_combined=>{ - :terms=>{ - :field=>"fields_of_science_combined", - :min_doc_count=>1, - :size=>10, + fields_of_science_combined: { + terms: { + field: "fields_of_science_combined", + min_doc_count: 1, + size: 10, }, }, - :fields_of_science_repository=>{ - :terms=>{ - :field=>"fields_of_science_repository", - :min_doc_count=>1, - :size=>10, + fields_of_science_repository: { + terms: { + field: "fields_of_science_repository", + min_doc_count: 1, + size: 10, }, }, - :funders=>{ - :aggs=>{:funders=>{:top_hits=>{:_source=>{:includes=>["funding_references.funderName", "funding_references.funderIdentifier"]}, :size=>1}}}, - :terms=>{ - :field=>"funding_references.funderIdentifier", - :min_doc_count=>1, - :size=>10, + funders: { + aggs: { funders: { top_hits: { _source: { includes: ["funding_references.funderName", "funding_references.funderIdentifier"] }, size: 1 } } }, + terms: { + field: "funding_references.funderIdentifier", + min_doc_count: 1, + size: 10, }, }, - :languages=>{ - :terms=>{ - :field=>"language", - :min_doc_count=>1, - :size=>10, + languages: { + 
terms: { + field: "language", + min_doc_count: 1, + size: 10, }, }, - :licenses=>{ - :terms=>{ - :field=>"rights_list.rightsIdentifier", - :min_doc_count=>1, - :missing=>"__missing__", - :size=>10, + licenses: { + terms: { + field: "rights_list.rightsIdentifier", + min_doc_count: 1, + missing: "__missing__", + size: 10, }, }, - :open_licenses=>{ - :aggs=>{ - :resource_types=>{ - :terms=>{ - :field=>"resource_type_id_and_name", - :min_doc_count=>1, - :size=>10, + open_licenses: { + aggs: { + resource_types: { + terms: { + field: "resource_type_id_and_name", + min_doc_count: 1, + size: 10, }, }, }, - :filter=>{ - :terms=>{:"rights_list.rightsIdentifier"=>[ + filter: { + terms: { "rights_list.rightsIdentifier": [ "cc-by-1.0", "cc-by-2.0", "cc-by-2.5", @@ -125,15 +125,15 @@ "cc-pddc", "cc0-1.0", "cc-pdm-1.0", - ]}, + ] }, }, }, - :pid_entities=>{ - :aggs=>{ - :subject=>{ - :terms=>{ - :field=>"subjects.subject", - :include=>[ + pid_entities: { + aggs: { + subject: { + terms: { + field: "subjects.subject", + include: [ "Dataset", "Publication", "Software", @@ -146,33 +146,33 @@ "Repository", "Project", ], - :min_doc_count=>1, - :size=>10, + min_doc_count: 1, + size: 10, }, }, }, - :filter=>{:term=>{:"subjects.subjectScheme"=>"PidEntity"}}, - }, - :published=>{ - :date_histogram=>{ - :field=>"publication_year", - :format=>"year", - :interval=>"year", - :min_doc_count=>1, - :order=>{:_key=>"desc"}, + filter: { term: { "subjects.subjectScheme": "PidEntity" } }, + }, + published: { + date_histogram: { + field: "publication_year", + format: "year", + interval: "year", + min_doc_count: 1, + order: { _key: "desc" }, }, }, - :registration_agencies=>{ - :terms=>{:field=>"agency", :min_doc_count=>1, :size=>10}, + registration_agencies: { + terms: { field: "agency", min_doc_count: 1, size: 10 }, }, - :resource_types=>{:terms=>{ - :field=>"resource_type_id_and_name", - :min_doc_count=>1, - :missing=>"__missing__", - :size=>10, - }}, - :view_count=>{ - :sum=>{:field=>"view_count"}, 
+ resource_types: { terms: { + field: "resource_type_id_and_name", + min_doc_count: 1, + missing: "__missing__", + size: 10, + } }, + view_count: { + sum: { field: "view_count" }, }, } ) @@ -206,7 +206,4 @@ expect(builder.aggregations.keys).to match_array(expected_keys) end end - - - end diff --git a/spec/models/doi/graphql_query_builder_filters_spec.rb b/spec/models/doi/graphql_query_builder_filters_spec.rb index 84aa88316..261ef7af2 100644 --- a/spec/models/doi/graphql_query_builder_filters_spec.rb +++ b/spec/models/doi/graphql_query_builder_filters_spec.rb @@ -10,115 +10,112 @@ describe "filters" do context "with basic filters" do - it "handles DOI ids" do - options = { ids: "10.5438/0012,10.5438/0013" } - builder = described_class.new(query, options) - expect(builder.filters).to include( - { terms: { doi: ["10.5438/0012", "10.5438/0013"].map(&:upcase) } } - ) - end - - it "handles resource_type_id" do - options = { resource_type_id: "Journal_Article" } - builder = described_class.new(query, options) - expect(builder.filters).to include( - { term: { resource_type_id: "journal-article" } } - ) - - end + it "handles DOI ids" do + options = { ids: "10.5438/0012,10.5438/0013" } + builder = described_class.new(query, options) + expect(builder.filters).to include( + { terms: { doi: ["10.5438/0012", "10.5438/0013"].map(&:upcase) } } + ) + end - it "handles resource type" do - options = { resource_type: "dataset,text" } - builder = described_class.new(query, options) - expect(builder.filters).to include( - { terms: { "types.resourceType": ["dataset", "text"] } } - ) - end + it "handles resource_type_id" do + options = { resource_type_id: "Journal_Article" } + builder = described_class.new(query, options) + expect(builder.filters).to include( + { term: { resource_type_id: "journal-article" } } + ) + end + it "handles resource type" do + options = { resource_type: "dataset,text" } + builder = described_class.new(query, options) + expect(builder.filters).to include( + { 
terms: { "types.resourceType": ["dataset", "text"] } } + ) + end - it "handles agency" do - options = {agency: "crossref"} - builder = described_class.new(query, options) - expect(builder.filters).to include( - { terms: { agency: ["crossref"].map(&:downcase) } } - ) - end - it "handles prefix" do - options = {prefix: "10.5438"} - builder = described_class.new(query, options) - expect(builder.filters).to include( - { terms: { prefix: ["10.5438"].map(&:downcase) } } - ) - end + it "handles agency" do + options = { agency: "crossref" } + builder = described_class.new(query, options) + expect(builder.filters).to include( + { terms: { agency: ["crossref"].map(&:downcase) } } + ) + end - it "handles language" do - options = { language: "en,de" } - builder = described_class.new(query, options) - expect(builder.filters).to include( - { terms: { language: ["en", "de"].map(&:downcase) } } - ) - end + it "handles prefix" do + options = { prefix: "10.5438" } + builder = described_class.new(query, options) + expect(builder.filters).to include( + { terms: { prefix: ["10.5438"].map(&:downcase) } } + ) + end - it "handles uid" do - options = { uid: "10.5438/0012" } - builder = described_class.new(query, options) - expect(builder.filters).to include( - { term: { uid: "10.5438/0012" } } - ) - end + it "handles language" do + options = { language: "en,de" } + builder = described_class.new(query, options) + expect(builder.filters).to include( + { terms: { language: ["en", "de"].map(&:downcase) } } + ) + end - it "handles state" do - options = { state: "findable,registered" } - builder = described_class.new(query, options) - expect(builder.filters).to include( - { terms: { aasm_state: ["findable", "registered"] } } - ) - end + it "handles uid" do + options = { uid: "10.5438/0012" } + builder = described_class.new(query, options) + expect(builder.filters).to include( + { term: { uid: "10.5438/0012" } } + ) + end - it "handles consortium_id" do - options = { consortium_id: "dc" } - 
builder = described_class.new(query, options) - expect(builder.filters).to include( - { term: { consortium_id: { :case_insensitive => true, :value => "dc" } } } - ) - end + it "handles state" do + options = { state: "findable,registered" } + builder = described_class.new(query, options) + expect(builder.filters).to include( + { terms: { aasm_state: ["findable", "registered"] } } + ) + end - it "handles registered" do - options = { registered: "2021,2023" } - builder = described_class.new(query, options) - expect(builder.filters).to include( - { range: { registered: { gte: "2021||/y", lte: "2023||/y", format: "yyyy" } } } - ) - end + it "handles consortium_id" do + options = { consortium_id: "dc" } + builder = described_class.new(query, options) + expect(builder.filters).to include( + { term: { consortium_id: { case_insensitive: true, value: "dc" } } } + ) + end + it "handles registered" do + options = { registered: "2021,2023" } + builder = described_class.new(query, options) + expect(builder.filters).to include( + { range: { registered: { gte: "2021||/y", lte: "2023||/y", format: "yyyy" } } } + ) + end end context "filters based on client metadata" do - it "handles re3data_id" do - options = { re3data_id: "10.17616/r31njmjx" } - builder = described_class.new(query, options) - expect(builder.filters).to include( - { term: { "client.re3data_id" => "10.17616/r31njmjx" } } - ) - end - - it "handles opendoar_id" do - options = { opendoar_id: "123456" } - builder = described_class.new(query, options) - expect(builder.filters).to include( - { term: { "client.opendoar_id" => "123456" } } - ) - end + it "handles re3data_id" do + options = { re3data_id: "10.17616/r31njmjx" } + builder = described_class.new(query, options) + expect(builder.filters).to include( + { term: { "client.re3data_id" => "10.17616/r31njmjx" } } + ) + end - it "handles certificates" do - options = { certificate: "CoreTrustSeal,WDS" } - builder = described_class.new(query, options) - 
expect(builder.filters).to include( - { terms: { "client.certificate" => ["CoreTrustSeal", "WDS"] } } - ) - end + it "handles opendoar_id" do + options = { opendoar_id: "123456" } + builder = described_class.new(query, options) + expect(builder.filters).to include( + { term: { "client.opendoar_id" => "123456" } } + ) + end + it "handles certificates" do + options = { certificate: "CoreTrustSeal,WDS" } + builder = described_class.new(query, options) + expect(builder.filters).to include( + { terms: { "client.certificate" => ["CoreTrustSeal", "WDS"] } } + ) + end end context "with date range filters" do @@ -304,8 +301,5 @@ expect(builder.filters).to be_empty end end - - - end end diff --git a/spec/models/doi/graphql_query_builder_spec.rb b/spec/models/doi/graphql_query_builder_spec.rb index 683eb74a6..034adcbcf 100644 --- a/spec/models/doi/graphql_query_builder_spec.rb +++ b/spec/models/doi/graphql_query_builder_spec.rb @@ -69,9 +69,7 @@ let(:options) { { sort: "relevance" } } it "ignores any sort options and returns the default" do expect(builder.sort).to eq(described_class::DEFAULT_SORT) - end end end - end