diff --git a/app/controllers/datacite_dois_controller.rb b/app/controllers/datacite_dois_controller.rb index 456e4822c..9162a02c6 100644 --- a/app/controllers/datacite_dois_controller.rb +++ b/app/controllers/datacite_dois_controller.rb @@ -84,7 +84,7 @@ def index if params[:id].present? response = DataciteDoi.find_by_id(params[:id]) elsif params[:ids].present? - response = DataciteDoi.find_by_ids(params[:ids], page: page, sort: sort) + response = DataciteDoi.find_by_ids(params[:ids], disable_facets: params[:disable_facets], facets: params[:facets], page: page, sort: sort) else response = DataciteDoi.query( @@ -136,6 +136,7 @@ def index source: params[:source], scroll_id: params[:scroll_id], disable_facets: disable_facets, + facets: params[:facets], page: page, sort: sort, random: params[:random], @@ -221,47 +222,43 @@ def index ) end else - if total.positive? && !disable_facets - states = facet_by_key(response.aggregations.states.buckets) - resource_types = facet_by_combined_key(response.aggregations.resource_types.buckets) - published = facet_by_range(response.aggregations.published.buckets) - created = facet_by_key_as_string(response.aggregations.created.buckets) - created_by_month = response.aggregations.created_by_month ? 
facet_by_key_as_string(response.aggregations.created_by_month.buckets) : nil - registered = facet_by_key_as_string(response.aggregations.registered.buckets) - providers = facet_by_combined_key(response.aggregations.providers.buckets) - clients = facet_by_combined_key(response.aggregations.clients.buckets) - prefixes = facet_by_key(response.aggregations.prefixes.buckets) - schema_versions = facet_by_schema(response.aggregations.schema_versions.buckets) - affiliations = facet_by_combined_key(response.aggregations.affiliations.buckets) - subjects = facet_by_key(response.aggregations.subjects.buckets) - fields_of_science = facet_by_fos(response.aggregations.fields_of_science.subject.buckets) - certificates = facet_by_key(response.aggregations.certificates.buckets) - licenses = facet_by_license(response.aggregations.licenses.buckets) - link_checks_status = facet_by_cumulative_year(response.aggregations.link_checks_status.buckets) - citations = metric_facet_by_year(response.aggregations.citations.buckets) - views = metric_facet_by_year(response.aggregations.views.buckets) - downloads = metric_facet_by_year(response.aggregations.downloads.buckets) - else - states = nil - resource_types = nil - published = nil - created = nil - registered = nil - providers = nil - clients = nil - prefixes = nil - schema_versions = nil - affiliations = nil - subjects = nil - fields_of_science = nil - certificates = nil - licenses = nil - link_checks_status = nil - citations = nil - views = nil - downloads = nil - end + facets_to_facet_methods = { + states: :facet_by_key, + resource_types: :facet_by_combined_key, + created: :facet_by_key_as_string, + created_by_month: :facet_by_key_as_string, + published: :facet_by_range, + registered: :facet_by_key_as_string, + providers: :facet_by_combined_key, + clients: :facet_by_combined_key, + client_types: :facet_by_client_type, + affiliations: :facet_by_combined_key, + prefixes: :facet_by_key, + certificates: :facet_by_key, + licenses: 
:facet_by_license, + schema_versions: :facet_by_schema, + link_checks_status: :facet_by_cumulative_year, + creators_and_contributors: :facet_by_creators_and_contributors, + subjects: :facet_by_key, + fields_of_science: :facet_by_fos, + languages: :facet_by_language, + registration_agencies: :facet_by_registration_agency, + citations: :metric_facet_by_year, + views: :metric_facet_by_year, + downloads: :metric_facet_by_year + } + + facets_to_bucket_path = { + fields_of_science: [:subject, :buckets] + } + aggregations = response.aggregations + facets = facets_to_facet_methods.map do |facet, method| + if aggregations.dig(facet) + buckets = facets_to_bucket_path.dig(facet) ? aggregations.dig(facet, *facets_to_bucket_path[facet]) : aggregations.dig(facet).buckets + [facet.to_s.camelize(:lower), send(method, buckets)] + end + end.compact.to_h respond_to do |format| format.json do options = {} @@ -271,27 +268,8 @@ def index page: if page[:cursor].nil? && page[:number].present? page[:number] - end, - states: states, - "resourceTypes" => resource_types, - created: created, - createdByMonth: created_by_month, - published: published, - registered: registered, - providers: providers, - clients: clients, - affiliations: affiliations, - prefixes: prefixes, - certificates: certificates, - licenses: licenses, - "schemaVersions" => schema_versions, - "linkChecksStatus" => link_checks_status, - subjects: subjects, - "fieldsOfScience" => fields_of_science, - citations: citations, - views: views, - downloads: downloads, - }.compact + end + }.merge(facets).compact options[:links] = { self: request.original_url, @@ -325,6 +303,7 @@ def index "has-affiliation" => params[:has_affiliation], "has-funder" => params[:has_funder], "disable-facets" => params[:disable_facets], + "facets" => params[:facets], detail: params[:detail], composite: params[:composite], affiliation: params[:affiliation], diff --git a/app/models/doi.rb b/app/models/doi.rb index b9a2c9bd6..0210d0475 100644 --- 
# Elasticsearch aggregation definitions, keyed by facet name.
#
# `query_aggregations` picks a subset of these entries by key. The top-level
# hash is frozen so that a caller cannot add or remove facets on the shared
# constant by accident; selections are always returned as a new hash.
DOI_AGGREGATION_DEFINITIONS = {
  # number of resourceTypeGeneral increased from 30 to 32 in schema 4.6
  resource_types: { terms: { field: "resource_type_id_and_name", size: 32, min_doc_count: 1 } },
  states: { terms: { field: "aasm_state", size: 3, min_doc_count: 1 } },
  published: {
    date_histogram: {
      field: "publication_year",
      interval: "year",
      format: "year",
      order: {
        _key: "desc",
      },
      min_doc_count: 1,
    },
  },
  registration_agencies: { terms: { field: "agency", size: 10, min_doc_count: 1 } },
  created: { date_histogram: { field: "created", interval: "year", format: "year", order: { _key: "desc" }, min_doc_count: 1 },
             aggs: { bucket_truncate: { bucket_sort: { size: 10 } } } },
  registered: { date_histogram: { field: "registered", interval: "year", format: "year", order: { _key: "desc" }, min_doc_count: 1 },
                aggs: { bucket_truncate: { bucket_sort: { size: 10 } } } },
  providers: { terms: { field: "provider_id_and_name", size: 10, min_doc_count: 1 } },
  clients: { terms: { field: "client_id_and_name", size: 10, min_doc_count: 1 } },
  client_types: {
    terms: {
      field: "client.client_type",
      size: 4,
      min_doc_count: 1,
    },
  },
  affiliations: { terms: { field: "affiliation_id_and_name", size: 10, min_doc_count: 1 } },
  prefixes: { terms: { field: "prefix", size: 10, min_doc_count: 1 } },
  schema_versions: { terms: { field: "schema_version", size: 10, min_doc_count: 1 } },
  link_checks_status: { terms: { field: "landing_page.status", size: 10, min_doc_count: 1 } },
  subjects: { terms: { field: "subjects.subject", size: 10, min_doc_count: 1 } },
  pid_entities: {
    filter: { term: { "subjects.subjectScheme": "PidEntity" } },
    aggs: {
      subject: { terms: { field: "subjects.subject", size: 10, min_doc_count: 1,
                          include: %w[Dataset Publication Software Organization Funder Person Grant Sample Instrument Repository Project] } },
    },
  },
  fields_of_science: {
    filter: { term: { "subjects.subjectScheme": "Fields of Science and Technology (FOS)" } },
    aggs: {
      subject: { terms: { field: "subjects.subject", size: 10, min_doc_count: 1,
                          include: "FOS:.*" } },
    },
  },
  licenses: { terms: { field: "rights_list.rightsIdentifier", size: 10, min_doc_count: 1 } },
  languages: { terms: { field: "language", size: 10, min_doc_count: 1 } },
  certificates: { terms: { field: "client.certificate", size: 10, min_doc_count: 1 } },
  creators_and_contributors: {
    terms: {
      field: "creators_and_contributors.nameIdentifiers.nameIdentifier",
      size: 10,
      min_doc_count: 1,
      include: "https?://orcid.org/.*",
    },
    aggs: {
      creators_and_contributors: {
        top_hits: {
          _source: {
            includes: [
              "creators_and_contributors.name",
              "creators_and_contributors.nameIdentifiers.nameIdentifier",
            ],
          },
          size: 1,
        },
      },
      work_types: {
        terms: {
          field: "resource_type_id_and_name",
          min_doc_count: 1,
        },
      },
    },
  },
  views: {
    date_histogram: { field: "publication_year", interval: "year", format: "year", order: { _key: "desc" }, min_doc_count: 1 },
    aggs: {
      metric_count: { sum: { field: "view_count" } },
      bucket_truncate: { bucket_sort: { size: 10 } },
    },
  },
  downloads: {
    date_histogram: { field: "publication_year", interval: "year", format: "year", order: { _key: "desc" }, min_doc_count: 1 },
    aggs: {
      metric_count: { sum: { field: "download_count" } },
      bucket_truncate: { bucket_sort: { size: 10 } },
    },
  },
  citations: {
    date_histogram: { field: "publication_year", interval: "year", format: "year", order: { _key: "desc" }, min_doc_count: 1 },
    aggs: {
      metric_count: { sum: { field: "citation_count" } },
      bucket_truncate: { bucket_sort: { size: 10 } },
    },
  },
}.freeze

# Facets that are aggregated when the caller does not ask for specific ones.
#
# @return [Array<Symbol>] a fresh array of keys of DOI_AGGREGATION_DEFINITIONS
def self.default_doi_query_facets
  %i[
    resource_types
    states
    published
    created
    registered
    providers
    clients
    affiliations
    prefixes
    schema_versions
    link_checks_status
    subjects
    fields_of_science
    licenses
    certificates
    views
    downloads
    citations
  ]
end

# Selects the aggregation definitions to send along with a DOI query.
#
# @param disable_facets [Boolean, String, nil] when it stringifies to "true"
#   no aggregations are returned, regardless of `facets`
# @param facets [String, Array, nil] requested facet names. A String is split
#   on commas; an Array may hold strings or symbols (entries may themselves be
#   comma-separated). Names are stripped and underscored, so
#   "registrationAgencies" and "registration_agencies" are equivalent.
#   `nil` selects the default facets; an empty string/array selects none.
# @return [Hash{Symbol => Hash}] the matching subset of
#   DOI_AGGREGATION_DEFINITIONS; unknown and duplicate names are ignored
def self.query_aggregations(disable_facets: false, facets: nil)
  return {} if disable_facets.to_s == "true"
  return DOI_AGGREGATION_DEFINITIONS.slice(*default_doi_query_facets) if facets.nil?

  # Resolve requested names against the known keys by string comparison, so
  # arbitrary request input is never interned as a Symbol.
  known_facets = DOI_AGGREGATION_DEFINITIONS.keys.each_with_object({}) do |key, lookup|
    lookup[key.to_s] = key
  end

  # String and Array input go through the same normalisation; `to_s` keeps
  # nil or otherwise unexpected entries from raising.
  selected_facets = Array(facets).
    flat_map { |entry| entry.to_s.split(",") }.
    map { |name| known_facets[name.strip.underscore] }.
    compact.
    uniq

  DOI_AGGREGATION_DEFINITIONS.slice(*selected_facets)
end
# Specs for Doi.query_aggregations: which aggregation keys are selected for
# the different combinations of `disable_facets` and `facets`.
describe "query_aggregations" do
  # Lazily evaluated per example instead of at file load time.
  let(:default_aggregations) { Doi.default_doi_query_facets }
  # Contains an unknown facet and a duplicate; both must be ignored.
  let(:requested_facets) { %i[creators_and_contributors registration_agencies states made_up_facet registration_agencies] }
  let(:expected_aggregations) { %i[creators_and_contributors registration_agencies states] }

  it "returns default aggregations when disable_facets and facets are not set" do
    expect(Doi.query_aggregations.keys).to match_array(default_aggregations)
  end

  it "returns default aggregations when disable_facets is set to false" do
    aggregations = Doi.query_aggregations(disable_facets: false)

    expect(aggregations.keys).to match_array(default_aggregations)
  end

  it "returns default aggregations when disable_facets is false string" do
    aggregations = Doi.query_aggregations(disable_facets: "false")

    expect(aggregations.keys).to match_array(default_aggregations)
  end

  it "returns blank aggregations when disable_facets is true" do
    aggregations = Doi.query_aggregations(disable_facets: true)

    expect(aggregations).to eq({})
  end

  it "returns blank aggregations when disable_facets is true string" do
    aggregations = Doi.query_aggregations(disable_facets: "true")

    expect(aggregations).to eq({})
  end

  it "returns selected aggregations when facets is a string" do
    facets_string = "creators_and_contributors, registrationAgencies,made_up_facet,states,registration_agencies"
    aggregations = Doi.query_aggregations(facets: facets_string)

    expect(aggregations.keys).to match_array(expected_aggregations)
  end

  it "returns blank aggregations when facets is a blank string" do
    aggregations = Doi.query_aggregations(facets: "")

    expect(aggregations).to eq({})
  end

  it "returns selected aggregations when facets is an array of symbols" do
    aggregations = Doi.query_aggregations(facets: requested_facets)

    expect(aggregations.keys).to match_array(expected_aggregations)
  end

  it "returns blank aggregations when facets is a blank array" do
    aggregations = Doi.query_aggregations(facets: [])

    expect(aggregations).to eq({})
  end

  it "returns selected aggregations when facets are an array of symbols and disable_facets is false" do
    aggregations = Doi.query_aggregations(facets: requested_facets, disable_facets: false)

    expect(aggregations.keys).to match_array(expected_aggregations)
  end

  it "returns blank aggregations when facets are an array of symbols and disable_facets is true" do
    aggregations = Doi.query_aggregations(facets: requested_facets, disable_facets: true)

    expect(aggregations).to eq({})
  end
end
+DEFAULT_DOIS_FACETS = [ + "states", + "resourceTypes", + "created", + "published", + "registered", + "providers", + "clients", + "affiliations", + "prefixes", + "certificates", + "licenses", + "schemaVersions", + "linkChecksStatus", + "subjects", + "fieldsOfScience", + "citations", + "views", + "downloads" +] describe DataciteDoisController, type: :request, vcr: true do let(:admin) { create(:provider, symbol: "ADMIN") } @@ -236,6 +256,69 @@ def clear_doi_index next_link = next_link_absolute.path + "?" + next_link_absolute.query expect(next_link).to eq("/dois?fields%5Bdois%5D=id%2Csubjects&page%5Bnumber%5D=2&page%5Bsize%5D=2") end + + it "returns default facets" do + get "/dois", nil, headers + + expect(last_response.status).to eq(200) + expect(json["data"].size).to eq(10) + expect(json.dig("meta", "total")).to eq(10) + + expect(json.dig("meta").length).to eq(DEFAULT_DOIS_FACETS.length + 3) + expect(json.dig("meta").keys).to match_array(DEFAULT_DOIS_FACETS + ["total", "totalPages", "page"]) + end + + it "returns default facets when disable-facets is set to false" do + get "/dois?disable-facets=false", nil, headers + + expect(last_response.status).to eq(200) + expect(json["data"].size).to eq(10) + expect(json.dig("meta", "total")).to eq(10) + + expect(json.dig("meta").length).to eq(DEFAULT_DOIS_FACETS.length + 3) + expect(json.dig("meta").keys).to match_array(DEFAULT_DOIS_FACETS + ["total", "totalPages", "page"]) + end + + it "returns no facets when disable-facets is set to true" do + get "/dois?disable-facets=true", nil, headers + + expect(last_response.status).to eq(200) + expect(json["data"].size).to eq(10) + expect(json.dig("meta", "total")).to eq(10) + expect(json.dig("meta").length).to eq(3) + DEFAULT_DOIS_FACETS.each do |facet| + expect(json.dig("meta", facet)).to eq(nil) + end + end + + it "returns no facets when facets is empty" do + get "/dois?facets=", nil, headers + + expect(last_response.status).to eq(200) + expect(json["data"].size).to eq(10) + 
expect(json.dig("meta", "total")).to eq(10) + expect(json.dig("meta").length).to eq(3) + DEFAULT_DOIS_FACETS.each do |facet| + expect(json.dig("meta", facet)).to eq(nil) + end + end + + it "returns specified facets when facets is set" do + get "/dois?facets=client_types,registrationAgencies, clients,languages,creators_and_contributors,made_up_facet", nil, headers + + expect(last_response.status).to eq(200) + expect(json["data"].size).to eq(10) + expect(json.dig("meta", "total")).to eq(10) + expect(json.dig("meta").length).to eq(8) + expect(json.dig("meta", "states")).to eq(nil) + expect(json.dig("meta", "clientTypes")).to be_truthy + expect(json.dig("meta", "registrationAgencies")).to be_truthy + expect(json.dig("meta", "clients")).to be_truthy + expect(json.dig("meta", "languages")).to be_truthy + expect(json.dig("meta", "creatorsAndContributors")).to be_truthy + expect(json.dig("meta", "madeUpFacet")).to eq(nil) + expect(json.dig("meta", "made_up_facet")).to eq(nil) + end end describe "GET /dois with nil publisher values", elsasticsearch: true, prefix_pool_size: 1 do