diff --git a/app/controllers/datacite_dois_controller.rb b/app/controllers/datacite_dois_controller.rb index 2bb34fc92..7ada8acb5 100644 --- a/app/controllers/datacite_dois_controller.rb +++ b/app/controllers/datacite_dois_controller.rb @@ -68,6 +68,9 @@ def index params[:state] = "findable" end + # facets are enabled by default; the raw param is a String, and "false"/"0"/"" are truthy in Ruby, so coerce it to a real boolean + disable_facets = params[:disable_facets].present? && !%w[false 0].include?(params[:disable_facets].to_s.downcase) + if params[:id].present? response = DataciteDoi.find_by_id(params[:id]) elsif params[:ids].present? @@ -122,6 +125,7 @@ def index sample_size: params[:sample], source: params[:source], scroll_id: params[:scroll_id], + disable_facets: disable_facets, page: page, sort: sort, random: params[:random], @@ -206,91 +210,56 @@ def index status: :ok end else - states = - if total.positive? - facet_by_key(response.aggregations.states.buckets) - end - resource_types = - if total.positive? - facet_by_combined_key(response.aggregations.resource_types.buckets) - end - published = - if total.positive? - facet_by_range(response.aggregations.published.buckets) - end - created = - if total.positive? - facet_by_key_as_string(response.aggregations.created.buckets) - end - registered = - if total.positive? - facet_by_key_as_string(response.aggregations.registered.buckets) - end - providers = - if total.positive? - facet_by_combined_key(response.aggregations.providers.buckets) - end - clients = - if total.positive? - facet_by_combined_key(response.aggregations.clients.buckets) - end - prefixes = - if total.positive? - facet_by_key(response.aggregations.prefixes.buckets) - end - schema_versions = - if total.positive? - facet_by_schema(response.aggregations.schema_versions.buckets) - end - - affiliations = - if total.positive? - facet_by_combined_key(response.aggregations.affiliations.buckets) - end - # sources = total.positive? ? facet_by_key(response.aggregations.sources.buckets) : nil - subjects = - if total.positive? 
- facet_by_key(response.aggregations.subjects.buckets) - end - fields_of_science = - if total.positive? - facet_by_fos( - response.aggregations.fields_of_science.subject.buckets, - ) - end - certificates = - if total.positive? - facet_by_key(response.aggregations.certificates.buckets) - end - licenses = - if total.positive? - facet_by_license(response.aggregations.licenses.buckets) - end - - link_checks_status = - if total.positive? - facet_by_cumulative_year( - response.aggregations.link_checks_status.buckets, - ) - end - # links_with_schema_org = total.positive? ? facet_by_cumulative_year(response.aggregations.link_checks_has_schema_org.buckets) : nil - # link_checks_schema_org_id = total.positive? ? response.aggregations.link_checks_schema_org_id.value : nil - # link_checks_dc_identifier = total.positive? ? response.aggregations.link_checks_dc_identifier.value : nil - # link_checks_citation_doi = total.positive? ? response.aggregations.link_checks_citation_doi.value : nil - # links_checked = total.positive? ? response.aggregations.links_checked.value : nil - - citations = - if total.positive? - metric_facet_by_year(response.aggregations.citations.buckets) - end - views = - if total.positive? - metric_facet_by_year(response.aggregations.views.buckets) - end - downloads = - if total.positive? - metric_facet_by_year(response.aggregations.downloads.buckets) - end + if total.positive? 
&& !disable_facets + states = facet_by_key(response.aggregations.states.buckets) + resource_types = facet_by_combined_key(response.aggregations.resource_types.buckets) + published = facet_by_range(response.aggregations.published.buckets) + created = facet_by_key_as_string(response.aggregations.created.buckets) + registered = facet_by_key_as_string(response.aggregations.registered.buckets) + providers = facet_by_combined_key(response.aggregations.providers.buckets) + clients = facet_by_combined_key(response.aggregations.clients.buckets) + prefixes = facet_by_key(response.aggregations.prefixes.buckets) + schema_versions = facet_by_schema(response.aggregations.schema_versions.buckets) + affiliations = facet_by_combined_key(response.aggregations.affiliations.buckets) + # sources = total.positive? ? facet_by_key(response.aggregations.sources.buckets) : nil + subjects = facet_by_key(response.aggregations.subjects.buckets) + fields_of_science = facet_by_fos( + response.aggregations.fields_of_science.subject.buckets, + ) + certificates = facet_by_key(response.aggregations.certificates.buckets) + licenses = facet_by_license(response.aggregations.licenses.buckets) + link_checks_status = facet_by_cumulative_year( + response.aggregations.link_checks_status.buckets, + ) + # links_with_schema_org = total.positive? ? facet_by_cumulative_year(response.aggregations.link_checks_has_schema_org.buckets) : nil + # link_checks_schema_org_id = total.positive? ? response.aggregations.link_checks_schema_org_id.value : nil + # link_checks_dc_identifier = total.positive? ? response.aggregations.link_checks_dc_identifier.value : nil + # link_checks_citation_doi = total.positive? ? response.aggregations.link_checks_citation_doi.value : nil + # links_checked = total.positive? ? 
response.aggregations.links_checked.value : nil + + citations = metric_facet_by_year(response.aggregations.citations.buckets) + views = metric_facet_by_year(response.aggregations.views.buckets) + downloads = metric_facet_by_year(response.aggregations.downloads.buckets) + else + states = nil + resource_types = nil + published = nil + created = nil + registered = nil + providers = nil + clients = nil + prefixes = nil + schema_versions = nil + affiliations = nil + subjects = nil + fields_of_science = nil + certificates = nil + licenses = nil + link_checks_status = nil + citations = nil + views = nil + downloads = nil + end respond_to do |format| format.json do @@ -359,6 +328,7 @@ def index "has-person" => params[:has_person], "has-affiliation" => params[:has_affiliation], "has-funder" => params[:has_funder], + "disable-facets" => params[:disable_facets], detail: params[:detail], composite: params[:composite], affiliation: params[:affiliation], diff --git a/app/models/doi.rb b/app/models/doi.rb index 3ff7954e8..4d7b69fd7 100644 --- a/app/models/doi.rb +++ b/app/models/doi.rb @@ -585,78 +585,80 @@ def self.gql_query_aggregations(facet_count: 10) end end - def self.query_aggregations - { - resource_types: { terms: { field: "resource_type_id_and_name", size: 16, min_doc_count: 1 } }, - states: { terms: { field: "aasm_state", size: 3, min_doc_count: 1 } }, - published: { - date_histogram: { - field: "publication_year", - interval: "year", - format: "year", - order: { - _key: "desc", + def self.query_aggregations(disable_facets: false) + if !disable_facets + { + resource_types: { terms: { field: "resource_type_id_and_name", size: 16, min_doc_count: 1 } }, + states: { terms: { field: "aasm_state", size: 3, min_doc_count: 1 } }, + published: { + date_histogram: { + field: "publication_year", + interval: "year", + format: "year", + order: { + _key: "desc", + }, + min_doc_count: 1, }, - min_doc_count: 1, }, - }, - registration_agencies: { terms: { field: "agency", size: 10, 
min_doc_count: 1 } }, - created: { date_histogram: { field: "created", interval: "year", format: "year", order: { _key: "desc" }, min_doc_count: 1 }, - aggs: { bucket_truncate: { bucket_sort: { size: 10 } } } }, - registered: { date_histogram: { field: "registered", interval: "year", format: "year", order: { _key: "desc" }, min_doc_count: 1 }, - aggs: { bucket_truncate: { bucket_sort: { size: 10 } } } }, - providers: { terms: { field: "provider_id_and_name", size: 10, min_doc_count: 1 } }, - clients: { terms: { field: "client_id_and_name", size: 10, min_doc_count: 1 } }, - affiliations: { terms: { field: "affiliation_id_and_name", size: 10, min_doc_count: 1 } }, - prefixes: { terms: { field: "prefix", size: 10, min_doc_count: 1 } }, - schema_versions: { terms: { field: "schema_version", size: 10, min_doc_count: 1 } }, - link_checks_status: { terms: { field: "landing_page.status", size: 10, min_doc_count: 1 } }, - # link_checks_has_schema_org: { terms: { field: 'landing_page.hasSchemaOrg', size: 2, min_doc_count: 1 } }, - # link_checks_schema_org_id: { value_count: { field: "landing_page.schemaOrgId" } }, - # link_checks_dc_identifier: { value_count: { field: "landing_page.dcIdentifier" } }, - # link_checks_citation_doi: { value_count: { field: "landing_page.citationDoi" } }, - # links_checked: { value_count: { field: "landing_page.checked" } }, - # sources: { terms: { field: 'source', size: 15, min_doc_count: 1 } }, - subjects: { terms: { field: "subjects.subject", size: 10, min_doc_count: 1 } }, - pid_entities: { - filter: { term: { "subjects.subjectScheme": "PidEntity" } }, - aggs: { - subject: { terms: { field: "subjects.subject", size: 10, min_doc_count: 1, - include: %w(Dataset Publication Software Organization Funder Person Grant Sample Instrument Repository Project) } }, + registration_agencies: { terms: { field: "agency", size: 10, min_doc_count: 1 } }, + created: { date_histogram: { field: "created", interval: "year", format: "year", order: { _key: "desc" 
}, min_doc_count: 1 }, + aggs: { bucket_truncate: { bucket_sort: { size: 10 } } } }, + registered: { date_histogram: { field: "registered", interval: "year", format: "year", order: { _key: "desc" }, min_doc_count: 1 }, + aggs: { bucket_truncate: { bucket_sort: { size: 10 } } } }, + providers: { terms: { field: "provider_id_and_name", size: 10, min_doc_count: 1 } }, + clients: { terms: { field: "client_id_and_name", size: 10, min_doc_count: 1 } }, + affiliations: { terms: { field: "affiliation_id_and_name", size: 10, min_doc_count: 1 } }, + prefixes: { terms: { field: "prefix", size: 10, min_doc_count: 1 } }, + schema_versions: { terms: { field: "schema_version", size: 10, min_doc_count: 1 } }, + link_checks_status: { terms: { field: "landing_page.status", size: 10, min_doc_count: 1 } }, + # link_checks_has_schema_org: { terms: { field: 'landing_page.hasSchemaOrg', size: 2, min_doc_count: 1 } }, + # link_checks_schema_org_id: { value_count: { field: "landing_page.schemaOrgId" } }, + # link_checks_dc_identifier: { value_count: { field: "landing_page.dcIdentifier" } }, + # link_checks_citation_doi: { value_count: { field: "landing_page.citationDoi" } }, + # links_checked: { value_count: { field: "landing_page.checked" } }, + # sources: { terms: { field: 'source', size: 15, min_doc_count: 1 } }, + subjects: { terms: { field: "subjects.subject", size: 10, min_doc_count: 1 } }, + pid_entities: { + filter: { term: { "subjects.subjectScheme": "PidEntity" } }, + aggs: { + subject: { terms: { field: "subjects.subject", size: 10, min_doc_count: 1, + include: %w(Dataset Publication Software Organization Funder Person Grant Sample Instrument Repository Project) } }, + }, }, - }, - fields_of_science: { - filter: { term: { "subjects.subjectScheme": "Fields of Science and Technology (FOS)" } }, - aggs: { - subject: { terms: { field: "subjects.subject", size: 10, min_doc_count: 1, - include: "FOS:.*" } }, + fields_of_science: { + filter: { term: { "subjects.subjectScheme": "Fields 
of Science and Technology (FOS)" } }, + aggs: { + subject: { terms: { field: "subjects.subject", size: 10, min_doc_count: 1, + include: "FOS:.*" } }, + }, }, - }, - licenses: { terms: { field: "rights_list.rightsIdentifier", size: 10, min_doc_count: 1 } }, - languages: { terms: { field: "language", size: 10, min_doc_count: 1 } }, - certificates: { terms: { field: "client.certificate", size: 10, min_doc_count: 1 } }, - views: { - date_histogram: { field: "publication_year", interval: "year", format: "year", order: { _key: "desc" }, min_doc_count: 1 }, - aggs: { - metric_count: { sum: { field: "view_count" } }, - bucket_truncate: { bucket_sort: { size: 10 } }, + licenses: { terms: { field: "rights_list.rightsIdentifier", size: 10, min_doc_count: 1 } }, + languages: { terms: { field: "language", size: 10, min_doc_count: 1 } }, + certificates: { terms: { field: "client.certificate", size: 10, min_doc_count: 1 } }, + views: { + date_histogram: { field: "publication_year", interval: "year", format: "year", order: { _key: "desc" }, min_doc_count: 1 }, + aggs: { + metric_count: { sum: { field: "view_count" } }, + bucket_truncate: { bucket_sort: { size: 10 } }, + }, }, - }, - downloads: { - date_histogram: { field: "publication_year", interval: "year", format: "year", order: { _key: "desc" }, min_doc_count: 1 }, - aggs: { - metric_count: { sum: { field: "download_count" } }, - bucket_truncate: { bucket_sort: { size: 10 } }, + downloads: { + date_histogram: { field: "publication_year", interval: "year", format: "year", order: { _key: "desc" }, min_doc_count: 1 }, + aggs: { + metric_count: { sum: { field: "download_count" } }, + bucket_truncate: { bucket_sort: { size: 10 } }, + }, }, - }, - citations: { - date_histogram: { field: "publication_year", interval: "year", format: "year", order: { _key: "desc" }, min_doc_count: 1 }, - aggs: { - metric_count: { sum: { field: "citation_count" } }, - bucket_truncate: { bucket_sort: { size: 10 } }, + citations: { + date_histogram: { 
field: "publication_year", interval: "year", format: "year", order: { _key: "desc" }, min_doc_count: 1 }, + aggs: { + metric_count: { sum: { field: "citation_count" } }, + bucket_truncate: { bucket_sort: { size: 10 } }, + }, }, - }, - } + } + end end def self.provider_aggregations @@ -720,7 +722,7 @@ def self.find_by_ids(ids, options = {}) must: must, }, }, - aggregations: query_aggregations, + aggregations: query_aggregations(disable_facets: options[:disable_facets]), ) end @@ -761,7 +763,7 @@ def self.stats_query(options = {}) # query for graphql, removing options that are not needed def self.gql_query(query, options = {}) options[:page] ||= {} - options[:facet_count] ||= 10 + options[:facet_count] = (options[:facet_count] || 10).to_i aggregations = gql_query_aggregations(facet_count: options[:facet_count]) # cursor nav uses search_after, this should always be an array of values that match the sort. @@ -952,7 +954,7 @@ def self.query(query, options = {}) elsif options[:totals_agg] == "prefix" prefix_aggregations else - query_aggregations + query_aggregations(disable_facets: options[:disable_facets]) end # Cursor nav uses search_after, this should always be an array of values that match the sort. 
diff --git a/spec/fixtures/vcr_cassettes/DataciteDoisController/downloads/has_downloads.yml b/spec/fixtures/vcr_cassettes/DataciteDoisController/downloads/has_downloads.yml new file mode 100644 index 000000000..9dc17bcf9 --- /dev/null +++ b/spec/fixtures/vcr_cassettes/DataciteDoisController/downloads/has_downloads.yml @@ -0,0 +1,55 @@ +--- +http_interactions: +- request: + method: get + uri: https://doi.org/ra/10.14454 + body: + encoding: US-ASCII + string: '' + headers: + User-Agent: + - Mozilla/5.0 (compatible; Maremma/4.7.2; mailto:info@datacite.org) + Accept: + - text/html,application/json,application/xml;q=0.9, text/plain;q=0.8,image/png,*/*;q=0.5 + response: + status: + code: 200 + message: '' + headers: + Date: + - Wed, 27 Jan 2021 05:29:09 GMT + Content-Type: + - application/json;charset=UTF-8 + Connection: + - keep-alive + Set-Cookie: + - __cfduid=d139b9f4bfe1bfd29ce13cddd130920bc1611725349; expires=Fri, 26-Feb-21 + 05:29:09 GMT; path=/; domain=.doi.org; HttpOnly; SameSite=Lax; Secure + Cf-Cache-Status: + - DYNAMIC + Cf-Request-Id: + - 07e3ea399700004a55228a4000000001 + Expect-Ct: + - max-age=604800, report-uri="https://report-uri.cloudflare.com/cdn-cgi/beacon/expect-ct" + Report-To: + - '{"group":"cf-nel","endpoints":[{"url":"https:\/\/a.nel.cloudflare.com\/report?s=xNqymxeiJWZtxPpz%2F%2FLpvlrIZBzkTSS5Ia7gubeGbn4ZPtrNk6sT2d97YwKAU%2FMyl5ESP5qINhiLJIFuQUtT5M%2Fxf%2Fftk%2BsB"}],"max_age":604800}' + Nel: + - '{"max_age":604800,"report_to":"cf-nel"}' + Strict-Transport-Security: + - max-age=31536000; includeSubDomains; preload + Server: + - cloudflare + Cf-Ray: + - 61801308fd404a55-FRA + body: + encoding: ASCII-8BIT + string: |- + [ + { + "DOI": "10.14454", + "RA": "DataCite" + } + ] + http_version: null + recorded_at: Wed, 27 Jan 2021 05:29:08 GMT +recorded_with: VCR 5.1.0 diff --git a/spec/requests/datacite_dois_spec.rb b/spec/requests/datacite_dois_spec.rb index 2cbf04b4d..f9ca0faa7 100644 --- a/spec/requests/datacite_dois_spec.rb +++ 
b/spec/requests/datacite_dois_spec.rb @@ -441,6 +441,17 @@ expect(json.dig("data", 0, "attributes", "types")).to eq("bibtex" => "misc", "citeproc" => "article", "resourceType" => "Presentation", "resourceTypeGeneral" => "InteractiveResource", "ris" => "GEN", "schemaOrg" => "CreativeWork") expect(json.dig("meta", "resourceTypes")).to eq([{ "count" => 3, "id" => "interactive-resource", "title" => "Interactive Resource" }]) end + + it "filter for interactive resources no facets" do + get "/dois?resource-type-id=interactive-resource&disable-facets=true", nil, headers + + expect(last_response.status).to eq(200) + expect(json["data"].size).to eq(3) + expect(json.dig("meta", "total")).to eq(3) + expect(json.dig("data", 0, "attributes", "publicationYear")).to eq(2011) + expect(json.dig("data", 0, "attributes", "types")).to eq("bibtex" => "misc", "citeproc" => "article", "resourceType" => "Presentation", "resourceTypeGeneral" => "InteractiveResource", "ris" => "GEN", "schemaOrg" => "CreativeWork") + expect(json.dig("meta")).to eq("page" => 1, "total" => 3, "totalPages" => 1) + end end describe "GET /dois for fake resources", elasticsearch: true, vcr: true do @@ -978,7 +989,7 @@ put "/dois/#{doi.doi}", valid_attributes, headers expect(last_response.status).to eq(422) - expect(json["errors"]).to eq([{ "source" => "creators", "title" => "Missing child element(s). Expected is ( {http://datacite.org/schema/kernel-4}creator ). at line 4, column 0", "uid" => doi.uid }]) + expect(json["errors"]).to eq([{ "source" => "creators", "title" => "DOI #{doi.uid}: Missing child element(s). Expected is ( {http://datacite.org/schema/kernel-4}creator ). 
at line 4, column 0", "uid" => doi.uid }]) end end @@ -1013,7 +1024,7 @@ patch "/dois/10.14454/10703", valid_attributes, headers expect(last_response.status).to eq(422) - expect(json.fetch("errors", nil)).to eq([{ "source" => "xml", "title" => "Schema http://datacite.org/schema/kernel-2.2 is no longer supported", "uid" => "10.14454/10703" }]) + expect(json.fetch("errors", nil)).to eq([{ "source" => "xml", "title" => "DOI 10.14454/10703: Schema http://datacite.org/schema/kernel-2.2 is no longer supported", "uid" => "10.14454/10703" }]) end end @@ -1089,7 +1100,7 @@ put "/dois/#{doi_id}", valid_attributes, headers expect(last_response.status).to eq(422) - expect(json["errors"]).to eq([{ "source" => "creators", "title" => "Missing child element(s). Expected is ( {http://datacite.org/schema/kernel-4}creator ). at line 4, column 0", "uid" => "10.14454/077d-fj48" }]) + expect(json["errors"]).to eq([{ "source" => "creators", "title" => "DOI #{doi_id}: Missing child element(s). Expected is ( {http://datacite.org/schema/kernel-4}creator ). at line 4, column 0", "uid" => "10.14454/077d-fj48" }]) end end @@ -1115,7 +1126,7 @@ put "/dois/#{doi_id}", valid_attributes, headers expect(last_response.status).to eq(422) - expect(json["errors"]).to eq([{ "source" => "creators", "title" => "Missing child element(s). Expected is ( {http://datacite.org/schema/kernel-4}creator ). at line 4, column 0", "uid" => "10.14454/077d-fj48" }]) + expect(json["errors"]).to eq([{ "source" => "creators", "title" => "DOI 10.14454/077d-fj48: Missing child element(s). Expected is ( {http://datacite.org/schema/kernel-4}creator ). 
at line 4, column 0", "uid" => "10.14454/077d-fj48" }]) end end @@ -2298,7 +2309,7 @@ post "/dois", valid_attributes, headers expect(last_response.status).to eq(422) - expect(json.fetch("errors", nil)).to eq([{ "source" => "xml", "title" => "Schema http://datacite.org/schema/kernel-2.2 is no longer supported", "uid" => "10.14454/10703" }]) + expect(json.fetch("errors", nil)).to eq([{ "source" => "xml", "title" => "DOI 10.14454/10703: Schema http://datacite.org/schema/kernel-2.2 is no longer supported", "uid" => "10.14454/10703" }]) end end @@ -2376,7 +2387,7 @@ post "/dois", valid_attributes, headers expect(last_response.status).to eq(422) - expect(json.fetch("errors", nil)).to eq([{ "source" => "xml", "title" => "Schema http://datacite.org/schema/kernel-2.2 is no longer supported", "uid" => "10.14454/10703" }]) + expect(json.fetch("errors", nil)).to eq([{ "source" => "xml", "title" => "DOI 10.14454/10703: Schema http://datacite.org/schema/kernel-2.2 is no longer supported", "uid" => "10.14454/10703" }]) end end @@ -2757,7 +2768,7 @@ expect(last_response.status).to eq(200) expect(json["errors"].size).to eq(1) - expect(json["errors"].first).to eq("source" => "creators", "title" => "Missing child element(s). Expected is ( {http://datacite.org/schema/kernel-4}creator ). at line 4, column 0", "uid" => "10.14454/10703") + expect(json["errors"].first).to eq("source" => "creators", "title" => "DOI 10.14454/10703: Missing child element(s). Expected is ( {http://datacite.org/schema/kernel-4}creator ). at line 4, column 0", "uid" => "10.14454/10703") end end @@ -2780,7 +2791,7 @@ expect(last_response.status).to eq(200) expect(json["errors"].size).to eq(1) - expect(json["errors"].first).to eq("source" => "creatorName", "title" => "This element is not expected. Expected is ( {http://datacite.org/schema/kernel-4}affiliation ). 
at line 16, column 0", "uid" => "10.14454/10703") + expect(json["errors"].first).to eq("source" => "creatorName", "title" => "DOI 10.14454/10703: This element is not expected. Expected is ( {http://datacite.org/schema/kernel-4}affiliation ). at line 16, column 0", "uid" => "10.14454/10703") end end @@ -2802,7 +2813,7 @@ post "/dois/validate", params, headers expect(last_response.status).to eq(200) - expect(json["errors"].first).to eq("source" => "creatorName', attribute 'nameType", "title" => "[facet 'enumeration'] The value 'personal' is not an element of the set {'Organizational', 'Personal'}. at line 12, column 0", "uid" => "10.14454/10703") + expect(json["errors"].first).to eq("source" => "creatorName', attribute 'nameType", "title" => "DOI 10.14454/10703: [facet 'enumeration'] The value 'personal' is not an element of the set {'Organizational', 'Personal'}. at line 12, column 0", "uid" => "10.14454/10703") end end