Skip to content

Commit

Permalink
optionally change number of facets in DOI REST API. #709
Browse files Browse the repository at this point in the history
  • Loading branch information
Martin Fenner committed Jan 27, 2021
1 parent 3794f90 commit 1397676
Show file tree
Hide file tree
Showing 4 changed files with 200 additions and 162 deletions.
140 changes: 55 additions & 85 deletions app/controllers/datacite_dois_controller.rb
Original file line number Diff line number Diff line change
Expand Up @@ -68,6 +68,9 @@ def index
params[:state] = "findable"
end

# default number of facets
facet_count = (params[:facet_count] || 10).to_i

if params[:id].present?
response = DataciteDoi.find_by_id(params[:id])
elsif params[:ids].present?
Expand Down Expand Up @@ -122,6 +125,7 @@ def index
sample_size: params[:sample],
source: params[:source],
scroll_id: params[:scroll_id],
facet_count: facet_count,
page: page,
sort: sort,
random: params[:random],
Expand Down Expand Up @@ -206,91 +210,56 @@ def index
status: :ok
end
else
states =
if total.positive?
facet_by_key(response.aggregations.states.buckets)
end
resource_types =
if total.positive?
facet_by_combined_key(response.aggregations.resource_types.buckets)
end
published =
if total.positive?
facet_by_range(response.aggregations.published.buckets)
end
created =
if total.positive?
facet_by_key_as_string(response.aggregations.created.buckets)
end
registered =
if total.positive?
facet_by_key_as_string(response.aggregations.registered.buckets)
end
providers =
if total.positive?
facet_by_combined_key(response.aggregations.providers.buckets)
end
clients =
if total.positive?
facet_by_combined_key(response.aggregations.clients.buckets)
end
prefixes =
if total.positive?
facet_by_key(response.aggregations.prefixes.buckets)
end
schema_versions =
if total.positive?
facet_by_schema(response.aggregations.schema_versions.buckets)
end

affiliations =
if total.positive?
facet_by_combined_key(response.aggregations.affiliations.buckets)
end
# sources = total.positive? ? facet_by_key(response.aggregations.sources.buckets) : nil
subjects =
if total.positive?
facet_by_key(response.aggregations.subjects.buckets)
end
fields_of_science =
if total.positive?
facet_by_fos(
response.aggregations.fields_of_science.subject.buckets,
)
end
certificates =
if total.positive?
facet_by_key(response.aggregations.certificates.buckets)
end
licenses =
if total.positive?
facet_by_license(response.aggregations.licenses.buckets)
end

link_checks_status =
if total.positive?
facet_by_cumulative_year(
response.aggregations.link_checks_status.buckets,
)
end
# links_with_schema_org = total.positive? ? facet_by_cumulative_year(response.aggregations.link_checks_has_schema_org.buckets) : nil
# link_checks_schema_org_id = total.positive? ? response.aggregations.link_checks_schema_org_id.value : nil
# link_checks_dc_identifier = total.positive? ? response.aggregations.link_checks_dc_identifier.value : nil
# link_checks_citation_doi = total.positive? ? response.aggregations.link_checks_citation_doi.value : nil
# links_checked = total.positive? ? response.aggregations.links_checked.value : nil

citations =
if total.positive?
metric_facet_by_year(response.aggregations.citations.buckets)
end
views =
if total.positive?
metric_facet_by_year(response.aggregations.views.buckets)
end
downloads =
if total.positive?
metric_facet_by_year(response.aggregations.downloads.buckets)
end
if total.positive? && facet_count.positive?
states = facet_by_key(response.aggregations.states.buckets)
resource_types = facet_by_combined_key(response.aggregations.resource_types.buckets)
published = facet_by_range(response.aggregations.published.buckets)
created = facet_by_key_as_string(response.aggregations.created.buckets)
registered = facet_by_key_as_string(response.aggregations.registered.buckets)
providers = facet_by_combined_key(response.aggregations.providers.buckets)
clients = facet_by_combined_key(response.aggregations.clients.buckets)
prefixes = facet_by_key(response.aggregations.prefixes.buckets)
schema_versions = facet_by_schema(response.aggregations.schema_versions.buckets)
affiliations = facet_by_combined_key(response.aggregations.affiliations.buckets)
# sources = total.positive? ? facet_by_key(response.aggregations.sources.buckets) : nil
subjects = facet_by_key(response.aggregations.subjects.buckets)
fields_of_science = facet_by_fos(
response.aggregations.fields_of_science.subject.buckets,
)
certificates = facet_by_key(response.aggregations.certificates.buckets)
licenses = facet_by_license(response.aggregations.licenses.buckets)
link_checks_status = facet_by_cumulative_year(
response.aggregations.link_checks_status.buckets,
)
# links_with_schema_org = total.positive? ? facet_by_cumulative_year(response.aggregations.link_checks_has_schema_org.buckets) : nil
# link_checks_schema_org_id = total.positive? ? response.aggregations.link_checks_schema_org_id.value : nil
# link_checks_dc_identifier = total.positive? ? response.aggregations.link_checks_dc_identifier.value : nil
# link_checks_citation_doi = total.positive? ? response.aggregations.link_checks_citation_doi.value : nil
# links_checked = total.positive? ? response.aggregations.links_checked.value : nil

citations = metric_facet_by_year(response.aggregations.citations.buckets)
views = metric_facet_by_year(response.aggregations.views.buckets)
downloads = metric_facet_by_year(response.aggregations.downloads.buckets)
else
states = nil
resource_types = nil
published = nil
created = nil
registered = nil
providers = nil
clients = nil
prefixes = nil
schema_versions = nil
affiliations = nil
subjects = nil
fields_of_science = nil
certificates = nil
licenses = nil
link_checks_status = nil
citations = nil
views = nil
downloads = nil
end

respond_to do |format|
format.json do
Expand Down Expand Up @@ -359,6 +328,7 @@ def index
"has-person" => params[:has_person],
"has-affiliation" => params[:has_affiliation],
"has-funder" => params[:has_funder],
facet_count: params[:facet_count],
detail: params[:detail],
composite: params[:composite],
affiliation: params[:affiliation],
Expand Down
138 changes: 70 additions & 68 deletions app/models/doi.rb
Original file line number Diff line number Diff line change
Expand Up @@ -585,78 +585,80 @@ def self.gql_query_aggregations(facet_count: 10)
end
end

# Builds the aggregation (facet) definitions that are sent along with DOI
# search queries.
#
# @param facet_count [Integer, String, nil] maximum number of buckets
#   returned per facet. +nil+ falls back to the default of 10, so callers
#   that forward an unset +options[:facet_count]+ still receive facets.
#   Zero or a negative value disables facets.
# @return [Hash, nil] aggregation definitions keyed by facet name, or +nil+
#   when facets are disabled.
def self.query_aggregations(facet_count: 10)
  # An explicitly passed nil bypasses the keyword default, and nil.to_i is 0,
  # which would silently switch facets off. Normalise once so every `size`
  # below is an Integer.
  facet_count = (facet_count || 10).to_i
  return unless facet_count.positive?

  {
    resource_types: { terms: { field: "resource_type_id_and_name", size: facet_count, min_doc_count: 1 } },
    # aasm_state keeps a fixed size of 3, independent of facet_count.
    states: { terms: { field: "aasm_state", size: 3, min_doc_count: 1 } },
    published: {
      date_histogram: {
        field: "publication_year",
        interval: "year",
        format: "year",
        order: {
          _key: "desc",
        },
        min_doc_count: 1,
      },
    },
    registration_agencies: { terms: { field: "agency", size: facet_count, min_doc_count: 1 } },
    created: { date_histogram: { field: "created", interval: "year", format: "year", order: { _key: "desc" }, min_doc_count: 1 },
               aggs: { bucket_truncate: { bucket_sort: { size: facet_count } } } },
    registered: { date_histogram: { field: "registered", interval: "year", format: "year", order: { _key: "desc" }, min_doc_count: 1 },
                  aggs: { bucket_truncate: { bucket_sort: { size: facet_count } } } },
    providers: { terms: { field: "provider_id_and_name", size: facet_count, min_doc_count: 1 } },
    # NOTE(review): clients is the only terms facet still capped at 10 rather
    # than facet_count - confirm whether that is intentional.
    clients: { terms: { field: "client_id_and_name", size: 10, min_doc_count: 1 } },
    affiliations: { terms: { field: "affiliation_id_and_name", size: facet_count, min_doc_count: 1 } },
    prefixes: { terms: { field: "prefix", size: facet_count, min_doc_count: 1 } },
    schema_versions: { terms: { field: "schema_version", size: facet_count, min_doc_count: 1 } },
    link_checks_status: { terms: { field: "landing_page.status", size: facet_count, min_doc_count: 1 } },
    # link_checks_has_schema_org: { terms: { field: 'landing_page.hasSchemaOrg', size: 2, min_doc_count: 1 } },
    # link_checks_schema_org_id: { value_count: { field: "landing_page.schemaOrgId" } },
    # link_checks_dc_identifier: { value_count: { field: "landing_page.dcIdentifier" } },
    # link_checks_citation_doi: { value_count: { field: "landing_page.citationDoi" } },
    # links_checked: { value_count: { field: "landing_page.checked" } },
    # sources: { terms: { field: 'source', size: 15, min_doc_count: 1 } },
    subjects: { terms: { field: "subjects.subject", size: facet_count, min_doc_count: 1 } },
    pid_entities: {
      filter: { term: { "subjects.subjectScheme": "PidEntity" } },
      aggs: {
        subject: { terms: { field: "subjects.subject", size: facet_count, min_doc_count: 1,
                            include: %w(Dataset Publication Software Organization Funder Person Grant Sample Instrument Repository Project) } },
      },
    },
    fields_of_science: {
      filter: { term: { "subjects.subjectScheme": "Fields of Science and Technology (FOS)" } },
      aggs: {
        subject: { terms: { field: "subjects.subject", size: facet_count, min_doc_count: 1,
                            include: "FOS:.*" } },
      },
    },
    licenses: { terms: { field: "rights_list.rightsIdentifier", size: facet_count, min_doc_count: 1 } },
    languages: { terms: { field: "language", size: facet_count, min_doc_count: 1 } },
    certificates: { terms: { field: "client.certificate", size: facet_count, min_doc_count: 1 } },
    # The three metric facets sum a per-DOI counter within each publication
    # year and keep only the first facet_count year buckets.
    views: {
      date_histogram: { field: "publication_year", interval: "year", format: "year", order: { _key: "desc" }, min_doc_count: 1 },
      aggs: {
        metric_count: { sum: { field: "view_count" } },
        bucket_truncate: { bucket_sort: { size: facet_count } },
      },
    },
    downloads: {
      date_histogram: { field: "publication_year", interval: "year", format: "year", order: { _key: "desc" }, min_doc_count: 1 },
      aggs: {
        metric_count: { sum: { field: "download_count" } },
        bucket_truncate: { bucket_sort: { size: facet_count } },
      },
    },
    citations: {
      date_histogram: { field: "publication_year", interval: "year", format: "year", order: { _key: "desc" }, min_doc_count: 1 },
      aggs: {
        metric_count: { sum: { field: "citation_count" } },
        bucket_truncate: { bucket_sort: { size: facet_count } },
      },
    },
  }
end

def self.provider_aggregations
Expand Down Expand Up @@ -720,7 +722,7 @@ def self.find_by_ids(ids, options = {})
must: must,
},
},
aggregations: query_aggregations,
aggregations: query_aggregations(facet_count: options[:facet_count]),
)
end

Expand Down Expand Up @@ -761,7 +763,7 @@ def self.stats_query(options = {})
# query for graphql, removing options that are not needed
def self.gql_query(query, options = {})
options[:page] ||= {}
options[:facet_count] ||= 10
options[:facet_count] = (options[:facet_count] || 10).to_i
aggregations = gql_query_aggregations(facet_count: options[:facet_count])

# cursor nav uses search_after, this should always be an array of values that match the sort.
Expand Down Expand Up @@ -952,7 +954,7 @@ def self.query(query, options = {})
elsif options[:totals_agg] == "prefix"
prefix_aggregations
else
query_aggregations
query_aggregations(facet_count: options[:facet_count])
end

# Cursor nav uses search_after, this should always be an array of values that match the sort.
Expand Down

Some generated files are not rendered by default. Learn more about how customized files appear on GitHub.

Loading

0 comments on commit 1397676

Please sign in to comment.