From 01e48b1c502b10e226ffc7bbeb2c75e0656bddb4 Mon Sep 17 00:00:00 2001 From: Kristian Garza Date: Fri, 29 Nov 2019 11:42:24 +0100 Subject: [PATCH] aggregations cleaning --- app/models/event.rb | 95 ++++++++++++--------------------------------- 1 file changed, 25 insertions(+), 70 deletions(-) diff --git a/app/models/event.rb b/app/models/event.rb index b92f7e9e0..f6a0d345d 100644 --- a/app/models/event.rb +++ b/app/models/event.rb @@ -200,7 +200,7 @@ def self.query_fields ["subj_id^10", "obj_id^10", "subj.name^5", "subj.author^5", "subj.periodical^5", "subj.publisher^5", "obj.name^5", "obj.author^5", "obj.periodical^5", "obj.publisher^5", "_all"] end - def self.query_aggregations(doi = nil) + def self.query_aggregations sum_distribution = { sum_bucket: { buckets_path: "year_months>total_by_year_month" @@ -221,11 +221,6 @@ def self.query_aggregations(doi = nil) citation_types: { terms: { field: "citation_type", size: 50, min_doc_count: 1 }, aggs: { year_months: { date_histogram: { field: "occurred_at", interval: "month", min_doc_count: 1 }, aggs: { "total_by_year_month" => { sum: { field: "total" } } } } } }, relation_types: { terms: { field: "relation_type_id", size: 50, min_doc_count: 1 }, aggs: { year_months: { date_histogram: { field: "occurred_at", interval: "month", min_doc_count: 1 }, aggs: { "total_by_year_month" => { sum: { field: "total" } } } }, "sum_distribution" => sum_distribution } }, dois: { terms: { field: "obj_id", size: 50, min_doc_count: 1 }, aggs: { relation_types: { terms: { field: "relation_type_id", size: 50, min_doc_count: 1 }, aggs: { "total_by_type" => { sum: { field: "total" } } } } } }, - # dois_usage: { - # filter: { script: { script: "doc['source_id'].value == 'datacite-usage'" } }, - # aggs: { - # dois: { terms: { field: "obj_id", size: 50, min_doc_count: 1 }, aggs: { relation_types: { terms: { field: "relation_type_id", size: 50, min_doc_count: 1 }, aggs: { "total_by_type" => { sum: { field: "total" } } } } } } } - # }, dois_citations: { filter: { script: { @@ -237,96 +232,56 @@ def self.query_aggregations(doi = nil) } end - def self.metrics_aggregations(doi = nil) - sum_distribution = { - sum_bucket: { - buckets_path: "year_months>total_by_year_month" + + + def self.citation_count_aggregation + { + citations: { + terms: { field: "doi", size: 100, min_doc_count: 1 }, aggs: { total: { cardinality: { field: "citation_id" } } } } } + end - views_filter = { script: { script: "doc['relation_type_id'].value == 'unique-dataset-investigations-regular' && doc['source_id'].value == 'datacite-usage'" } } - downloads_filter = { script: { script: "doc['relation_type_id'].value == 'unique-dataset-requests-regular' && doc['source_id'].value == 'datacite-usage'" } } - - { - views: { - filter: views_filter, - aggs: { dois: { - terms: { field: "obj_id", size: 50, min_doc_count: 1 } , aggs: { "total_by_type" => { sum: { field: "total" } } } - } } - }, - views_histogram: { - filter: views_filter, - aggs: { - year_months: { date_histogram: { field: "occurred_at", interval: "month", min_doc_count: 1 }, aggs: { "total_by_year_month" => { sum: { field: "total" } } } }, "sum_distribution" => sum_distribution - } - }, - downloads: { - filter: downloads_filter, - aggs: { dois: { - terms: { field: "obj_id", size: 50, min_doc_count: 1 } , aggs: { "total_by_type" => { sum: { field: "total" } } } - } } - }, - downloads_histogram: { - filter: downloads_filter, - aggs: { - year_months: { date_histogram: { field: "occurred_at", interval: "month", min_doc_count: 1 }, aggs: { "total_by_year_month" => { sum: { field: "total" } } } }, "sum_distribution" => sum_distribution - } + def self.usage_count_aggregation + { + usage: { + terms: { field: "obj_id", size: 50, min_doc_count: 1 } , aggs: { "total_by_type" => { sum: { field: "total" } } } } } end - def self.citations_aggregations(doi) - doi = Event.new.normalize_doi(doi) if doi.present? - + def self.yearly_histogram_aggregation sum_year_distribution = { sum_bucket: { buckets_path: "years>total_by_year" } } - citations_filter = { script: { script: "(#{PASSIVE_RELATION_TYPES}.contains(doc['relation_type_id'].value) && '#{doi}' == doc['subj_id'].value) || (#{ACTIVE_RELATION_TYPES}.contains(doc['relation_type_id'].value) && '#{doi}' == doc['obj_id'].value)" } } - references_filter = { script: { script: "(#{PASSIVE_RELATION_TYPES}.contains(doc['relation_type_id'].value) && '#{doi}' == doc['obj_id'].value) || (#{ACTIVE_RELATION_TYPES}.contains(doc['relation_type_id'].value) && '#{doi}' == doc['subj_id'].value)" } } - { - citations_histogram: { - filter: citations_filter, + histogram: { + filter: { script: { script: "true"}}, aggs: { years: { histogram: { field: "citation_year", interval: 1 , min_doc_count: 1 }, aggs: { "total_by_year" => { sum: { field: "total" } } } }, "sum_distribution" => sum_year_distribution } }, - references: { - filter: references_filter, - aggs: { dois: { - terms: { field: "doi", size: 100, min_doc_count: 1 }, aggs: { total: { cardinality: { field: "citation_id" } } } - } } - }, - relations: { - filter: { script: { script: "#{RELATIONS_RELATION_TYPES}.contains(doc['relation_type_id'].value)" } - }, - aggs: { dois: { - terms: { field: "doi", size: 100, min_doc_count: 1 }, aggs: { total: { cardinality: { field: "citation_id" } } } - } } - } } end - - def self.citation_count_aggregation(doi) - { - citations: { - terms: { field: "doi", size: 100, min_doc_count: 1 }, aggs: { total: { cardinality: { field: "citation_id" } } } + def self.monthly_histogram_aggregation + sum_distribution = { + sum_bucket: { + buckets_path: "year_months>total_by_year_month" } } - end - - def self.usage_count_aggregation(doi) { - usage: { - terms: { field: "obj_id", size: 50, min_doc_count: 1 } , aggs: { "total_by_type" => { sum: { field: "total" } } } + histogram: { + filter: { script: { script: "true"}}, + aggs: { + year_months: { date_histogram: { field: "occurred_at", interval: "month", min_doc_count: 1 }, aggs: { "total_by_year_month" => { sum: { field: "total" } } } }, "sum_distribution" => sum_distribution } - } + }} end - def self.advanced_aggregations(doi = nil) + def self.advanced_aggregations { unique_obj_count: { cardinality: { field: "obj_id" } }, unique_subj_count: { cardinality: { field: "subj_id" } }