From cc604cc0b8694c3f8ff206f89d49ae596cc306bb Mon Sep 17 00:00:00 2001 From: Kristian Garza Date: Thu, 1 Aug 2019 15:28:33 +0200 Subject: [PATCH 1/7] added citation year --- app/models/event.rb | 34 ++++++++++++++++++---------------- 1 file changed, 18 insertions(+), 16 deletions(-) diff --git a/app/models/event.rb b/app/models/event.rb index ef36017e4..a995a904d 100644 --- a/app/models/event.rb +++ b/app/models/event.rb @@ -127,6 +127,7 @@ class Event < ActiveRecord::Base indexes :indexed_at, type: :date indexes :occurred_at, type: :date indexes :citation_id, type: :keyword + indexes :citation_year, type: :keyword indexes :cache_key, type: :keyword end @@ -161,6 +162,7 @@ def as_indexed_json(options={}) "indexed_at" => indexed_at, "occurred_at" => occurred_at, "citation_id" => citation_id, + "citation_year" => citation_year, "cache_key" => cache_key } end @@ -205,28 +207,15 @@ def self.query_aggregations script: "#{INCLUDED_RELATION_TYPES}.contains(doc['relation_type_id'].value)" } }, - aggs: { years: { date_histogram: { field: 'occurred_at', interval: 'year', min_doc_count: 1 }, aggs: { "total_by_year" => { sum: { field: 'total' }}}},"sum_distribution"=>sum_year_distribution} + aggs: { years: { date_histogram: { field: 'citation_year', interval: 'year', min_doc_count: 1 }, aggs: { "total_by_year" => { sum: { field: 'total' }}}},"sum_distribution"=>sum_year_distribution} + # }, # citations_histogram: { # filter: { # script: { # script: "#{INCLUDED_RELATION_TYPES}.contains(doc['relation_type_id'].value)" # } # }, - # aggs: { years: { terms: { script: { source: " - # String subjDatePublished = params['_source']['subj']['date_published']?.substring(0, 4); - # String objDatePublished = params['_source']['obj']['date_published']?.substring(0, 4); - - # if( params['_source']['subj']['date_published']?.substring(0, 4) !== null && params['_source']['obj']['date_published']?.substring(0, 4) !== null){ - - # if(Integer.parseInt(objDatePublished) > Integer.parseInt(subjDatePublished) ) - # { - # objDatePublished - # } - # else{ - # subjDatePublished - # } - # } - # " }}}} + # aggs: { years: { date_histogram: { field: 'citation_year', interval: 'year', min_doc_count: 1 }, aggs: { "total_by_year" => { sum: { field: 'total' }}}},"sum_distribution"=>sum_year_distribution} # }, # citations: { # filter: { @@ -534,6 +523,19 @@ def obj_cache_key "objects/#{obj_id}-#{timestamp}" end + def citation_year + "" unless INCLUDED_RELATION_TYPES.include?(relation_type_id) + subj_publication = subj['date_published'] || (date_published(subj_id) || year_month) + obj_publication = obj['date_published'] || (date_published(obj_id) || year_month) + [subj_publication[0..3].to_i, obj_publication[0..3].to_i].max + end + + def date_published(doi) + ## TODO: we need to make sure all the dois from other RA are indexed + doi = Doi.where(doi: doi).first + doi[:published] if doi.present? + end + def set_defaults self.uuid = SecureRandom.uuid if uuid.blank? self.subj_id = normalize_doi(subj_id) || subj_id From e37cc31bc13cb66aa4f7e90025abe9db73d1540b Mon Sep 17 00:00:00 2001 From: Kristian Garza Date: Thu, 1 Aug 2019 15:28:39 +0200 Subject: [PATCH 2/7] citation year test --- spec/factories/default.rb | 2 +- spec/models/event_spec.rb | 30 ++++++++++++++++++++++++------ 2 files changed, 25 insertions(+), 7 deletions(-) diff --git a/spec/factories/default.rb b/spec/factories/default.rb index 47b6141d4..87ad79c92 100644 --- a/spec/factories/default.rb +++ b/spec/factories/default.rb @@ -294,7 +294,7 @@ source_id { "datacite_related" } source_token { "datacite_related_123" } sequence(:subj_id) { |n| "http://doi.org/10.5061/DRYAD.47SD5e/#{n}" } - subj { nil } + subj { {"datePublished"=>"2006-06-13T16:14:19Z"} } obj_id { "http://doi.org/10.5061/DRYAD.47SD5/1" } relation_type_id { "has_part" } end diff --git a/spec/models/event_spec.rb b/spec/models/event_spec.rb index b02bc7cac..76a86fe04 100644 --- a/spec/models/event_spec.rb +++ b/spec/models/event_spec.rb @@ -2,14 +2,32 @@ describe Event, :type => :model, vcr: true do before(:each) { allow(Time.zone).to receive(:now).and_return(Time.mktime(2015, 4, 8)) } + context "event" do + subject { create(:event) } - subject { create(:event) } + it { is_expected.to validate_presence_of(:subj_id) } + it { is_expected.to validate_presence_of(:source_token) } + it { is_expected.to validate_presence_of(:source_id) } - it { is_expected.to validate_presence_of(:subj_id) } - it { is_expected.to validate_presence_of(:source_token) } - it { is_expected.to validate_presence_of(:source_id) } + it "has subj" do + expect(subject.subj["date-published"]).to eq("2006-06-13T16:14:19Z") + end + end - it "has subj" do - expect(subject.subj["date-published"]).to eq("2006-06-13T16:14:19Z") + context "citation" do + subject { create(:event_for_datacite_related) } + + it "has citation_id" do + expect(subject.citation_id).to eq("https://doi.org/10.5061/dryad.47sd5/1-https://doi.org/10.5061/dryad.47sd5e/1") + end + + it "has citation_year" do + expect(subject.citation_year).to eq(2006) + end + + it "has published_dates" do + expect(subject.subj["datePublished"]).to eq("2006-06-13T16:14:19Z") + expect(subject.obj["datePublished"]).to be_nil + end end end From 0c15805b9aebc3cf7071a11a9352b8ca14442c6d Mon Sep 17 00:00:00 2001 From: Kristian Garza Date: Thu, 1 Aug 2019 15:33:01 +0200 Subject: [PATCH 3/7] aggregator selector just to separate the new aggregations. it does not affect old aggregations --- app/controllers/events_controller.rb | 30 +++++++++-------- app/models/concerns/indexable.rb | 12 ++++++- app/models/event.rb | 48 +++++++++++++++++----------- 3 files changed, 58 insertions(+), 32 deletions(-) diff --git a/app/controllers/events_controller.rb b/app/controllers/events_controller.rb index 70de73952..a03e43b64 100644 --- a/app/controllers/events_controller.rb +++ b/app/controllers/events_controller.rb @@ -104,6 +104,7 @@ def index publication_year: params[:publication_year], occurred_at: params[:occurred_at], year_month: params[:year_month], + aggregations: params[:aggregations], unique: params[:unique], page: page, sort: sort) @@ -113,17 +114,20 @@ def index total_for_pages = page[:cursor].nil? ? [total.to_f, 10000].min : total.to_f total_pages = page[:size] > 0 ? (total_for_pages / page[:size]).ceil : 0 - sources = total.positive? ? facet_by_source(response.response.aggregations.sources.buckets) : nil - prefixes = total.positive? ? facet_by_source(response.response.aggregations.prefixes.buckets) : nil - citation_types = total.positive? ? facet_by_citation_type(response.response.aggregations.citation_types.buckets) : nil - relation_types = total.positive? ? facet_by_relation_type(response.response.aggregations.relation_types.buckets) : nil - registrants = total.positive? && params[:extra] ? facet_by_registrants(response.response.aggregations.registrants.buckets) : nil - pairings = total.positive? && params[:extra] ? facet_by_pairings(response.response.aggregations.pairings.buckets) : nil - dois = total.positive? && params[:extra] ? facet_by_dois(response.response.aggregations.dois.buckets) : nil - dois_usage = total.positive? && params[:extra] ? facet_by_dois(response.response.aggregations.dois_usage.dois.buckets) : nil - citations_histogram = total.positive? && params[:extra] ? facet_citations_by_year(response.response.aggregations.dois_citations) : nil - # citations_histogram = total.positive? && params[:extra] ? facet_citations_by_year(response.response.aggregations.citations_histogram.years.buckets) : nil - # citations = total.positive? && params[:extra] ? facet_citations_by_dois(response.response.aggregations.citations.dois.buckets) : nil + + aggregations = params.fetch(:aggregations,"").nil? ? "" : params.fetch(:aggregations,"") + + sources = total.positive? && aggregations.blank? || aggregations.include?("query_aggregations") ? facet_by_source(response.response.aggregations.sources.buckets) : nil + prefixes = total.positive? && aggregations.blank? || aggregations.include?("query_aggregations") ? facet_by_source(response.response.aggregations.prefixes.buckets) : nil + citation_types = total.positive? && aggregations.blank? || aggregations.include?("query_aggregations") ? facet_by_citation_type(response.response.aggregations.citation_types.buckets) : nil + relation_types = total.positive? && aggregations.blank? || aggregations.include?("query_aggregations") ? facet_by_relation_type(response.response.aggregations.relation_types.buckets) : nil + registrants = total.positive? && aggregations.include?("query_aggregations") ? facet_by_registrants(response.response.aggregations.registrants.buckets) : nil + pairings = total.positive? && aggregations.include?("query_aggregations") ? facet_by_pairings(response.response.aggregations.pairings.buckets) : nil + dois = total.positive? && aggregations.include?("query_aggregations") ? facet_by_dois(response.response.aggregations.dois.buckets) : nil + dois_usage = total.positive? && aggregations.include?("query_aggregations") ? facet_by_dois(response.response.aggregations.dois_usage.dois.buckets) : nil + citations_histogram = total.positive? && aggregations.include?("query_aggregations") ? facet_citations_by_year(response.response.aggregations.dois_citations) : nil + citations_histogram = total.positive? && aggregations.include?("metrics_aggregations") ? facet_citations_by_year(response.response.aggregations.citations_histogram) : nil + citations = total.positive? && aggregations.include?("metrics_aggregations") ? facet_citations_by_dois(response.response.aggregations.citations.dois.buckets) : nil results = response.results @@ -140,8 +144,8 @@ def index registrants: registrants, "doisRelationTypes": dois, "doisUsageTypes": dois_usage, - "doisCitations": citations_histogram - # "uniqueCitations": citations + "doisCitations": citations_histogram, + "uniqueCitations": citations }.compact options[:links] = { diff --git a/app/models/concerns/indexable.rb b/app/models/concerns/indexable.rb index f67e8cb77..7b07808e2 100644 --- a/app/models/concerns/indexable.rb +++ b/app/models/concerns/indexable.rb @@ -95,8 +95,18 @@ def find_by_id_list(ids, options={}) }) end + def get_aggregations_hash(aggregations="") + return send(:query_aggregations) if aggregations.blank? + aggs = {} + aggregations.split(",").each do |agg| + agg = :query_aggregations if agg.blank? || !respond_to?(agg) + aggs.merge! send(agg) + end + aggs + end + def query(query, options={}) - aggregations = options[:totals_agg] == true ? totals_aggregations : query_aggregations + aggregations = options[:totals_agg] == true ? totals_aggregations : get_aggregations_hash(options[:aggregations]) options[:page] ||= {} options[:page][:number] ||= 1 options[:page][:size] ||= 25 diff --git a/app/models/event.rb b/app/models/event.rb index a995a904d..1b4cc9191 100644 --- a/app/models/event.rb +++ b/app/models/event.rb @@ -208,28 +208,40 @@ def self.query_aggregations } }, aggs: { years: { date_histogram: { field: 'citation_year', interval: 'year', min_doc_count: 1 }, aggs: { "total_by_year" => { sum: { field: 'total' }}}},"sum_distribution"=>sum_year_distribution} - # }, - # citations_histogram: { - # filter: { - # script: { - # script: "#{INCLUDED_RELATION_TYPES}.contains(doc['relation_type_id'].value)" - # } - # }, - # aggs: { years: { date_histogram: { field: 'citation_year', interval: 'year', min_doc_count: 1 }, aggs: { "total_by_year" => { sum: { field: 'total' }}}},"sum_distribution"=>sum_year_distribution} - # }, - # citations: { - # filter: { - # script: { - # script: "#{INCLUDED_RELATION_TYPES}.contains(doc['relation_type_id'].value)" - # } - # }, - # aggs: { dois: { - # terms: { field: 'obj_id', size: 50, min_doc_count: 1 }, aggs: { unique_citations: { cardinality: { field: 'citation_id' }}} - # }} } } end + def self.metrics_aggregations + + sum_year_distribution = { + sum_bucket: { + buckets_path: "years>total_by_year" + } + } + + { + citations_histogram: { + filter: { + script: { + script: "#{INCLUDED_RELATION_TYPES}.contains(doc['relation_type_id'].value)" + } + }, + aggs: { years: { date_histogram: { field: 'citation_year', interval: 'year', min_doc_count: 1 }, aggs: { "total_by_year" => { sum: { field: 'total' }}}},"sum_distribution"=>sum_year_distribution} + }, + citations: { + filter: { + script: { + script: "#{INCLUDED_RELATION_TYPES}.contains(doc['relation_type_id'].value)" + } + }, + aggs: { dois: { + terms: { field: 'obj_id', size: 50, min_doc_count: 1 }, aggs: { unique_citations: { cardinality: { field: 'citation_id' }}} + }} + } + } + end + # return results for one or more ids def self.find_by_id(ids, options={}) ids = ids.split(",") if ids.is_a?(String) From 883ee780de78ebb013cab25191ad5d0e0f0a77eb Mon Sep 17 00:00:00 2001 From: Kristian Garza Date: Thu, 1 Aug 2019 15:38:03 +0200 Subject: [PATCH 4/7] aggregator selector tests --- spec/concerns/indexable_spec.rb | 62 +++++++++++++++++++++++++++++++++ 1 file changed, 62 insertions(+) diff --git a/spec/concerns/indexable_spec.rb b/spec/concerns/indexable_spec.rb index d0e803b6e..9da859643 100644 --- a/spec/concerns/indexable_spec.rb +++ b/spec/concerns/indexable_spec.rb @@ -100,6 +100,11 @@ it 'query by description' do results = Doi.query("description").results expect(results.total).to eq(1) + + expect(results.response.aggregations.states).not_to be_nil + expect(results.response.aggregations.prefixes).not_to be_nil + expect(results.response.aggregations.created).not_to be_nil + expect(results.response.aggregations.schema_versions).not_to be_nil end it 'query by description not found' do @@ -118,5 +123,62 @@ results = Doi.query(nil, page: { size: 1, cursor: results.to_a.last[:sort] }).results expect(results.to_a.length).to eq(1) end + + context "aggregations" do + it 'returns query_aggregation when filters aggregation with empty' do + aggregations = Doi.get_aggregations_hash("") + expect(aggregations[:resource_types]).not_to be_nil + expect(aggregations[:states]).not_to be_nil + expect(aggregations[:created]).not_to be_nil + expect(aggregations[:schema_versions]).not_to be_nil + end + + it 'returns multiple aggregations when filters aggregations with multiple' do + aggregations = Doi.get_aggregations_hash("query_aggregations,metrics_aggregations") + expect(aggregations[:resource_types]).not_to be_nil + expect(aggregations[:states]).not_to be_nil + expect(aggregations[:created]).not_to be_nil + expect(aggregations[:schema_versions]).not_to be_nil + end + end + end + + context "when event" do + let!(:event) { create(:event) } + let!(:events) { create_list(:event, 3) } + + before do + Event.import + sleep 1 + end + + context "aggregations" do + it 'returns query_aggregation when filters aggregation with empty' do + aggregations = Event.get_aggregations_hash("") + expect(aggregations[:sources]).not_to be_nil + expect(aggregations[:prefixes]).not_to be_nil + expect(aggregations[:citation_types]).not_to be_nil + expect(aggregations[:relation_types]).not_to be_nil + expect(aggregations[:registrants]).not_to be_nil + expect(aggregations[:pairings]).not_to be_nil + expect(aggregations[:dois_usage]).not_to be_nil + expect(aggregations[:citations_histogram]).to be_nil + expect(aggregations[:citations]).to be_nil + end + + it 'returns multiple aggregations when filters aggregations with multiple' do + aggregations = Event.get_aggregations_hash("query_aggregations,metrics_aggregations") + expect(aggregations[:sources]).not_to be_nil + expect(aggregations[:prefixes]).not_to be_nil + expect(aggregations[:citation_types]).not_to be_nil + expect(aggregations[:relation_types]).not_to be_nil + expect(aggregations[:registrants]).not_to be_nil + expect(aggregations[:pairings]).not_to be_nil + expect(aggregations[:dois]).not_to be_nil + expect(aggregations[:dois_usage]).not_to be_nil + expect(aggregations[:citations_histogram]).not_to be_nil + expect(aggregations[:citations]).not_to be_nil + end + end end end From 1ee110d0bd575b516425314930345a8903c32cf4 Mon Sep 17 00:00:00 2001 From: Kristian Garza Date: Thu, 1 Aug 2019 16:39:49 +0200 Subject: [PATCH 5/7] added aggreagtions for views and downlaods --- app/controllers/events_controller.rb | 21 ++++++---- app/models/event.rb | 63 +++++++++++++++++++++++++++- spec/models/event_spec.rb | 2 +- 3 files changed, 75 insertions(+), 11 deletions(-) diff --git a/app/controllers/events_controller.rb b/app/controllers/events_controller.rb index a03e43b64..888c6f884 100644 --- a/app/controllers/events_controller.rb +++ b/app/controllers/events_controller.rb @@ -115,19 +115,21 @@ def index total_pages = page[:size] > 0 ? (total_for_pages / page[:size]).ceil : 0 - aggregations = params.fetch(:aggregations,"").nil? ? "" : params.fetch(:aggregations,"") + aggregations = params.fetch(:aggregations,"") || "" sources = total.positive? && aggregations.blank? || aggregations.include?("query_aggregations") ? facet_by_source(response.response.aggregations.sources.buckets) : nil prefixes = total.positive? && aggregations.blank? || aggregations.include?("query_aggregations") ? facet_by_source(response.response.aggregations.prefixes.buckets) : nil citation_types = total.positive? && aggregations.blank? || aggregations.include?("query_aggregations") ? facet_by_citation_type(response.response.aggregations.citation_types.buckets) : nil relation_types = total.positive? && aggregations.blank? || aggregations.include?("query_aggregations") ? facet_by_relation_type(response.response.aggregations.relation_types.buckets) : nil - registrants = total.positive? && aggregations.include?("query_aggregations") ? facet_by_registrants(response.response.aggregations.registrants.buckets) : nil - pairings = total.positive? && aggregations.include?("query_aggregations") ? facet_by_pairings(response.response.aggregations.pairings.buckets) : nil - dois = total.positive? && aggregations.include?("query_aggregations") ? facet_by_dois(response.response.aggregations.dois.buckets) : nil - dois_usage = total.positive? && aggregations.include?("query_aggregations") ? facet_by_dois(response.response.aggregations.dois_usage.dois.buckets) : nil - citations_histogram = total.positive? && aggregations.include?("query_aggregations") ? facet_citations_by_year(response.response.aggregations.dois_citations) : nil + registrants = total.positive? && aggregations.blank? || aggregations.include?("query_aggregations") ? facet_by_registrants(response.response.aggregations.registrants.buckets) : nil + pairings = total.positive? && aggregations.blank? || aggregations.include?("query_aggregations") ? facet_by_pairings(response.response.aggregations.pairings.buckets) : nil + dois = total.positive? && aggregations.blank? || aggregations.include?("query_aggregations") ? facet_by_dois(response.response.aggregations.dois.buckets) : nil + dois_usage = total.positive? && aggregations.blank? || aggregations.include?("query_aggregations") ? facet_by_dois(response.response.aggregations.dois_usage.dois.buckets) : nil + dois_citations = total.positive? && aggregations.blank? || aggregations.include?("query_aggregations") ? facet_citations_by_year(response.response.aggregations.dois_citations) : nil citations_histogram = total.positive? && aggregations.include?("metrics_aggregations") ? facet_citations_by_year(response.response.aggregations.citations_histogram) : nil citations = total.positive? && aggregations.include?("metrics_aggregations") ? facet_citations_by_dois(response.response.aggregations.citations.dois.buckets) : nil + views_histogram = total.positive? && aggregations.include?("metrics_aggregations") ? facet_citations_by_year(response.response.aggregations.views) : nil + downloads_histogram = total.positive? && aggregations.include?("metrics_aggregations") ? facet_citations_by_year(response.response.aggregations.downloads) : nil results = response.results @@ -144,8 +146,11 @@ def index registrants: registrants, "doisRelationTypes": dois, "doisUsageTypes": dois_usage, - "doisCitations": citations_histogram, - "uniqueCitations": citations + "doisCitations": dois_citations, + "citationsHistogram": citations_histogram, + "citations": citations, + "viewsHistogram": views_histogram, + "downloadsHistogram": downloads_histogram }.compact options[:links] = { diff --git a/app/models/event.rb b/app/models/event.rb index 1b4cc9191..c33cf1fbb 100644 --- a/app/models/event.rb +++ b/app/models/event.rb @@ -73,6 +73,17 @@ class Event < ActiveRecord::Base "describes", "is-described-by" ] + VIEWS_RELATION_TYPES = [ + "unique-dataset-investigations-regular", + "total-dataset-investigations-regular" + ] + + + DOWNLOADS_RELATION_TYPES = [ + "unique-dataset-requests-regular", + "total-dataset-requests-regular" + ] + validates :subj_id, :source_id, :source_token, presence: true attr_accessor :container_title, :url @@ -207,7 +218,7 @@ def self.query_aggregations script: "#{INCLUDED_RELATION_TYPES}.contains(doc['relation_type_id'].value)" } }, - aggs: { years: { date_histogram: { field: 'citation_year', interval: 'year', min_doc_count: 1 }, aggs: { "total_by_year" => { sum: { field: 'total' }}}},"sum_distribution"=>sum_year_distribution} + aggs: { years: { date_histogram: { field: 'occurred_at', interval: 'year', min_doc_count: 1 }, aggs: { "total_by_year" => { sum: { field: 'total' }}}},"sum_distribution"=>sum_year_distribution} } } end @@ -238,7 +249,55 @@ def self.metrics_aggregations aggs: { dois: { terms: { field: 'obj_id', size: 50, min_doc_count: 1 }, aggs: { unique_citations: { cardinality: { field: 'citation_id' }}} }} - } + }, + views: { + filter: { + script: { + script: "#{VIEWS_RELATION_TYPES}.contains(doc['relation_type_id'].value) && doc['source_id'].value == 'datacite-usage' && doc['occurred_at'].value.getMillis() >= doc['obj.datePublished'].value.getMillis() && doc['occurred_at'].value.getMillis() < new Date().getTime()" , + } + }, + aggs: { + dois: { + terms: { + field: 'obj_id', + size: 50, + min_doc_count: 1 + }, + aggs: { + "total_by_type" => { + sum: { + field: 'total' + } + } + } + }, + years: { date_histogram: { field: 'occurred_at', interval: 'year', min_doc_count: 1 }, aggs: { "total_by_year" => { sum: { field: 'total' } } } }, "sum_distribution" => sum_year_distribution + } + }, + downloads: { + filter: { + script: { + script: "#{DOWNLOADS_RELATION_TYPES}.contains(doc['relation_type_id'].value) && doc['source_id'].value == 'datacite-usage' && doc['occurred_at'].value.getMillis() >= doc['obj.datePublished'].value.getMillis() && doc['occurred_at'].value.getMillis() < new Date().getTime()" , + } + }, + aggs: { + dois: { + terms: { + field: 'obj_id', + size: 50, + min_doc_count: 1 + }, + aggs: { + "total_by_type" => { + sum: { + field: 'total' + } + } + } + }, + years: { date_histogram: { field: 'occurred_at', interval: 'year', min_doc_count: 1 }, aggs: { "total_by_year" => { sum: { field: 'total' } } } }, "sum_distribution" => sum_year_distribution + } + } } end diff --git a/spec/models/event_spec.rb b/spec/models/event_spec.rb index 76a86fe04..a3476da0e 100644 --- a/spec/models/event_spec.rb +++ b/spec/models/event_spec.rb @@ -22,7 +22,7 @@ end it "has citation_year" do - expect(subject.citation_year).to eq(2006) + expect(subject.citation_year).to eq(2015) end it "has published_dates" do From 88705c7043b776b2cf819f333ff43b3ff175222b Mon Sep 17 00:00:00 2001 From: Kristian Garza Date: Thu, 1 Aug 2019 17:47:09 +0200 Subject: [PATCH 6/7] reverse label name --- app/controllers/events_controller.rb | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/app/controllers/events_controller.rb b/app/controllers/events_controller.rb index 888c6f884..387709cd4 100644 --- a/app/controllers/events_controller.rb +++ b/app/controllers/events_controller.rb @@ -148,7 +148,7 @@ def index "doisUsageTypes": dois_usage, "doisCitations": dois_citations, "citationsHistogram": citations_histogram, - "citations": citations, + "uniqueCitations": citations, "viewsHistogram": views_histogram, "downloadsHistogram": downloads_histogram }.compact From 77a72d611f104148baf01a5546ba63b1990f0fd5 Mon Sep 17 00:00:00 2001 From: Kristian Garza Date: Thu, 1 Aug 2019 22:25:09 +0200 Subject: [PATCH 7/7] codeclimate suggestions --- app/controllers/concerns/facetable.rb | 24 ++++++---- app/controllers/events_controller.rb | 16 +++---- app/models/concerns/indexable.rb | 2 +- app/models/event.rb | 65 ++++++--------------------- 4 files changed, 38 insertions(+), 69 deletions(-) diff --git a/app/controllers/concerns/facetable.rb b/app/controllers/concerns/facetable.rb index 99a7d8942..32aba70e0 100644 --- a/app/controllers/concerns/facetable.rb +++ b/app/controllers/concerns/facetable.rb @@ -134,14 +134,6 @@ def facet_by_source(arr) end end - # def facet_citations_by_year(hash) - # hash.map do |hsh| - # { "id" => hsh["key"].to_i, - # "title" => hsh["key"], - # "count" => hsh["doc_count"] } - # end - # end - def facet_citations_by_year(hash) arr = hash.dig('years', 'buckets').map do |h| year = h['key_as_string'][0..3].to_i @@ -156,6 +148,22 @@ def facet_citations_by_year(hash) "years" => arr } end + def facet_counts_by_year_month(hash) + arr = hash.dig('year_months', 'buckets').map do |h| + month = h["key_as_string"][5..6].to_i + title = I18n.t("date.month_names")[month] + " " + h["key_as_string"][0..3] + + { + "id" => h["key_as_string"][0..6], + 'title' => title, + 'sum' => h.dig('total_by_year_month', 'value') } + end + { "count" => hash.dig("sum_distribution", "value"), + "yearMonths" => arr } + end + + + def facet_by_relation_type(arr) arr.map do |hsh| arr = hsh.dig("year_months", "buckets").map do |h| diff --git a/app/controllers/events_controller.rb b/app/controllers/events_controller.rb index 387709cd4..7c20b0f4f 100644 --- a/app/controllers/events_controller.rb +++ b/app/controllers/events_controller.rb @@ -115,21 +115,21 @@ def index total_pages = page[:size] > 0 ? (total_for_pages / page[:size]).ceil : 0 - aggregations = params.fetch(:aggregations,"") || "" + aggregations = params.fetch(:aggregations, "") || "" sources = total.positive? && aggregations.blank? || aggregations.include?("query_aggregations") ? facet_by_source(response.response.aggregations.sources.buckets) : nil - prefixes = total.positive? && aggregations.blank? || aggregations.include?("query_aggregations") ? facet_by_source(response.response.aggregations.prefixes.buckets) : nil - citation_types = total.positive? && aggregations.blank? || aggregations.include?("query_aggregations") ? facet_by_citation_type(response.response.aggregations.citation_types.buckets) : nil - relation_types = total.positive? && aggregations.blank? || aggregations.include?("query_aggregations") ? facet_by_relation_type(response.response.aggregations.relation_types.buckets) : nil + prefixes = total.positive? && aggregations.blank? || aggregations.include?("query_aggregations") ? facet_by_source(response.response.aggregations.prefixes.buckets) : nil + citation_types = total.positive? && aggregations.blank? || aggregations.include?("query_aggregations") ? facet_by_citation_type(response.response.aggregations.citation_types.buckets) : nil + relation_types = total.positive? && aggregations.blank? || aggregations.include?("query_aggregations") ? facet_by_relation_type(response.response.aggregations.relation_types.buckets) : nil registrants = total.positive? && aggregations.blank? || aggregations.include?("query_aggregations") ? facet_by_registrants(response.response.aggregations.registrants.buckets) : nil pairings = total.positive? && aggregations.blank? || aggregations.include?("query_aggregations") ? facet_by_pairings(response.response.aggregations.pairings.buckets) : nil dois = total.positive? && aggregations.blank? || aggregations.include?("query_aggregations") ? facet_by_dois(response.response.aggregations.dois.buckets) : nil dois_usage = total.positive? && aggregations.blank? || aggregations.include?("query_aggregations") ? facet_by_dois(response.response.aggregations.dois_usage.dois.buckets) : nil dois_citations = total.positive? && aggregations.blank? || aggregations.include?("query_aggregations") ? facet_citations_by_year(response.response.aggregations.dois_citations) : nil - citations_histogram = total.positive? && aggregations.include?("metrics_aggregations") ? facet_citations_by_year(response.response.aggregations.citations_histogram) : nil - citations = total.positive? && aggregations.include?("metrics_aggregations") ? facet_citations_by_dois(response.response.aggregations.citations.dois.buckets) : nil - views_histogram = total.positive? && aggregations.include?("metrics_aggregations") ? facet_citations_by_year(response.response.aggregations.views) : nil - downloads_histogram = total.positive? && aggregations.include?("metrics_aggregations") ? facet_citations_by_year(response.response.aggregations.downloads) : nil + citations_histogram = total.positive? && aggregations.include?("metrics_aggregations") ? facet_citations_by_year(response.response.aggregations.citations_histogram) : nil + citations = total.positive? && aggregations.include?("metrics_aggregations") ? facet_citations_by_dois(response.response.aggregations.citations.dois.buckets) : nil + views_histogram = total.positive? && aggregations.include?("metrics_aggregations") ? facet_counts_by_year_month(response.response.aggregations.views) : nil + downloads_histogram = total.positive? && aggregations.include?("metrics_aggregations") ? facet_counts_by_year_month(response.response.aggregations.downloads) : nil results = response.results diff --git a/app/models/concerns/indexable.rb b/app/models/concerns/indexable.rb index 7b07808e2..3280c25af 100644 --- a/app/models/concerns/indexable.rb +++ b/app/models/concerns/indexable.rb @@ -95,7 +95,7 @@ def find_by_id_list(ids, options={}) }) end - def get_aggregations_hash(aggregations="") + def get_aggregations_hash(aggregations = "") return send(:query_aggregations) if aggregations.blank? aggs = {} aggregations.split(",").each do |agg| diff --git a/app/models/event.rb b/app/models/event.rb index c33cf1fbb..2d32fb2c3 100644 --- a/app/models/event.rb +++ b/app/models/event.rb @@ -74,14 +74,11 @@ class Event < ActiveRecord::Base ] VIEWS_RELATION_TYPES = [ - "unique-dataset-investigations-regular", - "total-dataset-investigations-regular" + "unique-dataset-investigations-regular" ] - DOWNLOADS_RELATION_TYPES = [ - "unique-dataset-requests-regular", - "total-dataset-requests-regular" + "unique-dataset-requests-regular" ] validates :subj_id, :source_id, :source_token, presence: true @@ -224,7 +221,11 @@ def self.query_aggregations end def self.metrics_aggregations - + sum_distribution = { + sum_bucket: { + buckets_path: "year_months>total_by_year_month" + } + } sum_year_distribution = { sum_bucket: { buckets_path: "years>total_by_year" @@ -233,69 +234,29 @@ def self.metrics_aggregations { citations_histogram: { - filter: { - script: { - script: "#{INCLUDED_RELATION_TYPES}.contains(doc['relation_type_id'].value)" - } + filter: {script: {script: "#{INCLUDED_RELATION_TYPES}.contains(doc['relation_type_id'].value)"} }, aggs: { years: { date_histogram: { field: 'citation_year', interval: 'year', min_doc_count: 1 }, aggs: { "total_by_year" => { sum: { field: 'total' }}}},"sum_distribution"=>sum_year_distribution} }, citations: { - filter: { - script: { - script: "#{INCLUDED_RELATION_TYPES}.contains(doc['relation_type_id'].value)" - } + filter: {script: {script: "#{INCLUDED_RELATION_TYPES}.contains(doc['relation_type_id'].value)"} }, aggs: { dois: { terms: { field: 'obj_id', size: 50, min_doc_count: 1 }, aggs: { unique_citations: { cardinality: { field: 'citation_id' }}} }} }, views: { - filter: { - script: { - script: "#{VIEWS_RELATION_TYPES}.contains(doc['relation_type_id'].value) && doc['source_id'].value == 'datacite-usage' && doc['occurred_at'].value.getMillis() >= doc['obj.datePublished'].value.getMillis() && doc['occurred_at'].value.getMillis() < new Date().getTime()" , - } + filter: {script: {script: "#{VIEWS_RELATION_TYPES}.contains(doc['relation_type_id'].value) && doc['source_id'].value == 'datacite-usage' && doc['occurred_at'].value.getMillis() >= doc['obj.datePublished'].value.getMillis() && doc['occurred_at'].value.getMillis() < new Date().getTime()"} }, aggs: { - dois: { - terms: { - field: 'obj_id', - size: 50, - min_doc_count: 1 - }, - aggs: { - "total_by_type" => { - sum: { - field: 'total' - } - } - } - }, - years: { date_histogram: { field: 'occurred_at', interval: 'year', min_doc_count: 1 }, aggs: { "total_by_year" => { sum: { field: 'total' } } } }, "sum_distribution" => sum_year_distribution + year_months: { date_histogram: { field: 'occurred_at', interval: 'month', min_doc_count: 1 }, aggs: { "total_by_year_month" => { sum: { field: 'total' } } } }, "sum_distribution" => sum_distribution } }, downloads: { - filter: { - script: { - script: "#{DOWNLOADS_RELATION_TYPES}.contains(doc['relation_type_id'].value) && doc['source_id'].value == 'datacite-usage' && doc['occurred_at'].value.getMillis() >= doc['obj.datePublished'].value.getMillis() && doc['occurred_at'].value.getMillis() < new Date().getTime()" , - } + filter: {script: {script: "#{DOWNLOADS_RELATION_TYPES}.contains(doc['relation_type_id'].value) && doc['source_id'].value == 'datacite-usage' && doc['occurred_at'].value.getMillis() >= doc['obj.datePublished'].value.getMillis() && doc['occurred_at'].value.getMillis() < new Date().getTime()"} }, aggs: { - dois: { - terms: { - field: 'obj_id', - size: 50, - min_doc_count: 1 - }, - aggs: { - "total_by_type" => { - sum: { - field: 'total' - } - } - } - }, - years: { date_histogram: { field: 'occurred_at', interval: 'year', min_doc_count: 1 }, aggs: { "total_by_year" => { sum: { field: 'total' } } } }, "sum_distribution" => sum_year_distribution + year_months: { date_histogram: { field: 'occurred_at', interval: 'month', min_doc_count: 1 }, aggs: { "total_by_year_month" => { sum: { field: 'total' } } } }, "sum_distribution" => sum_distribution } } }