From f2d49a2da50216bb94d3b3a89f7f51915cbf0a49 Mon Sep 17 00:00:00 2001 From: Sarala Wimalaratne Date: Tue, 12 May 2020 06:53:52 +0100 Subject: [PATCH 1/4] new stats query --- app/controllers/concerns/countable.rb | 2 +- app/controllers/providers_controller.rb | 8 ++++---- app/models/doi.rb | 18 ++++++++++++++++++ 3 files changed, 23 insertions(+), 5 deletions(-) diff --git a/app/controllers/concerns/countable.rb b/app/controllers/concerns/countable.rb index f1bc7755d..77cde5e0f 100644 --- a/app/controllers/concerns/countable.rb +++ b/app/controllers/concerns/countable.rb @@ -12,7 +12,7 @@ def doi_count(client_id: nil, provider_id: nil, consortium_id: nil, user_id: nil elsif user_id response = Doi.query(nil, user_id: user_id, state: state, page: { number: 1, size: 0 }) else - response = Doi.query(nil, page: { number: 1, size: 0 }) + response = Doi.stats_query(nil, page: { number: 1, size: 0 }) end response.results.total.positive? ? facet_by_year(response.response.aggregations.created.buckets) : [] diff --git a/app/controllers/providers_controller.rb b/app/controllers/providers_controller.rb index 5a0109547..fd76d5e41 100644 --- a/app/controllers/providers_controller.rb +++ b/app/controllers/providers_controller.rb @@ -235,7 +235,7 @@ def stats providers = provider_count(consortium_id: nil) clients = client_count(provider_id: nil) dois = doi_count(provider_id: nil) - resource_types = resource_type_count(provider_id: nil) + # resource_types = resource_type_count(provider_id: nil) # citations = nil # citation_count(provider_id: nil) # views = nil # view_count(provider_id: nil) # downloads = nil # download_count(provider_id: nil) @@ -243,7 +243,7 @@ def stats providers = provider_count(consortium_id: params[:id]) clients = client_count(consortium_id: params[:id]) dois = doi_count(consortium_id: params[:id]) - resource_types = resource_type_count(consortium_id: params[:id]) + # resource_types = resource_type_count(consortium_id: params[:id]) # citations = 
citation_count(consortium_id: params[:id]) # views = view_count(consortium_id: params[:id]) # downloads = download_count(consortium_id: params[:id]) @@ -251,7 +251,7 @@ def stats providers = nil clients = client_count(provider_id: params[:id]) dois = doi_count(provider_id: params[:id]) - resource_types = resource_type_count(provider_id: params[:id]) + # resource_types = resource_type_count(provider_id: params[:id]) # citations = citation_count(provider_id: params[:id]) # views = view_count(provider_id: params[:id]) # downloads = download_count(provider_id: params[:id]) @@ -261,7 +261,7 @@ def stats providers: providers, clients: clients, dois: dois, - "resourceTypes" => resource_types, + # "resourceTypes" => resource_types, # citations: citations, # views: views, # downloads: downloads, diff --git a/app/models/doi.rb b/app/models/doi.rb index 279906536..4a8f125a3 100644 --- a/app/models/doi.rb +++ b/app/models/doi.rb @@ -675,6 +675,24 @@ def self.find_by_id(id) ) end + def self.stats_query(query, options={}) + aggregations = {created: { date_histogram: { field: 'created', interval: 'year', format: 'year', order: { _key: "desc" }, min_doc_count: 1 }, + aggs: { bucket_truncate: { bucket_sort: { size: 10 } } } }, + } + + from = 0 + sort = [{ created: "asc", uid: "asc" }] + + __elasticsearch__.search({ + size: options.dig(:page, :size), + from: from, + sort: sort, + query: query, + aggregations: aggregations, + track_total_hits: true + }.compact) + end + def self.query(query, options={}) # support scroll api # map function is small performance hit From 5cbe70cd23f3effc5f392f99b257c207a292b815 Mon Sep 17 00:00:00 2001 From: Sarala Wimalaratne Date: Thu, 14 May 2020 08:14:29 +0100 Subject: [PATCH 2/4] updated query - not working --- app/controllers/concerns/countable.rb | 4 ++-- app/models/doi.rb | 24 +++++++++++++++++------- 2 files changed, 19 insertions(+), 9 deletions(-) diff --git a/app/controllers/concerns/countable.rb b/app/controllers/concerns/countable.rb index 
77cde5e0f..f86140d50 100644 --- a/app/controllers/concerns/countable.rb +++ b/app/controllers/concerns/countable.rb @@ -4,9 +4,9 @@ module Countable included do def doi_count(client_id: nil, provider_id: nil, consortium_id: nil, user_id: nil, state: nil) if client_id - response = Doi.query(nil, client_id: client_id, page: { number: 1, size: 0 }) + response = Doi.stats_query(nil, client_id: client_id, page: { number: 1, size: 0 }) elsif provider_id - response = Doi.query(nil, provider_id: provider_id, page: { number: 1, size: 0 }) + response = Doi.stats_query(nil, provider_id: provider_id, page: { number: 1, size: 0 }) elsif consortium_id response = Doi.query(nil, consortium_id: consortium_id, page: { number: 1, size: 0 }) elsif user_id diff --git a/app/models/doi.rb b/app/models/doi.rb index 4a8f125a3..2717a5de7 100644 --- a/app/models/doi.rb +++ b/app/models/doi.rb @@ -676,18 +676,28 @@ def self.find_by_id(id) end def self.stats_query(query, options={}) + filter = [] + filter << { terms: { provider_id: options[:provider_id].split(",") } } if options[:provider_id].present? + filter << { terms: { client_id: options[:client_id].to_s.split(",") } } if options[:client_id].present? 
+ + es_query = { + query: { + bool: { + filter: filter + } + } + } + + # es_query = query + aggregations = {created: { date_histogram: { field: 'created', interval: 'year', format: 'year', order: { _key: "desc" }, min_doc_count: 1 }, - aggs: { bucket_truncate: { bucket_sort: { size: 10 } } } }, + aggs: { bucket_truncate: { bucket_sort: { size: 11 } } } }, } - from = 0 - sort = [{ created: "asc", uid: "asc" }] - __elasticsearch__.search({ size: options.dig(:page, :size), - from: from, - sort: sort, - query: query, + from: 0, + query: es_query, aggregations: aggregations, track_total_hits: true }.compact) From 21ccdf571492fd99bf89c850e1577a5b67aaac26 Mon Sep 17 00:00:00 2001 From: Sarala Wimalaratne Date: Thu, 14 May 2020 15:12:40 +0100 Subject: [PATCH 3/4] Nested aggregation --- app/models/doi.rb | 10 +++++----- 1 file changed, 5 insertions(+), 5 deletions(-) diff --git a/app/models/doi.rb b/app/models/doi.rb index b3c7b74fe..2d4025bf2 100644 --- a/app/models/doi.rb +++ b/app/models/doi.rb @@ -688,11 +688,11 @@ def self.stats_query(query, options={}) } } - # es_query = query - - aggregations = {created: { date_histogram: { field: 'created', interval: 'year', format: 'year', order: { _key: "desc" }, min_doc_count: 1 }, - aggs: { bucket_truncate: { bucket_sort: { size: 11 } } } }, - } + aggregations = {aggs: + {created: { date_histogram: { field: 'created', interval: 'year', format: 'year', order: { _key: "desc" }, min_doc_count: 1 }, + aggs: { bucket_truncate: { bucket_sort: { size: 11 } } } }, + } + } __elasticsearch__.search({ size: options.dig(:page, :size), From 8ad1cc6071b83e05fd7cf46564c31c413a6d9082 Mon Sep 17 00:00:00 2001 From: Martin Fenner Date: Thu, 14 May 2020 21:34:03 +0200 Subject: [PATCH 4/4] simplified query, added tests --- app/controllers/concerns/countable.rb | 28 +++++----- app/models/doi.rb | 35 ++++++------ spec/concerns/countable_spec.rb | 61 +++++++++++++++++++++ spec/models/doi_spec.rb | 77 +++++++++++++++++++++++++++ 
spec/requests/providers_spec.rb | 2 +- 5 files changed, 168 insertions(+), 35 deletions(-) diff --git a/app/controllers/concerns/countable.rb b/app/controllers/concerns/countable.rb index f86140d50..898c8661a 100644 --- a/app/controllers/concerns/countable.rb +++ b/app/controllers/concerns/countable.rb @@ -2,20 +2,20 @@ module Countable extend ActiveSupport::Concern included do - def doi_count(client_id: nil, provider_id: nil, consortium_id: nil, user_id: nil, state: nil) + def doi_count(client_id: nil, provider_id: nil, consortium_id: nil, user_id: nil) if client_id - response = Doi.stats_query(nil, client_id: client_id, page: { number: 1, size: 0 }) + response = Doi.stats_query(client_id: client_id) elsif provider_id - response = Doi.stats_query(nil, provider_id: provider_id, page: { number: 1, size: 0 }) + response = Doi.stats_query(provider_id: provider_id) elsif consortium_id - response = Doi.query(nil, consortium_id: consortium_id, page: { number: 1, size: 0 }) + response = Doi.stats_query(consortium_id: consortium_id) elsif user_id - response = Doi.query(nil, user_id: user_id, state: state, page: { number: 1, size: 0 }) + response = Doi.stats_query(user_id: user_id) else - response = Doi.stats_query(nil, page: { number: 1, size: 0 }) + response = Doi.stats_query end - response.results.total.positive? ? facet_by_year(response.response.aggregations.created.buckets) : [] + response.results.total.positive? ? facet_by_year(response.aggregations.created.buckets) : [] end def view_count(client_id: nil, provider_id: nil, consortium_id: nil, user_id: nil, state: nil) @@ -31,7 +31,7 @@ def view_count(client_id: nil, provider_id: nil, consortium_id: nil, user_id: ni response = Doi.query(nil, page: { number: 1, size: 0 }) end - response.results.total.positive? ? metric_facet_by_year(response.response.aggregations.views.buckets) : [] + response.results.total.positive? ? 
metric_facet_by_year(response.aggregations.views.buckets) : [] end def download_count(client_id: nil, provider_id: nil, consortium_id: nil, user_id: nil, state: nil) @@ -47,7 +47,7 @@ def download_count(client_id: nil, provider_id: nil, consortium_id: nil, user_id response = Doi.query(nil, page: { number: 1, size: 0 }) end - response.results.total.positive? ? metric_facet_by_year(response.response.aggregations.downloads.buckets) : [] + response.results.total.positive? ? metric_facet_by_year(response.aggregations.downloads.buckets) : [] end def citation_count(client_id: nil, provider_id: nil, consortium_id: nil, user_id: nil, state: nil) @@ -63,7 +63,7 @@ def citation_count(client_id: nil, provider_id: nil, consortium_id: nil, user_id response = Doi.query(nil, page: { number: 1, size: 0 }) end - response.results.total.positive? ? metric_facet_by_year(response.response.aggregations.citations.buckets) : [] + response.results.total.positive? ? metric_facet_by_year(response.aggregations.citations.buckets) : [] end # cumulative count clients by year @@ -78,7 +78,7 @@ def client_count(provider_id: nil, consortium_id: nil) response = Client.query(nil, include_deleted: true, page: { number: 1, size: 0 }) end - response.results.total.positive? ? facet_by_cumulative_year(response.response.aggregations.cumulative_years.buckets) : [] + response.results.total.positive? ? facet_by_cumulative_year(response.aggregations.cumulative_years.buckets) : [] end # count active clients by provider. Provider can only be deleted when there are no active clients. @@ -94,10 +94,10 @@ def active_client_count(provider_id: nil) def provider_count(consortium_id: nil) if consortium_id response = Provider.query(nil, consortium_id: consortium_id, include_deleted: true, page: { number: 1, size: 0 }) - response.results.total.positive? ? facet_by_cumulative_year(response.response.aggregations.cumulative_years.buckets) : [] + response.results.total.positive? ? 
facet_by_cumulative_year(response.aggregations.cumulative_years.buckets) : [] else response = Provider.query(nil, include_deleted: true, page: { number: 1, size: 0 }) - response.results.total.positive? ? facet_by_cumulative_year(response.response.aggregations.cumulative_years.buckets) : [] + response.results.total.positive? ? facet_by_cumulative_year(response.aggregations.cumulative_years.buckets) : [] end end @@ -114,7 +114,7 @@ def resource_type_count(client_id: nil, provider_id: nil, consortium_id: nil, us response = Doi.query(nil, page: { number: 1, size: 0 }) end - response.results.total.positive? ? facet_by_combined_key(response.response.aggregations.resource_types.buckets) : [] + response.results.total.positive? ? facet_by_combined_key(response.aggregations.resource_types.buckets) : [] end end end diff --git a/app/models/doi.rb b/app/models/doi.rb index 2d4025bf2..95730f271 100644 --- a/app/models/doi.rb +++ b/app/models/doi.rb @@ -675,32 +675,27 @@ def self.find_by_id(id) ) end - def self.stats_query(query, options={}) + def self.stats_query(options={}) filter = [] - filter << { terms: { provider_id: options[:provider_id].split(",") } } if options[:provider_id].present? - filter << { terms: { client_id: options[:client_id].to_s.split(",") } } if options[:client_id].present? + filter << { term: { provider_id: options[:provider_id] } } if options[:provider_id].present? + filter << { term: { client_id: options[:client_id] } } if options[:client_id].present? + filter << { term: { consortium_id: options[:consortium_id].upcase }} if options[:consortium_id].present? + filter << { term: { "creators.nameIdentifiers.nameIdentifier" => "https://orcid.org/#{orcid_from_url(options[:user_id])}" }} if options[:user_id].present? 
+ + aggregations = { + created: { date_histogram: { field: 'created', interval: 'year', format: 'year', order: { _key: "desc" }, min_doc_count: 1 }, + aggs: { bucket_truncate: { bucket_sort: { size: 12 } } } }, + } - es_query = { + __elasticsearch__.search({ query: { bool: { - filter: filter + must: [{ match_all: {} }], + filter: filter, } - } - } - - aggregations = {aggs: - {created: { date_histogram: { field: 'created', interval: 'year', format: 'year', order: { _key: "desc" }, min_doc_count: 1 }, - aggs: { bucket_truncate: { bucket_sort: { size: 11 } } } }, - } - } - - __elasticsearch__.search({ - size: options.dig(:page, :size), - from: 0, - query: es_query, + }, aggregations: aggregations, - track_total_hits: true - }.compact) + }) end def self.query(query, options={}) diff --git a/spec/concerns/countable_spec.rb b/spec/concerns/countable_spec.rb index a6c4a5ed3..b021e0213 100644 --- a/spec/concerns/countable_spec.rb +++ b/spec/concerns/countable_spec.rb @@ -64,4 +64,65 @@ {"count"=>2, "id"=>"2017", "title"=>"2017"}]) end end + + describe "doi_count" do + before do + allow(Time.zone).to receive(:now).and_return(Time.mktime(2015, 4, 8)) + end + + let(:consortium) { create(:provider, role_name: "ROLE_CONSORTIUM", symbol: "DC") } + let(:provider) { create(:provider, consortium: consortium, role_name: "ROLE_CONSORTIUM_ORGANIZATION", symbol: "DATACITE") } + let(:client) { create(:client, provider: provider, symbol: "DATACITE.TEST") } + let!(:dois) { create_list(:doi, 3, client: client, aasm_state: "findable") } + let!(:doi) { create(:doi) } + + it "counts all dois" do + Doi.import + sleep 2 + + expect(subject.doi_count).to eq([{"count"=>4, "id"=>"2015", "title"=>"2015"}]) + end + + it "counts all consortium dois" do + Doi.import + sleep 2 + + expect(subject.doi_count(consortium_id: "dc")).to eq([{"count"=>3, "id"=>"2015", "title"=>"2015"}]) + end + + it "counts all consortium dois no dois" do + Doi.import + sleep 2 + + expect(subject.doi_count(consortium_id: 
"abc")).to eq([]) + end + + it "counts all provider dois" do + Doi.import + sleep 2 + + expect(subject.doi_count(provider_id: "datacite")).to eq([{"count"=>3, "id"=>"2015", "title"=>"2015"}]) + end + + it "counts all provider dois no dois" do + Doi.import + sleep 2 + + expect(subject.doi_count(provider_id: "abc")).to eq([]) + end + + it "counts all client dois" do + Doi.import + sleep 2 + + expect(subject.doi_count(client_id: "datacite.test")).to eq([{"count"=>3, "id"=>"2015", "title"=>"2015"}]) + end + + it "counts all client dois no dois" do + Doi.import + sleep 2 + + expect(subject.doi_count(client_id: "abc")).to eq([]) + end + end end diff --git a/spec/models/doi_spec.rb b/spec/models/doi_spec.rb index 524b58699..d1d1aeaf6 100644 --- a/spec/models/doi_spec.rb +++ b/spec/models/doi_spec.rb @@ -872,4 +872,81 @@ expect(changed_doi.landing_page).to eq(landing_page) end end + + describe "stats_query", elasticsearch: true do + subject { Doi } + + before do + allow(Time.zone).to receive(:now).and_return(Time.mktime(2015, 4, 8)) + end + + let(:consortium) { create(:provider, role_name: "ROLE_CONSORTIUM", symbol: "DC") } + let(:provider) { create(:provider, consortium: consortium, role_name: "ROLE_CONSORTIUM_ORGANIZATION", symbol: "DATACITE") } + let(:client) { create(:client, provider: provider, symbol: "DATACITE.TEST") } + let!(:dois) { create_list(:doi, 3, client: client, aasm_state: "findable") } + let!(:doi) { create(:doi) } + + it "counts all dois" do + Doi.import + sleep 2 + + response = subject.stats_query + expect(response.results.total).to eq(4) + expect(response.aggregations.created.buckets).to eq([{"doc_count"=>4, "key"=>1420070400000, "key_as_string"=>"2015"}]) + end + + it "counts all consortia dois" do + Doi.import + sleep 2 + + response = subject.stats_query(consortium_id: "dc") + expect(response.results.total).to eq(3) + expect(response.aggregations.created.buckets).to eq([{"doc_count"=>3, "key"=>1420070400000, "key_as_string"=>"2015"}]) + end + + it 
"counts all consortia dois no dois" do + Doi.import + sleep 2 + + response = subject.stats_query(consortium_id: "abc") + expect(response.results.total).to eq(0) + expect(response.aggregations.created.buckets).to eq([]) + end + + it "counts all provider dois" do + Doi.import + sleep 2 + + response = subject.stats_query(provider_id: "datacite") + expect(response.results.total).to eq(3) + expect(response.aggregations.created.buckets).to eq([{"doc_count"=>3, "key"=>1420070400000, "key_as_string"=>"2015"}]) + end + + it "counts all provider dois no dois" do + Doi.import + sleep 2 + + response = subject.stats_query(provider_id: "abc") + expect(response.results.total).to eq(0) + expect(response.aggregations.created.buckets).to eq([]) + end + + it "counts all client dois" do + Doi.import + sleep 2 + + response = subject.stats_query(client_id: "datacite.test") + expect(response.results.total).to eq(3) + expect(response.aggregations.created.buckets).to eq([{"doc_count"=>3, "key"=>1420070400000, "key_as_string"=>"2015"}]) + end + + it "counts all client dois no dois" do + Doi.import + sleep 2 + + response = subject.stats_query(client_id: "datacite.abc") + expect(response.results.total).to eq(0) + expect(response.aggregations.created.buckets).to eq([]) + end + end end diff --git a/spec/requests/providers_spec.rb b/spec/requests/providers_spec.rb index bbbe17c40..9983f21c1 100644 --- a/spec/requests/providers_spec.rb +++ b/spec/requests/providers_spec.rb @@ -145,7 +145,7 @@ expect(last_response.status).to eq(200) expect(json["clients"]).to eq([{"count"=>1, "id"=>"2020", "title"=>"2020"}]) - expect(json["resourceTypes"]).to eq([{"count"=>3, "id"=>"dataset", "title"=>"Dataset"}]) + # expect(json["resourceTypes"]).to eq([{"count"=>3, "id"=>"dataset", "title"=>"Dataset"}]) expect(json["dois"]).to eq([{"count"=>3, "id"=>"2020", "title"=>"2020"}]) end end