From 0dbd8006296382ce5cd0365f841d0ac5d35853bd Mon Sep 17 00:00:00 2001 From: Kristian Garza Date: Thu, 28 Nov 2019 17:25:13 +0100 Subject: [PATCH 01/17] refactor usage queries --- app/controllers/events_controller.rb | 10 ++++-- app/graphql/types/metric_interface.rb | 6 ++-- app/queries/events_query.rb | 50 +++++++++++++++++++++++++-- 3 files changed, 56 insertions(+), 10 deletions(-) diff --git a/app/controllers/events_controller.rb b/app/controllers/events_controller.rb index a021861d7..e683f278e 100644 --- a/app/controllers/events_controller.rb +++ b/app/controllers/events_controller.rb @@ -120,7 +120,8 @@ def index registrants = total.positive? && aggregations.blank? || aggregations.include?("query_aggregations") ? facet_by_registrants(response.response.aggregations.registrants.buckets) : nil pairings = total.positive? && aggregations.blank? || aggregations.include?("query_aggregations") ? facet_by_pairings(response.response.aggregations.pairings.buckets) : nil dois = total.positive? && aggregations.blank? || aggregations.include?("query_aggregations") ? facet_by_dois(response.response.aggregations.dois.buckets) : nil - dois_usage = total.positive? && aggregations.blank? || aggregations.include?("query_aggregations") ? facet_by_dois(response.response.aggregations.dois_usage.dois.buckets) : nil + # dois_usage = total.positive? && aggregations.blank? || aggregations.include?("query_aggregations") ? facet_by_dois(response.response.aggregations.dois_usage.dois.buckets) : nil + dois_usage = params[:doi].present? ? EventsQuery.new.usage(params[:doi]) : [] dois_citations = total.positive? && aggregations.blank? || aggregations.include?("query_aggregations") ? facet_citations_by_year_v1(response.response.aggregations.dois_citations) : nil citations_histogram = total.positive? && params[:doi].present? && aggregations.include?("citations_aggregations") ? facet_citations_by_year(response.response.aggregations.citations_histogram) : nil citations = params[:doi].present? ? EventsQuery.new.citations(params[:doi]) : [] @@ -128,8 +129,11 @@ def index relations = total.positive? && params[:doi].present? && aggregations.include?("citations_aggregations") ? facet_citations_by_dois(response.response.aggregations.relations.dois.buckets) : nil views_histogram = total.positive? && aggregations.include?("metrics_aggregations") ? facet_counts_by_year_month(response.response.aggregations.views_histogram) : nil downloads_histogram = total.positive? && aggregations.include?("metrics_aggregations") ? facet_counts_by_year_month(response.response.aggregations.downloads_histogram) : nil - views = total.positive? && aggregations.include?("metrics_aggregations") ? facet_by_source(response.response.aggregations.views.dois.buckets) : nil - downloads = total.positive? && aggregations.include?("metrics_aggregations") ? facet_by_source(response.response.aggregations.downloads.dois.buckets) : nil + # views = total.positive? && aggregations.include?("metrics_aggregations") ? facet_by_source(response.response.aggregations.views.dois.buckets) : nil + # downloads = total.positive? && aggregations.include?("metrics_aggregations") ? facet_by_source(response.response.aggregations.downloads.dois.buckets) : nil + + views = params[:doi].present? ? EventsQuery.new.views(params[:doi]) : [] + downloads = params[:doi].present? ? EventsQuery.new.downloads(params[:doi]) : [] unique_obj_count = total.positive? && aggregations.include?("advanced_aggregations") ? response.response.aggregations.unique_obj_count.value : nil unique_subj_count = total.positive? && aggregations.include?("advanced_aggregations") ? response.response.aggregations.unique_subj_count.value : nil diff --git a/app/graphql/types/metric_interface.rb b/app/graphql/types/metric_interface.rb index 097acd871..6cb8a63f9 100644 --- a/app/graphql/types/metric_interface.rb +++ b/app/graphql/types/metric_interface.rb @@ -19,13 +19,11 @@ def aggregation_results(**args) end def view_count - meta = aggregation_results(id: object.identifier).views.dois.buckets - meta.first.fetch("total_by_type", {}).fetch("value", nil) if meta.any? + EventsQuery.new.doi_views(doi_from_url(object.identifier)) end def download_count - meta = aggregation_results(id: object.identifier).downloads.dois.buckets - meta.first.fetch("total_by_type", {}).fetch("value", nil) if meta.any? + EventsQuery.new.doi_downloads(doi_from_url(object.identifier)) end def citation_count diff --git a/app/queries/events_query.rb b/app/queries/events_query.rb index 4c111d248..01faef2dc 100644 --- a/app/queries/events_query.rb +++ b/app/queries/events_query.rb @@ -1,7 +1,6 @@ # frozen_string_literal: true class EventsQuery - include Facetable ACTIVE_RELATION_TYPES = [ @@ -16,13 +15,13 @@ class EventsQuery "is-referenced-by" ] - def initialize() + def initialize end def doi_citations(doi) pid = Event.new.normalize_doi(doi) query = "(subj_id:\"#{pid}\" AND (relation_type_id:#{PASSIVE_RELATION_TYPES.join(' OR relation_type_id:')})) OR (obj_id:\"#{pid}\" AND (relation_type_id:#{ACTIVE_RELATION_TYPES.join(' OR relation_type_id:')}))" - results = Event.query(query, doi:doi, aggregations: "citation_count_aggregation", page: { size: 1, cursor: [] }).response.aggregations.citations.buckets + results = Event.query(query, doi: doi, aggregations: "citation_count_aggregation", page: { size: 1, cursor: [] }).response.aggregations.citations.buckets results.any? ? results.first.total.value : 0 end @@ -31,4 +30,49 @@ def citations(doi) { id: item, count: EventsQuery.new.doi_citations(item) } end end + + def doi_views(doi) + query = "(relation_type_id:unique-dataset-investigations-regular AND source_id:datacite-usage)" + results = Event.query(query, doi: doi, aggregations: "usage_count_aggregation", page: { size: 1, cursor: [] }).response.aggregations.usage.buckets + results.any? ? results.first.dig("total_by_type", "value") : 0 + end + + def views(doi) + doi.downcase.split(",").map do |item| + { id: item, count: EventsQuery.new.doi_views(item) } + end + end + + def doi_downloads(doi) + query = "(relation_type_id:unique-dataset-requests-regular AND source_id:datacite-usage)" + results = Event.query(query, doi: doi, aggregations: "usage_count_aggregation", page: { size: 1, cursor: [] }).response.aggregations.usage.buckets + results.any? ? results.first.dig("total_by_type", "value") : 0 + end + + def downloads(doi) + doi.downcase.split(",").map do |item| + { id: item, count: EventsQuery.new.doi_downloads(item) } + end + end + + def usage(doi) + doi.downcase.split(",").map do |item| + pid = Event.new.normalize_doi(item) + requests = EventsQuery.new.doi_downloads(doi) + investigations = EventsQuery.new.doi_views(doi) + { id: pid, + title: pid, + relationTypes: [ + { id: "unique-dataset-requests-regular", + title: "unique-dataset-requests-regular", + sum: requests + }, + { id: "unique-dataset-investigations-regular", + title: "unique-dataset-investigations-regular", + sum: investigations + } + ] + } + end + end end From abd6bc9f35220dfba5efc3b39efb4d5514e58df7 Mon Sep 17 00:00:00 2001 From: Kristian Garza Date: Thu, 28 Nov 2019 17:25:27 +0100 Subject: [PATCH 02/17] specs for new queries --- spec/factories/default.rb | 44 ++++++++++++++++++++++++------- spec/queries/events_query_spec.rb | 24 +++++++++++++++++ 2 files changed, 58 insertions(+), 10 deletions(-) diff --git a/spec/factories/default.rb b/spec/factories/default.rb index a2e02d675..788789b72 100644 --- a/spec/factories/default.rb +++ b/spec/factories/default.rb @@ -305,20 +305,20 @@ association :doi, factory: :doi, strategy: :create end - factory :event do + factory :event do uuid { SecureRandom.uuid } source_id { "citeulike" } source_token { "citeulike_123" } sequence(:subj_id) { |n| "http://www.citeulike.org/user/dbogartoit/#{n}" } obj_id { "http://doi.org/10.1371/journal.pmed.0030186" } - subj {{ "@id"=>"http://www.citeulike.org/user/dbogartoit", - "@type"=>"CreativeWork", - "uid"=>"http://www.citeulike.org/user/dbogartoit", - "author"=>[{ "given"=>"dbogartoit" }], - "name"=>"CiteULike bookmarks for user dbogartoit", - "publisher"=>"CiteULike", - "date-published"=>"2006-06-13T16:14:19Z", - "url"=>"http://www.citeulike.org/user/dbogartoit" }} + subj {{ "@id" => "http://www.citeulike.org/user/dbogartoit", + "@type" => "CreativeWork", + "uid" => "http://www.citeulike.org/user/dbogartoit", + "author" => [{ "given" => "dbogartoit" }], + "name" => "CiteULike bookmarks for user dbogartoit", + "publisher" => "CiteULike", + "datePublished" => "2006-06-13T16:14:19Z", + "url" => "http://www.citeulike.org/user/dbogartoit" }} obj {} relation_type_id { "bookmarks" } updated_at { Time.zone.now } @@ -328,9 +328,33 @@ source_id { "datacite_related" } source_token { "datacite_related_123" } sequence(:subj_id) { |n| "http://doi.org/10.5061/DRYAD.47SD5e/#{n}" } - subj { {"datePublished"=>"2006-06-13T16:14:19Z"} } + subj { { "datePublished" => "2006-06-13T16:14:19Z" } } obj_id { "http://doi.org/10.5061/DRYAD.47SD5/1" } relation_type_id { "references" } end + + factory :event_for_datacite_usage do + source_id { "datacite-usage" } + source_token { "5348967fhdjksr3wyui325" } + total { rand(1..100).to_int } + sequence(:subj_id) { |n| "https://api.test.datacite.org/report/#{SecureRandom.uuid}" } + subj { { "datePublished" => "2006-06-13T16:14:19Z" } } + obj { { "date_published" => "2007-06-13T16:14:19Z" } } + obj_id { "http://doi.org/10.5061/DRYAD.47SD5/1" } + relation_type_id { "unique-dataset-investigations-regular" } + occurred_at { "2015-06-13T16:14:19Z" } + end + + factory :event_for_datacite_usage_empty do + source_id { "datacite-usage" } + source_token { "5348967fhdjksr3wyui325" } + total { rand(1..100).to_int } + sequence(:subj_id) { |n| "https://api.test.datacite.org/report/#{SecureRandom.uuid}" } + subj { { "datePublished" => "2006-06-13T16:14:19Z" } } + obj {} + obj_id { "http://doi.org/10.5061/DRYAD.47SD5/1" } + relation_type_id { "unique-dataset-investigations-regular" } + occurred_at { "2015-06-13T16:14:19Z" } + end end end diff --git a/spec/queries/events_query_spec.rb b/spec/queries/events_query_spec.rb index 4776cb637..1aef5874d 100644 --- a/spec/queries/events_query_spec.rb +++ b/spec/queries/events_query_spec.rb @@ -30,4 +30,28 @@ expect(no_citations[:count]).to eq(0) end end + + + context "usage events" do + let!(:views) { create_list(:event_for_datacite_usage, 1, obj_id:"http://doi.org/10.0260/co.2004960.v1", relation_type_id:"unique-dataset-investigations-regular") } + let!(:downloads) { create_list(:event_for_datacite_usage, 1, obj_id:"http://doi.org/10.0260/co.2004960.v1", relation_type_id:"unique-dataset-requests-regular") } + + before do + Event.import + sleep 1 + end + + it "doi_views" do + expect(EventsQuery.new.doi_views("10.0260/co.2004960.v1")).to eq(views.first.total) + end + + it "doi_downloads" do + expect(EventsQuery.new.doi_downloads("10.0260/co.2004960.v1")).to eq(downloads.first.total) + end + + it "usage" do + puts EventsQuery.new.usage("10.0260/co.2004960.v1") + expect(EventsQuery.new.usage("10.0260/co.2004960.v1").first).to eq(id: "https://doi.org/10.0260/co.2004960.v1", title: "https://doi.org/10.0260/co.2004960.v1", relationTypes: [{ id: "unique-dataset-requests-regular", title: "unique-dataset-requests-regular", sum: downloads.first.total }, { id: "unique-dataset-investigations-regular", title: "unique-dataset-investigations-regular", sum: views.first.total }]) + end + end end From 9d5cdc671c9e8172aaea184b7d0a6e18d3b5b7ad Mon Sep 17 00:00:00 2001 From: Kristian Garza Date: Thu, 28 Nov 2019 17:25:37 +0100 Subject: [PATCH 03/17] remove old scripts --- app/models/event.rb | 22 +++++++++++++++------- 1 file changed, 15 insertions(+), 7 deletions(-) diff --git a/app/models/event.rb b/app/models/event.rb index c0fe36041..b92f7e9e0 100644 --- a/app/models/event.rb +++ b/app/models/event.rb @@ -221,11 +221,11 @@ def self.query_aggregations(doi = nil) citation_types: { terms: { field: "citation_type", size: 50, min_doc_count: 1 }, aggs: { year_months: { date_histogram: { field: "occurred_at", interval: "month", min_doc_count: 1 }, aggs: { "total_by_year_month" => { sum: { field: "total" } } } } } }, relation_types: { terms: { field: "relation_type_id", size: 50, min_doc_count: 1 }, aggs: { year_months: { date_histogram: { field: "occurred_at", interval: "month", min_doc_count: 1 }, aggs: { "total_by_year_month" => { sum: { field: "total" } } } }, "sum_distribution" => sum_distribution } }, dois: { terms: { field: "obj_id", size: 50, min_doc_count: 1 }, aggs: { relation_types: { terms: { field: "relation_type_id", size: 50, min_doc_count: 1 }, aggs: { "total_by_type" => { sum: { field: "total" } } } } } }, - dois_usage: { - filter: { script: { script: "doc['source_id'].value == 'datacite-usage' && doc['occurred_at'].size() > 0 && doc['obj.datePublished'].size() > 0 && doc['occurred_at'].value.getMillis() >= doc['obj.datePublished'].value.getMillis() && doc['occurred_at'].value.getMillis() < new Date().getTime()" } }, - aggs: { - dois: { terms: { field: "obj_id", size: 50, min_doc_count: 1 }, aggs: { relation_types: { terms: { field: "relation_type_id", size: 50, min_doc_count: 1 }, aggs: { "total_by_type" => { sum: { field: "total" } } } } } } } - }, + # dois_usage: { + # filter: { script: { script: "doc['source_id'].value == 'datacite-usage'" } }, + # aggs: { + # dois: { terms: { field: "obj_id", size: 50, min_doc_count: 1 }, aggs: { relation_types: { terms: { field: "relation_type_id", size: 50, min_doc_count: 1 }, aggs: { "total_by_type" => { sum: { field: "total" } } } } } } } + # }, dois_citations: { filter: { script: { @@ -244,8 +244,8 @@ def self.metrics_aggregations(doi = nil) } } - views_filter = { script: { script: "doc['relation_type_id'].value == 'unique-dataset-investigations-regular' && doc['source_id'].value == 'datacite-usage' && doc['occurred_at'].size() > 0 && doc['obj.datePublished'].size() > 0 && doc['occurred_at'].value.getMillis() >= doc['obj.datePublished'].value.getMillis() && doc['occurred_at'].value.getMillis() < new Date().getTime()" } } - downloads_filter = { script: { script: "doc['relation_type_id'].value == 'unique-dataset-requests-regular' && doc['source_id'].value == 'datacite-usage' && doc['occurred_at'].size() > 0 && doc['obj.datePublished'].size() > 0 && doc['occurred_at'].value.getMillis() >= doc['obj.datePublished'].value.getMillis() && doc['occurred_at'].value.getMillis() < new Date().getTime()" } } + views_filter = { script: { script: "doc['relation_type_id'].value == 'unique-dataset-investigations-regular' && doc['source_id'].value == 'datacite-usage'" } } + downloads_filter = { script: { script: "doc['relation_type_id'].value == 'unique-dataset-requests-regular' && doc['source_id'].value == 'datacite-usage'" } } { views: { @@ -317,6 +317,14 @@ def self.citation_count_aggregation(doi) } end + def self.usage_count_aggregation(doi) + { + usage: { + terms: { field: "obj_id", size: 50, min_doc_count: 1 } , aggs: { "total_by_type" => { sum: { field: "total" } } } + } + } + end + def self.advanced_aggregations(doi = nil) { From 5c406a4d2beb8bd9debb724f3620489df17497f9 Mon Sep 17 00:00:00 2001 From: Kristian Garza Date: Thu, 28 Nov 2019 18:19:51 +0100 Subject: [PATCH 04/17] specs for controller --- spec/models/event_spec.rb | 2 +- spec/requests/events_spec.rb | 27 +++++++++++++++++++++++++++ 2 files changed, 28 insertions(+), 1 deletion(-) diff --git a/spec/models/event_spec.rb b/spec/models/event_spec.rb index c4461ce07..f94b8744d 100644 --- a/spec/models/event_spec.rb +++ b/spec/models/event_spec.rb @@ -10,7 +10,7 @@ it { is_expected.to validate_presence_of(:source_id) } it "has subj" do - expect(subject.subj["date-published"]).to eq("2006-06-13T16:14:19Z") + expect(subject.subj["datePublished"]).to eq("2006-06-13T16:14:19Z") end end diff --git a/spec/requests/events_spec.rb b/spec/requests/events_spec.rb index fad36576b..7fb9b51d7 100644 --- a/spec/requests/events_spec.rb +++ b/spec/requests/events_spec.rb @@ -671,6 +671,33 @@ end end + context "has views and downloads" do + let!(:event) { create_list(:event_for_datacite_usage, 2) } + let(:doi) { (event.first.obj_id).gsub("https://doi.org/", "") } + let(:uri) { "/events?doi=#{doi}" } + + before do + Event.import + sleep 1 + end + + # Exclude the token header. + let(:headers) do + { "HTTP_ACCEPT" => "application/vnd.api+json; version=2" } + end + + it "json" do + get uri, nil, headers + + expect(last_response.status).to eq(200) + response = JSON.parse(last_response.body) + + views = (response.dig("meta", "views")).select { |item| item["id"] == doi } + expect(views.first["count"]).not_to eq(0) + expect(views.first["id"]).to eq(doi) + end + end + context "check meta duplicated" do let!(:event) { create(:event_for_datacite_related, subj_id:"http://doi.org/10.0260/co.2004960.v2", obj_id:"http://doi.org/10.0260/co.2004960.v1") } let!(:copies) { create(:event_for_datacite_related, subj_id:"http://doi.org/10.0260/co.2004960.v2", obj_id:"http://doi.org/10.0260/co.2004960.v1", relation_type_id: "cites") } From 2359ee64ab0bb2255e3f859daaf48aa0807d632c Mon Sep 17 00:00:00 2001 From: Kristian Garza Date: Fri, 29 Nov 2019 03:37:49 +0100 Subject: [PATCH 05/17] move citation link to component --- app/graphql/types/doi_item.rb | 9 +-------- 1 file changed, 1 insertion(+), 8 deletions(-) diff --git a/app/graphql/types/doi_item.rb b/app/graphql/types/doi_item.rb index 96a600c98..fec7a68d6 100644 --- a/app/graphql/types/doi_item.rb +++ b/app/graphql/types/doi_item.rb @@ -68,16 +68,9 @@ def formatted_citation(style: nil, locale: nil) cp = CiteProc::Processor.new(style: style || "apa", locale: locale || "en-US", format: "html") cp.import Array.wrap(citeproc_hsh) bibliography = cp.render :bibliography, id: normalize_doi(object.doi) - url = object.doi - unless /^https?:\/\//i.match?(object.doi) - url = "https://doi.org/#{object.doi}" - end - bibliography.first.gsub(url,doi_link(url)) + bibliography.first end - def doi_link(url) - " #{url} " - end def citeproc_hsh page = object.container.to_h["firstPage"].present? ? [object.container["firstPage"], object.container["lastPage"]].compact.join("-") : nil From e5729fc15c850f012ab3aea11f95aedbc544bea0 Mon Sep 17 00:00:00 2001 From: Kristian Garza Date: Fri, 29 Nov 2019 09:50:02 +0100 Subject: [PATCH 06/17] fix typo on citation relations --- app/queries/events_query.rb | 24 ++++++++++++++++++++++-- 1 file changed, 22 insertions(+), 2 deletions(-) diff --git a/app/queries/events_query.rb b/app/queries/events_query.rb index 01faef2dc..85555bfe3 100644 --- a/app/queries/events_query.rb +++ b/app/queries/events_query.rb @@ -5,13 +5,13 @@ class EventsQuery ACTIVE_RELATION_TYPES = [ "cites", - "is-supplement-to", + "is-supplement-by", "references" ] PASSIVE_RELATION_TYPES = [ "is-cited-by", - "is-supplemented-by", + "is-supplemented-to", "is-referenced-by" ] @@ -31,6 +31,14 @@ def citations(doi) end end + def citations_histogram(doi) + pid = Event.new.normalize_doi(doi) + query = "(subj_id:\"#{pid}\" AND (relation_type_id:#{PASSIVE_RELATION_TYPES.join(' OR relation_type_id:')})) OR (obj_id:\"#{pid}\" AND (relation_type_id:#{ACTIVE_RELATION_TYPES.join(' OR relation_type_id:')}))" + results = Event.query(query, doi: doi, aggregations: "yearly_histogram_aggregation", page: { size: 1, cursor: [] }).response.aggregations.histogram.buckets + facet_counts_by_year_month(results) + end + + def doi_views(doi) query = "(relation_type_id:unique-dataset-investigations-regular AND source_id:datacite-usage)" results = Event.query(query, doi: doi, aggregations: "usage_count_aggregation", page: { size: 1, cursor: [] }).response.aggregations.usage.buckets @@ -43,6 +51,12 @@ def views(doi) end end + def views_histogram(doi) + query = "(relation_type_id:unique-dataset-investigations-regular AND source_id:datacite-usage)" + results = Event.query(query, doi: doi, aggregations: "monthly_histogram_aggregation", page: { size: 1, cursor: [] }).response.aggregations.histogram.buckets + facet_counts_by_year_month(results) + end + def doi_downloads(doi) query = "(relation_type_id:unique-dataset-requests-regular AND source_id:datacite-usage)" results = Event.query(query, doi: doi, aggregations: "usage_count_aggregation", page: { size: 1, cursor: [] }).response.aggregations.usage.buckets @@ -55,6 +69,12 @@ def downloads(doi) end end + def downloads_histogram(doi) + query = "(relation_type_id:unique-dataset-requests-regular AND source_id:datacite-usage)" + results = Event.query(query, doi: doi, aggregations: "monthly_histogram_aggregation", page: { size: 1, cursor: [] }).response.aggregations.histogram.buckets + facet_counts_by_year_month(results) + end + def usage(doi) doi.downcase.split(",").map do |item| pid = Event.new.normalize_doi(item) From 777a45e8396f07b5918113b856fc19991fac7691 Mon Sep 17 00:00:00 2001 From: Kristian Garza Date: Fri, 29 Nov 2019 11:41:59 +0100 Subject: [PATCH 07/17] fix tests --- spec/concerns/indexable_spec.rb | 9 ++++----- spec/requests/events_spec.rb | 17 ++++++++--------- 2 files changed, 12 insertions(+), 14 deletions(-) diff --git a/spec/concerns/indexable_spec.rb b/spec/concerns/indexable_spec.rb index fdec53118..0b1a57384 100644 --- a/spec/concerns/indexable_spec.rb +++ b/spec/concerns/indexable_spec.rb @@ -161,13 +161,13 @@ expect(aggregations[:relation_types]).not_to be_nil expect(aggregations[:registrants]).not_to be_nil expect(aggregations[:pairings]).not_to be_nil - expect(aggregations[:dois_usage]).not_to be_nil + # expect(aggregations[:dois_usage]).not_to be_nil expect(aggregations[:citations_histogram]).to be_nil expect(aggregations[:citations]).to be_nil end it 'returns multiple aggregations when filters aggregations with multiple' do - aggregations = Event.get_aggregations_hash({aggregations:"query_aggregations,metrics_aggregations"}) + aggregations = Event.get_aggregations_hash({aggregations:"query_aggregations,citation_count_aggregation"}) expect(aggregations[:sources]).not_to be_nil expect(aggregations[:prefixes]).not_to be_nil expect(aggregations[:citation_types]).not_to be_nil @@ -175,9 +175,8 @@ expect(aggregations[:registrants]).not_to be_nil expect(aggregations[:pairings]).not_to be_nil expect(aggregations[:dois]).not_to be_nil - expect(aggregations[:dois_usage]).not_to be_nil - expect(aggregations[:citations_histogram]).to be_nil - expect(aggregations[:citations]).to be_nil + # expect(aggregations[:dois_usage]).not_to be_nil + expect(aggregations[:citations]).not_to be_nil end end end diff --git a/spec/requests/events_spec.rb b/spec/requests/events_spec.rb index 7fb9b51d7..a8d61f346 100644 --- a/spec/requests/events_spec.rb +++ b/spec/requests/events_spec.rb @@ -273,7 +273,7 @@ Event.import sleep 1 - get uri + "?aggregations=citations_aggregations&doi=10.1016/j.jastp.2013.05.001", nil, headers + get uri + "?doi=10.1016/j.jastp.2013.05.001", nil, headers puts json.dig("meta", "citationsHistogram") expect(json.dig("meta", "citationsHistogram", "years", 0, "title")).to eq("2017") end @@ -623,7 +623,6 @@ total = response.dig("meta", "total") expect(total).to eq(6) - # puts citations.dig(:count) expect(citations.first["count"]).to eq(5) expect(citations.first["id"]).to start_with("10.5061/dryad.47sd5e/") end @@ -636,7 +635,7 @@ let!(:event3) { create(:event_for_datacite_related, obj_id: event.subj_id) } let!(:event4) { create(:event_for_datacite_related, obj_id: event.subj_id, relation_type_id:"has-part") } let(:doi) { (event.subj_id).gsub("https://doi.org/", "") } - let(:uri) { "/events?aggregations=citations_aggregations&doi=#{doi}" } + let(:uri) { "/events?doi=#{doi}" } before do Event.import @@ -656,18 +655,18 @@ puts response citations = (response.dig("meta", "uniqueCitations")).select { |item| item["id"] == doi } - references = (response.dig("meta", "references")).select { |item| item["id"] == doi } - relations = (response.dig("meta", "relations")).select { |item| item["id"] == doi } + # references = (response.dig("meta", "references")).select { |item| item["id"] == doi } + # relations = (response.dig("meta", "relations")).select { |item| item["id"] == doi } total = response.dig("meta", "total") expect(json.dig("meta", "citationsHistogram", "years", 0, "title")).to eq("2015") expect(total).to eq(5) expect(citations.first["count"]).to eq(2) expect(citations.first["id"]).to eq(doi) - expect(references.first["count"]).to eq(2) - expect(references.first["id"]).to eq(doi) - expect(relations.first["count"]).to eq(1) - expect(relations.first["id"]).to eq(doi) + # expect(references.first["count"]).to eq(2) + # expect(references.first["id"]).to eq(doi) + # expect(relations.first["count"]).to eq(1) + # expect(relations.first["id"]).to eq(doi) end end From 1a5ce1b7eb2f28cfb0b888df11e86314c2eacf16 Mon Sep 17 00:00:00 2001 From: Kristian Garza Date: Fri, 29 Nov 2019 11:42:11 +0100 Subject: [PATCH 08/17] histogram aggregations --- app/queries/events_query.rb | 12 +++++++----- 1 file changed, 7 insertions(+), 5 deletions(-) diff --git a/app/queries/events_query.rb b/app/queries/events_query.rb index 85555bfe3..a31c05aef 100644 --- a/app/queries/events_query.rb +++ b/app/queries/events_query.rb @@ -32,10 +32,10 @@ def citations(doi) end def citations_histogram(doi) - pid = Event.new.normalize_doi(doi) + pid = Event.new.normalize_doi(doi.downcase.split(",").first) query = "(subj_id:\"#{pid}\" AND (relation_type_id:#{PASSIVE_RELATION_TYPES.join(' OR relation_type_id:')})) OR (obj_id:\"#{pid}\" AND (relation_type_id:#{ACTIVE_RELATION_TYPES.join(' OR relation_type_id:')}))" - results = Event.query(query, doi: doi, aggregations: "yearly_histogram_aggregation", page: { size: 1, cursor: [] }).response.aggregations.histogram.buckets - facet_counts_by_year_month(results) + results = Event.query(query, doi: doi, aggregations: "yearly_histogram_aggregation", page: { size: 1, cursor: [] }).response.aggregations.histogram + facet_citations_by_year(results) end @@ -52,8 +52,9 @@ def views(doi) end def views_histogram(doi) + doi = doi.downcase.split(",").first query = "(relation_type_id:unique-dataset-investigations-regular AND source_id:datacite-usage)" - results = Event.query(query, doi: doi, aggregations: "monthly_histogram_aggregation", page: { size: 1, cursor: [] }).response.aggregations.histogram.buckets + results = Event.query(query, doi: doi, aggregations: "monthly_histogram_aggregation", page: { size: 1, cursor: [] }).response.aggregations.histogram facet_counts_by_year_month(results) end @@ -70,8 +71,9 @@ def downloads(doi) end def downloads_histogram(doi) + doi = doi.downcase.split(",").first query = "(relation_type_id:unique-dataset-requests-regular AND source_id:datacite-usage)" - results = Event.query(query, doi: doi, aggregations: "monthly_histogram_aggregation", page: { size: 1, cursor: [] }).response.aggregations.histogram.buckets + results = Event.query(query, doi: doi, aggregations: "monthly_histogram_aggregation", page: { size: 1, cursor: [] }).response.aggregations.histogram facet_counts_by_year_month(results) end From 01e48b1c502b10e226ffc7bbeb2c75e0656bddb4 Mon Sep 17 00:00:00 2001 From: Kristian Garza Date: Fri, 29 Nov 2019 11:42:24 +0100 Subject: [PATCH 09/17] aggregations cleaning --- app/models/event.rb | 95 ++++++++++++--------------------------------- 1 file changed, 25 insertions(+), 70 deletions(-) diff --git a/app/models/event.rb b/app/models/event.rb index b92f7e9e0..f6a0d345d 100644 --- a/app/models/event.rb +++ b/app/models/event.rb @@ -200,7 +200,7 @@ def self.query_fields ["subj_id^10", "obj_id^10", "subj.name^5", "subj.author^5", "subj.periodical^5", "subj.publisher^5", "obj.name^5", "obj.author^5", "obj.periodical^5", "obj.publisher^5", "_all"] end - def self.query_aggregations(doi = nil) + def self.query_aggregations sum_distribution = { sum_bucket: { buckets_path: "year_months>total_by_year_month" @@ -221,11 +221,6 @@ def self.query_aggregations(doi = nil) citation_types: { terms: { field: "citation_type", size: 50, min_doc_count: 1 }, aggs: { year_months: { date_histogram: { field: "occurred_at", interval: "month", min_doc_count: 1 }, aggs: { "total_by_year_month" => { sum: { field: "total" } } } } } }, relation_types: { terms: { field: "relation_type_id", size: 50, min_doc_count: 1 }, aggs: { year_months: { date_histogram: { field: "occurred_at", interval: "month", min_doc_count: 1 }, aggs: { "total_by_year_month" => { sum: { field: "total" } } } }, "sum_distribution" => sum_distribution } }, dois: { terms: { field: "obj_id", size: 50, min_doc_count: 1 }, aggs: { relation_types: { terms: { field: "relation_type_id", size: 50, min_doc_count: 1 }, aggs: { "total_by_type" => { sum: { field: "total" } } } } } }, - # dois_usage: { - # filter: { script: { script: "doc['source_id'].value == 'datacite-usage'" } }, - # aggs: { - # dois: { terms: { field: "obj_id", size: 50, min_doc_count: 1 }, aggs: { relation_types: { terms: { field: "relation_type_id", size: 50, min_doc_count: 1 }, aggs: { "total_by_type" => { sum: { field: "total" } } } } } } } - # }, dois_citations: { filter: { script: { @@ -237,96 +232,56 @@ def self.query_aggregations(doi = nil) } end - def self.metrics_aggregations(doi = nil) - sum_distribution = { - sum_bucket: { - buckets_path: "year_months>total_by_year_month" + + + def self.citation_count_aggregation + { + citations: { + terms: { field: "doi", size: 100, min_doc_count: 1 }, aggs: { total: { cardinality: { field: "citation_id" } } } } } + end - views_filter = { script: { script: "doc['relation_type_id'].value == 'unique-dataset-investigations-regular' && doc['source_id'].value == 'datacite-usage'" } } - downloads_filter = { script: { script: "doc['relation_type_id'].value == 'unique-dataset-requests-regular' && doc['source_id'].value == 'datacite-usage'" } } - - { - views: { - filter: views_filter, - aggs: { dois: { - terms: { field: "obj_id", size: 50, min_doc_count: 1 } , aggs: { "total_by_type" => { sum: { field: "total" } } } - } } - }, - views_histogram: { - filter: views_filter, - aggs: { - year_months: { date_histogram: { field: "occurred_at", interval: "month", min_doc_count: 1 }, aggs: { "total_by_year_month" => { sum: { field: "total" } } } }, "sum_distribution" => sum_distribution - } - }, - downloads: { - filter: downloads_filter, - aggs: { dois: { - terms: { field: "obj_id", size: 50, min_doc_count: 1 } , aggs: { "total_by_type" => { sum: { field: "total" } } } - } } - }, - downloads_histogram: { - filter: downloads_filter, - aggs: { - year_months: { date_histogram: { field: "occurred_at", interval: "month", min_doc_count: 1 }, aggs: { "total_by_year_month" => { sum: { field: "total" } } } }, "sum_distribution" => sum_distribution - } + def self.usage_count_aggregation + { + usage: { + terms: { field: "obj_id", size: 50, min_doc_count: 1 } , aggs: { "total_by_type" => { sum: { field: "total" } } } } } end - def self.citations_aggregations(doi) - doi = Event.new.normalize_doi(doi) if doi.present? - + def self.yearly_histogram_aggregation sum_year_distribution = { sum_bucket: { buckets_path: "years>total_by_year" } } - citations_filter = { script: { script: "(#{PASSIVE_RELATION_TYPES}.contains(doc['relation_type_id'].value) && '#{doi}' == doc['subj_id'].value) || (#{ACTIVE_RELATION_TYPES}.contains(doc['relation_type_id'].value) && '#{doi}' == doc['obj_id'].value)" } } - references_filter = { script: { script: "(#{PASSIVE_RELATION_TYPES}.contains(doc['relation_type_id'].value) && '#{doi}' == doc['obj_id'].value) || (#{ACTIVE_RELATION_TYPES}.contains(doc['relation_type_id'].value) && '#{doi}' == doc['subj_id'].value)" } } - { - citations_histogram: { - filter: citations_filter, + histogram: { + filter: { script: { script: "true"}}, aggs: { years: { histogram: { field: "citation_year", interval: 1 , min_doc_count: 1 }, aggs: { "total_by_year" => { sum: { field: "total" } } } }, "sum_distribution" => sum_year_distribution } }, - references: { - filter: references_filter, - aggs: { dois: { - terms: { field: "doi", size: 100, min_doc_count: 1 }, aggs: { total: { cardinality: { field: "citation_id" } } } - } } - }, - relations: { - filter: { script: { script: "#{RELATIONS_RELATION_TYPES}.contains(doc['relation_type_id'].value)" } - }, - aggs: { dois: { - terms: { field: "doi", size: 100, min_doc_count: 1 }, aggs: { total: { cardinality: { field: "citation_id" } } } - } } - } } end - - def self.citation_count_aggregation(doi) - { - citations: { - terms: { field: "doi", size: 100, min_doc_count: 1 }, aggs: { total: { cardinality: { field: "citation_id" } } } + def self.monthly_histogram_aggregation + sum_distribution = { + sum_bucket: { + buckets_path: "year_months>total_by_year_month" } } - end - - def self.usage_count_aggregation(doi) { - usage: { - terms: { field: "obj_id", size: 50, min_doc_count: 1 } , aggs: { "total_by_type" => { sum: { field: "total" } } } + histogram: { + filter: { script: { script: "true"}}, + aggs: { + year_months: { date_histogram: { field: "occurred_at", interval: "month", min_doc_count: 1 }, aggs: { "total_by_year_month" => { sum: { field: "total" } } } }, "sum_distribution" => sum_distribution } - } + }} end - def self.advanced_aggregations(doi = nil) + def self.advanced_aggregations { unique_obj_count: { cardinality: { field: "obj_id" } }, unique_subj_count: { cardinality: { field: "subj_id" } } From 038e2e509c11ca4a1a8f9f0d3686047dd5cbfa70 Mon Sep 17 00:00:00 2001 From: Kristian Garza Date: Fri, 29 Nov 2019 11:42:47 +0100 Subject: [PATCH 10/17] update query object changes --- app/controllers/events_controller.rb | 10 +++--- app/graphql/types/metric_interface.rb | 44 +++++++++++---------------- app/models/concerns/indexable.rb | 3 +- 3 files changed, 23 insertions(+), 34 deletions(-) diff --git a/app/controllers/events_controller.rb b/app/controllers/events_controller.rb index e683f278e..db96d6672 100644 --- a/app/controllers/events_controller.rb +++ b/app/controllers/events_controller.rb @@ -120,17 +120,15 @@ def index registrants = total.positive? && aggregations.blank? || aggregations.include?("query_aggregations") ? facet_by_registrants(response.response.aggregations.registrants.buckets) : nil pairings = total.positive? && aggregations.blank? || aggregations.include?("query_aggregations") ? facet_by_pairings(response.response.aggregations.pairings.buckets) : nil dois = total.positive? && aggregations.blank? || aggregations.include?("query_aggregations") ? facet_by_dois(response.response.aggregations.dois.buckets) : nil - # dois_usage = total.positive? && aggregations.blank? || aggregations.include?("query_aggregations") ? facet_by_dois(response.response.aggregations.dois_usage.dois.buckets) : nil dois_usage = params[:doi].present? ? EventsQuery.new.usage(params[:doi]) : [] dois_citations = total.positive? && aggregations.blank? || aggregations.include?("query_aggregations") ? facet_citations_by_year_v1(response.response.aggregations.dois_citations) : nil - citations_histogram = total.positive? && params[:doi].present? && aggregations.include?("citations_aggregations") ? facet_citations_by_year(response.response.aggregations.citations_histogram) : nil citations = params[:doi].present? ? EventsQuery.new.citations(params[:doi]) : [] + citations_histogram = params[:doi].present? ? EventsQuery.new.citations_histogram(params[:doi]) : [] references = total.positive? && params[:doi].present? && aggregations.include?("citations_aggregations") ? facet_citations_by_dois(response.response.aggregations.references.dois.buckets) : nil relations = total.positive? && params[:doi].present? && aggregations.include?("citations_aggregations") ? facet_citations_by_dois(response.response.aggregations.relations.dois.buckets) : nil - views_histogram = total.positive? && aggregations.include?("metrics_aggregations") ? facet_counts_by_year_month(response.response.aggregations.views_histogram) : nil - downloads_histogram = total.positive? && aggregations.include?("metrics_aggregations") ? facet_counts_by_year_month(response.response.aggregations.downloads_histogram) : nil - # views = total.positive? && aggregations.include?("metrics_aggregations") ? facet_by_source(response.response.aggregations.views.dois.buckets) : nil - # downloads = total.positive? && aggregations.include?("metrics_aggregations") ? facet_by_source(response.response.aggregations.downloads.dois.buckets) : nil + + views_histogram = params[:doi].present? ? EventsQuery.new.views_histogram(params[:doi]) : [] + downloads_histogram = params[:doi].present? ? EventsQuery.new.downloads_histogram(params[:doi]) : [] views = params[:doi].present? ? EventsQuery.new.views(params[:doi]) : [] downloads = params[:doi].present? ? EventsQuery.new.downloads(params[:doi]) : [] diff --git a/app/graphql/types/metric_interface.rb b/app/graphql/types/metric_interface.rb index 6cb8a63f9..af3ea3a57 100644 --- a/app/graphql/types/metric_interface.rb +++ b/app/graphql/types/metric_interface.rb @@ -30,15 +30,15 @@ def citation_count EventsQuery.new.doi_citations(doi_from_url(object.identifier)) end - def reference_count - meta = references_aggs - meta.first.fetch("total", {}).fetch("value", nil) if meta.any? - end + # def reference_count + # meta = references_aggs + # meta.first.fetch("total", {}).fetch("value", nil) if meta.any? + # end - def relation_count - meta = relations_aggs - meta.first.fetch("total", {}).fetch("value", nil) if meta.any? - end + # def relation_count + # meta = relations_aggs + # meta.first.fetch("total", {}).fetch("value", nil) if meta.any? + # end # def references_list # references_aggs.map { |item| item[:key]} @@ -56,27 +56,19 @@ def relation_count # # end # end - def citations_aggs - aggregation_results(id: object.identifier, aggregations: "citations_aggregations" ).citations.dois.buckets - end + # def citations_aggs + # aggregation_results(id: object.identifier, aggregations: "citations_aggregations" ).citations.dois.buckets + # end - def references_aggs - aggregation_results(id: object.identifier, aggregations: "citations_aggregations").references.dois.buckets - end + # def references_aggs + # aggregation_results(id: object.identifier, aggregations: "citations_aggregations").references.dois.buckets + # end - def relations_aggs - aggregation_results(id: object.identifier, aggregations: "citations_aggregations").relations.dois.buckets - end + # def relations_aggs + # aggregation_results(id: object.identifier, aggregations: "citations_aggregations").relations.dois.buckets + # end def citation_histogram - hash = aggregation_results(id: object.identifier, aggregations: "citations_aggregations").citations_histogram - - hash.dig('years', 'buckets').map do |h| - year = h['key'] - { - 'id' => year, - 'sum' => h.dig('total_by_year', 'value') - } - end + EventsQuery.new.citations_histogram(doi_from_url(object.identifier)) end end diff --git a/app/models/concerns/indexable.rb b/app/models/concerns/indexable.rb index 15a49e804..14e7e134d 100644 --- a/app/models/concerns/indexable.rb +++ b/app/models/concerns/indexable.rb @@ -103,8 +103,7 @@ def get_aggregations_hash(options={}) aggs = {} aggregations.split(",").each do |agg| agg = :query_aggregations if agg.blank? || !respond_to?(agg) - doi = options[:doi].present? ? options[:doi].downcase.split(",").first : nil - aggs.merge! send(agg,doi) + aggs.merge! send(agg) end aggs end From c6d1956c2950117fc0c8a8eb99d46be4ad28dd2a Mon Sep 17 00:00:00 2001 From: Kristian Garza Date: Fri, 29 Nov 2019 14:51:45 +0100 Subject: [PATCH 11/17] get metrics only if results --- app/controllers/events_controller.rb | 30 ++++++++++++++++++++-------- app/models/event.rb | 10 +--------- 2 files changed, 23 insertions(+), 17 deletions(-) diff --git a/app/controllers/events_controller.rb b/app/controllers/events_controller.rb index db96d6672..c7c2a36ab 100644 --- a/app/controllers/events_controller.rb +++ b/app/controllers/events_controller.rb @@ -4,6 +4,8 @@ class EventsController < ApplicationController include Facetable include BatchLoaderHelper + require 'benchmark' + prepend_before_action :authenticate_user!, except: [:index, :show] @@ -120,21 +122,33 @@ def index registrants = total.positive? && aggregations.blank? || aggregations.include?("query_aggregations") ? facet_by_registrants(response.response.aggregations.registrants.buckets) : nil pairings = total.positive? && aggregations.blank? || aggregations.include?("query_aggregations") ? facet_by_pairings(response.response.aggregations.pairings.buckets) : nil dois = total.positive? && aggregations.blank? || aggregations.include?("query_aggregations") ? facet_by_dois(response.response.aggregations.dois.buckets) : nil - dois_usage = params[:doi].present? ? EventsQuery.new.usage(params[:doi]) : [] - dois_citations = total.positive? && aggregations.blank? || aggregations.include?("query_aggregations") ? facet_citations_by_year_v1(response.response.aggregations.dois_citations) : nil - citations = params[:doi].present? ? EventsQuery.new.citations(params[:doi]) : [] + dois_usage = total.positive? && params[:doi].present? ? EventsQuery.new.usage(params[:doi]) : [] + # dois_citations = total.positive? && aggregations.blank? || aggregations.include?("query_aggregations") ? facet_citations_by_year_v1(response.response.aggregations.dois_citations) : nil + citations = total.positive? && params[:doi].present? ? EventsQuery.new.citations(params[:doi]) : [] citations_histogram = params[:doi].present? ? EventsQuery.new.citations_histogram(params[:doi]) : [] references = total.positive? && params[:doi].present? && aggregations.include?("citations_aggregations") ? facet_citations_by_dois(response.response.aggregations.references.dois.buckets) : nil relations = total.positive? && params[:doi].present? && aggregations.include?("citations_aggregations") ? facet_citations_by_dois(response.response.aggregations.relations.dois.buckets) : nil - views_histogram = params[:doi].present? ? EventsQuery.new.views_histogram(params[:doi]) : [] - downloads_histogram = params[:doi].present? ? EventsQuery.new.downloads_histogram(params[:doi]) : [] + views_histogram = total.positive? && params[:doi].present? ? EventsQuery.new.views_histogram(params[:doi]) : [] + downloads_histogram = total.positive? && params[:doi].present? ? EventsQuery.new.downloads_histogram(params[:doi]) : [] - views = params[:doi].present? ? EventsQuery.new.views(params[:doi]) : [] - downloads = params[:doi].present? ? EventsQuery.new.downloads(params[:doi]) : [] + views = total.positive? && params[:doi].present? ? EventsQuery.new.views(params[:doi]) : [] + downloads = total.positive? && params[:doi].present? ? EventsQuery.new.downloads(params[:doi]) : [] unique_obj_count = total.positive? && aggregations.include?("advanced_aggregations") ? response.response.aggregations.unique_obj_count.value : nil unique_subj_count = total.positive? && aggregations.include?("advanced_aggregations") ? response.response.aggregations.unique_subj_count.value : nil + + + bmt = Benchmark.ms { + total.positive? && params[:doi].present? ? EventsQuery.new.citations(params[:doi]) : [] + } + if bmt > 10000 + logger.warn "[Benchmark Warning] citations " + bmt.to_s + " ms" + else + logger.info "[Benchmark] citations " + bmt.to_s + " ms" + end + + results = response.results options = {} @@ -150,7 +164,7 @@ def index registrants: registrants, "doisRelationTypes": dois, "doisUsageTypes": dois_usage, - "doisCitations": dois_citations, + # "doisCitations": dois_citations, "citationsHistogram": citations_histogram, "uniqueCitations": citations, "references": references, diff --git a/app/models/event.rb b/app/models/event.rb index f6a0d345d..c3ae71373 100644 --- a/app/models/event.rb +++ b/app/models/event.rb @@ -220,15 +220,7 @@ def self.query_aggregations pairings: { terms: { field: "registrant_id", size: 50, min_doc_count: 1 }, aggs: { recipient: { terms: { field: "registrant_id", size: 50, min_doc_count: 1 }, aggs: { "total" => { sum: { field: "total" } } } } } }, citation_types: { terms: { field: "citation_type", size: 50, min_doc_count: 1 }, aggs: { year_months: { date_histogram: { field: "occurred_at", interval: "month", min_doc_count: 1 }, aggs: { "total_by_year_month" => { sum: { field: "total" } } } } } }, relation_types: { terms: { field: "relation_type_id", size: 50, min_doc_count: 1 }, aggs: { year_months: { date_histogram: { field: "occurred_at", interval: "month", min_doc_count: 1 }, aggs: { "total_by_year_month" => { sum: { field: "total" } } } }, "sum_distribution" => sum_distribution } }, - dois: { terms: { field: "obj_id", size: 50, min_doc_count: 1 }, aggs: { relation_types: { terms: { field: "relation_type_id", size: 50, min_doc_count: 1 }, aggs: { "total_by_type" => { sum: { field: "total" } } } } } }, - dois_citations: { - filter: { - script: { - script: "#{INCLUDED_RELATION_TYPES}.contains(doc['relation_type_id'].value)" - } - }, - aggs: { years: { date_histogram: { field: "occurred_at", interval: "year", min_doc_count: 1 }, aggs: { "total_by_year" => { sum: { field: "total" } } } }, "sum_distribution" => sum_year_distribution } - } + dois: { terms: { field: "obj_id", size: 50, min_doc_count: 1 }, aggs: { relation_types: { terms: { field: "relation_type_id", size: 50, min_doc_count: 1 }, aggs: { "total_by_type" => { sum: { field: "total" } } } } } } } end From e3df75998b46e9e4d5e44a1ca13c02b4c9e4bea0 Mon Sep 17 00:00:00 2001 From: Kristian Garza Date: Fri, 29 Nov 2019 16:10:47 +0100 Subject: [PATCH 12/17] query ids not main doi --- app/queries/events_query.rb | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/app/queries/events_query.rb b/app/queries/events_query.rb index a31c05aef..d0e49ecc2 100644 --- a/app/queries/events_query.rb +++ b/app/queries/events_query.rb @@ -80,8 +80,8 @@ def downloads_histogram(doi) def usage(doi) doi.downcase.split(",").map do |item| pid = Event.new.normalize_doi(item) - requests = EventsQuery.new.doi_downloads(doi) - investigations = EventsQuery.new.doi_views(doi) + requests = EventsQuery.new.doi_downloads(item) + investigations = EventsQuery.new.doi_views(item) { id: pid, title: pid, relationTypes: [ From 597f53517ee762fb2ebc244c2c1fe02d145d44bf Mon Sep 17 00:00:00 2001 From: Kristian Garza Date: Fri, 29 Nov 2019 16:59:32 +0100 Subject: [PATCH 13/17] remove filter and correct "supplemented" filter --- app/models/event.rb | 13 +++---------- app/queries/events_query.rb | 10 +++++----- 2 files changed, 8 insertions(+), 15 deletions(-) diff --git a/app/models/event.rb b/app/models/event.rb index c3ae71373..9e256efb3 100644 --- a/app/models/event.rb +++ b/app/models/event.rb @@ -250,10 +250,7 @@ def self.yearly_histogram_aggregation } { - histogram: { - filter: { script: { script: "true"}}, - aggs: { years: { histogram: { field: "citation_year", interval: 1 , min_doc_count: 1 }, aggs: { "total_by_year" => { sum: { field: "total" } } } }, "sum_distribution" => sum_year_distribution } - }, + years: { histogram: { field: "citation_year", interval: 1 , min_doc_count: 1 }, aggs: { "total_by_year" => { sum: { field: "total" } } } }, "sum_distribution" => sum_year_distribution } end @@ -264,12 +261,8 @@ def self.monthly_histogram_aggregation } } { - histogram: { - filter: { script: { script: "true"}}, - aggs: { - year_months: { date_histogram: { field: "occurred_at", interval: "month", min_doc_count: 1 }, aggs: { "total_by_year_month" => { sum: { field: "total" } } } }, "sum_distribution" => sum_distribution - } - }} + year_months: { date_histogram: { field: "occurred_at", interval: "month", min_doc_count: 1 }, aggs: { "total_by_year_month" => { sum: { field: "total" } } } }, "sum_distribution" => sum_distribution + } end diff --git a/app/queries/events_query.rb b/app/queries/events_query.rb index d0e49ecc2..d9ce825dd 100644 --- a/app/queries/events_query.rb +++ b/app/queries/events_query.rb @@ -5,13 +5,13 @@ class EventsQuery ACTIVE_RELATION_TYPES = [ "cites", - "is-supplement-by", + "is-supplemented-by", "references" ] PASSIVE_RELATION_TYPES = [ "is-cited-by", - "is-supplemented-to", + "is-supplement-to", "is-referenced-by" ] @@ -34,7 +34,7 @@ def citations(doi) def citations_histogram(doi) pid = Event.new.normalize_doi(doi.downcase.split(",").first) query = "(subj_id:\"#{pid}\" AND (relation_type_id:#{PASSIVE_RELATION_TYPES.join(' OR relation_type_id:')})) OR (obj_id:\"#{pid}\" AND (relation_type_id:#{ACTIVE_RELATION_TYPES.join(' OR relation_type_id:')}))" - results = Event.query(query, doi: doi, aggregations: "yearly_histogram_aggregation", page: { size: 1, cursor: [] }).response.aggregations.histogram + results = Event.query(query, doi: doi, aggregations: "yearly_histogram_aggregation", page: { size: 1, cursor: [] }).response.aggregations facet_citations_by_year(results) end @@ -54,7 +54,7 @@ def views(doi) def views_histogram(doi) doi = doi.downcase.split(",").first query = "(relation_type_id:unique-dataset-investigations-regular AND source_id:datacite-usage)" - results = Event.query(query, doi: doi, aggregations: "monthly_histogram_aggregation", page: { size: 1, cursor: [] }).response.aggregations.histogram + results = Event.query(query, doi: doi, aggregations: "monthly_histogram_aggregation", page: { size: 1, cursor: [] }).response.aggregations facet_counts_by_year_month(results) end @@ -73,7 +73,7 @@ def downloads(doi) def downloads_histogram(doi) doi = doi.downcase.split(",").first query = "(relation_type_id:unique-dataset-requests-regular AND source_id:datacite-usage)" - results = Event.query(query, doi: doi, aggregations: "monthly_histogram_aggregation", page: { size: 1, cursor: [] }).response.aggregations.histogram + results = Event.query(query, doi: doi, aggregations: "monthly_histogram_aggregation", page: { size: 1, cursor: [] }).response.aggregations facet_counts_by_year_month(results) end From 79936a2b0ec0bda563817912b6365d13d02a248f Mon Sep 17 00:00:00 2001 From: Kristian Garza Date: Fri, 29 Nov 2019 17:38:35 +0100 Subject: [PATCH 14/17] remove bentchmark --- app/controllers/events_controller.rb | 14 +------------- 1 file changed, 1 insertion(+), 13 deletions(-) diff --git a/app/controllers/events_controller.rb b/app/controllers/events_controller.rb index c7c2a36ab..f940f283d 100644 --- a/app/controllers/events_controller.rb +++ b/app/controllers/events_controller.rb @@ -125,7 +125,7 @@ def index dois_usage = total.positive? && params[:doi].present? ? EventsQuery.new.usage(params[:doi]) : [] # dois_citations = total.positive? && aggregations.blank? || aggregations.include?("query_aggregations") ? facet_citations_by_year_v1(response.response.aggregations.dois_citations) : nil citations = total.positive? && params[:doi].present? ? EventsQuery.new.citations(params[:doi]) : [] - citations_histogram = params[:doi].present? ? EventsQuery.new.citations_histogram(params[:doi]) : [] + citations_histogram = total.positive? && params[:doi].present? ? EventsQuery.new.citations_histogram(params[:doi]) : [] references = total.positive? && params[:doi].present? && aggregations.include?("citations_aggregations") ? facet_citations_by_dois(response.response.aggregations.references.dois.buckets) : nil relations = total.positive? && params[:doi].present? && aggregations.include?("citations_aggregations") ? facet_citations_by_dois(response.response.aggregations.relations.dois.buckets) : nil @@ -137,18 +137,6 @@ def index unique_obj_count = total.positive? && aggregations.include?("advanced_aggregations") ? response.response.aggregations.unique_obj_count.value : nil unique_subj_count = total.positive? && aggregations.include?("advanced_aggregations") ? response.response.aggregations.unique_subj_count.value : nil - - - bmt = Benchmark.ms { - total.positive? && params[:doi].present? ? EventsQuery.new.citations(params[:doi]) : [] - } - if bmt > 10000 - logger.warn "[Benchmark Warning] citations " + bmt.to_s + " ms" - else - logger.info "[Benchmark] citations " + bmt.to_s + " ms" - end - - results = response.results options = {} From 24d1a326fef779735b9742ff5d3ac83feeeb615c Mon Sep 17 00:00:00 2001 From: Kristian Garza Date: Fri, 29 Nov 2019 20:07:56 +0100 Subject: [PATCH 15/17] should return nils --- app/controllers/events_controller.rb | 14 +++++++------- 1 file changed, 7 insertions(+), 7 deletions(-) diff --git a/app/controllers/events_controller.rb b/app/controllers/events_controller.rb index f940f283d..27537f4dc 100644 --- a/app/controllers/events_controller.rb +++ b/app/controllers/events_controller.rb @@ -122,18 +122,18 @@ def index registrants = total.positive? && aggregations.blank? || aggregations.include?("query_aggregations") ? facet_by_registrants(response.response.aggregations.registrants.buckets) : nil pairings = total.positive? && aggregations.blank? || aggregations.include?("query_aggregations") ? facet_by_pairings(response.response.aggregations.pairings.buckets) : nil dois = total.positive? && aggregations.blank? || aggregations.include?("query_aggregations") ? facet_by_dois(response.response.aggregations.dois.buckets) : nil - dois_usage = total.positive? && params[:doi].present? ? EventsQuery.new.usage(params[:doi]) : [] + dois_usage = total.positive? && params[:doi].present? ? EventsQuery.new.usage(params[:doi]) : nil # dois_citations = total.positive? && aggregations.blank? || aggregations.include?("query_aggregations") ? facet_citations_by_year_v1(response.response.aggregations.dois_citations) : nil - citations = total.positive? && params[:doi].present? ? EventsQuery.new.citations(params[:doi]) : [] - citations_histogram = total.positive? && params[:doi].present? ? EventsQuery.new.citations_histogram(params[:doi]) : [] + citations = total.positive? && params[:doi].present? ? EventsQuery.new.citations(params[:doi]) : nil + citations_histogram = total.positive? && params[:doi].present? ? EventsQuery.new.citations_histogram(params[:doi]) : nil references = total.positive? && params[:doi].present? && aggregations.include?("citations_aggregations") ? facet_citations_by_dois(response.response.aggregations.references.dois.buckets) : nil relations = total.positive? && params[:doi].present? && aggregations.include?("citations_aggregations") ? facet_citations_by_dois(response.response.aggregations.relations.dois.buckets) : nil - views_histogram = total.positive? && params[:doi].present? ? EventsQuery.new.views_histogram(params[:doi]) : [] - downloads_histogram = total.positive? && params[:doi].present? ? EventsQuery.new.downloads_histogram(params[:doi]) : [] + views_histogram = total.positive? && params[:doi].present? ? EventsQuery.new.views_histogram(params[:doi]) : nil + downloads_histogram = total.positive? && params[:doi].present? ? EventsQuery.new.downloads_histogram(params[:doi]) : nil - views = total.positive? && params[:doi].present? ? EventsQuery.new.views(params[:doi]) : [] - downloads = total.positive? && params[:doi].present? ? EventsQuery.new.downloads(params[:doi]) : [] + views = total.positive? && params[:doi].present? ? EventsQuery.new.views(params[:doi]) : nil + downloads = total.positive? && params[:doi].present? ? EventsQuery.new.downloads(params[:doi]) : nil unique_obj_count = total.positive? && aggregations.include?("advanced_aggregations") ? response.response.aggregations.unique_obj_count.value : nil unique_subj_count = total.positive? && aggregations.include?("advanced_aggregations") ? response.response.aggregations.unique_subj_count.value : nil From 5edfb07333bb0d750e28a280e24560ebc1bebc79 Mon Sep 17 00:00:00 2001 From: Kristian Garza Date: Sat, 30 Nov 2019 23:26:49 +0100 Subject: [PATCH 16/17] reduce conditions --- app/controllers/events_controller.rb | 24 ++++++++++++------------ app/queries/events_query.rb | 10 ++++++++++ 2 files changed, 22 insertions(+), 12 deletions(-) diff --git a/app/controllers/events_controller.rb b/app/controllers/events_controller.rb index 27537f4dc..cf4e915e5 100644 --- a/app/controllers/events_controller.rb +++ b/app/controllers/events_controller.rb @@ -122,18 +122,18 @@ def index registrants = total.positive? && aggregations.blank? || aggregations.include?("query_aggregations") ? facet_by_registrants(response.response.aggregations.registrants.buckets) : nil pairings = total.positive? && aggregations.blank? || aggregations.include?("query_aggregations") ? facet_by_pairings(response.response.aggregations.pairings.buckets) : nil dois = total.positive? && aggregations.blank? || aggregations.include?("query_aggregations") ? facet_by_dois(response.response.aggregations.dois.buckets) : nil - dois_usage = total.positive? && params[:doi].present? ? EventsQuery.new.usage(params[:doi]) : nil + dois_usage = total.positive? ? EventsQuery.new.usage(params[:doi]) : nil # dois_citations = total.positive? && aggregations.blank? || aggregations.include?("query_aggregations") ? facet_citations_by_year_v1(response.response.aggregations.dois_citations) : nil - citations = total.positive? && params[:doi].present? ? EventsQuery.new.citations(params[:doi]) : nil - citations_histogram = total.positive? && params[:doi].present? ? EventsQuery.new.citations_histogram(params[:doi]) : nil - references = total.positive? && params[:doi].present? && aggregations.include?("citations_aggregations") ? facet_citations_by_dois(response.response.aggregations.references.dois.buckets) : nil - relations = total.positive? && params[:doi].present? && aggregations.include?("citations_aggregations") ? facet_citations_by_dois(response.response.aggregations.relations.dois.buckets) : nil + citations = total.positive? ? EventsQuery.new.citations(params[:doi]) : nil + citations_histogram = total.positive? ? EventsQuery.new.citations_histogram(params[:doi]) : nil + references = total.positive? && aggregations.include?("citations_aggregations") ? facet_citations_by_dois(response.response.aggregations.references.dois.buckets) : nil + relations = total.positive? && aggregations.include?("citations_aggregations") ? facet_citations_by_dois(response.response.aggregations.relations.dois.buckets) : nil - views_histogram = total.positive? && params[:doi].present? ? EventsQuery.new.views_histogram(params[:doi]) : nil - downloads_histogram = total.positive? && params[:doi].present? ? EventsQuery.new.downloads_histogram(params[:doi]) : nil + views_histogram = total.positive? ? EventsQuery.new.views_histogram(params[:doi]) : nil + downloads_histogram = total.positive? ? EventsQuery.new.downloads_histogram(params[:doi]) : nil - views = total.positive? && params[:doi].present? ? EventsQuery.new.views(params[:doi]) : nil - downloads = total.positive? && params[:doi].present? ? EventsQuery.new.downloads(params[:doi]) : nil + # views = total.positive? ? EventsQuery.new.views(params[:doi]) : nil + # downloads = total.positive? ? EventsQuery.new.downloads(params[:doi]) : nil unique_obj_count = total.positive? && aggregations.include?("advanced_aggregations") ? response.response.aggregations.unique_obj_count.value : nil unique_subj_count = total.positive? && aggregations.include?("advanced_aggregations") ? response.response.aggregations.unique_subj_count.value : nil @@ -162,9 +162,9 @@ def index "subjCount": unique_subj_count }, "viewsHistogram": views_histogram, - "views": views, - "downloadsHistogram": downloads_histogram, - "downloads": downloads + # "views": views, + "downloadsHistogram": downloads_histogram + # "downloads": downloads }.compact options[:links] = { diff --git a/app/queries/events_query.rb b/app/queries/events_query.rb index d9ce825dd..0018fb7b4 100644 --- a/app/queries/events_query.rb +++ b/app/queries/events_query.rb @@ -19,6 +19,7 @@ def initialize end def doi_citations(doi) + return nil unless doi.present? pid = Event.new.normalize_doi(doi) query = "(subj_id:\"#{pid}\" AND (relation_type_id:#{PASSIVE_RELATION_TYPES.join(' OR relation_type_id:')})) OR (obj_id:\"#{pid}\" AND (relation_type_id:#{ACTIVE_RELATION_TYPES.join(' OR relation_type_id:')}))" results = Event.query(query, doi: doi, aggregations: "citation_count_aggregation", page: { size: 1, cursor: [] }).response.aggregations.citations.buckets @@ -26,12 +27,14 @@ def doi_citations(doi) end def citations(doi) + return {} unless doi.present? doi.downcase.split(",").map do |item| { id: item, count: EventsQuery.new.doi_citations(item) } end end def citations_histogram(doi) + return {} unless doi.present? pid = Event.new.normalize_doi(doi.downcase.split(",").first) query = "(subj_id:\"#{pid}\" AND (relation_type_id:#{PASSIVE_RELATION_TYPES.join(' OR relation_type_id:')})) OR (obj_id:\"#{pid}\" AND (relation_type_id:#{ACTIVE_RELATION_TYPES.join(' OR relation_type_id:')}))" results = Event.query(query, doi: doi, aggregations: "yearly_histogram_aggregation", page: { size: 1, cursor: [] }).response.aggregations @@ -40,18 +43,21 @@ def citations_histogram(doi) def doi_views(doi) + return nil unless doi.present? query = "(relation_type_id:unique-dataset-investigations-regular AND source_id:datacite-usage)" results = Event.query(query, doi: doi, aggregations: "usage_count_aggregation", page: { size: 1, cursor: [] }).response.aggregations.usage.buckets results.any? ? results.first.dig("total_by_type", "value") : 0 end def views(doi) + return {} unless doi.present? doi.downcase.split(",").map do |item| { id: item, count: EventsQuery.new.doi_views(item) } end end def views_histogram(doi) + return {} unless doi.present? doi = doi.downcase.split(",").first query = "(relation_type_id:unique-dataset-investigations-regular AND source_id:datacite-usage)" results = Event.query(query, doi: doi, aggregations: "monthly_histogram_aggregation", page: { size: 1, cursor: [] }).response.aggregations @@ -59,18 +65,21 @@ def views_histogram(doi) end def doi_downloads(doi) + return nil unless doi.present? query = "(relation_type_id:unique-dataset-requests-regular AND source_id:datacite-usage)" results = Event.query(query, doi: doi, aggregations: "usage_count_aggregation", page: { size: 1, cursor: [] }).response.aggregations.usage.buckets results.any? ? results.first.dig("total_by_type", "value") : 0 end def downloads(doi) + return {} unless doi.present? doi.downcase.split(",").map do |item| { id: item, count: EventsQuery.new.doi_downloads(item) } end end def downloads_histogram(doi) + return {} unless doi.present? doi = doi.downcase.split(",").first query = "(relation_type_id:unique-dataset-requests-regular AND source_id:datacite-usage)" results = Event.query(query, doi: doi, aggregations: "monthly_histogram_aggregation", page: { size: 1, cursor: [] }).response.aggregations @@ -78,6 +87,7 @@ def downloads_histogram(doi) end def usage(doi) + return {} unless doi.present? doi.downcase.split(",").map do |item| pid = Event.new.normalize_doi(item) requests = EventsQuery.new.doi_downloads(item) From 70373f61fc8a20aa7e40b72c2cb37075fa8ba3b4 Mon Sep 17 00:00:00 2001 From: Kristian Garza Date: Sat, 30 Nov 2019 23:26:59 +0100 Subject: [PATCH 17/17] un used fucntion commented --- spec/requests/events_spec.rb | 42 ++++++++++++++++++------------------ 1 file changed, 21 insertions(+), 21 deletions(-) diff --git a/spec/requests/events_spec.rb b/spec/requests/events_spec.rb index a8d61f346..2d54fa04c 100644 --- a/spec/requests/events_spec.rb +++ b/spec/requests/events_spec.rb @@ -670,32 +670,32 @@ end end - context "has views and downloads" do - let!(:event) { create_list(:event_for_datacite_usage, 2) } - let(:doi) { (event.first.obj_id).gsub("https://doi.org/", "") } - let(:uri) { "/events?doi=#{doi}" } + # context "has views and downloads" do + # let!(:event) { create_list(:event_for_datacite_usage, 2) } + # let(:doi) { (event.first.obj_id).gsub("https://doi.org/", "") } + # let(:uri) { "/events?doi=#{doi}" } - before do - Event.import - sleep 1 - end + # before do + # Event.import + # sleep 1 + # end - # Exclude the token header. - let(:headers) do - { "HTTP_ACCEPT" => "application/vnd.api+json; version=2" } - end + # # Exclude the token header. + # let(:headers) do + # { "HTTP_ACCEPT" => "application/vnd.api+json; version=2" } + # end - it "json" do - get uri, nil, headers + # it "json" do + # get uri, nil, headers - expect(last_response.status).to eq(200) - response = JSON.parse(last_response.body) + # expect(last_response.status).to eq(200) + # response = JSON.parse(last_response.body) - views = (response.dig("meta", "views")).select { |item| item["id"] == doi } - expect(views.first["count"]).not_to eq(0) - expect(views.first["id"]).to eq(doi) - end - end + # views = (response.dig("meta", "views")).select { |item| item["id"] == doi } + # expect(views.first["count"]).not_to eq(0) + # expect(views.first["id"]).to eq(doi) + # end + # end context "check meta duplicated" do let!(:event) { create(:event_for_datacite_related, subj_id:"http://doi.org/10.0260/co.2004960.v2", obj_id:"http://doi.org/10.0260/co.2004960.v1") }