From fd4e389a340e105accd23ce2321645789fe083e0 Mon Sep 17 00:00:00 2001 From: kjgarza Date: Tue, 21 Jan 2020 18:52:38 +0100 Subject: [PATCH 01/13] reduce number of ES queries related to https://github.com/datacite/bracco/issues/270 --- app/controllers/concerns/metrics_helper.rb | 42 +++++++-- app/controllers/dois_controller.rb | 5 + app/queries/events_query.rb | 103 ++++++++++++++++++--- app/serializers/doi_serializer.rb | 12 +-- db/schema.rb | 10 +- spec/queries/events_query_spec.rb | 28 +++++- spec/requests/events_spec.rb | 8 +- 7 files changed, 171 insertions(+), 37 deletions(-) diff --git a/app/controllers/concerns/metrics_helper.rb b/app/controllers/concerns/metrics_helper.rb index c7f61cce2..94275f291 100644 --- a/app/controllers/concerns/metrics_helper.rb +++ b/app/controllers/concerns/metrics_helper.rb @@ -1,17 +1,45 @@ module MetricsHelper extend ActiveSupport::Concern - class_methods do - def doi_citations(doi) - EventsQuery.new.doi_citations(doi) + included do + + def get_metrics_array(dois) + + citations = EventsQuery.new.citations(dois) + views = EventsQuery.new.views(dois) + downloads = EventsQuery.new.downloads(dois) + + first_merge = merge_array_hashes(citations, views) + merge_array_hashes(first_merge, downloads) end - def doi_views(doi) - EventsQuery.new.doi_views(doi) + def merge_array_hashes(first_array, second_array) + return first_array if second_array.blank? + return second_array if first_array.blank? + + total = first_array | second_array + total.group_by {|hash| hash[:id]}.map do |key, value| + metrics = value.reduce(&:merge) + {id: key}.merge(metrics) + end end + end + + class_methods do + # def doi_citations(doi) + # EventsQuery.new.citations(doi) + # end + + # def doi_views(doi) + # EventsQuery.new.doi_views(doi) + # end + + # def doi_downloads(doi) + # EventsQuery.new.doi_downloads(doi) + # end - def doi_downloads(doi) - EventsQuery.new.doi_downloads(doi) + def mix_in_metrics(doi, metrics_array_hashes) + metrics_array_hashes.select { |hash| hash[:id] == doi }.first end end end diff --git a/app/controllers/dois_controller.rb b/app/controllers/dois_controller.rb index 24a00e3db..5de81afc4 100644 --- a/app/controllers/dois_controller.rb +++ b/app/controllers/dois_controller.rb @@ -5,6 +5,8 @@ class DoisController < ApplicationController include ActionController::MimeResponds include Crosscitable + include MetricsHelper # mixes in your helper method as class method + prepend_before_action :authenticate_user! before_action :set_include, only: [:index, :show, :create, :update] @@ -192,6 +194,8 @@ def index } logger.warn method: "GET", path: "/dois", message: "AggregationsLinkChecks /dois", duration: bm + dois_names = results.map { |result| result.dig(:_source, :doi) }.join(',') + metrics_array = get_metrics_array(dois_names) respond_to do |format| format.json do @@ -241,6 +245,7 @@ def index detail: params[:detail], events: params[:events], mix_in: params[:mix_in], + metrics: metrics_array, affiliation: params[:affiliation], is_collection: options[:is_collection] } diff --git a/app/queries/events_query.rb b/app/queries/events_query.rb index 0018fb7b4..484766b6b 100644 --- a/app/queries/events_query.rb +++ b/app/queries/events_query.rb @@ -2,6 +2,8 @@ class EventsQuery include Facetable + include BatchLoaderHelper + ACTIVE_RELATION_TYPES = [ "cites", @@ -26,10 +28,70 @@ def doi_citations(doi) results.any? ? results.first.total.value : 0 end - def citations(doi) - return {} unless doi.present? - doi.downcase.split(",").map do |item| - { id: item, count: EventsQuery.new.doi_citations(item) } + # def citations(doi) + # return {} unless doi.present? + # array = doi.downcase.split(",").uniq + # array.map do |item| + # { id: item, count: EventsQuery.new.doi_citations(item) } + # end + # end + + def citations_left_query(dois) + return nil unless dois.present? + pids = dois.split(",").map do |doi| + Event.new.normalize_doi(doi) + end.uniq + query = "((subj_id:\"#{pids.join('" OR subj_id:"')}\" ) AND (relation_type_id:#{PASSIVE_RELATION_TYPES.join(' OR relation_type_id:')}))" + results = Event.query(query, doi: dois, aggregations: "citation_count_aggregation", page: { size: 1, cursor: [] }).response.aggregations.citations.buckets + results.map do |item| + { id: item[:key], citations: item.total.value } + end + end + + def citations_right_query(dois) + return nil unless dois.present? + pids = dois.split(",").map do |doi| + Event.new.normalize_doi(doi) + end.uniq + query = "((obj_id:\"#{pids.join('" OR obj_id:"')}\") AND (relation_type_id:#{ACTIVE_RELATION_TYPES.join(' OR relation_type_id:')}))" + results = Event.query(query, doi: dois, aggregations: "citation_count_aggregation", page: { size: 1, cursor: [] }).response.aggregations.citations.buckets + results.map do |item| + { id: item[:key], citations: item.total.value } + end + end + + def citations(dois) + right = citations_right_query(dois) + left = citations_left_query(dois) + merge_array_hashes(right, left) + end + + def merge_array_hashes(first_array, second_array) + return first_array if second_array.blank? + return second_array if first_array.blank? + + total = first_array | second_array + total.group_by {|hash| hash[:id]}.map do |key, value| + metrics = value.reduce(&:merge) + {id: key}.merge(metrics) + end + end + + def doi_from_url(url) + if /\A(?:(http|https):\/\/(dx\.)?(doi.org|handle.test.datacite.org)\/)?(doi:)?(10\.\d{4,5}\/.+)\z/.match?(url) + uri = Addressable::URI.parse(url) + uri.path.gsub(/^\//, "").downcase + end + end + + def load_citation_events(doi) + # results.any? ? results.first.total.value : 0 + BatchLoader.for(doi).batch do |event_ids, loader| + pid = Event.new.normalize_doi(doi) + query = "(subj_id:\"#{pid}\" AND (relation_type_id:#{PASSIVE_RELATION_TYPES.join(' OR relation_type_id:')})) OR (obj_id:\"#{pid}\" AND (relation_type_id:#{ACTIVE_RELATION_TYPES.join(' OR relation_type_id:')}))" + Event.query(query, doi: event_ids.join(","), aggregations: "citation_count_aggregation", page: { size: 1, cursor: [] }).response.aggregations.citations.buckets.each do |event| + loader.call(event.uuid, event.total.value) + end end end @@ -41,7 +103,6 @@ def citations_histogram(doi) facet_citations_by_year(results) end - def doi_views(doi) return nil unless doi.present? query = "(relation_type_id:unique-dataset-investigations-regular AND source_id:datacite-usage)" @@ -49,10 +110,23 @@ def doi_views(doi) results.any? ? results.first.dig("total_by_type", "value") : 0 end - def views(doi) - return {} unless doi.present? - doi.downcase.split(",").map do |item| - { id: item, count: EventsQuery.new.doi_views(item) } + def views(dois) + return {} unless dois.present? + query = "(relation_type_id:unique-dataset-investigations-regular AND source_id:datacite-usage)" + results = Event.query(query, doi: dois, aggregations: "usage_count_aggregation", page: { size: 1, cursor: [] }).response.aggregations.usage.buckets + + results.map do |item| + { id: doi_from_url(item[:key]), views: item.dig("total_by_type", "value") } + end + end + + + def load_view_events(doi) + query = "(relation_type_id:unique-dataset-investigations-regular AND source_id:datacite-usage)" + BatchLoader.for(doi).batch do |event_ids, loader| + Event.query(query, doi: event_ids.join(","), aggregations: "monthly_histogram_aggregation", page: { size: 1, cursor: [] }).response.aggregations.each do |event| + loader.call(event.uuid, event.dig("total_by_type", "value")) + end end end @@ -71,10 +145,13 @@ def doi_downloads(doi) results.any? ? results.first.dig("total_by_type", "value") : 0 end - def downloads(doi) - return {} unless doi.present? - doi.downcase.split(",").map do |item| - { id: item, count: EventsQuery.new.doi_downloads(item) } + def downloads(dois) + return {} unless dois.present? + query = "(relation_type_id:unique-dataset-requests-regular AND source_id:datacite-usage)" + results = Event.query(query, doi: dois, aggregations: "usage_count_aggregation", page: { size: 1, cursor: [] }).response.aggregations.usage.buckets + + results.map do |item| + { id: doi_from_url(item[:key]), downloads: item.dig("total_by_type", "value") } end end diff --git a/app/serializers/doi_serializer.rb b/app/serializers/doi_serializer.rb index 9d1c9d0a2..181a9f49a 100644 --- a/app/serializers/doi_serializer.rb +++ b/app/serializers/doi_serializer.rb @@ -89,15 +89,15 @@ class DoiSerializer object.landing_page end - attribute :citations, if: Proc.new { |object, params| params && params[:mix_in] == "metrics" } do |object| - doi_citations(object.uid) + attribute :citations, if: Proc.new { |object, params| params && params[:mix_in] == "metrics" } do |object, params| + mix_in_metrics(object.uid, params[:metrics])[:citations] || 0 end - attribute :views, if: Proc.new { |object, params| params && params[:mix_in] == "metrics" } do |object| - doi_views(object.uid) + attribute :views, if: Proc.new { |object, params| params && params[:mix_in] == "metrics" } do |object, params| + mix_in_metrics(object.uid, params[:metrics])[:views] || 0 end - attribute :downloads, if: Proc.new { |object, params| params && params[:mix_in] == "metrics" } do |object| - doi_downloads(object.uid) + attribute :downloads, if: Proc.new { |object, params| params && params[:mix_in] == "metrics" } do |object, params| + mix_in_metrics(object.uid, params[:metrics])[:downloads] || 0 end end diff --git a/db/schema.rb b/db/schema.rb index 7f3dcd3df..6b17ef087 100644 --- a/db/schema.rb +++ b/db/schema.rb @@ -12,7 +12,7 @@ ActiveRecord::Schema.define(version: 2020_01_21_101841) do - create_table "active_storage_attachments", options: "ENGINE=InnoDB DEFAULT CHARSET=utf8 COLLATE=utf8_bin", force: :cascade do |t| + create_table "active_storage_attachments", options: "ENGINE=InnoDB DEFAULT CHARSET=utf8mb4", force: :cascade do |t| t.string "name", limit: 191, null: false t.string "record_type", null: false t.bigint "record_id", null: false @@ -22,7 +22,7 @@ t.index ["record_type", "record_id", "name", "blob_id"], name: "index_active_storage_attachments_uniqueness", unique: true end - create_table "active_storage_blobs", options: "ENGINE=InnoDB DEFAULT CHARSET=utf8 COLLATE=utf8_bin", force: :cascade do |t| + create_table "active_storage_blobs", options: "ENGINE=InnoDB DEFAULT CHARSET=utf8mb4", force: :cascade do |t| t.string "key", limit: 191, null: false t.string "filename", limit: 191, null: false t.string "content_type", limit: 191 @@ -85,7 +85,7 @@ t.index ["prefixes"], name: "FKE7FBD674AF86A1C7" end - create_table "audits", options: "ENGINE=InnoDB DEFAULT CHARSET=utf8 COLLATE=utf8_bin", force: :cascade do |t| + create_table "audits", options: "ENGINE=InnoDB DEFAULT CHARSET=utf8mb4", force: :cascade do |t| t.integer "auditable_id" t.string "auditable_type" t.integer "associated_id" @@ -210,7 +210,7 @@ t.index ["url"], name: "index_dataset_on_url", length: 100 end - create_table "events", options: "ENGINE=InnoDB DEFAULT CHARSET=utf8mb4 COLLATE=utf8mb4_bin", force: :cascade do |t| + create_table "events", options: "ENGINE=InnoDB DEFAULT CHARSET=utf8mb4", force: :cascade do |t| t.text "uuid", null: false t.text "subj_id", null: false t.text "obj_id" @@ -271,7 +271,7 @@ t.index ["prefix"], name: "prefix", unique: true end - create_table "researchers", options: "ENGINE=InnoDB DEFAULT CHARSET=latin1", force: :cascade do |t| + create_table "researchers", options: "ENGINE=InnoDB DEFAULT CHARSET=utf8mb4", force: :cascade do |t| t.string "name", limit: 191 t.string "family_name", limit: 191 t.string "given_names", limit: 191 diff --git a/spec/queries/events_query_spec.rb b/spec/queries/events_query_spec.rb index 1ea2b92a1..950d41b40 100644 --- a/spec/queries/events_query_spec.rb +++ b/spec/queries/events_query_spec.rb @@ -26,8 +26,9 @@ results = EventsQuery.new.citations("10.5061/dryad.47sd5/1,10.5061/dryad.47sd5/2,10.0260/co.2004960.v1") citations = results.select { |item| item[:id] == "10.5061/dryad.47sd5/2" }.first no_citations = results.select { |item| item[:id] == "10.5061/dryad.47sd5/1" }.first - expect(citations[:count]).to eq(3) - expect(no_citations[:count]).to eq(0) + + expect(citations[:citations]).to eq(3) + # expect(no_citations[:count]).to eq(0) end end @@ -53,4 +54,27 @@ expect(EventsQuery.new.usage("10.0260/co.2004960.v1").first).to eq(id: "https://doi.org/10.0260/co.2004960.v1", title: "https://doi.org/10.0260/co.2004960.v1", relationTypes: [{ id: "unique-dataset-requests-regular", title: "unique-dataset-requests-regular", sum: downloads.first.total }, { id: "unique-dataset-investigations-regular", title: "unique-dataset-investigations-regular", sum: views.first.total }]) end end + + context "mutiple usage events" do + let!(:views) { create_list(:event_for_datacite_usage, 5, relation_type_id:"unique-dataset-investigations-regular") } + let!(:downloads) { create_list(:event_for_datacite_usage, 7, relation_type_id:"unique-dataset-requests-regular") } + + before do + Event.import + sleep 1 + end + + it "show views" do + response = EventsQuery.new.views( views.map { |view| view.doi }.join(',')) + # expect(response.size).to eq(5) + expect(response.first[:views]).to be > 0 + end + + it "show downloads" do + puts downloads.map { |download| download.doi }.join(',') + response = EventsQuery.new.downloads(downloads.map { |download| download.doi }.join(',')) + # expect(response.size).to eq(5) + expect(response.first[:downloads]).to be > 0 + end + end end diff --git a/spec/requests/events_spec.rb b/spec/requests/events_spec.rb index 930f50cd5..2df312c04 100644 --- a/spec/requests/events_spec.rb +++ b/spec/requests/events_spec.rb @@ -623,7 +623,7 @@ total = response.dig("meta", "total") expect(total).to eq(6) - expect(citations.first["count"]).to eq(5) + expect(citations.first["citations"]).to eq(5) expect(citations.first["id"]).to start_with("10.5061/dryad.47sd5e/") end end @@ -660,7 +660,7 @@ expect(json.dig("meta", "citationsHistogram", "years", 0, "title")).to eq("2015") expect(total).to eq(5) - expect(citations.first["count"]).to eq(2) + expect(citations.first["citations"]).to eq(2) expect(citations.first["id"]).to eq(doi) # expect(references.first["count"]).to eq(2) # expect(references.first["id"]).to eq(doi) @@ -721,7 +721,7 @@ total = response.dig("meta", "total") expect(total).to eq(2) - expect(citations.first["count"]).to eq(1) + expect(citations.first["citations"]).to eq(1) expect(citations.first["id"]).to eq("10.0260/co.2004960.v1") end end @@ -753,7 +753,7 @@ total = response.dig("meta", "total") expect(total).to eq(51) - expect((citations.select { |doi| dois.split(",").include?(doi["id"]) }).length).to eq(20) + expect((citations.select { |doi| dois.split(",").include?(doi["id"]) }).length).to eq(1) end end From e4a5c042a6ce6e1c3ec2f94b267fc23c9ba4f3fe Mon Sep 17 00:00:00 2001 From: kjgarza Date: Wed, 22 Jan 2020 19:58:44 +0100 Subject: [PATCH 02/13] revert unwanted schema changes --- db/schema.rb | 10 +++++----- 1 file changed, 5 insertions(+), 5 deletions(-) diff --git a/db/schema.rb b/db/schema.rb index 6b17ef087..7f3dcd3df 100644 --- a/db/schema.rb +++ b/db/schema.rb @@ -12,7 +12,7 @@ ActiveRecord::Schema.define(version: 2020_01_21_101841) do - create_table "active_storage_attachments", options: "ENGINE=InnoDB DEFAULT CHARSET=utf8mb4", force: :cascade do |t| + create_table "active_storage_attachments", options: "ENGINE=InnoDB DEFAULT CHARSET=utf8 COLLATE=utf8_bin", force: :cascade do |t| t.string "name", limit: 191, null: false t.string "record_type", null: false t.bigint "record_id", null: false @@ -22,7 +22,7 @@ t.index ["record_type", "record_id", "name", "blob_id"], name: "index_active_storage_attachments_uniqueness", unique: true end - create_table "active_storage_blobs", options: "ENGINE=InnoDB DEFAULT CHARSET=utf8mb4", force: :cascade do |t| + create_table "active_storage_blobs", options: "ENGINE=InnoDB DEFAULT CHARSET=utf8 COLLATE=utf8_bin", force: :cascade do |t| t.string "key", limit: 191, null: false t.string "filename", limit: 191, null: false t.string "content_type", limit: 191 @@ -85,7 +85,7 @@ t.index ["prefixes"], name: "FKE7FBD674AF86A1C7" end - create_table "audits", options: "ENGINE=InnoDB DEFAULT CHARSET=utf8mb4", force: :cascade do |t| + create_table "audits", options: "ENGINE=InnoDB DEFAULT CHARSET=utf8 COLLATE=utf8_bin", force: :cascade do |t| t.integer "auditable_id" t.string "auditable_type" t.integer "associated_id" @@ -210,7 +210,7 @@ t.index ["url"], name: "index_dataset_on_url", length: 100 end - create_table "events", options: "ENGINE=InnoDB DEFAULT CHARSET=utf8mb4", force: :cascade do |t| + create_table "events", options: "ENGINE=InnoDB DEFAULT CHARSET=utf8mb4 COLLATE=utf8mb4_bin", force: :cascade do |t| t.text "uuid", null: false t.text "subj_id", null: false t.text "obj_id" @@ -271,7 +271,7 @@ t.index ["prefix"], name: "prefix", unique: true end - create_table "researchers", options: "ENGINE=InnoDB DEFAULT CHARSET=utf8mb4", force: :cascade do |t| + create_table "researchers", options: "ENGINE=InnoDB DEFAULT CHARSET=latin1", force: :cascade do |t| t.string "name", limit: 191 t.string "family_name", limit: 191 t.string "given_names", limit: 191 From a08ef236c4133af8ec5d1c67c69fa68d3da4db3b Mon Sep 17 00:00:00 2001 From: kjgarza Date: Wed, 22 Jan 2020 20:17:08 +0100 Subject: [PATCH 03/13] move methods to concern --- app/controllers/concerns/metrics_helper.rb | 24 +--------------------- app/models/concerns/helpable.rb | 11 ++++++++++ 2 files changed, 12 insertions(+), 23 deletions(-) diff --git a/app/controllers/concerns/metrics_helper.rb b/app/controllers/concerns/metrics_helper.rb index 94275f291..f32771a22 100644 --- a/app/controllers/concerns/metrics_helper.rb +++ b/app/controllers/concerns/metrics_helper.rb @@ -1,5 +1,6 @@ module MetricsHelper extend ActiveSupport::Concern + include Helpable included do @@ -12,32 +13,9 @@ def get_metrics_array(dois) first_merge = merge_array_hashes(citations, views) merge_array_hashes(first_merge, downloads) end - - def merge_array_hashes(first_array, second_array) - return first_array if second_array.blank? - return second_array if first_array.blank? - - total = first_array | second_array - total.group_by {|hash| hash[:id]}.map do |key, value| - metrics = value.reduce(&:merge) - {id: key}.merge(metrics) - end - end end class_methods do - # def doi_citations(doi) - # EventsQuery.new.citations(doi) - # end - - # def doi_views(doi) - # EventsQuery.new.doi_views(doi) - # end - - # def doi_downloads(doi) - # EventsQuery.new.doi_downloads(doi) - # end - def mix_in_metrics(doi, metrics_array_hashes) metrics_array_hashes.select { |hash| hash[:id] == doi }.first end diff --git a/app/models/concerns/helpable.rb b/app/models/concerns/helpable.rb index cddee8f88..67f8c678c 100644 --- a/app/models/concerns/helpable.rb +++ b/app/models/concerns/helpable.rb @@ -66,6 +66,17 @@ def register_url response end + def merge_array_hashes(first_array, second_array) + return first_array if second_array.blank? + return second_array if first_array.blank? + + total = first_array | second_array + total.group_by {|hash| hash[:id]}.map do |key, value| + metrics = value.reduce(&:merge) + {id: key}.merge(metrics) + end + end + def get_url url = "#{ENV['HANDLE_URL']}/api/handles/#{doi}?index=1" response = Maremma.get(url, ssl_self_signed: true, timeout: 10) From 26ab1aa8be03b4a32d6977a22b2aa4148a160ee9 Mon Sep 17 00:00:00 2001 From: kjgarza Date: Wed, 22 Jan 2020 20:17:20 +0100 Subject: [PATCH 04/13] remove batchloading --- app/queries/events_query.rb | 50 ++----------------------------------- 1 file changed, 2 insertions(+), 48 deletions(-) diff --git a/app/queries/events_query.rb b/app/queries/events_query.rb index 484766b6b..792b06ace 100644 --- a/app/queries/events_query.rb +++ b/app/queries/events_query.rb @@ -2,8 +2,8 @@ class EventsQuery include Facetable - include BatchLoaderHelper - + include Helpable + include Modelable ACTIVE_RELATION_TYPES = [ "cites", @@ -28,14 +28,6 @@ def doi_citations(doi) results.any? ? results.first.total.value : 0 end - # def citations(doi) - # return {} unless doi.present? - # array = doi.downcase.split(",").uniq - # array.map do |item| - # { id: item, count: EventsQuery.new.doi_citations(item) } - # end - # end - def citations_left_query(dois) return nil unless dois.present? pids = dois.split(",").map do |doi| @@ -66,35 +58,6 @@ def citations(dois) merge_array_hashes(right, left) end - def merge_array_hashes(first_array, second_array) - return first_array if second_array.blank? - return second_array if first_array.blank? - - total = first_array | second_array - total.group_by {|hash| hash[:id]}.map do |key, value| - metrics = value.reduce(&:merge) - {id: key}.merge(metrics) - end - end - - def doi_from_url(url) - if /\A(?:(http|https):\/\/(dx\.)?(doi.org|handle.test.datacite.org)\/)?(doi:)?(10\.\d{4,5}\/.+)\z/.match?(url) - uri = Addressable::URI.parse(url) - uri.path.gsub(/^\//, "").downcase - end - end - - def load_citation_events(doi) - # results.any? ? results.first.total.value : 0 - BatchLoader.for(doi).batch do |event_ids, loader| - pid = Event.new.normalize_doi(doi) - query = "(subj_id:\"#{pid}\" AND (relation_type_id:#{PASSIVE_RELATION_TYPES.join(' OR relation_type_id:')})) OR (obj_id:\"#{pid}\" AND (relation_type_id:#{ACTIVE_RELATION_TYPES.join(' OR relation_type_id:')}))" - Event.query(query, doi: event_ids.join(","), aggregations: "citation_count_aggregation", page: { size: 1, cursor: [] }).response.aggregations.citations.buckets.each do |event| - loader.call(event.uuid, event.total.value) - end - end - end - def citations_histogram(doi) return {} unless doi.present? pid = Event.new.normalize_doi(doi.downcase.split(",").first) @@ -121,15 +84,6 @@ def views(dois) end - def load_view_events(doi) - query = "(relation_type_id:unique-dataset-investigations-regular AND source_id:datacite-usage)" - BatchLoader.for(doi).batch do |event_ids, loader| - Event.query(query, doi: event_ids.join(","), aggregations: "monthly_histogram_aggregation", page: { size: 1, cursor: [] }).response.aggregations.each do |event| - loader.call(event.uuid, event.dig("total_by_type", "value")) - end - end - end - def views_histogram(doi) return {} unless doi.present? doi = doi.downcase.split(",").first From 8a2f84f238dced52d041abe0859342370b2c9e87 Mon Sep 17 00:00:00 2001 From: kjgarza Date: Thu, 23 Jan 2020 00:08:10 +0100 Subject: [PATCH 05/13] include person metrics https://github.com/datacite/bracco/issues/292 --- app/controllers/concerns/metrics_helper.rb | 23 ++++++++++++++++++++++ app/controllers/dois_controller.rb | 7 ++++++- 2 files changed, 29 insertions(+), 1 deletion(-) diff --git a/app/controllers/concerns/metrics_helper.rb b/app/controllers/concerns/metrics_helper.rb index f32771a22..0b3fd08a4 100644 --- a/app/controllers/concerns/metrics_helper.rb +++ b/app/controllers/concerns/metrics_helper.rb @@ -13,6 +13,29 @@ def get_metrics_array(dois) first_merge = merge_array_hashes(citations, views) merge_array_hashes(first_merge, downloads) end + + def get_person_metrics(orcid) + dois = get_person_dois(orcid) + { + citations: EventsQuery.new.citations(dois.join(",")).sum { |h| h[:citations] }, + views: EventsQuery.new.views(dois.join(",")).sum { |h| h[:views] }, + downloads: EventsQuery.new.downloads(dois.join(",")).sum { |h| h[:downloads] } + } + end + + def get_person_dois(orcid) + Event.query(nil, page: { size: 500 }, obj_id: https_to_http(orcid)).results.to_a.map do |e| + doi_from_url(e.subj_id) + end + end + + def https_to_http(url) + orcid = orcid_from_url(url) + return nil unless orcid.present? + + "https://orcid.org/#{orcid}" + end + end class_methods do diff --git a/app/controllers/dois_controller.rb b/app/controllers/dois_controller.rb index 5de81afc4..c11607f1f 100644 --- a/app/controllers/dois_controller.rb +++ b/app/controllers/dois_controller.rb @@ -195,7 +195,9 @@ def index logger.warn method: "GET", path: "/dois", message: "AggregationsLinkChecks /dois", duration: bm dois_names = results.map { |result| result.dig(:_source, :doi) }.join(',') - metrics_array = get_metrics_array(dois_names) + metrics_array = get_metrics_array(dois_names) if params[:mix_in] == "metrics" + + person_metrics = get_person_metrics(params[:user_id]) if params[:mix_in] == "metrics" respond_to do |format| format.json do @@ -223,6 +225,9 @@ def index "linkChecksDcIdentifier" => link_checks_dc_identifier, "linkChecksCitationDoi" => link_checks_citation_doi, subjects: subjects, + citations: person_metrics[:citations], + views: person_metrics[:views], + downloads: person_metrics[:downloads], }.compact options[:links] = { From 099e862a3058c2c3add55a5eaab6e13bd01f64ea Mon Sep 17 00:00:00 2001 From: kjgarza Date: Thu, 23 Jan 2020 00:09:21 +0100 Subject: [PATCH 06/13] abstract code --- app/graphql/types/person_type.rb | 17 +++++++---------- 1 file changed, 7 insertions(+), 10 deletions(-) diff --git a/app/graphql/types/person_type.rb b/app/graphql/types/person_type.rb index b892815d1..4361fe7d5 100644 --- a/app/graphql/types/person_type.rb +++ b/app/graphql/types/person_type.rb @@ -61,24 +61,21 @@ def creative_works(**_args) end def citation_count(**_args) - dois = Event.query(nil, page: { size: 500 }, obj_id: https_to_http(object[:id])).results.to_a.map do |e| - doi_from_url(e.subj_id) - end - EventsQuery.new.citations(dois.join(",")).sum { |h| h[:count] } + EventsQuery.new.citations(get_dois.join(",")).sum { |h| h[:citations] } end def view_count(**_args) - dois = Event.query(nil, page: { size: 500 }, obj_id: https_to_http(object[:id])).results.to_a.map do |e| - doi_from_url(e.subj_id) - end - EventsQuery.new.views(dois.join(",")).sum { |h| h[:count] } + EventsQuery.new.views(get_dois.join(",")).sum { |h| h[:views] } end def download_count(**_args) - dois = Event.query(nil, page: { size: 500 }, obj_id: https_to_http(object[:id])).results.to_a.map do |e| + EventsQuery.new.downloads(get_dois.join(",")).sum { |h| h[:downloads] } + end + + def get_dois + Event.query(nil, page: { size: 500 }, obj_id: https_to_http(object[:id])).results.to_a.map do |e| doi_from_url(e.subj_id) end - EventsQuery.new.downloads(dois.join(",")).sum { |h| h[:count] } end def https_to_http(url) From 665b57bb8bfc808a581f17c917868733b6eaa0cf Mon Sep 17 00:00:00 2001 From: kjgarza Date: Thu, 23 Jan 2020 07:39:26 +0100 Subject: [PATCH 07/13] better condition for mix_in --- app/controllers/dois_controller.rb | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/app/controllers/dois_controller.rb b/app/controllers/dois_controller.rb index c11607f1f..7b4f6a339 100644 --- a/app/controllers/dois_controller.rb +++ b/app/controllers/dois_controller.rb @@ -195,9 +195,9 @@ def index logger.warn method: "GET", path: "/dois", message: "AggregationsLinkChecks /dois", duration: bm dois_names = results.map { |result| result.dig(:_source, :doi) }.join(',') - metrics_array = get_metrics_array(dois_names) if params[:mix_in] == "metrics" + metrics_array = params[:mix_in] == "metrics" ? get_metrics_array(dois_names) : [] - person_metrics = get_person_metrics(params[:user_id]) if params[:mix_in] == "metrics" + person_metrics = params[:mix_in] == "metrics" ? get_person_metrics(params[:user_id]) : {} respond_to do |format| format.json do From efbcf118af6fdca84850e5bd56f66a56c1850024 Mon Sep 17 00:00:00 2001 From: kjgarza Date: Thu, 23 Jan 2020 08:36:07 +0100 Subject: [PATCH 08/13] linting changes --- app/controllers/concerns/metrics_helper.rb | 13 +++++------- app/queries/events_query.rb | 24 ++++++++++------------ 2 files changed, 16 insertions(+), 21 deletions(-) diff --git a/app/controllers/concerns/metrics_helper.rb b/app/controllers/concerns/metrics_helper.rb index 0b3fd08a4..e2324f8ba 100644 --- a/app/controllers/concerns/metrics_helper.rb +++ b/app/controllers/concerns/metrics_helper.rb @@ -3,9 +3,7 @@ module MetricsHelper include Helpable included do - def get_metrics_array(dois) - citations = EventsQuery.new.citations(dois) views = EventsQuery.new.views(dois) downloads = EventsQuery.new.downloads(dois) @@ -19,7 +17,7 @@ def get_person_metrics(orcid) { citations: EventsQuery.new.citations(dois.join(",")).sum { |h| h[:citations] }, views: EventsQuery.new.views(dois.join(",")).sum { |h| h[:views] }, - downloads: EventsQuery.new.downloads(dois.join(",")).sum { |h| h[:downloads] } + downloads: EventsQuery.new.downloads(dois.join(",")).sum { |h| h[:downloads] }, } end @@ -28,17 +26,16 @@ def get_person_dois(orcid) doi_from_url(e.subj_id) end end - + def https_to_http(url) orcid = orcid_from_url(url) - return nil unless orcid.present? - + return nil if orcid.blank? + "https://orcid.org/#{orcid}" end - end - class_methods do + class_methods do def mix_in_metrics(doi, metrics_array_hashes) metrics_array_hashes.select { |hash| hash[:id] == doi }.first end diff --git a/app/queries/events_query.rb b/app/queries/events_query.rb index 792b06ace..031f20d3e 100644 --- a/app/queries/events_query.rb +++ b/app/queries/events_query.rb @@ -118,24 +118,22 @@ def downloads_histogram(doi) end def usage(doi) - return {} unless doi.present? + return {} if doi.blank? + doi.downcase.split(",").map do |item| pid = Event.new.normalize_doi(item) requests = EventsQuery.new.doi_downloads(item) investigations = EventsQuery.new.doi_views(item) - { id: pid, + { id: pid, title: pid, - relationTypes: [ - { id: "unique-dataset-requests-regular", - title: "unique-dataset-requests-regular", - sum: requests - }, - { id: "unique-dataset-investigations-regular", - title: "unique-dataset-investigations-regular", - sum: investigations - } - ] - } + relationTypes: [ + { id: "unique-dataset-requests-regular", + title: "unique-dataset-requests-regular", + sum: requests }, + { id: "unique-dataset-investigations-regular", + title: "unique-dataset-investigations-regular", + sum: investigations }, + ] } end end end From 1d515f235d22a69aa7bd028b734815502257cac3 Mon Sep 17 00:00:00 2001 From: kjgarza Date: Thu, 23 Jan 2020 08:43:12 +0100 Subject: [PATCH 09/13] liniting --- app/graphql/types/person_type.rb | 2 +- app/models/concerns/helpable.rb | 4 ++-- 2 files changed, 3 insertions(+), 3 deletions(-) diff --git a/app/graphql/types/person_type.rb b/app/graphql/types/person_type.rb index 4361fe7d5..09a118b4a 100644 --- a/app/graphql/types/person_type.rb +++ b/app/graphql/types/person_type.rb @@ -80,7 +80,7 @@ def get_dois def https_to_http(url) orcid = orcid_from_url(url) - return nil unless orcid.present? + return nil if orcid.blank? "https://orcid.org/#{orcid}" end diff --git a/app/models/concerns/helpable.rb b/app/models/concerns/helpable.rb index 67f8c678c..dac6109c7 100644 --- a/app/models/concerns/helpable.rb +++ b/app/models/concerns/helpable.rb @@ -71,9 +71,9 @@ def merge_array_hashes(first_array, second_array) return second_array if first_array.blank? total = first_array | second_array - total.group_by {|hash| hash[:id]}.map do |key, value| + total.group_by { |hash| hash[:id] }.map do |key, value| metrics = value.reduce(&:merge) - {id: key}.merge(metrics) + { id: key }.merge(metrics) end end From d255d3528da5dcd550f11ffc70d4925fc7f7f08a Mon Sep 17 00:00:00 2001 From: kjgarza Date: Mon, 27 Jan 2020 21:38:29 +0100 Subject: [PATCH 10/13] furthe reducing number of queries --- app/controllers/concerns/metrics_helper.rb | 26 +++++++++++++-------- app/models/event.rb | 8 +++++++ spec/queries/events_query_spec.rb | 27 +++++++++++----------- 3 files changed, 37 insertions(+), 24 deletions(-) diff --git a/app/controllers/concerns/metrics_helper.rb b/app/controllers/concerns/metrics_helper.rb index e2324f8ba..874c2d5c3 100644 --- a/app/controllers/concerns/metrics_helper.rb +++ b/app/controllers/concerns/metrics_helper.rb @@ -4,20 +4,18 @@ module MetricsHelper included do def get_metrics_array(dois) - citations = EventsQuery.new.citations(dois) - views = EventsQuery.new.views(dois) - downloads = EventsQuery.new.downloads(dois) - - first_merge = merge_array_hashes(citations, views) - merge_array_hashes(first_merge, downloads) + citations = EventsQuery.new.citations(dois) + usage = EventsQuery.new.views_and_downloads(dois) + merge_array_hashes(citations, usage) end def get_person_metrics(orcid) - dois = get_person_dois(orcid) + dois = get_person_dois(orcid).join(",") + usage = EventsQuery.new.views_and_downloads(dois) { - citations: EventsQuery.new.citations(dois.join(",")).sum { |h| h[:citations] }, - views: EventsQuery.new.views(dois.join(",")).sum { |h| h[:views] }, - downloads: EventsQuery.new.downloads(dois.join(",")).sum { |h| h[:downloads] }, + citations: EventsQuery.new.citations(dois).sum { |h| h[:citations] }, + views: usage.sum { |h| h[:views] }, + downloads: usage.sum { |h| h[:downloads] }, } end @@ -33,6 +31,14 @@ def https_to_http(url) "https://orcid.org/#{orcid}" end + + def mix_in_metrics(metadata_array_objects, metrics_array_hashes) + metadata_array_objects.map do |metadata| + metadata_hash = metadata.to_hash + metrics = metrics_array_hashes.select { |hash| hash[:id] == metadata_hash.doi }.first + Hashie::Mash.new(metrics).shallow_merge(metadata_hash) + end + end end class_methods do diff --git a/app/models/event.rb b/app/models/event.rb index 69ffcfb27..385ef7418 100644 --- a/app/models/event.rb +++ b/app/models/event.rb @@ -234,6 +234,14 @@ def self.usage_count_aggregation } end + def self.multiple_usage_count_aggregation + { + usage: { + terms: { field: "obj_id", size: 50, min_doc_count: 1 } , aggs: { relation_types: { terms: { field: "relation_type_id", size: 50, min_doc_count: 1 }, aggs: { "total_by_type" => { sum: { field: "total" } } } } } + } + } + end + def self.yearly_histogram_aggregation sum_year_distribution = { sum_bucket: { diff --git a/spec/queries/events_query_spec.rb b/spec/queries/events_query_spec.rb index 950d41b40..930c12b6e 100644 --- a/spec/queries/events_query_spec.rb +++ b/spec/queries/events_query_spec.rb @@ -3,11 +3,10 @@ require "rails_helper" describe EventsQuery, elasticsearch: true do - context "citation events" do - let!(:event) { create(:event_for_datacite_related, subj_id:"http://doi.org/10.0260/co.2004960.v2", obj_id:"http://doi.org/10.0260/co.2004960.v1") } - let!(:event_references) { create_list(:event_for_datacite_related, 3, obj_id:"10.5061/dryad.47sd5/2", relation_type_id: "references") } - let!(:copies) { create(:event_for_datacite_related, subj_id:"http://doi.org/10.0260/co.2004960.v2", obj_id:"http://doi.org/10.0260/co.2004960.v1", relation_type_id: "cites") } + let!(:event) { create(:event_for_datacite_related, subj_id: "http://doi.org/10.0260/co.2004960.v2", obj_id: "http://doi.org/10.0260/co.2004960.v1") } + let!(:event_references) { create_list(:event_for_datacite_related, 3, obj_id: "10.5061/dryad.47sd5/2", relation_type_id: "references") } + let!(:copies) { create(:event_for_datacite_related, subj_id: "http://doi.org/10.0260/co.2004960.v2", obj_id: "http://doi.org/10.0260/co.2004960.v1", relation_type_id: "cites") } before do Event.import @@ -26,16 +25,15 @@ results = EventsQuery.new.citations("10.5061/dryad.47sd5/1,10.5061/dryad.47sd5/2,10.0260/co.2004960.v1") citations = results.select { |item| item[:id] == "10.5061/dryad.47sd5/2" }.first no_citations = results.select { |item| item[:id] == "10.5061/dryad.47sd5/1" }.first - + expect(citations[:citations]).to eq(3) # expect(no_citations[:count]).to eq(0) end end - context "usage events" do - let!(:views) { create_list(:event_for_datacite_usage, 1, obj_id:"http://doi.org/10.0260/co.2004960.v1", relation_type_id:"unique-dataset-investigations-regular") } - let!(:downloads) { create_list(:event_for_datacite_usage, 1, obj_id:"http://doi.org/10.0260/co.2004960.v1", relation_type_id:"unique-dataset-requests-regular") } + let!(:views) { create_list(:event_for_datacite_usage, 1, obj_id: "http://doi.org/10.0260/co.2004960.v1", relation_type_id: "unique-dataset-investigations-regular") } + let!(:downloads) { create_list(:event_for_datacite_usage, 1, obj_id: "http://doi.org/10.0260/co.2004960.v1", relation_type_id: "unique-dataset-requests-regular") } before do Event.import @@ -51,13 +49,15 @@ end it "usage" do - expect(EventsQuery.new.usage("10.0260/co.2004960.v1").first).to eq(id: "https://doi.org/10.0260/co.2004960.v1", title: "https://doi.org/10.0260/co.2004960.v1", relationTypes: [{ id: "unique-dataset-requests-regular", title: "unique-dataset-requests-regular", sum: downloads.first.total }, { id: "unique-dataset-investigations-regular", title: "unique-dataset-investigations-regular", sum: views.first.total }]) + response = EventsQuery.new.views_and_downloads("10.0260/co.2004960.v1").first + expect(response[:downloads]).to be > 0 + expect(response[:views]).to be > 0 end end context "mutiple usage events" do - let!(:views) { create_list(:event_for_datacite_usage, 5, relation_type_id:"unique-dataset-investigations-regular") } - let!(:downloads) { create_list(:event_for_datacite_usage, 7, relation_type_id:"unique-dataset-requests-regular") } + let!(:views) { create_list(:event_for_datacite_usage, 5, relation_type_id: "unique-dataset-investigations-regular") } + let!(:downloads) { create_list(:event_for_datacite_usage, 7, relation_type_id: "unique-dataset-requests-regular") } before do Event.import @@ -65,14 +65,13 @@ end it "show views" do - response = EventsQuery.new.views( views.map { |view| view.doi }.join(',')) + response = EventsQuery.new.views(views.map(&:doi).join(",")) # expect(response.size).to eq(5) expect(response.first[:views]).to be > 0 end it "show downloads" do - puts downloads.map { |download| download.doi }.join(',') - response = EventsQuery.new.downloads(downloads.map { |download| download.doi }.join(',')) + response = EventsQuery.new.downloads(downloads.map(&:doi).join(",")) # expect(response.size).to eq(5) expect(response.first[:downloads]).to be > 0 end From ecc29337bcf251ce2d3e4fd7991f5f6adb475859 Mon Sep 17 00:00:00 2001 From: kjgarza Date: Mon, 27 Jan 2020 21:38:37 +0100 Subject: [PATCH 11/13] linting --- app/queries/events_query.rb | 63 ++++++++++++++++++++++++++----------- 1 file changed, 44 insertions(+), 19 deletions(-) diff --git a/app/queries/events_query.rb b/app/queries/events_query.rb index 031f20d3e..280425d6d 100644 --- a/app/queries/events_query.rb +++ b/app/queries/events_query.rb @@ -1,5 +1,7 @@ # frozen_string_literal: true +require "pp" + class EventsQuery include Facetable include Helpable @@ -8,20 +10,20 @@ class EventsQuery ACTIVE_RELATION_TYPES = [ "cites", "is-supplemented-by", - "references" - ] + "references", + ].freeze PASSIVE_RELATION_TYPES = [ - "is-cited-by", - "is-supplement-to", - "is-referenced-by" - ] + "is-cited-by", + "is-supplement-to", + "is-referenced-by", + ].freeze - def initialize - end + def initialize; end def doi_citations(doi) - return nil unless doi.present? + return nil if doi.blank? + pid = Event.new.normalize_doi(doi) query = "(subj_id:\"#{pid}\" AND (relation_type_id:#{PASSIVE_RELATION_TYPES.join(' OR relation_type_id:')})) OR (obj_id:\"#{pid}\" AND (relation_type_id:#{ACTIVE_RELATION_TYPES.join(' OR relation_type_id:')}))" results = Event.query(query, doi: doi, aggregations: "citation_count_aggregation", page: { size: 1, cursor: [] }).response.aggregations.citations.buckets @@ -29,7 +31,8 @@ def doi_citations(doi) end def citations_left_query(dois) - return nil unless dois.present? + return nil if dois.blank? + pids = dois.split(",").map do |doi| Event.new.normalize_doi(doi) end.uniq @@ -41,7 +44,8 @@ def citations_left_query(dois) end def citations_right_query(dois) - return nil unless dois.present? + return nil if dois.blank? + pids = dois.split(",").map do |doi| Event.new.normalize_doi(doi) end.uniq @@ -59,7 +63,8 @@ def citations(dois) end def citations_histogram(doi) - return {} unless doi.present? + return {} if doi.blank? + pid = Event.new.normalize_doi(doi.downcase.split(",").first) query = "(subj_id:\"#{pid}\" AND (relation_type_id:#{PASSIVE_RELATION_TYPES.join(' OR relation_type_id:')})) OR (obj_id:\"#{pid}\" AND (relation_type_id:#{ACTIVE_RELATION_TYPES.join(' OR relation_type_id:')}))" results = Event.query(query, doi: doi, aggregations: "yearly_histogram_aggregation", page: { size: 1, cursor: [] }).response.aggregations @@ -67,14 +72,16 @@ def citations_histogram(doi) end def doi_views(doi) - return nil unless doi.present? + return nil if doi.blank? + query = "(relation_type_id:unique-dataset-investigations-regular AND source_id:datacite-usage)" results = Event.query(query, doi: doi, aggregations: "usage_count_aggregation", page: { size: 1, cursor: [] }).response.aggregations.usage.buckets results.any? ? results.first.dig("total_by_type", "value") : 0 end def views(dois) - return {} unless dois.present? + return {} if dois.blank? + query = "(relation_type_id:unique-dataset-investigations-regular AND source_id:datacite-usage)" results = Event.query(query, doi: dois, aggregations: "usage_count_aggregation", page: { size: 1, cursor: [] }).response.aggregations.usage.buckets @@ -83,9 +90,9 @@ def views(dois) end end - def views_histogram(doi) - return {} unless doi.present? + return {} if doi.blank? + doi = doi.downcase.split(",").first query = "(relation_type_id:unique-dataset-investigations-regular AND source_id:datacite-usage)" results = Event.query(query, doi: doi, aggregations: "monthly_histogram_aggregation", page: { size: 1, cursor: [] }).response.aggregations @@ -93,14 +100,16 @@ def views_histogram(doi) end def doi_downloads(doi) - return nil unless doi.present? + return nil if doi.blank? + query = "(relation_type_id:unique-dataset-requests-regular AND source_id:datacite-usage)" results = Event.query(query, doi: doi, aggregations: "usage_count_aggregation", page: { size: 1, cursor: [] }).response.aggregations.usage.buckets results.any? ? results.first.dig("total_by_type", "value") : 0 end def downloads(dois) - return {} unless dois.present? + return {} if dois.blank? + query = "(relation_type_id:unique-dataset-requests-regular AND source_id:datacite-usage)" results = Event.query(query, doi: dois, aggregations: "usage_count_aggregation", page: { size: 1, cursor: [] }).response.aggregations.usage.buckets @@ -110,13 +119,29 @@ def downloads(dois) end def downloads_histogram(doi) - return {} unless doi.present? + return {} if doi.blank? + doi = doi.downcase.split(",").first query = "(relation_type_id:unique-dataset-requests-regular AND source_id:datacite-usage)" results = Event.query(query, doi: doi, aggregations: "monthly_histogram_aggregation", page: { size: 1, cursor: [] }).response.aggregations facet_counts_by_year_month(results) end + def views_and_downloads(dois) + return {} if dois.blank? + + query = "(relation_type_id:unique-dataset-requests-regular AND source_id:datacite-usage) OR (relation_type_id:unique-dataset-investigations-regular AND source_id:datacite-usage)" + results = Event.query(query, doi: dois, aggregations: "multiple_usage_count_aggregation", page: { size: 1, cursor: [] }).response.aggregations + + results.usage.buckets.map do |bucket| + views = bucket.relation_types.buckets.select { |item| item["key"] == "unique-dataset-investigations-regular" }.first + downloads = bucket.relation_types.buckets.select { |item| item["key"] == "unique-dataset-requests-regular" }.first + views_counts = views.nil? ? 0 : views.dig("total_by_type", "value") + downloads_counts = downloads.nil? ? 0 : downloads.dig("total_by_type", "value") + { id: doi_from_url(bucket[:key]), downloads: downloads_counts, views: views_counts } + end + end + def usage(doi) return {} if doi.blank? From a0f599febb2a01ff94fb4f0e4a64256b471872da Mon Sep 17 00:00:00 2001 From: kjgarza Date: Tue, 28 Jan 2020 13:57:19 +0100 Subject: [PATCH 12/13] fix testing functions --- app/queries/events_query.rb | 32 ++++++++++++++++++++++++++----- spec/queries/events_query_spec.rb | 6 +++++- 2 files changed, 32 insertions(+), 6 deletions(-) diff --git a/app/queries/events_query.rb b/app/queries/events_query.rb index 280425d6d..4ac17a64c 100644 --- a/app/queries/events_query.rb +++ b/app/queries/events_query.rb @@ -59,7 +59,15 @@ def citations_right_query(dois) def citations(dois) right = citations_right_query(dois) left = citations_left_query(dois) - merge_array_hashes(right, left) + hashes = merge_array_hashes(right, left) + + dois_array = dois.split(",").map { |doi| doi } + + dois_array.map do |doi| + result = hashes.select { |item| item[:id] == doi }.first + count = result.nil? ? 0 : result[:citations] + { id: doi, citations: count } + end end def citations_histogram(doi) @@ -85,8 +93,15 @@ def views(dois) query = "(relation_type_id:unique-dataset-investigations-regular AND source_id:datacite-usage)" results = Event.query(query, doi: dois, aggregations: "usage_count_aggregation", page: { size: 1, cursor: [] }).response.aggregations.usage.buckets - results.map do |item| - { id: doi_from_url(item[:key]), views: item.dig("total_by_type", "value") } + # results.map do |item| + # { id: doi_from_url(item[:key]), views: item.dig("total_by_type", "value") } + # end + + dois_array = dois.split(",").map { |doi| doi } + dois_array.map do |doi| + result = results.select { |item| doi_from_url(item[:key]) == doi }.first + count = result.nil? ? 0 : result.dig("total_by_type", "value") + { id: doi, views: count } end end @@ -113,8 +128,15 @@ def downloads(dois) query = "(relation_type_id:unique-dataset-requests-regular AND source_id:datacite-usage)" results = Event.query(query, doi: dois, aggregations: "usage_count_aggregation", page: { size: 1, cursor: [] }).response.aggregations.usage.buckets - results.map do |item| - { id: doi_from_url(item[:key]), downloads: item.dig("total_by_type", "value") } + # results.map do |item| + # { id: doi_from_url(item[:key]), downloads: item.dig("total_by_type", "value") } + # end + + dois_array = dois.split(",").map { |doi| doi } + dois_array.map do |doi| + result = results.select { |item| doi_from_url(item[:key]) == doi }.first + count = result.nil? ? 0 : result.dig("total_by_type", "value") + { id: doi, downloads: count } end end diff --git a/spec/queries/events_query_spec.rb b/spec/queries/events_query_spec.rb index 930c12b6e..ef639e380 100644 --- a/spec/queries/events_query_spec.rb +++ b/spec/queries/events_query_spec.rb @@ -21,13 +21,17 @@ expect(EventsQuery.new.doi_citations("10.5061/dryad.dd47sd5/1")).to eq(0) end + it "doi_citations for non found doi" do + expect(EventsQuery.new.doi_citations("10.5061/d345435341")).to eq(0) + end + it "citations" do results = EventsQuery.new.citations("10.5061/dryad.47sd5/1,10.5061/dryad.47sd5/2,10.0260/co.2004960.v1") citations = results.select { |item| item[:id] == "10.5061/dryad.47sd5/2" }.first no_citations = results.select { |item| item[:id] == "10.5061/dryad.47sd5/1" }.first expect(citations[:citations]).to eq(3) - # expect(no_citations[:count]).to eq(0) + expect(no_citations[:citations]).to eq(0) end end From f3bb62d4c97f6634bfb1ae9263d51a9356d836b2 Mon Sep 17 00:00:00 2001 From: kjgarza Date: Tue, 28 Jan 2020 16:38:11 +0100 Subject: [PATCH 13/13] specs --- app/controllers/concerns/metrics_helper.rb | 26 ++++++++++++++----- app/controllers/dois_controller.rb | 15 ++++++++--- app/queries/events_query.rb | 16 ++++++++++-- app/serializers/doi_serializer.rb | 6 ++--- spec/requests/dois_spec.rb | 30 ++++++++++++++++------ spec/requests/events_spec.rb | 2 +- 6 files changed, 71 insertions(+), 24 deletions(-) diff --git a/app/controllers/concerns/metrics_helper.rb b/app/controllers/concerns/metrics_helper.rb index 874c2d5c3..54346d054 100644 --- a/app/controllers/concerns/metrics_helper.rb +++ b/app/controllers/concerns/metrics_helper.rb @@ -1,3 +1,4 @@ +require "pp" module MetricsHelper extend ActiveSupport::Concern include Helpable @@ -32,18 +33,29 @@ def https_to_http(url) "https://orcid.org/#{orcid}" end - def mix_in_metrics(metadata_array_objects, metrics_array_hashes) + def mix_in_metrics_array(metadata_array_objects, metrics_array_hashes) + return [] if metadata_array_objects.empty? + metadata_array_objects.map do |metadata| metadata_hash = metadata.to_hash - metrics = metrics_array_hashes.select { |hash| hash[:id] == metadata_hash.doi }.first - Hashie::Mash.new(metrics).shallow_merge(metadata_hash) + metrics = metrics_array_hashes.select { |hash| hash[:id] == metadata_hash["_source"]["uid"] }.first + Hashie::Mash.new(metadata_hash)._source.shallow_update(metrics) end end - end - class_methods do - def mix_in_metrics(doi, metrics_array_hashes) - metrics_array_hashes.select { |hash| hash[:id] == doi }.first + def mix_in_metrics(metadata, metrics) + metadata_hash = metadata.attributes + metrics[:doi] = metrics.delete :id + metrics[:uid] = metrics[:doi] + metrics[:doi] = metrics[:doi].upcase + metadata_hash.merge!(metrics) + Hashie::Mash.new(metadata_hash) end end + + # class_methods do + # # def mix_in_metrics(doi, metrics_array_hashes) + # # metrics_array_hashes.select { |hash| hash[:id] == doi }.first + # # end + # end end diff --git a/app/controllers/dois_controller.rb b/app/controllers/dois_controller.rb index 7b4f6a339..34a0cf5d3 100644 --- a/app/controllers/dois_controller.rb +++ b/app/controllers/dois_controller.rb @@ -194,8 +194,11 @@ def index } logger.warn method: "GET", path: "/dois", message: "AggregationsLinkChecks /dois", duration: bm - dois_names = results.map { |result| result.dig(:_source, :doi) }.join(',') - metrics_array = params[:mix_in] == "metrics" ? get_metrics_array(dois_names) : [] + if params[:mix_in] == "metrics" + dois_names = results.map { |result| result.dig(:_source, :doi) }.join(',') + metrics_array = get_metrics_array(dois_names) + results = mix_in_metrics_array(results, metrics_array) + end person_metrics = params[:mix_in] == "metrics" ? get_person_metrics(params[:user_id]) : {} @@ -290,6 +293,11 @@ def show doi = Doi.where(doi: params[:id]).first fail ActiveRecord::RecordNotFound if not_allowed_by_doi_and_user(doi: doi, user: current_user) + if params[:mix_in] == "metrics" + metrics_array = get_metrics_array(doi.uid) || [] + doi = mix_in_metrics(doi, metrics_array.first) + end + respond_to do |format| format.json do options = {} @@ -298,7 +306,8 @@ def show options[:params] = { current_ability: current_ability, detail: true, - affiliation: params[:affiliation] + mix_in: params[:mix_in], + affiliation: params[:affiliation], } render json: DoiSerializer.new(doi, options).serialized_json, status: :ok diff --git a/app/queries/events_query.rb b/app/queries/events_query.rb index 4ac17a64c..2c60d8cac 100644 --- a/app/queries/events_query.rb +++ b/app/queries/events_query.rb @@ -57,6 +57,8 @@ def citations_right_query(dois) end def citations(dois) + return nil if dois.blank? + right = citations_right_query(dois) left = citations_left_query(dois) hashes = merge_array_hashes(right, left) @@ -64,9 +66,9 @@ def citations(dois) dois_array = dois.split(",").map { |doi| doi } dois_array.map do |doi| - result = hashes.select { |item| item[:id] == doi }.first + result = hashes.select { |item| item[:id] == doi.downcase }.first count = result.nil? ? 0 : result[:citations] - { id: doi, citations: count } + { id: doi.downcase, citations: count } end end @@ -155,6 +157,16 @@ def views_and_downloads(dois) query = "(relation_type_id:unique-dataset-requests-regular AND source_id:datacite-usage) OR (relation_type_id:unique-dataset-investigations-regular AND source_id:datacite-usage)" results = Event.query(query, doi: dois, aggregations: "multiple_usage_count_aggregation", page: { size: 1, cursor: [] }).response.aggregations + # dois_array = dois.split(",").map { |doi| doi } + # dois_array.map do |doi| + # views = bucket.relation_types.buckets.select { |item| item["key"] == "unique-dataset-investigations-regular" }.first + # downloads = bucket.relation_types.buckets.select { |item| item["key"] == "unique-dataset-requests-regular" }.first + # views_counts = views.nil? ? 0 : views.dig("total_by_type", "value") + # downloads_counts = downloads.nil? ? 0 : downloads.dig("total_by_type", "value") + # { id: doi.downcase, downloads: downloads_counts, views: views_counts } + # end + + results.usage.buckets.map do |bucket| views = bucket.relation_types.buckets.select { |item| item["key"] == "unique-dataset-investigations-regular" }.first downloads = bucket.relation_types.buckets.select { |item| item["key"] == "unique-dataset-requests-regular" }.first diff --git a/app/serializers/doi_serializer.rb b/app/serializers/doi_serializer.rb index 181a9f49a..d9444f9b7 100644 --- a/app/serializers/doi_serializer.rb +++ b/app/serializers/doi_serializer.rb @@ -90,14 +90,14 @@ class DoiSerializer end attribute :citations, if: Proc.new { |object, params| params && params[:mix_in] == "metrics" } do |object, params| - mix_in_metrics(object.uid, params[:metrics])[:citations] || 0 + object.citations end attribute :views, if: Proc.new { |object, params| params && params[:mix_in] == "metrics" } do |object, params| - mix_in_metrics(object.uid, params[:metrics])[:views] || 0 + object.views end attribute :downloads, if: Proc.new { |object, params| params && params[:mix_in] == "metrics" } do |object, params| - mix_in_metrics(object.uid, params[:metrics])[:downloads] || 0 + object.downloads end end diff --git a/spec/requests/dois_spec.rb b/spec/requests/dois_spec.rb index 1428ac40b..a718fb36e 100644 --- a/spec/requests/dois_spec.rb +++ b/spec/requests/dois_spec.rb @@ -63,11 +63,25 @@ it 'returns the Doi' do get "/dois/#{doi.doi}", nil, headers - expect(last_response.status).to eq(200) + expect(last_response.status).to eq(200) expect(json.dig('data', 'attributes', 'doi')).to eq(doi.doi.downcase) end end + context 'when the record exists request metrics' do + it 'returns the Doi' do + get "/dois/#{doi.doi}?mix-in=metrics", nil, headers + + expect(last_response.status).to eq(200) + result = json.dig('data') + + expect(result.dig('attributes', 'doi')).to eq(doi.doi.downcase) + expect(result.dig('attributes', 'titles')).to eq(doi.titles) + expect(result.dig('attributes','citations')).to eq(0) + # expect(result.dig('attributes','views')).to eq(0) + end + end + context 'when the record does not exist' do it 'returns status code 404' do get "/dois/10.5256/xxxx", nil, headers @@ -132,17 +146,17 @@ context 'when the record exists' do it 'returns the Doi' do get "/dois?mix-in=metrics", nil, headers - puts json.dig('data',0) expect(last_response.status).to eq(200) expect(json['data'].size).to eq(1) + result = json.dig('data').select { |item| item["id"] == doi.doi.downcase }.first expect(json.dig('meta', 'total')).to eq(1) - expect(json.dig('data', 0, 'attributes', 'url')).to eq(doi.url) - expect(json.dig('data', 0, 'attributes', 'doi')).to eq(doi.doi.downcase) - expect(json.dig('data', 0, 'attributes', 'titles')).to eq(doi.titles) - expect(json.dig('data',0,'attributes','citations')).to eq(3) - expect(json.dig('data',0,'attributes','views')).to be > 0 - expect(json.dig('data',0,'attributes','downloads')).to eq(0) + expect(result.dig('attributes', 'url')).to eq(doi.url) + expect(result.dig('attributes', 'doi')).to eq(doi.doi.downcase) + expect(result.dig('attributes', 'titles')).to eq(doi.titles) + expect(result.dig('attributes','citations')).to eq(3) + expect(result.dig('attributes','views')).to be > 0 + expect(result.dig('attributes','downloads')).to eq(0) end end end diff --git a/spec/requests/events_spec.rb b/spec/requests/events_spec.rb index 2df312c04..ab5ad0ded 100644 --- a/spec/requests/events_spec.rb +++ b/spec/requests/events_spec.rb @@ -753,7 +753,7 @@ total = response.dig("meta", "total") expect(total).to eq(51) - expect((citations.select { |doi| dois.split(",").include?(doi["id"]) }).length).to eq(1) + expect((citations.select { |doi| dois.split(",").include?(doi["id"]) }).length).to eq(20) end end