diff --git a/app/controllers/concerns/metrics_helper.rb b/app/controllers/concerns/metrics_helper.rb index c7f61cce2..54346d054 100644 --- a/app/controllers/concerns/metrics_helper.rb +++ b/app/controllers/concerns/metrics_helper.rb @@ -1,17 +1,61 @@ +require "pp" module MetricsHelper extend ActiveSupport::Concern + include Helpable - class_methods do - def doi_citations(doi) - EventsQuery.new.doi_citations(doi) + included do + def get_metrics_array(dois) + citations = EventsQuery.new.citations(dois) + usage = EventsQuery.new.views_and_downloads(dois) + merge_array_hashes(citations, usage) end - def doi_views(doi) - EventsQuery.new.doi_views(doi) + def get_person_metrics(orcid) + dois = get_person_dois(orcid).join(",") + usage = EventsQuery.new.views_and_downloads(dois) + { + citations: EventsQuery.new.citations(dois).sum { |h| h[:citations] }, + views: usage.sum { |h| h[:views] }, + downloads: usage.sum { |h| h[:downloads] }, + } end - def doi_downloads(doi) - EventsQuery.new.doi_downloads(doi) + def get_person_dois(orcid) + Event.query(nil, page: { size: 500 }, obj_id: https_to_http(orcid)).results.to_a.map do |e| + doi_from_url(e.subj_id) + end + end + + def https_to_http(url) + orcid = orcid_from_url(url) + return nil if orcid.blank? + + "https://orcid.org/#{orcid}" + end + + def mix_in_metrics_array(metadata_array_objects, metrics_array_hashes) + return [] if metadata_array_objects.empty? 
+ + metadata_array_objects.map do |metadata| + metadata_hash = metadata.to_hash + metrics = metrics_array_hashes.find { |hash| hash[:id] == metadata_hash["_source"]["uid"] } || {} + Hashie::Mash.new(metadata_hash)._source.shallow_update(metrics) + end + end + + def mix_in_metrics(metadata, metrics) + metadata_hash = metadata.attributes + metrics[:doi] = metrics.delete :id + metrics[:uid] = metrics[:doi] + metrics[:doi] = metrics[:doi].upcase + metadata_hash.merge!(metrics) + Hashie::Mash.new(metadata_hash) end end + + # class_methods do + # # def mix_in_metrics(doi, metrics_array_hashes) + # # metrics_array_hashes.select { |hash| hash[:id] == doi }.first + # # end + # end end diff --git a/app/controllers/dois_controller.rb b/app/controllers/dois_controller.rb index 5dc335205..c509e4158 100644 --- a/app/controllers/dois_controller.rb +++ b/app/controllers/dois_controller.rb @@ -5,6 +5,8 @@ class DoisController < ApplicationController include ActionController::MimeResponds include Crosscitable + include MetricsHelper # mixes in the metrics helpers as instance methods + prepend_before_action :authenticate_user! before_action :set_include, only: [:index, :show, :create, :update] @@ -198,6 +200,13 @@ def index } logger.warn method: "GET", path: "/dois", message: "AggregationsLinkChecks /dois", duration: bm + if params[:mix_in] == "metrics" + dois_names = results.map { |result| result.dig(:_source, :doi) }.join(',') + metrics_array = get_metrics_array(dois_names) + results = mix_in_metrics_array(results, metrics_array) + end + + person_metrics = params[:mix_in] == "metrics" && params[:user_id].present? ? 
get_person_metrics(params[:user_id]) : {} respond_to do |format| format.json do @@ -225,6 +234,9 @@ def index "linkChecksDcIdentifier" => link_checks_dc_identifier, "linkChecksCitationDoi" => link_checks_citation_doi, subjects: subjects, + citations: person_metrics[:citations], + views: person_metrics[:views], + downloads: person_metrics[:downloads], }.compact options[:links] = { @@ -247,6 +259,7 @@ def index detail: params[:detail], events: params[:events], mix_in: params[:mix_in], + metrics: metrics_array, affiliation: params[:affiliation], is_collection: options[:is_collection] } @@ -286,6 +299,11 @@ def show doi = Doi.where(doi: params[:id]).first fail ActiveRecord::RecordNotFound if not_allowed_by_doi_and_user(doi: doi, user: current_user) + if params[:mix_in] == "metrics" + metrics_array = get_metrics_array(doi.uid) || [] + doi = mix_in_metrics(doi, metrics_array.first) + end + respond_to do |format| format.json do options = {} @@ -295,7 +313,8 @@ def show current_ability: current_ability, events: params[:events], detail: true, - affiliation: params[:affiliation] + mix_in: params[:mix_in], + affiliation: params[:affiliation], } render json: DoiSerializer.new(doi, options).serialized_json, status: :ok diff --git a/app/graphql/types/person_type.rb b/app/graphql/types/person_type.rb index b892815d1..09a118b4a 100644 --- a/app/graphql/types/person_type.rb +++ b/app/graphql/types/person_type.rb @@ -61,29 +61,26 @@ def creative_works(**_args) end def citation_count(**_args) - dois = Event.query(nil, page: { size: 500 }, obj_id: https_to_http(object[:id])).results.to_a.map do |e| - doi_from_url(e.subj_id) - end - EventsQuery.new.citations(dois.join(",")).sum { |h| h[:count] } + EventsQuery.new.citations(get_dois.join(",")).sum { |h| h[:citations] } end def view_count(**_args) - dois = Event.query(nil, page: { size: 500 }, obj_id: https_to_http(object[:id])).results.to_a.map do |e| - doi_from_url(e.subj_id) - end - EventsQuery.new.views(dois.join(",")).sum { |h| 
h[:count] } + EventsQuery.new.views(get_dois.join(",")).sum { |h| h[:views] } end def download_count(**_args) - dois = Event.query(nil, page: { size: 500 }, obj_id: https_to_http(object[:id])).results.to_a.map do |e| + EventsQuery.new.downloads(get_dois.join(",")).sum { |h| h[:downloads] } + end + + def get_dois + Event.query(nil, page: { size: 500 }, obj_id: https_to_http(object[:id])).results.to_a.map do |e| doi_from_url(e.subj_id) end - EventsQuery.new.downloads(dois.join(",")).sum { |h| h[:count] } end def https_to_http(url) orcid = orcid_from_url(url) - return nil unless orcid.present? + return nil if orcid.blank? "https://orcid.org/#{orcid}" end diff --git a/app/models/concerns/helpable.rb b/app/models/concerns/helpable.rb index 1f654865c..199c32cad 100644 --- a/app/models/concerns/helpable.rb +++ b/app/models/concerns/helpable.rb @@ -66,6 +66,17 @@ def register_url response end + def merge_array_hashes(first_array, second_array) + return first_array if second_array.blank? + return second_array if first_array.blank? 
+ + total = first_array | second_array + total.group_by { |hash| hash[:id] }.map do |key, value| + metrics = value.reduce(&:merge) + { id: key }.merge(metrics) + end + end + def get_url url = "#{ENV['HANDLE_URL']}/api/handles/#{doi}?index=1" response = Maremma.get(url, ssl_self_signed: true, timeout: 10) diff --git a/app/models/event.rb b/app/models/event.rb index a258dafc3..fd18e945a 100644 --- a/app/models/event.rb +++ b/app/models/event.rb @@ -242,6 +242,14 @@ def self.usage_count_aggregation } end + def self.multiple_usage_count_aggregation + { + usage: { + terms: { field: "obj_id", size: 50, min_doc_count: 1 } , aggs: { relation_types: { terms: { field: "relation_type_id", size: 50, min_doc_count: 1 }, aggs: { "total_by_type" => { sum: { field: "total" } } } } } + } + } + end + def self.yearly_histogram_aggregation sum_year_distribution = { sum_bucket: { diff --git a/app/queries/events_query.rb b/app/queries/events_query.rb index 0018fb7b4..2c60d8cac 100644 --- a/app/queries/events_query.rb +++ b/app/queries/events_query.rb @@ -1,63 +1,115 @@ # frozen_string_literal: true +require "pp" + class EventsQuery include Facetable + include Helpable + include Modelable ACTIVE_RELATION_TYPES = [ "cites", "is-supplemented-by", - "references" - ] + "references", + ].freeze PASSIVE_RELATION_TYPES = [ - "is-cited-by", - "is-supplement-to", - "is-referenced-by" - ] + "is-cited-by", + "is-supplement-to", + "is-referenced-by", + ].freeze - def initialize - end + def initialize; end def doi_citations(doi) - return nil unless doi.present? + return nil if doi.blank? + pid = Event.new.normalize_doi(doi) query = "(subj_id:\"#{pid}\" AND (relation_type_id:#{PASSIVE_RELATION_TYPES.join(' OR relation_type_id:')})) OR (obj_id:\"#{pid}\" AND (relation_type_id:#{ACTIVE_RELATION_TYPES.join(' OR relation_type_id:')}))" results = Event.query(query, doi: doi, aggregations: "citation_count_aggregation", page: { size: 1, cursor: [] }).response.aggregations.citations.buckets results.any? ? 
results.first.total.value : 0 end - def citations(doi) - return {} unless doi.present? - doi.downcase.split(",").map do |item| - { id: item, count: EventsQuery.new.doi_citations(item) } + def citations_left_query(dois) + return nil if dois.blank? + + pids = dois.split(",").map do |doi| + Event.new.normalize_doi(doi) + end.uniq + query = "((subj_id:\"#{pids.join('" OR subj_id:"')}\" ) AND (relation_type_id:#{PASSIVE_RELATION_TYPES.join(' OR relation_type_id:')}))" + results = Event.query(query, doi: dois, aggregations: "citation_count_aggregation", page: { size: 1, cursor: [] }).response.aggregations.citations.buckets + results.map do |item| + { id: item[:key], citations: item.total.value } + end + end + + def citations_right_query(dois) + return nil if dois.blank? + + pids = dois.split(",").map do |doi| + Event.new.normalize_doi(doi) + end.uniq + query = "((obj_id:\"#{pids.join('" OR obj_id:"')}\") AND (relation_type_id:#{ACTIVE_RELATION_TYPES.join(' OR relation_type_id:')}))" + results = Event.query(query, doi: dois, aggregations: "citation_count_aggregation", page: { size: 1, cursor: [] }).response.aggregations.citations.buckets + results.map do |item| + { id: item[:key], citations: item.total.value } + end + end + + def citations(dois) + return [] if dois.blank? + + right = citations_right_query(dois) + left = citations_left_query(dois) + hashes = merge_array_hashes(right, left) + + dois_array = dois.split(",") + + dois_array.map do |doi| + result = hashes.find { |item| item[:id] == doi.downcase } + count = result.nil? ? 0 : result[:citations] + { id: doi.downcase, citations: count } end end def citations_histogram(doi) - return {} unless doi.present? + return {} if doi.blank? 
+ pid = Event.new.normalize_doi(doi.downcase.split(",").first) query = "(subj_id:\"#{pid}\" AND (relation_type_id:#{PASSIVE_RELATION_TYPES.join(' OR relation_type_id:')})) OR (obj_id:\"#{pid}\" AND (relation_type_id:#{ACTIVE_RELATION_TYPES.join(' OR relation_type_id:')}))" results = Event.query(query, doi: doi, aggregations: "yearly_histogram_aggregation", page: { size: 1, cursor: [] }).response.aggregations facet_citations_by_year(results) end - def doi_views(doi) - return nil unless doi.present? + return nil if doi.blank? + query = "(relation_type_id:unique-dataset-investigations-regular AND source_id:datacite-usage)" results = Event.query(query, doi: doi, aggregations: "usage_count_aggregation", page: { size: 1, cursor: [] }).response.aggregations.usage.buckets results.any? ? results.first.dig("total_by_type", "value") : 0 end - def views(doi) - return {} unless doi.present? - doi.downcase.split(",").map do |item| - { id: item, count: EventsQuery.new.doi_views(item) } + def views(dois) + return {} if dois.blank? + + query = "(relation_type_id:unique-dataset-investigations-regular AND source_id:datacite-usage)" + results = Event.query(query, doi: dois, aggregations: "usage_count_aggregation", page: { size: 1, cursor: [] }).response.aggregations.usage.buckets + + # results.map do |item| + # { id: doi_from_url(item[:key]), views: item.dig("total_by_type", "value") } + # end + + dois_array = dois.downcase.split(",") + dois_array.map do |doi| + result = results.select { |item| doi_from_url(item[:key]) == doi }.first + count = result.nil? ? 0 : result.dig("total_by_type", "value") + { id: doi, views: count } end end def views_histogram(doi) - return {} unless doi.present? + return {} if doi.blank? 
+ doi = doi.downcase.split(",").first query = "(relation_type_id:unique-dataset-investigations-regular AND source_id:datacite-usage)" results = Event.query(query, doi: doi, aggregations: "monthly_histogram_aggregation", page: { size: 1, cursor: [] }).response.aggregations @@ -65,46 +117,82 @@ def views_histogram(doi) end def doi_downloads(doi) - return nil unless doi.present? + return nil if doi.blank? + query = "(relation_type_id:unique-dataset-requests-regular AND source_id:datacite-usage)" results = Event.query(query, doi: doi, aggregations: "usage_count_aggregation", page: { size: 1, cursor: [] }).response.aggregations.usage.buckets results.any? ? results.first.dig("total_by_type", "value") : 0 end - def downloads(doi) - return {} unless doi.present? - doi.downcase.split(",").map do |item| - { id: item, count: EventsQuery.new.doi_downloads(item) } + def downloads(dois) + return {} if dois.blank? + + query = "(relation_type_id:unique-dataset-requests-regular AND source_id:datacite-usage)" + results = Event.query(query, doi: dois, aggregations: "usage_count_aggregation", page: { size: 1, cursor: [] }).response.aggregations.usage.buckets + + # results.map do |item| + # { id: doi_from_url(item[:key]), downloads: item.dig("total_by_type", "value") } + # end + + dois_array = dois.downcase.split(",") + dois_array.map do |doi| + result = results.select { |item| doi_from_url(item[:key]) == doi }.first + count = result.nil? ? 0 : result.dig("total_by_type", "value") + { id: doi, downloads: count } end end def downloads_histogram(doi) - return {} unless doi.present? + return {} if doi.blank? + doi = doi.downcase.split(",").first query = "(relation_type_id:unique-dataset-requests-regular AND source_id:datacite-usage)" results = Event.query(query, doi: doi, aggregations: "monthly_histogram_aggregation", page: { size: 1, cursor: [] }).response.aggregations facet_counts_by_year_month(results) end + def views_and_downloads(dois) + return {} if dois.blank? 
+ + query = "(relation_type_id:unique-dataset-requests-regular AND source_id:datacite-usage) OR (relation_type_id:unique-dataset-investigations-regular AND source_id:datacite-usage)" + results = Event.query(query, doi: dois, aggregations: "multiple_usage_count_aggregation", page: { size: 1, cursor: [] }).response.aggregations + + # dois_array = dois.split(",").map { |doi| doi } + # dois_array.map do |doi| + # views = bucket.relation_types.buckets.select { |item| item["key"] == "unique-dataset-investigations-regular" }.first + # downloads = bucket.relation_types.buckets.select { |item| item["key"] == "unique-dataset-requests-regular" }.first + # views_counts = views.nil? ? 0 : views.dig("total_by_type", "value") + # downloads_counts = downloads.nil? ? 0 : downloads.dig("total_by_type", "value") + # { id: doi.downcase, downloads: downloads_counts, views: views_counts } + # end + + + results.usage.buckets.map do |bucket| + views = bucket.relation_types.buckets.select { |item| item["key"] == "unique-dataset-investigations-regular" }.first + downloads = bucket.relation_types.buckets.select { |item| item["key"] == "unique-dataset-requests-regular" }.first + views_counts = views.nil? ? 0 : views.dig("total_by_type", "value") + downloads_counts = downloads.nil? ? 0 : downloads.dig("total_by_type", "value") + { id: doi_from_url(bucket[:key]), downloads: downloads_counts, views: views_counts } + end + end + def usage(doi) - return {} unless doi.present? + return {} if doi.blank? 
+ doi.downcase.split(",").map do |item| pid = Event.new.normalize_doi(item) requests = EventsQuery.new.doi_downloads(item) investigations = EventsQuery.new.doi_views(item) - { id: pid, + { id: pid, title: pid, - relationTypes: [ - { id: "unique-dataset-requests-regular", - title: "unique-dataset-requests-regular", - sum: requests - }, - { id: "unique-dataset-investigations-regular", - title: "unique-dataset-investigations-regular", - sum: investigations - } - ] - } + relationTypes: [ + { id: "unique-dataset-requests-regular", + title: "unique-dataset-requests-regular", + sum: requests }, + { id: "unique-dataset-investigations-regular", + title: "unique-dataset-investigations-regular", + sum: investigations }, + ] } end end end diff --git a/app/serializers/doi_serializer.rb b/app/serializers/doi_serializer.rb index 4881452e1..80ac42155 100644 --- a/app/serializers/doi_serializer.rb +++ b/app/serializers/doi_serializer.rb @@ -91,15 +91,15 @@ class DoiSerializer object.landing_page end - attribute :citations, if: Proc.new { |object, params| params && params[:mix_in] == "metrics" } do |object| - doi_citations(object.uid) + attribute :citations, if: Proc.new { |object, params| params && params[:mix_in] == "metrics" } do |object, params| + object.citations end - # attribute :views, if: Proc.new { |object, params| params && params[:mix_in] == "metrics" } do |object| - # doi_views(object.uid) - # end + attribute :views, if: Proc.new { |object, params| params && params[:mix_in] == "metrics" } do |object, params| + object.views + end - # attribute :downloads, if: Proc.new { |object, params| params && params[:mix_in] == "metrics" } do |object| - # doi_downloads(object.uid) - # end + attribute :downloads, if: Proc.new { |object, params| params && params[:mix_in] == "metrics" } do |object, params| + object.downloads + end end diff --git a/spec/queries/events_query_spec.rb b/spec/queries/events_query_spec.rb index 1ea2b92a1..ef639e380 100644 --- 
a/spec/queries/events_query_spec.rb +++ b/spec/queries/events_query_spec.rb @@ -3,11 +3,10 @@ require "rails_helper" describe EventsQuery, elasticsearch: true do - context "citation events" do - let!(:event) { create(:event_for_datacite_related, subj_id:"http://doi.org/10.0260/co.2004960.v2", obj_id:"http://doi.org/10.0260/co.2004960.v1") } - let!(:event_references) { create_list(:event_for_datacite_related, 3, obj_id:"10.5061/dryad.47sd5/2", relation_type_id: "references") } - let!(:copies) { create(:event_for_datacite_related, subj_id:"http://doi.org/10.0260/co.2004960.v2", obj_id:"http://doi.org/10.0260/co.2004960.v1", relation_type_id: "cites") } + let!(:event) { create(:event_for_datacite_related, subj_id: "http://doi.org/10.0260/co.2004960.v2", obj_id: "http://doi.org/10.0260/co.2004960.v1") } + let!(:event_references) { create_list(:event_for_datacite_related, 3, obj_id: "10.5061/dryad.47sd5/2", relation_type_id: "references") } + let!(:copies) { create(:event_for_datacite_related, subj_id: "http://doi.org/10.0260/co.2004960.v2", obj_id: "http://doi.org/10.0260/co.2004960.v1", relation_type_id: "cites") } before do Event.import @@ -22,19 +21,23 @@ expect(EventsQuery.new.doi_citations("10.5061/dryad.dd47sd5/1")).to eq(0) end + it "doi_citations for non found doi" do + expect(EventsQuery.new.doi_citations("10.5061/d345435341")).to eq(0) + end + it "citations" do results = EventsQuery.new.citations("10.5061/dryad.47sd5/1,10.5061/dryad.47sd5/2,10.0260/co.2004960.v1") citations = results.select { |item| item[:id] == "10.5061/dryad.47sd5/2" }.first no_citations = results.select { |item| item[:id] == "10.5061/dryad.47sd5/1" }.first - expect(citations[:count]).to eq(3) - expect(no_citations[:count]).to eq(0) + + expect(citations[:citations]).to eq(3) + expect(no_citations[:citations]).to eq(0) end end - context "usage events" do - let!(:views) { create_list(:event_for_datacite_usage, 1, obj_id:"http://doi.org/10.0260/co.2004960.v1", 
relation_type_id:"unique-dataset-investigations-regular") } - let!(:downloads) { create_list(:event_for_datacite_usage, 1, obj_id:"http://doi.org/10.0260/co.2004960.v1", relation_type_id:"unique-dataset-requests-regular") } + let!(:views) { create_list(:event_for_datacite_usage, 1, obj_id: "http://doi.org/10.0260/co.2004960.v1", relation_type_id: "unique-dataset-investigations-regular") } + let!(:downloads) { create_list(:event_for_datacite_usage, 1, obj_id: "http://doi.org/10.0260/co.2004960.v1", relation_type_id: "unique-dataset-requests-regular") } before do Event.import @@ -50,7 +53,31 @@ end it "usage" do - expect(EventsQuery.new.usage("10.0260/co.2004960.v1").first).to eq(id: "https://doi.org/10.0260/co.2004960.v1", title: "https://doi.org/10.0260/co.2004960.v1", relationTypes: [{ id: "unique-dataset-requests-regular", title: "unique-dataset-requests-regular", sum: downloads.first.total }, { id: "unique-dataset-investigations-regular", title: "unique-dataset-investigations-regular", sum: views.first.total }]) + response = EventsQuery.new.views_and_downloads("10.0260/co.2004960.v1").first + expect(response[:downloads]).to be > 0 + expect(response[:views]).to be > 0 + end + end + + context "mutiple usage events" do + let!(:views) { create_list(:event_for_datacite_usage, 5, relation_type_id: "unique-dataset-investigations-regular") } + let!(:downloads) { create_list(:event_for_datacite_usage, 7, relation_type_id: "unique-dataset-requests-regular") } + + before do + Event.import + sleep 1 + end + + it "show views" do + response = EventsQuery.new.views(views.map(&:doi).join(",")) + # expect(response.size).to eq(5) + expect(response.first[:views]).to be > 0 + end + + it "show downloads" do + response = EventsQuery.new.downloads(downloads.map(&:doi).join(",")) + # expect(response.size).to eq(5) + expect(response.first[:downloads]).to be > 0 end end end diff --git a/spec/requests/dois_spec.rb b/spec/requests/dois_spec.rb index 1b0e821d2..33520d565 100644 --- 
a/spec/requests/dois_spec.rb +++ b/spec/requests/dois_spec.rb @@ -63,11 +63,25 @@ it 'returns the Doi' do get "/dois/#{doi.doi}", nil, headers - expect(last_response.status).to eq(200) + expect(last_response.status).to eq(200) expect(json.dig('data', 'attributes', 'doi')).to eq(doi.doi.downcase) end end + context 'when the record exists request metrics' do + it 'returns the Doi' do + get "/dois/#{doi.doi}?mix-in=metrics", nil, headers + + expect(last_response.status).to eq(200) + result = json.dig('data') + + expect(result.dig('attributes', 'doi')).to eq(doi.doi.downcase) + expect(result.dig('attributes', 'titles')).to eq(doi.titles) + expect(result.dig('attributes','citations')).to eq(0) + # expect(result.dig('attributes','views')).to eq(0) + end + end + context 'when the record does not exist' do it 'returns status code 404' do get "/dois/10.5256/xxxx", nil, headers @@ -135,6 +149,7 @@ expect(last_response.status).to eq(200) expect(json['data'].size).to eq(1) + result = json.dig('data').select { |item| item["id"] == doi.doi.downcase }.first expect(json.dig('meta', 'total')).to eq(1) expect(json.dig('data', 0, 'attributes', 'url')).to eq(doi.url) expect(json.dig('data', 0, 'attributes', 'doi')).to eq(doi.doi.downcase) diff --git a/spec/requests/events_spec.rb b/spec/requests/events_spec.rb index 9e624c414..ec97d9ada 100644 --- a/spec/requests/events_spec.rb +++ b/spec/requests/events_spec.rb @@ -623,7 +623,7 @@ total = response.dig("meta", "total") expect(total).to eq(6) - expect(citations.first["count"]).to eq(5) + expect(citations.first["citations"]).to eq(5) expect(citations.first["id"]).to start_with("10.5061/dryad.47sd5e/") end end @@ -660,7 +660,7 @@ expect(json.dig("meta", "citationsHistogram", "years", 0, "title")).to eq("2015") expect(total).to eq(5) - expect(citations.first["count"]).to eq(2) + expect(citations.first["citations"]).to eq(2) expect(citations.first["id"]).to eq(doi) # expect(references.first["count"]).to eq(2) # 
expect(references.first["id"]).to eq(doi) @@ -721,7 +721,7 @@ total = response.dig("meta", "total") expect(total).to eq(2) - expect(citations.first["count"]).to eq(1) + expect(citations.first["citations"]).to eq(1) expect(citations.first["id"]).to eq("10.0260/co.2004960.v1") end end