Skip to content

Commit

Permalink
Merge branch 'master' of https://github.com/datacite/lupo
Browse files Browse the repository at this point in the history
  • Loading branch information
Martin Fenner committed Dec 4, 2019
2 parents a5f660a + 70373f6 commit 611fad8
Show file tree
Hide file tree
Showing 10 changed files with 241 additions and 151 deletions.
32 changes: 18 additions & 14 deletions app/controllers/events_controller.rb
Original file line number Diff line number Diff line change
Expand Up @@ -4,6 +4,8 @@ class EventsController < ApplicationController
include Facetable

include BatchLoaderHelper
require 'benchmark'



prepend_before_action :authenticate_user!, except: [:index, :show]
Expand Down Expand Up @@ -120,16 +122,18 @@ def index
registrants = total.positive? && aggregations.blank? || aggregations.include?("query_aggregations") ? facet_by_registrants(response.response.aggregations.registrants.buckets) : nil
pairings = total.positive? && aggregations.blank? || aggregations.include?("query_aggregations") ? facet_by_pairings(response.response.aggregations.pairings.buckets) : nil
dois = total.positive? && aggregations.blank? || aggregations.include?("query_aggregations") ? facet_by_dois(response.response.aggregations.dois.buckets) : nil
dois_usage = total.positive? && aggregations.blank? || aggregations.include?("query_aggregations") ? facet_by_dois(response.response.aggregations.dois_usage.dois.buckets) : nil
dois_citations = total.positive? && aggregations.blank? || aggregations.include?("query_aggregations") ? facet_citations_by_year_v1(response.response.aggregations.dois_citations) : nil
citations_histogram = total.positive? && params[:doi].present? && aggregations.include?("citations_aggregations") ? facet_citations_by_year(response.response.aggregations.citations_histogram) : nil
citations = params[:doi].present? ? EventsQuery.new.citations(params[:doi]) : []
references = total.positive? && params[:doi].present? && aggregations.include?("citations_aggregations") ? facet_citations_by_dois(response.response.aggregations.references.dois.buckets) : nil
relations = total.positive? && params[:doi].present? && aggregations.include?("citations_aggregations") ? facet_citations_by_dois(response.response.aggregations.relations.dois.buckets) : nil
views_histogram = total.positive? && aggregations.include?("metrics_aggregations") ? facet_counts_by_year_month(response.response.aggregations.views_histogram) : nil
downloads_histogram = total.positive? && aggregations.include?("metrics_aggregations") ? facet_counts_by_year_month(response.response.aggregations.downloads_histogram) : nil
views = total.positive? && aggregations.include?("metrics_aggregations") ? facet_by_source(response.response.aggregations.views.dois.buckets) : nil
downloads = total.positive? && aggregations.include?("metrics_aggregations") ? facet_by_source(response.response.aggregations.downloads.dois.buckets) : nil
dois_usage = total.positive? ? EventsQuery.new.usage(params[:doi]) : nil
# dois_citations = total.positive? && aggregations.blank? || aggregations.include?("query_aggregations") ? facet_citations_by_year_v1(response.response.aggregations.dois_citations) : nil
citations = total.positive? ? EventsQuery.new.citations(params[:doi]) : nil
citations_histogram = total.positive? ? EventsQuery.new.citations_histogram(params[:doi]) : nil
references = total.positive? && aggregations.include?("citations_aggregations") ? facet_citations_by_dois(response.response.aggregations.references.dois.buckets) : nil
relations = total.positive? && aggregations.include?("citations_aggregations") ? facet_citations_by_dois(response.response.aggregations.relations.dois.buckets) : nil

views_histogram = total.positive? ? EventsQuery.new.views_histogram(params[:doi]) : nil
downloads_histogram = total.positive? ? EventsQuery.new.downloads_histogram(params[:doi]) : nil

# views = total.positive? ? EventsQuery.new.views(params[:doi]) : nil
# downloads = total.positive? ? EventsQuery.new.downloads(params[:doi]) : nil
unique_obj_count = total.positive? && aggregations.include?("advanced_aggregations") ? response.response.aggregations.unique_obj_count.value : nil
unique_subj_count = total.positive? && aggregations.include?("advanced_aggregations") ? response.response.aggregations.unique_subj_count.value : nil

Expand All @@ -148,7 +152,7 @@ def index
registrants: registrants,
"doisRelationTypes": dois,
"doisUsageTypes": dois_usage,
"doisCitations": dois_citations,
# "doisCitations": dois_citations,
"citationsHistogram": citations_histogram,
"uniqueCitations": citations,
"references": references,
Expand All @@ -158,9 +162,9 @@ def index
"subjCount": unique_subj_count
},
"viewsHistogram": views_histogram,
"views": views,
"downloadsHistogram": downloads_histogram,
"downloads": downloads
# "views": views,
"downloadsHistogram": downloads_histogram
# "downloads": downloads
}.compact

options[:links] = {
Expand Down
50 changes: 20 additions & 30 deletions app/graphql/types/metric_interface.rb
Original file line number Diff line number Diff line change
Expand Up @@ -19,28 +19,26 @@ def aggregation_results(**args)
end

# Number of views recorded for this object's DOI.
#
# Delegates to EventsQuery#doi_views, passing the DOI that doi_from_url
# extracts from the object's identifier. The earlier aggregation-based
# lookup was removed: it issued an extra search whose result was discarded.
#
# @return whatever EventsQuery#doi_views returns for the DOI
def view_count
  EventsQuery.new.doi_views(doi_from_url(object.identifier))
end

# Number of downloads recorded for this object's DOI.
#
# Delegates to EventsQuery#doi_downloads, passing the DOI that doi_from_url
# extracts from the object's identifier. The earlier aggregation-based
# lookup was removed: it issued an extra search whose result was discarded.
#
# @return whatever EventsQuery#doi_downloads returns for the DOI
def download_count
  EventsQuery.new.doi_downloads(doi_from_url(object.identifier))
end

# Citation count for this object's DOI, as reported by
# EventsQuery#doi_citations for the DOI extracted from the identifier.
def citation_count
  events_query = EventsQuery.new
  doi = doi_from_url(object.identifier)
  events_query.doi_citations(doi)
end

# Reads the "total" => "value" entry of the first references bucket.
#
# @return the value of the first bucket, or nil when there are no buckets
#   or the first bucket carries no total
def reference_count
  buckets = references_aggs
  return nil unless buckets.any?

  totals = buckets.first.fetch("total", {})
  totals.fetch("value", nil)
end
# def reference_count
# meta = references_aggs
# meta.first.fetch("total", {}).fetch("value", nil) if meta.any?
# end

# Reads the "total" => "value" entry of the first relations bucket.
#
# @return the value of the first bucket, or nil when there are no buckets
#   or the first bucket carries no total
def relation_count
  buckets = relations_aggs
  return nil unless buckets.any?

  totals = buckets.first.fetch("total", {})
  totals.fetch("value", nil)
end
# def relation_count
# meta = relations_aggs
# meta.first.fetch("total", {}).fetch("value", nil) if meta.any?
# end

# def references_list
# references_aggs.map { |item| item[:key]}
Expand All @@ -58,27 +56,19 @@ def relation_count
# # end
# end

# Buckets of the `citations.dois` aggregation, requested for this object's
# identifier with the "citations_aggregations" aggregation set.
def citations_aggs
  results = aggregation_results(id: object.identifier, aggregations: "citations_aggregations")
  results.citations.dois.buckets
end
# def citations_aggs
# aggregation_results(id: object.identifier, aggregations: "citations_aggregations" ).citations.dois.buckets
# end

# Buckets of the `references.dois` aggregation, requested for this object's
# identifier with the "citations_aggregations" aggregation set.
def references_aggs
  results = aggregation_results(id: object.identifier, aggregations: "citations_aggregations")
  results.references.dois.buckets
end
# def references_aggs
# aggregation_results(id: object.identifier, aggregations: "citations_aggregations").references.dois.buckets
# end

# Buckets of the `relations.dois` aggregation, requested for this object's
# identifier with the "citations_aggregations" aggregation set.
def relations_aggs
  results = aggregation_results(id: object.identifier, aggregations: "citations_aggregations")
  results.relations.dois.buckets
end
# def relations_aggs
# aggregation_results(id: object.identifier, aggregations: "citations_aggregations").relations.dois.buckets
# end

# Citation histogram for this object's DOI.
#
# Delegates to EventsQuery#citations_histogram, passing the DOI that
# doi_from_url extracts from the object's identifier. The earlier code also
# ran an aggregation query and mapped its year buckets, but that result was
# never returned, so the extra search has been removed.
#
# @return whatever EventsQuery#citations_histogram returns for the DOI
def citation_histogram
  EventsQuery.new.citations_histogram(doi_from_url(object.identifier))
end
end
3 changes: 1 addition & 2 deletions app/models/concerns/indexable.rb
Original file line number Diff line number Diff line change
Expand Up @@ -103,8 +103,7 @@ def get_aggregations_hash(options={})
aggs = {}
aggregations.split(",").each do |agg|
agg = :query_aggregations if agg.blank? || !respond_to?(agg)
doi = options[:doi].present? ? options[:doi].downcase.split(",").first : nil
aggs.merge! send(agg,doi)
aggs.merge! send(agg)
end
aggs
end
Expand Down
98 changes: 23 additions & 75 deletions app/models/event.rb
Original file line number Diff line number Diff line change
Expand Up @@ -200,7 +200,7 @@ def self.query_fields
["subj_id^10", "obj_id^10", "subj.name^5", "subj.author^5", "subj.periodical^5", "subj.publisher^5", "obj.name^5", "obj.author^5", "obj.periodical^5", "obj.publisher^5", "_all"]
end

def self.query_aggregations(doi = nil)
def self.query_aggregations
sum_distribution = {
sum_bucket: {
buckets_path: "year_months>total_by_year_month"
Expand All @@ -220,105 +220,53 @@ def self.query_aggregations(doi = nil)
pairings: { terms: { field: "registrant_id", size: 50, min_doc_count: 1 }, aggs: { recipient: { terms: { field: "registrant_id", size: 50, min_doc_count: 1 }, aggs: { "total" => { sum: { field: "total" } } } } } },
citation_types: { terms: { field: "citation_type", size: 50, min_doc_count: 1 }, aggs: { year_months: { date_histogram: { field: "occurred_at", interval: "month", min_doc_count: 1 }, aggs: { "total_by_year_month" => { sum: { field: "total" } } } } } },
relation_types: { terms: { field: "relation_type_id", size: 50, min_doc_count: 1 }, aggs: { year_months: { date_histogram: { field: "occurred_at", interval: "month", min_doc_count: 1 }, aggs: { "total_by_year_month" => { sum: { field: "total" } } } }, "sum_distribution" => sum_distribution } },
dois: { terms: { field: "obj_id", size: 50, min_doc_count: 1 }, aggs: { relation_types: { terms: { field: "relation_type_id", size: 50, min_doc_count: 1 }, aggs: { "total_by_type" => { sum: { field: "total" } } } } } },
dois_usage: {
filter: { script: { script: "doc['source_id'].value == 'datacite-usage' && doc['occurred_at'].size() > 0 && doc['obj.datePublished'].size() > 0 && doc['occurred_at'].value.getMillis() >= doc['obj.datePublished'].value.getMillis() && doc['occurred_at'].value.getMillis() < new Date().getTime()" } },
aggs: {
dois: { terms: { field: "obj_id", size: 50, min_doc_count: 1 }, aggs: { relation_types: { terms: { field: "relation_type_id", size: 50, min_doc_count: 1 }, aggs: { "total_by_type" => { sum: { field: "total" } } } } } } }
},
dois_citations: {
filter: {
script: {
script: "#{INCLUDED_RELATION_TYPES}.contains(doc['relation_type_id'].value)"
}
},
aggs: { years: { date_histogram: { field: "occurred_at", interval: "year", min_doc_count: 1 }, aggs: { "total_by_year" => { sum: { field: "total" } } } }, "sum_distribution" => sum_year_distribution }
}
dois: { terms: { field: "obj_id", size: 50, min_doc_count: 1 }, aggs: { relation_types: { terms: { field: "relation_type_id", size: 50, min_doc_count: 1 }, aggs: { "total_by_type" => { sum: { field: "total" } } } } } }
}
end

def self.metrics_aggregations(doi = nil)
sum_distribution = {
sum_bucket: {
buckets_path: "year_months>total_by_year_month"


# Aggregation definition named `citations`: a terms aggregation on the "doi"
# field (up to 100 buckets, at least one document each) with a `total`
# sub-aggregation that takes the cardinality of "citation_id".
def self.citation_count_aggregation
  doi_terms = { field: "doi", size: 100, min_doc_count: 1 }
  unique_citations = { cardinality: { field: "citation_id" } }

  { citations: { terms: doi_terms, aggs: { total: unique_citations } } }
end

views_filter = { script: { script: "doc['relation_type_id'].value == 'unique-dataset-investigations-regular' && doc['source_id'].value == 'datacite-usage' && doc['occurred_at'].size() > 0 && doc['obj.datePublished'].size() > 0 && doc['occurred_at'].value.getMillis() >= doc['obj.datePublished'].value.getMillis() && doc['occurred_at'].value.getMillis() < new Date().getTime()" } }
downloads_filter = { script: { script: "doc['relation_type_id'].value == 'unique-dataset-requests-regular' && doc['source_id'].value == 'datacite-usage' && doc['occurred_at'].size() > 0 && doc['obj.datePublished'].size() > 0 && doc['occurred_at'].value.getMillis() >= doc['obj.datePublished'].value.getMillis() && doc['occurred_at'].value.getMillis() < new Date().getTime()" } }

{
views: {
filter: views_filter,
aggs: { dois: {
terms: { field: "obj_id", size: 50, min_doc_count: 1 } , aggs: { "total_by_type" => { sum: { field: "total" } } }
} }
},
views_histogram: {
filter: views_filter,
aggs: {
year_months: { date_histogram: { field: "occurred_at", interval: "month", min_doc_count: 1 }, aggs: { "total_by_year_month" => { sum: { field: "total" } } } }, "sum_distribution" => sum_distribution
}
},
downloads: {
filter: downloads_filter,
aggs: { dois: {
terms: { field: "obj_id", size: 50, min_doc_count: 1 } , aggs: { "total_by_type" => { sum: { field: "total" } } }
} }
},
downloads_histogram: {
filter: downloads_filter,
aggs: {
year_months: { date_histogram: { field: "occurred_at", interval: "month", min_doc_count: 1 }, aggs: { "total_by_year_month" => { sum: { field: "total" } } } }, "sum_distribution" => sum_distribution
}
# Aggregation definition named `usage`: a terms aggregation on the "obj_id"
# field (up to 50 buckets, at least one document each) with a
# "total_by_type" sub-aggregation that sums the "total" field.
def self.usage_count_aggregation
  obj_terms = { field: "obj_id", size: 50, min_doc_count: 1 }
  total_sum = { sum: { field: "total" } }

  { usage: { terms: obj_terms, aggs: { "total_by_type" => total_sum } } }
end

def self.citations_aggregations(doi)
doi = Event.new.normalize_doi(doi) if doi.present?

# Aggregation definition for a per-year histogram.
#
# `years` is a histogram on "citation_year" (interval 1, at least one
# document per bucket) with a "total_by_year" sub-aggregation summing
# "total"; "sum_distribution" is a sum_bucket over "years>total_by_year".
#
# The method takes no DOI. Leftover filter definitions interpolating an
# undefined `doi`, and the references/relations entries that depended on
# them, are removed here.
def self.yearly_histogram_aggregation
  sum_year_distribution = {
    sum_bucket: {
      buckets_path: "years>total_by_year"
    }
  }

  {
    years: { histogram: { field: "citation_year", interval: 1, min_doc_count: 1 }, aggs: { "total_by_year" => { sum: { field: "total" } } } },
    "sum_distribution" => sum_year_distribution
  }
end


def self.citation_count_aggregation(doi)
{
citations: {
terms: { field: "doi", size: 100, min_doc_count: 1 }, aggs: { total: { cardinality: { field: "citation_id" } } }
# Aggregation definition for a per-month histogram.
#
# `year_months` is a date_histogram on "occurred_at" (monthly interval, at
# least one document per bucket) with a "total_by_year_month" sub-aggregation
# summing "total"; "sum_distribution" is a sum_bucket over
# "year_months>total_by_year_month".
def self.monthly_histogram_aggregation
  monthly_buckets = {
    date_histogram: { field: "occurred_at", interval: "month", min_doc_count: 1 },
    aggs: { "total_by_year_month" => { sum: { field: "total" } } }
  }
  bucket_sum = { sum_bucket: { buckets_path: "year_months>total_by_year_month" } }

  { year_months: monthly_buckets, "sum_distribution" => bucket_sum }
end


def self.advanced_aggregations(doi = nil)
def self.advanced_aggregations
{
unique_obj_count: { cardinality: { field: "obj_id" } },
unique_subj_count: { cardinality: { field: "subj_id" } }
Expand Down
86 changes: 81 additions & 5 deletions app/queries/events_query.rb
Original file line number Diff line number Diff line change
@@ -1,34 +1,110 @@
# frozen_string_literal: true

class EventsQuery

include Facetable

# Relation types matched against obj_id when building citation queries.
# Frozen so the shared list cannot be mutated at runtime.
ACTIVE_RELATION_TYPES = [
  "cites",
  "is-supplement-to",
  "is-supplemented-by",
  "references"
].freeze

# Relation types matched against subj_id when building citation queries.
# Frozen so the shared list cannot be mutated at runtime.
PASSIVE_RELATION_TYPES = [
  "is-cited-by",
  "is-supplemented-by",
  "is-supplement-to",
  "is-referenced-by"
].freeze

# Takes no arguments and sets up no state. The duplicated `def initialize()`
# line, which left the definition without a matching `end`, is removed.
def initialize
end

# Citation count for a single DOI.
#
# Searches for events where the normalized DOI is the subject of a passive
# relation type or the object of an active relation type, and reads the
# `citations` aggregation. The search is issued once; the earlier code ran
# the identical query twice.
#
# @param doi [String, nil] a single DOI
# @return [nil] when doi is blank
# @return the `total` value of the first citations bucket, or 0 when no
#   bucket is returned
def doi_citations(doi)
  return nil unless doi.present?

  pid = Event.new.normalize_doi(doi)
  query = "(subj_id:\"#{pid}\" AND (relation_type_id:#{PASSIVE_RELATION_TYPES.join(' OR relation_type_id:')})) OR (obj_id:\"#{pid}\" AND (relation_type_id:#{ACTIVE_RELATION_TYPES.join(' OR relation_type_id:')}))"
  results = Event.query(query, doi: doi, aggregations: "citation_count_aggregation", page: { size: 1, cursor: [] }).response.aggregations.citations.buckets
  results.any? ? results.first.total.value : 0
end

# Citation counts for a comma-separated list of DOIs.
#
# The list is downcased and split on commas; each item is looked up with
# #doi_citations.
#
# @param doi [String, nil] one or more DOIs separated by commas
# @return [Array<Hash>] one `{ id:, count: }` hash per DOI; an empty Array
#   when doi is blank (previously an empty Hash, so the return type differed
#   between the blank and non-blank cases)
def citations(doi)
  return [] unless doi.present?

  doi.downcase.split(",").map do |item|
    { id: item, count: doi_citations(item) }
  end
end

# Yearly citation histogram for the first DOI of a comma-separated list.
#
# Builds the same subject/object relation-type query as the citation count,
# requests the "yearly_histogram_aggregation", and passes the aggregations
# to facet_citations_by_year.
#
# NOTE(review): the query uses only the first DOI, but the unsplit `doi`
# string is passed as the `doi:` option. Confirm this is what Event.query
# expects.
#
# @param doi [String, nil] one or more DOIs separated by commas
# @return [Hash] empty Hash when doi is blank; otherwise the result of
#   facet_citations_by_year
def citations_histogram(doi)
  return {} unless doi.present?

  first_doi = doi.downcase.split(",").first
  pid = Event.new.normalize_doi(first_doi)
  passive_clause = "relation_type_id:#{PASSIVE_RELATION_TYPES.join(' OR relation_type_id:')}"
  active_clause = "relation_type_id:#{ACTIVE_RELATION_TYPES.join(' OR relation_type_id:')}"
  query = "(subj_id:\"#{pid}\" AND (#{passive_clause})) OR (obj_id:\"#{pid}\" AND (#{active_clause}))"

  search = Event.query(
    query,
    doi: doi,
    aggregations: "yearly_histogram_aggregation",
    page: { size: 1, cursor: [] }
  )
  facet_citations_by_year(search.response.aggregations)
end


# View count for a single DOI.
#
# Queries events with relation type "unique-dataset-investigations-regular"
# from source "datacite-usage" and reads the `usage` aggregation.
#
# @param doi [String, nil] a single DOI, passed to Event.query as `doi:`
# @return [nil] when doi is blank
# @return the "total_by_type" => "value" entry of the first usage bucket, or
#   0 when no bucket is returned
def doi_views(doi)
  return unless doi.present?

  search = Event.query(
    "(relation_type_id:unique-dataset-investigations-regular AND source_id:datacite-usage)",
    doi: doi,
    aggregations: "usage_count_aggregation",
    page: { size: 1, cursor: [] }
  )
  buckets = search.response.aggregations.usage.buckets
  return 0 unless buckets.any?

  buckets.first.dig("total_by_type", "value")
end

# View counts for a comma-separated list of DOIs.
#
# The list is downcased and split on commas; each item is looked up with
# #doi_views.
#
# @param doi [String, nil] one or more DOIs separated by commas
# @return [Array<Hash>] one `{ id:, count: }` hash per DOI; an empty Array
#   when doi is blank (previously an empty Hash, so the return type differed
#   between the blank and non-blank cases)
def views(doi)
  return [] unless doi.present?

  doi.downcase.split(",").map do |item|
    { id: item, count: doi_views(item) }
  end
end

# Monthly views histogram for the first DOI of a comma-separated list.
#
# Queries events with relation type "unique-dataset-investigations-regular"
# from source "datacite-usage", requests the "monthly_histogram_aggregation",
# and passes the aggregations to facet_counts_by_year_month.
#
# @param doi [String, nil] one or more DOIs separated by commas
# @return [Hash] empty Hash when doi is blank; otherwise the result of
#   facet_counts_by_year_month
def views_histogram(doi)
  return {} unless doi.present?

  first_doi = doi.downcase.split(",").first
  search = Event.query(
    "(relation_type_id:unique-dataset-investigations-regular AND source_id:datacite-usage)",
    doi: first_doi,
    aggregations: "monthly_histogram_aggregation",
    page: { size: 1, cursor: [] }
  )

  facet_counts_by_year_month(search.response.aggregations)
end

# Download count for a single DOI.
#
# Queries events with relation type "unique-dataset-requests-regular" from
# source "datacite-usage" and reads the `usage` aggregation.
#
# @param doi [String, nil] a single DOI, passed to Event.query as `doi:`
# @return [nil] when doi is blank
# @return the "total_by_type" => "value" entry of the first usage bucket, or
#   0 when no bucket is returned
def doi_downloads(doi)
  return unless doi.present?

  search = Event.query(
    "(relation_type_id:unique-dataset-requests-regular AND source_id:datacite-usage)",
    doi: doi,
    aggregations: "usage_count_aggregation",
    page: { size: 1, cursor: [] }
  )
  buckets = search.response.aggregations.usage.buckets
  return 0 unless buckets.any?

  buckets.first.dig("total_by_type", "value")
end

# Download counts for a comma-separated list of DOIs.
#
# The list is downcased and split on commas; each item is looked up with
# #doi_downloads.
#
# @param doi [String, nil] one or more DOIs separated by commas
# @return [Array<Hash>] one `{ id:, count: }` hash per DOI; an empty Array
#   when doi is blank (previously an empty Hash, so the return type differed
#   between the blank and non-blank cases)
def downloads(doi)
  return [] unless doi.present?

  doi.downcase.split(",").map do |item|
    { id: item, count: doi_downloads(item) }
  end
end

# Monthly downloads histogram for the first DOI of a comma-separated list.
#
# Queries events with relation type "unique-dataset-requests-regular" from
# source "datacite-usage", requests the "monthly_histogram_aggregation", and
# passes the aggregations to facet_counts_by_year_month.
#
# @param doi [String, nil] one or more DOIs separated by commas
# @return [Hash] empty Hash when doi is blank; otherwise the result of
#   facet_counts_by_year_month
def downloads_histogram(doi)
  return {} unless doi.present?

  first_doi = doi.downcase.split(",").first
  search = Event.query(
    "(relation_type_id:unique-dataset-requests-regular AND source_id:datacite-usage)",
    doi: first_doi,
    aggregations: "monthly_histogram_aggregation",
    page: { size: 1, cursor: [] }
  )

  facet_counts_by_year_month(search.response.aggregations)
end

# Usage summary (downloads and views) for a comma-separated list of DOIs.
#
# Each DOI is downcased, normalized with Event#normalize_doi for the id and
# title, and looked up with #doi_downloads and #doi_views. The lookups run
# on this instance rather than allocating a new EventsQuery per call.
#
# @param doi [String, nil] one or more DOIs separated by commas
# @return [Array<Hash>] one hash per DOI with :id, :title and :relationTypes
#   (requests first, then investigations); an empty Array when doi is blank
#   (previously an empty Hash, so the return type differed between the blank
#   and non-blank cases)
def usage(doi)
  return [] unless doi.present?

  doi.downcase.split(",").map do |item|
    pid = Event.new.normalize_doi(item)
    requests = doi_downloads(item)
    investigations = doi_views(item)
    {
      id: pid,
      title: pid,
      relationTypes: [
        {
          id: "unique-dataset-requests-regular",
          title: "unique-dataset-requests-regular",
          sum: requests
        },
        {
          id: "unique-dataset-investigations-regular",
          title: "unique-dataset-investigations-regular",
          sum: investigations
        }
      ]
    }
  end
end
end
Loading

0 comments on commit 611fad8

Please sign in to comment.