Skip to content

Commit

Permalink
make sure value in elasticsearch script exists. #358
Browse files (browse the repository at this point in the history)
  • Loading branch information
Martin Fenner committed Nov 21, 2019
1 parent d30faa0 commit 7adc2f5
Show file tree
Hide file tree
Showing 2 changed files with 25 additions and 65 deletions.
37 changes: 4 additions & 33 deletions app/controllers/providers_controller.rb
Original file line number Diff line number Diff line change
@@ -1,5 +1,3 @@
require 'benchmark'

class ProvidersController < ApplicationController
include ActionController::MimeResponds
include Countable
Expand Down
Expand Up
@@ -258,40 +256,13 @@ def totals
logger = Logger.new(STDOUT)

page = { size: 0, number: 1 }
response = nil
bmt = Benchmark.ms {
state = current_user.present? && current_user.is_admin_or_staff? && params[:state].present? ? params[:state] : "registered,findable"
response = Doi.query(nil, state: state, page: page, totals_agg: true)
}

if bmt > 10000
logger.warn "[Benchmark Warning] providers totals " + bmt.to_s + " ms"
else
logger.info "[Benchmark] providers totals " + bmt.to_s + " ms"
end

logger.info response.results.inspect

state = current_user.present? && current_user.is_admin_or_staff? && params[:state].present? ? params[:state] : "registered,findable"
response = Doi.query(nil, state: state, page: page, totals_agg: true)
total = response.results.total
registrant = total > 0 ? providers_totals(response.response.aggregations.providers_totals.buckets) : nil

registrant = nil
bmp = Benchmark.ms {
registrant = total > 0 ? providers_totals(response.response.aggregations.providers_totals.buckets) : nil
}
if bmp > 10000
logger.warn "[Benchmark Warning] providers providers_totals " + bmp.to_s + " ms"
else
logger.info "[Benchmark] providers providers_totals " + bmp.to_s + " ms"
end

bmr = Benchmark.ms {
render json: registrant, status: :ok
}
if bmr > 10000
logger.warn "[Benchmark Warning] providers render " + bmr.to_s + " ms"
else
logger.info "[Benchmark] providers render " + bmr.to_s + " ms"
end
render json: registrant, status: :ok
end

# don't delete, but set deleted_at timestamp
Expand Down
53 changes: 21 additions & 32 deletions app/models/event.rb
Original file line number Diff line number Diff line change
Expand Up @@ -96,14 +96,6 @@ class Event < ActiveRecord::Base
"describes", "is-described-by"
]

VIEWS_RELATION_TYPES = [
"unique-dataset-investigations-regular"
]

DOWNLOADS_RELATION_TYPES = [
"unique-dataset-requests-regular"
]

validates :subj_id, :source_id, :source_token, presence: true

attr_accessor :container_title, :url
Expand Down
Expand Up
@@ -215,7 +207,7 @@ def self.query_aggregations(doi=nil)

sum_year_distribution = {
sum_bucket: {
buckets_path: "years>total_by_year"
buckets_path: "years > total_by_year"
}
}

Expand All
@@ -228,7 +220,7 @@ def self.query_aggregations(doi=nil)
relation_types: { terms: { field: 'relation_type_id', size: 50, min_doc_count: 1 }, aggs: { year_months: { date_histogram: { field: 'occurred_at', interval: 'month', min_doc_count: 1 }, aggs: { "total_by_year_month" => { sum: { field: 'total' }}}},"sum_distribution"=>sum_distribution} },
dois: { terms: { field: 'obj_id', size: 50, min_doc_count: 1 }, aggs: { relation_types: { terms: { field: 'relation_type_id',size: 50, min_doc_count: 1 }, aggs: { "total_by_type" => { sum: { field: 'total' }}}}} },
dois_usage: {
filter: { script: { script: "doc['source_id'].value == 'datacite-usage' && doc['occurred_at'].value.getMillis() >= doc['obj.datePublished'].value.getMillis() && doc['occurred_at'].value.getMillis() < new Date().getTime()" }},
filter: { script: { script: "doc['source_id'].value == 'datacite-usage' && doc['occurred_at'].size() > 0 && doc['obj.datePublished'].size() > 0 && doc['occurred_at'].value.getMillis() >= doc['obj.datePublished'].value.getMillis() && doc['occurred_at'].value.getMillis() < new Date().getTime()" }},
aggs: {
dois: { terms: { field: 'obj_id', size: 50, min_doc_count: 1 }, aggs: { relation_types: { terms: { field: 'relation_type_id',size: 50, min_doc_count: 1 }, aggs: { "total_by_type" => { sum: { field: 'total' }}}}} } }
},
Expand All
@@ -243,63 +235,60 @@ def self.query_aggregations(doi=nil)
}
end

def self.metrics_aggregations(doi=nil)
def self.metrics_aggregations(doi = nil)
sum_distribution = {
sum_bucket: {
buckets_path: "year_months>total_by_year_month"
buckets_path: "year_months > total_by_year_month"
}
}

views_filter = {script: {script: "#{VIEWS_RELATION_TYPES}.contains(doc['relation_type_id'].value) && doc['source_id'].value == 'datacite-usage' && doc['occurred_at'].value.getMillis() >= doc['obj.datePublished'].value.getMillis() && doc['occurred_at'].value.getMillis() < new Date().getTime()"}}

downloads_filter = {script: {script: "#{DOWNLOADS_RELATION_TYPES}.contains(doc['relation_type_id'].value) && doc['source_id'].value == 'datacite-usage' && doc['occurred_at'].value.getMillis() >= doc['obj.datePublished'].value.getMillis() && doc['occurred_at'].value.getMillis() < new Date().getTime()"} }
views_filter = { script: { script: "doc['relation_type_id'].value == 'unique-dataset-investigations-regular' && doc['source_id'].value == 'datacite-usage' && doc['occurred_at'].size() > 0 && doc['obj.datePublished'].size() > 0 && doc['occurred_at'].value.getMillis() >= doc['obj.datePublished'].value.getMillis() && doc['occurred_at'].value.getMillis() < new Date().getTime()" } }
downloads_filter = { script: { script: "doc['relation_type_id'].value == 'unique-dataset-requests-regular' && doc['source_id'].value == 'datacite-usage' && doc['occurred_at'].size() > 0 && doc['obj.datePublished'].size() > 0 && doc['occurred_at'].value.getMillis() >= doc['obj.datePublished'].value.getMillis() && doc['occurred_at'].value.getMillis() < new Date().getTime()" } }

{
views: {
filter: views_filter,
aggs: { dois: {
terms: { field: 'obj_id', size: 50, min_doc_count: 1} , aggs: { "total_by_type" => { sum: { field: 'total' }}}
terms: { field: 'obj_id', size: 50, min_doc_count: 1 } , aggs: { "total_by_type" => { sum: { field: 'total' }}}
}}
},
views_histogram: {
filter: views_filter,
aggs: {
year_months: { date_histogram: { field: 'occurred_at', interval: 'month', min_doc_count: 1 }, aggs: { "total_by_year_month" => { sum: { field: 'total' } } } }, "sum_distribution" => sum_distribution
}
},
}
},
downloads: {
filter: downloads_filter,
aggs: { dois: {
terms: { field: 'obj_id', size: 50, min_doc_count: 1} , aggs: { "total_by_type" => { sum: { field: 'total' }}}
}}
},
filter: downloads_filter,
aggs: { dois: {
terms: { field: 'obj_id', size: 50, min_doc_count: 1} , aggs: { "total_by_type" => { sum: { field: 'total' }}}
}}
},
downloads_histogram: {
filter: downloads_filter,
aggs: {
year_months: { date_histogram: { field: 'occurred_at', interval: 'month', min_doc_count: 1 }, aggs: { "total_by_year_month" => { sum: { field: 'total' } } } }, "sum_distribution" => sum_distribution
}
}
}
}
}

end

def self.citations_aggregations(doi)
doi = Event.new.normalize_doi(doi) if doi.present?

sum_year_distribution = {
sum_bucket: {
buckets_path: "years>total_by_year"
buckets_path: "years > total_by_year"
}
}

citations_filter = {script: {script: "(#{PASSIVE_RELATION_TYPES}.contains(doc['relation_type_id'].value) && '#{doi}' == doc['subj_id'].value) || (#{ACTIVE_RELATION_TYPES}.contains(doc['relation_type_id'].value) && '#{doi}' == doc['obj_id'].value)"}}

references_filter = {script: {script: "(#{PASSIVE_RELATION_TYPES}.contains(doc['relation_type_id'].value) && '#{doi}' == doc['obj_id'].value) || (#{ACTIVE_RELATION_TYPES}.contains(doc['relation_type_id'].value) && '#{doi}' == doc['subj_id'].value)"}}
citations_filter = { script: { script: "(#{PASSIVE_RELATION_TYPES}.contains(doc['relation_type_id'].value) && '#{doi}' == doc['subj_id'].value) || (#{ACTIVE_RELATION_TYPES}.contains(doc['relation_type_id'].value) && '#{doi}' == doc['obj_id'].value)" } }
references_filter = { script: { script: "(#{PASSIVE_RELATION_TYPES}.contains(doc['relation_type_id'].value) && '#{doi}' == doc['obj_id'].value) || (#{ACTIVE_RELATION_TYPES}.contains(doc['relation_type_id'].value) && '#{doi}' == doc['subj_id'].value)" } }

{
citations_histogram: {
filter: citations_filter,
aggs: { years: { histogram: { field: 'citation_year', interval: 1 , min_doc_count: 1 }, aggs: { "total_by_year" => { sum: { field: 'total' }}}},"sum_distribution"=>sum_year_distribution}
aggs: { years: { histogram: { field: 'citation_year', interval: 1 , min_doc_count: 1 }, aggs: { "total_by_year" => { sum: { field: 'total' }}}}, "sum_distribution" => sum_year_distribution }
},
citations: {
filter: citations_filter,
Expand All
@@ -314,7 +303,7 @@ def self.citations_aggregations(doi)
}}
},
relations: {
filter: {script: {script: "#{RELATIONS_RELATION_TYPES}.contains(doc['relation_type_id'].value)"}
filter: { script: { script: "#{RELATIONS_RELATION_TYPES}.contains(doc['relation_type_id'].value)" }
},
aggs: { dois: {
terms: { field: 'doi', size: 100, min_doc_count: 1 }, aggs: { total: { cardinality: { field: 'citation_id' }}}
Expand Down

0 comments on commit 7adc2f5

Please sign in to comment.