diff --git a/app/controllers/concerns/facetable.rb b/app/controllers/concerns/facetable.rb
index 99a7d8942..32aba70e0 100644
--- a/app/controllers/concerns/facetable.rb
+++ b/app/controllers/concerns/facetable.rb
@@ -134,14 +134,6 @@ def facet_by_source(arr)
       end
     end
 
-    # def facet_citations_by_year(hash)
-    #   hash.map do |hsh|
-    #     { "id" => hsh["key"].to_i,
-    #       "title" => hsh["key"],
-    #       "count" => hsh["doc_count"] }
-    #   end
-    # end
-
     def facet_citations_by_year(hash)
       arr = hash.dig('years', 'buckets').map do |h|
         year = h['key_as_string'][0..3].to_i
@@ -156,6 +148,25 @@ def facet_citations_by_year(hash)
         "years" => arr }
     end
 
+    # Formats a monthly date_histogram aggregation as a facet.
+    #
+    # @param hash [Hash] aggregation with "year_months" buckets and a
+    #   "sum_distribution" value
+    # @return [Hash] "count" (overall sum) and "yearMonths" (one entry per bucket)
+    def facet_counts_by_year_month(hash)
+      arr = Array(hash.dig("year_months", "buckets")).map do |h|
+        year_month = h["key_as_string"][0..6] # e.g. "2019-05", assuming ISO-formatted keys
+        month = year_month[5..6].to_i
+
+        { "id" => year_month,
+          "title" => "#{I18n.t('date.month_names')[month]} #{year_month[0..3]}",
+          "sum" => h.dig("total_by_year_month", "value") }
+      end
+
+      { "count" => hash.dig("sum_distribution", "value"),
+        "yearMonths" => arr }
+    end
+
     def facet_by_relation_type(arr)
       arr.map do |hsh|
         arr = hsh.dig("year_months", "buckets").map do |h|
diff --git a/app/controllers/events_controller.rb b/app/controllers/events_controller.rb
index 70de73952..7c20b0f4f 100644
--- a/app/controllers/events_controller.rb
+++ b/app/controllers/events_controller.rb
@@ -104,6 +104,7 @@ def index
                            publication_year: params[:publication_year],
                            occurred_at: params[:occurred_at],
                            year_month: params[:year_month],
+                           aggregations: params[:aggregations],
                            unique: params[:unique],
                            page: page,
                            sort: sort)
@@ -113,17 +114,28 @@ def index
     total_for_pages = page[:cursor].nil? ? [total.to_f, 10000].min : total.to_f
     total_pages = page[:size] > 0 ? (total_for_pages / page[:size]).ceil : 0
 
-    sources = total.positive? ? facet_by_source(response.response.aggregations.sources.buckets) : nil
-    prefixes = total.positive? ? facet_by_source(response.response.aggregations.prefixes.buckets) : nil
-    citation_types = total.positive? ? facet_by_citation_type(response.response.aggregations.citation_types.buckets) : nil
-    relation_types = total.positive? ? facet_by_relation_type(response.response.aggregations.relation_types.buckets) : nil
-    registrants = total.positive? && params[:extra] ? facet_by_registrants(response.response.aggregations.registrants.buckets) : nil
-    pairings = total.positive? && params[:extra] ? facet_by_pairings(response.response.aggregations.pairings.buckets) : nil
-    dois = total.positive? && params[:extra] ? facet_by_dois(response.response.aggregations.dois.buckets) : nil
-    dois_usage = total.positive? && params[:extra] ? facet_by_dois(response.response.aggregations.dois_usage.dois.buckets) : nil
-    citations_histogram = total.positive? && params[:extra] ? facet_citations_by_year(response.response.aggregations.dois_citations) : nil
-    # citations_histogram = total.positive? && params[:extra] ? facet_citations_by_year(response.response.aggregations.citations_histogram.years.buckets) : nil
-    # citations = total.positive? && params[:extra] ? facet_citations_by_dois(response.response.aggregations.citations.dois.buckets) : nil
+
+    # Aggregation groups requested by the client (comma-separated). No value
+    # selects the default query aggregations.
+    requested = params[:aggregations].to_s.split(",").map(&:strip)
+    # Parenthesised deliberately: && binds tighter than ||, and no facet
+    # should be built when the search returned nothing.
+    query_facets = total.positive? && (requested.empty? || requested.include?("query_aggregations"))
+    metrics_facets = total.positive? && requested.include?("metrics_aggregations")
+
+    sources = query_facets ? facet_by_source(response.response.aggregations.sources.buckets) : nil
+    prefixes = query_facets ? facet_by_source(response.response.aggregations.prefixes.buckets) : nil
+    citation_types = query_facets ? facet_by_citation_type(response.response.aggregations.citation_types.buckets) : nil
+    relation_types = query_facets ? facet_by_relation_type(response.response.aggregations.relation_types.buckets) : nil
+    registrants = query_facets ? facet_by_registrants(response.response.aggregations.registrants.buckets) : nil
+    pairings = query_facets ? facet_by_pairings(response.response.aggregations.pairings.buckets) : nil
+    dois = query_facets ? facet_by_dois(response.response.aggregations.dois.buckets) : nil
+    dois_usage = query_facets ? facet_by_dois(response.response.aggregations.dois_usage.dois.buckets) : nil
+    dois_citations = query_facets ? facet_citations_by_year(response.response.aggregations.dois_citations) : nil
+    citations_histogram = metrics_facets ? facet_citations_by_year(response.response.aggregations.citations_histogram) : nil
+    citations = metrics_facets ? facet_citations_by_dois(response.response.aggregations.citations.dois.buckets) : nil
+    views_histogram = metrics_facets ? facet_counts_by_year_month(response.response.aggregations.views) : nil
+    downloads_histogram = metrics_facets ? facet_counts_by_year_month(response.response.aggregations.downloads) : nil
 
     results = response.results
 
@@ -140,8 +152,11 @@ def index
       registrants: registrants,
       "doisRelationTypes": dois,
       "doisUsageTypes": dois_usage,
-      "doisCitations": citations_histogram
-      # "uniqueCitations": citations
+      "doisCitations": dois_citations,
+      "citationsHistogram": citations_histogram,
+      "uniqueCitations": citations,
+      "viewsHistogram": views_histogram,
+      "downloadsHistogram": downloads_histogram
     }.compact
 
     options[:links] = {
diff --git a/app/models/concerns/indexable.rb b/app/models/concerns/indexable.rb
index 29c327c5b..3fe6935ca 100644
--- a/app/models/concerns/indexable.rb
+++ b/app/models/concerns/indexable.rb
@@ -95,8 +95,26 @@ def find_by_id_list(ids, options={})
       })
     end
 
+    # Builds the Elasticsearch aggregations for a query.
+    #
+    # @param aggregations [String, nil] comma-separated names of aggregation
+    #   builder methods, e.g. "query_aggregations,metrics_aggregations"
+    # @return [Hash] merged output of the selected builders; blank or
+    #   unknown names select query_aggregations
+    def get_aggregations_hash(aggregations = "")
+      # The names come from request parameters, so only public methods ending
+      # in "_aggregations" are ever dispatched.
+      builders = aggregations.to_s.split(",").map do |name|
+        name = name.strip
+        name.end_with?("_aggregations") && respond_to?(name) ? name : "query_aggregations"
+      end
+      builders = ["query_aggregations"] if builders.empty?
+
+      builders.uniq.each_with_object({}) { |name, aggs| aggs.merge!(public_send(name)) }
+    end
+
     def query(query, options={})
-      aggregations = options[:totals_agg] == true ? totals_aggregations : query_aggregations
+      aggregations = options[:totals_agg] == true ? totals_aggregations : get_aggregations_hash(options[:aggregations])
       options[:page] ||= {}
       options[:page][:number] ||= 1
       options[:page][:size] ||= 25
diff --git a/app/models/event.rb b/app/models/event.rb
index ef36017e4..2d32fb2c3 100644
--- a/app/models/event.rb
+++ b/app/models/event.rb
@@ -73,6 +73,14 @@ class Event < ActiveRecord::Base
     "describes", "is-described-by"
   ]
 
+  VIEWS_RELATION_TYPES = [
+    "unique-dataset-investigations-regular"
+  ].freeze
+
+  DOWNLOADS_RELATION_TYPES = [
+    "unique-dataset-requests-regular"
+  ].freeze
+
   validates :subj_id, :source_id, :source_token, presence: true
 
   attr_accessor :container_title, :url
@@ -127,6 +135,7 @@ class Event < ActiveRecord::Base
     indexes :indexed_at, type: :date
     indexes :occurred_at, type: :date
     indexes :citation_id, type: :keyword
+    indexes :citation_year, type: :keyword
     indexes :cache_key, type: :keyword
   end
 
@@ -161,6 +170,7 @@ def as_indexed_json(options={})
       "indexed_at" => indexed_at,
       "occurred_at" => occurred_at,
       "citation_id" => citation_id,
+      "citation_year" => citation_year,
       "cache_key" => cache_key
     }
   end
@@ -206,41 +216,56 @@ def self.query_aggregations
           }
         },
         aggs: { years: { date_histogram: { field: 'occurred_at', interval: 'year', min_doc_count: 1 }, aggs: { "total_by_year" => { sum: { field: 'total' }}}},"sum_distribution"=>sum_year_distribution}
-      # citations_histogram: {
-      #   filter: {
-      #     script: {
-      #       script: "#{INCLUDED_RELATION_TYPES}.contains(doc['relation_type_id'].value)"
-      #     }
-      #   },
-      #   aggs: { years: { terms: { script: { source: "
-      #     String subjDatePublished = params['_source']['subj']['date_published']?.substring(0, 4);
-      #     String objDatePublished = params['_source']['obj']['date_published']?.substring(0, 4);
-
-      #     if( params['_source']['subj']['date_published']?.substring(0, 4) !== null && params['_source']['obj']['date_published']?.substring(0, 4) !== null){
-
-      #       if(Integer.parseInt(objDatePublished) > Integer.parseInt(subjDatePublished) )
-      #       {
-      #         objDatePublished
-      #       }
-      #       else{
-      #         subjDatePublished
-      #       }
-      #     }
-      #   " }}}}
-      # },
-      # citations: {
-      #   filter: {
-      #     script: {
-      #       script: "#{INCLUDED_RELATION_TYPES}.contains(doc['relation_type_id'].value)"
-      #     }
-      #   },
-      #   aggs: { dois: {
-      #     terms: { field: 'obj_id', size: 50, min_doc_count: 1 }, aggs: { unique_citations: { cardinality: { field: 'citation_id' }}}
-      #   }}
       }
     }
   end
 
+  # Aggregations for citation, view and download metrics: yearly citation
+  # totals, unique citations per DOI, and monthly view/download totals.
+  def self.metrics_aggregations
+    sum_distribution = {
+      sum_bucket: {
+        buckets_path: "year_months>total_by_year_month"
+      }
+    }
+    sum_year_distribution = {
+      sum_bucket: {
+        buckets_path: "years>total_by_year"
+      }
+    }
+
+    {
+      # NOTE(review): citation_year is mapped as keyword and indexed as an
+      # integer year; confirm date_histogram accepts that field type.
+      citations_histogram: {
+        filter: {script: {script: "#{INCLUDED_RELATION_TYPES}.contains(doc['relation_type_id'].value)"}
+        },
+        aggs: { years: { date_histogram: { field: 'citation_year', interval: 'year', min_doc_count: 1 }, aggs: { "total_by_year" => { sum: { field: 'total' }}}},"sum_distribution"=>sum_year_distribution}
+      },
+      citations: {
+        filter: {script: {script: "#{INCLUDED_RELATION_TYPES}.contains(doc['relation_type_id'].value)"}
+        },
+        aggs: { dois: {
+          terms: { field: 'obj_id', size: 50, min_doc_count: 1 }, aggs: { unique_citations: { cardinality: { field: 'citation_id' }}}
+        }}
+      },
+      views: {
+        filter: {script: {script: "#{VIEWS_RELATION_TYPES}.contains(doc['relation_type_id'].value) && doc['source_id'].value == 'datacite-usage' && doc['occurred_at'].value.getMillis() >= doc['obj.datePublished'].value.getMillis() && doc['occurred_at'].value.getMillis() < new Date().getTime()"}
+        },
+        aggs: {
+          year_months: { date_histogram: { field: 'occurred_at', interval: 'month', min_doc_count: 1 }, aggs: { "total_by_year_month" => { sum: { field: 'total' } } } }, "sum_distribution" => sum_distribution
+        }
+      },
+      downloads: {
+        filter: {script: {script: "#{DOWNLOADS_RELATION_TYPES}.contains(doc['relation_type_id'].value) && doc['source_id'].value == 'datacite-usage' && doc['occurred_at'].value.getMillis() >= doc['obj.datePublished'].value.getMillis() && doc['occurred_at'].value.getMillis() < new Date().getTime()"}
+        },
+        aggs: {
+          year_months: { date_histogram: { field: 'occurred_at', interval: 'month', min_doc_count: 1 }, aggs: { "total_by_year_month" => { sum: { field: 'total' } } } }, "sum_distribution" => sum_distribution
+        }
+      }
+    }
+  end
+
   # return results for one or more ids
   def self.find_by_id(ids, options={})
     ids = ids.split(",") if ids.is_a?(String)
@@ -534,6 +559,30 @@ def obj_cache_key
     "objects/#{obj_id}-#{timestamp}"
   end
 
+  # Later of the subject's and the object's publication years.
+  #
+  # @return [Integer, String] the year, or "" for relation types outside
+  #   INCLUDED_RELATION_TYPES
+  def citation_year
+    # `return` is required here: a bare "" expression would be discarded.
+    return "" unless INCLUDED_RELATION_TYPES.include?(relation_type_id)
+
+    # The event factory and the views/downloads filter scripts use the
+    # "datePublished" key; "date_published" is kept as a fallback.
+    subj_publication = subj['datePublished'] || subj['date_published'] || date_published(subj_id) || year_month
+    obj_publication = obj['datePublished'] || obj['date_published'] || date_published(obj_id) || year_month
+    [subj_publication[0..3].to_i, obj_publication[0..3].to_i].max
+  end
+
+  # Looks up the published value of the Doi record matching +doi+.
+  #
+  # @return the record's :published attribute, or nil when no record exists
+  def date_published(doi)
+    ## TODO: we need to make sure all the dois from other RA are indexed
+    record = Doi.where(doi: doi).first
+    record[:published] if record.present?
+  end
+
   def set_defaults
     self.uuid = SecureRandom.uuid if uuid.blank?
     self.subj_id = normalize_doi(subj_id) || subj_id
diff --git a/spec/concerns/indexable_spec.rb b/spec/concerns/indexable_spec.rb
index d0e803b6e..9da859643 100644
--- a/spec/concerns/indexable_spec.rb
+++ b/spec/concerns/indexable_spec.rb
@@ -100,6 +100,11 @@
     it 'query by description' do
       results = Doi.query("description").results
       expect(results.total).to eq(1)
+
+      expect(results.response.aggregations.states).not_to be_nil
+      expect(results.response.aggregations.prefixes).not_to be_nil
+      expect(results.response.aggregations.created).not_to be_nil
+      expect(results.response.aggregations.schema_versions).not_to be_nil
     end
 
     it 'query by description not found' do
@@ -118,5 +123,66 @@
       results = Doi.query(nil, page: { size: 1, cursor: results.to_a.last[:sort] }).results
       expect(results.to_a.length).to eq(1)
     end
+
+    context "aggregations" do
+      it 'returns query_aggregation when filters aggregation with empty' do
+        aggregations = Doi.get_aggregations_hash("")
+        expect(aggregations[:resource_types]).not_to be_nil
+        expect(aggregations[:states]).not_to be_nil
+        expect(aggregations[:created]).not_to be_nil
+        expect(aggregations[:schema_versions]).not_to be_nil
+      end
+
+      it 'returns multiple aggregations when filters aggregations with multiple' do
+        aggregations = Doi.get_aggregations_hash("query_aggregations,metrics_aggregations")
+        expect(aggregations[:resource_types]).not_to be_nil
+        expect(aggregations[:states]).not_to be_nil
+        expect(aggregations[:created]).not_to be_nil
+        expect(aggregations[:schema_versions]).not_to be_nil
+      end
+    end
+  end
+
+  context "when event" do
+    let!(:event) { create(:event) }
+    let!(:events) { create_list(:event, 3) }
+
+    before do
+      Event.import
+      sleep 1
+    end
+
+    context "aggregations" do
+      it 'returns query_aggregation when filters aggregation with empty' do
+        aggregations = Event.get_aggregations_hash("")
+        expect(aggregations[:sources]).not_to be_nil
+        expect(aggregations[:prefixes]).not_to be_nil
+        expect(aggregations[:citation_types]).not_to be_nil
+        expect(aggregations[:relation_types]).not_to be_nil
+        expect(aggregations[:registrants]).not_to be_nil
+        expect(aggregations[:pairings]).not_to be_nil
+        expect(aggregations[:dois_usage]).not_to be_nil
+        expect(aggregations[:citations_histogram]).to be_nil
+        expect(aggregations[:citations]).to be_nil
+      end
+
+      it 'returns multiple aggregations when filters aggregations with multiple' do
+        aggregations = Event.get_aggregations_hash("query_aggregations,metrics_aggregations")
+        expect(aggregations[:sources]).not_to be_nil
+        expect(aggregations[:prefixes]).not_to be_nil
+        expect(aggregations[:citation_types]).not_to be_nil
+        expect(aggregations[:relation_types]).not_to be_nil
+        expect(aggregations[:registrants]).not_to be_nil
+        expect(aggregations[:pairings]).not_to be_nil
+        expect(aggregations[:dois]).not_to be_nil
+        expect(aggregations[:dois_usage]).not_to be_nil
+        expect(aggregations[:citations_histogram]).not_to be_nil
+        expect(aggregations[:citations]).not_to be_nil
+      end
+
+      it 'falls back to query_aggregations for names that are not aggregation builders' do
+        expect(Event.get_aggregations_hash("delete_all")).to eq(Event.query_aggregations)
+      end
+    end
   end
 end
diff --git a/spec/factories/default.rb b/spec/factories/default.rb
index 47b6141d4..87ad79c92 100644
--- a/spec/factories/default.rb
+++ b/spec/factories/default.rb
@@ -294,7 +294,7 @@
     source_id { "datacite_related" }
     source_token { "datacite_related_123" }
     sequence(:subj_id) { |n| "http://doi.org/10.5061/DRYAD.47SD5e/#{n}" }
-    subj { nil }
+    subj { {"datePublished"=>"2006-06-13T16:14:19Z"} }
     obj_id { "http://doi.org/10.5061/DRYAD.47SD5/1" }
     relation_type_id { "has_part" }
   end
diff --git a/spec/models/event_spec.rb b/spec/models/event_spec.rb
index b02bc7cac..a3476da0e 100644
--- a/spec/models/event_spec.rb
+++ b/spec/models/event_spec.rb
@@ -2,14 +2,38 @@
 
 describe Event, :type => :model, vcr: true do
   before(:each) { allow(Time.zone).to receive(:now).and_return(Time.mktime(2015, 4, 8)) }
+  context "event" do
+    subject { create(:event) }
 
-  subject { create(:event) }
+    it { is_expected.to validate_presence_of(:subj_id) }
+    it { is_expected.to validate_presence_of(:source_token) }
+    it { is_expected.to validate_presence_of(:source_id) }
 
-  it { is_expected.to validate_presence_of(:subj_id) }
-  it { is_expected.to validate_presence_of(:source_token) }
-  it { is_expected.to validate_presence_of(:source_id) }
+    it "has subj" do
+      expect(subject.subj["date-published"]).to eq("2006-06-13T16:14:19Z")
+    end
+  end
 
-  it "has subj" do
-    expect(subject.subj["date-published"]).to eq("2006-06-13T16:14:19Z")
+  context "citation" do
+    subject { create(:event_for_datacite_related) }
+
+    it "has citation_id" do
+      expect(subject.citation_id).to eq("https://doi.org/10.5061/dryad.47sd5/1-https://doi.org/10.5061/dryad.47sd5e/1")
+    end
+
+    it "has citation_year" do
+      subject.relation_type_id = Event::INCLUDED_RELATION_TYPES.first
+      expect(subject.citation_year).to eq(2015)
+    end
+
+    it "has no citation_year for other relation types" do
+      subject.relation_type_id = "not-a-citation-relation"
+      expect(subject.citation_year).to eq("")
+    end
+
+    it "has published_dates" do
+      expect(subject.subj["datePublished"]).to eq("2006-06-13T16:14:19Z")
+      expect(subject.obj["datePublished"]).to be_nil
+    end
   end
 end