Skip to content

Commit

Permalink
Merge pull request #330 from datacite/add_citation_year
Browse files Browse the repository at this point in the history
Add citation year to index
  • Loading branch information
kjgarza authored Aug 2, 2019
2 parents b21b896 + 77a72d6 commit f3c1a32
Show file tree
Hide file tree
Showing 7 changed files with 201 additions and 60 deletions.
24 changes: 16 additions & 8 deletions app/controllers/concerns/facetable.rb
Original file line number Diff line number Diff line change
Expand Up @@ -134,14 +134,6 @@ def facet_by_source(arr)
end
end

# def facet_citations_by_year(hash)
# hash.map do |hsh|
# { "id" => hsh["key"].to_i,
# "title" => hsh["key"],
# "count" => hsh["doc_count"] }
# end
# end

def facet_citations_by_year(hash)
arr = hash.dig('years', 'buckets').map do |h|
year = h['key_as_string'][0..3].to_i
Expand All @@ -156,6 +148,22 @@ def facet_citations_by_year(hash)
"years" => arr }
end

# Turns a per-month date-histogram aggregation into a facet payload.
#
# hash - aggregation result; read via dig("year_months", "buckets") for the
#        monthly buckets and dig("sum_distribution", "value") for the total.
#
# Returns a Hash with "count" (the sum_distribution value) and "yearMonths",
# an Array holding one { "id", "title", "sum" } Hash per bucket.
def facet_counts_by_year_month(hash)
  buckets = hash.dig("year_months", "buckets")

  year_months = buckets.map do |bucket|
    # key_as_string is sliced positionally: chars 0..3 are used as the year,
    # 5..6 as the month number and 0..6 as the bucket id.
    stamp = bucket["key_as_string"]
    month_number = stamp[5..6].to_i
    month_name = I18n.t("date.month_names")[month_number]

    {
      "id" => stamp[0..6],
      "title" => month_name + " " + stamp[0..3],
      "sum" => bucket.dig("total_by_year_month", "value")
    }
  end

  {
    "count" => hash.dig("sum_distribution", "value"),
    "yearMonths" => year_months
  }
end



def facet_by_relation_type(arr)
arr.map do |hsh|
arr = hsh.dig("year_months", "buckets").map do |h|
Expand Down
35 changes: 22 additions & 13 deletions app/controllers/events_controller.rb
Original file line number Diff line number Diff line change
Expand Up @@ -104,6 +104,7 @@ def index
publication_year: params[:publication_year],
occurred_at: params[:occurred_at],
year_month: params[:year_month],
aggregations: params[:aggregations],
unique: params[:unique],
page: page,
sort: sort)
Expand All @@ -113,17 +114,22 @@ def index
total_for_pages = page[:cursor].nil? ? [total.to_f, 10000].min : total.to_f
total_pages = page[:size] > 0 ? (total_for_pages / page[:size]).ceil : 0

sources = total.positive? ? facet_by_source(response.response.aggregations.sources.buckets) : nil
prefixes = total.positive? ? facet_by_source(response.response.aggregations.prefixes.buckets) : nil
citation_types = total.positive? ? facet_by_citation_type(response.response.aggregations.citation_types.buckets) : nil
relation_types = total.positive? ? facet_by_relation_type(response.response.aggregations.relation_types.buckets) : nil
registrants = total.positive? && params[:extra] ? facet_by_registrants(response.response.aggregations.registrants.buckets) : nil
pairings = total.positive? && params[:extra] ? facet_by_pairings(response.response.aggregations.pairings.buckets) : nil
dois = total.positive? && params[:extra] ? facet_by_dois(response.response.aggregations.dois.buckets) : nil
dois_usage = total.positive? && params[:extra] ? facet_by_dois(response.response.aggregations.dois_usage.dois.buckets) : nil
citations_histogram = total.positive? && params[:extra] ? facet_citations_by_year(response.response.aggregations.dois_citations) : nil
# citations_histogram = total.positive? && params[:extra] ? facet_citations_by_year(response.response.aggregations.citations_histogram.years.buckets) : nil
# citations = total.positive? && params[:extra] ? facet_citations_by_dois(response.response.aggregations.citations.dois.buckets) : nil

# Comma-separated aggregation sets requested by the client. A missing or nil
# param is normalised to "" so the string checks below are always safe.
aggregations = params.fetch(:aggregations, "") || ""

# Decide once which facet families to build. The parentheses are required:
# `&&` binds tighter than `||`, so without them an explicit
# "query_aggregations" request would bypass the total.positive? check.
include_query_facets = total.positive? && (aggregations.blank? || aggregations.include?("query_aggregations"))
include_metrics_facets = total.positive? && aggregations.include?("metrics_aggregations")

# Facets from the default (query) aggregation set.
sources = include_query_facets ? facet_by_source(response.response.aggregations.sources.buckets) : nil
prefixes = include_query_facets ? facet_by_source(response.response.aggregations.prefixes.buckets) : nil
citation_types = include_query_facets ? facet_by_citation_type(response.response.aggregations.citation_types.buckets) : nil
relation_types = include_query_facets ? facet_by_relation_type(response.response.aggregations.relation_types.buckets) : nil
registrants = include_query_facets ? facet_by_registrants(response.response.aggregations.registrants.buckets) : nil
pairings = include_query_facets ? facet_by_pairings(response.response.aggregations.pairings.buckets) : nil
dois = include_query_facets ? facet_by_dois(response.response.aggregations.dois.buckets) : nil
dois_usage = include_query_facets ? facet_by_dois(response.response.aggregations.dois_usage.dois.buckets) : nil
dois_citations = include_query_facets ? facet_citations_by_year(response.response.aggregations.dois_citations) : nil

# Facets built only when "metrics_aggregations" was requested.
citations_histogram = include_metrics_facets ? facet_citations_by_year(response.response.aggregations.citations_histogram) : nil
citations = include_metrics_facets ? facet_citations_by_dois(response.response.aggregations.citations.dois.buckets) : nil
views_histogram = include_metrics_facets ? facet_counts_by_year_month(response.response.aggregations.views) : nil
downloads_histogram = include_metrics_facets ? facet_counts_by_year_month(response.response.aggregations.downloads) : nil

results = response.results

Expand All @@ -140,8 +146,11 @@ def index
registrants: registrants,
"doisRelationTypes": dois,
"doisUsageTypes": dois_usage,
"doisCitations": citations_histogram
# "uniqueCitations": citations
"doisCitations": dois_citations,
"citationsHistogram": citations_histogram,
"uniqueCitations": citations,
"viewsHistogram": views_histogram,
"downloadsHistogram": downloads_histogram
}.compact

options[:links] = {
Expand Down
12 changes: 11 additions & 1 deletion app/models/concerns/indexable.rb
Original file line number Diff line number Diff line change
Expand Up @@ -95,8 +95,18 @@ def find_by_id_list(ids, options={})
})
end

# Builds the aggregations hash for a comma-separated list of aggregation-set
# names (e.g. "query_aggregations,metrics_aggregations").
#
# aggregations - String of comma-separated names; blank (nil, "") selects
#                query_aggregations only.
#
# Each name is dispatched only when it ends in "_aggregations" AND the
# receiver publicly responds to it; anything else falls back to
# query_aggregations. The value reaches this method from request parameters,
# so it must never be passed to `send` unchecked - that would let a caller
# invoke any method on the model (destroy_all, delete_all, ...).
#
# Returns the merged Hash of every selected aggregation set.
def get_aggregations_hash(aggregations = "")
  return query_aggregations if aggregations.blank?

  # to_s guards against non-String input such as an array-style param.
  aggregations.to_s.split(",").each_with_object({}) do |raw_name, aggs|
    name = raw_name.strip
    dispatchable = name.end_with?("_aggregations") && respond_to?(name)
    aggs.merge!(dispatchable ? public_send(name) : query_aggregations)
  end
end

def query(query, options={})
aggregations = options[:totals_agg] == true ? totals_aggregations : query_aggregations
aggregations = options[:totals_agg] == true ? totals_aggregations : get_aggregations_hash(options[:aggregations])
options[:page] ||= {}
options[:page][:number] ||= 1
options[:page][:size] ||= 25
Expand Down
96 changes: 65 additions & 31 deletions app/models/event.rb
Original file line number Diff line number Diff line change
Expand Up @@ -73,6 +73,14 @@ class Event < ActiveRecord::Base
"describes", "is-described-by"
]

# Relation types selected by the "views" filter in metrics_aggregations.
VIEWS_RELATION_TYPES = %w[unique-dataset-investigations-regular]

# Relation types selected by the "downloads" filter in metrics_aggregations.
DOWNLOADS_RELATION_TYPES = %w[unique-dataset-requests-regular]

validates :subj_id, :source_id, :source_token, presence: true

attr_accessor :container_title, :url
Expand Down Expand Up @@ -127,6 +135,7 @@ class Event < ActiveRecord::Base
indexes :indexed_at, type: :date
indexes :occurred_at, type: :date
indexes :citation_id, type: :keyword
indexes :citation_year, type: :keyword
indexes :cache_key, type: :keyword
end

Expand Down Expand Up @@ -161,6 +170,7 @@ def as_indexed_json(options={})
"indexed_at" => indexed_at,
"occurred_at" => occurred_at,
"citation_id" => citation_id,
"citation_year" => citation_year,
"cache_key" => cache_key
}
end
Expand Down Expand Up @@ -206,41 +216,52 @@ def self.query_aggregations
}
},
aggs: { years: { date_histogram: { field: 'occurred_at', interval: 'year', min_doc_count: 1 }, aggs: { "total_by_year" => { sum: { field: 'total' }}}},"sum_distribution"=>sum_year_distribution}
# citations_histogram: {
# filter: {
# script: {
# script: "#{INCLUDED_RELATION_TYPES}.contains(doc['relation_type_id'].value)"
# }
# },
# aggs: { years: { terms: { script: { source: "
# String subjDatePublished = params['_source']['subj']['date_published']?.substring(0, 4);
# String objDatePublished = params['_source']['obj']['date_published']?.substring(0, 4);

# if( params['_source']['subj']['date_published']?.substring(0, 4) !== null && params['_source']['obj']['date_published']?.substring(0, 4) !== null){

# if(Integer.parseInt(objDatePublished) > Integer.parseInt(subjDatePublished) )
# {
# objDatePublished
# }
# else{
# subjDatePublished
# }
# }
# " }}}}
# },
# citations: {
# filter: {
# script: {
# script: "#{INCLUDED_RELATION_TYPES}.contains(doc['relation_type_id'].value)"
# }
# },
# aggs: { dois: {
# terms: { field: 'obj_id', size: 50, min_doc_count: 1 }, aggs: { unique_citations: { cardinality: { field: 'citation_id' }}}
# }}
}
}
end

# Aggregation definitions for the "metrics" aggregation set.
#
# Returns a Hash with four filtered aggregations:
#   citations_histogram - yearly date_histogram on citation_year summing
#                         `total`, plus a sum_bucket over those yearly sums
#   citations           - top 50 obj_id terms, each with the cardinality of
#                         citation_id
#   views / downloads   - monthly date_histogram on occurred_at summing
#                         `total`, plus a sum_bucket over those monthly sums
#
# NOTE(review): the index mapping in this file declares citation_year as a
# keyword while citations_histogram runs a date_histogram on it - confirm the
# mapping and this aggregation agree.
def self.metrics_aggregations
  # Keeps documents whose relation_type_id is one of INCLUDED_RELATION_TYPES.
  citation_filter = lambda do
    { script: { script: "#{INCLUDED_RELATION_TYPES}.contains(doc['relation_type_id'].value)" } }
  end

  # Keeps 'datacite-usage' documents of the given relation types whose
  # occurred_at is at or after obj.datePublished and before the current time.
  usage_filter = lambda do |relation_types|
    { script: { script: "#{relation_types}.contains(doc['relation_type_id'].value) && doc['source_id'].value == 'datacite-usage' && doc['occurred_at'].value.getMillis() >= doc['obj.datePublished'].value.getMillis() && doc['occurred_at'].value.getMillis() < new Date().getTime()" } }
  end

  # Shared shape of the views and downloads aggregations.
  monthly_usage = lambda do |relation_types|
    {
      filter: usage_filter.call(relation_types),
      aggs: {
        year_months: {
          date_histogram: { field: 'occurred_at', interval: 'month', min_doc_count: 1 },
          aggs: { "total_by_year_month" => { sum: { field: 'total' } } }
        },
        "sum_distribution" => { sum_bucket: { buckets_path: "year_months>total_by_year_month" } }
      }
    }
  end

  {
    citations_histogram: {
      filter: citation_filter.call,
      aggs: {
        years: {
          date_histogram: { field: 'citation_year', interval: 'year', min_doc_count: 1 },
          aggs: { "total_by_year" => { sum: { field: 'total' } } }
        },
        "sum_distribution" => { sum_bucket: { buckets_path: "years>total_by_year" } }
      }
    },
    citations: {
      filter: citation_filter.call,
      aggs: {
        dois: {
          terms: { field: 'obj_id', size: 50, min_doc_count: 1 },
          aggs: { unique_citations: { cardinality: { field: 'citation_id' } } }
        }
      }
    },
    views: monthly_usage.call(VIEWS_RELATION_TYPES),
    downloads: monthly_usage.call(DOWNLOADS_RELATION_TYPES)
  }
end

# return results for one or more ids
def self.find_by_id(ids, options={})
ids = ids.split(",") if ids.is_a?(String)
Expand Down Expand Up @@ -534,6 +555,19 @@ def obj_cache_key
"objects/#{obj_id}-#{timestamp}"
end

# Year recorded as this event's citation year: the later of the subject's and
# the object's publication years.
#
# For each side the date is taken from the first source that yields a value:
# the embedded metadata's 'date_published', then date_published(<id>), then
# the event's year_month. Only the first four characters are used as the year.
# A nil subj/obj, or a side with no date at all, no longer raises; a side with
# no date contributes 0.
#
# NOTE(review): this method used to open with
#   "" unless INCLUDED_RELATION_TYPES.include?(relation_type_id)
# which has no `return` and therefore never influenced the result. It looks
# like an intended early exit for non-citation relation types; enabling it
# would change the value produced for those events, so confirm the intent
# before adding a real guard.
#
# Returns an Integer.
def citation_year
  subj_publication = (subj || {})['date_published'] || date_published(subj_id) || year_month
  obj_publication = (obj || {})['date_published'] || date_published(obj_id) || year_month

  [subj_publication, obj_publication].map { |date| date.to_s[0..3].to_i }.max
end

# Looks up the first Doi record matching +doi+ and returns its :published
# attribute, or nil when no such record is present.
def date_published(doi)
  ## TODO: we need to make sure all the dois from other RA are indexed
  record = Doi.where(doi: doi).first
  return unless record.present?

  record[:published]
end

def set_defaults
self.uuid = SecureRandom.uuid if uuid.blank?
self.subj_id = normalize_doi(subj_id) || subj_id
Expand Down
62 changes: 62 additions & 0 deletions spec/concerns/indexable_spec.rb
Original file line number Diff line number Diff line change
Expand Up @@ -100,6 +100,11 @@
it 'query by description' do
results = Doi.query("description").results
expect(results.total).to eq(1)

expect(results.response.aggregations.states).not_to be_nil
expect(results.response.aggregations.prefixes).not_to be_nil
expect(results.response.aggregations.created).not_to be_nil
expect(results.response.aggregations.schema_versions).not_to be_nil
end

it 'query by description not found' do
Expand All @@ -118,5 +123,62 @@
results = Doi.query(nil, page: { size: 1, cursor: results.to_a.last[:sort] }).results
expect(results.to_a.length).to eq(1)
end

context "aggregations" do
  # Both examples assert that the same four aggregation keys are present.
  it 'returns query_aggregation when filters aggregation with empty' do
    aggregations = Doi.get_aggregations_hash("")

    %i[resource_types states created schema_versions].each do |name|
      expect(aggregations[name]).not_to be_nil
    end
  end

  it 'returns multiple aggregations when filters aggregations with multiple' do
    aggregations = Doi.get_aggregations_hash("query_aggregations,metrics_aggregations")

    %i[resource_types states created schema_versions].each do |name|
      expect(aggregations[name]).not_to be_nil
    end
  end
end
end

context "when event" do
  let!(:event) { create(:event) }
  let!(:events) { create_list(:event, 3) }

  before do
    Event.import
    sleep 1
  end

  context "aggregations" do
    # A blank selector is expected to yield the query aggregation keys listed
    # below and none of the metrics keys.
    it 'returns query_aggregation when filters aggregation with empty' do
      aggregations = Event.get_aggregations_hash("")

      %i[sources prefixes citation_types relation_types registrants pairings dois_usage].each do |name|
        expect(aggregations[name]).not_to be_nil
      end

      %i[citations_histogram citations].each do |name|
        expect(aggregations[name]).to be_nil
      end
    end

    # Requesting both sets is expected to yield the keys of both.
    it 'returns multiple aggregations when filters aggregations with multiple' do
      aggregations = Event.get_aggregations_hash("query_aggregations,metrics_aggregations")

      %i[
        sources prefixes citation_types relation_types registrants pairings
        dois dois_usage citations_histogram citations
      ].each do |name|
        expect(aggregations[name]).not_to be_nil
      end
    end
  end
end
end
2 changes: 1 addition & 1 deletion spec/factories/default.rb
Original file line number Diff line number Diff line change
Expand Up @@ -294,7 +294,7 @@
source_id { "datacite_related" }
source_token { "datacite_related_123" }
sequence(:subj_id) { |n| "http://doi.org/10.5061/DRYAD.47SD5e/#{n}" }
subj { nil }
subj { {"datePublished"=>"2006-06-13T16:14:19Z"} }
obj_id { "http://doi.org/10.5061/DRYAD.47SD5/1" }
relation_type_id { "has_part" }
end
Expand Down
30 changes: 24 additions & 6 deletions spec/models/event_spec.rb
Original file line number Diff line number Diff line change
Expand Up @@ -2,14 +2,32 @@

describe Event, :type => :model, vcr: true do
before(:each) { allow(Time.zone).to receive(:now).and_return(Time.mktime(2015, 4, 8)) }
context "event" do
subject { create(:event) }

subject { create(:event) }
it { is_expected.to validate_presence_of(:subj_id) }
it { is_expected.to validate_presence_of(:source_token) }
it { is_expected.to validate_presence_of(:source_id) }

it { is_expected.to validate_presence_of(:subj_id) }
it { is_expected.to validate_presence_of(:source_token) }
it { is_expected.to validate_presence_of(:source_id) }
it "has subj" do
expect(subject.subj["date-published"]).to eq("2006-06-13T16:14:19Z")
end
end

it "has subj" do
expect(subject.subj["date-published"]).to eq("2006-06-13T16:14:19Z")
context "citation" do
subject { create(:event_for_datacite_related) }

it "has citation_id" do
expect(subject.citation_id).to eq("https://doi.org/10.5061/dryad.47sd5/1-https://doi.org/10.5061/dryad.47sd5e/1")
end

it "has citation_year" do
expect(subject.citation_year).to eq(2015)
end

it "has published_dates" do
expect(subject.subj["datePublished"]).to eq("2006-06-13T16:14:19Z")
expect(subject.obj["datePublished"]).to be_nil
end
end
end

0 comments on commit f3c1a32

Please sign in to comment.