Skip to content

Commit

Permalink
Merge pull request #415 from datacite/feat_rake_label_event_with_error
Browse files Browse the repository at this point in the history
Rake tasks to label events with error
  • Loading branch information
kjgarza authored Feb 13, 2020
2 parents 5f2be30 + e42b388 commit 40237ce
Show file tree
Hide file tree
Showing 3 changed files with 26 additions and 24 deletions.
6 changes: 6 additions & 0 deletions app/controllers/events_controller.rb
Original file line number Diff line number Diff line change
Expand Up @@ -106,6 +106,7 @@ def index
year_month: params[:year_month],
aggregations: params[:aggregations],
unique: params[:unique],
state_event: params[:state],
scroll_id: params[:scroll_id],
page: page,
sort: sort)
Expand Down Expand Up @@ -156,6 +157,8 @@ def index
downloads_histogram = nil
unique_obj_count = nil
unique_subj_count = nil
states = nil


bm = Benchmark.ms {
aggregations = params.fetch(:aggregations, "") || ""
Expand All @@ -181,6 +184,8 @@ def index
# downloads = total.positive? ? EventsQuery.new.downloads(params[:doi]) : nil
unique_obj_count = total.positive? && aggregations.include?("advanced_aggregations") ? response.response.aggregations.unique_obj_count.value : nil
unique_subj_count = total.positive? && aggregations.include?("advanced_aggregations") ? response.response.aggregations.unique_subj_count.value : nil
states = total.positive? && aggregations.include?("state_aggregations") ? facet_by_source(response.response.aggregations.states.buckets) : nil

}
Rails.logger.warn method: "GET", path: "/events", message: "Aggregations /events", duration: bm

Expand All @@ -204,6 +209,7 @@ def index
"uniqueCitations": citations,
"references": references,
"relations": relations,
"states": states,
"uniqueNodes": {
"objCount": unique_obj_count,
"subjCount": unique_subj_count
Expand Down
12 changes: 12 additions & 0 deletions app/jobs/subj_check_job.rb
Original file line number Diff line number Diff line change
@@ -0,0 +1,12 @@
# Background job that walks a batch of events and sets state_event to
# "subjId_error" on every event whose subject DOI prefix matches a Prefix
# record.
class SubjCheckJob < ActiveJob::Base
  queue_as :lupo_background

  # @param events [Enumerable<Hash>] items exposing :id and :subj_id
  # @param options [Hash] accepted for interface compatibility; not read here
  def perform(events, options = {})
    events.each do |event|
      # Extract the DOI prefix (10.NNNN or 10.NNNNN) from the subject id.
      # to_s keeps an event without a subj_id from raising on nil.
      subj_prefix = event[:subj_id].to_s[/(10\.\d{4,5})/, 1]

      # NOTE(review): the earlier inline version of this check flagged events
      # when NO Prefix matched (`.empty?`); this job flags them when a match
      # exists. Confirm which polarity is intended before relying on the label.
      next unless Prefix.where(prefix: subj_prefix).exists?

      # The event may have been removed between enqueueing and execution;
      # skip it instead of failing the whole batch on a nil record.
      Event.find_by(id: event[:id])&.update_attribute(:state_event, "subjId_error")
    end
  end
end
32 changes: 8 additions & 24 deletions app/models/event.rb
Original file line number Diff line number Diff line change
Expand Up @@ -284,6 +284,11 @@ def self.advanced_aggregations
}
end

# Builds the aggregation definition that buckets events by their
# "state_event" field: a terms aggregation limited to 50 buckets, keeping
# only buckets that contain at least one document.
def self.state_aggregations
  terms_options = {
    field: "state_event",
    size: 50,
    min_doc_count: 1,
  }

  { states: { terms: terms_options } }
end


# return results for one or more ids
def self.find_by_id(ids, options = {})
ids = ids.split(",") if ids.is_a?(String)
Expand Down Expand Up @@ -533,10 +538,8 @@ def access_method
end

def self.subj_id_check(options = {})
file_name = "evens_with_double_crossref_dois.txt"
size = (options[:size] || 1000).to_i
cursor = [options[:from_id], options[:until_id]]
total_errors = 0

response = Event.query(nil, source_id: "datacite-crossref", page: { size: 1, cursor: [] })
Rails.logger.warn "[DoubleCheck] #{response.results.total} events for source datacite-crossref."
Expand All @@ -549,29 +552,10 @@ def self.subj_id_check(options = {})

Rails.logger.warn "[DoubleCheck] DoubleCheck #{response.results.results.length} events starting with _id #{response.results.to_a.first[:_id]}."
cursor = response.results.to_a.last[:sort]
Rails.logger.warn "[DoubleCheck] Cursor: #{cursor} "

# dois = response.results.results.map(&:subj_id)
events = response.results.results
events.lazy.each do | event|
subj_prefix = event.subj_id[/(10\.\d{4,5})/, 1]
if Prefix.where(prefix: subj_prefix).empty?
File.open(file_name, "a+") do |f|
f.write(event.uuid, "\n")
total_errors = total_errors + 1
end
end
end
end
end

file = File.open(file_name)
if file.present?
payload = { description: "events_with_errors_from_rake_task #{Time.now.getutc}", public: true,files: {uids_with_errors: {content: file.read} }}
### max file size 1MB
response = Maremma.post("https://api.github.com/gists", data: payload.to_json, username: ENV["GIST_USERNAME"], password:ENV["GIST_PASSWORD"])
Rails.logger.warn "[DoubleCheck] Total number of events with Errors: #{total_errors}"
Rails.logger.warn "[DoubleCheck] IDs saved: #{response.body.dig('data','url')}" if [200,201].include?(response.status)
events = response.results.results.map { |item| { id: item.id, subj_id: item.subj_id } }
SubjCheckJob.perform_later(events, options)
end
end
end

Expand Down

0 comments on commit 40237ce

Please sign in to comment.