diff --git a/app/models/doi.rb b/app/models/doi.rb index 49b4b8e55..776e5a480 100644 --- a/app/models/doi.rb +++ b/app/models/doi.rb @@ -2022,18 +2022,12 @@ def self.transfer(options={}) # +job_name+:: Acive Job class name of the Job that would be executed on every matched results def self.loop_through_dois(options={}) size = (options[:size] || 1000).to_i + cursor = options[:cursor] || [] filter = options[:filter] || {} label = options[:label] || "" options[:job_name] ||= "" query = options[:query].presence - if options[:cursor].present? - timestamp, doi = options[:cursor].split(",", 2) - cursor = [timestamp.to_i, doi] - else - cursor = [] - end - response = Doi.query(query, filter.merge(page: { size: 1, cursor: [] })) message = "#{label} #{response.results.total} Dois with #{label}." diff --git a/app/models/event.rb b/app/models/event.rb index 5e6fa0e29..b4bb43fb7 100644 --- a/app/models/event.rb +++ b/app/models/event.rb @@ -72,14 +72,16 @@ class Event < ActiveRecord::Base "references", "is-referenced-by" ] - ACTIVE_RELATION_TYPES = [ - "cites", + # renamed to make it clearer that these relation types are grouped together as references + REFERENCE_RELATION_TYPES = [ + "is-cited-by", "is-supplement-to", "references" ] - PASSIVE_RELATION_TYPES = [ - "is-cited-by", + # renamed to make it clearer that these relation types are grouped together as citations + CITATION_RELATION_TYPES = [ + "cites", "is-supplemented-by", "is-referenced-by" ] @@ -649,11 +651,11 @@ def self.label_state_event(event) # +job_name+:: Acive Job class name of the Job that would be executed on every matched results def self.loop_through_events(options) size = (options[:size] || 1000).to_i - cursor = options[:cursor] || [options[:from_id] || Event.minimum(:id).to_i, options[:until_id] || Event.maximum(:id).to_i] + cursor = options[:cursor] || [] filter = options[:filter] || {} label = options[:label] || "" job_name = options[:job_name] || "" - query = options[:query] || nil + query = 
options[:query].presence response = Event.query(query, filter.merge(page: { size: 1, cursor: [] })) Rails.logger.info "#{label} #{response.results.total} events with #{label}." @@ -664,7 +666,7 @@ def self.loop_through_events(options) response = Event.query(query, filter.merge(page: { size: size, cursor: cursor })) break unless response.results.results.length.positive? - Rails.logger.info "#{label} #{response.results.results.length} events starting with _id #{response.results.to_a.first[:_id]}." + Rails.logger.info "#{label} #{response.results.results.length} events starting with _id #{response.results.to_a.first[:_id]}." cursor = response.results.to_a.last[:sort] Rails.logger.info "#{label} Cursor: #{cursor} " @@ -784,12 +786,12 @@ def set_source_and_target_doi return nil unless subj_id && obj_id case relation_type_id - when *ACTIVE_RELATION_TYPES + when *REFERENCE_RELATION_TYPES self.source_doi = uppercase_doi_from_url(subj_id) self.target_doi = uppercase_doi_from_url(obj_id) self.source_relation_type_id = "references" self.target_relation_type_id = "citations" - when *PASSIVE_RELATION_TYPES + when *CITATION_RELATION_TYPES self.source_doi = uppercase_doi_from_url(obj_id) self.target_doi = uppercase_doi_from_url(subj_id) self.source_relation_type_id = "references" diff --git a/lib/tasks/doi.rake b/lib/tasks/doi.rake index 7bb5575c4..f79cefc34 100644 --- a/lib/tasks/doi.rake +++ b/lib/tasks/doi.rake @@ -83,7 +83,7 @@ namespace :doi do query: "+aasm_state:(findable OR registered) -schema_version:*", label: "[SetSchemaVersion]", job_name: "SchemaVersionJob", - cursor: ENV["CURSOR"], + cursor: ENV["CURSOR"].present? ? Base64.urlsafe_decode64(ENV["CURSOR"]).split(",", 2) : [], } puts Doi.loop_through_dois(options) end @@ -94,7 +94,7 @@ namespace :doi do query: "agency:DataCite OR agency:Crossref", label: "[SetRegistrationAgency]", job_name: "UpdateDoiJob", - cursor: ENV["CURSOR"], + cursor: ENV["CURSOR"].present? ? 
Base64.urlsafe_decode64(ENV["CURSOR"]).split(",", 2) : [], } puts Doi.loop_through_dois(options) end @@ -105,7 +105,7 @@ namespace :doi do query: "rights_list:* AND -rights_list.rightsIdentifier:*", label: "[SetLicense]", job_name: "UpdateDoiJob", - cursor: ENV["CURSOR"], + cursor: ENV["CURSOR"].present? ? Base64.urlsafe_decode64(ENV["CURSOR"]).split(",", 2) : [], } puts Doi.loop_through_dois(options) end @@ -116,7 +116,7 @@ namespace :doi do query: "language:*", label: "[SetLanguage]", job_name: "UpdateDoiJob", - cursor: ENV["CURSOR"], + cursor: ENV["CURSOR"].present? ? Base64.urlsafe_decode64(ENV["CURSOR"]).split(",", 2) : [], } puts Doi.loop_through_dois(options) end @@ -127,7 +127,7 @@ namespace :doi do query: "identifiers.identifierType:DOI", label: "[SetIdentifiers]", job_name: "UpdateDoiJob", - cursor: ENV["CURSOR"], + cursor: ENV["CURSOR"].present? ? Base64.urlsafe_decode64(ENV["CURSOR"]).split(",", 2) : [], } puts Doi.loop_through_dois(options) end @@ -138,7 +138,7 @@ namespace :doi do query: "subjects.subjectScheme:FOR", label: "[SetFieldOfScience]", job_name: "UpdateDoiJob", - cursor: ENV["CURSOR"], + cursor: ENV["CURSOR"].present? ? Base64.urlsafe_decode64(ENV["CURSOR"]).split(",", 2) : [], } puts Doi.loop_through_dois(options) end @@ -149,7 +149,7 @@ namespace :doi do query: "types.resourceTypeGeneral:* AND -types.schemaOrg:*", label: "[SetTypes]", job_name: "UpdateDoiJob", - cursor: ENV["CURSOR"], + cursor: ENV["CURSOR"].present? ? Base64.urlsafe_decode64(ENV["CURSOR"]).split(",", 2) : [], } Doi.loop_through_dois(options) end @@ -161,7 +161,7 @@ namespace :doi do query: ENV["QUERY"], label: "[RefreshMetadata]", job_name: "DoiRefreshJob", - cursor: ENV["CURSOR"], + cursor: ENV["CURSOR"].present? ? 
Base64.urlsafe_decode64(ENV["CURSOR"]).split(",", 2) : [], } puts Doi.loop_through_dois(options) end diff --git a/lib/tasks/event.rake b/lib/tasks/event.rake index 7247eea65..cbb784b80 100644 --- a/lib/tasks/event.rake +++ b/lib/tasks/event.rake @@ -16,6 +16,16 @@ namespace :event do puts Event.upgrade_index end + desc "Create alias for events" + task :create_alias => :environment do + puts Event.create_alias(index: ENV["INDEX"], alias: ENV["ALIAS"]) + end + + desc "Delete alias for events" + task :delete_alias => :environment do + puts Event.delete_alias(index: ENV["INDEX"], alias: ENV["ALIAS"]) + end + desc "Show index stats for events" task :index_stats => :environment do puts Event.index_stats @@ -56,6 +66,7 @@ namespace :event do options = { cursor: ENV["CURSOR"].present? ? Base64.urlsafe_decode64(ENV["CURSOR"]).split(",", 2) : [], filter: { update_target_doi: true }, + query: ENV["QUERY"], label: "[UpdateTargetDoi] Updating", job_name: "TargetDoiByIdJob", } @@ -66,7 +77,7 @@ end namespace :crossref do desc 'Import crossref dois for all events' task :import_doi => :environment do - cursor = ENV['CURSOR'].to_s.split(",") || [Event.minimum(:id), Event.minimum(:id)] + cursor = ENV["CURSOR"].present? ? Base64.urlsafe_decode64(ENV["CURSOR"]).split(",", 2) : [] Event.update_crossref(cursor: cursor) end @@ -107,7 +118,7 @@ end namespace :datacite_crossref do desc 'Import crossref dois for all events' task :import_doi => :environment do - cursor = (ENV['CURSOR'] || Event.minimum(:id)).to_i + cursor = ENV["CURSOR"].present? ? Base64.urlsafe_decode64(ENV["CURSOR"]).split(",", 2) : [] Event.update_datacite_crossref(cursor: cursor, refresh: ENV['REFRESH'], size: ENV['SIZE']) end @@ -116,7 +127,7 @@ end namespace :datacite_medra do desc 'Import medra dois for all events' task :import_doi => :environment do - cursor = (ENV['CURSOR'] || Event.minimum(:id)).to_i + cursor = ENV["CURSOR"].present? ? 
Base64.urlsafe_decode64(ENV["CURSOR"]).split(",", 2) : [] Event.update_datacite_medra(cursor: cursor, refresh: ENV['REFRESH'], size: ENV['SIZE']) end @@ -125,7 +136,7 @@ end namespace :datacite_kisti do desc 'Import kisti dois for all events' task :import_doi => :environment do - cursor = (ENV['CURSOR'] || Event.minimum(:id)).to_i + cursor = ENV["CURSOR"].present? ? Base64.urlsafe_decode64(ENV["CURSOR"]).split(",", 2) : [] Event.update_datacite_kisti(cursor: cursor, refresh: ENV['REFRESH'], size: ENV['SIZE']) end @@ -134,7 +145,7 @@ end namespace :datacite_jalc do desc 'Import jalc dois for all events' task :import_doi => :environment do - cursor = (ENV['CURSOR'] || Event.minimum(:id)).to_i + cursor = ENV["CURSOR"].present? ? Base64.urlsafe_decode64(ENV["CURSOR"]).split(",", 2) : [] Event.update_datacite_jalc(cursor: cursor, refresh: ENV['REFRESH'], size: ENV['SIZE']) end @@ -143,7 +154,7 @@ end namespace :datacite_op do desc 'Import op dois for all events' task :import_doi => :environment do - cursor = (ENV['CURSOR'] || Event.minimum(:id)).to_i + cursor = ENV["CURSOR"].present? ? Base64.urlsafe_decode64(ENV["CURSOR"]).split(",", 2) : [] Event.update_datacite_op(cursor: cursor, refresh: ENV['REFRESH'], size: ENV['SIZE']) end @@ -152,7 +163,7 @@ end namespace :datacite_orcid_auto_update do desc 'Import orcid ids for all events' task :import_orcid => :environment do - cursor = (ENV['CURSOR'] || Event.minimum(:id)).to_i + cursor = ENV["CURSOR"].present? ? Base64.urlsafe_decode64(ENV["CURSOR"]).split(",", 2) : [] Event.update_datacite_orcid_auto_update(cursor: cursor, refresh: ENV['REFRESH'], size: ENV['SIZE']) end