diff --git a/app/models/event.rb b/app/models/event.rb
index dbd02bb52..6df5527de 100644
--- a/app/models/event.rb
+++ b/app/models/event.rb
@@ -532,6 +532,68 @@ def access_method
     end
   end
 
+  # Flags events whose subject DOI prefix is not present in the Prefix table.
+  #
+  # Pages through every event with source_id "datacite-crossref" or
+  # "datacite-related", extracts the DOI prefix from each subj_id and appends
+  # the uuid of every event whose prefix has no Prefix row to a local text
+  # file. The file content is then posted to the GitHub gists API.
+  #
+  # options - Hash of settings:
+  #           :size     - page size for each query (default: 1000)
+  #           :from_id  - first element of the initial cursor
+  #           :until_id - second element of the initial cursor
+  #
+  # Returns the value of the last log call when the gist was created, nil
+  # otherwise (including when no report file exists).
+  def self.subj_id_check(options = {})
+    file_name = "evens_with_double_crossref_dois.txt"
+    source_id = "datacite-crossref,datacite-related"
+    size = (options[:size] || 1000).to_i
+    cursor = [options[:from_id], options[:until_id]]
+    total_errors = 0
+    # One Prefix lookup per distinct prefix instead of one per event.
+    missing_prefix = Hash.new { |cache, prefix| cache[prefix] = Prefix.where(prefix: prefix).empty? }
+
+    response = Event.query(nil, source_id: source_id, page: { size: 1, cursor: [] })
+    Rails.logger.info "[DoubleCheck] #{response.results.total} events for source #{source_id}."
+
+    # walk through results using cursor
+    if response.results.total.positive?
+      loop do
+        response = Event.query(nil, source_id: source_id, page: { size: size, cursor: cursor })
+        events = response.results.results
+        break unless events.length.positive?
+
+        hits = response.results.to_a
+        Rails.logger.info "[DoubleCheck] DoubleCheck #{events.length} events starting with _id #{hits.first[:_id]}."
+        cursor = hits.last[:sort]
+
+        flagged = events.select { |event| missing_prefix[event.subj_id[/(10\.\d{4,5})/, 1]] }
+        next if flagged.empty?
+
+        # NOTE(review): append mode keeps uuids written by earlier runs, so the
+        # upload can contain more ids than total_errors reports - confirm intended.
+        File.open(file_name, "a") { |f| flagged.each { |event| f.puts(event.uuid) } }
+        total_errors += flagged.size
+      end
+    end
+
+    # Nothing was ever flagged: File.open would raise Errno::ENOENT here, and
+    # a File object is never blank, so the old `file.present?` guard was a no-op.
+    return unless File.exist?(file_name)
+
+    ### max file size 1MB
+    payload = {
+      description: "events_with_errors_from_rake_task #{Time.now.getutc}",
+      public: true,
+      files: { uids_with_errors: { content: File.read(file_name) } }
+    }
+    gist = Maremma.post("https://api.github.com/gists", data: payload.to_json, username: ENV["GIST_USERNAME"], password: ENV["GIST_PASSWORD"])
+    Rails.logger.warn "[DoubleCheck] Total number of events with Errors: #{total_errors}"
+    Rails.logger.warn "[DoubleCheck] IDs saved: #{gist.body.dig('data', 'url')}" if [200, 201].include?(gist.status)
+  end
+
   def metric_type
     if relation_type_id.to_s =~ /(requests|investigations)/
       arr = relation_type_id.split("-", 4)
diff --git a/lib/tasks/event.rake b/lib/tasks/event.rake
index 2271b7018..63f052a04 100644
--- a/lib/tasks/event.rake
+++ b/lib/tasks/event.rake
@@ -78,6 +78,16 @@ namespace :crossref do
   end
 end
 
+namespace :subj_id_check do
+  desc 'checks that events subject node congruency'
+  task :check => :environment do
+    from_id = (ENV['FROM_ID'] || Event.minimum(:id)).to_i
+    until_id = (ENV['UNTIL_ID'] || Event.maximum(:id)).to_i
+
+    Event.subj_id_check(from_id: from_id, until_id: until_id)
+  end
+end
+
 namespace :datacite_crossref do
   desc 'Import crossref dois for all events'
   task :import_doi => :environment do
diff --git a/spec/models/event_spec.rb b/spec/models/event_spec.rb
index f94b8744d..0ddd279f2 100644
--- a/spec/models/event_spec.rb
+++ b/spec/models/event_spec.rb
@@ -37,5 +37,27 @@
       expect(published).to eq("2011")
       expect(published).not_to eq(2011)
     end
+
+    # context "double_crossref_check", elasticsearch: true do
+    #   let(:provider) { create(:provider, symbol: "DATACITE") }
+    #   let(:client) { create(:client, provider: provider, symbol: ENV['MDS_USERNAME'], password: ENV['MDS_PASSWORD']) }
+    #   let!(:prefix) { create(:prefix, prefix: "10.14454") }
+    #   let!(:client_prefix) { create(:client_prefix, client: client, prefix: prefix) }
+    #   let!(:doi) { create(:doi, client: client) }
+    #   let!(:dois) { create_list(:doi, 10) }
+    #   let!(:events) { create_list(:event_for_datacite_related, 30, source_id: "datacite-crossref", obj_id: doi.doi) }
+
+    #   before do
+    #     Provider.import
+    #     Client.import
+    #     Doi.import
+    #     Event.import
+    #     sleep 3
+    #   end
+
+    #   it "check run" do
+    #     expect(Event.subj_id_check(cursor: [Event.minimum(:id),Event.maximum(:id)])).to eq(true)
+    #   end
+    # end
   end
 end