
fix cursor pagination for maintenance tasks. #205
Martin Fenner committed Feb 20, 2019
1 parent 19c101d commit d14a4af
Showing 4 changed files with 54 additions and 53 deletions.
2 changes: 1 addition & 1 deletion app/controllers/dois_controller.rb
@@ -113,7 +113,7 @@ def index
options[:meta] = {
total: total,
"totalPages" => total_pages,
- page: page[:number],
+ page: page[:cursor].blank? && page[:number].present? ? page[:number] : nil,
states: states,
"resourceTypes" => resource_types,
created: created,
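Aside (not part of the diff): the new ternary only reports a page number for number-based requests; cursor-based requests get page: nil, since a search_after position has no stable page number. A quick illustration of what the expression evaluates to in each mode, with made-up hash values:

require "active_support/core_ext/object/blank" # provides blank?/present? outside Rails

# Number-based pagination: the page number is passed through.
page = { number: 2, cursor: nil }
page[:cursor].blank? && page[:number].present? ? page[:number] : nil # => 2

# Cursor-based pagination: no meaningful page number, so nil.
page = { number: nil, cursor: 3821 }
page[:cursor].blank? && page[:number].present? ? page[:number] : nil # => nil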
2 changes: 1 addition & 1 deletion app/models/concerns/indexable.rb
@@ -107,7 +107,7 @@ def query(query, options={})
if self.name == "Doi" && options.dig(:page, :cursor).present?
from = 0
search_after = [options.dig(:page, :cursor)]
- sort = [{ updated: { order: 'asc' }}]
+ sort = [{ _id: { order: 'asc' }}]
else
from = (options.dig(:page, :number) - 1) * options.dig(:page, :size)
search_after = nil
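Aside (not part of the diff): search_after needs a unique, strictly increasing sort key to resume from; with a non-unique key such as updated, documents sharing the last timestamp can be skipped, which is likely why the sort moves to _id. A minimal sketch of the request this branch builds, assuming the elasticsearch-model wrapper the app already uses; the cursor_search helper name is hypothetical:

# Sketch only: an ascending _id sort plus search_after resumes the scan strictly
# after the given cursor value; from stays 0 because search_after rejects offsets.
def cursor_search(query, cursor: 0, size: 1000)
  Doi.__elasticsearch__.search(
    query: { query_string: { query: query } },
    sort: [{ _id: { order: "asc" } }],
    search_after: [cursor],
    from: 0,
    size: size
  )
end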
97 changes: 49 additions & 48 deletions app/models/doi.rb
@@ -420,19 +420,19 @@ def self.import_by_day(options={})

logger = Logger.new(STDOUT)

response = Doi.query("created:[#{from_date.strftime("%F")} TO #{from_date.strftime("%F")}]", page: { size: 0, cursor: 1 })
response = Doi.query("created:[#{from_date.strftime("%F")} TO #{from_date.strftime("%F")}]", page: { size: 1, cursor: 0 })
logger.info "#{response.results.total} DOIs found created on #{from_date.strftime("%F")}."

if response.results.total > 0
# walk through results using cursor
- prev_cursor = 0
- cursor = 1

- while cursor > prev_cursor do
+ cursor = 0

+ while response.results.results.length > 0 do
response = Doi.query("created:[#{from_date.strftime("%F")} TO #{from_date.strftime("%F")}]", page: { size: 1000, cursor: cursor })
- old_cursor = cursor
- cursor = Array.wrap(response.results.results.last.to_h[:sort]).first.to_i
- prev__cursor = old_cursor
+ break unless response.results.results.length > 0

logger.info "[MySQL] Importing metadata for #{response.results.results.length} DOIs starting with _id #{cursor + 1}."
+ cursor = response.results.results.last[:sort].first.to_i

response.records.each do |doi|
begin
@@ -477,19 +477,19 @@ def self.import_by_day_missing(options={})

logger = Logger.new(STDOUT)

response = Doi.query("-creators:* +created:[#{from_date.strftime("%F")} TO #{from_date.strftime("%F")}]", page: { size: 0, cursor: 1 })
response = Doi.query("-creators:* +created:[#{from_date.strftime("%F")} TO #{from_date.strftime("%F")}]", page: { size: 1, cursor: 0 })
logger.info "#{response.results.total} DOIs found with missing metadata created on #{from_date.strftime("%F")}."

if response.results.total > 0
# walk through results using cursor
- prev_cursor = 0
- cursor = 1
- while cursor > prev_cursor do

+ while response.results.results.length > 0 do
response = Doi.query("-creators:* +created:[#{from_date.strftime("%F")} TO #{from_date.strftime("%F")}]", page: { size: 1000, cursor: cursor })
- old_cursor = cursor
- cursor = Array.wrap(response.results.results.last.to_h[:sort]).first.to_i
- prev_cursor = old_cursor
+ break unless response.results.results.length > 0

logger.info "[MySQL] Importing missing metadata for #{response.results.results.length} DOIs starting with _id #{cursor + 1}."
+ cursor = response.results.results.last[:sort].first.to_i

response.records.each do |doi|
begin
@@ -816,19 +816,19 @@ def self.delete_test_dois(from_date: nil)
def self.set_handle
logger = Logger.new(STDOUT)

response = Doi.query("-registered:* +url:* -aasm_state:draft -provider_id:ethz -provider_id:europ", page: { size: 0, cursor: 1 })
response = Doi.query("-registered:* +url:* -aasm_state:draft -provider_id:ethz -provider_id:europ", page: { size: 1, cursor: 0 })
logger.info "#{response.results.total} DOIs found that are not registered in the Handle system."

if response.results.total > 0
# walk through results using cursor
- prev_cursor = 0
- cursor = 1

- while cursor > prev_cursor do
+ cursor = 0

+ while response.results.results.length > 0 do
response = Doi.query("-registered:* +url:* -aasm_state:draft -provider_id:ethz -provider_id:europ", page: { size: 1000, cursor: cursor })
- old_cursor = cursor
- cursor = Array.wrap(response.results.results.last.to_h[:sort]).first.to_i
- prev_cursor = old_cursor
+ break unless response.results.results.length > 0

logger.info "[Handle] Register #{response.results.results.length} DOIs in the handle system starting with _id #{cursor + 1}."
+ cursor = response.results.results.last[:sort].first.to_i

response.results.results.each do |d|
HandleJob.perform_later(d.doi)
@@ -840,19 +840,19 @@ def self.set_handle
def self.set_url
logger = Logger.new(STDOUT)

response = Doi.query("-url:* (+provider_id:ethz OR -aasm_status:draft)", page: { size: 0, cursor: 1 })
response = Doi.query("-url:* (+provider_id:ethz OR -aasm_status:draft)", page: { size: 1, cursor: 0 })
logger.info "#{response.results.total} DOIs with no URL found in the database."

if response.results.total > 0
# walk through results using cursor
- prev_cursor = 0
- cursor = 1

- while cursor > prev_cursor do
+ cursor = 0

+ while response.results.results.length > 0 do
response = Doi.query("-url:* (+provider_id:ethz OR -aasm_status:draft)", page: { size: 1000, cursor: cursor })
- old_cursor = cursor
- cursor = Array.wrap(response.results.results.last.to_h[:sort]).first.to_i
- prev_cursor = old_cursor
+ break unless response.results.results.length > 0

logger.info "[Handle] Update URL for #{response.results.results.length} DOIs starting with _id #{cursor + 1}."
+ cursor = response.results.results.last[:sort].first.to_i

response.results.results.each do |d|
UrlJob.perform_later(d.doi)
@@ -864,20 +864,20 @@ def self.set_url
def self.set_minted
logger = Logger.new(STDOUT)

response = Doi.query("url:* +provider_id:ethz +aasm_state:draft", page: { size: 0, cursor: 1 })
response = Doi.query("url:* +provider_id:ethz +aasm_state:draft", page: { size: 1, cursor: 0 })
logger.info "#{response.results.total} draft DOIs from provider ETHZ found in the database."

if response.results.total > 0
# walk through results using cursor
- prev_cursor = 0
- cursor = 1

- while cursor > prev_cursor do
+ cursor = 0

+ while response.results.results.length > 0 do
response = Doi.query("url:* +provider_id:ethz +aasm_state:draft", page: { size: 1000, cursor: cursor })
- old_cursor = cursor
- cursor = Array.wrap(response.results.results.last.to_h[:sort]).first.to_i
- prev_cursor = old_cursor

+ break unless response.results.results.length > 0

logger.info "[MySQL] Set minted for #{response.results.results.length} DOIs starting with _id #{cursor + 1}."
+ cursor = response.results.results.last[:sort].first.to_i

response.results.results.each do |d|
UrlJob.perform_later(d.doi)
end
@@ -899,20 +899,21 @@ def self.transfer(options={})
end

query = options[:query] || "*"
+ size = (options[:size] || 1000).to_i

- response = Doi.query(query, client_id: options[:client_id], page: { size: 0, cursor: 1 })
+ response = Doi.query(query, client_id: options[:client_id], page: { size: 1, cursor: 0 })
logger.info "[Transfer] #{response.results.total} DOIs found for client #{options[:client_id]}."

if options[:client_id] && options[:target_id] && response.results.total > 0
# walk through results using cursor
- prev_cursor = 0
- cursor = 1
+ cursor = 0

- while cursor > prev_cursor do
- response = Doi.query(query, client_id: options[:client_id], page: { size: 1000, cursor: cursor })
- old_cursor = cursor
- cursor = Array.wrap(response.results.results.last.to_h[:sort]).first.to_i
- prev_cursor = old_cursor
+ while response.results.results.length > 0 do
+ response = Doi.query(query, client_id: options[:client_id], page: { size: size, cursor: cursor })
+ break unless response.results.results.length > 0

logger.info "[Transfer] Transferring #{response.results.results.length} DOIs starting with _id #{cursor + 1}."
+ cursor = response.results.results.last[:sort].first.to_i

response.results.results.each do |d|
TransferJob.perform_later(d.doi, target_id: options[:target_id])
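Aside (not part of the diff): the five maintenance tasks above now share one loop shape: seed the cursor at 0, fetch a page, stop when the query returns no hits, and advance the cursor to the sort value of the last hit. A condensed sketch of that shared pattern; walk_with_cursor, the query string, and ExampleJob are placeholders, not code from this commit:

# Condensed sketch of the cursor loop the maintenance tasks now share.
def walk_with_cursor(query)
  logger = Logger.new(STDOUT)
  cursor = 0

  loop do
    response = Doi.query(query, page: { size: 1000, cursor: cursor })
    break if response.results.results.length.zero?

    logger.info "Processing #{response.results.results.length} DOIs starting with _id #{cursor + 1}."
    cursor = response.results.results.last[:sort].first.to_i

    response.results.results.each { |d| ExampleJob.perform_later(d.doi) }
  end
end

walk_with_cursor("created:[2019-02-19 TO 2019-02-19]")

The seed queries also change from size: 0 to size: 1 for the same reason: a count-only response carries no results, so a length-based loop condition would never start; fetching a single document primes the first iteration.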
6 changes: 3 additions & 3 deletions spec/models/doi_spec.rb
@@ -505,16 +505,16 @@
let(:provider) { create(:provider) }
let(:client) { create(:client, provider: provider) }
let(:target) { create(:client, provider: provider, symbol: provider.symbol + ".TARGET", name: "Target Client") }
- let!(:dois) { create_list(:doi, 3, client: client) }
+ let!(:dois) { create_list(:doi, 5, client: client) }

before do
Doi.import
sleep 1
end

it "transfer all dois" do
- response = Doi.transfer(client_id: client.symbol.downcase, target_id: target.symbol.downcase)
- expect(response).to eq(3)
+ response = Doi.transfer(client_id: client.symbol.downcase, target_id: target.symbol.downcase, size: 3)
+ expect(response).to eq(5)
end
end

