Skip to content

Commit

Permalink
Allow variable batch_size for rake datacite_doi imports
Browse files Browse the repository at this point in the history
  • Loading branch information
jrhoads committed Mar 9, 2023
1 parent f33a8cd commit 4c24167
Show file tree
Hide file tree
Showing 2 changed files with 10 additions and 3 deletions.
3 changes: 2 additions & 1 deletion app/models/datacite_doi.rb
Original file line number Diff line number Diff line change
Expand Up @@ -39,12 +39,13 @@ def self.import_by_ids(options = {})
DataciteDoi.where(type: "DataciteDoi").maximum(:id)
).
to_i
batch_size = options[:batch_size] || 50
count = 0

# TODO remove query for type once STI is enabled
# SQS message size limit is 256 kB, up to 2 GB with S3
DataciteDoi.where(type: "DataciteDoi").where(id: from_id..until_id).
find_in_batches(batch_size: 50) do |dois|
find_in_batches(batch_size: batch_size) do |dois|
ids = dois.pluck(:id)
DataciteDoiImportInBulkJob.perform_later(ids, index: index)
count += ids.length
Expand Down
10 changes: 8 additions & 2 deletions lib/tasks/datacite_doi.rake
Original file line number Diff line number Diff line change
Expand Up @@ -65,8 +65,14 @@ namespace :datacite_doi do
task import: :environment do
  # Inclusive ID range to import. When FROM_ID / UNTIL_ID are not set, the
  # range falls back to the smallest and largest DataciteDoi ids.
  from_id = (ENV["FROM_ID"] || DataciteDoi.minimum(:id)).to_i
  until_id = (ENV["UNTIL_ID"] || DataciteDoi.maximum(:id)).to_i

  # BATCH_SIZE is optional and defaults to 50. Parse it strictly in base 10:
  # String#to_i would silently turn a typo such as "abc" into 0, whereas
  # Integer() raises ArgumentError on malformed input.
  batch_size = Integer(ENV.fetch("BATCH_SIZE", "50"), 10)
  raise ArgumentError, "BATCH_SIZE must be a positive integer" unless batch_size.positive?

  # Invoke the import exactly once, forwarding the batch size; INDEX falls
  # back to DataciteDoi.inactive_index when not provided.
  DataciteDoi.import_by_ids(
    from_id: from_id,
    until_id: until_id,
    batch_size: batch_size,
    index: ENV["INDEX"] || DataciteDoi.inactive_index
  )
end

desc "Import one datacite DOI"
Expand Down

0 comments on commit 4c24167

Please sign in to comment.