diff --git a/app/models/datacite_doi.rb b/app/models/datacite_doi.rb
index 8c018925b..132f37c3b 100644
--- a/app/models/datacite_doi.rb
+++ b/app/models/datacite_doi.rb
@@ -39,12 +39,13 @@ def self.import_by_ids(options = {})
           DataciteDoi.where(type: "DataciteDoi").maximum(:id)
       ).
         to_i
+    batch_size = options[:batch_size] || 50
     count = 0
 
     # TODO remove query for type once STI is enabled
     # SQS message size limit is 256 kB, up to 2 GB with S3
     DataciteDoi.where(type: "DataciteDoi").where(id: from_id..until_id).
-      find_in_batches(batch_size: 50) do |dois|
+      find_in_batches(batch_size: batch_size) do |dois|
       ids = dois.pluck(:id)
       DataciteDoiImportInBulkJob.perform_later(ids, index: index)
       count += ids.length
diff --git a/lib/tasks/datacite_doi.rake b/lib/tasks/datacite_doi.rake
index 4191231f2..40fbf98c7 100644
--- a/lib/tasks/datacite_doi.rake
+++ b/lib/tasks/datacite_doi.rake
@@ -65,8 +65,14 @@ namespace :datacite_doi do
   task import: :environment do
     from_id = (ENV["FROM_ID"] || DataciteDoi.minimum(:id)).to_i
     until_id = (ENV["UNTIL_ID"] || DataciteDoi.maximum(:id)).to_i
-
-    DataciteDoi.import_by_ids(from_id: from_id, until_id: until_id, index: ENV["INDEX"] || DataciteDoi.inactive_index)
+    batch_size = ENV["BATCH_SIZE"].nil? ? 50 : ENV["BATCH_SIZE"].to_i
+
+    DataciteDoi.import_by_ids(
+      from_id: from_id,
+      until_id: until_id,
+      batch_size: batch_size,
+      index: ENV["INDEX"] || DataciteDoi.inactive_index
+    )
   end
 
   desc "Import one datacite DOI"
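
Usage sketch for the new BATCH_SIZE option. FROM_ID, UNTIL_ID, BATCH_SIZE, INDEX, and the datacite_doi:import task all come from the diff above; the id range and the index name "dois-v2" are hypothetical placeholders for illustration:

    # Hypothetical invocation: enqueue bulk-import jobs for DOIs with ids
    # 1..100000, 200 ids per SQS message instead of the former fixed 50
    FROM_ID=1 UNTIL_ID=100000 BATCH_SIZE=200 INDEX=dois-v2 bundle exec rake datacite_doi:import

When BATCH_SIZE is unset, both the rake task and DataciteDoi.import_by_ids fall back to the previous hard-coded batch size of 50, so existing invocations keep their behavior.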