Skip to content

Commit

Permalink
Merge pull request #104 from NYPL/SCC-4347-update-property-by-csv-script
Browse files Browse the repository at this point in the history
SCC-4347 - Add scripts/update-property-by-csv script
  • Loading branch information
nonword authored Feb 6, 2025
2 parents c5fbec6 + 9771265 commit 5afa05c
Show file tree
Hide file tree
Showing 7 changed files with 698 additions and 46 deletions.
40 changes: 33 additions & 7 deletions package-lock.json

Some generated files are not rendered by default. Learn more about how customized files appear on GitHub.

3 changes: 2 additions & 1 deletion package.json
Original file line number Diff line number Diff line change
Expand Up @@ -22,7 +22,7 @@
"@nypl/scsb-rest-client": "^2.0.0",
"accent-fold": "^1.0.1",
"avsc": "^5.7.7",
"csv-parse": "^5.3.8",
"csv-parse": "^5.6.0",
"deep-object-diff": "^1.1.9",
"lodash": "^4.17.21",
"newrelic": "^11.7.0",
Expand All @@ -49,6 +49,7 @@
"@aws-sdk/credential-providers": "^3.614.0",
"chai": "^4.3.7",
"chai-as-promised": "^7.1.1",
"csv-parser": "^3.0.0",
"dotenv": "^16.0.3",
"minimist": "^1.2.8",
"mocha": "^10.1.0",
Expand Down
42 changes: 6 additions & 36 deletions scripts/bulk-index.js
Original file line number Diff line number Diff line change
Expand Up @@ -112,10 +112,11 @@ const {
awsCredentialsFromIni,
batch,
batchIdentifiersByTypeAndNyplSource,
delay,
die,
camelize,
capitalize,
secondsAsFriendlyDuration
printProgress
} = require('./utils')
const { setCredentials: kmsSetCredentials } = require('../lib/kms')
const logger = require('../lib/logger')
Expand Down Expand Up @@ -329,36 +330,6 @@ const restoreModelPrefetch = () => {
}
}

/**
 * Log a one-line summary of progress so far.
 *
 * Reads the batch size from the module-level `argv.batchSize` and writes the
 * summary via `logger.info`.
 *
 * @param count {int} - Number of records processed to date
 * @param total {int} - Total number of records in the job. May be falsy when
 *   the total is not known, in which case it is shown as '?'
 * @param startTime {Date} - When did the job begin?
 */
const printProgress = (count, total, startTime) => {
  const progress = count / total
  const elapsedSeconds = (new Date() - startTime) / 1000
  const recordsPerSecond = count / elapsedSeconds
  const recordsPerHour = recordsPerSecond * 60 * 60

  // Upper bound of the batch being reported. Only clamp to `total` when it is
  // known; Math.min with an undefined total would otherwise yield NaN.
  const batchEnd = total
    ? Math.min(count + argv.batchSize, total)
    : count + argv.batchSize

  // Calculate ETA. This is not a finite number when the total is unknown or
  // when nothing has been processed yet (rate of 0), so only format it when
  // there is a real value to show:
  const etaSeconds = Math.ceil((total - count) / recordsPerSecond)
  const etaDisplay = Number.isFinite(etaSeconds)
    ? secondsAsFriendlyDuration(etaSeconds).display
    : '?'

  logger.info([
    `Processing ${count} - ${batchEnd} of ${total || '?'}`,
    progress ? `: ${(progress * 100).toFixed(2)}%` : '',
    recordsPerHour ? ` (${Math.round(recordsPerHour).toLocaleString()} records/h)` : '',
    ' ETA: ' + etaDisplay
  ].join(''))
}

/**
 * Wait for a given number of milliseconds.
 *
 * @param howLong {int} - Number of ms to wait
 * @returns {Promise} A promise that resolves (with no value) after `howLong` ms
 **/
const delay = (howLong) => {
  return new Promise((resolve) => {
    setTimeout(resolve, howLong)
  })
}

/**
* Build a Bib/Item Service db query based on given options.
*
Expand Down Expand Up @@ -454,7 +425,7 @@ const buildSqlQuery = (options) => {
const updateByBibOrItemServiceQuery = async (options) => {
options = Object.assign({
// Default progress logger:
progressCallback: (count, total, startTime) => printProgress(count, total, startTime)
progressCallback: (count, total, startTime) => printProgress(count, total, argv.batchSize, startTime)
}, options)

let cursor
Expand All @@ -475,9 +446,6 @@ const updateByBibOrItemServiceQuery = async (options) => {
let done = false
while (!done && (count < options.limit || !options.limit)) {
await instrument('Bulk-index batch', async () => {
// Log out progress so far:
options.progressCallback(count, total, startTime)

// Pull next batch of records from the cursor:
const rows = await cursor.read(options.batchSize)

Expand Down Expand Up @@ -507,8 +475,10 @@ const updateByBibOrItemServiceQuery = async (options) => {
retries -= 1
})
}

count += rows.length

// Log out progress so far:
options.progressCallback(count, total, startTime)
})
}

Expand Down
Loading

0 comments on commit 5afa05c

Please sign in to comment.