Skip to content

Commit

Permalink
Merge pull request #162 from wp-cli/fix/chunking
Browse files Browse the repository at this point in the history
Fix offset handling when doing chunked replaces
  • Loading branch information
schlessera authored Jul 19, 2021
2 parents 08ba95f + bb6335b commit 7ffc573
Show file tree
Hide file tree
Showing 2 changed files with 101 additions and 44 deletions.
42 changes: 42 additions & 0 deletions features/search-replace.feature
Original file line number Diff line number Diff line change
Expand Up @@ -1142,3 +1142,45 @@ Feature: Do global search/replace
"""
Success:
"""

# Regression test for chunked search/replace: every matching row of a table
# that is larger than a single chunk must be found and replaced, none skipped.
Scenario: Chunking works without skipping lines
Given a WP install
# The script below generates test_db.sql. It creates a `wp_123_test` table with
# an auto-increment primary key and a TEXT column, then fills it with a single
# INSERT statement: 199 loop iterations plus one final line, 10 values each,
# i.e. 2000 rows that all contain 'abc'.
And a create_sql_file.sh file:
"""
#!/bin/bash
echo "CREATE TABLE \`wp_123_test\` (\`key\` INT(5) UNSIGNED NOT NULL AUTO_INCREMENT, \`text\` TEXT, PRIMARY KEY (\`key\`) );" > test_db.sql
echo "INSERT INTO \`wp_123_test\` (\`text\`) VALUES" >> test_db.sql
index=1
while [[ $index -le 199 ]];
do
echo "('abc'),('abc'),('abc'),('abc'),('abc'),('abc'),('abc'),('abc'),('abc'),('abc')," >> test_db.sql
index=`expr $index + 1`
done
echo "('abc'),('abc'),('abc'),('abc'),('abc'),('abc'),('abc'),('abc'),('abc'),('abc');" >> test_db.sql
"""
And I run `bash create_sql_file.sh`
And I run `wp db query "SOURCE test_db.sql;"`

# Step 1 - dry run: all 2000 rows must be counted.
# NOTE(review): --all-tables-with-prefix is presumably what brings the custom
# `wp_123_test` table into scope, and --precise presumably selects the
# PHP-based (chunked) replacement path - confirm against the command's docs.
When I run `wp search-replace --dry-run 'abc' 'def' --all-tables-with-prefix --skip-columns=guid,domain --precise`
Then STDOUT should contain:
"""
Success: 2000 replacements to be made.
"""

# Step 2 - real run: the reported number of replacements must match the dry run.
When I run `wp search-replace 'abc' 'def' --all-tables-with-prefix --skip-columns=guid,domain --precise`
Then STDOUT should contain:
"""
Success: Made 2000 replacements.
"""

# Step 3 - dry run again: no 'abc' may be left behind, which is what would
# happen if rows had been skipped while chunking in step 2.
When I run `wp search-replace --dry-run 'abc' 'def' --all-tables-with-prefix --skip-columns=guid,domain --precise`
Then STDOUT should contain:
"""
Success: 0 replacements to be made.
"""

# Step 4 - real run again: repeating the replace must be a no-op.
When I run `wp search-replace 'abc' 'def' --all-tables-with-prefix --skip-columns=guid,domain --precise`
Then STDOUT should contain:
"""
Success: Made 0 replacements.
"""
103 changes: 59 additions & 44 deletions src/Search_Replace_Command.php
Original file line number Diff line number Diff line change
@@ -1,5 +1,12 @@
<?php

use cli\Colors;
use cli\Table;
use WP_CLI\Iterators;
use WP_CLI\SearchReplacer;
use WP_CLI\Utils;
use function cli\safe_substr;

class Search_Replace_Command extends WP_CLI_Command {

private $dry_run;
Expand All @@ -15,8 +22,9 @@ class Search_Replace_Command extends WP_CLI_Command {
private $include_columns;
private $format;
private $report;
private $report_changed_only;
private $verbose;

private $report_changed_only;
private $log_handle = null;
private $log_before_context = 40;
private $log_after_context = 40;
Expand Down Expand Up @@ -167,24 +175,24 @@ public function __invoke( $args, $assoc_args ) {
$new = array_shift( $args );
$total = 0;
$report = array();
$this->dry_run = \WP_CLI\Utils\get_flag_value( $assoc_args, 'dry-run' );
$php_only = \WP_CLI\Utils\get_flag_value( $assoc_args, 'precise' );
$this->recurse_objects = \WP_CLI\Utils\get_flag_value( $assoc_args, 'recurse-objects', true );
$this->verbose = \WP_CLI\Utils\get_flag_value( $assoc_args, 'verbose' );
$this->format = \WP_CLI\Utils\get_flag_value( $assoc_args, 'format' );
$this->regex = \WP_CLI\Utils\get_flag_value( $assoc_args, 'regex', false );
$this->dry_run = Utils\get_flag_value( $assoc_args, 'dry-run' );
$php_only = Utils\get_flag_value( $assoc_args, 'precise' );
$this->recurse_objects = Utils\get_flag_value( $assoc_args, 'recurse-objects', true );
$this->verbose = Utils\get_flag_value( $assoc_args, 'verbose' );
$this->format = Utils\get_flag_value( $assoc_args, 'format' );
$this->regex = Utils\get_flag_value( $assoc_args, 'regex', false );

if ( null !== $this->regex ) {
$default_regex_delimiter = false;
$this->regex_flags = \WP_CLI\Utils\get_flag_value( $assoc_args, 'regex-flags', false );
$this->regex_delimiter = \WP_CLI\Utils\get_flag_value( $assoc_args, 'regex-delimiter', '' );
$this->regex_flags = Utils\get_flag_value( $assoc_args, 'regex-flags', false );
$this->regex_delimiter = Utils\get_flag_value( $assoc_args, 'regex-delimiter', '' );
if ( '' === $this->regex_delimiter ) {
$this->regex_delimiter = chr( 1 );
$default_regex_delimiter = true;
}
}

$regex_limit = \WP_CLI\Utils\get_flag_value( $assoc_args, 'regex-limit' );
$regex_limit = Utils\get_flag_value( $assoc_args, 'regex-limit' );
if ( null !== $regex_limit ) {
if ( ! preg_match( '/^(?:[0-9]+|-1)$/', $regex_limit ) || 0 === (int) $regex_limit ) {
WP_CLI::error( '`--regex-limit` expects a non-zero positive integer or -1.' );
Expand Down Expand Up @@ -215,16 +223,16 @@ public function __invoke( $args, $assoc_args ) {
}
}

$this->skip_columns = explode( ',', \WP_CLI\Utils\get_flag_value( $assoc_args, 'skip-columns' ) );
$this->skip_tables = explode( ',', \WP_CLI\Utils\get_flag_value( $assoc_args, 'skip-tables' ) );
$this->include_columns = array_filter( explode( ',', \WP_CLI\Utils\get_flag_value( $assoc_args, 'include-columns' ) ) );
$this->skip_columns = explode( ',', Utils\get_flag_value( $assoc_args, 'skip-columns' ) );
$this->skip_tables = explode( ',', Utils\get_flag_value( $assoc_args, 'skip-tables' ) );
$this->include_columns = array_filter( explode( ',', Utils\get_flag_value( $assoc_args, 'include-columns' ) ) );

if ( $old === $new && ! $this->regex ) {
WP_CLI::warning( "Replacement value '{$old}' is identical to search value '{$new}'. Skipping operation." );
exit;
}

$export = \WP_CLI\Utils\get_flag_value( $assoc_args, 'export' );
$export = Utils\get_flag_value( $assoc_args, 'export' );
if ( null !== $export ) {
if ( $this->dry_run ) {
WP_CLI::error( 'You cannot supply --dry-run and --export at the same time.' );
Expand All @@ -239,15 +247,15 @@ public function __invoke( $args, $assoc_args ) {
WP_CLI::error( sprintf( 'Unable to open export file "%s" for writing: %s.', $assoc_args['export'], $error['message'] ) );
}
}
$export_insert_size = WP_CLI\Utils\get_flag_value( $assoc_args, 'export_insert_size', 50 );
$export_insert_size = Utils\get_flag_value( $assoc_args, 'export_insert_size', 50 );
// phpcs:ignore WordPress.PHP.StrictComparisons.LooseComparison -- See the code, this is deliberate.
if ( (int) $export_insert_size == $export_insert_size && $export_insert_size > 0 ) {
$this->export_insert_size = $export_insert_size;
}
$php_only = true;
}

$log = \WP_CLI\Utils\get_flag_value( $assoc_args, 'log' );
$log = Utils\get_flag_value( $assoc_args, 'log' );
if ( null !== $log ) {
if ( true === $log || '-' === $log ) {
$this->log_handle = STDOUT;
Expand All @@ -259,12 +267,12 @@ public function __invoke( $args, $assoc_args ) {
}
}
if ( $this->log_handle ) {
$before_context = \WP_CLI\Utils\get_flag_value( $assoc_args, 'before_context' );
$before_context = Utils\get_flag_value( $assoc_args, 'before_context' );
if ( null !== $before_context && preg_match( '/^[0-9]+$/', $before_context ) ) {
$this->log_before_context = (int) $before_context;
}

$after_context = \WP_CLI\Utils\get_flag_value( $assoc_args, 'after_context' );
$after_context = Utils\get_flag_value( $assoc_args, 'after_context' );
if ( null !== $after_context && preg_match( '/^[0-9]+$/', $after_context ) ) {
$this->log_after_context = (int) $after_context;
}
Expand Down Expand Up @@ -297,14 +305,14 @@ public function __invoke( $args, $assoc_args ) {
);
}

$this->log_colors = self::get_colors( $assoc_args, $default_log_colors );
$this->log_colors = $this->get_colors( $assoc_args, $default_log_colors );
$this->log_encoding = 0 === strpos( $wpdb->charset, 'utf8' ) ? 'UTF-8' : false;
}
}

$this->report = \WP_CLI\Utils\get_flag_value( $assoc_args, 'report', true );
$this->report = Utils\get_flag_value( $assoc_args, 'report', true );
// Defaults to true if logging, else defaults to false.
$this->report_changed_only = \WP_CLI\Utils\get_flag_value( $assoc_args, 'report-changed-only', null !== $this->log_handle );
$this->report_changed_only = Utils\get_flag_value( $assoc_args, 'report-changed-only', null !== $this->log_handle );

if ( $this->regex_flags ) {
$php_only = true;
Expand All @@ -314,7 +322,7 @@ public function __invoke( $args, $assoc_args ) {
$this->skip_columns[] = 'user_pass';

// Get table names based on leftover $args or supplied $assoc_args
$tables = \WP_CLI\Utils\wp_get_table_names( $args, $assoc_args );
$tables = Utils\wp_get_table_names( $args, $assoc_args );

foreach ( $tables as $table ) {

Expand Down Expand Up @@ -418,7 +426,7 @@ public function __invoke( $args, $assoc_args ) {
}

if ( $this->report && ! empty( $report ) ) {
$table = new \cli\Table();
$table = new Table();
$table->setHeaders( array( 'Table', 'Column', 'Replacements', 'Type' ) );
$table->setRows( $report );
$table->display();
Expand All @@ -429,7 +437,7 @@ public function __invoke( $args, $assoc_args ) {
$success_message = 1 === $total ? "Made 1 replacement and exported to {$assoc_args['export']}." : "Made {$total} replacements and exported to {$assoc_args['export']}.";
} else {
$success_message = 1 === $total ? 'Made 1 replacement.' : "Made $total replacements.";
if ( $total && 'Default' !== WP_CLI\Utils\wp_get_cache_type() ) {
if ( $total && 'Default' !== Utils\wp_get_cache_type() ) {
$success_message .= ' Please remember to flush your persistent object cache with `wp cache flush`.';
}
}
Expand All @@ -450,15 +458,15 @@ private function php_export_table( $table, $old, $new ) {
'chunk_size' => $chunk_size,
);

$replacer = new \WP_CLI\SearchReplacer( $old, $new, $this->recurse_objects, $this->regex, $this->regex_flags, $this->regex_delimiter, false, $this->regex_limit );
$replacer = new SearchReplacer( $old, $new, $this->recurse_objects, $this->regex, $this->regex_flags, $this->regex_delimiter, false, $this->regex_limit );
$col_counts = array_fill_keys( $all_columns, 0 );
if ( $this->verbose && 'table' === $this->format ) {
$this->start_time = microtime( true );
WP_CLI::log( sprintf( 'Checking: %s', $table ) );
}

$rows = array();
foreach ( new \WP_CLI\Iterators\Table( $args ) as $i => $row ) {
foreach ( new Iterators\Table( $args ) as $i => $row ) {
$row_fields = array();
foreach ( $all_columns as $col ) {
$value = $row->$col;
Expand Down Expand Up @@ -527,15 +535,15 @@ private function php_handle_col( $col, $primary_keys, $table, $old, $new ) {
global $wpdb;

$count = 0;
$replacer = new \WP_CLI\SearchReplacer( $old, $new, $this->recurse_objects, $this->regex, $this->regex_flags, $this->regex_delimiter, null !== $this->log_handle, $this->regex_limit );
$replacer = new SearchReplacer( $old, $new, $this->recurse_objects, $this->regex, $this->regex_flags, $this->regex_delimiter, null !== $this->log_handle, $this->regex_limit );

$table_sql = self::esc_sql_ident( $table );
$col_sql = self::esc_sql_ident( $col );
$where = $this->regex ? '' : " WHERE $col_sql" . $wpdb->prepare( ' LIKE BINARY %s', '%' . self::esc_like( $old ) . '%' );
$escaped_primary_keys = self::esc_sql_ident( $primary_keys );
$primary_keys_sql = implode( ',', $escaped_primary_keys );
$order_by_keys = array_map(
function( $key ) {
static function ( $key ) {
return "{$key} ASC";
},
$escaped_primary_keys
Expand All @@ -544,6 +552,10 @@ function( $key ) {
$limit = 1000;
$offset = 0;

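// Updates have to be deferred until the chunking is completed. Rows are
// fetched page by page via LIMIT/OFFSET, and a row that has been updated may
// no longer match the WHERE clause; updating mid-iteration would shrink the
// result set underneath the offset and cause the next page to skip rows.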
$updates = [];

// 2 errors:
// - WordPress.DB.PreparedSQL.InterpolatedNotPrepared -- escaped through self::esc_sql_ident
// - WordPress.CodeAnalysis.AssignmentInCondition -- no reason to do copy-paste for a single valid assignment in while
Expand All @@ -552,7 +564,7 @@ function( $key ) {
foreach ( $rows as $keys ) {
$where_sql = '';
foreach ( (array) $keys as $k => $v ) {
if ( strlen( $where_sql ) ) {
if ( '' !== $where_sql ) {
$where_sql .= ' AND ';
}
$where_sql .= self::esc_sql_ident( $k ) . ' = ' . self::esc_sql_value( $v );
Expand All @@ -576,21 +588,24 @@ function( $key ) {
$replacer->clear_log_data();
}

if ( $this->dry_run ) {
$count++;
} else {
$count++;
if ( ! $this->dry_run ) {
$update_where = array();
foreach ( (array) $keys as $k => $v ) {
$update_where[ $k ] = $v;
}

$count += $wpdb->update( $table, array( $col => $value ), $update_where );
$updates[] = [ $table, array( $col => $value ), $update_where ];
}
}

$offset += $limit;
}

foreach ( $updates as $update ) {
$wpdb->update( ...$update );
}

if ( $this->verbose && 'table' === $this->format ) {
$time = round( microtime( true ) - $this->start_time, 3 );
WP_CLI::log( sprintf( '%d rows affected using PHP (in %ss).', $count, $time ) );
Expand Down Expand Up @@ -728,7 +743,7 @@ private static function esc_like( $old ) {
* @return string|array An escaped string if given a string, or an array of escaped strings if given an array of strings.
*/
private static function esc_sql_ident( $idents ) {
$backtick = function ( $v ) {
$backtick = static function ( $v ) {
// Escape any backticks in the identifier by doubling.
return '`' . str_replace( '`', '``', $v ) . '`';
};
Expand All @@ -745,7 +760,7 @@ private static function esc_sql_ident( $idents ) {
* @return string|array A quoted string if given a string, or an array of quoted strings if given an array of strings.
*/
private static function esc_sql_value( $values ) {
$quote = function ( $v ) {
$quote = static function ( $v ) {
// Don't quote integer values to avoid MySQL's implicit type conversion.
if ( preg_match( '/^[+-]?[0-9]{1,20}$/', $v ) ) { // MySQL BIGINT UNSIGNED max 18446744073709551615 (20 digits).
return esc_sql( $v );
Expand All @@ -772,18 +787,18 @@ private static function esc_sql_value( $values ) {
private function get_colors( $assoc_args, $colors ) {
$color_reset = WP_CLI::colorize( '%n' );

$color_code_callback = function ( $v ) {
$color_code_callback = static function ( $v ) {
return substr( $v, 1 );
};

$color_codes = array_keys( \cli\Colors::getColors() );
$color_codes = array_keys( Colors::getColors() );
$color_codes = array_map( $color_code_callback, $color_codes );
$color_codes = implode( '', $color_codes );

$color_codes_regex = '/^(?:%[' . $color_codes . '])*$/';

foreach ( array_keys( $colors ) as $color_col ) {
$col_color_flag = \WP_CLI\Utils\get_flag_value( $assoc_args, $color_col . '_color' );
$col_color_flag = Utils\get_flag_value( $assoc_args, $color_col . '_color' );
if ( null !== $col_color_flag ) {
if ( ! preg_match( $color_codes_regex, $col_color_flag, $matches ) ) {
WP_CLI::warning( "Unrecognized percent color code '$col_color_flag' for '{$color_col}_color'." );
Expand Down Expand Up @@ -891,12 +906,12 @@ private function log_bits( $search_regex, $old_data, $old_matches, $new ) {
$new_matches = array();
$new_data = preg_replace_callback(
$search_regex,
function ( $matches ) use ( $old_matches, $new, $is_regex, &$new_matches, &$i, &$diff ) {
static function ( $matches ) use ( $old_matches, $new, $is_regex, &$new_matches, &$i, &$diff ) {
if ( $is_regex ) {
// Sub in any back references, "$1", "\2" etc, in the replacement string.
$new = preg_replace_callback(
'/(?<!\\\\)(?:\\\\\\\\)*((?:\\\\|\\$)[0-9]{1,2}|\\${[0-9]{1,2}\\})/',
function ( $m ) use ( $matches ) {
static function ( $m ) use ( $matches ) {
$idx = (int) str_replace( array( '\\', '$', '{', '}' ), '', $m[0] );
return isset( $matches[ $idx ] ) ? $matches[ $idx ] : '';
},
Expand Down Expand Up @@ -939,14 +954,14 @@ function ( $m ) use ( $matches ) {

// Offsets are in bytes, so need to use `strlen()` and `substr()` before using `safe_substr()`.
if ( $this->log_before_context && $old_offset && ! $append_next ) {
$old_before = \cli\safe_substr( substr( $old_data, $last_old_offset, $old_offset - $last_old_offset ), -$this->log_before_context, null /*length*/, false /*is_width*/, $encoding );
$new_before = \cli\safe_substr( substr( $new_data, $last_new_offset, $new_offset - $last_new_offset ), -$this->log_before_context, null /*length*/, false /*is_width*/, $encoding );
$old_before = safe_substr( substr( $old_data, $last_old_offset, $old_offset - $last_old_offset ), -$this->log_before_context, null /*length*/, false /*is_width*/, $encoding );
$new_before = safe_substr( substr( $new_data, $last_new_offset, $new_offset - $last_new_offset ), -$this->log_before_context, null /*length*/, false /*is_width*/, $encoding );
}
if ( $this->log_after_context ) {
$old_end_offset = $old_offset + strlen( $old_match );
$new_end_offset = $new_offset + strlen( $new_match );
$old_after = \cli\safe_substr( substr( $old_data, $old_end_offset ), 0, $this->log_after_context, false /*is_width*/, $encoding );
$new_after = \cli\safe_substr( substr( $new_data, $new_end_offset ), 0, $this->log_after_context, false /*is_width*/, $encoding );
$old_after = safe_substr( substr( $old_data, $old_end_offset ), 0, $this->log_after_context, false /*is_width*/, $encoding );
$new_after = safe_substr( substr( $new_data, $new_end_offset ), 0, $this->log_after_context, false /*is_width*/, $encoding );
// To lessen context duplication in output, shorten the after context if it overlaps with the next match.
if ( $i + 1 < $match_cnt && $old_end_offset + strlen( $old_after ) > $old_matches[0][ $i + 1 ][1] ) {
$old_after = substr( $old_after, 0, $old_matches[0][ $i + 1 ][1] - $old_end_offset );
Expand Down

0 comments on commit 7ffc573

Please sign in to comment.