Skip to content

Commit

Permalink
fix: prevent the backfill from running forever.
Browse files Browse the repository at this point in the history
There's an edge case where a post can still be assigned to an author that no longer exists. This throws the backfill script into an infinite loop: the author term for that user is never found or created, so the post's missing author-term record is never resolved. At the end of each pass of the do-while loop, the script asks for the "remaining posts which need author terms", and it gets the same rows back over and over.

This fix addresses this in 2 ways:
1. If an author is not found, we look for the most prolific author on the site and assign the posts to them. If there is no prolific author, one is created. And if one can't be created, an exception is thrown so that the script can't proceed.
2. Checks have been added so that the script never processes more rows than the number of posts initially found to be missing author terms.
  • Loading branch information
eddiesshop committed Oct 18, 2024
1 parent 35aeda1 commit 7bd22d3
Showing 1 changed file with 207 additions and 6 deletions.
213 changes: 207 additions & 6 deletions php/class-wp-cli.php
Original file line number Diff line number Diff line change
Expand Up @@ -135,7 +135,7 @@ public function create_terms_for_posts(): void {
* @subcommand create-author-terms-for-posts
* @synopsis [--post-types=<csv>] [--post-statuses=<csv>] [--unbatched] [--records-per-batch=<records-per-batch>] [--specific-post-ids=<csv>] [--above-post-id=<above-post-id>] [--below-post-id=<below-post-id>]
* @return void
* @throws Exception If above-post-id is greater than or equal to below-post-id.
* @throws Exception If above-post-id is greater than or equal to below-post-id, or if unable to obtain a prolific author account.
*/
public function create_author_terms_for_posts( $args, $assoc_args ) {
$post_types = isset( $assoc_args['post-types'] ) ? explode( ',', $assoc_args['post-types'] ) : [ 'post' ];
Expand All @@ -160,6 +160,7 @@ public function create_author_terms_for_posts( $args, $assoc_args ) {
WP_CLI::line( sprintf( 'Found %d posts with missing author terms.', $count_of_posts_with_missing_author_terms ) );

$authors = [];
$author_trans = [];
$author_terms = [];
$count = 0;
$affected = 0;
Expand All @@ -178,14 +179,31 @@ public function create_author_terms_for_posts( $args, $assoc_args ) {

do {
foreach ( $posts_with_missing_author_terms as $record ) {
$record->post_author = intval( $record->post_author );
++$count;
$complete_percentage = $this->get_formatted_complete_percentage( $count, $count_of_posts_with_missing_author_terms );
WP_CLI::line( sprintf( 'Processing post %d (%d/%d or %s)', $record->post_id, $count, $count_of_posts_with_missing_author_terms, $complete_percentage ) );

$author = ( ! empty( $authors[ $record->post_author ] ) ) ?
$authors[ $record->post_author ] :
get_user_by( 'id', $record->post_author );
$authors[ $record->post_author ] = $author;
$author = null;
if ( isset( $authors[ $record->post_author ] ) ) {
$author = $authors[ $record->post_author ];
} elseif ( isset( $author_trans[ $record->post_author ] ) ) {
$nonexistent_user_id = $record->post_author;
$record->post_author = $author_trans[ $record->post_author ];
$author = $authors[ $record->post_author ];
WP_CLI::warning( sprintf( 'Must transfer posts from User ID: %d to Author ID: %d (%s)', $nonexistent_user_id, $author->ID, $author->user_nicename ) );
} else {
$author = get_user_by( 'id', $record->post_author );

if ( false === $author ) {
$author = $this->get_most_prolific_author();
WP_CLI::warning( sprintf( 'Must transfer posts from User ID: %d to Author ID: %d (%s)', $record->post_author, $author->ID, $author->user_nicename ) );
$author_trans[ $record->post_author ] = $author->ID;
$record->post_author = $author->ID;
}

$authors[ $record->post_author ] = $author;
}

$author_term = ( ! empty( $author_terms[ $record->post_author ] ) ) ?
$author_terms[ $record->post_author ] :
Expand All @@ -209,14 +227,18 @@ public function create_author_terms_for_posts( $args, $assoc_args ) {
++$affected;
}

if ( $count >= $count_of_posts_with_missing_author_terms ) {
break;
}

if ( $count && 0 === $count % 500 ) {
sleep( 1 ); // Sleep for a second every 500 posts to avoid overloading the database.
}
}

$posts_with_missing_author_terms = [];

if ( $batched ) {
if ( $batched && $count < $count_of_posts_with_missing_author_terms ) {
++$page;
WP_CLI::line( sprintf( 'Processing page %d.', $page ) );
$posts_with_missing_author_terms = $this->get_posts_with_missing_terms(
Expand Down Expand Up @@ -1233,6 +1255,185 @@ private function get_posts_with_missing_terms( $author_taxonomy, $post_types = [
// phpcs:enable
}

/**
 * Obtains the author account which has the most posts assigned to it, for use as a fallback author.
 *
 * Users are ranked by the number of rows in the posts table that reference them, and the first
 * one holding the `edit_posts` capability is chosen. If no such user exists, a new user with the
 * `author` role is created. The resolved account is stored in the `co-authors-plus` object cache
 * group for an hour so that repeated calls resolve to the same account.
 *
 * @return WP_User
 * @throws Exception If unable to successfully create an author user account.
 */
public function get_most_prolific_author() {
	$cached_author = wp_cache_get( 'co-authors-plus-most-prolific-author', 'co-authors-plus' );

	if ( $cached_author instanceof WP_User ) {
		return $cached_author;
	}

	global $wpdb;

	// Use the table names exposed by $wpdb rather than literal `wp_posts`/`wp_users`,
	// so the query also works on installs that use a different table prefix.
	// phpcs:ignore WordPress.DB.DirectDatabaseQuery.DirectQuery, WordPress.DB.DirectDatabaseQuery.NoCaching
	$most_prolific_users = $wpdb->get_results(
		"SELECT
			u.ID,
			COUNT(p.ID) AS true_count
		FROM {$wpdb->posts} p
		INNER JOIN {$wpdb->users} u ON p.post_author = u.ID
		GROUP BY u.ID
		ORDER BY true_count DESC"
	);

	$most_prolific_author = false;

	foreach ( $most_prolific_users as $user ) {
		$user_id = intval( $user->ID );

		if ( ! user_can( $user_id, 'edit_posts' ) ) {
			continue;
		}

		$candidate = get_user_by( 'id', $user_id );

		// Only accept a candidate that actually loaded; otherwise keep looking.
		if ( $candidate instanceof WP_User ) {
			$most_prolific_author = $candidate;
			break;
		}
	}

	if ( ! $most_prolific_author ) { // If we STILL can't find a user, we need to create one.
		$user_nicename = 'user-' . substr( md5( wp_rand() ), 0, 10 );
		$user_login    = 'co-authors-plus-author-term-backfill-' . substr( md5( wp_rand() ), 0, 10 );
		$user_email    = $user_nicename . '@' . wp_parse_url( get_site_url(), PHP_URL_HOST );
		$maybe_user_id = wp_insert_user(
			[
				'user_pass'     => wp_generate_password( 24 ),
				'user_login'    => $user_login,
				'user_nicename' => $user_nicename,
				'user_email'    => $user_email,
				'display_name'  => $this->get_random_display_name(),
				'role'          => 'author',
			]
		);

		if ( is_wp_error( $maybe_user_id ) ) { // (╯°□°)╯︵ ┻━┻
			$exception_message = '(' . $maybe_user_id->get_error_code() . ') ' . $maybe_user_id->get_error_message();
			throw new Exception( wp_kses( $exception_message, wp_kses_allowed_html( 'post' ) ) );
		}

		$most_prolific_author = get_user_by( 'id', $maybe_user_id );

		// Callers dereference the result immediately, so never hand back `false`.
		if ( ! $most_prolific_author instanceof WP_User ) {
			throw new Exception( esc_html( sprintf( 'Unable to load newly created author account (User ID: %d).', $maybe_user_id ) ) );
		}
	}

	wp_cache_set( 'co-authors-plus-most-prolific-author', $most_prolific_author, 'co-authors-plus', HOUR_IN_SECONDS );

	// Return the resolved user directly instead of reading it back from the cache:
	// if the cache write did not stick, a second wp_cache_get() would yield `false`.
	return $most_prolific_author;
}

/**
 * Helper function to get randomly generated display names.
 *
 * Picks one entry from a fixed list of first names and one entry from a fixed list of
 * last names, each via wp_rand(), and joins them with a single space.
 *
 * @return string Display name in the form "First Last".
 */
private function get_random_display_name() {
	$first_names = [
		'Olivia',
		'Amelia',
		'Emma',
		'Sophia',
		'Charlotte',
		'Isabella',
		'Ava',
		'Mia',
		'Ellie',
		'Luna',
		'Harper',
		'Aurora',
		'Evelyn',
		'Eliana',
		'Aria',
		'Violet',
		'Nova',
		'Lily',
		'Camila',
		'Gianna',
		'Mila',
		'Sofia',
		'Hazel',
		'Scarlett',
		'Ivy',
		'Noah',
		'Liam',
		'Oliver',
		'Elijah',
		'Mateo',
		'Lucas',
		'Levi',
		'Ezra',
		'Asher',
		'Leo',
		'James',
		'Luca',
		'Henry',
		'Hudson',
		'Ethan',
		'Muhammad',
		'Maverick',
		'Theodore',
		'Grayson',
		'Daniel',
		'Michael',
		'Jack',
		'Benjamin',
		'Elias',
		'Sebastian',
	];
	$last_names  = [
		'Prakash',
		'Pei',
		'Rosa',
		'Kato',
		'Aung',
		'Cauhan',
		'Im',
		'Chon',
		'Saito',
		'Peña',
		'May',
		'Gonzales',
		'Francisco',
		'Awad',
		'Correa',
		'Sawadogo',
		'Perera',
		'Ran',
		'Haruna',
		'Sinh',
		'Santiago',
		'Min',
		'Hwang',
		'Pandit',
		'Ta',
		'Toure',
		'Mu',
		'Ko',
		'Chai',
		'Khin',
		'Aktar',
		'Munda',
		'Robinson',
		'Suleiman',
		'Chakraborty',
		'Sharif',
		'Juarez',
		'Patal',
		'Kamal',
		'Jain',
		'Phiri',
		'Salah',
		'Walker',
		'Akbar',
		'Clark',
		'Lewis',
		'Hosen',
		'Diarra',
		'Avila',
		'Chaudhary',
	];

	// Derive the upper bounds from the lists themselves (rather than a literal 49) so that
	// adding or removing a name can never yield an undefined offset or an unreachable entry.
	$first_name = $first_names[ wp_rand( 0, count( $first_names ) - 1 ) ];
	$last_name  = $last_names[ wp_rand( 0, count( $last_names ) - 1 ) ];

	return $first_name . ' ' . $last_name;
}

/**
* Convenience function to generate a formatted percentage string.
*
Expand Down

0 comments on commit 7bd22d3

Please sign in to comment.