diff --git a/admin/sql/CreateFunctions.sql b/admin/sql/CreateFunctions.sql index 53cc4073f42..70aac428589 100644 --- a/admin/sql/CreateFunctions.sql +++ b/admin/sql/CreateFunctions.sql @@ -1928,7 +1928,12 @@ BEGIN a_rg.artist, -- Withdrawn releases were once official by definition bool_and(r.status IS NOT NULL AND r.status != 1 AND r.status != 5), - rg.type::SMALLINT, + (rg.type ORDER BY CASE + WHEN rg.type = 3 THEN 2 -- Sort EPs above singles + WHEN rg.type = 2 THEN 3 -- Sort singles below EPs + ELSE rg.type + END ASC + )::SMALLINT, array_agg( DISTINCT st.secondary_type ORDER BY st.secondary_type) FILTER (WHERE st.secondary_type IS NOT NULL diff --git a/admin/sql/updates/20241017-mbs-9253.sql b/admin/sql/updates/20241017-mbs-9253.sql new file mode 100644 index 00000000000..319a5120e39 --- /dev/null +++ b/admin/sql/updates/20241017-mbs-9253.sql @@ -0,0 +1,93 @@ +\set ON_ERROR_STOP 1 + +BEGIN; + +CREATE OR REPLACE FUNCTION get_artist_release_group_rows( + release_group_id INTEGER +) RETURNS SETOF artist_release_group AS $$ +BEGIN + -- PostgreSQL 12 generates a vastly more efficient plan when only + -- one release group ID is passed. A condition like + -- `rg.id = any(...)` can be over 200x slower, even with only one + -- release group ID in the array. + RETURN QUERY EXECUTE $SQL$ + SELECT DISTINCT ON (a_rg.artist, rg.id) + a_rg.is_track_artist, + a_rg.artist, + -- Withdrawn releases were once official by definition + bool_and(r.status IS NOT NULL AND r.status != 1 AND r.status != 5), + (rg.type ORDER BY CASE + WHEN rg.type = 3 THEN 2 -- Sort EPs above singles + WHEN rg.type = 2 THEN 3 -- Sort singles below EPs + ELSE rg.type + END ASC + )::SMALLINT, + array_agg( + DISTINCT st.secondary_type ORDER BY st.secondary_type) + FILTER (WHERE st.secondary_type IS NOT NULL + )::SMALLINT[], + integer_date( + rgm.first_release_date_year, + rgm.first_release_date_month, + rgm.first_release_date_day + ), + left(rg.name, 1)::CHAR(1), + rg.id + FROM ( + SELECT FALSE AS is_track_artist, rgacn.artist, rg.id AS release_group + FROM release_group rg + JOIN artist_credit_name rgacn ON rgacn.artist_credit = rg.artist_credit + UNION ALL + SELECT TRUE AS is_track_artist, tacn.artist, r.release_group + FROM release r + JOIN medium m ON m.release = r.id + JOIN track t ON t.medium = m.id + JOIN artist_credit_name tacn ON tacn.artist_credit = t.artist_credit + ) a_rg + JOIN release_group rg ON rg.id = a_rg.release_group + LEFT JOIN release r ON r.release_group = rg.id + JOIN release_group_meta rgm ON rgm.id = rg.id + LEFT JOIN release_group_secondary_type_join st ON st.release_group = rg.id + $SQL$ || (CASE WHEN release_group_id IS NULL THEN '' ELSE 'WHERE rg.id = $1' END) || + $SQL$ + GROUP BY a_rg.is_track_artist, a_rg.artist, rgm.id, rg.id + ORDER BY a_rg.artist, rg.id, a_rg.is_track_artist + $SQL$ + USING release_group_id; +END; +$$ LANGUAGE plpgsql; + +-- We update the table for any existing RGs of type Single or EP +-- with this one-off script; the updated function will keep it up after this. +DO $$ +DECLARE + release_group_ids INTEGER[]; + release_group_id INTEGER; +BEGIN + SELECT array_agg(DISTINCT rg.id) + INTO release_group_ids + FROM release_group rg + WHERE rg.type = 2 OR rg.type = 3; -- Single or EP + + IF coalesce(array_length(release_group_ids, 1), 0) > 0 THEN + -- If the user hasn't generated `artist_release_group`, then we + -- shouldn't update or insert to it. MBS determines whether to + -- use this table based on it being non-empty, so a partial + -- table would manifest as partial data on the website and + -- webservice. + PERFORM 1 FROM artist_release_group LIMIT 1; + IF FOUND THEN + DELETE FROM artist_release_group WHERE release_group = any(release_group_ids); + + FOREACH release_group_id IN ARRAY release_group_ids LOOP + -- We handle each release group ID separately because + -- the `get_artist_release_group_rows` query can be + -- planned much more efficiently that way. + INSERT INTO artist_release_group + SELECT * FROM get_artist_release_group_rows(release_group_id); + END LOOP; + END IF; + END IF; +END $$; + +COMMIT; diff --git a/admin/sql/updates/schema-change/30.all.sql b/admin/sql/updates/schema-change/30.all.sql new file mode 100644 index 00000000000..7b4e5595c92 --- /dev/null +++ b/admin/sql/updates/schema-change/30.all.sql @@ -0,0 +1,99 @@ +-- Generated by CompileSchemaScripts.pl from: +-- 20241017-mbs-9253.sql +\set ON_ERROR_STOP 1 +BEGIN; +SET search_path = musicbrainz, public; +SET LOCAL statement_timeout = 0; +-------------------------------------------------------------------------------- +SELECT '20241017-mbs-9253.sql'; + + +CREATE OR REPLACE FUNCTION get_artist_release_group_rows( + release_group_id INTEGER +) RETURNS SETOF artist_release_group AS $$ +BEGIN + -- PostgreSQL 12 generates a vastly more efficient plan when only + -- one release group ID is passed. A condition like + -- `rg.id = any(...)` can be over 200x slower, even with only one + -- release group ID in the array. + RETURN QUERY EXECUTE $SQL$ + SELECT DISTINCT ON (a_rg.artist, rg.id) + a_rg.is_track_artist, + a_rg.artist, + -- Withdrawn releases were once official by definition + bool_and(r.status IS NOT NULL AND r.status != 1 AND r.status != 5), + (rg.type ORDER BY CASE + WHEN rg.type = 3 THEN 2 -- Sort EPs above singles + WHEN rg.type = 2 THEN 3 -- Sort singles below EPs + ELSE rg.type + END ASC + )::SMALLINT, + array_agg( + DISTINCT st.secondary_type ORDER BY st.secondary_type) + FILTER (WHERE st.secondary_type IS NOT NULL + )::SMALLINT[], + integer_date( + rgm.first_release_date_year, + rgm.first_release_date_month, + rgm.first_release_date_day + ), + left(rg.name, 1)::CHAR(1), + rg.id + FROM ( + SELECT FALSE AS is_track_artist, rgacn.artist, rg.id AS release_group + FROM release_group rg + JOIN artist_credit_name rgacn ON rgacn.artist_credit = rg.artist_credit + UNION ALL + SELECT TRUE AS is_track_artist, tacn.artist, r.release_group + FROM release r + JOIN medium m ON m.release = r.id + JOIN track t ON t.medium = m.id + JOIN artist_credit_name tacn ON tacn.artist_credit = t.artist_credit + ) a_rg + JOIN release_group rg ON rg.id = a_rg.release_group + LEFT JOIN release r ON r.release_group = rg.id + JOIN release_group_meta rgm ON rgm.id = rg.id + LEFT JOIN release_group_secondary_type_join st ON st.release_group = rg.id + $SQL$ || (CASE WHEN release_group_id IS NULL THEN '' ELSE 'WHERE rg.id = $1' END) || + $SQL$ + GROUP BY a_rg.is_track_artist, a_rg.artist, rgm.id, rg.id + ORDER BY a_rg.artist, rg.id, a_rg.is_track_artist + $SQL$ + USING release_group_id; +END; +$$ LANGUAGE plpgsql; + +-- We update the table for any existing RGs of type Single or EP +-- with this one-off script; the updated function will keep it up after this. +DO $$ +DECLARE + release_group_ids INTEGER[]; + release_group_id INTEGER; +BEGIN + SELECT array_agg(DISTINCT rg.id) + INTO release_group_ids + FROM release_group rg + WHERE rg.type = 2 OR rg.type = 3; -- Single or EP + + IF coalesce(array_length(release_group_ids, 1), 0) > 0 THEN + -- If the user hasn't generated `artist_release_group`, then we + -- shouldn't update or insert to it. MBS determines whether to + -- use this table based on it being non-empty, so a partial + -- table would manifest as partial data on the website and + -- webservice. + PERFORM 1 FROM artist_release_group LIMIT 1; + IF FOUND THEN + DELETE FROM artist_release_group WHERE release_group = any(release_group_ids); + + FOREACH release_group_id IN ARRAY release_group_ids LOOP + -- We handle each release group ID separately because + -- the `get_artist_release_group_rows` query can be + -- planned much more efficiently that way. + INSERT INTO artist_release_group + SELECT * FROM get_artist_release_group_rows(release_group_id); + END LOOP; + END IF; + END IF; +END $$; + +COMMIT; diff --git a/lib/MusicBrainz/Server/Data/ReleaseGroup.pm b/lib/MusicBrainz/Server/Data/ReleaseGroup.pm index fb59727ae3f..64caacae28d 100644 --- a/lib/MusicBrainz/Server/Data/ReleaseGroup.pm +++ b/lib/MusicBrainz/Server/Data/ReleaseGroup.pm @@ -349,14 +349,20 @@ sub _find_by_artist_slow ON rgstj.secondary_type = rgst.id WHERE rgstj.release_group = rg.id ORDER BY name ASC - ) secondary_types + ) secondary_types, + (CASE + WHEN rg.type = 3 THEN 2 -- Sort EPs above singles + WHEN rg.type = 2 THEN 3 -- Sort singles below EPs + ELSE rg.type + END + ) as sorted_type FROM ' . $self->_table . ' JOIN artist_credit_name acn ON acn.artist_credit = rg.artist_credit ' . join(' ', @$extra_joins) . ' WHERE ' . join(' AND ', @$conditions) . ' ORDER BY - rg.type, secondary_types, + sorted_type, secondary_types, rgm.first_release_date_year, rgm.first_release_date_month, rgm.first_release_date_day, diff --git a/upgrade.json b/upgrade.json index 80853eb68d6..222989406b4 100644 --- a/upgrade.json +++ b/upgrade.json @@ -230,5 +230,10 @@ "20240223-mbs-13421-fks.sql", "20240319-mbs-13514.sql" ] + }, + "30": { + "all": [ + "20241017-mbs-9253.sql" + ] } }