Skip to content

Commit

Permalink
Fix issues with improper quoting when using CTE, refs tidyverse#1559
Browse files Browse the repository at this point in the history
  • Loading branch information
aguynamedryan committed Nov 22, 2024
1 parent ca33c0f commit 646396a
Show file tree
Hide file tree
Showing 8 changed files with 85 additions and 26 deletions.
2 changes: 1 addition & 1 deletion R/lazy-ops.R
Original file line number Diff line number Diff line change
Expand Up @@ -69,7 +69,7 @@ sql_build.lazy_base_remote_query <- function(op, con, ...) {

#' @export
sql_build.lazy_base_local_query <- function(op, con, ...) {
base_query(op$name)
base_query(as_table_path(op$name, con))
}

#' @export
Expand Down
1 change: 1 addition & 0 deletions R/lazy-select-query.R
Original file line number Diff line number Diff line change
Expand Up @@ -165,6 +165,7 @@ sql_build.lazy_select_query <- function(op, con, ..., sql_options = NULL) {
}

alias <- remote_name(op$x, null_if_local = FALSE) %||% unique_subquery_name()
alias <- as_table_path(alias, con)
from <- sql_build(op$x, con, sql_options = sql_options)
select_sql_list <- get_select_sql(
select = op$select,
Expand Down
12 changes: 3 additions & 9 deletions R/query-join.R
Original file line number Diff line number Diff line change
Expand Up @@ -118,21 +118,15 @@ flatten_query.join_query <- flatten_query_2_tables
flatten_query.multi_join_query <- function(qry, query_list, con) {
x <- qry$x
query_list_new <- flatten_query(x, query_list, con)
qry$x <- get_subquery_name(x, query_list_new)
qry$x <- get_subquery_name(x, query_list_new, con)

for (i in vctrs::vec_seq_along(qry$joins$table)) {
y <- qry$joins$table[[i]]
query_list_new <- flatten_query(y, query_list_new, con)
qry$joins$table[[i]] <- get_subquery_name(y, query_list_new)
qry$joins$table[[i]] <- get_subquery_name(y, query_list_new, con)
}

# TODO reuse query
name <- as_table_path(unique_subquery_name(), con)
wrapped_query <- set_names(list(qry), name)

query_list$queries <- c(query_list_new$queries, wrapped_query)
query_list$name <- name
query_list
querylist_reuse_query(qry, query_list_new, con)
}


Expand Down
2 changes: 1 addition & 1 deletion R/query-select.R
Original file line number Diff line number Diff line change
Expand Up @@ -148,7 +148,7 @@ warn_drop_order_by <- function() {
flatten_query.select_query <- function(qry, query_list, con) {
from <- qry$from
query_list <- flatten_query(from, query_list, con)
qry$from <- get_subquery_name(from, query_list, con)

qry$from <- get_subquery_name(from, query_list)
querylist_reuse_query(qry, query_list, con)
}
12 changes: 3 additions & 9 deletions R/query-set-op.R
Original file line number Diff line number Diff line change
Expand Up @@ -108,19 +108,13 @@ flatten_query.set_op_query <- flatten_query_2_tables
flatten_query.union_query <- function(qry, query_list, con) {
x <- qry$x
query_list_new <- flatten_query(x, query_list, con)
qry$x <- get_subquery_name(x, query_list_new)
qry$x <- get_subquery_name(x, query_list_new, con)

for (i in vctrs::vec_seq_along(qry$unions$table)) {
y <- qry$unions$table[[i]]
query_list_new <- flatten_query(y, query_list_new, con)
qry$unions$table[[i]] <- get_subquery_name(y, query_list_new)
qry$unions$table[[i]] <- get_subquery_name(y, query_list_new, con)
}

# TODO reuse query
name <- as_table_path(unique_subquery_name(), con)
wrapped_query <- set_names(list(qry), name)

query_list$queries <- c(query_list_new$queries, wrapped_query)
query_list$name <- name
query_list
querylist_reuse_query(qry, query_list_new, con)
}
12 changes: 6 additions & 6 deletions R/sql-build.R
Original file line number Diff line number Diff line change
Expand Up @@ -144,7 +144,7 @@ cte_render <- function(query_list, con) {
ctes <- purrr::imap(
query_list[-n],
function(query, name) {
name <- table_path(name)
name <- as_table_path(name, con)
glue_sql2(con, "{.name name} {.kw 'AS'} (\n{query}\n)")
}
)
Expand All @@ -153,10 +153,10 @@ cte_render <- function(query_list, con) {
glue_sql2(con, "{.kw 'WITH'} ", cte_query, "\n", query_list[[n]])
}

get_subquery_name <- function(x, query_list) {
get_subquery_name <- function(x, query_list, con) {
if (inherits(x, "base_query")) return(x)

base_query(query_list$name)
base_query(as_table_path(query_list$name, con))
}

flatten_query <- function(qry, query_list, con) {
Expand All @@ -169,7 +169,7 @@ querylist_reuse_query <- function(qry, query_list, con) {
if (!is.na(id)) {
query_list$name <- names(query_list$queries)[[id]]
} else {
name <- as_table_path(unique_subquery_name(), con)
name <- unique_subquery_name()
wrapped_query <- set_names(list(qry), name)
query_list$queries <- c(query_list$queries, wrapped_query)
query_list$name <- name
Expand All @@ -181,11 +181,11 @@ querylist_reuse_query <- function(qry, query_list, con) {
flatten_query_2_tables <- function(qry, query_list, con) {
x <- qry$x
query_list_x <- flatten_query(x, query_list, con)
qry$x <- get_subquery_name(x, query_list_x)
qry$x <- get_subquery_name(x, query_list_x, con)

y <- qry$y
query_list_y <- flatten_query(y, query_list_x, con)
qry$y <- get_subquery_name(y, query_list_y)
qry$y <- get_subquery_name(y, query_list_y, con)

querylist_reuse_query(qry, query_list_y, con)
}
Expand Down
37 changes: 37 additions & 0 deletions tests/testthat/_snaps/cte-quoting.md
Original file line number Diff line number Diff line change
@@ -0,0 +1,37 @@
# CTE quoting works right

Code
.
Output
<SQL> WITH `q01` AS (
SELECT `x` * 2.0 AS `x`, `y`
FROM `lf1`
),
`q02` AS (
SELECT 'x' AS `column_name`
FROM `q01`
),
`q03` AS (
SELECT `x`, `y` * 2.0 AS `y`
FROM `lf1`
),
`q04` AS (
SELECT 'y' AS `column_name`
FROM `q03` AS `q01`
),
`q05` AS (
SELECT *
FROM `q02`
UNION ALL
SELECT *
FROM `q04`
)
SELECT `...1`.`column_name` AS `column_name`
FROM `q05` AS `...1`
LEFT JOIN `q05` AS `...2`
ON (`...1`.`column_name` = `...2`.`column_name`)
LEFT JOIN `q05` AS `...3`
ON (`...1`.`column_name` = `...3`.`column_name`)

33 changes: 33 additions & 0 deletions tests/testthat/test-cte-quoting.R
Original file line number Diff line number Diff line change
@@ -0,0 +1,33 @@
test_that("CTE quoting works right", {
lf1 <- lazy_frame(x = 1, y = 2, .name = "lf1")
lf2 <- lazy_frame(x = 1, z = 2, .name = "lf2")

# The query is nonsensical, because I took a failing query from the wild and
# whiddled it down to the minimum amount of code required to reveal two
# separate quoting issues when using CTEs

double_it <- function(.data, column_name) {
.data %>%
mutate(
"{ column_name }" := .data[[column_name]] * 2
)
}

skinny <- function(column_name) {
lf1 %>%
double_it(column_name) %>%
mutate(
column_name = column_name,
.keep = "none"
)
}

tall_tbl <- purrr::map(c("x", "y"), skinny) %>%
purrr::reduce(dplyr::union_all)

query <- tall_tbl %>%
left_join(tall_tbl, by = join_by(column_name)) %>%
left_join(tall_tbl, by = join_by(column_name)) %>%
sql_render(sql_options = sql_options(cte = TRUE)) %>%
expect_snapshot()
})

0 comments on commit 646396a

Please sign in to comment.