From e58e86c1253e3dfa3e60ac386f762f86a23b2e29 Mon Sep 17 00:00:00 2001 From: Hadley Wickham Date: Tue, 24 Dec 2024 07:43:35 -0600 Subject: [PATCH 1/4] Add support for relative urls (#610) Add `base_url` parameter to `url_parse()` and implement new `req_url_relative()`. Fixes #449 --- NAMESPACE | 1 + NEWS.md | 2 ++ R/req-url.R | 14 ++++++++++++++ R/url.R | 10 ++++++++-- man/req_url.Rd | 8 ++++++++ man/url_parse.Rd | 8 +++++++- tests/testthat/test-req-url.R | 6 ++++++ tests/testthat/test-url.R | 8 ++++++++ 8 files changed, 54 insertions(+), 3 deletions(-) diff --git a/NAMESPACE b/NAMESPACE index 0613c62a..9b303d2e 100644 --- a/NAMESPACE +++ b/NAMESPACE @@ -95,6 +95,7 @@ export(req_url) export(req_url_path) export(req_url_path_append) export(req_url_query) +export(req_url_relative) export(req_user_agent) export(req_verbose) export(request) diff --git a/NEWS.md b/NEWS.md index f09f990e..58822493 100644 --- a/NEWS.md +++ b/NEWS.md @@ -1,5 +1,7 @@ # httr2 (development version) +* New `req_url_relative()` for constructing relative urls (#449). +* `url_parse()` gains `base_url` argument so you can also use it to parse relative URLs (#449). * `url_parse()` now uses `curl::curl_parse_url()` which is much faster and more correct (#577). * `req_retry()` now defaults to `max_tries = 2` with a message. Set to `max_tries = 1` to disable retries. diff --git a/R/req-url.R b/R/req-url.R index a35b1489..cc317dbc 100644 --- a/R/req-url.R +++ b/R/req-url.R @@ -31,6 +31,11 @@ #' req |> #' req_url("http://google.com") #' +#' # Use a relative url +#' req <- request("http://example.com/a/b/c") +#' req |> req_url_relative("..") +#' req |> req_url_relative("/d/e/f") +#' #' # Use .multi to control what happens with vector parameters: #' req |> req_url_query(id = 100:105, .multi = "comma") #' req |> req_url_query(id = 100:105, .multi = "explode") @@ -47,6 +52,15 @@ req_url <- function(req, url) { req } +#' @export +#' @rdname req_url +req_url_relative <- function(req, url) { + check_request(req) + + new_url <- url_parse(url, base_url = req$url) + req_url(req, url_build(new_url)) +} + #' @export #' @rdname req_url #' @param .multi Controls what happens when an element of `...` is a vector diff --git a/R/url.R b/R/url.R index 518fe2d5..9b249fe0 100644 --- a/R/url.R +++ b/R/url.R @@ -6,6 +6,7 @@ #' #' @param url For `url_parse()` a string to parse into a URL; #' for `url_build()` a URL to turn back into a string. +#' @param base_url Use this as a parent, if `url` is a relative URL. #' @returns #' * `url_build()` returns a string. #' * `url_parse()` returns a URL: a S3 list with class `httr2_url` @@ -18,15 +19,20 @@ #' url_parse("http://google.com:80/?a=1&b=2") #' url_parse("http://username@google.com:80/path;test?a=1&b=2#40") #' +#' # You can parse a relative URL if you also provide a base url +#' url_parse("foo", "http://google.com/bar/") +#' url_parse("..", "http://google.com/bar/") +#' #' url <- url_parse("http://google.com/") #' url$port <- 80 #' url$hostname <- "example.com" #' url$query <- list(a = 1, b = 2, c = 3) #' url_build(url) -url_parse <- function(url) { +url_parse <- function(url, base_url = NULL) { check_string(url) + check_string(base_url, allow_null = TRUE) - curl <- curl::curl_parse_url(url) + curl <- curl::curl_parse_url(url, baseurl = base_url) parsed <- list( scheme = curl$scheme, diff --git a/man/req_url.Rd b/man/req_url.Rd index ca5b50b6..933e7a58 100644 --- a/man/req_url.Rd +++ b/man/req_url.Rd @@ -2,6 +2,7 @@ % Please edit documentation in R/req-url.R \name{req_url} \alias{req_url} +\alias{req_url_relative} \alias{req_url_query} \alias{req_url_path} \alias{req_url_path_append} @@ -9,6 +10,8 @@ \usage{ req_url(req, url) +req_url_relative(req, url) + req_url_query(.req, ..., .multi = c("error", "comma", "pipe", "explode")) req_url_path(req, ...) @@ -66,6 +69,11 @@ req |> req |> req_url("http://google.com") +# Use a relative url +req <- request("http://example.com/a/b/c") +req |> req_url_relative("..") +req |> req_url_relative("/d/e/f") + # Use .multi to control what happens with vector parameters: req |> req_url_query(id = 100:105, .multi = "comma") req |> req_url_query(id = 100:105, .multi = "explode") diff --git a/man/url_parse.Rd b/man/url_parse.Rd index 15a8d0aa..f36adeeb 100644 --- a/man/url_parse.Rd +++ b/man/url_parse.Rd @@ -5,13 +5,15 @@ \alias{url_build} \title{Parse and build URLs} \usage{ -url_parse(url) +url_parse(url, base_url = NULL) url_build(url) } \arguments{ \item{url}{For \code{url_parse()} a string to parse into a URL; for \code{url_build()} a URL to turn back into a string.} + +\item{base_url}{Use this as a parent, if \code{url} is a relative URL.} } \value{ \itemize{ @@ -32,6 +34,10 @@ url_parse("http://google.com:80/") url_parse("http://google.com:80/?a=1&b=2") url_parse("http://username@google.com:80/path;test?a=1&b=2#40") +# You can parse a relative URL if you also provide a base url +url_parse("foo", "http://google.com/bar/") +url_parse("..", "http://google.com/bar/") + url <- url_parse("http://google.com/") url$port <- 80 url$hostname <- "example.com" diff --git a/tests/testthat/test-req-url.R b/tests/testthat/test-req-url.R index aa55bcd2..a76ecfa8 100644 --- a/tests/testthat/test-req-url.R +++ b/tests/testthat/test-req-url.R @@ -96,6 +96,12 @@ test_that("can opt-out of query escaping", { expect_equal(req_url_query(req, a = I(","))$url, "http://example.com/?a=,") }) +test_that("can construct relative urls", { + req <- request("http://example.com/a/b/c.html") + expect_equal(req_url_relative(req, ".")$url, "http://example.com/a/b/") + expect_equal(req_url_relative(req, "..")$url, "http://example.com/a/") + expect_equal(req_url_relative(req, "/d/e/f")$url, "http://example.com/d/e/f") +}) # explode ----------------------------------------------------------------- test_that("explode handles expected inputs", { diff --git a/tests/testthat/test-url.R b/tests/testthat/test-url.R index 885d08bc..7cae86e7 100644 --- a/tests/testthat/test-url.R +++ b/tests/testthat/test-url.R @@ -21,6 +21,14 @@ test_that("can round trip urls", { expect_equal(map(urls, ~ url_build(url_parse(.x))), urls) }) +test_that("can parse relative urls", { + base <- "http://example.com/a/b/c/" + expect_equal(url_parse("d", base)$path, "/a/b/c/d") + expect_equal(url_parse("..", base)$path, "/a/b/") + + expect_equal(url_parse("//archive.org", base)$scheme, "http") +}) + test_that("can print all url details", { expect_snapshot( url_parse("http://user:pass@example.com:80/path?a=1&b=2&c={1{2}3}#frag") From e6f425ee480b9b464a6ab552b6bf7360a1c2181c Mon Sep 17 00:00:00 2001 From: Hadley Wickham Date: Tue, 24 Dec 2024 07:48:46 -0600 Subject: [PATCH 2/4] Improve url manipulation tooling (#611) * Check inputs and export `url_modify()`. Fixes #464. * Check inputs to `url_build()`. Fixes #482. --- NAMESPACE | 1 + NEWS.md | 1 + R/url.R | 126 ++++++++++++++++++++++++++++------- _pkgdown.yml | 5 +- man/url_build.Rd | 21 ++++++ man/url_modify.Rd | 60 +++++++++++++++++ man/url_parse.Rd | 35 ++++------ tests/testthat/_snaps/url.md | 69 +++++++++++++++++++ tests/testthat/test-url.R | 60 ++++++++++++++--- 9 files changed, 325 insertions(+), 53 deletions(-) create mode 100644 man/url_build.Rd create mode 100644 man/url_modify.Rd diff --git a/NAMESPACE b/NAMESPACE index 9b303d2e..01598d8b 100644 --- a/NAMESPACE +++ b/NAMESPACE @@ -144,6 +144,7 @@ export(secret_write_rds) export(signal_total_pages) export(throttle_status) export(url_build) +export(url_modify) export(url_parse) export(with_mock) export(with_mocked_responses) diff --git a/NEWS.md b/NEWS.md index 58822493..bc1fb892 100644 --- a/NEWS.md +++ b/NEWS.md @@ -1,5 +1,6 @@ # httr2 (development version) +* New `url_modify()` makes it easier to modify an existing url (#464). * New `req_url_relative()` for constructing relative urls (#449). * `url_parse()` gains `base_url` argument so you can also use it to parse relative URLs (#449). * `url_parse()` now uses `curl::curl_parse_url()` which is much faster and more correct (#577). diff --git a/R/url.R b/R/url.R index 9b249fe0..cd07befc 100644 --- a/R/url.R +++ b/R/url.R @@ -1,18 +1,16 @@ -#' Parse and build URLs +#' Parse a URL into its component pieces #' -#' `url_parse()` parses a URL into its component pieces; `url_build()` does -#' the reverse, converting a list of pieces into a string URL. See `r rfc(3986)` -#' for the details of the parsing algorithm. +#' `url_parse()` parses a URL into its component parts, powered by +#' [curl::curl_parse_url()]. The parsing algorithm follows the specifications +#' detailed in `r rfc(3986)`. #' -#' @param url For `url_parse()` a string to parse into a URL; -#' for `url_build()` a URL to turn back into a string. +#' @param url A string containing the URL to parse. #' @param base_url Use this as a parent, if `url` is a relative URL. -#' @returns -#' * `url_build()` returns a string. -#' * `url_parse()` returns a URL: a S3 list with class `httr2_url` -#' and elements `scheme`, `hostname`, `port`, `path`, `fragment`, `query`, -#' `username`, `password`. +#' @returns An S3 object of class `httr2_url` with the following components: +#' `scheme`, `hostname`, `username`, `password`, `port`, `path`, `query`, and +#' `fragment`. #' @export +#' @family URL manipulation #' @examples #' url_parse("http://google.com/") #' url_parse("http://google.com:80/") @@ -22,12 +20,6 @@ #' # You can parse a relative URL if you also provide a base url #' url_parse("foo", "http://google.com/bar/") #' url_parse("..", "http://google.com/bar/") -#' -#' url <- url_parse("http://google.com/") -#' url$port <- 80 -#' url$hostname <- "example.com" -#' url$query <- list(a = 1, b = 2, c = 3) -#' url_build(url) url_parse <- function(url, base_url = NULL) { check_string(url) check_string(base_url, allow_null = TRUE) @@ -48,10 +40,88 @@ url_parse <- function(url, base_url = NULL) { parsed } -url_modify <- function(url, ..., error_call = caller_env()) { - url <- url_parse(url) - url <- modify_list(url, ..., error_call = error_call) - url_build(url) +#' Modify a URL +#' +#' Modify components of a URL. The default value of each argument, `NULL`, +#' means leave the component as is. If you want to remove a component, +#' set it to `""`. Note that setting `scheme` or `hostname` to `""` will +#' create a relative URL. +#' +#' @param url A string or [parsed URL](url_parse). +#' @param scheme The scheme, typically either `http` or `https`. +#' @param hostname The hostname, e.g., `www.google.com` or `posit.co`. +#' @param username,password Username and password to embed in the URL. +#' Not generally recommended but needed for some legacy applications. +#' @param port An integer port number. +#' @param path The path, e.g., `/search`. Paths must start with `/`, so this +#' will be automatically added if omitted. +#' @param query Either a query string or a named list of query components. +#' @param fragment The fragment, e.g., `#section-1`. +#' @return An object of the same type as `url`. +#' @export +#' @family URL manipulation +#' @examples +#' url_modify("http://hadley.nz", path = "about") +#' url_modify("http://hadley.nz", scheme = "https") +#' url_modify("http://hadley.nz/abc", path = "/cde") +#' url_modify("http://hadley.nz/abc", path = "") +#' url_modify("http://hadley.nz?a=1", query = "b=2") +#' url_modify("http://hadley.nz?a=1", query = list(c = 3)) +url_modify <- function(url, + scheme = NULL, + hostname = NULL, + username = NULL, + password = NULL, + port = NULL, + path = NULL, + query = NULL, + fragment = NULL) { + + if (!is_string(url) && !is_url(url)) { + stop_input_type(url, "a string or parsed URL") + } + string_url <- is_string(url) + if (string_url) { + url <- url_parse(url) + } + + check_string(scheme, allow_null = TRUE) + check_string(hostname, allow_null = TRUE) + check_string(username, allow_null = TRUE) + check_string(password, allow_null = TRUE) + check_number_whole(port, min = 1, allow_null = TRUE) + check_string(path, allow_null = TRUE) + check_string(fragment, allow_null = TRUE) + + if (is_string(query)) { + query <- query_parse(query) + } else if (is.list(query) && (is_named(query) || length(query) == 0)) { + for (nm in names(query)) { + check_query_param(query[[nm]], paste0("query$", nm)) + } + } else if (!is.null(query)) { + stop_input_type(query, "a character vector, named list, or NULL") + } + + new <- compact(list( + scheme = scheme, + hostname = hostname, + username = username, + password = password, + port = port, + path = path, + query = query, + fragment = fragment + )) + is_empty <- map_lgl(new, identical, "") + new[is_empty] <- list(NULL) + url[names(new)] <- new + + if (string_url) { + url_build(url) + } else { + url + } } is_url <- function(x) inherits(x, "httr2_url") @@ -91,9 +161,19 @@ print.httr2_url <- function(x, ...) { invisible(x) } +#' Build a string from a URL object +#' +#' This is the inverse of [url_parse()], taking a parsed URL object and +#' turning it back into a string. +#' +#' @param url An URL object created by [url_parse]. +#' @family URL manipulation #' @export -#' @rdname url_parse url_build <- function(url) { + if (!is_url(url)) { + stop_input_type(url, "a parsed URL") + } + if (!is.null(url$query)) { query <- query_build(url$query) } else { @@ -119,7 +199,7 @@ url_build <- function(url) { authority <- NULL } - if (!is.null(url$path) && !startsWith(url$path, "/")) { + if (is.null(url$path) || !startsWith(url$path, "/")) { url$path <- paste0("/", url$path) } diff --git a/_pkgdown.yml b/_pkgdown.yml index d92798f2..a17aae11 100644 --- a/_pkgdown.yml +++ b/_pkgdown.yml @@ -70,12 +70,15 @@ reference: contents: - starts_with("resp_") +- title: URL manipulation + contents: + - starts_with("url_") + - title: Miscellaenous helpers contents: - curl_translate - secrets - obfuscate - - url_parse - title: OAuth desc: > diff --git a/man/url_build.Rd b/man/url_build.Rd new file mode 100644 index 00000000..c2099509 --- /dev/null +++ b/man/url_build.Rd @@ -0,0 +1,21 @@ +% Generated by roxygen2: do not edit by hand +% Please edit documentation in R/url.R +\name{url_build} +\alias{url_build} +\title{Build a string from a URL object} +\usage{ +url_build(url) +} +\arguments{ +\item{url}{An URL object created by \link{url_parse}.} +} +\description{ +This is the inverse of \code{\link[=url_parse]{url_parse()}}, taking a parsed URL object and +turning it back into a string. +} +\seealso{ +Other URL manipulation: +\code{\link{url_modify}()}, +\code{\link{url_parse}()} +} +\concept{URL manipulation} diff --git a/man/url_modify.Rd b/man/url_modify.Rd new file mode 100644 index 00000000..2f19d1d7 --- /dev/null +++ b/man/url_modify.Rd @@ -0,0 +1,60 @@ +% Generated by roxygen2: do not edit by hand +% Please edit documentation in R/url.R +\name{url_modify} +\alias{url_modify} +\title{Modify a URL} +\usage{ +url_modify( + url, + scheme = NULL, + hostname = NULL, + username = NULL, + password = NULL, + port = NULL, + path = NULL, + query = NULL, + fragment = NULL +) +} +\arguments{ +\item{url}{A string or \href{url_parse}{parsed URL}.} + +\item{scheme}{The scheme, typically either \code{http} or \code{https}.} + +\item{hostname}{The hostname, e.g., \code{www.google.com} or \code{posit.co}.} + +\item{username, password}{Username and password to embed in the URL. +Not generally recommended but needed for some legacy applications.} + +\item{port}{An integer port number.} + +\item{path}{The path, e.g., \verb{/search}. Paths must start with \code{/}, so this +will be automatically added if omitted.} + +\item{query}{Either a query string or a named list of query components.} + +\item{fragment}{The fragment, e.g., \verb{#section-1}.} +} +\value{ +An object of the same type as \code{url}. +} +\description{ +Modify components of a URL. The default value of each argument, \code{NULL}, +means leave the component as is. If you want to remove a component, +set it to \code{""}. Note that setting \code{scheme} or \code{hostname} to \code{""} will +create a relative URL. +} +\examples{ +url_modify("http://hadley.nz", path = "about") +url_modify("http://hadley.nz", scheme = "https") +url_modify("http://hadley.nz/abc", path = "/cde") +url_modify("http://hadley.nz/abc", path = "") +url_modify("http://hadley.nz?a=1", query = "b=2") +url_modify("http://hadley.nz?a=1", query = list(c = 3)) +} +\seealso{ +Other URL manipulation: +\code{\link{url_build}()}, +\code{\link{url_parse}()} +} +\concept{URL manipulation} diff --git a/man/url_parse.Rd b/man/url_parse.Rd index f36adeeb..9a881c05 100644 --- a/man/url_parse.Rd +++ b/man/url_parse.Rd @@ -2,31 +2,24 @@ % Please edit documentation in R/url.R \name{url_parse} \alias{url_parse} -\alias{url_build} -\title{Parse and build URLs} +\title{Parse a URL into its component pieces} \usage{ url_parse(url, base_url = NULL) - -url_build(url) } \arguments{ -\item{url}{For \code{url_parse()} a string to parse into a URL; -for \code{url_build()} a URL to turn back into a string.} +\item{url}{A string containing the URL to parse.} \item{base_url}{Use this as a parent, if \code{url} is a relative URL.} } \value{ -\itemize{ -\item \code{url_build()} returns a string. -\item \code{url_parse()} returns a URL: a S3 list with class \code{httr2_url} -and elements \code{scheme}, \code{hostname}, \code{port}, \code{path}, \code{fragment}, \code{query}, -\code{username}, \code{password}. -} +An S3 object of class \code{httr2_url} with the following components: +\code{scheme}, \code{hostname}, \code{username}, \code{password}, \code{port}, \code{path}, \code{query}, and +\code{fragment}. } \description{ -\code{url_parse()} parses a URL into its component pieces; \code{url_build()} does -the reverse, converting a list of pieces into a string URL. See \href{https://datatracker.ietf.org/doc/html/rfc3986}{RFC 3986} -for the details of the parsing algorithm. +\code{url_parse()} parses a URL into its component parts, powered by +\code{\link[curl:curl_parse_url]{curl::curl_parse_url()}}. The parsing algorithm follows the specifications +detailed in \href{https://datatracker.ietf.org/doc/html/rfc3986}{RFC 3986}. } \examples{ url_parse("http://google.com/") @@ -37,10 +30,10 @@ url_parse("http://username@google.com:80/path;test?a=1&b=2#40") # You can parse a relative URL if you also provide a base url url_parse("foo", "http://google.com/bar/") url_parse("..", "http://google.com/bar/") - -url <- url_parse("http://google.com/") -url$port <- 80 -url$hostname <- "example.com" -url$query <- list(a = 1, b = 2, c = 3) -url_build(url) } +\seealso{ +Other URL manipulation: +\code{\link{url_build}()}, +\code{\link{url_modify}()} +} +\concept{URL manipulation} diff --git a/tests/testthat/_snaps/url.md b/tests/testthat/_snaps/url.md index a80e0ec1..36e5c565 100644 --- a/tests/testthat/_snaps/url.md +++ b/tests/testthat/_snaps/url.md @@ -24,6 +24,75 @@ Error in `url_build()`: ! Cannot set url `password` without `username`. +# url_build validates its input + + Code + url_build("abc") + Condition + Error in `url_build()`: + ! `url` must be a parsed URL, not the string "abc". + +# url_modify checks its inputs + + Code + url_modify(1) + Condition + Error in `url_modify()`: + ! `url` must be a string or parsed URL, not the number 1. + Code + url_modify(url, scheme = 1) + Condition + Error in `url_modify()`: + ! `scheme` must be a single string or `NULL`, not the number 1. + Code + url_modify(url, hostname = 1) + Condition + Error in `url_modify()`: + ! `hostname` must be a single string or `NULL`, not the number 1. + Code + url_modify(url, port = "x") + Condition + Error in `url_modify()`: + ! `port` must be a whole number or `NULL`, not the string "x". + Code + url_modify(url, username = 1) + Condition + Error in `url_modify()`: + ! `username` must be a single string or `NULL`, not the number 1. + Code + url_modify(url, password = 1) + Condition + Error in `url_modify()`: + ! `password` must be a single string or `NULL`, not the number 1. + Code + url_modify(url, path = 1) + Condition + Error in `url_modify()`: + ! `path` must be a single string or `NULL`, not the number 1. + Code + url_modify(url, fragment = 1) + Condition + Error in `url_modify()`: + ! `fragment` must be a single string or `NULL`, not the number 1. + +# checks various query formats + + Code + url_modify(url, query = 1) + Condition + Error in `url_modify()`: + ! `query` must be a character vector, named list, or NULL, not the number 1. + Code + url_modify(url, query = list(1)) + Condition + Error in `url_modify()`: + ! `query` must be a character vector, named list, or NULL, not a list. + Code + url_modify(url, query = list(x = 1:2)) + Condition + Error in `url_modify()`: + ! Query value `query$x` must be a length-1 atomic vector, not an integer vector. + # validates inputs Code diff --git a/tests/testthat/test-url.R b/tests/testthat/test-url.R index 7cae86e7..b99d7224 100644 --- a/tests/testthat/test-url.R +++ b/tests/testthat/test-url.R @@ -6,7 +6,6 @@ test_that("can parse special cases", { test_that("can round trip urls", { urls <- list( - "file:///", "http://google.com/", "http://google.com/path", "http://google.com/path?a=1&b=2", @@ -35,18 +34,63 @@ test_that("can print all url details", { ) }) -test_that("ensures path always starts with /", { - expect_equal( - url_modify("https://example.com/abc", path = "def"), - "https://example.com/def" - ) -}) - test_that("password also requires username", { url <- url_parse("http://username:pwd@example.com") url$username <- NULL expect_snapshot(url_build(url), error = TRUE) +}) + +test_that("url_build validates its input", { + expect_snapshot(url_build("abc"), error = TRUE) +}) + +# modify url ------------------------------------------------------------- + +test_that("url_modify checks its inputs", { + url <- "http://example.com" + + expect_snapshot(error = TRUE, { + url_modify(1) + url_modify(url, scheme = 1) + url_modify(url, hostname = 1) + url_modify(url, port = "x") + url_modify(url, username = 1) + url_modify(url, password = 1) + url_modify(url, path = 1) + url_modify(url, fragment = 1) + }) +}) + +test_that("no arguments is idempotent", { + string <- "http://example.com/" + url <- url_parse(string) + + expect_equal(url_modify(string), string) + expect_equal(url_modify(url), url) +}) + +test_that("can accept query as a string or list", { + url <- "http://test/" + + expect_equal(url_modify(url, query = "a=1&b=2"), "http://test/?a=1&b=2") + expect_equal(url_modify(url, query = list(a = 1, b = 2)), "http://test/?a=1&b=2") + + expect_equal(url_modify(url, query = ""), "http://test/") + expect_equal(url_modify(url, query = list()), "http://test/") +}) +test_that("checks various query formats", { + url <- "http://example.com" + + expect_snapshot(error = TRUE, { + url_modify(url, query = 1) + url_modify(url, query = list(1)) + url_modify(url, query = list(x = 1:2)) + }) +}) +test_that("path always starts with /", { + expect_equal(url_modify("https://x.com/abc", path = "def"), "https://x.com/def") + expect_equal(url_modify("https://x.com/abc", path = ""), "https://x.com/") }) # query ------------------------------------------------------------------- From b1bb9f3ca3ca1ba71fd7ac513dd4d9313e709940 Mon Sep 17 00:00:00 2001 From: Jeroen Ooms Date: Mon, 6 Jan 2025 14:04:23 +0100 Subject: [PATCH 3/4] Make parallel unit test more robust (#623) --- R/test.R | 2 +- tests/testthat/test-multi-req.R | 8 ++------ 2 files changed, 3 insertions(+), 7 deletions(-) diff --git a/R/test.R b/R/test.R index 9f973db1..7622338e 100644 --- a/R/test.R +++ b/R/test.R @@ -50,7 +50,7 @@ example_url <- function() { env_cache(the, "test_app", webfakes::new_app_process( app, - opts = webfakes::server_opts(num_threads = 2) + opts = webfakes::server_opts(num_threads = 6, enable_keep_alive = TRUE) ) ) the$test_app$url() diff --git a/tests/testthat/test-multi-req.R b/tests/testthat/test-multi-req.R index 9b637ec8..1650e026 100644 --- a/tests/testthat/test-multi-req.R +++ b/tests/testthat/test-multi-req.R @@ -9,13 +9,9 @@ test_that("correctly prepares request", { }) test_that("requests happen in parallel", { - # GHA MacOS builder seems to be very slow - skip_if( - isTRUE(as.logical(Sys.getenv("CI", "false"))) && - Sys.info()[["sysname"]] == "Darwin" - ) - + # test works best if webfakes has ample threads and keepalive reqs <- list2( + request_test("/delay/:secs", secs = 0), request_test("/delay/:secs", secs = 0.25), request_test("/delay/:secs", secs = 0.25), request_test("/delay/:secs", secs = 0.25), From b0e22960e7738c9b9de0e3ce1b38e14881b252f2 Mon Sep 17 00:00:00 2001 From: Hadley Wickham Date: Mon, 6 Jan 2025 07:19:37 -0600 Subject: [PATCH 4/4] Minor `curl_translate()` polishing/testing (#619) Fixes #500 --- R/curl.R | 4 +++- R/headers.R | 14 +++++--------- tests/testthat/_snaps/curl.md | 11 +++++++++++ tests/testthat/test-curl.R | 14 +++++++++++++- 4 files changed, 32 insertions(+), 11 deletions(-) diff --git a/R/curl.R b/R/curl.R index 6016638d..acd4b946 100644 --- a/R/curl.R +++ b/R/curl.R @@ -56,7 +56,9 @@ curl_translate <- function(cmd, simplify_headers = TRUE) { # Content type set with data type <- data$headers$`Content-Type` - data$headers$`Content-Type` <- NULL + if (!identical(data$data, "")) { + data$headers$`Content-Type` <- NULL + } headers <- curl_simplify_headers(data$headers, simplify_headers) steps <- add_curl_step(steps, "req_headers", dots = headers) diff --git a/R/headers.R b/R/headers.R index 6fdc5dd2..c5c4057a 100644 --- a/R/headers.R +++ b/R/headers.R @@ -1,15 +1,11 @@ as_headers <- function(x, error_call = caller_env()) { if (is.character(x) || is.raw(x)) { - headers <- curl::parse_headers(x) - headers <- headers[grepl(":", headers, fixed = TRUE)] + parsed <- curl::parse_headers(x) + valid <- parsed[grepl(":", parsed, fixed = TRUE)] + halves <- parse_in_half(valid, ":") - equals <- regexpr(":", headers, fixed = TRUE) - pieces <- regmatches(headers, equals, invert = TRUE) - - names <- map_chr(pieces, "[[", 1) - values <- as.list(trimws(map_chr(pieces, "[[", 2))) - - new_headers(set_names(values, names), error_call = error_call) + headers <- set_names(trimws(halves$right), halves$left) + new_headers(as.list(headers), error_call = error_call) } else if (is.list(x)) { new_headers(x, error_call = error_call) } else { diff --git a/tests/testthat/_snaps/curl.md b/tests/testthat/_snaps/curl.md index 5d42dfa3..1c5cface 100644 --- a/tests/testthat/_snaps/curl.md +++ b/tests/testthat/_snaps/curl.md @@ -104,6 +104,17 @@ req_body_raw("abcdef", "text/plain") |> req_perform() +# content type stays in header if no data + + Code + curl_translate("curl http://example.com -H Content-Type:text/plain") + Output + request("http://example.com/") |> + req_headers( + `Content-Type` = "text/plain", + ) |> + req_perform() + # can read from clipboard Code diff --git a/tests/testthat/test-curl.R b/tests/testthat/test-curl.R index 225983a4..de170670 100644 --- a/tests/testthat/test-curl.R +++ b/tests/testthat/test-curl.R @@ -51,7 +51,11 @@ test_that("can handle line breaks", { test_that("headers are parsed", { expect_equal( curl_normalize("curl http://x.com -H 'A: 1'")$headers, - as_headers("A: 1") + new_headers(list(A = "1")) + ) + expect_equal( + curl_normalize("curl http://x.com -H 'B:'")$headers, + new_headers(list(B = "")) ) }) @@ -138,6 +142,14 @@ test_that("can translate data", { }) }) +test_that("content type stays in header if no data", { + skip_if(getRversion() < "4.1") + + expect_snapshot( + curl_translate("curl http://example.com -H Content-Type:text/plain") + ) +}) + test_that("can evaluate simple calls", { request_test() # hack to start server