Skip to content

Commit

Permalink
Merge branch 'main' into multi-link-header
Browse files Browse the repository at this point in the history
  • Loading branch information
hadley authored Jan 6, 2025
2 parents e1d230f + b0e2296 commit 6adb3aa
Show file tree
Hide file tree
Showing 18 changed files with 412 additions and 72 deletions.
2 changes: 2 additions & 0 deletions NAMESPACE
Original file line number Diff line number Diff line change
Expand Up @@ -95,6 +95,7 @@ export(req_url)
export(req_url_path)
export(req_url_path_append)
export(req_url_query)
export(req_url_relative)
export(req_user_agent)
export(req_verbose)
export(request)
Expand Down Expand Up @@ -143,6 +144,7 @@ export(secret_write_rds)
export(signal_total_pages)
export(throttle_status)
export(url_build)
export(url_modify)
export(url_parse)
export(with_mock)
export(with_mocked_responses)
Expand Down
3 changes: 3 additions & 0 deletions NEWS.md
Original file line number Diff line number Diff line change
@@ -1,6 +1,9 @@
# httr2 (development version)

* `resp_link_url()` now works if there are multiple `Link` headers (#587).
* New `url_modify()` makes it easier to modify an existing URL (#464).
* New `req_url_relative()` for constructing relative URLs (#449).
* `url_parse()` gains `base_url` argument so you can also use it to parse relative URLs (#449).
* `url_parse()` now uses `curl::curl_parse_url()` which is much faster and more correct (#577).
* `req_retry()` now defaults to `max_tries = 2` with a message.
Set to `max_tries = 1` to disable retries.
Expand Down
4 changes: 3 additions & 1 deletion R/curl.R
Original file line number Diff line number Diff line change
Expand Up @@ -56,7 +56,9 @@ curl_translate <- function(cmd, simplify_headers = TRUE) {

# Content type set with data
type <- data$headers$`Content-Type`
data$headers$`Content-Type` <- NULL
if (!identical(data$data, "")) {
data$headers$`Content-Type` <- NULL
}

headers <- curl_simplify_headers(data$headers, simplify_headers)
steps <- add_curl_step(steps, "req_headers", dots = headers)
Expand Down
14 changes: 5 additions & 9 deletions R/headers.R
Original file line number Diff line number Diff line change
@@ -1,15 +1,11 @@
as_headers <- function(x, error_call = caller_env()) {
if (is.character(x) || is.raw(x)) {
headers <- curl::parse_headers(x)
headers <- headers[grepl(":", headers, fixed = TRUE)]
parsed <- curl::parse_headers(x)
valid <- parsed[grepl(":", parsed, fixed = TRUE)]
halves <- parse_in_half(valid, ":")

equals <- regexpr(":", headers, fixed = TRUE)
pieces <- regmatches(headers, equals, invert = TRUE)

names <- map_chr(pieces, "[[", 1)
values <- as.list(trimws(map_chr(pieces, "[[", 2)))

new_headers(set_names(values, names), error_call = error_call)
headers <- set_names(trimws(halves$right), halves$left)
new_headers(as.list(headers), error_call = error_call)
} else if (is.list(x)) {
new_headers(x, error_call = error_call)
} else {
Expand Down
14 changes: 14 additions & 0 deletions R/req-url.R
Original file line number Diff line number Diff line change
Expand Up @@ -31,6 +31,11 @@
#' req |>
#' req_url("http://google.com")
#'
#' # Use a relative url
#' req <- request("http://example.com/a/b/c")
#' req |> req_url_relative("..")
#' req |> req_url_relative("/d/e/f")
#'
#' # Use .multi to control what happens with vector parameters:
#' req |> req_url_query(id = 100:105, .multi = "comma")
#' req |> req_url_query(id = 100:105, .multi = "explode")
Expand All @@ -47,6 +52,15 @@ req_url <- function(req, url) {
req
}

#' @export
#' @rdname req_url
req_url_relative <- function(req, url) {
  check_request(req)

  # Resolve `url` against the request's current URL, then turn the parsed
  # result back into a string before storing it on the request.
  resolved <- url_build(url_parse(url, base_url = req$url))
  req_url(req, resolved)
}

#' @export
#' @rdname req_url
#' @param .multi Controls what happens when an element of `...` is a vector
Expand Down
2 changes: 1 addition & 1 deletion R/test.R
Original file line number Diff line number Diff line change
Expand Up @@ -50,7 +50,7 @@ example_url <- function() {
env_cache(the, "test_app",
webfakes::new_app_process(
app,
opts = webfakes::server_opts(num_threads = 2)
opts = webfakes::server_opts(num_threads = 6, enable_keep_alive = TRUE)
)
)
the$test_app$url()
Expand Down
134 changes: 110 additions & 24 deletions R/url.R
Original file line number Diff line number Diff line change
@@ -1,32 +1,30 @@
#' Parse and build URLs
#' Parse a URL into its component pieces
#'
#' `url_parse()` parses a URL into its component pieces; `url_build()` does
#' the reverse, converting a list of pieces into a string URL. See `r rfc(3986)`
#' for the details of the parsing algorithm.
#' `url_parse()` parses a URL into its component parts, powered by
#' [curl::curl_parse_url()]. The parsing algorithm follows the specifications
#' detailed in `r rfc(3986)`.
#'
#' @param url For `url_parse()` a string to parse into a URL;
#' for `url_build()` a URL to turn back into a string.
#' @returns
#' * `url_build()` returns a string.
#' * `url_parse()` returns a URL: a S3 list with class `httr2_url`
#' and elements `scheme`, `hostname`, `port`, `path`, `fragment`, `query`,
#' `username`, `password`.
#' @param url A string containing the URL to parse.
#' @param base_url Use this as a parent, if `url` is a relative URL.
#' @returns An S3 object of class `httr2_url` with the following components:
#' `scheme`, `hostname`, `username`, `password`, `port`, `path`, `query`, and
#' `fragment`.
#' @export
#' @family URL manipulation
#' @examples
#' url_parse("http://google.com/")
#' url_parse("http://google.com:80/")
#' url_parse("http://google.com:80/?a=1&b=2")
#' url_parse("http://username@google.com:80/path;test?a=1&b=2#40")
#'
#' url <- url_parse("http://google.com/")
#' url$port <- 80
#' url$hostname <- "example.com"
#' url$query <- list(a = 1, b = 2, c = 3)
#' url_build(url)
url_parse <- function(url) {
#' # You can parse a relative URL if you also provide a base url
#' url_parse("foo", "http://google.com/bar/")
#' url_parse("..", "http://google.com/bar/")
url_parse <- function(url, base_url = NULL) {
check_string(url)
check_string(base_url, allow_null = TRUE)

curl <- curl::curl_parse_url(url)
curl <- curl::curl_parse_url(url, baseurl = base_url)

parsed <- list(
scheme = curl$scheme,
Expand All @@ -42,10 +40,88 @@ url_parse <- function(url) {
parsed
}

url_modify <- function(url, ..., error_call = caller_env()) {
url <- url_parse(url)
url <- modify_list(url, ..., error_call = error_call)
url_build(url)
#' Modify a URL
#'
#' Modify components of a URL. The default value of each argument, `NULL`,
#' means leave the component as is. If you want to remove a component,
#' set it to `""`. Note that setting `scheme` or `hostname` to `""` will
#' create a relative URL.
#'
#' @param url A string or [parsed URL](url_parse).
#' @param scheme The scheme, typically either `http` or `https`.
#' @param hostname The hostname, e.g., `www.google.com` or `posit.co`.
#' @param username,password Username and password to embed in the URL.
#' Not generally recommended but needed for some legacy applications.
#' @param port An integer port number.
#' @param path The path, e.g., `/search`. Paths must start with `/`, so this
#' will be automatically added if omitted.
#' @param query Either a query string or a named list of query components.
#' @param fragment The fragment, e.g., `#section-1`.
#' @return An object of the same type as `url`.
#' @export
#' @family URL manipulation
#' @examples
#' url_modify("http://hadley.nz", path = "about")
#' url_modify("http://hadley.nz", scheme = "https")
#' url_modify("http://hadley.nz/abc", path = "/cde")
#' url_modify("http://hadley.nz/abc", path = "")
#' url_modify("http://hadley.nz?a=1", query = "b=2")
#' url_modify("http://hadley.nz?a=1", query = list(c = 3))
url_modify <- function(url,
                       scheme = NULL,
                       hostname = NULL,
                       username = NULL,
                       password = NULL,
                       port = NULL,
                       path = NULL,
                       query = NULL,
                       fragment = NULL) {
  # Remember which form the caller supplied so the result comes back in the
  # same form (string in -> string out, parsed URL in -> parsed URL out).
  was_string <- is_string(url)
  if (!(was_string || is_url(url))) {
    stop_input_type(url, "a string or parsed URL")
  }
  if (was_string) {
    url <- url_parse(url)
  }

  # Validate the scalar components; NULL always means "leave unchanged".
  check_string(scheme, allow_null = TRUE)
  check_string(hostname, allow_null = TRUE)
  check_string(username, allow_null = TRUE)
  check_string(password, allow_null = TRUE)
  check_number_whole(port, min = 1, allow_null = TRUE)
  check_string(path, allow_null = TRUE)
  check_string(fragment, allow_null = TRUE)

  # `query` may be a query string (parsed here), a named or empty list
  # (each element validated), or NULL; anything else is rejected.
  if (!is.null(query)) {
    if (is_string(query)) {
      query <- query_parse(query)
    } else if (is.list(query) && (length(query) == 0 || is_named(query))) {
      for (param in names(query)) {
        check_query_param(query[[param]], paste0("query$", param))
      }
    } else {
      stop_input_type(query, "a character vector, named list, or NULL")
    }
  }

  # Collect the supplied components.
  # NOTE(review): compact() presumably drops the NULL (unset) entries - confirm.
  updates <- compact(list(
    scheme = scheme,
    hostname = hostname,
    username = username,
    password = password,
    port = port,
    path = path,
    query = query,
    fragment = fragment
  ))

  # An empty string requests removal of the component, so store it as NULL.
  # Assigning list(NULL) keeps the name in `updates` rather than deleting it.
  blank <- map_lgl(updates, function(value) identical(value, ""))
  updates[blank] <- list(NULL)
  url[names(updates)] <- updates

  if (!was_string) {
    return(url)
  }
  url_build(url)
}

# TRUE when `x` carries the "httr2_url" S3 class.
is_url <- function(x) {
  inherits(x, "httr2_url")
}
Expand Down Expand Up @@ -85,9 +161,19 @@ print.httr2_url <- function(x, ...) {
invisible(x)
}

#' Build a string from a URL object
#'
#' This is the inverse of [url_parse()], taking a parsed URL object and
#' turning it back into a string.
#'
#' @param url A URL object created by [url_parse()].
#' @family URL manipulation
#' @export
#' @rdname url_parse
url_build <- function(url) {
if (!is_url(url)) {
stop_input_type(url, "a parsed URL")
}

if (!is.null(url$query)) {
query <- query_build(url$query)
} else {
Expand All @@ -113,7 +199,7 @@ url_build <- function(url) {
authority <- NULL
}

if (!is.null(url$path) && !startsWith(url$path, "/")) {
if (is.null(url$path) || !startsWith(url$path, "/")) {
url$path <- paste0("/", url$path)
}

Expand Down
5 changes: 4 additions & 1 deletion _pkgdown.yml
Original file line number Diff line number Diff line change
Expand Up @@ -70,12 +70,15 @@ reference:
contents:
- starts_with("resp_")

- title: URL manipulation
contents:
- starts_with("url_")

- title: Miscellaneous helpers
contents:
- curl_translate
- secrets
- obfuscate
- url_parse

- title: OAuth
desc: >
Expand Down
8 changes: 8 additions & 0 deletions man/req_url.Rd

Some generated files are not rendered by default. Learn more about how customized files appear on GitHub.

21 changes: 21 additions & 0 deletions man/url_build.Rd

Some generated files are not rendered by default. Learn more about how customized files appear on GitHub.

60 changes: 60 additions & 0 deletions man/url_modify.Rd

Some generated files are not rendered by default. Learn more about how customized files appear on GitHub.

Loading

0 comments on commit 6adb3aa

Please sign in to comment.