From e223d9dcecb61e49e1c6a9e84937cdf5927615b9 Mon Sep 17 00:00:00 2001 From: Jon Harmon Date: Tue, 9 Jul 2024 08:46:21 -0500 Subject: [PATCH 1/5] Update snapshot for session. You work at posit now, not rstudio. --- tests/testthat/_snaps/session.md | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/tests/testthat/_snaps/session.md b/tests/testthat/_snaps/session.md index ae4f055..cfab7ca 100644 --- a/tests/testthat/_snaps/session.md +++ b/tests/testthat/_snaps/session.md @@ -7,12 +7,12 @@ https://hadley.nz/ Status: 200 Type: text/html; charset=utf-8 - Size: 821273 + Size: 821905 Code expect_true(is.session(s)) s <- session_follow_link(s, css = "p a") Message - Navigating to . + Navigating to . Code session_history(s) Output From 9c0b78f638ed8add291a65b3d4956c17ae7089da Mon Sep 17 00:00:00 2001 From: Jon Harmon Date: Tue, 9 Jul 2024 08:56:51 -0500 Subject: [PATCH 2/5] Wait in click. Closes #405. I tried to make this automatic, and got it working some of the time, but I couldn't find a way to detect that something might change, and thus wait in that situation. I tried to make it as clear as possible for users to be able to fix this. Once a strategy is agreed on, the same strategy should probably be applied to other methods. --- R/live.R | 42 ++++++++++++++++++++++++++---- tests/testthat/html/navigate1.html | 8 ++++++ tests/testthat/html/navigate2.html | 8 ++++++ tests/testthat/test-live.R | 18 +++++++++++++ 4 files changed, 71 insertions(+), 5 deletions(-) create mode 100644 tests/testthat/html/navigate1.html create mode 100644 tests/testthat/html/navigate2.html diff --git a/R/live.R b/R/live.R index 91f4a1b..3f2c73b 100644 --- a/R/live.R +++ b/R/live.R @@ -92,7 +92,7 @@ LiveHTML <- R6::R6Class( self$session$Page$navigate(url, wait_ = FALSE) self$session$wait_for(p) - private$root_id <- self$session$DOM$getDocument(0)$root$nodeId + private$refresh_root() }, #' @description Called when `print()`ed @@ -129,10 +129,21 @@ LiveHTML <- R6::R6Class( #' @description Simulate a click on an HTML element. #' @param css CSS selector or xpath expression. #' @param n_clicks Number of clicks - click = function(css, n_clicks = 1) { + #' @param new_page Whether to wait for a new page to load, such as after + #' clicking a link. + click = function(css, n_clicks = 1, new_page = FALSE) { private$check_active() check_number_whole(n_clicks, min = 1) + # Wait for new page, #405. + if (new_page) { + p <- self$session$Page$loadEventFired(wait_ = FALSE) + on.exit({ + self$session$wait_for(p) + private$refresh_root() + }, add = TRUE) + } + # Implementation based on puppeteer as described in # https://medium.com/@aslushnikov/automating-clicks-in-chromium-a50e7f01d3fb # With code from https://github.com/puppeteer/puppeteer/blob/b53de4e0942e93c/packages/puppeteer-core/src/cdp/Input.ts#L431-L459 @@ -170,6 +181,7 @@ LiveHTML <- R6::R6Class( button = "left" ) } + invisible(self) }, @@ -224,6 +236,7 @@ LiveHTML <- R6::R6Class( deltaX = left, deltaY = top ) + invisible(self) }, @@ -268,14 +281,14 @@ LiveHTML <- R6::R6Class( if (new_chromote && !self$session$is_active()) { suppressMessages({ self$session <- self$session$respawn() - private$root_id <- self$session$DOM$getDocument(0)$root$nodeId + private$refresh_root() }) } }, wait_for_selector = function(css, timeout = 5) { done <- now() + timeout - while(now() < done) { + while (now() < done) { nodes <- private$find_nodes(css) if (length(nodes) > 0) { return(nodes) @@ -289,7 +302,22 @@ LiveHTML <- R6::R6Class( find_nodes = function(css, xpath) { check_exclusive(css, xpath) if (!missing(css)) { - unlist(self$session$DOM$querySelectorAll(private$root_id, css)$nodeIds) + node_ids <- try_fetch( + self$session$DOM$querySelectorAll(private$root_id, css)$nodeIds, + error = function(cnd) { + if (grepl("-32000", cnd_message(cnd))) { + cli::cli_abort( + c( + "Can't find root node.", + i = "Did you issue a {.code click()} without waiting for a {.arg new_page}?" + ), + class = "rvest_error-missing_node", + parent = cnd + ) + } + } + ) + unlist(node_ids) } else { search <- glue::glue(" (function() {{ @@ -324,6 +352,10 @@ LiveHTML <- R6::R6Class( object_id = function(node_id) { # https://chromedevtools.github.io/devtools-protocol/tot/DOM/#method-resolveNode self$session$DOM$resolveNode(node_id)$object$objectId + }, + + refresh_root = function() { + private$root_id <- self$session$DOM$getDocument(0)$root$nodeId } ) ) diff --git a/tests/testthat/html/navigate1.html b/tests/testthat/html/navigate1.html new file mode 100644 index 0000000..5778124 --- /dev/null +++ b/tests/testthat/html/navigate1.html @@ -0,0 +1,8 @@ + + + + Navigate 1 + + + Navigate to Page 2 + diff --git a/tests/testthat/html/navigate2.html b/tests/testthat/html/navigate2.html new file mode 100644 index 0000000..5ab020a --- /dev/null +++ b/tests/testthat/html/navigate2.html @@ -0,0 +1,8 @@ + + + + Navigate 2 + + +

Success!

+ diff --git a/tests/testthat/test-live.R b/tests/testthat/test-live.R index e1d7fab..f18be50 100644 --- a/tests/testthat/test-live.R +++ b/tests/testthat/test-live.R @@ -50,6 +50,14 @@ test_that("can click a button", { expect_equal(html_text(html_element(sess, "p")), "double clicked") }) +test_that("can find elements after click that navigates", { + skip_if_no_chromote() + + sess <- read_html_live(html_test_path("navigate1")) + sess$click("a", new_page = TRUE) + expect_equal(html_text2(html_element(sess, "p")), "Success!") +}) + test_that("can scroll in various ways", { skip_if_no_chromote() @@ -88,6 +96,16 @@ test_that("can press special keys",{ expect_equal(html_text(html_element(sess, "#keyInfo")), "]/BracketRight") }) +test_that("gracefully errors on missing root node", { + skip_if_no_chromote() + + sess <- read_html_live(html_test_path("navigate1")) + sess$click("a") + expect_error( + html_element(sess, "p"), + class = "rvest_error-missing_node" + ) +}) # as_key_desc ------------------------------------------------------------- From d945007b0733f5be046bc9d4d302b8ffa1c41ba9 Mon Sep 17 00:00:00 2001 From: Jon Harmon Date: Thu, 11 Jul 2024 08:22:44 -0500 Subject: [PATCH 3/5] Document. --- man/LiveHTML.Rd | 5 ++++- 1 file changed, 4 insertions(+), 1 deletion(-) diff --git a/man/LiveHTML.Rd b/man/LiveHTML.Rd index a568e66..35fdb22 100644 --- a/man/LiveHTML.Rd +++ b/man/LiveHTML.Rd @@ -122,7 +122,7 @@ Extract HTML elements from the current page. \subsection{Method \code{click()}}{ Simulate a click on an HTML element. \subsection{Usage}{ -\if{html}{\out{
}}\preformatted{LiveHTML$click(css, n_clicks = 1)}\if{html}{\out{
}} +\if{html}{\out{
}}\preformatted{LiveHTML$click(css, n_clicks = 1, new_page = FALSE)}\if{html}{\out{
}} } \subsection{Arguments}{ @@ -131,6 +131,9 @@ Simulate a click on an HTML element. \item{\code{css}}{CSS selector or xpath expression.} \item{\code{n_clicks}}{Number of clicks} + +\item{\code{new_page}}{Whether to wait for a new page to load, such as after +clicking a link.} } \if{html}{\out{}} } From 660267551d42cdac0b4105b83c64f5678683cf22 Mon Sep 17 00:00:00 2001 From: Jon Harmon Date: Thu, 11 Jul 2024 08:30:55 -0500 Subject: [PATCH 4/5] Add NEWS bullet. --- NEWS.md | 1 + 1 file changed, 1 insertion(+) diff --git a/NEWS.md b/NEWS.md index 0b3b82c..e181300 100644 --- a/NEWS.md +++ b/NEWS.md @@ -1,6 +1,7 @@ # rvest (development version) * New example vignette displays the same starwars data but rendered dynamically using JS, so you need to use `read_html_live()` to get the data. +* The `click()` method for `LiveHTML` objects gains a `new_page` argument to deal with situations where a click loads a new web page (@jonthegeek, #405). # rvest 1.0.4 From bcff2122f8ec9cf948e984e5d5fe028b615ac14d Mon Sep 17 00:00:00 2001 From: Jon Harmon Date: Tue, 16 Jul 2024 16:27:12 -0500 Subject: [PATCH 5/5] More-automatic handling of clicks that navigate. --- R/live.R | 52 ++++++++++++++++---------------------- tests/testthat/test-live.R | 13 +--------- 2 files changed, 23 insertions(+), 42 deletions(-) diff --git a/R/live.R b/R/live.R index 3f2c73b..751e895 100644 --- a/R/live.R +++ b/R/live.R @@ -87,12 +87,11 @@ LiveHTML <- R6::R6Class( self$session$Network$setUserAgentOverride("Mozilla/5.0 (Macintosh; Intel Mac OS X 10_15_7) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/121.0.0.0 Safari/537.36") + self$session$Page$loadEventFired(callback_ = private$refresh_root) # https://github.com/rstudio/chromote/issues/102 p <- self$session$Page$loadEventFired(wait_ = FALSE) self$session$Page$navigate(url, wait_ = FALSE) self$session$wait_for(p) - - private$refresh_root() }, #' @description Called when `print()`ed @@ -129,21 +128,10 @@ LiveHTML <- R6::R6Class( #' @description Simulate a click on an HTML element. #' @param css CSS selector or xpath expression. #' @param n_clicks Number of clicks - #' @param new_page Whether to wait for a new page to load, such as after - #' clicking a link. - click = function(css, n_clicks = 1, new_page = FALSE) { + click = function(css, n_clicks = 1) { private$check_active() check_number_whole(n_clicks, min = 1) - # Wait for new page, #405. - if (new_page) { - p <- self$session$Page$loadEventFired(wait_ = FALSE) - on.exit({ - self$session$wait_for(p) - private$refresh_root() - }, add = TRUE) - } - # Implementation based on puppeteer as described in # https://medium.com/@aslushnikov/automating-clicks-in-chromium-a50e7f01d3fb # With code from https://github.com/puppeteer/puppeteer/blob/b53de4e0942e93c/packages/puppeteer-core/src/cdp/Input.ts#L431-L459 @@ -158,6 +146,8 @@ LiveHTML <- R6::R6Class( center_x <- mean(content_quad[c(1, 3, 5, 7)]) center_y <- mean(content_quad[c(2, 4, 6, 8)]) + private$loadEvent_promise <- self$session$Page$loadEventFired(wait_ = FALSE) + # https://chromedevtools.github.io/devtools-protocol/1-3/Input/#method-dispatchMouseEvent self$session$Input$dispatchMouseEvent( type = "mouseMoved", @@ -277,6 +267,8 @@ LiveHTML <- R6::R6Class( private = list( root_id = NULL, + loadEvent_promise = NULL, + check_active = function() { if (new_chromote && !self$session$is_active()) { suppressMessages({ @@ -302,21 +294,7 @@ LiveHTML <- R6::R6Class( find_nodes = function(css, xpath) { check_exclusive(css, xpath) if (!missing(css)) { - node_ids <- try_fetch( - self$session$DOM$querySelectorAll(private$root_id, css)$nodeIds, - error = function(cnd) { - if (grepl("-32000", cnd_message(cnd))) { - cli::cli_abort( - c( - "Can't find root node.", - i = "Did you issue a {.code click()} without waiting for a {.arg new_page}?" - ), - class = "rvest_error-missing_node", - parent = cnd - ) - } - } - ) + node_ids <- private$nodes_from_css(css) unlist(node_ids) } else { search <- glue::glue(" @@ -341,6 +319,20 @@ LiveHTML <- R6::R6Class( } }, + nodes_from_css = function(css, retry = TRUE) { + try_fetch( + self$session$DOM$querySelectorAll(private$root_id, css)$nodeIds, + error = function(cnd) { + if (retry) { + self$session$wait_for(private$loadEvent_promise) + private$nodes_from_css(css, retry = FALSE) + } else { + cli::cli_abort(cnd) + } + } + ) + }, + # Inspired by https://github.com/rstudio/shinytest2/blob/v1/R/chromote-methods.R call_node_method = function(node_id, method, ...) { js_fun <- paste0("function() { return this", method, "}") @@ -354,7 +346,7 @@ LiveHTML <- R6::R6Class( self$session$DOM$resolveNode(node_id)$object$objectId }, - refresh_root = function() { + refresh_root = function(...) { private$root_id <- self$session$DOM$getDocument(0)$root$nodeId } ) diff --git a/tests/testthat/test-live.R b/tests/testthat/test-live.R index f18be50..72cccdd 100644 --- a/tests/testthat/test-live.R +++ b/tests/testthat/test-live.R @@ -54,7 +54,7 @@ test_that("can find elements after click that navigates", { skip_if_no_chromote() sess <- read_html_live(html_test_path("navigate1")) - sess$click("a", new_page = TRUE) + sess$click("a") expect_equal(html_text2(html_element(sess, "p")), "Success!") }) @@ -96,17 +96,6 @@ test_that("can press special keys",{ expect_equal(html_text(html_element(sess, "#keyInfo")), "]/BracketRight") }) -test_that("gracefully errors on missing root node", { - skip_if_no_chromote() - - sess <- read_html_live(html_test_path("navigate1")) - sess$click("a") - expect_error( - html_element(sess, "p"), - class = "rvest_error-missing_node" - ) -}) - # as_key_desc ------------------------------------------------------------- test_that("gracefully errors on bad inputs", {