Skip to content

Commit

Permalink
update renv
Browse files Browse the repository at this point in the history
Needed to update because of a CRAN test failure.
  • Loading branch information
mrchypark committed Feb 5, 2024
1 parent a14b8f9 commit ecd6520
Show file tree
Hide file tree
Showing 7 changed files with 814 additions and 464 deletions.
3 changes: 0 additions & 3 deletions .Rprofile
Original file line number Diff line number Diff line change
@@ -1,4 +1 @@
source("renv/activate.R")
if (interactive()) {
suppressMessages(require(usethis))
}
57 changes: 19 additions & 38 deletions R/getContent.R
Original file line number Diff line number Diff line change
Expand Up @@ -15,55 +15,38 @@

getContent <-
function(turl,
col = c("url",
"original_url",
"section",
"datetime",
"edittime",
"press",
"title",
"body",
"value")) {

col = c(
"url",
"original_url",
"section",
"datetime",
"edittime",
"press",
"title",
"body"
)) {
httr2::request(turl) %>%
httr2::req_user_agent("N2H4 by chanyub.park <[email protected]>") %>%
httr2::req_method("GET") %>%
httr2::req_perform() -> root

html_obj <- httr2::resp_body_html(root)
urlcheck <- root$url
value <- T
if (identical(grep("^https?://n.news.naver.com",
urlcheck),
integer(0))) {

if (
identical(
grep("^https?://n.news.naver.com", urlcheck), integer(0)
)
) {
original_url <- "page is not news section."
title <- "page is not news section."
datetime <- "page is not news section."
edittime <- "page is not news section."
press <- "page is not news section."
body <- "page is not news section."
section <- "page is not news section."
value <- F

} else {
# TODO: need to verify that this actually works.
chk <- rvest::html_nodes(html_obj, "div#main_content div div")
chk <- rvest::html_attr(chk, "class")
chk <- chk[1]
if (is.na(chk)) {
chk <- "not error"
}
if ("error_msg 404" == chk & value) {
original_url <- "page is moved."
title <- "page is moved."
datetime <- "page is moved."
edittime <- "page is moved."
press <- "page is moved."
body <- "page is moved."
section <- "page is moved."
value <- F
}
}
if (value) {
original_url <- getOriginalUrl(html_obj)
title <- getContentTitle(html_obj)
datetime <- getContentDatetime(html_obj)
Expand All @@ -84,8 +67,7 @@ getContent <-
press = press,
title = title,
body = body,
section = section,
value = value
section = section
)
return(newsInfo[, col])
}
Expand Down Expand Up @@ -141,7 +123,7 @@ getContentPress <-

getContentBody <-
function(html_obj,
body_node_info = "div#dic_area",
body_node_info = "article#dic_area",
body_attr = "") {
node <- rvest::html_nodes(html_obj, body_node_info)
body <- rvest::html_text(node)
Expand Down Expand Up @@ -171,4 +153,3 @@ getSection <- function(turl) {
}
return(httr2::url_parse(turl)$query$sid)
}

Loading

0 comments on commit ecd6520

Please sign in to comment.