library(RSelenium)
library(rvest)
library(stringr)
# This is necessary for the Java-based Selenium server used by RSelenium.
# Set the CHROME_DRIVER_PATH environment variable for this R session.
# NOTE(review): `my_path` is not defined in the visible part of the script and
# must exist before this line runs; presumably it is the ChromeDriver
# location -- confirm.
Sys.setenv(CHROME_DRIVER_PATH = my_path)
# Start a Selenium-driven Chrome session. The returned object is kept so the
# browser client can be extracted from it afterwards.
open_browser <- rsDriver(browser = "chrome")
# Console output: [1] "Connecting to remote server"
# Extract the "client" element from the rsDriver() result.
remote_driver <- open_browser[["client"]]
# NOTE(review): presumably the search phrase submitted to the site being
# scraped; its use is outside the visible part of the script -- confirm.
meta_cognitive_strategies <- "((METACOGNITIVE-READING-STRATEGIES))"
#' Extract article metadata from a search-results page
#'
#' Parses the HTML of a results page and collects, for each result, the title,
#' the author/venue line, the publication year, the link and the citation
#' count.
#'
#' @param page_source HTML of the results page: anything `read_html()` accepts
#'   (for example the string returned by the browser's page-source call).
#' @return A data.frame with one row per result and the columns
#'   `Article_Title`, `Author_Names`, `Publication_Year` (character; the first
#'   four-digit run found in the author line, `NA` if none),
#'   `Archive_Source` (the title link's `href`, `NA` when the title has no
#'   link) and `Cited_By` (integer, `NA` when no number is present).
extract_info <- function(page_source) {
  page <- read_html(page_source)

  # Keep the title nodes so the link lookup below stays aligned with them.
  title_nodes <- page %>%
    html_nodes(".gs_rt")
  titles <- html_text(title_nodes)

  authors <- page %>%
    html_nodes(".gs_a") %>%
    html_text()

  # The year is embedded in the author line; pull it out first, then strip it
  # from the author text. str_extract() takes only the string and the pattern
  # here.
  years <- str_extract(authors, "\\d{4}")
  authors <- str_replace(authors, "\\d{4}", "")

  # html_node() (singular) yields exactly one result per title node, and a
  # missing node gives NA from html_attr(), so titles without a link do not
  # shorten or shift the URL column.
  urls <- title_nodes %>%
    html_node("a") %>%
    html_attr("href")

  # NOTE(review): assumes the third link of each `.gs_fl` footer is the
  # "Cited by N" link and that there is one such footer per result -- confirm
  # against the live page markup.
  cited_by <- page %>%
    html_nodes(".gs_fl a:nth-child(3)") %>%
    html_text()
  cited_by <- as.integer(str_extract(cited_by, "\\d+"))

  data.frame(
    Article_Title = titles,
    Author_Names = authors,
    Publication_Year = years,
    Archive_Source = urls,
    Cited_By = cited_by
  )
}