下載完後將檔案解壓縮到 Chrome.exe 所在的資料夾,以下為範例路徑
C:\Program Files (x86)\Google\Chrome\Application
再到環境變數的 PATH 增加 C:\Program Files (x86)\Google\Chrome\Application;
若無 PATH 請自行新增
java -jar selenium-server-standalone-3.4.0.jar
remDr <- remoteDriver(browserName = "chrome")
remDr$open()
remDr$navigate('https://tw.yahoo.com/')
remDr$findElement()
remDr$findElements()
remDr$getCurrentUrl()
remDr$goBack()
函數名稱非常直觀
以下為錯誤示範
# Example of the mistake: reusing an element handle after navigating away.
# Open the Yahoo Taiwan home page and look up the search box by CSS selector.
remDr$navigate(url = 'https://tw.yahoo.com/')
test <- remDr$findElement(using = 'css',value = '#UHSearchBox')
# Put a query into the search box and submit it.
test$setElementAttribute(attributeName = 'value',value = 'NSYSU')
test$submitElement()
# Once the search has finished, use goBack() to return to the previous page.
remDr$goBack()
# Deliberately wrong: `test` was looked up before the page changed, and it is
# reused here without being looked up again.
test$setElementAttribute(attributeName = 'value',value = 'NSYSU')
# Calling setElementAttribute() directly like this runs into an error;
# remDr$findElement() has to be executed once more first.
test <- remDr$findElement(using = 'css',value = '#UHSearchBox')
test$setElementAttribute(attributeName = 'value',value = 'NSYSU')
library(RSelenium)
library(tidyverse)
library(foreach)
library(doParallel)
#
#' Browse PTT articles in a Chrome window and hover over their links.
#'
#' Opens a Selenium-driven Chrome session, walks the index pages
#' `pagefrom:pageto` of a PTT board, opens up to 20 entries per index page and
#' moves the mouse over every direct `<a>` child of `#main-content` in each
#' opened article.
#'
#' @param board PTT board name used in the URL (e.g. "pet"). Required.
#' @param pagefrom Number of the first index page to visit.
#' @param pageto Number of the last index page to visit.
#' @param speed Pause, in seconds, inserted between browser actions. Required.
#' @param p Window slot: 1 places the window on the left, 2 on the right;
#'   any other value leaves the window where the browser put it.
#' @return `NULL`, invisibly. The function is called for its side effects.
Crawl <- function(board, pagefrom = 2189, pageto = 2190, speed, p = 1) {
  # Attached inside the function so it also works when the function is shipped
  # to a freshly started worker session by foreach.
  library(RSelenium)
  library(tidyverse)

  # Keep the debugging hook for the duration of the crawl only.
  old_opts <- options(error = dump.frames)
  on.exit(options(old_opts), add = TRUE)

  remDr <- remoteDriver(browserName = "chrome")
  remDr$open()
  # Release the browser session even when the crawl stops with an error.
  on.exit(tryCatch(remDr$close(), error = function(e) NULL), add = TRUE)

  switch(p,
    remDr$setWindowPosition(x = -5, y = -20),
    remDr$setWindowPosition(x = 680, y = -20)
  )
  remDr$setWindowSize(width = 692, height = 788)

  # Best-effort hover: a link that cannot be reached is simply skipped.
  # Defined locally so the function stays self-contained on a worker.
  hover <- function(element) {
    tryCatch(
      remDr$mouseMoveToLocation(webElement = element),
      error = function(e) NULL
    )
  }

  entries_per_page <- 20

  for (i in pagefrom:pageto) {
    index_url <- paste0("https://www.ptt.cc/bbs/", board, "/index", i, ".html")

    for (j in seq_len(entries_per_page)) {
      # Opening an article leaves the index page, so reload it every time.
      Sys.sleep(speed)
      remDr$navigate(index_url)
      Sys.sleep(speed)

      entry_xpath <- paste0(
        '//*[@id="main-container"]/div[2]/div[', j, ']/div[3]/a'
      )
      entry <- tryCatch(
        remDr$findElement(using = "xpath", value = entry_xpath),
        error = function(e) NULL,
        warning = function(w) NULL
      )
      # No title link for this slot: report it and move on, rather than
      # clicking a handle left over from a previous entry.
      if (is.null(entry)) {
        print("被刪了")
        next
      }

      opened <- tryCatch(
        {
          entry$clickElement()
          TRUE
        },
        error = function(e) {
          print("被刪了")
          FALSE
        }
      )
      if (!opened) {
        next
      }

      links <- remDr$findElements(
        using = "css selector", value = "#main-content > a"
      )
      fifth_span <- remDr$findElements(
        using = "css selector", value = "#main-content > span:nth-child(5)"
      )

      # seq_along() so that an article without links is skipped outright.
      for (k in seq_along(links)) {
        Sys.sleep(speed)
        hover(links[[k]])
        if (k > 1) {
          hover(links[[k - 1]])
        }
        if (length(fifth_span) > 0) {
          hover(fifth_span[[1]])
        }
      }
    }
  }

  invisible(NULL)
}
# Dual-window browsing: two workers, each running its own Crawl() call.
cl <- makeCluster(2, type = "SOCK")
registerDoParallel(cl)
# Worker 1 walks index pages 1110-1121, worker 2 walks 1122-1133; `p = i`
# selects the window slot for each of them.
foreach(i = 1:2) %dopar% {
  first_page <- if (i == 1) 1110 else 1122
  Crawl(
    board = 'pet',
    pagefrom = first_page,
    pageto = first_page + 11,
    speed = 0.5,
    p = i
  )
}
# `remDr` here is a driver object from the calling session, not the ones
# created inside Crawl(); presumably this closes the open browser sessions.
remDr$closeall()
# Shut the workers down.
stopImplicitCluster()
stopCluster(cl)
#library(RSelenium)
library(tidyverse)
library(foreach)
library(doParallel)
#
#' Browse the latest Dcard posts of a board and hover over their images.
#'
#' Opens a Selenium-driven Chrome session on the board's "latest" listing,
#' then for each of the first `n` posts: opens the post, moves the mouse over
#' every gallery image found in it, and goes back to the listing.
#'
#' A post that cannot be located or clicked stops the crawl with the
#' underlying error; the browser session is still closed in that case.
#'
#' @param board Dcard board name used in the URL.
#' @param n Number of posts to open, counted from the top of the listing.
#' @param speed Pause, in seconds, inserted between browser actions.
#' @param p Window slot: 1 places the window on the left, 2 on the right;
#'   any other value leaves the window where the browser put it.
#' @return `NULL`, invisibly. The function is called for its side effects.
dcard_crawl <- function(board = 'pet', n = 10, speed = 1, p = 1) {
  # Attached inside the function so it also works when the function is shipped
  # to a freshly started worker session by foreach.
  library(RSelenium)
  library(tidyverse)

  # Keep the debugging hook for the duration of the crawl only.
  old_opts <- options(error = dump.frames)
  on.exit(options(old_opts), add = TRUE)

  remDr <- remoteDriver(browserName = "chrome")
  remDr$open()
  # Release the browser session even when the crawl stops with an error.
  on.exit(tryCatch(remDr$close(), error = function(e) NULL), add = TRUE)

  switch(p,
    remDr$setWindowPosition(x = -5, y = -20),
    remDr$setWindowPosition(x = 680, y = -20)
  )
  remDr$setWindowSize(width = 692, height = 788)
  remDr$navigate(paste0('https://www.dcard.tw/f/', board, '?latest=true'))

  # seq_len() so that n = 0 opens nothing instead of iterating over 1, 0.
  for (i in seq_len(n)) {
    Sys.sleep(speed)
    title_selector <- paste0(
      '#root > div > div.App_main_38Mbt > div > div > main > div > div > div:nth-child(4) > div:nth-child(',
      i,
      ') > div > a > article > div.PostEntry_content_g2afg > h3'
    )
    post_title <- remDr$findElement(
      using = 'css selector', value = title_selector
    )
    post_title$clickElement()

    images <- remDr$findElements(
      using = 'css selector', value = "[class='GalleryImage_image_3lGzO']"
    )
    # Best-effort hover: an image that cannot be reached is simply skipped.
    for (image in images) {
      Sys.sleep(speed)
      tryCatch(
        remDr$mouseMoveToLocation(webElement = image),
        error = function(e) NULL
      )
    }

    Sys.sleep(speed)
    # Navigate back through the driver itself rather than the element handle.
    remDr$goBack()
  }

  invisible(NULL)
}
#
# Run the PTT crawler and the Dcard crawler side by side, one per worker.
cl <- makeCluster(2, type = "SOCK")
registerDoParallel(cl)
# `p = i` selects the window slot: PTT in slot 1, Dcard in slot 2.
foreach(i = 1:2) %dopar% {
  if (i == 1) {
    Crawl(board = 'pet', pagefrom = 1110, pageto = 1121, speed = 0.5, p = i)
  } else {
    dcard_crawl(board = 'pet', n = 100, speed = 1, p = i)
  }
}
# `remDr` here is a driver object from the calling session, not the ones
# created inside the crawlers; presumably this closes the open sessions.
remDr$closeall()
# Shut the workers down.
stopImplicitCluster()
stopCluster(cl)