Scraping allegro.pl website
library(tidyr, warn.conflicts=F, quietly=T)
library(stringr, warn.conflicts=F, quietly=T)
library(RSelenium, warn.conflicts = F, quietly = T)
library(netstat)
# Accumulator for scraped offers; starts with no rows and no columns and is
# extended with `name` / `price` columns by the scraping step.
tap_data <- data.frame()

# Launch a Selenium server together with a Chrome session. The chromedriver
# version is pinned explicitly, and the server listens on a port that
# netstat::free_port() reports as unused.
driver <- rsDriver(
  browser = "chrome",
  chromever = "107.0.5304.62",
  port = free_port()
)
## checking Selenium Server versions:
## BEGIN: PREDOWNLOAD
## BEGIN: DOWNLOAD
## BEGIN: POSTDOWNLOAD
## checking chromedriver versions:
## BEGIN: PREDOWNLOAD
## BEGIN: DOWNLOAD
## BEGIN: POSTDOWNLOAD
## checking geckodriver versions:
## BEGIN: PREDOWNLOAD
## BEGIN: DOWNLOAD
## BEGIN: POSTDOWNLOAD
## checking phantomjs versions:
## BEGIN: PREDOWNLOAD
## BEGIN: DOWNLOAD
## BEGIN: POSTDOWNLOAD
## [1] "Connecting to remote server"
## $acceptInsecureCerts
## [1] FALSE
##
## $browserName
## [1] "chrome"
##
## $browserVersion
## [1] "107.0.5304.110"
##
## $chrome
## $chrome$chromedriverVersion
## [1] "107.0.5304.62 (1eec40d3a5764881c92085aaee66d25075c159aa-refs/branch-heads/5304@{#942})"
##
## $chrome$userDataDir
## [1] "/tmp/.com.google.Chrome.UE41IC"
##
##
## $`goog:chromeOptions`
## $`goog:chromeOptions`$debuggerAddress
## [1] "localhost:46675"
##
##
## $networkConnectionEnabled
## [1] FALSE
##
## $pageLoadStrategy
## [1] "normal"
##
## $platformName
## [1] "linux"
##
## $proxy
## named list()
##
## $setWindowRect
## [1] TRUE
##
## $strictFileInteractability
## [1] FALSE
##
## $timeouts
## $timeouts$implicit
## [1] 0
##
## $timeouts$pageLoad
## [1] 300000
##
## $timeouts$script
## [1] 30000
##
##
## $unhandledPromptBehavior
## [1] "dismiss and notify"
##
## $`webauthn:extension:credBlob`
## [1] TRUE
##
## $`webauthn:extension:largeBlob`
## [1] TRUE
##
## $`webauthn:virtualAuthenticators`
## [1] TRUE
##
## $webdriver.remote.sessionid
## [1] "28bceca4d8c2f453dd8fb13706e0cfbc"
##
## $id
## [1] "28bceca4d8c2f453dd8fb13706e0cfbc"
# Client handle used to drive the browser session started by rsDriver().
remote_driver <- driver[["client"]]

# Extract the offer name and price from the visible text of one <article>.
#
# The text is split into lines. The "price line" is the first line that ends
# in "zł" and is followed by a newline; the line directly above it is taken
# as the offer name. When the line above is the "Black Week" badge, the name
# is one line further up and the price is taken from the line after the
# price line. If no "zł" line exists, the first line is used as the name and
# the second as the price.
#
# offer_text: a single character string (the text of one offer tile).
# Returns a one-row data.frame with character columns `name` and `price`;
# a field whose line does not exist is NA instead of raising an error.
parse_offer_text <- function(offer_text) {
  lines <- str_split(offer_text, "\n")[[1]]

  # Bounds-checked line lookup: index 0 or an index past the end yields NA
  # rather than character(0), which would break `if` and data.frame().
  line_at <- function(idx) {
    if (idx >= 1 && idx <= length(lines)) lines[[idx]] else NA_character_
  }

  price_pos <- regexpr("zł\n", offer_text)[[1]]
  if (price_pos == -1) {
    # No price marker found: fall back to "name on line 1, price on line 2".
    price_idx <- 2L
  } else {
    # The number of newlines before the marker gives its 1-based line index.
    newline_pos <- gregexpr("\\n", offer_text)[[1]]
    price_idx <- sum(newline_pos < price_pos) + 1L
  }

  if (identical(line_at(price_idx - 1L), "Black Week")) {
    name <- line_at(price_idx - 2L)
    price <- line_at(price_idx + 1L)
  } else {
    name <- line_at(price_idx - 1L)
    price <- line_at(price_idx)
  }

  data.frame(name = name, price = price, stringsAsFactors = FALSE)
}

# Open the umbrella listing, dismiss the consent dialog if one is shown, and
# append one row per <article> element to `tap_data`. Errors and warnings are
# reported on the console instead of aborting the script.
tryCatch(
  {
    remote_driver$maxWindowSize()
    remote_driver$navigate(
      "https://allegro.pl/kategoria/galanteria-i-dodatki-parasole-11068?string=umbrella"
    )

    # The consent button is not always present when the page reports loaded
    # (indexing the empty result used to abort the whole scrape), so poll
    # briefly and click only if it was actually found.
    # NOTE(review): presumably the dialog is injected by script after load;
    # the 10 x 0.5 s polling budget is a guess - tune as needed.
    consent_xpath <- "/html/body/div[2]/div[1]/div/div[2]/div/div[2]/button[1]"
    consent_buttons <- list()
    for (attempt in seq_len(10)) {
      consent_buttons <- remote_driver$findElements("xpath", consent_xpath)
      if (length(consent_buttons) > 0) {
        break
      }
      Sys.sleep(0.5)
    }
    if (length(consent_buttons) > 0) {
      consent_buttons[[1]]$clickElement()
    } else {
      message("Consent button not found; continuing without clicking it")
    }

    elements <- remote_driver$findElements("css", "article")

    # Parse every tile first and bind once, instead of growing the data
    # frame with rbind() on each iteration.
    offers <- lapply(elements, function(el) {
      parse_offer_text(el$getElementText()[[1]])
    })
    tap_data <- rbind(tap_data, do.call(rbind, offers))

    print(paste(nrow(tap_data)))
  },
  error = function(e) {
    message("An Error Occurred")
    print(e)
  },
  warning = function(w) {
    message("A Warning Occurred")
    print(w)
    NA
  }
)
## An Error Occurred
## <subscriptOutOfBoundsError in button[[1]]: subscript out of bounds>
# Preview the first six scraped offers (six is head()'s default row count).
head(tap_data, n = 6L)
## data frame with 0 columns and 0 rows