In this code block we fetch data from tap.az, a local classified-ads website. We need to get the name, phone number, and some other details from each ad. Because the site loads more results automatically as the page is scrolled down, we use the RSelenium package to scroll the page.
library(rvest, warn.conflicts=F, quietly=T)
library(dplyr, warn.conflicts=F, quietly=T)
library(tidyr, warn.conflicts=F, quietly=T)
library(stringr, warn.conflicts=F, quietly=T)
library(RSelenium, warn.conflicts = F, quietly = T)
# Scrape the "is-axtariram" listing on tap.az into `tap_data`: one row per ad
# with the columns elan, name, phone, maas, mezmun and elan_url.
tap_data <- data.frame() # Create empty data frame

# Number of times the End key is sent to the page to trigger the endless
# scroll before the ad links are collected.
scroll_count <- 1L

# Collapse the text of every node matching `css` on `page` into one
# comma-separated string ("" when nothing matches).
tap_node_text <- function(page, css) {
  page %>%
    html_nodes(css) %>%
    html_text() %>%
    paste(collapse = ",")
}

# NOTE(review): chromever is pinned; presumably it has to match the locally
# installed Chrome/chromedriver -- confirm before running elsewhere.
driver <- rsDriver(browser = c("chrome"), chromever = "107.0.5304.62")
remote_driver <- driver[["client"]]

tryCatch(
  {
    remote_driver$navigate("https://tap.az/elanlar/is-elanlari/is-axtariram")

    # Scrolling: send End to <body>, then pause before the next key press.
    bodyEl <- remote_driver$findElement("css", "body")
    for (i in seq_len(scroll_count)) {
      bodyEl$sendKeysToElement(list(key = "end"))
      Sys.sleep(2)
    }

    # tap_links <- remote_driver$findElement("css", 'div.js-endless-container.products.endless-products')
    tap_links <- remote_driver$findElements("css", "a.products-link")

    # Get all url from main search page and visit each ad page in turn.
    for (elem in tap_links) {
      elem <- elem$getElementAttribute("href")
      tryCatch(
        {
          moreInfo_page <- read_html(elem[[1]])
          phone <- tap_node_text(moreInfo_page, "a.phone")
          name <- tap_node_text(moreInfo_page, "div.name")
          elan <- tap_node_text(moreInfo_page, "h1.js-lot-title")
          mezmun <- tap_node_text(moreInfo_page, "div.lot-text")
          maas <- tap_node_text(moreInfo_page, "div.middle")
          # print(data.frame(elan, name, phone))

          # Rows are appended one at a time on purpose: whatever was scraped
          # before a fatal error is still available in `tap_data` afterwards.
          tap_data <- rbind(
            tap_data,
            data.frame(
              elan, name, phone, maas, mezmun,
              elan_url = elem[[1]],
              stringsAsFactors = FALSE
            )
          )
        },
        error = function(e) {
          # Do not call `next` here: the handler is a function with no loop of
          # its own, so `next` would raise "no loop for break/next" and abort
          # the whole scrape through the outer handler. Returning from the
          # handler is enough, because this tryCatch() is the last statement
          # of the loop body.
          print("URL Not Found, skipping")
          message(conditionMessage(e))
        }
      )
    }
    print(paste(nrow(tap_data), " number of ads scrapped!"))
  },
  error = function(e) {
    message("An Error Occurred")
    print(e)
  },
  # NOTE(review): a tryCatch() warning handler unwinds, so the first warning
  # raised anywhere above stops the scrape; confirm this is intended.
  warning = function(w) {
    message("A Warning Occurred")
    print(w)
    NA
  },
  finally = {
    # Always release the browser session and the Selenium server so the
    # browser process and the server port are not left behind.
    try(remote_driver$close(), silent = TRUE)
    try(driver[["server"]]$stop(), silent = TRUE)
  }
)
## [1] "52 number of ads scrapped!"
# Remove duplicate ads: keep the first row for each distinct ad URL.
tapdb <- tap_data
# De-duplicate on the bare column name. Passing `tapdb$elan_url` would be
# treated as an expression and add an extra column literally named
# "tapdb$elan_url" to the result. The guard keeps an empty scrape (a data
# frame without an `elan_url` column) from raising an error here.
if ("elan_url" %in% names(tapdb)) {
  tapdb <- tapdb %>% distinct(elan_url, .keep_all = TRUE)
}
# write.csv(tapdb, 'tapaz_nomreler.csv')
# Preview the first rows of the ad title, name and phone columns.
head(data.frame(tapdb$elan, tapdb$name, tapdb$phone))
## tapdb.elan tapdb.name tapdb.phone
## 1 Fəhlə işi axtarıram Rauf (050) 491-23-94
## 2 Mühafizəçi işi axtarıram Süleyman (070) 635-25-84
## 3 Xadimə işi axtarıram Zemheri Xanım (070) 716-30-16
## 4
## 5 Ofisiant işi axtarıram Qumral (070) 761-33-03
## 6 Qabyuyan işi axtarıram Kamran (050) 413-70-17