Fetching data from tap.az

In this code block we fetch data from tap.az, a local classified-ads website. We need to collect the name, phone number and some other details from each ad. Because the site loads more ads as the page is scrolled down (infinite scroll), we use the RSelenium package to drive a real browser and scroll the page before collecting the ad links.

library(rvest, warn.conflicts=F, quietly=T)
library(dplyr, warn.conflicts=F, quietly=T)
library(tidyr, warn.conflicts=F, quietly=T)
library(stringr, warn.conflicts=F, quietly=T)
library(RSelenium, warn.conflicts = F, quietly = T)

# Scrape the "job wanted" listing on tap.az.
#
# A Selenium-driven Chrome session opens the listing page and scrolls it so
# that lazily loaded ads are rendered. Every ad link found on the page is then
# fetched with rvest, and one row per ad (title, name, phone, salary block,
# description, URL) is appended to `tap_data`.

tap_data <- data.frame()   # Accumulates one row per successfully scraped ad

# Return the text of every node matching `css` in `page`, joined with ",".
# Yields "" when nothing matches.
collapse_node_text <- function(page, css) {
  page %>%
    html_nodes(css) %>%
    html_text() %>%
    paste(collapse = ",")
}

driver <- rsDriver(browser = c("chrome"), chromever = "107.0.5304.62")
remote_driver <- driver[["client"]]
# NOTE(review): the browser/server started above are not shut down in this
# section; close them once no later code needs `remote_driver`.

tryCatch(
  {
    remote_driver$navigate("https://tap.az/elanlar/is-elanlari/is-axtariram")

    # Press END on the page body so the endless-scroll container loads more
    # ads, then wait for them to render. Raise the iteration count to load
    # more batches.
    bodyEl <- remote_driver$findElement("css", "body")
    for (i in seq_len(1L)) {
      bodyEl$sendKeysToElement(list(key = "end"))
      Sys.sleep(2)
    }

    # Every ad card on the listing page is an <a class="products-link">
    tap_links <- remote_driver$findElements("css", "a.products-link")

    # Visit each ad page and pull out the fields of interest
    for (elem in tap_links) {
      ad_url <- elem$getElementAttribute("href")[[1]]
      tryCatch(
        {
          moreInfo_page <- read_html(ad_url)
          phone  <- collapse_node_text(moreInfo_page, "a.phone")
          name   <- collapse_node_text(moreInfo_page, "div.name")
          elan   <- collapse_node_text(moreInfo_page, "h1.js-lot-title")
          mezmun <- collapse_node_text(moreInfo_page, "div.lot-text")
          maas   <- collapse_node_text(moreInfo_page, "div.middle")

          tap_data <- rbind(
            tap_data,
            data.frame(
              elan, name, phone, maas, mezmun,
              elan_url = ad_url,
              stringsAsFactors = FALSE
            )
          )
        },
        error = function(e) {
          # Do NOT call `next` here: a handler is a function, not the loop
          # body, so `next` raises "no loop for break/next" and aborts the
          # whole scrape. Returning normally already moves on to the next ad,
          # because this tryCatch is the last statement of the iteration.
          print("URL Not Found, skipping")
          message("  ", ad_url, ": ", conditionMessage(e))
        }
      )
    }

    print(paste(nrow(tap_data)," number of ads scrapped!"))
  },
  error = function(e) {
    message('An Error Occurred')
    print(e)
  },
  # NOTE(review): this handler unwinds on the first warning raised anywhere
  # above, which ends the scrape early; rows collected so far stay in
  # `tap_data`.
  warning = function(w) {
    message('A Warning Occurred')
    print(w)
    return(NA)
  }
)
## [1] "52  number of ads scrapped!"
# Remove duplicate ads: two rows are the same ad when they share `elan_url`.
tapdb <- tap_data
# Use the bare column name. Passing `tapdb$elan_url` is treated as a computed
# expression and adds a stray column literally named "tapdb$elan_url".
# The guard covers the case where nothing was scraped and the column is absent.
if ("elan_url" %in% names(tapdb)) {
  tapdb <- tapdb %>% distinct(elan_url, .keep_all = TRUE)
}

#write.csv(tapdb,'tapaz_nomreler.csv')
# Preview the ad title, advertiser name and phone number of the first rows
head(data.frame(tapdb$elan,tapdb$name, tapdb$phone))
##                 tapdb.elan    tapdb.name     tapdb.phone
## 1      Fəhlə işi axtarıram         Rauf  (050) 491-23-94
## 2 Mühafizəçi işi axtarıram      Süleyman (070) 635-25-84
## 3     Xadimə işi axtarıram Zemheri Xanım (070) 716-30-16
## 4                                                       
## 5   Ofisiant işi axtarıram        Qumral (070) 761-33-03
## 6   Qabyuyan işi axtarıram        Kamran (050) 413-70-17