# R Markdown
library(rvest, warn.conflicts=F, quietly=T)
library(dplyr, warn.conflicts=F, quietly=T)
library(tidyr, warn.conflicts=F, quietly=T)
library(stringr, warn.conflicts=F, quietly=T)
# Scrape the paginated turbo.az listing and collect one row per ad in
# `car_data` (columns: car_name, car_info, car_price, ad_time; all character).
n_pages <- 50L
base_url <- "https://turbo.az/autos?page="
# Fixed make/model filter from the original query string.
query_tail <- "&q%5Bmake%5D%5B%5D=23&q%5Bmodel%5D%5B%5D=283"

# Collect each page's rows in a preallocated list and bind once at the end,
# instead of re-copying the accumulated data frame on every iteration.
page_rows <- vector("list", n_pages)
for (i in seq_len(n_pages)) {
  link <- paste0(base_url, i, query_tail)
  page <- read_html(link)

  car_name <- page %>%
    html_nodes("div.products-i__name.products-i__bottom-text") %>%
    html_text()
  car_info <- page %>%
    html_nodes("div.products-i__attributes.products-i__bottom-text") %>%
    html_text()
  car_price <- page %>%
    html_nodes("div.product-price") %>%
    html_text()
  ad_time <- page %>%
    html_nodes("div.products-i__datetime") %>%
    html_text()

  # data.frame() silently recycles shorter vectors when the lengths divide
  # evenly, which would pair names with the wrong prices. Fail loudly instead.
  field_lengths <- lengths(list(car_name, car_info, car_price, ad_time))
  if (length(unique(field_lengths)) != 1L) {
    stop(
      "Page ", i, ": scraped fields have differing lengths (",
      paste(field_lengths, collapse = ", "), ")",
      call. = FALSE
    )
  }

  page_rows[[i]] <- data.frame(
    car_name, car_info, car_price, ad_time,
    stringsAsFactors = FALSE
  )
}
car_data <- bind_rows(page_rows)

# `i` holds the last page index once the loop has finished.
print(paste(i, " pages scrapped!"))
## [1] "50 pages scrapped!"
# Clean the scraped listings and fit a simple linear model of price on year.

# `car_info` is a single comma-separated string; split it into three columns.
car_data3 <- separate(
  data = car_data,
  col = car_info,
  into = c("year", "engine", "mileage"),
  sep = "\\,"
)

# Split the price text into its numeric amount (`num`) and the non-digit
# remainder (`text`), which carries the currency marker. Read from car_data3
# itself so the values always stay aligned with the rows being modified.
car_data3$num <- gsub("[^0-9.-]", "", car_data3$car_price)
car_data3$text <- gsub("[[:digit:]]", "", car_data3$car_price)
car_data3$year <- as.numeric(car_data3$year)
car_data3$num <- as.numeric(car_data3$num)

# Prices marked with "$" are scaled by a fixed rate so all amounts share one
# unit. NOTE(review): 1.7 is presumably the USD -> local currency rate; confirm.
# Any other currency marker is left unconverted, as in the original logic.
usd_rate <- 1.7
# %in% never yields NA, so rows with a missing price text are simply skipped
# (a scalar `if` on NA would error), and an empty data frame is a no-op.
is_usd <- str_trim(car_data3$text) %in% "$"
car_data3$num[is_usd] <- car_data3$num[is_usd] * usd_rate

# Keep only listings whose trimmed name is exactly "Toyota Prado".
car_data3 <- subset(car_data3, str_trim(car_name) == "Toyota Prado")

# Fit price ~ year and overlay the fitted line on the scatter plot.
LinearRegr <- lm(num ~ year, data = car_data3)
plot(num ~ year, data = car_data3)
abline(LinearRegr)
