# NOTE: Done with the help of Hari.

library(rvest)
library(tidyverse)
library(ggplot2)
library(stringr)
# Extract the listing prices from a parsed results page.
#
# html: a document as returned by read_html().
# Returns the whitespace-trimmed text of every node matching the
# '.list-card-price' CSS selector, in document order.
price_ph <- function(html) {
  price_nodes <- html_nodes(html, '.list-card-price')
  price_text <- html_text(price_nodes)
  str_trim(price_text)
}

# Extract the listing detail strings from a parsed results page.
#
# html: a document as returned by read_html().
# Returns the whitespace-trimmed text of every node matching the
# '.list-card-details' CSS selector, in document order. The downstream
# parsing looks for "bds", "ba" and "sqft" inside each of these strings.
attrib_ph <- function(html) {
  detail_nodes <- html_nodes(html, '.list-card-details')
  detail_text <- html_text(detail_nodes)
  str_trim(detail_text)
}

# Scrape the listing pages ----
#
# Downloads each results page, pulls out the price and detail strings with
# price_ph() / attrib_ph(), and concatenates them into the character vectors
# `price` and `details` (same order as the pages are fetched).

first_page_url <- "https://www.zillow.com/bozeman-mt/3-_beds/2.0-_baths/?searchQueryState=%7B%22pagination%22%3A%7B%7D%2C%22usersSearchTerm%22%3A%22Bozeman%2C%20MT%22%2C%22mapBounds%22%3A%7B%22west%22%3A-112.06471723828125%2C%22east%22%3A-109.90590376171875%2C%22south%22%3A45.1519196193617%2C%22north%22%3A46.33753680187748%7D%2C%22regionSelection%22%3A%5B%7B%22regionId%22%3A44281%2C%22regionType%22%3A6%7D%5D%2C%22isMapVisible%22%3Atrue%2C%22filterState%22%3A%7B%22beds%22%3A%7B%22min%22%3A3%7D%2C%22baths%22%3A%7B%22min%22%3A2%7D%2C%22sort%22%3A%7B%22value%22%3A%22globalrelevanceex%22%7D%2C%22ah%22%3A%7B%22value%22%3Atrue%7D%2C%22price%22%3A%7B%22max%22%3A1000000%7D%2C%22mp%22%3A%7B%22max%22%3A4055%7D%7D%2C%22isListVisible%22%3Atrue%2C%22mapZoom%22%3A9%7D"
later_page_url <- "https://www.zillow.com/homes/for_sale/1-_beds/1.0-_baths/?searchQueryState=%7B%22mapBounds%22%3A%7B%22west%22%3A-112.06471723828125%2C%22east%22%3A-109.90590376171875%2C%22south%22%3A45.1519196193617%2C%22north%22%3A46.33753680187748%7D%2C%22isMapVisible%22%3Atrue%2C%22filterState%22%3A%7B%22price%22%3A%7B%22min%22%3A0%2C%22max%22%3A1500000%7D%2C%22mp%22%3A%7B%22min%22%3A0%2C%22max%22%3A6082%7D%2C%22beds%22%3A%7B%22min%22%3A1%7D%2C%22baths%22%3A%7B%22min%22%3A1%7D%2C%22sort%22%3A%7B%22value%22%3A%22globalrelevanceex%22%7D%2C%22ah%22%3A%7B%22value%22%3Atrue%7D%7D%2C%22isListVisible%22%3Atrue%2C%22mapZoom%22%3A9%2C%22pagination%22%3A%7B%7D%7D"

# NOTE(review): the second and third requests use the *same* URL (its
# "pagination" field is empty and no page number is inserted), so whatever it
# returns is appended twice. It also encodes different filters from the first
# URL (beds >= 1, baths >= 1, price <= 1500000 versus beds >= 3, baths >= 2,
# price <= 1000000). The request sequence is kept as it was so the downstream
# results stay comparable; confirm the intended pages before changing it.
page_urls <- c(first_page_url, later_page_url, later_page_url)

# Collect per-page results in preallocated lists instead of growing the
# vectors with c() on every iteration.
price_by_page <- vector("list", length(page_urls))
details_by_page <- vector("list", length(page_urls))

for (i in seq_along(page_urls)) {
  houses_url <- page_urls[[i]]
  houses <- read_html(houses_url)
  price_by_page[[i]] <- price_ph(houses)
  details_by_page[[i]] <- attrib_ph(houses)
}

price <- unlist(price_by_page)
details <- unlist(details_by_page)

# Build the analysis table from the scraped strings ----
#
# Produces `zillow_df2` with one row per scraped listing and the columns
#   bedrooms  - integer, digits found immediately before "bds" in `details`
#   bathrooms - integer, digits found immediately before "ba" in `details`
#   sqft      - numeric, digits/commas found before "sqft", commas removed
#   price     - numeric, the `price` string with every non-digit removed
# Entries where the pattern is not found become NA.

# Both vectors must describe the same listings; fail with a clear message
# rather than an opaque size error from mutate().
if (length(price) != length(details)) {
  stop(
    "Scraped ", length(price), " prices but ", length(details),
    " detail strings; cannot line them up row by row.",
    call. = FALSE
  )
}

# Size the empty frame from the data instead of a hard-coded row count, so the
# script keeps working when the pages return a different number of listings.
zillow_df <- data.frame(matrix(nrow = length(details), ncol = 0))
zillow_df2 <- zillow_df %>%
  mutate(
    bedrooms = as.integer(str_trim(str_extract(details, "[\\d ]*(?=bds)"))),
    bathrooms = as.integer(str_trim(str_extract(details, "[\\d ]*(?=ba)"))),
    sqft = str_trim(str_extract(details, "[\\d ,]*(?=sqft)")),
    # str_replace_all: strip every thousands separator, not just the first.
    sqft = as.numeric(str_replace_all(sqft, ",", "")),
    # `price` on the right-hand side is the scraped character vector; the
    # column of the same name is only created by this assignment.
    price = as.numeric(str_replace_all(price, "[^0-9]*", ""))
  )
# Scatter plot of price against floor area ----
# Point size also encodes sqft; point colour encodes the bedroom count,
# treated as a categorical variable.

pl <- ggplot(
  data = zillow_df2,
  mapping = aes(
    x = sqft,
    y = price,
    size = sqft,
    color = as.factor(bedrooms)
  )
) +
  geom_point()
pl

# Linear regression of price on the listing attributes ----
# Fits price ~ sqft + bedrooms + bathrooms on zillow_df2, then shows the
# coefficient vector and the full model summary. The "##" lines are the
# output recorded from an earlier run.

reg <- lm(formula = price ~ sqft + bedrooms + bathrooms, data = zillow_df2)
coef(reg)
##  (Intercept)         sqft     bedrooms    bathrooms 
## 121600.03565     30.31374 115864.42458  30280.67622
summary(reg)
## 
## Call:
## lm(formula = price ~ sqft + bedrooms + bathrooms, data = zillow_df2)
## 
## Residuals:
##     Min      1Q  Median      3Q     Max 
## -103083  -50123  -14497   57470  104110 
## 
## Coefficients:
##              Estimate Std. Error t value Pr(>|t|)   
## (Intercept) 121600.04   88797.39   1.369  0.18469   
## sqft            30.31      11.49   2.639  0.01498 * 
## bedrooms    115864.42   32578.18   3.557  0.00177 **
## bathrooms    30280.68   34590.66   0.875  0.39081   
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## Residual standard error: 70420 on 22 degrees of freedom
##   (1 observation deleted due to missingness)
## Multiple R-squared:  0.6485, Adjusted R-squared:  0.6006 
## F-statistic: 13.53 on 3 and 22 DF,  p-value: 3.22e-05