# Load the Zillow "properties for sale" export.
data <- read.csv("zillow-properties-for-sale-2023-11-11_14_54_06.csv")

# Coerce the price to numeric BEFORE filtering: any value that fails to parse
# becomes NA here, and must be dropped together with the originally-missing
# values. Coercing after the filter would leave those new NAs in `data`.
data$Property.price..USD. <- as.numeric(data$Property.price..USD.)

# Keep only the listings that have every field used by the plots and the model.
data <- data[
  complete.cases(
    data[, c("Living.area", "Property.price..USD.", "Bedrooms", "Lot.land.area")]
  ),
]
# Price distribution for each listing description (shown as the house type).
boxplot(
  Property.price..USD. ~ Listing.description,
  data = data,
  main = "Box-plot type of house and price",
  xlab = "House type",
  ylab = "House price",
  col = "blue"
)

# Price against living area, with a smoothed trend line.
with(
  data,
  scatter.smooth(
    Living.area, Property.price..USD.,
    main = "Price per area",
    xlab = "House area",
    ylab = "House price",
    col = "green"
  )
)

# Price against lot/land area, with a smoothed trend line.
with(
  data,
  scatter.smooth(
    Lot.land.area, Property.price..USD.,
    main = "Price per area",
    xlab = "Land area",
    ylab = "House price",
    col = "red"
  )
)

# Price distribution for each bedroom count.
boxplot(
  Property.price..USD. ~ Bedrooms,
  data = data,
  main = "Box-plot housing price per bedrooms",
  xlab = "Number of bedrooms",
  ylab = "House price",
  col = " pink"
)

Building a predictive model

# Fit a linear model of property price (USD) on living area, number of
# bedrooms and lot/land area. The `*` operator expands to the three main
# effects plus every two-way interaction and the three-way interaction,
# i.e. 8 estimated coefficients including the intercept.
# NOTE(review): the formula references `data$...` columns directly instead of
# bare column names with `data = data`; presumably this makes predict() with
# `newdata` unusable for this model -- confirm before reusing it to predict.
mod <- lm(data$Property.price..USD.~data$Living.area*data$Bedrooms*data$Lot.land.area)
# Print the residual summary, coefficient table and overall fit statistics.
summary(mod)
## 
## Call:
## lm(formula = data$Property.price..USD. ~ data$Living.area * data$Bedrooms * 
##     data$Lot.land.area)
## 
## Residuals:
##    Min     1Q Median     3Q    Max 
## -61783 -28467  -5536  24299  81227 
## 
## Coefficients:
##                                                     Estimate Std. Error t value
## (Intercept)                                       -6.266e+05  3.233e+05  -1.938
## data$Living.area                                   3.951e+02  1.745e+02   2.265
## data$Bedrooms                                      2.243e+05  9.376e+04   2.392
## data$Lot.land.area                                 9.201e+01  3.951e+01   2.329
## data$Living.area:data$Bedrooms                    -1.048e+02  4.791e+01  -2.187
## data$Living.area:data$Lot.land.area               -3.474e-02  2.097e-02  -1.657
## data$Bedrooms:data$Lot.land.area                  -3.754e+01  1.334e+01  -2.813
## data$Living.area:data$Bedrooms:data$Lot.land.area  1.640e-02  6.257e-03   2.620
##                                                   Pr(>|t|)  
## (Intercept)                                         0.0685 .
## data$Living.area                                    0.0361 *
## data$Bedrooms                                       0.0279 *
## data$Lot.land.area                                  0.0317 *
## data$Living.area:data$Bedrooms                      0.0421 *
## data$Living.area:data$Lot.land.area                 0.1149  
## data$Bedrooms:data$Lot.land.area                    0.0115 *
## data$Living.area:data$Bedrooms:data$Lot.land.area   0.0173 *
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## Residual standard error: 43410 on 18 degrees of freedom
## Multiple R-squared:  0.6196, Adjusted R-squared:  0.4717 
## F-statistic: 4.189 on 7 and 18 DF,  p-value: 0.006667
# Multiple R-squared of the fitted model, extracted from its summary.
summary(mod)[["r.squared"]]
## [1] 0.6196417
# Draw the diagnostic plots for the fitted lm object.
plot(mod)

# Named vector of the estimated model coefficients.
coefficients(mod)
##                                       (Intercept) 
##                                     -6.265721e+05 
##                                  data$Living.area 
##                                      3.951479e+02 
##                                     data$Bedrooms 
##                                      2.242551e+05 
##                                data$Lot.land.area 
##                                      9.201469e+01 
##                    data$Living.area:data$Bedrooms 
##                                     -1.047983e+02 
##               data$Living.area:data$Lot.land.area 
##                                     -3.473920e-02 
##                  data$Bedrooms:data$Lot.land.area 
##                                     -3.754024e+01 
## data$Living.area:data$Bedrooms:data$Lot.land.area 
##                                      1.639609e-02

Find the outliers with a low price (the most negative residuals come first):

# Put each listing's URL, actual price, fitted price and residual next to the
# predictors used by the model.
data2 <- data.frame(
  data$Property.URL,
  data$Property.price..USD.,
  mod$fitted.values,
  mod$residuals,
  data$Living.area,
  data$Bedrooms,
  data$Lot.land.area
)

# Sort ascending by residual, so listings priced furthest below the model's
# fitted value appear at the top.
data2 <- data2[order(data2[["mod.residuals"]]), ]

# Render the sorted table as a paged HTML table.
rmarkdown::paged_table(data2)