# Load the Zillow listings export and keep only rows usable for modelling.
data <- read.csv("zillow-properties-for-sale-2023-11-11_14_54_06.csv")

# Coerce the price to numeric BEFORE filtering: as.numeric() yields NA for
# any value it cannot parse, and those rows must be dropped together with the
# rows that were already missing. Filtering first would leave such NAs behind.
data$Property.price..USD. <- as.numeric(data$Property.price..USD.)

# Columns that must all be present for a listing to be kept.
required_cols <- c(
  "Living.area",
  "Property.price..USD.",
  "Bedrooms",
  "Lot.land.area"
)

# Single pass: keep rows with no NA in any required column.
data <- data[complete.cases(data[required_cols]), ]
# Exploratory plots of listing price against the candidate predictors.

# Price distribution for each listing description.
boxplot(
  Property.price..USD. ~ Listing.description,
  data = data,
  main = "Box-plot type of house and price",
  col = "blue",
  xlab = "House type",
  ylab = "House price"
)

# Price against living area, with a loess smoother drawn by scatter.smooth().
with(
  data,
  scatter.smooth(
    Living.area, Property.price..USD.,
    main = "Price per area",
    col = "green",
    xlab = "House area",
    ylab = "House price"
  )
)

# Price against lot (land) area, with the same kind of smoother.
with(
  data,
  scatter.smooth(
    Lot.land.area, Property.price..USD.,
    main = "Price per area",
    col = "red",
    xlab = "Land area",
    ylab = "House price"
  )
)

# Price distribution for each bedroom count.
boxplot(
  Property.price..USD. ~ Bedrooms,
  data = data,
  main = "Box-plot housing price per bedrooms",
  xlab = "Number of bedrooms",
  ylab = "House price",
  col = " pink"
)
Building the prediction model
# Fit a linear model of price on living area, bedrooms and lot area,
# including all two-way interactions and the three-way interaction.
# Columns are referenced by bare name with `data = data` (rather than
# `data$...` inside the formula) so that predict(mod, newdata = ...) can
# look the predictors up in new data and the term labels stay readable.
# The fitted values and residuals are the same as with the `data$` form;
# only the term labels in the printed output differ.
mod <- lm(
  Property.price..USD. ~ Living.area * Bedrooms * Lot.land.area,
  data = data
)
summary(mod)
##
## Call:
## lm(formula = data$Property.price..USD. ~ data$Living.area * data$Bedrooms *
## data$Lot.land.area)
##
## Residuals:
## Min 1Q Median 3Q Max
## -61783 -28467 -5536 24299 81227
##
## Coefficients:
## Estimate Std. Error t value
## (Intercept) -6.266e+05 3.233e+05 -1.938
## data$Living.area 3.951e+02 1.745e+02 2.265
## data$Bedrooms 2.243e+05 9.376e+04 2.392
## data$Lot.land.area 9.201e+01 3.951e+01 2.329
## data$Living.area:data$Bedrooms -1.048e+02 4.791e+01 -2.187
## data$Living.area:data$Lot.land.area -3.474e-02 2.097e-02 -1.657
## data$Bedrooms:data$Lot.land.area -3.754e+01 1.334e+01 -2.813
## data$Living.area:data$Bedrooms:data$Lot.land.area 1.640e-02 6.257e-03 2.620
## Pr(>|t|)
## (Intercept) 0.0685 .
## data$Living.area 0.0361 *
## data$Bedrooms 0.0279 *
## data$Lot.land.area 0.0317 *
## data$Living.area:data$Bedrooms 0.0421 *
## data$Living.area:data$Lot.land.area 0.1149
## data$Bedrooms:data$Lot.land.area 0.0115 *
## data$Living.area:data$Bedrooms:data$Lot.land.area 0.0173 *
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## Residual standard error: 43410 on 18 degrees of freedom
## Multiple R-squared: 0.6196, Adjusted R-squared: 0.4717
## F-statistic: 4.189 on 7 and 18 DF, p-value: 0.006667
# Multiple R-squared of the fitted model, extracted from its summary.
summary(mod)[["r.squared"]]
## [1] 0.6196417
# Standard lm diagnostic plots for the fitted model.
plot(mod)
# Estimated coefficients of the fitted model.
coefficients(mod)
## (Intercept)
## -6.265721e+05
## data$Living.area
## 3.951479e+02
## data$Bedrooms
## 2.242551e+05
## data$Lot.land.area
## 9.201469e+01
## data$Living.area:data$Bedrooms
## -1.047983e+02
## data$Living.area:data$Lot.land.area
## -3.473920e-02
## data$Bedrooms:data$Lot.land.area
## -3.754024e+01
## data$Living.area:data$Bedrooms:data$Lot.land.area
## 1.639609e-02
Find the outliers with a low price:
# Put each listing's URL, actual price, fitted price and residual next to
# the predictors used by the model. The column names are spelled out and
# match the ones data.frame() would derive from the expressions.
data2 <- data.frame(
  data.Property.URL = data$Property.URL,
  data.Property.price..USD. = data$Property.price..USD.,
  mod.fitted.values = mod$fitted.values,
  mod.residuals = mod$residuals,
  data.Living.area = data$Living.area,
  data.Bedrooms = data$Bedrooms,
  data.Lot.land.area = data$Lot.land.area
)

# Ascending residual order: listings priced furthest below the model's
# fitted value come first.
residual_rank <- order(data2[["mod.residuals"]])
data2 <- data2[residual_rank, ]

# Render the sorted table as a paged table.
rmarkdown::paged_table(data2)