# Load the raw house-price table from disk and preview its first rows.
house <- read.csv(file = "data input/HousePrices.csv")
head(house)
## Area Garage FirePlace Baths White.Marble Black.Marble Indian.Marble Floors
## 1 164 2 0 2 0 1 0 0
## 2 84 2 0 4 0 0 1 1
## 3 190 2 4 4 1 0 0 0
## 4 75 2 4 4 0 0 1 1
## 5 148 1 4 2 1 0 0 1
## 6 124 3 3 3 0 1 0 1
## City Solar Electric Fiber Glass.Doors Swiming.Pool Garden Prices
## 1 3 1 1 1 1 0 0 43800
## 2 2 0 0 0 1 1 1 37550
## 3 2 0 0 1 0 0 0 49500
## 4 1 1 1 1 1 1 1 50075
## 5 2 1 0 0 1 1 1 52400
## 6 1 0 0 1 1 1 1 54300
library(dplyr)
##
## Attaching package: 'dplyr'
## The following objects are masked from 'package:stats':
##
## filter, lag
## The following objects are masked from 'package:base':
##
## intersect, setdiff, setequal, union
glimpse(house)
## Rows: 500,000
## Columns: 16
## $ Area <int> 164, 84, 190, 75, 148, 124, 58, 249, 243, 242, 61, 189, …
## $ Garage <int> 2, 2, 2, 2, 1, 3, 1, 2, 1, 1, 2, 2, 2, 3, 3, 3, 1, 3, 2,…
## $ FirePlace <int> 0, 0, 4, 4, 4, 3, 0, 1, 0, 2, 4, 0, 0, 3, 3, 4, 0, 3, 3,…
## $ Baths <int> 2, 4, 4, 4, 2, 3, 2, 1, 2, 4, 5, 4, 2, 3, 1, 1, 5, 3, 5,…
## $ White.Marble <int> 0, 0, 1, 0, 1, 0, 0, 1, 0, 0, 0, 0, 0, 1, 0, 0, 0, 0, 0,…
## $ Black.Marble <int> 1, 0, 0, 0, 0, 1, 0, 0, 0, 0, 0, 1, 1, 0, 0, 0, 1, 1, 1,…
## $ Indian.Marble <int> 0, 1, 0, 1, 0, 0, 1, 0, 1, 1, 1, 0, 0, 0, 1, 1, 0, 0, 0,…
## $ Floors <int> 0, 1, 0, 1, 1, 1, 0, 1, 1, 0, 1, 0, 1, 1, 1, 0, 1, 1, 1,…
## $ City <int> 3, 2, 2, 1, 2, 1, 3, 1, 1, 2, 1, 2, 1, 3, 3, 1, 3, 1, 3,…
## $ Solar <int> 1, 0, 0, 1, 1, 0, 0, 0, 0, 1, 1, 1, 1, 1, 0, 0, 1, 1, 0,…
## $ Electric <int> 1, 0, 0, 1, 0, 0, 1, 1, 0, 0, 0, 1, 1, 1, 0, 0, 0, 0, 1,…
## $ Fiber <int> 1, 0, 1, 1, 0, 1, 1, 0, 0, 0, 1, 0, 0, 1, 0, 0, 0, 1, 0,…
## $ Glass.Doors <int> 1, 1, 0, 1, 1, 1, 1, 1, 0, 0, 1, 1, 1, 0, 0, 0, 0, 0, 1,…
## $ Swiming.Pool <int> 0, 1, 0, 1, 1, 1, 0, 1, 1, 1, 1, 1, 0, 1, 1, 1, 1, 0, 0,…
## $ Garden <int> 0, 1, 0, 1, 1, 1, 1, 0, 0, 0, 1, 1, 0, 0, 0, 1, 0, 0, 0,…
## $ Prices <int> 43800, 37550, 49500, 50075, 52400, 54300, 34400, 50425, …
unique(house$White.Marble)
## [1] 0 1
unique(house$Black.Marble)
## [1] 1 0
unique(house$Indian.Marble)
## [1] 0 1
unique(house$Floors)
## [1] 0 1
unique(house$Solar)
## [1] 1 0
unique(house$Electric)
## [1] 1 0
unique(house$Garden)
## [1] 0 1
library(dplyr)
# Convert the flag-style feature columns to factors so that lm() fits them as
# categorical predictors. across() applies the same conversion to every listed
# column, which avoids repeating one assignment per column.
house <- house %>%
  mutate(
    across(
      c(
        White.Marble, Black.Marble, Indian.Marble, Floors, Solar,
        Electric, Fiber, Glass.Doors, Swiming.Pool, Garden
      ),
      as.factor
    )
  )
glimpse(house)
## Rows: 500,000
## Columns: 16
## $ Area <int> 164, 84, 190, 75, 148, 124, 58, 249, 243, 242, 61, 189, …
## $ Garage <int> 2, 2, 2, 2, 1, 3, 1, 2, 1, 1, 2, 2, 2, 3, 3, 3, 1, 3, 2,…
## $ FirePlace <int> 0, 0, 4, 4, 4, 3, 0, 1, 0, 2, 4, 0, 0, 3, 3, 4, 0, 3, 3,…
## $ Baths <int> 2, 4, 4, 4, 2, 3, 2, 1, 2, 4, 5, 4, 2, 3, 1, 1, 5, 3, 5,…
## $ White.Marble <fct> 0, 0, 1, 0, 1, 0, 0, 1, 0, 0, 0, 0, 0, 1, 0, 0, 0, 0, 0,…
## $ Black.Marble <fct> 1, 0, 0, 0, 0, 1, 0, 0, 0, 0, 0, 1, 1, 0, 0, 0, 1, 1, 1,…
## $ Indian.Marble <fct> 0, 1, 0, 1, 0, 0, 1, 0, 1, 1, 1, 0, 0, 0, 1, 1, 0, 0, 0,…
## $ Floors <fct> 0, 1, 0, 1, 1, 1, 0, 1, 1, 0, 1, 0, 1, 1, 1, 0, 1, 1, 1,…
## $ City <int> 3, 2, 2, 1, 2, 1, 3, 1, 1, 2, 1, 2, 1, 3, 3, 1, 3, 1, 3,…
## $ Solar <fct> 1, 0, 0, 1, 1, 0, 0, 0, 0, 1, 1, 1, 1, 1, 0, 0, 1, 1, 0,…
## $ Electric <fct> 1, 0, 0, 1, 0, 0, 1, 1, 0, 0, 0, 1, 1, 1, 0, 0, 0, 0, 1,…
## $ Fiber <fct> 1, 0, 1, 1, 0, 1, 1, 0, 0, 0, 1, 0, 0, 1, 0, 0, 0, 1, 0,…
## $ Glass.Doors <fct> 1, 1, 0, 1, 1, 1, 1, 1, 0, 0, 1, 1, 1, 0, 0, 0, 0, 0, 1,…
## $ Swiming.Pool <fct> 0, 1, 0, 1, 1, 1, 0, 1, 1, 1, 1, 1, 0, 1, 1, 1, 1, 0, 0,…
## $ Garden <fct> 0, 1, 0, 1, 1, 1, 1, 0, 0, 0, 1, 1, 0, 0, 0, 1, 0, 0, 0,…
## $ Prices <int> 43800, 37550, 49500, 50075, 52400, 54300, 34400, 50425, …
model_price <- lm(formula = Prices ~ Electric, data = house)
summary(model_price)
##
## Call:
## lm(formula = Prices ~ Electric, data = house)
##
## Residuals:
## Min 1Q Median 3Q Max
## -34134 -8539 -209 8691 35361
##
## Coefficients:
## Estimate Std. Error t value Pr(>|t|)
## (Intercept) 41414.22 24.20 1711.13 <2e-16 ***
## Electric1 1270.18 34.21 37.13 <2e-16 ***
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## Residual standard error: 12090 on 499998 degrees of freedom
## Multiple R-squared: 0.00275, Adjusted R-squared: 0.002748
## F-statistic: 1379 on 1 and 499998 DF, p-value: < 2.2e-16
Model Keseluruhan Prediktor
# Regress Prices on every other column of `house` except Indian.Marble.
model_price_all <- lm(formula = Prices ~ . - Indian.Marble, data = house)
summary(model_price_all)
##
## Call:
## lm(formula = Prices ~ . - Indian.Marble, data = house)
##
## Residuals:
## Min 1Q Median 3Q Max
## -1.343e-04 0.000e+00 0.000e+00 1.000e-09 5.000e-07
##
## Coefficients:
## Estimate Std. Error t value Pr(>|t|)
## (Intercept) 1.000e+03 1.509e-09 6.628e+11 <2e-16 ***
## Area 2.500e+01 3.740e-12 6.684e+12 <2e-16 ***
## Garage 1.500e+03 3.287e-10 4.564e+12 <2e-16 ***
## FirePlace 7.500e+02 1.899e-10 3.949e+12 <2e-16 ***
## Baths 1.250e+03 1.899e-10 6.583e+12 <2e-16 ***
## White.Marble1 1.400e+04 6.574e-10 2.129e+13 <2e-16 ***
## Black.Marble1 5.000e+03 6.576e-10 7.603e+12 <2e-16 ***
## Floors1 1.500e+04 5.371e-10 2.793e+13 <2e-16 ***
## City 3.500e+03 3.290e-10 1.064e+13 <2e-16 ***
## Solar1 2.500e+02 5.371e-10 4.655e+11 <2e-16 ***
## Electric1 1.250e+03 5.371e-10 2.327e+12 <2e-16 ***
## Fiber1 1.175e+04 5.371e-10 2.188e+13 <2e-16 ***
## Glass.Doors1 4.450e+03 5.371e-10 8.286e+12 <2e-16 ***
## Swiming.Pool1 5.412e-10 5.371e-10 1.008e+00 0.314
## Garden1 5.415e-10 5.371e-10 1.008e+00 0.313
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## Residual standard error: 1.899e-07 on 499985 degrees of freedom
## Multiple R-squared: 1, Adjusted R-squared: 1
## F-statistic: 1.453e+26 on 14 and 499985 DF, p-value: < 2.2e-16
summary(model_price_all)
##
## Call:
## lm(formula = Prices ~ . - Indian.Marble, data = house)
##
## Residuals:
## Min 1Q Median 3Q Max
## -1.343e-04 0.000e+00 0.000e+00 1.000e-09 5.000e-07
##
## Coefficients:
## Estimate Std. Error t value Pr(>|t|)
## (Intercept) 1.000e+03 1.509e-09 6.628e+11 <2e-16 ***
## Area 2.500e+01 3.740e-12 6.684e+12 <2e-16 ***
## Garage 1.500e+03 3.287e-10 4.564e+12 <2e-16 ***
## FirePlace 7.500e+02 1.899e-10 3.949e+12 <2e-16 ***
## Baths 1.250e+03 1.899e-10 6.583e+12 <2e-16 ***
## White.Marble1 1.400e+04 6.574e-10 2.129e+13 <2e-16 ***
## Black.Marble1 5.000e+03 6.576e-10 7.603e+12 <2e-16 ***
## Floors1 1.500e+04 5.371e-10 2.793e+13 <2e-16 ***
## City 3.500e+03 3.290e-10 1.064e+13 <2e-16 ***
## Solar1 2.500e+02 5.371e-10 4.655e+11 <2e-16 ***
## Electric1 1.250e+03 5.371e-10 2.327e+12 <2e-16 ***
## Fiber1 1.175e+04 5.371e-10 2.188e+13 <2e-16 ***
## Glass.Doors1 4.450e+03 5.371e-10 8.286e+12 <2e-16 ***
## Swiming.Pool1 5.412e-10 5.371e-10 1.008e+00 0.314
## Garden1 5.415e-10 5.371e-10 1.008e+00 0.313
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## Residual standard error: 1.899e-07 on 499985 degrees of freedom
## Multiple R-squared: 1, Adjusted R-squared: 1
## F-statistic: 1.453e+26 on 14 and 499985 DF, p-value: < 2.2e-16
library(GGally)
## Loading required package: ggplot2
## Registered S3 method overwritten by 'GGally':
## method from
## +.gg ggplot2
# Correlation plot with the coefficients printed on the tiles. ggcorr() only
# uses numeric columns, so the columns stored as factors are left out of the
# plot (it emits a warning listing them).
ggcorr(house, label = TRUE)
## Warning in ggcorr(house, label = T): data in column(s) 'White.Marble',
## 'Black.Marble', 'Indian.Marble', 'Floors', 'Solar', 'Electric', 'Fiber',
## 'Glass.Doors', 'Swiming.Pool', 'Garden' are not numeric and were ignored
Insight: Di antara variabel numerik (kolom bertipe factor diabaikan oleh ggcorr), variabel yang memiliki korelasi tinggi terhadap Prices adalah 1. City 2. Baths
model_price_corr <- lm(formula = Prices ~ City + Baths,
data = house)
summary(model_price_corr)
##
## Call:
## lm(formula = Prices ~ City + Baths, data = house)
##
## Residuals:
## Min 1Q Median 3Q Max
## -28762.5 -7950.3 -523.8 8544.8 30113.0
##
## Coefficients:
## Estimate Std. Error t value Pr(>|t|)
## (Intercept) 31391.42 55.87 561.8 <2e-16 ***
## City 3462.76 20.18 171.6 <2e-16 ***
## Baths 1244.12 11.64 106.8 <2e-16 ***
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## Residual standard error: 11640 on 499997 degrees of freedom
## Multiple R-squared: 0.07552, Adjusted R-squared: 0.07551
## F-statistic: 2.042e+04 on 2 and 499997 DF, p-value: < 2.2e-16
# Store the predictions of the three fitted models next to the observed data
# so they can be compared row by row.
house_pred <- house
house_pred[["pred_price"]] <- predict(
  object = model_price,
  newdata = house
)
house_pred[["pred_price_all"]] <- predict(
  object = model_price_all,
  newdata = house
)
house_pred[["pred_corr"]] <- predict(
  object = model_price_corr,
  newdata = house
)
head(house_pred)
## Area Garage FirePlace Baths White.Marble Black.Marble Indian.Marble Floors
## 1 164 2 0 2 0 1 0 0
## 2 84 2 0 4 0 0 1 1
## 3 190 2 4 4 1 0 0 0
## 4 75 2 4 4 0 0 1 1
## 5 148 1 4 2 1 0 0 1
## 6 124 3 3 3 0 1 0 1
## City Solar Electric Fiber Glass.Doors Swiming.Pool Garden Prices pred_price
## 1 3 1 1 1 1 0 0 43800 42684.41
## 2 2 0 0 0 1 1 1 37550 41414.22
## 3 2 0 0 1 0 0 0 49500 41414.22
## 4 1 1 1 1 1 1 1 50075 42684.41
## 5 2 1 0 0 1 1 1 52400 41414.22
## 6 1 0 0 1 1 1 1 54300 41414.22
## pred_price_all pred_corr
## 1 43800 44267.92
## 2 37550 43293.40
## 3 49500 43293.40
## 4 50075 39830.64
## 5 52400 40805.17
## 6 54300 38586.52
# Goodness of fit for the three candidate models.
# NOTE(review): the first call reads plain R-squared while the next two read
# adjusted R-squared; confirm whether `adj.r.squared` was intended for all
# three so that the values are directly comparable.
summary(model_price)$r.squared
## [1] 0.002750225
summary(model_price_all)$adj.r.squared
## [1] 1
summary(model_price_corr)$adj.r.squared
## [1] 0.07551446
💡 Kesimpulan: Model terbaik berdasarkan R-squared adalah model_price_all (model dengan keseluruhan prediktor).
head(house_pred)
## Area Garage FirePlace Baths White.Marble Black.Marble Indian.Marble Floors
## 1 164 2 0 2 0 1 0 0
## 2 84 2 0 4 0 0 1 1
## 3 190 2 4 4 1 0 0 0
## 4 75 2 4 4 0 0 1 1
## 5 148 1 4 2 1 0 0 1
## 6 124 3 3 3 0 1 0 1
## City Solar Electric Fiber Glass.Doors Swiming.Pool Garden Prices pred_price
## 1 3 1 1 1 1 0 0 43800 42684.41
## 2 2 0 0 0 1 1 1 37550 41414.22
## 3 2 0 0 1 0 0 0 49500 41414.22
## 4 1 1 1 1 1 1 1 50075 42684.41
## 5 2 1 0 0 1 1 1 52400 41414.22
## 6 1 0 0 1 1 1 1 54300 41414.22
## pred_price_all pred_corr
## 1 43800 44267.92
## 2 37550 43293.40
## 3 49500 43293.40
## 4 50075 39830.64
## 5 52400 40805.17
## 6 54300 38586.52
library(MLmetrics)
##
## Attaching package: 'MLmetrics'
## The following object is masked from 'package:base':
##
## Recall
MAE(y_pred = house_pred$pred_price_all, y_true = house$Prices)
## [1] 1.901552e-07
range(house$Prices)
## [1] 7725 77975
summary(model_price_all)
##
## Call:
## lm(formula = Prices ~ . - Indian.Marble, data = house)
##
## Residuals:
## Min 1Q Median 3Q Max
## -1.343e-04 0.000e+00 0.000e+00 1.000e-09 5.000e-07
##
## Coefficients:
## Estimate Std. Error t value Pr(>|t|)
## (Intercept) 1.000e+03 1.509e-09 6.628e+11 <2e-16 ***
## Area 2.500e+01 3.740e-12 6.684e+12 <2e-16 ***
## Garage 1.500e+03 3.287e-10 4.564e+12 <2e-16 ***
## FirePlace 7.500e+02 1.899e-10 3.949e+12 <2e-16 ***
## Baths 1.250e+03 1.899e-10 6.583e+12 <2e-16 ***
## White.Marble1 1.400e+04 6.574e-10 2.129e+13 <2e-16 ***
## Black.Marble1 5.000e+03 6.576e-10 7.603e+12 <2e-16 ***
## Floors1 1.500e+04 5.371e-10 2.793e+13 <2e-16 ***
## City 3.500e+03 3.290e-10 1.064e+13 <2e-16 ***
## Solar1 2.500e+02 5.371e-10 4.655e+11 <2e-16 ***
## Electric1 1.250e+03 5.371e-10 2.327e+12 <2e-16 ***
## Fiber1 1.175e+04 5.371e-10 2.188e+13 <2e-16 ***
## Glass.Doors1 4.450e+03 5.371e-10 8.286e+12 <2e-16 ***
## Swiming.Pool1 5.412e-10 5.371e-10 1.008e+00 0.314
## Garden1 5.415e-10 5.371e-10 1.008e+00 0.313
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## Residual standard error: 1.899e-07 on 499985 degrees of freedom
## Multiple R-squared: 1, Adjusted R-squared: 1
## F-statistic: 1.453e+26 on 14 and 499985 DF, p-value: < 2.2e-16
# Backward elimination starting from the model with all predictors; the
# step-by-step trace is switched off.
model_backward <- step(
  model_price_all,
  direction = "backward",
  trace = FALSE
)
## Warning: attempting model selection on an essentially perfect fit is nonsense
## Warning: attempting model selection on an essentially perfect fit is nonsense
## Warning: attempting model selection on an essentially perfect fit is nonsense
summary(model_backward)
##
## Call:
## lm(formula = Prices ~ Area + Garage + FirePlace + Baths + White.Marble +
## Black.Marble + Floors + City + Solar + Electric + Fiber +
## Glass.Doors, data = house)
##
## Residuals:
## Min 1Q Median 3Q Max
## -1.343e-04 0.000e+00 0.000e+00 1.000e-09 5.010e-07
##
## Coefficients:
## Estimate Std. Error t value Pr(>|t|)
## (Intercept) 1.000e+03 1.461e-09 6.844e+11 <2e-16 ***
## Area 2.500e+01 3.740e-12 6.684e+12 <2e-16 ***
## Garage 1.500e+03 3.287e-10 4.564e+12 <2e-16 ***
## FirePlace 7.500e+02 1.899e-10 3.949e+12 <2e-16 ***
## Baths 1.250e+03 1.899e-10 6.583e+12 <2e-16 ***
## White.Marble1 1.400e+04 6.574e-10 2.129e+13 <2e-16 ***
## Black.Marble1 5.000e+03 6.576e-10 7.603e+12 <2e-16 ***
## Floors1 1.500e+04 5.371e-10 2.793e+13 <2e-16 ***
## City 3.500e+03 3.290e-10 1.064e+13 <2e-16 ***
## Solar1 2.500e+02 5.371e-10 4.655e+11 <2e-16 ***
## Electric1 1.250e+03 5.371e-10 2.327e+12 <2e-16 ***
## Fiber1 1.175e+04 5.371e-10 2.188e+13 <2e-16 ***
## Glass.Doors1 4.450e+03 5.371e-10 8.286e+12 <2e-16 ***
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## Residual standard error: 1.899e-07 on 499987 degrees of freedom
## Multiple R-squared: 1, Adjusted R-squared: 1
## F-statistic: 1.695e+26 on 12 and 499987 DF, p-value: < 2.2e-16
# Forward selection: start from the intercept-only model and search upward,
# bounded below by that model and above by model_price_all.
model_price_none <- lm(formula = Prices ~ 1, data = house)
model_forward <- step(
  model_price_none,
  scope = list(lower = model_price_none, upper = model_price_all),
  direction = "forward"
)
## Start: AIC=9401807
## Prices ~ 1
##
## Df Sum of Sq RSS AIC
## + Floors 1 2.8138e+13 4.5191e+13 9159783
## + Fiber 1 1.7222e+13 5.6107e+13 9267960
## + White.Marble 1 1.4728e+13 5.8601e+13 9289711
## + City 1 3.9898e+12 6.9339e+13 9373836
## + Glass.Doors 1 2.4282e+12 7.0901e+13 9384972
## + Area 1 1.6001e+12 7.1729e+13 9390779
## + Baths 1 1.5436e+12 7.1785e+13 9391172
## + Garage 1 7.3760e+11 7.2591e+13 9396755
## + FirePlace 1 5.8265e+11 7.2746e+13 9397821
## + Black.Marble 1 4.4670e+11 7.2882e+13 9398754
## + Electric 1 2.0167e+11 7.3127e+13 9400432
## + Solar 1 5.2097e+09 7.3324e+13 9401774
## <none> 7.3329e+13 9401807
## + Swiming.Pool 1 2.3404e+08 7.3329e+13 9401808
## + Garden 1 1.7392e+08 7.3329e+13 9401808
##
## Step: AIC=9159783
## Prices ~ Floors
##
## Df Sum of Sq RSS AIC
## + Fiber 1 1.7162e+13 2.8029e+13 8920961
## + White.Marble 1 1.4724e+13 3.0467e+13 8962653
## + City 1 4.0034e+12 4.1188e+13 9113405
## + Glass.Doors 1 2.4284e+12 4.2763e+13 9132168
## + Area 1 1.6105e+12 4.3581e+13 9141641
## + Baths 1 1.5552e+12 4.3636e+13 9142275
## + Garage 1 7.4611e+11 4.4445e+13 9151461
## + FirePlace 1 5.8115e+11 4.4610e+13 9153313
## + Black.Marble 1 4.4409e+11 4.4747e+13 9154847
## + Electric 1 2.0128e+11 4.4990e+13 9157553
## + Solar 1 7.4376e+09 4.5184e+13 9159703
## + Swiming.Pool 1 2.6953e+08 4.5191e+13 9159782
## + Garden 1 2.4956e+08 4.5191e+13 9159782
## <none> 4.5191e+13 9159783
##
## Step: AIC=8920961
## Prices ~ Floors + Fiber
##
## Df Sum of Sq RSS AIC
## + White.Marble 1 1.4743e+13 1.3287e+13 8547717
## + City 1 4.0486e+12 2.3981e+13 8842963
## + Glass.Doors 1 2.4577e+12 2.5572e+13 8875078
## + Area 1 1.6093e+12 2.6420e+13 8891398
## + Baths 1 1.5623e+12 2.6467e+13 8892287
## + Garage 1 7.5013e+11 2.7279e+13 8907399
## + FirePlace 1 5.6973e+11 2.7460e+13 8910695
## + Black.Marble 1 4.4450e+11 2.7585e+13 8912970
## + Electric 1 2.0243e+11 2.7827e+13 8917338
## + Solar 1 7.2657e+09 2.8022e+13 8920833
## + Garden 1 2.5250e+08 2.8029e+13 8920958
## <none> 2.8029e+13 8920961
## + Swiming.Pool 1 4.6438e+05 2.8029e+13 8920963
##
## Step: AIC=8547717
## Prices ~ Floors + Fiber + White.Marble
##
## Df Sum of Sq RSS AIC
## + City 1 4.0619e+12 9.2248e+12 8365285
## + Glass.Doors 1 2.4626e+12 1.0824e+13 8445224
## + Black.Marble 1 2.0764e+12 1.1210e+13 8462752
## + Area 1 1.5848e+12 1.1702e+13 8484214
## + Baths 1 1.5385e+12 1.1748e+13 8486188
## + Garage 1 7.4653e+11 1.2540e+13 8518806
## + FirePlace 1 5.6422e+11 1.2722e+13 8526023
## + Electric 1 2.0051e+11 1.3086e+13 8540116
## + Solar 1 8.3389e+09 1.3278e+13 8547405
## + Garden 1 1.4907e+08 1.3287e+13 8547714
## <none> 1.3287e+13 8547717
## + Swiming.Pool 1 4.3507e+07 1.3287e+13 8547717
##
## Step: AIC=8365285
## Prices ~ Floors + Fiber + White.Marble + City
##
## Df Sum of Sq RSS AIC
## + Glass.Doors 1 2.4578e+12 6.7670e+12 8210369
## + Black.Marble 1 2.0815e+12 7.1433e+12 8237425
## + Area 1 1.6024e+12 7.6224e+12 8269886
## + Baths 1 1.5428e+12 7.6820e+12 8273780
## + Garage 1 7.4378e+11 8.4810e+12 8323254
## + FirePlace 1 5.6484e+11 8.6599e+12 8333694
## + Electric 1 1.9909e+11 9.0257e+12 8354377
## + Solar 1 8.1610e+09 9.2166e+12 8364844
## + Garden 1 9.5580e+07 9.2247e+12 8365282
## <none> 9.2248e+12 8365285
## + Swiming.Pool 1 3.5130e+07 9.2247e+12 8365285
##
## Step: AIC=8210369
## Prices ~ Floors + Fiber + White.Marble + City + Glass.Doors
##
## Df Sum of Sq RSS AIC
## + Black.Marble 1 2.0821e+12 4.6849e+12 8026513
## + Area 1 1.6072e+12 5.1597e+12 8074786
## + Baths 1 1.5493e+12 5.2177e+12 8080371
## + Garage 1 7.4967e+11 6.0173e+12 8151663
## + FirePlace 1 5.6569e+11 6.2013e+12 8166722
## + Electric 1 1.9757e+11 6.5694e+12 8195555
## + Solar 1 8.3941e+09 6.7586e+12 8209750
## + Swiming.Pool 1 2.8014e+07 6.7670e+12 8210368
## <none> 6.7670e+12 8210369
## + Garden 1 2.0781e+07 6.7670e+12 8210369
##
## Step: AIC=8026513
## Prices ~ Floors + Fiber + White.Marble + City + Glass.Doors +
## Black.Marble
##
## Df Sum of Sq RSS AIC
## + Area 1 1.6082e+12 3.0767e+12 7816274
## + Baths 1 1.5555e+12 3.1294e+12 7824762
## + Garage 1 7.4359e+11 3.9413e+12 7940099
## + FirePlace 1 5.6681e+11 4.1181e+12 7962037
## + Electric 1 1.9797e+11 4.4869e+12 8004927
## + Solar 1 8.0999e+09 4.6768e+12 8025650
## + Swiming.Pool 1 3.2267e+07 4.6848e+12 8026512
## <none> 4.6849e+12 8026513
## + Garden 1 1.2522e+07 4.6849e+12 8026514
##
## Step: AIC=7816274
## Prices ~ Floors + Fiber + White.Marble + City + Glass.Doors +
## Black.Marble + Area
##
## Df Sum of Sq RSS AIC
## + Baths 1 1.5568e+12 1.5199e+12 7463671
## + Garage 1 7.4555e+11 2.3312e+12 7677528
## + FirePlace 1 5.6610e+11 2.5106e+12 7714609
## + Electric 1 1.9811e+11 2.8786e+12 7782997
## + Solar 1 7.9796e+09 3.0687e+12 7814978
## + Swiming.Pool 1 2.4010e+07 3.0767e+12 7816272
## <none> 3.0767e+12 7816274
## + Garden 1 2.9624e+06 3.0767e+12 7816276
##
## Step: AIC=7463671
## Prices ~ Floors + Fiber + White.Marble + City + Glass.Doors +
## Black.Marble + Area + Baths
##
## Df Sum of Sq RSS AIC
## + Garage 1 7.5345e+11 7.6646e+11 7121363
## + FirePlace 1 5.6490e+11 9.5501e+11 7231331
## + Electric 1 1.9695e+11 1.3230e+12 7394282
## + Solar 1 8.1480e+09 1.5118e+12 7460985
## <none> 1.5199e+12 7463671
## + Swiming.Pool 1 4.5384e+06 1.5199e+12 7463671
## + Garden 1 1.8950e+05 1.5199e+12 7463673
##
## Step: AIC=7121363
## Prices ~ Floors + Fiber + White.Marble + City + Glass.Doors +
## Black.Marble + Area + Baths + Garage
##
## Df Sum of Sq RSS AIC
## + FirePlace 1 5.6319e+11 2.0327e+11 6457742
## + Electric 1 1.9635e+11 5.7011e+11 6973389
## + Solar 1 7.9190e+09 7.5854e+11 7116172
## <none> 7.6646e+11 7121363
## + Swiming.Pool 1 1.3797e+06 7.6646e+11 7121364
## + Garden 1 1.8324e+04 7.6646e+11 7121365
##
## Step: AIC=6457742
## Prices ~ Floors + Fiber + White.Marble + City + Glass.Doors +
## Black.Marble + Area + Baths + Garage + FirePlace
##
## Df Sum of Sq RSS AIC
## + Electric 1 1.9546e+11 7.8123e+09 4828325
## + Solar 1 7.9603e+09 1.9531e+11 6437770
## <none> 2.0327e+11 6457742
## + Swiming.Pool 1 4.3820e+04 2.0327e+11 6457744
## + Garden 1 1.4650e+03 2.0327e+11 6457744
##
## Step: AIC=4828325
## Prices ~ Floors + Fiber + White.Marble + City + Glass.Doors +
## Black.Marble + Area + Baths + Garage + FirePlace + Electric
##
## Df Sum of Sq RSS AIC
## + Solar 1 7812302801 0 -15466485
## + Garden 1 141896 7812160905 4828318
## <none> 7812302801 4828325
## + Swiming.Pool 1 1731 7812301069 4828327
##
## Step: AIC=-15466485
## Prices ~ Floors + Fiber + White.Marble + City + Glass.Doors +
## Black.Marble + Area + Baths + Garage + FirePlace + Electric +
## Solar
## Warning: attempting model selection on an essentially perfect fit is nonsense
## Df Sum of Sq RSS AIC
## <none> 1.8405e-08 -15466485
## + Garden 1 5.2638e-14 1.8405e-08 -15466484
## + Swiming.Pool 1 5.1670e-14 1.8405e-08 -15466484
summary(model_forward)
##
## Call:
## lm(formula = Prices ~ Floors + Fiber + White.Marble + City +
## Glass.Doors + Black.Marble + Area + Baths + Garage + FirePlace +
## Electric + Solar, data = house)
##
## Residuals:
## Min 1Q Median 3Q Max
## -1.343e-04 0.000e+00 0.000e+00 1.000e-09 1.633e-05
##
## Coefficients:
## Estimate Std. Error t value Pr(>|t|)
## (Intercept) 1.000e+03 1.476e-09 6.773e+11 <2e-16 ***
## Floors1 1.500e+04 5.427e-10 2.764e+13 <2e-16 ***
## Fiber1 1.175e+04 5.427e-10 2.165e+13 <2e-16 ***
## White.Marble1 1.400e+04 6.643e-10 2.107e+13 <2e-16 ***
## City 3.500e+03 3.324e-10 1.053e+13 <2e-16 ***
## Glass.Doors1 4.450e+03 5.427e-10 8.200e+12 <2e-16 ***
## Black.Marble1 5.000e+03 6.645e-10 7.525e+12 <2e-16 ***
## Area 2.500e+01 3.779e-12 6.615e+12 <2e-16 ***
## Baths 1.250e+03 1.919e-10 6.515e+12 <2e-16 ***
## Garage 1.500e+03 3.321e-10 4.517e+12 <2e-16 ***
## FirePlace 7.500e+02 1.919e-10 3.909e+12 <2e-16 ***
## Electric1 1.250e+03 5.427e-10 2.303e+12 <2e-16 ***
## Solar1 2.500e+02 5.427e-10 4.607e+11 <2e-16 ***
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## Residual standard error: 1.919e-07 on 499987 degrees of freedom
## Multiple R-squared: 1, Adjusted R-squared: 1
## F-statistic: 1.66e+26 on 12 and 499987 DF, p-value: < 2.2e-16
# Stepwise selection in both directions, starting from the intercept-only
# model; only an upper scope (model_price_all) is supplied.
model_both <- step(
  model_price_none,
  scope = list(upper = model_price_all),
  direction = "both"
)
## Start: AIC=9401807
## Prices ~ 1
##
## Df Sum of Sq RSS AIC
## + Floors 1 2.8138e+13 4.5191e+13 9159783
## + Fiber 1 1.7222e+13 5.6107e+13 9267960
## + White.Marble 1 1.4728e+13 5.8601e+13 9289711
## + City 1 3.9898e+12 6.9339e+13 9373836
## + Glass.Doors 1 2.4282e+12 7.0901e+13 9384972
## + Area 1 1.6001e+12 7.1729e+13 9390779
## + Baths 1 1.5436e+12 7.1785e+13 9391172
## + Garage 1 7.3760e+11 7.2591e+13 9396755
## + FirePlace 1 5.8265e+11 7.2746e+13 9397821
## + Black.Marble 1 4.4670e+11 7.2882e+13 9398754
## + Electric 1 2.0167e+11 7.3127e+13 9400432
## + Solar 1 5.2097e+09 7.3324e+13 9401774
## <none> 7.3329e+13 9401807
## + Swiming.Pool 1 2.3404e+08 7.3329e+13 9401808
## + Garden 1 1.7392e+08 7.3329e+13 9401808
##
## Step: AIC=9159783
## Prices ~ Floors
##
## Df Sum of Sq RSS AIC
## + Fiber 1 1.7162e+13 2.8029e+13 8920961
## + White.Marble 1 1.4724e+13 3.0467e+13 8962653
## + City 1 4.0034e+12 4.1188e+13 9113405
## + Glass.Doors 1 2.4284e+12 4.2763e+13 9132168
## + Area 1 1.6105e+12 4.3581e+13 9141641
## + Baths 1 1.5552e+12 4.3636e+13 9142275
## + Garage 1 7.4611e+11 4.4445e+13 9151461
## + FirePlace 1 5.8115e+11 4.4610e+13 9153313
## + Black.Marble 1 4.4409e+11 4.4747e+13 9154847
## + Electric 1 2.0128e+11 4.4990e+13 9157553
## + Solar 1 7.4376e+09 4.5184e+13 9159703
## + Swiming.Pool 1 2.6953e+08 4.5191e+13 9159782
## + Garden 1 2.4956e+08 4.5191e+13 9159782
## <none> 4.5191e+13 9159783
## - Floors 1 2.8138e+13 7.3329e+13 9401807
##
## Step: AIC=8920961
## Prices ~ Floors + Fiber
##
## Df Sum of Sq RSS AIC
## + White.Marble 1 1.4743e+13 1.3287e+13 8547717
## + City 1 4.0486e+12 2.3981e+13 8842963
## + Glass.Doors 1 2.4577e+12 2.5572e+13 8875078
## + Area 1 1.6093e+12 2.6420e+13 8891398
## + Baths 1 1.5623e+12 2.6467e+13 8892287
## + Garage 1 7.5013e+11 2.7279e+13 8907399
## + FirePlace 1 5.6973e+11 2.7460e+13 8910695
## + Black.Marble 1 4.4450e+11 2.7585e+13 8912970
## + Electric 1 2.0243e+11 2.7827e+13 8917338
## + Solar 1 7.2657e+09 2.8022e+13 8920833
## + Garden 1 2.5250e+08 2.8029e+13 8920958
## <none> 2.8029e+13 8920961
## + Swiming.Pool 1 4.6438e+05 2.8029e+13 8920963
## - Fiber 1 1.7162e+13 4.5191e+13 9159783
## - Floors 1 2.8077e+13 5.6107e+13 9267960
##
## Step: AIC=8547717
## Prices ~ Floors + Fiber + White.Marble
##
## Df Sum of Sq RSS AIC
## + City 1 4.0619e+12 9.2248e+12 8365285
## + Glass.Doors 1 2.4626e+12 1.0824e+13 8445224
## + Black.Marble 1 2.0764e+12 1.1210e+13 8462752
## + Area 1 1.5848e+12 1.1702e+13 8484214
## + Baths 1 1.5385e+12 1.1748e+13 8486188
## + Garage 1 7.4653e+11 1.2540e+13 8518806
## + FirePlace 1 5.6422e+11 1.2722e+13 8526023
## + Electric 1 2.0051e+11 1.3086e+13 8540116
## + Solar 1 8.3389e+09 1.3278e+13 8547405
## + Garden 1 1.4907e+08 1.3287e+13 8547714
## <none> 1.3287e+13 8547717
## + Swiming.Pool 1 4.3507e+07 1.3287e+13 8547717
## - White.Marble 1 1.4743e+13 2.8029e+13 8920961
## - Fiber 1 1.7180e+13 3.0467e+13 8962653
## - Floors 1 2.8074e+13 4.1361e+13 9115501
##
## Step: AIC=8365285
## Prices ~ Floors + Fiber + White.Marble + City
##
## Df Sum of Sq RSS AIC
## + Glass.Doors 1 2.4578e+12 6.7670e+12 8210369
## + Black.Marble 1 2.0815e+12 7.1433e+12 8237425
## + Area 1 1.6024e+12 7.6224e+12 8269886
## + Baths 1 1.5428e+12 7.6820e+12 8273780
## + Garage 1 7.4378e+11 8.4810e+12 8323254
## + FirePlace 1 5.6484e+11 8.6599e+12 8333694
## + Electric 1 1.9909e+11 9.0257e+12 8354377
## + Solar 1 8.1610e+09 9.2166e+12 8364844
## + Garden 1 9.5580e+07 9.2247e+12 8365282
## <none> 9.2248e+12 8365285
## + Swiming.Pool 1 3.5130e+07 9.2247e+12 8365285
## - City 1 4.0619e+12 1.3287e+13 8547717
## - White.Marble 1 1.4756e+13 2.3981e+13 8842963
## - Fiber 1 1.7225e+13 2.6450e+13 8891968
## - Floors 1 2.8088e+13 3.7312e+13 9064000
##
## Step: AIC=8210369
## Prices ~ Floors + Fiber + White.Marble + City + Glass.Doors
##
## Df Sum of Sq RSS AIC
## + Black.Marble 1 2.0821e+12 4.6849e+12 8026513
## + Area 1 1.6072e+12 5.1597e+12 8074786
## + Baths 1 1.5493e+12 5.2177e+12 8080371
## + Garage 1 7.4967e+11 6.0173e+12 8151663
## + FirePlace 1 5.6569e+11 6.2013e+12 8166722
## + Electric 1 1.9757e+11 6.5694e+12 8195555
## + Solar 1 8.3941e+09 6.7586e+12 8209750
## + Swiming.Pool 1 2.8014e+07 6.7670e+12 8210368
## <none> 6.7670e+12 8210369
## + Garden 1 2.0781e+07 6.7670e+12 8210369
## - Glass.Doors 1 2.4578e+12 9.2248e+12 8365285
## - City 1 4.0571e+12 1.0824e+13 8445224
## - White.Marble 1 1.4761e+13 2.1528e+13 8789012
## - Fiber 1 1.7255e+13 2.4022e+13 8843819
## - Floors 1 2.8088e+13 3.4855e+13 9029933
##
## Step: AIC=8026513
## Prices ~ Floors + Fiber + White.Marble + City + Glass.Doors +
## Black.Marble
##
## Df Sum of Sq RSS AIC
## + Area 1 1.6082e+12 3.0767e+12 7816274
## + Baths 1 1.5555e+12 3.1294e+12 7824762
## + Garage 1 7.4359e+11 3.9413e+12 7940099
## + FirePlace 1 5.6681e+11 4.1181e+12 7962037
## + Electric 1 1.9797e+11 4.4869e+12 8004927
## + Solar 1 8.0999e+09 4.6768e+12 8025650
## + Swiming.Pool 1 3.2267e+07 4.6848e+12 8026512
## <none> 4.6849e+12 8026513
## + Garden 1 1.2522e+07 4.6849e+12 8026514
## - Black.Marble 1 2.0821e+12 6.7670e+12 8210369
## - Glass.Doors 1 2.4584e+12 7.1433e+12 8237425
## - City 1 4.0621e+12 8.7470e+12 8338697
## - White.Marble 1 1.6399e+13 2.1084e+13 8778599
## - Fiber 1 1.7258e+13 2.1943e+13 8798557
## - Floors 1 2.8094e+13 3.2778e+13 8999226
##
## Step: AIC=7816274
## Prices ~ Floors + Fiber + White.Marble + City + Glass.Doors +
## Black.Marble + Area
##
## Df Sum of Sq RSS AIC
## + Baths 1 1.5568e+12 1.5199e+12 7463671
## + Garage 1 7.4555e+11 2.3312e+12 7677528
## + FirePlace 1 5.6610e+11 2.5106e+12 7714609
## + Electric 1 1.9811e+11 2.8786e+12 7782997
## + Solar 1 7.9796e+09 3.0687e+12 7814978
## + Swiming.Pool 1 2.4010e+07 3.0767e+12 7816272
## <none> 3.0767e+12 7816274
## + Garden 1 2.9624e+06 3.0767e+12 7816276
## - Area 1 1.6082e+12 4.6849e+12 8026513
## - Black.Marble 1 2.0830e+12 5.1597e+12 8074786
## - Glass.Doors 1 2.4633e+12 5.5400e+12 8110336
## - City 1 4.0797e+12 7.1565e+12 8238350
## - White.Marble 1 1.6378e+13 1.9455e+13 8738386
## - Fiber 1 1.7257e+13 2.0333e+13 8760475
## - Floors 1 2.8104e+13 3.1181e+13 8974243
##
## Step: AIC=7463671
## Prices ~ Floors + Fiber + White.Marble + City + Glass.Doors +
## Black.Marble + Area + Baths
##
## Df Sum of Sq RSS AIC
## + Garage 1 7.5345e+11 7.6646e+11 7121363
## + FirePlace 1 5.6490e+11 9.5501e+11 7231331
## + Electric 1 1.9695e+11 1.3230e+12 7394282
## + Solar 1 8.1480e+09 1.5118e+12 7460985
## <none> 1.5199e+12 7463671
## + Swiming.Pool 1 4.5384e+06 1.5199e+12 7463671
## + Garden 1 1.8950e+05 1.5199e+12 7463673
## - Baths 1 1.5568e+12 3.0767e+12 7816274
## - Area 1 1.6095e+12 3.1294e+12 7824762
## - Black.Marble 1 2.0893e+12 3.6092e+12 7896082
## - Glass.Doors 1 2.4698e+12 3.9897e+12 7946202
## - City 1 4.0841e+12 5.6040e+12 8116084
## - White.Marble 1 1.6365e+13 1.7885e+13 8696318
## - Fiber 1 1.7264e+13 1.8784e+13 8720841
## - Floors 1 2.8116e+13 2.9636e+13 8948832
##
## Step: AIC=7121363
## Prices ~ Floors + Fiber + White.Marble + City + Glass.Doors +
## Black.Marble + Area + Baths + Garage
##
## Df Sum of Sq RSS AIC
## + FirePlace 1 5.6319e+11 2.0327e+11 6457742
## + Electric 1 1.9635e+11 5.7011e+11 6973389
## + Solar 1 7.9190e+09 7.5854e+11 7116172
## <none> 7.6646e+11 7121363
## + Swiming.Pool 1 1.3797e+06 7.6646e+11 7121364
## + Garden 1 1.8324e+04 7.6646e+11 7121365
## - Garage 1 7.5345e+11 1.5199e+12 7463671
## - Baths 1 1.5647e+12 2.3312e+12 7677528
## - Area 1 1.6115e+12 2.3779e+12 7687459
## - Black.Marble 1 2.0831e+12 2.8496e+12 7777935
## - Glass.Doors 1 2.4757e+12 3.2422e+12 7842470
## - City 1 4.0813e+12 4.8478e+12 8043606
## - White.Marble 1 1.6353e+13 1.7119e+13 8674451
## - Fiber 1 1.7268e+13 1.8034e+13 8700487
## - Floors 1 2.8124e+13 2.8891e+13 8936106
##
## Step: AIC=6457742
## Prices ~ Floors + Fiber + White.Marble + City + Glass.Doors +
## Black.Marble + Area + Baths + Garage + FirePlace
##
## Df Sum of Sq RSS AIC
## + Electric 1 1.9546e+11 7.8123e+09 4828325
## + Solar 1 7.9603e+09 1.9531e+11 6437770
## <none> 2.0327e+11 6457742
## + Swiming.Pool 1 4.3820e+04 2.0327e+11 6457744
## + Garden 1 1.4650e+03 2.0327e+11 6457744
## - FirePlace 1 5.6319e+11 7.6646e+11 7121363
## - Garage 1 7.5174e+11 9.5501e+11 7231331
## - Baths 1 1.5635e+12 1.7667e+12 7538919
## - Area 1 1.6107e+12 1.8140e+12 7552121
## - Black.Marble 1 2.0843e+12 2.2875e+12 7668084
## - Glass.Doors 1 2.4766e+12 2.6798e+12 7747227
## - City 1 4.0819e+12 4.2852e+12 7981932
## - White.Marble 1 1.6349e+13 1.6553e+13 8657624
## - Fiber 1 1.7257e+13 1.7460e+13 8684299
## - Floors 1 2.8123e+13 2.8326e+13 8926239
##
## Step: AIC=4828325
## Prices ~ Floors + Fiber + White.Marble + City + Glass.Doors +
## Black.Marble + Area + Baths + Garage + FirePlace + Electric
##
## Df Sum of Sq RSS AIC
## + Solar 1 7.8123e+09 0.0000e+00 -15466485
## + Garden 1 1.4190e+05 7.8122e+09 4828318
## <none> 7.8123e+09 4828325
## + Swiming.Pool 1 1.7310e+03 7.8123e+09 4828327
## - Electric 1 1.9546e+11 2.0327e+11 6457742
## - FirePlace 1 5.6230e+11 5.7011e+11 6973389
## - Garage 1 7.5114e+11 7.5895e+11 7116441
## - Baths 1 1.5623e+12 1.5701e+12 7479930
## - Area 1 1.6109e+12 1.6187e+12 7495162
## - Black.Marble 1 2.0847e+12 2.0925e+12 7623522
## - Glass.Doors 1 2.4750e+12 2.4829e+12 7709056
## - City 1 4.0805e+12 4.0883e+12 7958420
## - White.Marble 1 1.6348e+13 1.6356e+13 8651651
## - Fiber 1 1.7258e+13 1.7265e+13 8678705
## - Floors 1 2.8122e+13 2.8130e+13 8922772
##
## Step: AIC=-15466485
## Prices ~ Floors + Fiber + White.Marble + City + Glass.Doors +
## Black.Marble + Area + Baths + Garage + FirePlace + Electric +
## Solar
## Warning: attempting model selection on an essentially perfect fit is nonsense
## Warning: attempting model selection on an essentially perfect fit is nonsense
## Df Sum of Sq RSS AIC
## <none> 0.0000e+00 -15466485
## + Garden 1 0.0000e+00 0.0000e+00 -15466484
## + Swiming.Pool 1 0.0000e+00 0.0000e+00 -15466484
## - Solar 1 7.8123e+09 7.8123e+09 4828325
## - Electric 1 1.9531e+11 1.9531e+11 6437770
## - FirePlace 1 5.6234e+11 5.6234e+11 6966529
## - Garage 1 7.5091e+11 7.5091e+11 7111118
## - Baths 1 1.5625e+12 1.5625e+12 7477490
## - Area 1 1.6108e+12 1.6108e+12 7492708
## - Black.Marble 1 2.0844e+12 2.0844e+12 7621585
## - Glass.Doors 1 2.4753e+12 2.4753e+12 7707528
## - City 1 4.0803e+12 4.0803e+12 7957444
## - White.Marble 1 1.6349e+13 1.6349e+13 8651431
## - Fiber 1 1.7257e+13 1.7257e+13 8678475
## - Floors 1 2.8125e+13 2.8125e+13 8922676
# Print the coefficient table and overall fit statistics of model_both.
summary(model_both)
##
## Call:
## lm(formula = Prices ~ Floors + Fiber + White.Marble + City +
## Glass.Doors + Black.Marble + Area + Baths + Garage + FirePlace +
## Electric + Solar, data = house)
##
## Residuals:
## Min 1Q Median 3Q Max
## -1.343e-04 0.000e+00 0.000e+00 1.000e-09 1.633e-05
##
## Coefficients:
## Estimate Std. Error t value Pr(>|t|)
## (Intercept) 1.000e+03 1.476e-09 6.773e+11 <2e-16 ***
## Floors1 1.500e+04 5.427e-10 2.764e+13 <2e-16 ***
## Fiber1 1.175e+04 5.427e-10 2.165e+13 <2e-16 ***
## White.Marble1 1.400e+04 6.643e-10 2.107e+13 <2e-16 ***
## City 3.500e+03 3.324e-10 1.053e+13 <2e-16 ***
## Glass.Doors1 4.450e+03 5.427e-10 8.200e+12 <2e-16 ***
## Black.Marble1 5.000e+03 6.645e-10 7.525e+12 <2e-16 ***
## Area 2.500e+01 3.779e-12 6.615e+12 <2e-16 ***
## Baths 1.250e+03 1.919e-10 6.515e+12 <2e-16 ***
## Garage 1.500e+03 3.321e-10 4.517e+12 <2e-16 ***
## FirePlace 7.500e+02 1.919e-10 3.909e+12 <2e-16 ***
## Electric1 1.250e+03 5.427e-10 2.303e+12 <2e-16 ***
## Solar1 2.500e+02 5.427e-10 4.607e+11 <2e-16 ***
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## Residual standard error: 1.919e-07 on 499987 degrees of freedom
## Multiple R-squared: 1, Adjusted R-squared: 1
## F-statistic: 1.66e+26 on 12 and 499987 DF, p-value: < 2.2e-16
library(performance)
# Gather fit metrics (AIC, BIC, R2, RMSE, ...) for every candidate model
# side by side, then print them as a plain data frame.
comparison <- compare_performance(
  model_price_none,
  model_price_all,
  model_backward,
  model_forward,
  model_both
)
as.data.frame(comparison)
## Name Model AIC AIC_wt AICc AICc_wt BIC
## 1 model_price_none lm 10820748 0.0000000 10820748 0.0000000 10820770
## 2 model_price_all lm -14057930 0.2720678 -14057930 0.2720432 -14057752
## 3 model_backward lm -14057932 0.7279322 -14057932 0.7279568 -14057776
## 4 model_forward lm -14047545 0.0000000 -14047545 0.0000000 -14047389
## 5 model_both lm -14047545 0.0000000 -14047545 0.0000000 -14047389
## BIC_wt R2 R2_adjusted RMSE Sigma
## 1 0.000000e+00 0 0 1.211023e+04 1.211024e+04
## 2 5.523351e-06 1 1 1.898749e-07 1.898777e-07
## 3 9.999945e-01 1 1 1.898752e-07 1.898777e-07
## 4 0.000000e+00 1 1 1.918579e-07 1.918604e-07
## 5 0.000000e+00 1 1 1.918579e-07 1.918604e-07
Kesimpulan: Model terbaik berdasarkan adjusted r-squared terbesar,
AIC terkecil, dan RMSE terkecil adalah:

- adjusted r-squared terbesar -> model_backward (nilainya sama, yaitu 1, untuk semua model selain model_price_none)
- AIC terkecil -> model_backward
- RMSE terkecil -> model_price_all
# Predict prices for every row of `house` with model_backward and preview
# the first few predictions.
pred_model_step <- predict(model_backward, newdata = house)
head(pred_model_step)
## 1 2 3 4 5 6
## 43800 37550 49500 50075 52400 54300
# Observed prices of the first rows of `house`.
head(house$Prices)
## [1] 43800 37550 49500 50075 52400 54300
Asumsi model linear regression:
# Linearity check: residuals-vs-fitted diagnostic plot (`which = 1`).
plot(model_forward, which = 1)
# Reference line at zero residual, drawn once. The residuals of this fit are
# tiny (far below 1 in absolute value), so a line at h = 10 would fall
# outside the plotting range and never be visible.
abline(h = 0, col = "green")
library(nortest)
# Anderson-Darling normality test on the residuals of model_backward.
ad.test(model_backward$residuals)
##
## Anderson-Darling normality test
##
## data: model_backward$residuals
## A = 191000, p-value < 2.2e-16
Kesimpulan: Karena p-value < 0.05, kita tolak H0, sehingga asumsi normality tidak terpenuhi.
# Histogram of the model_backward residuals (visual check of their
# distribution).
hist(model_backward$residuals)
# Scatter plot of residuals against fitted values for model_forward, with a
# red horizontal reference line at zero residual.
plot(x = model_forward$fitted.values, y = model_forward$residuals)
abline(h = 0, col = "red")
**Uji statistik dengan `bptest()` dari package `lmtest`**
library(lmtest)
## Loading required package: zoo
##
## Attaching package: 'zoo'
## The following objects are masked from 'package:base':
##
## as.Date, as.Date.numeric
# Studentized Breusch-Pagan test for heteroscedasticity on model_forward.
bptest(model_forward)
##
## studentized Breusch-Pagan test
##
## data: model_forward
## BP = 11.188, df = 12, p-value = 0.5129
Kesimpulan: karena nilai p-value dari BPtest > 0.05, sehingga gagal tolak H0 (asumsi homoscedasticity terpenuhi).
library(car)
## Loading required package: carData
##
## Attaching package: 'car'
## The following object is masked from 'package:dplyr':
##
## recode
# Variance inflation factor of each predictor in model_forward
# (multicollinearity check).
vif(model_forward)
## Floors Fiber White.Marble City Glass.Doors Black.Marble
## 1.000012 1.000019 1.331386 1.000024 1.000017 1.331386
## Area Baths Garage FirePlace Electric Solar
## 1.000022 1.000030 1.000031 1.000009 1.000009 1.000018
Kesimpulan: Dari uji VIF, prediktor di model_forward lolos uji asumsi multicolinearity (tidak ada nilai VIF > 10)
# Variance inflation factor of each predictor in model_price_all
# (multicollinearity check).
vif(model_price_all)
## Area Garage FirePlace Baths White.Marble Black.Marble
## 1.000024 1.000033 1.000011 1.000037 1.331392 1.331387
## Floors City Solar Electric Fiber Glass.Doors
## 1.000012 1.000026 1.000037 1.000010 1.000036 1.000028
## Swiming.Pool Garden
## 1.000030 1.000039
Kesimpulan: Dari uji VIF, prediktor di model_price_all lolos uji asumsi multicolinearity (tidak ada nilai VIF > 10)