1. Read data

# Load the house-price dataset from the project's data folder and preview
# its first six rows.
house <- read.csv(file = file.path("data input", "HousePrices.csv"))
head(house, n = 6L)
##   Area Garage FirePlace Baths White.Marble Black.Marble Indian.Marble Floors
## 1  164      2         0     2            0            1             0      0
## 2   84      2         0     4            0            0             1      1
## 3  190      2         4     4            1            0             0      0
## 4   75      2         4     4            0            0             1      1
## 5  148      1         4     2            1            0             0      1
## 6  124      3         3     3            0            1             0      1
##   City Solar Electric Fiber Glass.Doors Swiming.Pool Garden Prices
## 1    3     1        1     1           1            0      0  43800
## 2    2     0        0     0           1            1      1  37550
## 3    2     0        0     1           0            0      0  49500
## 4    1     1        1     1           1            1      1  50075
## 5    2     1        0     0           1            1      1  52400
## 6    1     0        0     1           1            1      1  54300

2. Data Cleansing

library(dplyr)
## 
## Attaching package: 'dplyr'
## The following objects are masked from 'package:stats':
## 
##     filter, lag
## The following objects are masked from 'package:base':
## 
##     intersect, setdiff, setequal, union
# Compact overview of every column: its type and first few values.
dplyr::glimpse(house)
## Rows: 500,000
## Columns: 16
## $ Area          <int> 164, 84, 190, 75, 148, 124, 58, 249, 243, 242, 61, 189, …
## $ Garage        <int> 2, 2, 2, 2, 1, 3, 1, 2, 1, 1, 2, 2, 2, 3, 3, 3, 1, 3, 2,…
## $ FirePlace     <int> 0, 0, 4, 4, 4, 3, 0, 1, 0, 2, 4, 0, 0, 3, 3, 4, 0, 3, 3,…
## $ Baths         <int> 2, 4, 4, 4, 2, 3, 2, 1, 2, 4, 5, 4, 2, 3, 1, 1, 5, 3, 5,…
## $ White.Marble  <int> 0, 0, 1, 0, 1, 0, 0, 1, 0, 0, 0, 0, 0, 1, 0, 0, 0, 0, 0,…
## $ Black.Marble  <int> 1, 0, 0, 0, 0, 1, 0, 0, 0, 0, 0, 1, 1, 0, 0, 0, 1, 1, 1,…
## $ Indian.Marble <int> 0, 1, 0, 1, 0, 0, 1, 0, 1, 1, 1, 0, 0, 0, 1, 1, 0, 0, 0,…
## $ Floors        <int> 0, 1, 0, 1, 1, 1, 0, 1, 1, 0, 1, 0, 1, 1, 1, 0, 1, 1, 1,…
## $ City          <int> 3, 2, 2, 1, 2, 1, 3, 1, 1, 2, 1, 2, 1, 3, 3, 1, 3, 1, 3,…
## $ Solar         <int> 1, 0, 0, 1, 1, 0, 0, 0, 0, 1, 1, 1, 1, 1, 0, 0, 1, 1, 0,…
## $ Electric      <int> 1, 0, 0, 1, 0, 0, 1, 1, 0, 0, 0, 1, 1, 1, 0, 0, 0, 0, 1,…
## $ Fiber         <int> 1, 0, 1, 1, 0, 1, 1, 0, 0, 0, 1, 0, 0, 1, 0, 0, 0, 1, 0,…
## $ Glass.Doors   <int> 1, 1, 0, 1, 1, 1, 1, 1, 0, 0, 1, 1, 1, 0, 0, 0, 0, 0, 1,…
## $ Swiming.Pool  <int> 0, 1, 0, 1, 1, 1, 0, 1, 1, 1, 1, 1, 0, 1, 1, 1, 1, 0, 0,…
## $ Garden        <int> 0, 1, 0, 1, 1, 1, 1, 0, 0, 0, 1, 1, 0, 0, 0, 1, 0, 0, 0,…
## $ Prices        <int> 43800, 37550, 49500, 50075, 52400, 54300, 34400, 50425, …
# Inspect the distinct values of the 0/1-looking columns to confirm they are
# binary before they are converted to factors.
# NOTE(review): Fiber, Glass.Doors and Swiming.Pool are also converted to
# factors later in this file but are not inspected here.
unique(house$White.Marble)
## [1] 0 1
unique(house$Black.Marble)
## [1] 1 0
unique(house$Indian.Marble)
## [1] 0 1
unique(house$Floors)
## [1] 0 1
unique(house$Solar)
## [1] 1 0
unique(house$Electric)
## [1] 1 0
unique(house$Garden)
## [1] 0 1
library(dplyr)
# Recode the ten indicator columns as factors so that lm() treats them as
# categorical predictors. The columns are listed once and converted in a
# single across() call instead of repeating as.factor() for each column.
binary_cols <- c(
  "White.Marble", "Black.Marble", "Indian.Marble", "Floors", "Solar",
  "Electric", "Fiber", "Glass.Doors", "Swiming.Pool", "Garden"
)
house <- house %>%
  mutate(across(all_of(binary_cols), as.factor))

# Re-check the column types after the factor conversion.
dplyr::glimpse(house)
## Rows: 500,000
## Columns: 16
## $ Area          <int> 164, 84, 190, 75, 148, 124, 58, 249, 243, 242, 61, 189, …
## $ Garage        <int> 2, 2, 2, 2, 1, 3, 1, 2, 1, 1, 2, 2, 2, 3, 3, 3, 1, 3, 2,…
## $ FirePlace     <int> 0, 0, 4, 4, 4, 3, 0, 1, 0, 2, 4, 0, 0, 3, 3, 4, 0, 3, 3,…
## $ Baths         <int> 2, 4, 4, 4, 2, 3, 2, 1, 2, 4, 5, 4, 2, 3, 1, 1, 5, 3, 5,…
## $ White.Marble  <fct> 0, 0, 1, 0, 1, 0, 0, 1, 0, 0, 0, 0, 0, 1, 0, 0, 0, 0, 0,…
## $ Black.Marble  <fct> 1, 0, 0, 0, 0, 1, 0, 0, 0, 0, 0, 1, 1, 0, 0, 0, 1, 1, 1,…
## $ Indian.Marble <fct> 0, 1, 0, 1, 0, 0, 1, 0, 1, 1, 1, 0, 0, 0, 1, 1, 0, 0, 0,…
## $ Floors        <fct> 0, 1, 0, 1, 1, 1, 0, 1, 1, 0, 1, 0, 1, 1, 1, 0, 1, 1, 1,…
## $ City          <int> 3, 2, 2, 1, 2, 1, 3, 1, 1, 2, 1, 2, 1, 3, 3, 1, 3, 1, 3,…
## $ Solar         <fct> 1, 0, 0, 1, 1, 0, 0, 0, 0, 1, 1, 1, 1, 1, 0, 0, 1, 1, 0,…
## $ Electric      <fct> 1, 0, 0, 1, 0, 0, 1, 1, 0, 0, 0, 1, 1, 1, 0, 0, 0, 0, 1,…
## $ Fiber         <fct> 1, 0, 1, 1, 0, 1, 1, 0, 0, 0, 1, 0, 0, 1, 0, 0, 0, 1, 0,…
## $ Glass.Doors   <fct> 1, 1, 0, 1, 1, 1, 1, 1, 0, 0, 1, 1, 1, 0, 0, 0, 0, 0, 1,…
## $ Swiming.Pool  <fct> 0, 1, 0, 1, 1, 1, 0, 1, 1, 1, 1, 1, 0, 1, 1, 1, 1, 0, 0,…
## $ Garden        <fct> 0, 1, 0, 1, 1, 1, 1, 0, 0, 0, 1, 1, 0, 0, 0, 1, 0, 0, 0,…
## $ Prices        <int> 43800, 37550, 49500, 50075, 52400, 54300, 34400, 50425, …

a. Model 1 prediktor

  • Variabel target: Prices
  • Variabel prediktor: Electric
# Baseline model: regress Prices on the single predictor Electric.
model_price <- lm(Prices ~ Electric, data = house)
print(summary(model_price))
## 
## Call:
## lm(formula = Prices ~ Electric, data = house)
## 
## Residuals:
##    Min     1Q Median     3Q    Max 
## -34134  -8539   -209   8691  35361 
## 
## Coefficients:
##             Estimate Std. Error t value Pr(>|t|)    
## (Intercept) 41414.22      24.20 1711.13   <2e-16 ***
## Electric1    1270.18      34.21   37.13   <2e-16 ***
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## Residual standard error: 12090 on 499998 degrees of freedom
## Multiple R-squared:  0.00275,    Adjusted R-squared:  0.002748 
## F-statistic:  1379 on 1 and 499998 DF,  p-value: < 2.2e-16

b. Model 2 Keseluruhan prediktor

Model Keseluruhan Prediktor

# Full model: regress Prices on every other column except Indian.Marble.
# NOTE(review): Indian.Marble is presumably dropped because it is determined
# by White.Marble and Black.Marble — confirm against the data.
model_price_all <- lm(Prices ~ . - Indian.Marble, data = house)
print(summary(model_price_all))
## 
## Call:
## lm(formula = Prices ~ . - Indian.Marble, data = house)
## 
## Residuals:
##        Min         1Q     Median         3Q        Max 
## -1.343e-04  0.000e+00  0.000e+00  1.000e-09  5.000e-07 
## 
## Coefficients:
##                Estimate Std. Error   t value Pr(>|t|)    
## (Intercept)   1.000e+03  1.509e-09 6.628e+11   <2e-16 ***
## Area          2.500e+01  3.740e-12 6.684e+12   <2e-16 ***
## Garage        1.500e+03  3.287e-10 4.564e+12   <2e-16 ***
## FirePlace     7.500e+02  1.899e-10 3.949e+12   <2e-16 ***
## Baths         1.250e+03  1.899e-10 6.583e+12   <2e-16 ***
## White.Marble1 1.400e+04  6.574e-10 2.129e+13   <2e-16 ***
## Black.Marble1 5.000e+03  6.576e-10 7.603e+12   <2e-16 ***
## Floors1       1.500e+04  5.371e-10 2.793e+13   <2e-16 ***
## City          3.500e+03  3.290e-10 1.064e+13   <2e-16 ***
## Solar1        2.500e+02  5.371e-10 4.655e+11   <2e-16 ***
## Electric1     1.250e+03  5.371e-10 2.327e+12   <2e-16 ***
## Fiber1        1.175e+04  5.371e-10 2.188e+13   <2e-16 ***
## Glass.Doors1  4.450e+03  5.371e-10 8.286e+12   <2e-16 ***
## Swiming.Pool1 5.412e-10  5.371e-10 1.008e+00    0.314    
## Garden1       5.415e-10  5.371e-10 1.008e+00    0.313    
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## Residual standard error: 1.899e-07 on 499985 degrees of freedom
## Multiple R-squared:      1,  Adjusted R-squared:      1 
## F-statistic: 1.453e+26 on 14 and 499985 DF,  p-value: < 2.2e-16

Goodness of Fit

# Show the full-model summary again to read off its goodness-of-fit figures.
print(summary(model_price_all))
## 
## Call:
## lm(formula = Prices ~ . - Indian.Marble, data = house)
## 
## Residuals:
##        Min         1Q     Median         3Q        Max 
## -1.343e-04  0.000e+00  0.000e+00  1.000e-09  5.000e-07 
## 
## Coefficients:
##                Estimate Std. Error   t value Pr(>|t|)    
## (Intercept)   1.000e+03  1.509e-09 6.628e+11   <2e-16 ***
## Area          2.500e+01  3.740e-12 6.684e+12   <2e-16 ***
## Garage        1.500e+03  3.287e-10 4.564e+12   <2e-16 ***
## FirePlace     7.500e+02  1.899e-10 3.949e+12   <2e-16 ***
## Baths         1.250e+03  1.899e-10 6.583e+12   <2e-16 ***
## White.Marble1 1.400e+04  6.574e-10 2.129e+13   <2e-16 ***
## Black.Marble1 5.000e+03  6.576e-10 7.603e+12   <2e-16 ***
## Floors1       1.500e+04  5.371e-10 2.793e+13   <2e-16 ***
## City          3.500e+03  3.290e-10 1.064e+13   <2e-16 ***
## Solar1        2.500e+02  5.371e-10 4.655e+11   <2e-16 ***
## Electric1     1.250e+03  5.371e-10 2.327e+12   <2e-16 ***
## Fiber1        1.175e+04  5.371e-10 2.188e+13   <2e-16 ***
## Glass.Doors1  4.450e+03  5.371e-10 8.286e+12   <2e-16 ***
## Swiming.Pool1 5.412e-10  5.371e-10 1.008e+00    0.314    
## Garden1       5.415e-10  5.371e-10 1.008e+00    0.313    
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## Residual standard error: 1.899e-07 on 499985 degrees of freedom
## Multiple R-squared:      1,  Adjusted R-squared:      1 
## F-statistic: 1.453e+26 on 14 and 499985 DF,  p-value: < 2.2e-16

3. Exploratory Data Analysis

library(GGally)
## Loading required package: ggplot2
## Registered S3 method overwritten by 'GGally':
##   method from   
##   +.gg   ggplot2
# Correlation heatmap with coefficient labels; ggcorr() only uses the numeric
# columns of `house`. `label` is spelled TRUE because T is an ordinary
# variable that can be reassigned.
ggcorr(house, label = TRUE)
## Warning in ggcorr(house, label = T): data in column(s) 'White.Marble',
## 'Black.Marble', 'Indian.Marble', 'Floors', 'Solar', 'Electric', 'Fiber',
## 'Glass.Doors', 'Swiming.Pool', 'Garden' are not numeric and were ignored

Insight: Di antara variabel numerik (kolom faktor diabaikan oleh ggcorr), variabel yang memiliki korelasi tinggi terhadap Prices adalah 1. City 2. Baths

# Model restricted to the two predictors picked from the correlation plot.
model_price_corr <- lm(Prices ~ City + Baths, data = house)
print(summary(model_price_corr))
## 
## Call:
## lm(formula = Prices ~ City + Baths, data = house)
## 
## Residuals:
##      Min       1Q   Median       3Q      Max 
## -28762.5  -7950.3   -523.8   8544.8  30113.0 
## 
## Coefficients:
##             Estimate Std. Error t value Pr(>|t|)    
## (Intercept) 31391.42      55.87   561.8   <2e-16 ***
## City         3462.76      20.18   171.6   <2e-16 ***
## Baths        1244.12      11.64   106.8   <2e-16 ***
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## Residual standard error: 11640 on 499997 degrees of freedom
## Multiple R-squared:  0.07552,    Adjusted R-squared:  0.07551 
## F-statistic: 2.042e+04 on 2 and 499997 DF,  p-value: < 2.2e-16
# Collect the in-sample predictions of the three models next to the data.
house_pred <- house
house_pred[["pred_price"]] <- predict(model_price, newdata = house)
house_pred[["pred_price_all"]] <- predict(model_price_all, newdata = house)
house_pred[["pred_corr"]] <- predict(model_price_corr, newdata = house)
head(house_pred, n = 6L)
##   Area Garage FirePlace Baths White.Marble Black.Marble Indian.Marble Floors
## 1  164      2         0     2            0            1             0      0
## 2   84      2         0     4            0            0             1      1
## 3  190      2         4     4            1            0             0      0
## 4   75      2         4     4            0            0             1      1
## 5  148      1         4     2            1            0             0      1
## 6  124      3         3     3            0            1             0      1
##   City Solar Electric Fiber Glass.Doors Swiming.Pool Garden Prices pred_price
## 1    3     1        1     1           1            0      0  43800   42684.41
## 2    2     0        0     0           1            1      1  37550   41414.22
## 3    2     0        0     1           0            0      0  49500   41414.22
## 4    1     1        1     1           1            1      1  50075   42684.41
## 5    2     1        0     0           1            1      1  52400   41414.22
## 6    1     0        0     1           1            1      1  54300   41414.22
##   pred_price_all pred_corr
## 1          43800  44267.92
## 2          37550  43293.40
## 3          49500  43293.40
## 4          50075  39830.64
## 5          52400  40805.17
## 6          54300  38586.52

7. Model Comparison

Goodness of Fit (R-Squared)

# Compare goodness of fit across the three models.
# NOTE(review): the first line reads plain R-squared while the other two read
# adjusted R-squared; use the same statistic for all three when comparing.
summary(model_price)$r.squared
## [1] 0.002750225
summary(model_price_all)$adj.r.squared
## [1] 1
summary(model_price_corr)$adj.r.squared
## [1] 0.07551446
## [1] 0.07551446

💡 Kesimpulan: Model terbaik berdasarkan R-squared adalah model_price_all (model dengan keseluruhan prediktor kecuali Indian.Marble)

# Preview actual prices next to the three prediction columns.
head(house_pred, n = 6L)
##   Area Garage FirePlace Baths White.Marble Black.Marble Indian.Marble Floors
## 1  164      2         0     2            0            1             0      0
## 2   84      2         0     4            0            0             1      1
## 3  190      2         4     4            1            0             0      0
## 4   75      2         4     4            0            0             1      1
## 5  148      1         4     2            1            0             0      1
## 6  124      3         3     3            0            1             0      1
##   City Solar Electric Fiber Glass.Doors Swiming.Pool Garden Prices pred_price
## 1    3     1        1     1           1            0      0  43800   42684.41
## 2    2     0        0     0           1            1      1  37550   41414.22
## 3    2     0        0     1           0            0      0  49500   41414.22
## 4    1     1        1     1           1            1      1  50075   42684.41
## 5    2     1        0     0           1            1      1  52400   41414.22
## 6    1     0        0     1           1            1      1  54300   41414.22
##   pred_price_all pred_corr
## 1          43800  44267.92
## 2          37550  43293.40
## 3          49500  43293.40
## 4          50075  39830.64
## 5          52400  40805.17
## 6          54300  38586.52

Mean Absolute Error (MAE)

library(MLmetrics)
## 
## Attaching package: 'MLmetrics'
## The following object is masked from 'package:base':
## 
##     Recall
# Mean absolute error of the full model's predictions, shown next to the
# range of the target so the error can be judged against its scale.
MLmetrics::MAE(y_true = house$Prices, y_pred = house_pred$pred_price_all)
## [1] 1.901552e-07
range(house[["Prices"]])
## [1]  7725 77975
print(summary(model_price_all))
## 
## Call:
## lm(formula = Prices ~ . - Indian.Marble, data = house)
## 
## Residuals:
##        Min         1Q     Median         3Q        Max 
## -1.343e-04  0.000e+00  0.000e+00  1.000e-09  5.000e-07 
## 
## Coefficients:
##                Estimate Std. Error   t value Pr(>|t|)    
## (Intercept)   1.000e+03  1.509e-09 6.628e+11   <2e-16 ***
## Area          2.500e+01  3.740e-12 6.684e+12   <2e-16 ***
## Garage        1.500e+03  3.287e-10 4.564e+12   <2e-16 ***
## FirePlace     7.500e+02  1.899e-10 3.949e+12   <2e-16 ***
## Baths         1.250e+03  1.899e-10 6.583e+12   <2e-16 ***
## White.Marble1 1.400e+04  6.574e-10 2.129e+13   <2e-16 ***
## Black.Marble1 5.000e+03  6.576e-10 7.603e+12   <2e-16 ***
## Floors1       1.500e+04  5.371e-10 2.793e+13   <2e-16 ***
## City          3.500e+03  3.290e-10 1.064e+13   <2e-16 ***
## Solar1        2.500e+02  5.371e-10 4.655e+11   <2e-16 ***
## Electric1     1.250e+03  5.371e-10 2.327e+12   <2e-16 ***
## Fiber1        1.175e+04  5.371e-10 2.188e+13   <2e-16 ***
## Glass.Doors1  4.450e+03  5.371e-10 8.286e+12   <2e-16 ***
## Swiming.Pool1 5.412e-10  5.371e-10 1.008e+00    0.314    
## Garden1       5.415e-10  5.371e-10 1.008e+00    0.313    
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## Residual standard error: 1.899e-07 on 499985 degrees of freedom
## Multiple R-squared:      1,  Adjusted R-squared:      1 
## F-statistic: 1.453e+26 on 14 and 499985 DF,  p-value: < 2.2e-16
# Backward elimination starting from the full model; the step-by-step trace
# is suppressed.
model_backward <- step(model_price_all, direction = "backward", trace = FALSE)
## Warning: attempting model selection on an essentially perfect fit is nonsense

## Warning: attempting model selection on an essentially perfect fit is nonsense

## Warning: attempting model selection on an essentially perfect fit is nonsense
# Coefficients and fit statistics of the model kept by backward elimination.
print(summary(model_backward))
## 
## Call:
## lm(formula = Prices ~ Area + Garage + FirePlace + Baths + White.Marble + 
##     Black.Marble + Floors + City + Solar + Electric + Fiber + 
##     Glass.Doors, data = house)
## 
## Residuals:
##        Min         1Q     Median         3Q        Max 
## -1.343e-04  0.000e+00  0.000e+00  1.000e-09  5.010e-07 
## 
## Coefficients:
##                Estimate Std. Error   t value Pr(>|t|)    
## (Intercept)   1.000e+03  1.461e-09 6.844e+11   <2e-16 ***
## Area          2.500e+01  3.740e-12 6.684e+12   <2e-16 ***
## Garage        1.500e+03  3.287e-10 4.564e+12   <2e-16 ***
## FirePlace     7.500e+02  1.899e-10 3.949e+12   <2e-16 ***
## Baths         1.250e+03  1.899e-10 6.583e+12   <2e-16 ***
## White.Marble1 1.400e+04  6.574e-10 2.129e+13   <2e-16 ***
## Black.Marble1 5.000e+03  6.576e-10 7.603e+12   <2e-16 ***
## Floors1       1.500e+04  5.371e-10 2.793e+13   <2e-16 ***
## City          3.500e+03  3.290e-10 1.064e+13   <2e-16 ***
## Solar1        2.500e+02  5.371e-10 4.655e+11   <2e-16 ***
## Electric1     1.250e+03  5.371e-10 2.327e+12   <2e-16 ***
## Fiber1        1.175e+04  5.371e-10 2.188e+13   <2e-16 ***
## Glass.Doors1  4.450e+03  5.371e-10 8.286e+12   <2e-16 ***
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## Residual standard error: 1.899e-07 on 499987 degrees of freedom
## Multiple R-squared:      1,  Adjusted R-squared:      1 
## F-statistic: 1.695e+26 on 12 and 499987 DF,  p-value: < 2.2e-16
# Forward selection: start from the intercept-only model and add terms, with
# the full model as the upper bound of the search.
model_price_none <- lm(formula = Prices ~ 1, data = house)
model_forward <- step(
  model_price_none,
  scope = list(lower = model_price_none, upper = model_price_all),
  direction = "forward"
)
## Start:  AIC=9401807
## Prices ~ 1
## 
##                Df  Sum of Sq        RSS     AIC
## + Floors        1 2.8138e+13 4.5191e+13 9159783
## + Fiber         1 1.7222e+13 5.6107e+13 9267960
## + White.Marble  1 1.4728e+13 5.8601e+13 9289711
## + City          1 3.9898e+12 6.9339e+13 9373836
## + Glass.Doors   1 2.4282e+12 7.0901e+13 9384972
## + Area          1 1.6001e+12 7.1729e+13 9390779
## + Baths         1 1.5436e+12 7.1785e+13 9391172
## + Garage        1 7.3760e+11 7.2591e+13 9396755
## + FirePlace     1 5.8265e+11 7.2746e+13 9397821
## + Black.Marble  1 4.4670e+11 7.2882e+13 9398754
## + Electric      1 2.0167e+11 7.3127e+13 9400432
## + Solar         1 5.2097e+09 7.3324e+13 9401774
## <none>                       7.3329e+13 9401807
## + Swiming.Pool  1 2.3404e+08 7.3329e+13 9401808
## + Garden        1 1.7392e+08 7.3329e+13 9401808
## 
## Step:  AIC=9159783
## Prices ~ Floors
## 
##                Df  Sum of Sq        RSS     AIC
## + Fiber         1 1.7162e+13 2.8029e+13 8920961
## + White.Marble  1 1.4724e+13 3.0467e+13 8962653
## + City          1 4.0034e+12 4.1188e+13 9113405
## + Glass.Doors   1 2.4284e+12 4.2763e+13 9132168
## + Area          1 1.6105e+12 4.3581e+13 9141641
## + Baths         1 1.5552e+12 4.3636e+13 9142275
## + Garage        1 7.4611e+11 4.4445e+13 9151461
## + FirePlace     1 5.8115e+11 4.4610e+13 9153313
## + Black.Marble  1 4.4409e+11 4.4747e+13 9154847
## + Electric      1 2.0128e+11 4.4990e+13 9157553
## + Solar         1 7.4376e+09 4.5184e+13 9159703
## + Swiming.Pool  1 2.6953e+08 4.5191e+13 9159782
## + Garden        1 2.4956e+08 4.5191e+13 9159782
## <none>                       4.5191e+13 9159783
## 
## Step:  AIC=8920961
## Prices ~ Floors + Fiber
## 
##                Df  Sum of Sq        RSS     AIC
## + White.Marble  1 1.4743e+13 1.3287e+13 8547717
## + City          1 4.0486e+12 2.3981e+13 8842963
## + Glass.Doors   1 2.4577e+12 2.5572e+13 8875078
## + Area          1 1.6093e+12 2.6420e+13 8891398
## + Baths         1 1.5623e+12 2.6467e+13 8892287
## + Garage        1 7.5013e+11 2.7279e+13 8907399
## + FirePlace     1 5.6973e+11 2.7460e+13 8910695
## + Black.Marble  1 4.4450e+11 2.7585e+13 8912970
## + Electric      1 2.0243e+11 2.7827e+13 8917338
## + Solar         1 7.2657e+09 2.8022e+13 8920833
## + Garden        1 2.5250e+08 2.8029e+13 8920958
## <none>                       2.8029e+13 8920961
## + Swiming.Pool  1 4.6438e+05 2.8029e+13 8920963
## 
## Step:  AIC=8547717
## Prices ~ Floors + Fiber + White.Marble
## 
##                Df  Sum of Sq        RSS     AIC
## + City          1 4.0619e+12 9.2248e+12 8365285
## + Glass.Doors   1 2.4626e+12 1.0824e+13 8445224
## + Black.Marble  1 2.0764e+12 1.1210e+13 8462752
## + Area          1 1.5848e+12 1.1702e+13 8484214
## + Baths         1 1.5385e+12 1.1748e+13 8486188
## + Garage        1 7.4653e+11 1.2540e+13 8518806
## + FirePlace     1 5.6422e+11 1.2722e+13 8526023
## + Electric      1 2.0051e+11 1.3086e+13 8540116
## + Solar         1 8.3389e+09 1.3278e+13 8547405
## + Garden        1 1.4907e+08 1.3287e+13 8547714
## <none>                       1.3287e+13 8547717
## + Swiming.Pool  1 4.3507e+07 1.3287e+13 8547717
## 
## Step:  AIC=8365285
## Prices ~ Floors + Fiber + White.Marble + City
## 
##                Df  Sum of Sq        RSS     AIC
## + Glass.Doors   1 2.4578e+12 6.7670e+12 8210369
## + Black.Marble  1 2.0815e+12 7.1433e+12 8237425
## + Area          1 1.6024e+12 7.6224e+12 8269886
## + Baths         1 1.5428e+12 7.6820e+12 8273780
## + Garage        1 7.4378e+11 8.4810e+12 8323254
## + FirePlace     1 5.6484e+11 8.6599e+12 8333694
## + Electric      1 1.9909e+11 9.0257e+12 8354377
## + Solar         1 8.1610e+09 9.2166e+12 8364844
## + Garden        1 9.5580e+07 9.2247e+12 8365282
## <none>                       9.2248e+12 8365285
## + Swiming.Pool  1 3.5130e+07 9.2247e+12 8365285
## 
## Step:  AIC=8210369
## Prices ~ Floors + Fiber + White.Marble + City + Glass.Doors
## 
##                Df  Sum of Sq        RSS     AIC
## + Black.Marble  1 2.0821e+12 4.6849e+12 8026513
## + Area          1 1.6072e+12 5.1597e+12 8074786
## + Baths         1 1.5493e+12 5.2177e+12 8080371
## + Garage        1 7.4967e+11 6.0173e+12 8151663
## + FirePlace     1 5.6569e+11 6.2013e+12 8166722
## + Electric      1 1.9757e+11 6.5694e+12 8195555
## + Solar         1 8.3941e+09 6.7586e+12 8209750
## + Swiming.Pool  1 2.8014e+07 6.7670e+12 8210368
## <none>                       6.7670e+12 8210369
## + Garden        1 2.0781e+07 6.7670e+12 8210369
## 
## Step:  AIC=8026513
## Prices ~ Floors + Fiber + White.Marble + City + Glass.Doors + 
##     Black.Marble
## 
##                Df  Sum of Sq        RSS     AIC
## + Area          1 1.6082e+12 3.0767e+12 7816274
## + Baths         1 1.5555e+12 3.1294e+12 7824762
## + Garage        1 7.4359e+11 3.9413e+12 7940099
## + FirePlace     1 5.6681e+11 4.1181e+12 7962037
## + Electric      1 1.9797e+11 4.4869e+12 8004927
## + Solar         1 8.0999e+09 4.6768e+12 8025650
## + Swiming.Pool  1 3.2267e+07 4.6848e+12 8026512
## <none>                       4.6849e+12 8026513
## + Garden        1 1.2522e+07 4.6849e+12 8026514
## 
## Step:  AIC=7816274
## Prices ~ Floors + Fiber + White.Marble + City + Glass.Doors + 
##     Black.Marble + Area
## 
##                Df  Sum of Sq        RSS     AIC
## + Baths         1 1.5568e+12 1.5199e+12 7463671
## + Garage        1 7.4555e+11 2.3312e+12 7677528
## + FirePlace     1 5.6610e+11 2.5106e+12 7714609
## + Electric      1 1.9811e+11 2.8786e+12 7782997
## + Solar         1 7.9796e+09 3.0687e+12 7814978
## + Swiming.Pool  1 2.4010e+07 3.0767e+12 7816272
## <none>                       3.0767e+12 7816274
## + Garden        1 2.9624e+06 3.0767e+12 7816276
## 
## Step:  AIC=7463671
## Prices ~ Floors + Fiber + White.Marble + City + Glass.Doors + 
##     Black.Marble + Area + Baths
## 
##                Df  Sum of Sq        RSS     AIC
## + Garage        1 7.5345e+11 7.6646e+11 7121363
## + FirePlace     1 5.6490e+11 9.5501e+11 7231331
## + Electric      1 1.9695e+11 1.3230e+12 7394282
## + Solar         1 8.1480e+09 1.5118e+12 7460985
## <none>                       1.5199e+12 7463671
## + Swiming.Pool  1 4.5384e+06 1.5199e+12 7463671
## + Garden        1 1.8950e+05 1.5199e+12 7463673
## 
## Step:  AIC=7121363
## Prices ~ Floors + Fiber + White.Marble + City + Glass.Doors + 
##     Black.Marble + Area + Baths + Garage
## 
##                Df  Sum of Sq        RSS     AIC
## + FirePlace     1 5.6319e+11 2.0327e+11 6457742
## + Electric      1 1.9635e+11 5.7011e+11 6973389
## + Solar         1 7.9190e+09 7.5854e+11 7116172
## <none>                       7.6646e+11 7121363
## + Swiming.Pool  1 1.3797e+06 7.6646e+11 7121364
## + Garden        1 1.8324e+04 7.6646e+11 7121365
## 
## Step:  AIC=6457742
## Prices ~ Floors + Fiber + White.Marble + City + Glass.Doors + 
##     Black.Marble + Area + Baths + Garage + FirePlace
## 
##                Df  Sum of Sq        RSS     AIC
## + Electric      1 1.9546e+11 7.8123e+09 4828325
## + Solar         1 7.9603e+09 1.9531e+11 6437770
## <none>                       2.0327e+11 6457742
## + Swiming.Pool  1 4.3820e+04 2.0327e+11 6457744
## + Garden        1 1.4650e+03 2.0327e+11 6457744
## 
## Step:  AIC=4828325
## Prices ~ Floors + Fiber + White.Marble + City + Glass.Doors + 
##     Black.Marble + Area + Baths + Garage + FirePlace + Electric
## 
##                Df  Sum of Sq        RSS       AIC
## + Solar         1 7812302801          0 -15466485
## + Garden        1     141896 7812160905   4828318
## <none>                       7812302801   4828325
## + Swiming.Pool  1       1731 7812301069   4828327
## 
## Step:  AIC=-15466485
## Prices ~ Floors + Fiber + White.Marble + City + Glass.Doors + 
##     Black.Marble + Area + Baths + Garage + FirePlace + Electric + 
##     Solar
## Warning: attempting model selection on an essentially perfect fit is nonsense
##                Df  Sum of Sq        RSS       AIC
## <none>                       1.8405e-08 -15466485
## + Garden        1 5.2638e-14 1.8405e-08 -15466484
## + Swiming.Pool  1 5.1670e-14 1.8405e-08 -15466484
# Coefficients and fit statistics of the model chosen by forward selection.
print(summary(model_forward))
## 
## Call:
## lm(formula = Prices ~ Floors + Fiber + White.Marble + City + 
##     Glass.Doors + Black.Marble + Area + Baths + Garage + FirePlace + 
##     Electric + Solar, data = house)
## 
## Residuals:
##        Min         1Q     Median         3Q        Max 
## -1.343e-04  0.000e+00  0.000e+00  1.000e-09  1.633e-05 
## 
## Coefficients:
##                Estimate Std. Error   t value Pr(>|t|)    
## (Intercept)   1.000e+03  1.476e-09 6.773e+11   <2e-16 ***
## Floors1       1.500e+04  5.427e-10 2.764e+13   <2e-16 ***
## Fiber1        1.175e+04  5.427e-10 2.165e+13   <2e-16 ***
## White.Marble1 1.400e+04  6.643e-10 2.107e+13   <2e-16 ***
## City          3.500e+03  3.324e-10 1.053e+13   <2e-16 ***
## Glass.Doors1  4.450e+03  5.427e-10 8.200e+12   <2e-16 ***
## Black.Marble1 5.000e+03  6.645e-10 7.525e+12   <2e-16 ***
## Area          2.500e+01  3.779e-12 6.615e+12   <2e-16 ***
## Baths         1.250e+03  1.919e-10 6.515e+12   <2e-16 ***
## Garage        1.500e+03  3.321e-10 4.517e+12   <2e-16 ***
## FirePlace     7.500e+02  1.919e-10 3.909e+12   <2e-16 ***
## Electric1     1.250e+03  5.427e-10 2.303e+12   <2e-16 ***
## Solar1        2.500e+02  5.427e-10 4.607e+11   <2e-16 ***
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## Residual standard error: 1.919e-07 on 499987 degrees of freedom
## Multiple R-squared:      1,  Adjusted R-squared:      1 
## F-statistic: 1.66e+26 on 12 and 499987 DF,  p-value: < 2.2e-16
# Stepwise selection in both directions, starting from the intercept-only
# model with the full model as the upper bound of the search.
model_both <- step(
  model_price_none,
  scope = list(upper = model_price_all),
  direction = "both"
)
## Start:  AIC=9401807
## Prices ~ 1
## 
##                Df  Sum of Sq        RSS     AIC
## + Floors        1 2.8138e+13 4.5191e+13 9159783
## + Fiber         1 1.7222e+13 5.6107e+13 9267960
## + White.Marble  1 1.4728e+13 5.8601e+13 9289711
## + City          1 3.9898e+12 6.9339e+13 9373836
## + Glass.Doors   1 2.4282e+12 7.0901e+13 9384972
## + Area          1 1.6001e+12 7.1729e+13 9390779
## + Baths         1 1.5436e+12 7.1785e+13 9391172
## + Garage        1 7.3760e+11 7.2591e+13 9396755
## + FirePlace     1 5.8265e+11 7.2746e+13 9397821
## + Black.Marble  1 4.4670e+11 7.2882e+13 9398754
## + Electric      1 2.0167e+11 7.3127e+13 9400432
## + Solar         1 5.2097e+09 7.3324e+13 9401774
## <none>                       7.3329e+13 9401807
## + Swiming.Pool  1 2.3404e+08 7.3329e+13 9401808
## + Garden        1 1.7392e+08 7.3329e+13 9401808
## 
## Step:  AIC=9159783
## Prices ~ Floors
## 
##                Df  Sum of Sq        RSS     AIC
## + Fiber         1 1.7162e+13 2.8029e+13 8920961
## + White.Marble  1 1.4724e+13 3.0467e+13 8962653
## + City          1 4.0034e+12 4.1188e+13 9113405
## + Glass.Doors   1 2.4284e+12 4.2763e+13 9132168
## + Area          1 1.6105e+12 4.3581e+13 9141641
## + Baths         1 1.5552e+12 4.3636e+13 9142275
## + Garage        1 7.4611e+11 4.4445e+13 9151461
## + FirePlace     1 5.8115e+11 4.4610e+13 9153313
## + Black.Marble  1 4.4409e+11 4.4747e+13 9154847
## + Electric      1 2.0128e+11 4.4990e+13 9157553
## + Solar         1 7.4376e+09 4.5184e+13 9159703
## + Swiming.Pool  1 2.6953e+08 4.5191e+13 9159782
## + Garden        1 2.4956e+08 4.5191e+13 9159782
## <none>                       4.5191e+13 9159783
## - Floors        1 2.8138e+13 7.3329e+13 9401807
## 
## Step:  AIC=8920961
## Prices ~ Floors + Fiber
## 
##                Df  Sum of Sq        RSS     AIC
## + White.Marble  1 1.4743e+13 1.3287e+13 8547717
## + City          1 4.0486e+12 2.3981e+13 8842963
## + Glass.Doors   1 2.4577e+12 2.5572e+13 8875078
## + Area          1 1.6093e+12 2.6420e+13 8891398
## + Baths         1 1.5623e+12 2.6467e+13 8892287
## + Garage        1 7.5013e+11 2.7279e+13 8907399
## + FirePlace     1 5.6973e+11 2.7460e+13 8910695
## + Black.Marble  1 4.4450e+11 2.7585e+13 8912970
## + Electric      1 2.0243e+11 2.7827e+13 8917338
## + Solar         1 7.2657e+09 2.8022e+13 8920833
## + Garden        1 2.5250e+08 2.8029e+13 8920958
## <none>                       2.8029e+13 8920961
## + Swiming.Pool  1 4.6438e+05 2.8029e+13 8920963
## - Fiber         1 1.7162e+13 4.5191e+13 9159783
## - Floors        1 2.8077e+13 5.6107e+13 9267960
## 
## Step:  AIC=8547717
## Prices ~ Floors + Fiber + White.Marble
## 
##                Df  Sum of Sq        RSS     AIC
## + City          1 4.0619e+12 9.2248e+12 8365285
## + Glass.Doors   1 2.4626e+12 1.0824e+13 8445224
## + Black.Marble  1 2.0764e+12 1.1210e+13 8462752
## + Area          1 1.5848e+12 1.1702e+13 8484214
## + Baths         1 1.5385e+12 1.1748e+13 8486188
## + Garage        1 7.4653e+11 1.2540e+13 8518806
## + FirePlace     1 5.6422e+11 1.2722e+13 8526023
## + Electric      1 2.0051e+11 1.3086e+13 8540116
## + Solar         1 8.3389e+09 1.3278e+13 8547405
## + Garden        1 1.4907e+08 1.3287e+13 8547714
## <none>                       1.3287e+13 8547717
## + Swiming.Pool  1 4.3507e+07 1.3287e+13 8547717
## - White.Marble  1 1.4743e+13 2.8029e+13 8920961
## - Fiber         1 1.7180e+13 3.0467e+13 8962653
## - Floors        1 2.8074e+13 4.1361e+13 9115501
## 
## Step:  AIC=8365285
## Prices ~ Floors + Fiber + White.Marble + City
## 
##                Df  Sum of Sq        RSS     AIC
## + Glass.Doors   1 2.4578e+12 6.7670e+12 8210369
## + Black.Marble  1 2.0815e+12 7.1433e+12 8237425
## + Area          1 1.6024e+12 7.6224e+12 8269886
## + Baths         1 1.5428e+12 7.6820e+12 8273780
## + Garage        1 7.4378e+11 8.4810e+12 8323254
## + FirePlace     1 5.6484e+11 8.6599e+12 8333694
## + Electric      1 1.9909e+11 9.0257e+12 8354377
## + Solar         1 8.1610e+09 9.2166e+12 8364844
## + Garden        1 9.5580e+07 9.2247e+12 8365282
## <none>                       9.2248e+12 8365285
## + Swiming.Pool  1 3.5130e+07 9.2247e+12 8365285
## - City          1 4.0619e+12 1.3287e+13 8547717
## - White.Marble  1 1.4756e+13 2.3981e+13 8842963
## - Fiber         1 1.7225e+13 2.6450e+13 8891968
## - Floors        1 2.8088e+13 3.7312e+13 9064000
## 
## Step:  AIC=8210369
## Prices ~ Floors + Fiber + White.Marble + City + Glass.Doors
## 
##                Df  Sum of Sq        RSS     AIC
## + Black.Marble  1 2.0821e+12 4.6849e+12 8026513
## + Area          1 1.6072e+12 5.1597e+12 8074786
## + Baths         1 1.5493e+12 5.2177e+12 8080371
## + Garage        1 7.4967e+11 6.0173e+12 8151663
## + FirePlace     1 5.6569e+11 6.2013e+12 8166722
## + Electric      1 1.9757e+11 6.5694e+12 8195555
## + Solar         1 8.3941e+09 6.7586e+12 8209750
## + Swiming.Pool  1 2.8014e+07 6.7670e+12 8210368
## <none>                       6.7670e+12 8210369
## + Garden        1 2.0781e+07 6.7670e+12 8210369
## - Glass.Doors   1 2.4578e+12 9.2248e+12 8365285
## - City          1 4.0571e+12 1.0824e+13 8445224
## - White.Marble  1 1.4761e+13 2.1528e+13 8789012
## - Fiber         1 1.7255e+13 2.4022e+13 8843819
## - Floors        1 2.8088e+13 3.4855e+13 9029933
## 
## Step:  AIC=8026513
## Prices ~ Floors + Fiber + White.Marble + City + Glass.Doors + 
##     Black.Marble
## 
##                Df  Sum of Sq        RSS     AIC
## + Area          1 1.6082e+12 3.0767e+12 7816274
## + Baths         1 1.5555e+12 3.1294e+12 7824762
## + Garage        1 7.4359e+11 3.9413e+12 7940099
## + FirePlace     1 5.6681e+11 4.1181e+12 7962037
## + Electric      1 1.9797e+11 4.4869e+12 8004927
## + Solar         1 8.0999e+09 4.6768e+12 8025650
## + Swiming.Pool  1 3.2267e+07 4.6848e+12 8026512
## <none>                       4.6849e+12 8026513
## + Garden        1 1.2522e+07 4.6849e+12 8026514
## - Black.Marble  1 2.0821e+12 6.7670e+12 8210369
## - Glass.Doors   1 2.4584e+12 7.1433e+12 8237425
## - City          1 4.0621e+12 8.7470e+12 8338697
## - White.Marble  1 1.6399e+13 2.1084e+13 8778599
## - Fiber         1 1.7258e+13 2.1943e+13 8798557
## - Floors        1 2.8094e+13 3.2778e+13 8999226
## 
## Step:  AIC=7816274
## Prices ~ Floors + Fiber + White.Marble + City + Glass.Doors + 
##     Black.Marble + Area
## 
##                Df  Sum of Sq        RSS     AIC
## + Baths         1 1.5568e+12 1.5199e+12 7463671
## + Garage        1 7.4555e+11 2.3312e+12 7677528
## + FirePlace     1 5.6610e+11 2.5106e+12 7714609
## + Electric      1 1.9811e+11 2.8786e+12 7782997
## + Solar         1 7.9796e+09 3.0687e+12 7814978
## + Swiming.Pool  1 2.4010e+07 3.0767e+12 7816272
## <none>                       3.0767e+12 7816274
## + Garden        1 2.9624e+06 3.0767e+12 7816276
## - Area          1 1.6082e+12 4.6849e+12 8026513
## - Black.Marble  1 2.0830e+12 5.1597e+12 8074786
## - Glass.Doors   1 2.4633e+12 5.5400e+12 8110336
## - City          1 4.0797e+12 7.1565e+12 8238350
## - White.Marble  1 1.6378e+13 1.9455e+13 8738386
## - Fiber         1 1.7257e+13 2.0333e+13 8760475
## - Floors        1 2.8104e+13 3.1181e+13 8974243
## 
## Step:  AIC=7463671
## Prices ~ Floors + Fiber + White.Marble + City + Glass.Doors + 
##     Black.Marble + Area + Baths
## 
##                Df  Sum of Sq        RSS     AIC
## + Garage        1 7.5345e+11 7.6646e+11 7121363
## + FirePlace     1 5.6490e+11 9.5501e+11 7231331
## + Electric      1 1.9695e+11 1.3230e+12 7394282
## + Solar         1 8.1480e+09 1.5118e+12 7460985
## <none>                       1.5199e+12 7463671
## + Swiming.Pool  1 4.5384e+06 1.5199e+12 7463671
## + Garden        1 1.8950e+05 1.5199e+12 7463673
## - Baths         1 1.5568e+12 3.0767e+12 7816274
## - Area          1 1.6095e+12 3.1294e+12 7824762
## - Black.Marble  1 2.0893e+12 3.6092e+12 7896082
## - Glass.Doors   1 2.4698e+12 3.9897e+12 7946202
## - City          1 4.0841e+12 5.6040e+12 8116084
## - White.Marble  1 1.6365e+13 1.7885e+13 8696318
## - Fiber         1 1.7264e+13 1.8784e+13 8720841
## - Floors        1 2.8116e+13 2.9636e+13 8948832
## 
## Step:  AIC=7121363
## Prices ~ Floors + Fiber + White.Marble + City + Glass.Doors + 
##     Black.Marble + Area + Baths + Garage
## 
##                Df  Sum of Sq        RSS     AIC
## + FirePlace     1 5.6319e+11 2.0327e+11 6457742
## + Electric      1 1.9635e+11 5.7011e+11 6973389
## + Solar         1 7.9190e+09 7.5854e+11 7116172
## <none>                       7.6646e+11 7121363
## + Swiming.Pool  1 1.3797e+06 7.6646e+11 7121364
## + Garden        1 1.8324e+04 7.6646e+11 7121365
## - Garage        1 7.5345e+11 1.5199e+12 7463671
## - Baths         1 1.5647e+12 2.3312e+12 7677528
## - Area          1 1.6115e+12 2.3779e+12 7687459
## - Black.Marble  1 2.0831e+12 2.8496e+12 7777935
## - Glass.Doors   1 2.4757e+12 3.2422e+12 7842470
## - City          1 4.0813e+12 4.8478e+12 8043606
## - White.Marble  1 1.6353e+13 1.7119e+13 8674451
## - Fiber         1 1.7268e+13 1.8034e+13 8700487
## - Floors        1 2.8124e+13 2.8891e+13 8936106
## 
## Step:  AIC=6457742
## Prices ~ Floors + Fiber + White.Marble + City + Glass.Doors + 
##     Black.Marble + Area + Baths + Garage + FirePlace
## 
##                Df  Sum of Sq        RSS     AIC
## + Electric      1 1.9546e+11 7.8123e+09 4828325
## + Solar         1 7.9603e+09 1.9531e+11 6437770
## <none>                       2.0327e+11 6457742
## + Swiming.Pool  1 4.3820e+04 2.0327e+11 6457744
## + Garden        1 1.4650e+03 2.0327e+11 6457744
## - FirePlace     1 5.6319e+11 7.6646e+11 7121363
## - Garage        1 7.5174e+11 9.5501e+11 7231331
## - Baths         1 1.5635e+12 1.7667e+12 7538919
## - Area          1 1.6107e+12 1.8140e+12 7552121
## - Black.Marble  1 2.0843e+12 2.2875e+12 7668084
## - Glass.Doors   1 2.4766e+12 2.6798e+12 7747227
## - City          1 4.0819e+12 4.2852e+12 7981932
## - White.Marble  1 1.6349e+13 1.6553e+13 8657624
## - Fiber         1 1.7257e+13 1.7460e+13 8684299
## - Floors        1 2.8123e+13 2.8326e+13 8926239
## 
## Step:  AIC=4828325
## Prices ~ Floors + Fiber + White.Marble + City + Glass.Doors + 
##     Black.Marble + Area + Baths + Garage + FirePlace + Electric
## 
##                Df  Sum of Sq        RSS       AIC
## + Solar         1 7.8123e+09 0.0000e+00 -15466485
## + Garden        1 1.4190e+05 7.8122e+09   4828318
## <none>                       7.8123e+09   4828325
## + Swiming.Pool  1 1.7310e+03 7.8123e+09   4828327
## - Electric      1 1.9546e+11 2.0327e+11   6457742
## - FirePlace     1 5.6230e+11 5.7011e+11   6973389
## - Garage        1 7.5114e+11 7.5895e+11   7116441
## - Baths         1 1.5623e+12 1.5701e+12   7479930
## - Area          1 1.6109e+12 1.6187e+12   7495162
## - Black.Marble  1 2.0847e+12 2.0925e+12   7623522
## - Glass.Doors   1 2.4750e+12 2.4829e+12   7709056
## - City          1 4.0805e+12 4.0883e+12   7958420
## - White.Marble  1 1.6348e+13 1.6356e+13   8651651
## - Fiber         1 1.7258e+13 1.7265e+13   8678705
## - Floors        1 2.8122e+13 2.8130e+13   8922772
## 
## Step:  AIC=-15466485
## Prices ~ Floors + Fiber + White.Marble + City + Glass.Doors + 
##     Black.Marble + Area + Baths + Garage + FirePlace + Electric + 
##     Solar
## Warning: attempting model selection on an essentially perfect fit is nonsense

## Warning: attempting model selection on an essentially perfect fit is nonsense
##                Df  Sum of Sq        RSS       AIC
## <none>                       0.0000e+00 -15466485
## + Garden        1 0.0000e+00 0.0000e+00 -15466484
## + Swiming.Pool  1 0.0000e+00 0.0000e+00 -15466484
## - Solar         1 7.8123e+09 7.8123e+09   4828325
## - Electric      1 1.9531e+11 1.9531e+11   6437770
## - FirePlace     1 5.6234e+11 5.6234e+11   6966529
## - Garage        1 7.5091e+11 7.5091e+11   7111118
## - Baths         1 1.5625e+12 1.5625e+12   7477490
## - Area          1 1.6108e+12 1.6108e+12   7492708
## - Black.Marble  1 2.0844e+12 2.0844e+12   7621585
## - Glass.Doors   1 2.4753e+12 2.4753e+12   7707528
## - City          1 4.0803e+12 4.0803e+12   7957444
## - White.Marble  1 1.6349e+13 1.6349e+13   8651431
## - Fiber         1 1.7257e+13 1.7257e+13   8678475
## - Floors        1 2.8125e+13 2.8125e+13   8922676
# Print the coefficient table and fit statistics of the model chosen by the
# stepwise search above.
summary(object = model_both)
## 
## Call:
## lm(formula = Prices ~ Floors + Fiber + White.Marble + City + 
##     Glass.Doors + Black.Marble + Area + Baths + Garage + FirePlace + 
##     Electric + Solar, data = house)
## 
## Residuals:
##        Min         1Q     Median         3Q        Max 
## -1.343e-04  0.000e+00  0.000e+00  1.000e-09  1.633e-05 
## 
## Coefficients:
##                Estimate Std. Error   t value Pr(>|t|)    
## (Intercept)   1.000e+03  1.476e-09 6.773e+11   <2e-16 ***
## Floors1       1.500e+04  5.427e-10 2.764e+13   <2e-16 ***
## Fiber1        1.175e+04  5.427e-10 2.165e+13   <2e-16 ***
## White.Marble1 1.400e+04  6.643e-10 2.107e+13   <2e-16 ***
## City          3.500e+03  3.324e-10 1.053e+13   <2e-16 ***
## Glass.Doors1  4.450e+03  5.427e-10 8.200e+12   <2e-16 ***
## Black.Marble1 5.000e+03  6.645e-10 7.525e+12   <2e-16 ***
## Area          2.500e+01  3.779e-12 6.615e+12   <2e-16 ***
## Baths         1.250e+03  1.919e-10 6.515e+12   <2e-16 ***
## Garage        1.500e+03  3.321e-10 4.517e+12   <2e-16 ***
## FirePlace     7.500e+02  1.919e-10 3.909e+12   <2e-16 ***
## Electric1     1.250e+03  5.427e-10 2.303e+12   <2e-16 ***
## Solar1        2.500e+02  5.427e-10 4.607e+11   <2e-16 ***
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## Residual standard error: 1.919e-07 on 499987 degrees of freedom
## Multiple R-squared:      1,  Adjusted R-squared:      1 
## F-statistic: 1.66e+26 on 12 and 499987 DF,  p-value: < 2.2e-16
library(performance)
# Collect fit metrics for every candidate model in a single table. The models
# are passed as bare names because those names label the rows of the result.
comparison <- compare_performance(
  model_price_none,
  model_price_all,
  model_backward,
  model_forward,
  model_both
)

# Display the comparison as a plain data frame so every metric column prints.
as.data.frame(x = comparison)
##               Name Model       AIC    AIC_wt      AICc   AICc_wt       BIC
## 1 model_price_none    lm  10820748 0.0000000  10820748 0.0000000  10820770
## 2  model_price_all    lm -14057930 0.2720678 -14057930 0.2720432 -14057752
## 3   model_backward    lm -14057932 0.7279322 -14057932 0.7279568 -14057776
## 4    model_forward    lm -14047545 0.0000000 -14047545 0.0000000 -14047389
## 5       model_both    lm -14047545 0.0000000 -14047545 0.0000000 -14047389
##         BIC_wt R2 R2_adjusted         RMSE        Sigma
## 1 0.000000e+00  0           0 1.211023e+04 1.211024e+04
## 2 5.523351e-06  1           1 1.898749e-07 1.898777e-07
## 3 9.999945e-01  1           1 1.898752e-07 1.898777e-07
## 4 0.000000e+00  1           1 1.918579e-07 1.918604e-07
## 5 0.000000e+00  1           1 1.918579e-07 1.918604e-07

Kesimpulan: Model terbaik berdasarkan adjusted r-squared terbesar, AIC terkecil dan RMSE terkecil adalah:

- adjusted r-squared terbesar -> model_backward (pada tabel, semua model selain model_price_none sama-sama bernilai 1)
- AIC terkecil -> model_backward
- RMSE terkecil -> model_price_all

# Predict prices for every row of `house` with model_backward, then preview
# the first few predictions.
pred_model_step <- predict(model_backward, newdata = house)
head(x = pred_model_step)
##     1     2     3     4     5     6 
## 43800 37550 49500 50075 52400 54300
# Actual prices of the leading rows, for comparison with the predictions.
head(house[["Prices"]])
## [1] 43800 37550 49500 50075 52400 54300

Asumsi Linear Regression

Asumsi model linear regression:

  1. Linearity
  2. Normality of Residuals
  3. Homoscedasticity of Residuals
  4. No Multicollinearity

Linearity

# Residuals-vs-fitted diagnostic plot for model_forward (linearity check).
plot(model_forward, which = 1)
# Draw a symmetric reference band around zero. The original issued
# abline(h = 10) twice, so only the upper edge of the band was drawn; the
# second call was presumably meant to be the lower bound.
# NOTE(review): the half-width of 10 is kept from the original; the residuals
# reported for these models are far smaller, so confirm the intended band.
abline(h = c(-10, 10), col = "green")

Normality of Residuals

library(nortest)
# Anderson-Darling normality test on the residuals of model_backward.
# A p-value below 0.05 rejects the null hypothesis that the residuals are
# normally distributed.
ad.test(model_backward$residuals)
## 
##  Anderson-Darling normality test
## 
## data:  model_backward$residuals
## A = 191000, p-value < 2.2e-16

Kesimpulan: Karena p-value < 0.05 (p-value < 2.2e-16), kita tolak H0, sehingga asumsi normality tidak terpenuhi.

# Histogram of model_backward's residuals: a visual companion to the
# normality test.
hist(model_backward$residuals)

Homoscedasticity of Residuals

# Scatter plot of model_forward's residuals against its fitted values, used
# to inspect visually whether the residual spread changes with the fit.
plot(x = model_forward$fitted.values, y = model_forward$residuals)
# Horizontal reference line at zero residual.
abline(h = 0, col = "red")

**Uji statistik dengan `bptest()` dari package `lmtest`**

library(lmtest)
## Loading required package: zoo
## 
## Attaching package: 'zoo'
## The following objects are masked from 'package:base':
## 
##     as.Date, as.Date.numeric
# Studentized Breusch-Pagan test on model_forward. A p-value above 0.05 means
# the null hypothesis of homoscedastic residuals is not rejected.
bptest(model_forward)
## 
##  studentized Breusch-Pagan test
## 
## data:  model_forward
## BP = 11.188, df = 12, p-value = 0.5129

Kesimpulan: karena nilai p-value dari BPtest > 0.05, sehingga gagal tolak H0 (asumsi homoscedasticity terpenuhi).

No Multicollinearity

library(car)
## Loading required package: carData
## 
## Attaching package: 'car'
## The following object is masked from 'package:dplyr':
## 
##     recode
# Variance inflation factor of each predictor in model_forward; values above
# 10 would indicate multicollinearity.
vif(mod = model_forward)
##       Floors        Fiber White.Marble         City  Glass.Doors Black.Marble 
##     1.000012     1.000019     1.331386     1.000024     1.000017     1.331386 
##         Area        Baths       Garage    FirePlace     Electric        Solar 
##     1.000022     1.000030     1.000031     1.000009     1.000009     1.000018

Kesimpulan: Dari uji VIF, prediktor di model_forward lolos uji asumsi multicollinearity (tidak ada nilai VIF > 10)

# Variance inflation factor of each predictor in the full model
# (model_price_all); values above 10 would indicate multicollinearity.
vif(mod = model_price_all)
##         Area       Garage    FirePlace        Baths White.Marble Black.Marble 
##     1.000024     1.000033     1.000011     1.000037     1.331392     1.331387 
##       Floors         City        Solar     Electric        Fiber  Glass.Doors 
##     1.000012     1.000026     1.000037     1.000010     1.000036     1.000028 
## Swiming.Pool       Garden 
##     1.000030     1.000039

Kesimpulan: Dari uji VIF, prediktor di model_price_all lolos uji asumsi multicollinearity (tidak ada nilai VIF > 10)