link to the dataset: https://www.kaggle.com/competitions/house-prices-advanced-regression-techniques/data

library(dplyr)
## 
## Attaching package: 'dplyr'
## The following objects are masked from 'package:stats':
## 
##     filter, lag
## The following objects are masked from 'package:base':
## 
##     intersect, setdiff, setequal, union
library(ggplot2)
library(GGally)
## Registered S3 method overwritten by 'GGally':
##   method from   
##   +.gg   ggplot2
# Load the Kaggle training data. Character columns are read as factors so
# that lm() later treats them as categorical predictors. TRUE is spelled out
# because the shorthand T is an ordinary variable that can be reassigned.
house_price <- read.csv("train.csv", stringsAsFactors = TRUE)

# Preview the first rows of the raw data.
house_price %>% head()

EDA

# Box plot of the target variable to inspect its spread and outliers.
boxplot(house_price[["SalePrice"]])

The house prices appear to be right-skewed (the long tail is toward the high prices), and the data also contains outliers.

# Lowest sale price in the data.
min(house_price[["SalePrice"]])
## [1] 34900
# Highest sale price in the data.
max(house_price[["SalePrice"]])
## [1] 755000
# Number of columns in the data frame.
ncol(house_price)
## [1] 81

The data frame consists of 81 columns.

# Compact overview of every column: its type and first few values.
house_price %>% glimpse()
## Rows: 1,460
## Columns: 81
## $ Id            <int> 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 1…
## $ MSSubClass    <int> 60, 20, 60, 70, 60, 50, 20, 60, 50, 190, 20, 60, 20, 20,…
## $ MSZoning      <fct> RL, RL, RL, RL, RL, RL, RL, RL, RM, RL, RL, RL, RL, RL, …
## $ LotFrontage   <int> 65, 80, 68, 60, 84, 85, 75, NA, 51, 50, 70, 85, NA, 91, …
## $ LotArea       <int> 8450, 9600, 11250, 9550, 14260, 14115, 10084, 10382, 612…
## $ Street        <fct> Pave, Pave, Pave, Pave, Pave, Pave, Pave, Pave, Pave, Pa…
## $ Alley         <fct> NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, …
## $ LotShape      <fct> Reg, Reg, IR1, IR1, IR1, IR1, Reg, IR1, Reg, Reg, Reg, I…
## $ LandContour   <fct> Lvl, Lvl, Lvl, Lvl, Lvl, Lvl, Lvl, Lvl, Lvl, Lvl, Lvl, L…
## $ Utilities     <fct> AllPub, AllPub, AllPub, AllPub, AllPub, AllPub, AllPub, …
## $ LotConfig     <fct> Inside, FR2, Inside, Corner, FR2, Inside, Inside, Corner…
## $ LandSlope     <fct> Gtl, Gtl, Gtl, Gtl, Gtl, Gtl, Gtl, Gtl, Gtl, Gtl, Gtl, G…
## $ Neighborhood  <fct> CollgCr, Veenker, CollgCr, Crawfor, NoRidge, Mitchel, So…
## $ Condition1    <fct> Norm, Feedr, Norm, Norm, Norm, Norm, Norm, PosN, Artery,…
## $ Condition2    <fct> Norm, Norm, Norm, Norm, Norm, Norm, Norm, Norm, Norm, Ar…
## $ BldgType      <fct> 1Fam, 1Fam, 1Fam, 1Fam, 1Fam, 1Fam, 1Fam, 1Fam, 1Fam, 2f…
## $ HouseStyle    <fct> 2Story, 1Story, 2Story, 2Story, 2Story, 1.5Fin, 1Story, …
## $ OverallQual   <int> 7, 6, 7, 7, 8, 5, 8, 7, 7, 5, 5, 9, 5, 7, 6, 7, 6, 4, 5,…
## $ OverallCond   <int> 5, 8, 5, 5, 5, 5, 5, 6, 5, 6, 5, 5, 6, 5, 5, 8, 7, 5, 5,…
## $ YearBuilt     <int> 2003, 1976, 2001, 1915, 2000, 1993, 2004, 1973, 1931, 19…
## $ YearRemodAdd  <int> 2003, 1976, 2002, 1970, 2000, 1995, 2005, 1973, 1950, 19…
## $ RoofStyle     <fct> Gable, Gable, Gable, Gable, Gable, Gable, Gable, Gable, …
## $ RoofMatl      <fct> CompShg, CompShg, CompShg, CompShg, CompShg, CompShg, Co…
## $ Exterior1st   <fct> VinylSd, MetalSd, VinylSd, Wd Sdng, VinylSd, VinylSd, Vi…
## $ Exterior2nd   <fct> VinylSd, MetalSd, VinylSd, Wd Shng, VinylSd, VinylSd, Vi…
## $ MasVnrType    <fct> BrkFace, None, BrkFace, None, BrkFace, None, Stone, Ston…
## $ MasVnrArea    <int> 196, 0, 162, 0, 350, 0, 186, 240, 0, 0, 0, 286, 0, 306, …
## $ ExterQual     <fct> Gd, TA, Gd, TA, Gd, TA, Gd, TA, TA, TA, TA, Ex, TA, Gd, …
## $ ExterCond     <fct> TA, TA, TA, TA, TA, TA, TA, TA, TA, TA, TA, TA, TA, TA, …
## $ Foundation    <fct> PConc, CBlock, PConc, BrkTil, PConc, Wood, PConc, CBlock…
## $ BsmtQual      <fct> Gd, Gd, Gd, TA, Gd, Gd, Ex, Gd, TA, TA, TA, Ex, TA, Gd, …
## $ BsmtCond      <fct> TA, TA, TA, Gd, TA, TA, TA, TA, TA, TA, TA, TA, TA, TA, …
## $ BsmtExposure  <fct> No, Gd, Mn, No, Av, No, Av, Mn, No, No, No, No, No, Av, …
## $ BsmtFinType1  <fct> GLQ, ALQ, GLQ, ALQ, GLQ, GLQ, GLQ, ALQ, Unf, GLQ, Rec, G…
## $ BsmtFinSF1    <int> 706, 978, 486, 216, 655, 732, 1369, 859, 0, 851, 906, 99…
## $ BsmtFinType2  <fct> Unf, Unf, Unf, Unf, Unf, Unf, Unf, BLQ, Unf, Unf, Unf, U…
## $ BsmtFinSF2    <int> 0, 0, 0, 0, 0, 0, 0, 32, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0…
## $ BsmtUnfSF     <int> 150, 284, 434, 540, 490, 64, 317, 216, 952, 140, 134, 17…
## $ TotalBsmtSF   <int> 856, 1262, 920, 756, 1145, 796, 1686, 1107, 952, 991, 10…
## $ Heating       <fct> GasA, GasA, GasA, GasA, GasA, GasA, GasA, GasA, GasA, Ga…
## $ HeatingQC     <fct> Ex, Ex, Ex, Gd, Ex, Ex, Ex, Ex, Gd, Ex, Ex, Ex, TA, Ex, …
## $ CentralAir    <fct> Y, Y, Y, Y, Y, Y, Y, Y, Y, Y, Y, Y, Y, Y, Y, Y, Y, Y, Y,…
## $ Electrical    <fct> SBrkr, SBrkr, SBrkr, SBrkr, SBrkr, SBrkr, SBrkr, SBrkr, …
## $ X1stFlrSF     <int> 856, 1262, 920, 961, 1145, 796, 1694, 1107, 1022, 1077, …
## $ X2ndFlrSF     <int> 854, 0, 866, 756, 1053, 566, 0, 983, 752, 0, 0, 1142, 0,…
## $ LowQualFinSF  <int> 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,…
## $ GrLivArea     <int> 1710, 1262, 1786, 1717, 2198, 1362, 1694, 2090, 1774, 10…
## $ BsmtFullBath  <int> 1, 0, 1, 1, 1, 1, 1, 1, 0, 1, 1, 1, 1, 0, 1, 0, 1, 0, 1,…
## $ BsmtHalfBath  <int> 0, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,…
## $ FullBath      <int> 2, 2, 2, 1, 2, 1, 2, 2, 2, 1, 1, 3, 1, 2, 1, 1, 1, 2, 1,…
## $ HalfBath      <int> 1, 0, 1, 0, 1, 1, 0, 1, 0, 0, 0, 0, 0, 0, 1, 0, 0, 0, 1,…
## $ BedroomAbvGr  <int> 3, 3, 3, 3, 4, 1, 3, 3, 2, 2, 3, 4, 2, 3, 2, 2, 2, 2, 3,…
## $ KitchenAbvGr  <int> 1, 1, 1, 1, 1, 1, 1, 1, 2, 2, 1, 1, 1, 1, 1, 1, 1, 2, 1,…
## $ KitchenQual   <fct> Gd, TA, Gd, Gd, Gd, TA, Gd, TA, TA, TA, TA, Ex, TA, Gd, …
## $ TotRmsAbvGrd  <int> 8, 6, 6, 7, 9, 5, 7, 7, 8, 5, 5, 11, 4, 7, 5, 5, 5, 6, 6…
## $ Functional    <fct> Typ, Typ, Typ, Typ, Typ, Typ, Typ, Typ, Min1, Typ, Typ, …
## $ Fireplaces    <int> 0, 1, 1, 1, 1, 0, 1, 2, 2, 2, 0, 2, 0, 1, 1, 0, 1, 0, 0,…
## $ FireplaceQu   <fct> NA, TA, TA, Gd, TA, NA, Gd, TA, TA, TA, NA, Gd, NA, Gd, …
## $ GarageType    <fct> Attchd, Attchd, Attchd, Detchd, Attchd, Attchd, Attchd, …
## $ GarageYrBlt   <int> 2003, 1976, 2001, 1998, 2000, 1993, 2004, 1973, 1931, 19…
## $ GarageFinish  <fct> RFn, RFn, RFn, Unf, RFn, Unf, RFn, RFn, Unf, RFn, Unf, F…
## $ GarageCars    <int> 2, 2, 2, 3, 3, 2, 2, 2, 2, 1, 1, 3, 1, 3, 1, 2, 2, 2, 2,…
## $ GarageArea    <int> 548, 460, 608, 642, 836, 480, 636, 484, 468, 205, 384, 7…
## $ GarageQual    <fct> TA, TA, TA, TA, TA, TA, TA, TA, Fa, Gd, TA, TA, TA, TA, …
## $ GarageCond    <fct> TA, TA, TA, TA, TA, TA, TA, TA, TA, TA, TA, TA, TA, TA, …
## $ PavedDrive    <fct> Y, Y, Y, Y, Y, Y, Y, Y, Y, Y, Y, Y, Y, Y, Y, Y, Y, Y, Y,…
## $ WoodDeckSF    <int> 0, 298, 0, 0, 192, 40, 255, 235, 90, 0, 0, 147, 140, 160…
## $ OpenPorchSF   <int> 61, 0, 42, 35, 84, 30, 57, 204, 0, 4, 0, 21, 0, 33, 213,…
## $ EnclosedPorch <int> 0, 0, 0, 272, 0, 0, 0, 228, 205, 0, 0, 0, 0, 0, 176, 0, …
## $ X3SsnPorch    <int> 0, 0, 0, 0, 0, 320, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, …
## $ ScreenPorch   <int> 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 176, 0, 0, 0, 0, 0, …
## $ PoolArea      <int> 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,…
## $ PoolQC        <fct> NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, …
## $ Fence         <fct> NA, NA, NA, NA, NA, MnPrv, NA, NA, NA, NA, NA, NA, NA, N…
## $ MiscFeature   <fct> NA, NA, NA, NA, NA, Shed, NA, Shed, NA, NA, NA, NA, NA, …
## $ MiscVal       <int> 0, 0, 0, 0, 0, 700, 0, 350, 0, 0, 0, 0, 0, 0, 0, 0, 700,…
## $ MoSold        <int> 2, 5, 9, 2, 12, 10, 8, 11, 4, 1, 2, 7, 9, 8, 5, 7, 3, 10…
## $ YrSold        <int> 2008, 2007, 2008, 2006, 2008, 2009, 2007, 2009, 2008, 20…
## $ SaleType      <fct> WD, WD, WD, WD, WD, WD, WD, WD, WD, WD, WD, New, WD, New…
## $ SaleCondition <fct> Normal, Normal, Normal, Abnorml, Normal, Normal, Normal,…
## $ SalePrice     <int> 208500, 181500, 223500, 140000, 250000, 143000, 307000, …
# Number of rows (observations) in the data frame.
dim(house_price)[1]
## [1] 1460
# Count the rows that are exact duplicates of an earlier row.
length(which(duplicated(house_price)))
## [1] 0

The data consists of 1,460 rows, none of which are duplicated.

# Count the distinct values in each column. across() replaces the superseded
# scoped verb summarise_all(); the result is the same one-row data frame.
house_price %>%
  summarise(across(everything(), n_distinct))

This shows the number of unique values in every column.

# Number of missing values in each column.
vapply(house_price, function(column) sum(is.na(column)), numeric(1))
##            Id    MSSubClass      MSZoning   LotFrontage       LotArea 
##             0             0             0           259             0 
##        Street         Alley      LotShape   LandContour     Utilities 
##             0          1369             0             0             0 
##     LotConfig     LandSlope  Neighborhood    Condition1    Condition2 
##             0             0             0             0             0 
##      BldgType    HouseStyle   OverallQual   OverallCond     YearBuilt 
##             0             0             0             0             0 
##  YearRemodAdd     RoofStyle      RoofMatl   Exterior1st   Exterior2nd 
##             0             0             0             0             0 
##    MasVnrType    MasVnrArea     ExterQual     ExterCond    Foundation 
##             8             8             0             0             0 
##      BsmtQual      BsmtCond  BsmtExposure  BsmtFinType1    BsmtFinSF1 
##            37            37            38            37             0 
##  BsmtFinType2    BsmtFinSF2     BsmtUnfSF   TotalBsmtSF       Heating 
##            38             0             0             0             0 
##     HeatingQC    CentralAir    Electrical     X1stFlrSF     X2ndFlrSF 
##             0             0             1             0             0 
##  LowQualFinSF     GrLivArea  BsmtFullBath  BsmtHalfBath      FullBath 
##             0             0             0             0             0 
##      HalfBath  BedroomAbvGr  KitchenAbvGr   KitchenQual  TotRmsAbvGrd 
##             0             0             0             0             0 
##    Functional    Fireplaces   FireplaceQu    GarageType   GarageYrBlt 
##             0             0           690            81            81 
##  GarageFinish    GarageCars    GarageArea    GarageQual    GarageCond 
##            81             0             0            81            81 
##    PavedDrive    WoodDeckSF   OpenPorchSF EnclosedPorch    X3SsnPorch 
##             0             0             0             0             0 
##   ScreenPorch      PoolArea        PoolQC         Fence   MiscFeature 
##             0             0          1453          1179          1406 
##       MiscVal        MoSold        YrSold      SaleType SaleCondition 
##             0             0             0             0             0 
##     SalePrice 
##             0

Data Wrangling

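Let’s drop the columns in which a large share of the values is missing, i.e. well above 5% of the total rows (5% of 1,460 = 73): LotFrontage, Alley, FireplaceQu, PoolQC, Fence, MiscFeature. Note that the garage columns (GarageType, GarageYrBlt, GarageFinish, GarageQual, GarageCond) have 81 missing values each, slightly above this threshold, but they are kept; their incomplete rows are removed below when the rows with NA values are dropped.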

# Remove the six columns that are dominated by missing values.
house_price <- select(
  house_price,
  -LotFrontage, -Alley, -FireplaceQu, -PoolQC, -Fence, -MiscFeature
)

We also need to drop the columns that are unused and the columns that would harm the prediction process. Id is only a row identifier and is of no use to the model. Condition2 and Exterior1st are problematic at test time: they are categorical columns, and when the data is split into train and test sets with random seed 100, some of their categories appear only in the test set, so predicting on the test set would raise an error.

# Remove the identifier column and the two categorical columns whose levels
# do not all appear in the training split.
house_price <- select(house_price, -Id, -Condition2, -Exterior1st)

Let’s drop the rows that still contain NA values.

# Keep only the complete rows, then confirm that no missing values remain.
house_price <- na.omit(house_price)
sum(is.na(house_price))
## [1] 0
# Switch sample() to the legacy "Rounding" sampler (the default before
# R 3.6.0), presumably so that the seeded split reproduces results obtained
# on older R versions. R warns that this sampler is non-uniform.
RNGkind(sample.kind = "Rounding")
## Warning in RNGkind(sample.kind = "Rounding"): non-uniform 'Rounding' sampler
## used
# Fix the RNG seed so that the random train/test split is reproducible.
set.seed(100)
library(rsample)
## Warning: package 'rsample' was built under R version 4.2.2
# Train-test split: randomly draw 80% of the row positions for training.
# floor() makes explicit the truncation that sample() applies to a
# non-integer size.
index <- sample(x = nrow(house_price), size = floor(nrow(house_price) * 0.8))
hp_train <- house_price[index, ]  # rows whose positions were drawn into `index`
hp_test <- house_price[-index, ]  # all remaining rows
# Size of the test set.
dim(hp_test)[1]
## [1] 268
# Size of the training set.
dim(hp_train)[1]
## [1] 1070

Modelling

Model All

# Baseline model: linear regression of SalePrice on every remaining column.
model_price_all <- lm(SalePrice ~ ., data = hp_train)
summary(model_price_all)
## 
## Call:
## lm(formula = SalePrice ~ ., data = hp_train)
## 
## Residuals:
##     Min      1Q  Median      3Q     Max 
## -323837  -10454     187   10411  147699 
## 
## Coefficients: (3 not defined because of singularities)
##                        Estimate Std. Error t value Pr(>|t|)    
## (Intercept)          -1.987e+06  1.450e+06  -1.370 0.171115    
## MSSubClass           -3.622e+01  1.351e+02  -0.268 0.788770    
## MSZoningFV            5.770e+04  1.627e+04   3.545 0.000413 ***
## MSZoningRH            3.915e+04  1.703e+04   2.299 0.021770 *  
## MSZoningRL            3.970e+04  1.402e+04   2.833 0.004723 ** 
## MSZoningRM            3.538e+04  1.308e+04   2.705 0.006968 ** 
## LotArea               6.276e-01  1.846e-01   3.401 0.000703 ***
## StreetPave            4.319e+04  2.621e+04   1.648 0.099768 .  
## LotShapeIR2           5.719e+03  5.834e+03   0.980 0.327165    
## LotShapeIR3           9.069e+03  1.187e+04   0.764 0.445099    
## LotShapeReg           1.944e+03  2.214e+03   0.878 0.380113    
## LandContourHLS        1.785e+04  7.068e+03   2.525 0.011745 *  
## LandContourLow       -8.841e+03  8.962e+03  -0.987 0.324143    
## LandContourLvl        1.176e+04  5.460e+03   2.155 0.031475 *  
## UtilitiesNoSeWa      -5.461e+04  3.245e+04  -1.683 0.092795 .  
## LotConfigCulDSac      7.306e+03  4.254e+03   1.717 0.086249 .  
## LotConfigFR2         -1.197e+04  5.690e+03  -2.104 0.035692 *  
## LotConfigFR3         -2.285e+04  1.627e+04  -1.405 0.160514    
## LotConfigInside      -3.433e+03  2.388e+03  -1.437 0.150993    
## LandSlopeMod          7.856e+03  5.764e+03   1.363 0.173256    
## LandSlopeSev         -3.667e+04  1.438e+04  -2.551 0.010925 *  
## NeighborhoodBlueste  -2.690e+03  2.310e+04  -0.116 0.907350    
## NeighborhoodBrDale    9.045e+03  1.458e+04   0.620 0.535296    
## NeighborhoodBrkSide  -5.250e+02  1.311e+04  -0.040 0.968067    
## NeighborhoodClearCr  -1.724e+04  1.249e+04  -1.380 0.167969    
## NeighborhoodCollgCr  -6.873e+03  9.481e+03  -0.725 0.468701    
## NeighborhoodCrawfor   1.242e+04  1.136e+04   1.093 0.274719    
## NeighborhoodEdwards  -3.115e+04  1.072e+04  -2.905 0.003766 ** 
## NeighborhoodGilbert  -1.244e+04  1.002e+04  -1.241 0.214832    
## NeighborhoodIDOTRR   -2.448e+03  1.519e+04  -0.161 0.872023    
## NeighborhoodMeadowV  -1.523e+03  1.496e+04  -0.102 0.918912    
## NeighborhoodMitchel  -2.015e+04  1.080e+04  -1.866 0.062395 .  
## NeighborhoodNAmes    -1.762e+04  1.025e+04  -1.719 0.086040 .  
## NeighborhoodNoRidge   3.078e+04  1.093e+04   2.817 0.004953 ** 
## NeighborhoodNPkVill   8.912e+03  1.711e+04   0.521 0.602553    
## NeighborhoodNridgHt   2.280e+04  9.863e+03   2.311 0.021056 *  
## NeighborhoodNWAmes   -1.997e+04  1.061e+04  -1.882 0.060144 .  
## NeighborhoodOldTown  -9.811e+03  1.322e+04  -0.742 0.458281    
## NeighborhoodSawyer   -1.393e+04  1.085e+04  -1.284 0.199638    
## NeighborhoodSawyerW  -2.554e+03  1.041e+04  -0.245 0.806330    
## NeighborhoodSomerst  -4.281e+03  1.188e+04  -0.360 0.718708    
## NeighborhoodStoneBr   4.397e+04  1.069e+04   4.113 4.28e-05 ***
## NeighborhoodSWISU    -2.892e+03  1.352e+04  -0.214 0.830689    
## NeighborhoodTimber   -8.737e+03  1.085e+04  -0.805 0.420956    
## NeighborhoodVeenker  -3.286e+03  1.350e+04  -0.243 0.807749    
## Condition1Feedr       7.274e+03  7.334e+03   0.992 0.321602    
## Condition1Norm        1.552e+04  6.024e+03   2.576 0.010163 *  
## Condition1PosA        2.019e+04  1.632e+04   1.237 0.216345    
## Condition1PosN       -1.053e+04  9.447e+03  -1.114 0.265416    
## Condition1RRAe       -1.392e+04  1.310e+04  -1.062 0.288396    
## Condition1RRAn        9.213e+03  9.054e+03   1.018 0.309168    
## Condition1RRNe       -4.852e+03  2.110e+04  -0.230 0.818219    
## Condition1RRNn        9.586e+03  2.014e+04   0.476 0.634162    
## BldgType2fmCon       -6.619e+03  2.156e+04  -0.307 0.758938    
## BldgTypeDuplex       -1.813e+04  1.195e+04  -1.517 0.129712    
## BldgTypeTwnhs        -2.387e+04  1.536e+04  -1.554 0.120619    
## BldgTypeTwnhsE       -1.635e+04  1.411e+04  -1.159 0.246902    
## HouseStyle1.5Unf      9.923e+03  1.121e+04   0.885 0.376383    
## HouseStyle1Story      8.713e+03  6.425e+03   1.356 0.175440    
## HouseStyle2.5Fin     -1.701e+04  2.071e+04  -0.821 0.411673    
## HouseStyle2.5Unf     -2.132e+03  1.380e+04  -0.155 0.877227    
## HouseStyle2Story     -5.812e+03  5.059e+03  -1.149 0.250989    
## HouseStyleSFoyer      7.199e+03  9.363e+03   0.769 0.442204    
## HouseStyleSLvl        5.221e+03  8.149e+03   0.641 0.521888    
## OverallQual           5.644e+03  1.452e+03   3.886 0.000110 ***
## OverallCond           6.143e+03  1.249e+03   4.920 1.04e-06 ***
## YearBuilt             3.569e+02  1.160e+02   3.075 0.002168 ** 
## YearRemodAdd          4.802e+01  8.212e+01   0.585 0.558842    
## RoofStyleGable       -1.565e+04  2.254e+04  -0.694 0.487612    
## RoofStyleGambrel     -9.366e+03  2.485e+04  -0.377 0.706362    
## RoofStyleHip         -1.310e+04  2.261e+04  -0.579 0.562703    
## RoofStyleMansard     -4.672e+03  2.683e+04  -0.174 0.861787    
## RoofStyleShed        -2.807e+04  4.155e+04  -0.676 0.499373    
## RoofMatlCompShg       5.933e+05  4.140e+04  14.331  < 2e-16 ***
## RoofMatlMembran       6.633e+05  5.844e+04  11.351  < 2e-16 ***
## RoofMatlMetal         6.355e+05  5.679e+04  11.190  < 2e-16 ***
## RoofMatlRoll          5.985e+05  5.179e+04  11.556  < 2e-16 ***
## RoofMatlTar&Grv       5.742e+05  4.686e+04  12.254  < 2e-16 ***
## RoofMatlWdShake       5.914e+05  4.528e+04  13.062  < 2e-16 ***
## RoofMatlWdShngl       6.825e+05  4.291e+04  15.903  < 2e-16 ***
## Exterior2ndAsphShn    1.237e+04  2.417e+04   0.512 0.609098    
## Exterior2ndBrk Cmn   -3.407e+03  2.387e+04  -0.143 0.886538    
## Exterior2ndBrkFace    1.853e+04  1.195e+04   1.551 0.121382    
## Exterior2ndCBlock    -1.920e+04  4.045e+04  -0.475 0.635129    
## Exterior2ndCmentBd    9.135e+02  1.110e+04   0.082 0.934411    
## Exterior2ndHdBoard    7.064e+02  9.928e+03   0.071 0.943296    
## Exterior2ndImStucc    2.111e+04  1.455e+04   1.451 0.147159    
## Exterior2ndMetalSd    8.491e+03  9.826e+03   0.864 0.387765    
## Exterior2ndOther     -2.265e+04  2.986e+04  -0.759 0.448356    
## Exterior2ndPlywood    2.969e+03  1.024e+04   0.290 0.771889    
## Exterior2ndStone     -2.969e+03  3.223e+04  -0.092 0.926632    
## Exterior2ndStucco     1.265e+03  1.247e+04   0.101 0.919204    
## Exterior2ndVinylSd    6.584e+03  9.827e+03   0.670 0.503026    
## Exterior2ndWd Sdng    6.819e+03  9.782e+03   0.697 0.485968    
## Exterior2ndWd Shng   -3.954e+02  1.108e+04  -0.036 0.971541    
## MasVnrTypeBrkFace     8.306e+03  8.354e+03   0.994 0.320379    
## MasVnrTypeNone        9.527e+03  8.396e+03   1.135 0.256809    
## MasVnrTypeStone       1.454e+04  8.893e+03   1.635 0.102358    
## MasVnrArea            1.196e+01  7.599e+00   1.573 0.116009    
## ExterQualFa           7.161e+03  2.648e+04   0.270 0.786873    
## ExterQualGd          -1.170e+04  6.195e+03  -1.888 0.059343 .  
## ExterQualTA          -1.181e+04  6.852e+03  -1.723 0.085202 .  
## ExterCondFa           1.438e+04  3.140e+04   0.458 0.647149    
## ExterCondGd           9.515e+03  2.990e+04   0.318 0.750354    
## ExterCondTA           1.188e+04  2.987e+04   0.398 0.691025    
## FoundationCBlock      3.624e+03  4.701e+03   0.771 0.440995    
## FoundationPConc       3.204e+03  4.916e+03   0.652 0.514760    
## FoundationStone       1.496e+04  1.350e+04   1.108 0.268209    
## FoundationWood       -3.058e+04  1.793e+04  -1.705 0.088475 .  
## BsmtQualFa           -1.425e+04  8.648e+03  -1.648 0.099665 .  
## BsmtQualGd           -1.820e+04  4.272e+03  -4.261 2.26e-05 ***
## BsmtQualTA           -1.596e+04  5.405e+03  -2.952 0.003242 ** 
## BsmtCondGd           -5.668e+02  7.785e+03  -0.073 0.941973    
## BsmtCondPo            3.286e+04  4.851e+04   0.677 0.498402    
## BsmtCondTA            3.302e+03  6.459e+03   0.511 0.609349    
## BsmtExposureGd        1.901e+04  4.011e+03   4.740 2.50e-06 ***
## BsmtExposureMn       -1.898e+03  3.984e+03  -0.476 0.633880    
## BsmtExposureNo       -4.490e+03  2.906e+03  -1.545 0.122740    
## BsmtFinType1BLQ       5.680e+03  3.866e+03   1.469 0.142199    
## BsmtFinType1GLQ       6.938e+03  3.507e+03   1.978 0.048209 *  
## BsmtFinType1LwQ      -4.164e+02  5.364e+03  -0.078 0.938147    
## BsmtFinType1Rec       3.763e+03  4.182e+03   0.900 0.368504    
## BsmtFinType1Unf       3.152e+03  4.039e+03   0.780 0.435340    
## BsmtFinSF1            3.535e+01  7.585e+00   4.661 3.65e-06 ***
## BsmtFinType2BLQ      -1.074e+04  1.069e+04  -1.005 0.315048    
## BsmtFinType2GLQ      -6.562e+03  1.244e+04  -0.528 0.597895    
## BsmtFinType2LwQ      -1.483e+04  9.618e+03  -1.542 0.123424    
## BsmtFinType2Rec      -9.269e+03  9.550e+03  -0.971 0.332032    
## BsmtFinType2Unf      -1.142e+04  9.910e+03  -1.152 0.249532    
## BsmtFinSF2            2.340e+01  1.266e+01   1.849 0.064802 .  
## BsmtUnfSF             2.074e+01  6.962e+00   2.980 0.002965 ** 
## TotalBsmtSF                  NA         NA      NA       NA    
## HeatingGasW          -1.012e+03  1.026e+04  -0.099 0.921441    
## HeatingGrav           1.187e+04  2.761e+04   0.430 0.667295    
## HeatingOthW          -2.439e+04  3.361e+04  -0.726 0.468313    
## HeatingQCFa           5.196e+03  6.871e+03   0.756 0.449668    
## HeatingQCGd          -4.482e+03  2.833e+03  -1.582 0.114006    
## HeatingQCPo          -6.261e+03  3.376e+04  -0.185 0.852927    
## HeatingQCTA          -4.241e+03  3.007e+03  -1.411 0.158730    
## CentralAirY           3.942e+03  6.478e+03   0.608 0.543044    
## ElectricalFuseF       1.352e+03  9.768e+03   0.138 0.889940    
## ElectricalFuseP       1.366e+04  2.898e+04   0.471 0.637531    
## ElectricalMix                NA         NA      NA       NA    
## ElectricalSBrkr      -1.373e+03  4.293e+03  -0.320 0.749264    
## X1stFlrSF             4.100e+01  8.033e+00   5.103 4.10e-07 ***
## X2ndFlrSF             6.284e+01  7.566e+00   8.306 3.80e-16 ***
## LowQualFinSF          5.533e+00  2.888e+01   0.192 0.848100    
## GrLivArea                    NA         NA      NA       NA    
## BsmtFullBath          1.529e+03  2.736e+03   0.559 0.576453    
## BsmtHalfBath          7.139e+02  4.195e+03   0.170 0.864906    
## FullBath              2.340e+03  3.195e+03   0.732 0.464093    
## HalfBath              2.767e+02  2.944e+03   0.094 0.925154    
## BedroomAbvGr         -1.571e+03  1.952e+03  -0.805 0.421089    
## KitchenAbvGr         -1.992e+04  8.951e+03  -2.225 0.026334 *  
## KitchenQualFa        -2.539e+04  9.149e+03  -2.775 0.005631 ** 
## KitchenQualGd        -2.603e+04  4.834e+03  -5.384 9.40e-08 ***
## KitchenQualTA        -2.406e+04  5.517e+03  -4.361 1.45e-05 ***
## TotRmsAbvGrd          2.876e+03  1.304e+03   2.206 0.027668 *  
## FunctionalMaj2       -1.953e+04  2.374e+04  -0.823 0.410925    
## FunctionalMin1       -1.993e+03  1.187e+04  -0.168 0.866734    
## FunctionalMin2       -2.368e+03  1.207e+04  -0.196 0.844569    
## FunctionalMod        -8.219e+03  1.489e+04  -0.552 0.581110    
## FunctionalSev        -5.392e+04  3.670e+04  -1.469 0.142095    
## FunctionalTyp         7.671e+03  1.020e+04   0.752 0.452109    
## Fireplaces            3.506e+03  1.819e+03   1.927 0.054275 .  
## GarageTypeAttchd      1.825e+04  1.515e+04   1.204 0.228817    
## GarageTypeBasment     3.443e+04  1.745e+04   1.973 0.048769 *  
## GarageTypeBuiltIn     1.451e+04  1.570e+04   0.924 0.355664    
## GarageTypeCarPort     3.295e+04  2.058e+04   1.601 0.109672    
## GarageTypeDetchd      2.085e+04  1.499e+04   1.391 0.164657    
## GarageYrBlt          -2.343e+01  8.838e+01  -0.265 0.791007    
## GarageFinishRFn      -2.715e+03  2.631e+03  -1.032 0.302423    
## GarageFinishUnf      -1.278e+03  3.238e+03  -0.394 0.693314    
## GarageCars            9.008e+03  3.058e+03   2.945 0.003311 ** 
## GarageArea            4.193e+00  1.083e+01   0.387 0.698848    
## GarageQualFa         -7.619e+04  3.955e+04  -1.927 0.054351 .  
## GarageQualGd         -6.079e+04  4.069e+04  -1.494 0.135567    
## GarageQualPo         -1.038e+05  5.317e+04  -1.952 0.051280 .  
## GarageQualTA         -7.176e+04  3.918e+04  -1.831 0.067370 .  
## GarageCondFa          6.407e+04  4.955e+04   1.293 0.196312    
## GarageCondGd          5.858e+04  5.146e+04   1.138 0.255270    
## GarageCondPo          7.191e+04  5.312e+04   1.354 0.176211    
## GarageCondTA          6.737e+04  4.926e+04   1.368 0.171717    
## PavedDriveP          -3.707e+03  8.205e+03  -0.452 0.651546    
## PavedDriveY           1.321e+03  5.312e+03   0.249 0.803698    
## WoodDeckSF            1.353e+01  7.871e+00   1.719 0.085889 .  
## OpenPorchSF          -9.911e+00  1.624e+01  -0.610 0.541868    
## EnclosedPorch         2.283e+00  1.732e+01   0.132 0.895177    
## X3SsnPorch            5.489e+01  2.920e+01   1.880 0.060426 .  
## ScreenPorch           3.234e+01  1.604e+01   2.016 0.044151 *  
## PoolArea              1.035e+02  2.550e+01   4.060 5.34e-05 ***
## MiscVal              -1.705e-01  1.712e+00  -0.100 0.920686    
## MoSold               -3.663e+02  3.387e+02  -1.082 0.279683    
## YrSold                2.732e+02  7.089e+02   0.385 0.700069    
## SaleTypeCon           2.542e+04  2.095e+04   1.213 0.225460    
## SaleTypeConLD         2.920e+04  1.648e+04   1.772 0.076751 .  
## SaleTypeConLI         6.839e+03  1.549e+04   0.442 0.658913    
## SaleTypeConLw        -1.225e+03  2.114e+04  -0.058 0.953810    
## SaleTypeCWD           1.182e+04  1.556e+04   0.760 0.447703    
## SaleTypeNew           1.988e+04  1.925e+04   1.032 0.302155    
## SaleTypeOth           2.243e+04  2.765e+04   0.811 0.417486    
## SaleTypeWD           -2.270e+03  5.524e+03  -0.411 0.681219    
## SaleConditionAdjLand  2.819e+04  3.111e+04   0.906 0.365176    
## SaleConditionAlloca   9.052e+03  1.351e+04   0.670 0.502999    
## SaleConditionFamily  -5.259e+03  8.166e+03  -0.644 0.519705    
## SaleConditionNormal   2.715e+03  4.121e+03   0.659 0.510278    
## SaleConditionPartial -8.667e+03  1.845e+04  -0.470 0.638683    
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## Residual standard error: 26490 on 866 degrees of freedom
## Multiple R-squared:  0.9129, Adjusted R-squared:  0.8925 
## F-statistic: 44.74 on 203 and 866 DF,  p-value: < 2.2e-16

Model Backward

# Backward stepwise selection starting from the full model. trace = 0 is the
# documented way to suppress the per-step log; the shorthand F is avoided
# because it is an ordinary variable that can be reassigned.
model_backward <- step(
  object = model_price_all,
  direction = "backward",
  trace = 0
)

summary(model_backward)
## 
## Call:
## lm(formula = SalePrice ~ MSSubClass + MSZoning + LotArea + Street + 
##     LandContour + Utilities + LotConfig + LandSlope + Neighborhood + 
##     Condition1 + BldgType + OverallQual + OverallCond + YearBuilt + 
##     YearRemodAdd + RoofMatl + MasVnrArea + ExterQual + BsmtQual + 
##     BsmtExposure + BsmtFinSF1 + BsmtFinSF2 + BsmtUnfSF + X1stFlrSF + 
##     X2ndFlrSF + BedroomAbvGr + KitchenAbvGr + KitchenQual + TotRmsAbvGrd + 
##     Functional + Fireplaces + GarageCars + X3SsnPorch + ScreenPorch + 
##     PoolArea + SaleType, data = hp_train)
## 
## Residuals:
##     Min      1Q  Median      3Q     Max 
## -336627  -10740     340   10196  154771 
## 
## Coefficients:
##                       Estimate Std. Error t value Pr(>|t|)    
## (Intercept)         -1.593e+06  1.999e+05  -7.970 4.45e-15 ***
## MSSubClass          -1.149e+02  6.337e+01  -1.813 0.070169 .  
## MSZoningFV           4.834e+04  1.471e+04   3.285 0.001057 ** 
## MSZoningRH           2.946e+04  1.528e+04   1.928 0.054205 .  
## MSZoningRL           3.157e+04  1.257e+04   2.511 0.012202 *  
## MSZoningRM           2.778e+04  1.172e+04   2.371 0.017927 *  
## LotArea              5.390e-01  1.636e-01   3.295 0.001018 ** 
## StreetPave           4.171e+04  2.032e+04   2.053 0.040377 *  
## LandContourHLS       1.711e+04  6.509e+03   2.628 0.008730 ** 
## LandContourLow      -1.092e+04  8.245e+03  -1.324 0.185683    
## LandContourLvl       1.125e+04  4.875e+03   2.307 0.021248 *  
## UtilitiesNoSeWa     -4.368e+04  2.837e+04  -1.540 0.123940    
## LotConfigCulDSac     8.345e+03  3.906e+03   2.136 0.032900 *  
## LotConfigFR2        -8.925e+03  5.362e+03  -1.664 0.096355 .  
## LotConfigFR3        -1.638e+04  1.583e+04  -1.035 0.300997    
## LotConfigInside     -2.194e+03  2.213e+03  -0.991 0.321880    
## LandSlopeMod         9.253e+03  5.263e+03   1.758 0.079055 .  
## LandSlopeSev        -3.642e+04  1.307e+04  -2.786 0.005441 ** 
## NeighborhoodBlueste -2.245e+03  2.140e+04  -0.105 0.916460    
## NeighborhoodBrDale   8.289e+02  1.328e+04   0.062 0.950229    
## NeighborhoodBrkSide -2.458e+02  1.153e+04  -0.021 0.982995    
## NeighborhoodClearCr -1.109e+04  1.141e+04  -0.972 0.331156    
## NeighborhoodCollgCr -4.247e+03  8.687e+03  -0.489 0.625053    
## NeighborhoodCrawfor  1.213e+04  1.039e+04   1.167 0.243450    
## NeighborhoodEdwards -2.445e+04  9.692e+03  -2.523 0.011797 *  
## NeighborhoodGilbert -1.183e+04  9.172e+03  -1.290 0.197348    
## NeighborhoodIDOTRR   1.128e+03  1.328e+04   0.085 0.932290    
## NeighborhoodMeadowV -4.554e+03  1.258e+04  -0.362 0.717383    
## NeighborhoodMitchel -2.177e+04  9.870e+03  -2.206 0.027640 *  
## NeighborhoodNAmes   -1.610e+04  9.321e+03  -1.727 0.084455 .  
## NeighborhoodNoRidge  3.651e+04  9.956e+03   3.667 0.000259 ***
## NeighborhoodNPkVill -1.462e+03  1.291e+04  -0.113 0.909823    
## NeighborhoodNridgHt  2.418e+04  8.892e+03   2.719 0.006658 ** 
## NeighborhoodNWAmes  -2.250e+04  9.567e+03  -2.352 0.018869 *  
## NeighborhoodOldTown -1.117e+04  1.184e+04  -0.944 0.345379    
## NeighborhoodSawyer  -1.400e+04  9.899e+03  -1.415 0.157530    
## NeighborhoodSawyerW -2.795e+03  9.443e+03  -0.296 0.767261    
## NeighborhoodSomerst -1.250e+03  1.075e+04  -0.116 0.907510    
## NeighborhoodStoneBr  3.998e+04  9.720e+03   4.113 4.23e-05 ***
## NeighborhoodSWISU    4.768e+01  1.226e+04   0.004 0.996898    
## NeighborhoodTimber  -1.043e+04  1.016e+04  -1.026 0.304927    
## NeighborhoodVeenker -6.469e+03  1.219e+04  -0.531 0.595673    
## Condition1Feedr      4.568e+03  6.530e+03   0.700 0.484395    
## Condition1Norm       1.097e+04  5.245e+03   2.092 0.036696 *  
## Condition1PosA       1.616e+04  1.532e+04   1.055 0.291497    
## Condition1PosN      -1.296e+04  8.495e+03  -1.526 0.127370    
## Condition1RRAe      -1.247e+04  1.144e+04  -1.090 0.275927    
## Condition1RRAn       4.957e+03  8.122e+03   0.610 0.541740    
## Condition1RRNe      -9.149e+03  2.020e+04  -0.453 0.650725    
## Condition1RRNn       1.290e+04  1.830e+04   0.705 0.481034    
## BldgType2fmCon       1.133e+04  1.187e+04   0.955 0.340060    
## BldgTypeDuplex      -8.537e+03  9.388e+03  -0.909 0.363401    
## BldgTypeTwnhs       -1.579e+04  9.517e+03  -1.659 0.097373 .  
## BldgTypeTwnhsE      -7.852e+03  7.511e+03  -1.045 0.296071    
## OverallQual          6.235e+03  1.297e+03   4.806 1.79e-06 ***
## OverallCond          6.468e+03  1.054e+03   6.139 1.21e-09 ***
## YearBuilt            3.728e+02  8.123e+01   4.590 5.02e-06 ***
## YearRemodAdd         1.077e+02  7.211e+01   1.494 0.135417    
## RoofMatlCompShg      6.056e+05  3.531e+04  17.153  < 2e-16 ***
## RoofMatlMembran      6.766e+05  4.886e+04  13.850  < 2e-16 ***
## RoofMatlMetal        6.519e+05  4.744e+04  13.740  < 2e-16 ***
## RoofMatlRoll         6.106e+05  4.480e+04  13.628  < 2e-16 ***
## RoofMatlTar&Grv      5.954e+05  3.686e+04  16.153  < 2e-16 ***
## RoofMatlWdShake      6.057e+05  3.883e+04  15.599  < 2e-16 ***
## RoofMatlWdShngl      7.078e+05  3.681e+04  19.230  < 2e-16 ***
## MasVnrArea           9.809e+00  5.706e+00   1.719 0.085941 .  
## ExterQualFa          5.836e+03  1.819e+04   0.321 0.748373    
## ExterQualGd         -1.199e+04  5.702e+03  -2.103 0.035690 *  
## ExterQualTA         -1.323e+04  6.327e+03  -2.091 0.036757 *  
## BsmtQualFa          -1.671e+04  7.776e+03  -2.148 0.031928 *  
## BsmtQualGd          -2.066e+04  3.989e+03  -5.178 2.72e-07 ***
## BsmtQualTA          -1.809e+04  4.932e+03  -3.668 0.000258 ***
## BsmtExposureGd       1.964e+04  3.799e+03   5.168 2.87e-07 ***
## BsmtExposureMn      -2.286e+03  3.731e+03  -0.613 0.540191    
## BsmtExposureNo      -5.322e+03  2.679e+03  -1.987 0.047211 *  
## BsmtFinSF1           3.600e+01  6.245e+00   5.764 1.10e-08 ***
## BsmtFinSF2           2.777e+01  7.854e+00   3.537 0.000425 ***
## BsmtUnfSF            1.998e+01  6.102e+00   3.274 0.001096 ** 
## X1stFlrSF            4.557e+01  6.784e+00   6.717 3.17e-11 ***
## X2ndFlrSF            5.213e+01  4.427e+00  11.775  < 2e-16 ***
## BedroomAbvGr        -3.095e+03  1.723e+03  -1.796 0.072818 .  
## KitchenAbvGr        -2.278e+04  7.451e+03  -3.057 0.002296 ** 
## KitchenQualFa       -2.467e+04  8.254e+03  -2.988 0.002876 ** 
## KitchenQualGd       -2.594e+04  4.438e+03  -5.845 6.91e-09 ***
## KitchenQualTA       -2.435e+04  5.028e+03  -4.844 1.48e-06 ***
## TotRmsAbvGrd         3.259e+03  1.186e+03   2.748 0.006113 ** 
## FunctionalMaj2      -1.942e+04  1.888e+04  -1.029 0.303886    
## FunctionalMin1      -3.995e+03  1.104e+04  -0.362 0.717497    
## FunctionalMin2      -3.603e+03  1.109e+04  -0.325 0.745249    
## FunctionalMod       -6.284e+03  1.371e+04  -0.458 0.646721    
## FunctionalSev       -6.389e+04  3.060e+04  -2.088 0.037070 *  
## FunctionalTyp        5.596e+03  9.455e+03   0.592 0.554055    
## Fireplaces           4.459e+03  1.670e+03   2.670 0.007721 ** 
## GarageCars           9.609e+03  1.938e+03   4.959 8.39e-07 ***
## X3SsnPorch           4.407e+01  2.731e+01   1.614 0.106949    
## ScreenPorch          3.291e+01  1.461e+01   2.253 0.024499 *  
## PoolArea             1.139e+02  2.181e+01   5.221 2.18e-07 ***
## SaleTypeCon          3.639e+04  2.016e+04   1.806 0.071301 .  
## SaleTypeConLD        2.067e+04  1.433e+04   1.443 0.149420    
## SaleTypeConLI       -1.091e+03  1.466e+04  -0.074 0.940702    
## SaleTypeConLw        1.230e+04  2.006e+04   0.613 0.540117    
## SaleTypeCWD          7.326e+03  1.454e+04   0.504 0.614579    
## SaleTypeNew          9.258e+03  6.148e+03   1.506 0.132405    
## SaleTypeOth          2.444e+04  2.687e+04   0.910 0.363194    
## SaleTypeWD          -7.366e+02  5.026e+03  -0.147 0.883512    
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## Residual standard error: 26240 on 965 degrees of freedom
## Multiple R-squared:  0.9049, Adjusted R-squared:  0.8946 
## F-statistic: 88.25 on 104 and 965 DF,  p-value: < 2.2e-16

The adjusted R-squared obtained for model_backward is 0.8946, which is a fairly good value and is slightly higher than that of the full model, model_price_all (0.8925).

# Store the backward model's predictions for the test set in a new column.
hp_test[["price_predict"]] <- predict(object = model_backward, newdata = hp_test)
library(MLmetrics)
## Warning: package 'MLmetrics' was built under R version 4.2.2
## 
## Attaching package: 'MLmetrics'
## The following object is masked from 'package:base':
## 
##     Recall
# Root mean squared error of the test-set predictions.
rms_error <- with(hp_test, RMSE(y_pred = price_predict, y_true = SalePrice))
rms_error
## [1] 20605.21
# Mean absolute error of the test-set predictions.
absolute_error <- with(hp_test, MAE(y_pred = price_predict, y_true = SalePrice))
absolute_error
## [1] 14280.77

RMSE = 20,605.21; MAE = 14,280.77

Let’s compare the error values with the average house price.

# Mean sale price over the cleaned data (train and test rows together).
average_price <- mean(house_price[["SalePrice"]])
average_price
## [1] 186761.8

Average house price = 186,761.8

# RMSE expressed as a percentage of the average sale price.
(rms_error / average_price) * 100
## [1] 11.03288
# MAE expressed as a percentage of the average sale price.
(absolute_error / average_price) * 100
## [1] 7.646516

RMSE as a percentage of the average price ≈ 11%; MAE as a percentage of the average price ≈ 7.6%