link to the dataset: https://www.kaggle.com/competitions/house-prices-advanced-regression-techniques/data
library(dplyr)##
## Attaching package: 'dplyr'
## The following objects are masked from 'package:stats':
##
## filter, lag
## The following objects are masked from 'package:base':
##
## intersect, setdiff, setequal, union
library(ggplot2)
library(GGally)## Registered S3 method overwritten by 'GGally':
## method from
## +.gg ggplot2
# Load the Kaggle training data. Character columns are read as factors so that
# lm() later treats them as categorical predictors.
house_price <- read.csv("train.csv", stringsAsFactors = TRUE)
# Preview the first rows, then look at the spread of the target variable.
head(house_price)
boxplot(house_price$SalePrice)
The sale price distribution appears right-skewed (most houses sit at the lower
end, with a long tail toward high prices), and it also has outliers.
# Smallest and largest sale price in the data set.
min(house_price$SalePrice)## [1] 34900
max(house_price$SalePrice)## [1] 755000
# Number of columns in the data frame.
length(names(house_price))## [1] 81
The data frame consists of 81 columns.
# Compact overview of every column: its type and its first few values.
glimpse(house_price)## Rows: 1,460
## Columns: 81
## $ Id <int> 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 1…
## $ MSSubClass <int> 60, 20, 60, 70, 60, 50, 20, 60, 50, 190, 20, 60, 20, 20,…
## $ MSZoning <fct> RL, RL, RL, RL, RL, RL, RL, RL, RM, RL, RL, RL, RL, RL, …
## $ LotFrontage <int> 65, 80, 68, 60, 84, 85, 75, NA, 51, 50, 70, 85, NA, 91, …
## $ LotArea <int> 8450, 9600, 11250, 9550, 14260, 14115, 10084, 10382, 612…
## $ Street <fct> Pave, Pave, Pave, Pave, Pave, Pave, Pave, Pave, Pave, Pa…
## $ Alley <fct> NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, …
## $ LotShape <fct> Reg, Reg, IR1, IR1, IR1, IR1, Reg, IR1, Reg, Reg, Reg, I…
## $ LandContour <fct> Lvl, Lvl, Lvl, Lvl, Lvl, Lvl, Lvl, Lvl, Lvl, Lvl, Lvl, L…
## $ Utilities <fct> AllPub, AllPub, AllPub, AllPub, AllPub, AllPub, AllPub, …
## $ LotConfig <fct> Inside, FR2, Inside, Corner, FR2, Inside, Inside, Corner…
## $ LandSlope <fct> Gtl, Gtl, Gtl, Gtl, Gtl, Gtl, Gtl, Gtl, Gtl, Gtl, Gtl, G…
## $ Neighborhood <fct> CollgCr, Veenker, CollgCr, Crawfor, NoRidge, Mitchel, So…
## $ Condition1 <fct> Norm, Feedr, Norm, Norm, Norm, Norm, Norm, PosN, Artery,…
## $ Condition2 <fct> Norm, Norm, Norm, Norm, Norm, Norm, Norm, Norm, Norm, Ar…
## $ BldgType <fct> 1Fam, 1Fam, 1Fam, 1Fam, 1Fam, 1Fam, 1Fam, 1Fam, 1Fam, 2f…
## $ HouseStyle <fct> 2Story, 1Story, 2Story, 2Story, 2Story, 1.5Fin, 1Story, …
## $ OverallQual <int> 7, 6, 7, 7, 8, 5, 8, 7, 7, 5, 5, 9, 5, 7, 6, 7, 6, 4, 5,…
## $ OverallCond <int> 5, 8, 5, 5, 5, 5, 5, 6, 5, 6, 5, 5, 6, 5, 5, 8, 7, 5, 5,…
## $ YearBuilt <int> 2003, 1976, 2001, 1915, 2000, 1993, 2004, 1973, 1931, 19…
## $ YearRemodAdd <int> 2003, 1976, 2002, 1970, 2000, 1995, 2005, 1973, 1950, 19…
## $ RoofStyle <fct> Gable, Gable, Gable, Gable, Gable, Gable, Gable, Gable, …
## $ RoofMatl <fct> CompShg, CompShg, CompShg, CompShg, CompShg, CompShg, Co…
## $ Exterior1st <fct> VinylSd, MetalSd, VinylSd, Wd Sdng, VinylSd, VinylSd, Vi…
## $ Exterior2nd <fct> VinylSd, MetalSd, VinylSd, Wd Shng, VinylSd, VinylSd, Vi…
## $ MasVnrType <fct> BrkFace, None, BrkFace, None, BrkFace, None, Stone, Ston…
## $ MasVnrArea <int> 196, 0, 162, 0, 350, 0, 186, 240, 0, 0, 0, 286, 0, 306, …
## $ ExterQual <fct> Gd, TA, Gd, TA, Gd, TA, Gd, TA, TA, TA, TA, Ex, TA, Gd, …
## $ ExterCond <fct> TA, TA, TA, TA, TA, TA, TA, TA, TA, TA, TA, TA, TA, TA, …
## $ Foundation <fct> PConc, CBlock, PConc, BrkTil, PConc, Wood, PConc, CBlock…
## $ BsmtQual <fct> Gd, Gd, Gd, TA, Gd, Gd, Ex, Gd, TA, TA, TA, Ex, TA, Gd, …
## $ BsmtCond <fct> TA, TA, TA, Gd, TA, TA, TA, TA, TA, TA, TA, TA, TA, TA, …
## $ BsmtExposure <fct> No, Gd, Mn, No, Av, No, Av, Mn, No, No, No, No, No, Av, …
## $ BsmtFinType1 <fct> GLQ, ALQ, GLQ, ALQ, GLQ, GLQ, GLQ, ALQ, Unf, GLQ, Rec, G…
## $ BsmtFinSF1 <int> 706, 978, 486, 216, 655, 732, 1369, 859, 0, 851, 906, 99…
## $ BsmtFinType2 <fct> Unf, Unf, Unf, Unf, Unf, Unf, Unf, BLQ, Unf, Unf, Unf, U…
## $ BsmtFinSF2 <int> 0, 0, 0, 0, 0, 0, 0, 32, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0…
## $ BsmtUnfSF <int> 150, 284, 434, 540, 490, 64, 317, 216, 952, 140, 134, 17…
## $ TotalBsmtSF <int> 856, 1262, 920, 756, 1145, 796, 1686, 1107, 952, 991, 10…
## $ Heating <fct> GasA, GasA, GasA, GasA, GasA, GasA, GasA, GasA, GasA, Ga…
## $ HeatingQC <fct> Ex, Ex, Ex, Gd, Ex, Ex, Ex, Ex, Gd, Ex, Ex, Ex, TA, Ex, …
## $ CentralAir <fct> Y, Y, Y, Y, Y, Y, Y, Y, Y, Y, Y, Y, Y, Y, Y, Y, Y, Y, Y,…
## $ Electrical <fct> SBrkr, SBrkr, SBrkr, SBrkr, SBrkr, SBrkr, SBrkr, SBrkr, …
## $ X1stFlrSF <int> 856, 1262, 920, 961, 1145, 796, 1694, 1107, 1022, 1077, …
## $ X2ndFlrSF <int> 854, 0, 866, 756, 1053, 566, 0, 983, 752, 0, 0, 1142, 0,…
## $ LowQualFinSF <int> 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,…
## $ GrLivArea <int> 1710, 1262, 1786, 1717, 2198, 1362, 1694, 2090, 1774, 10…
## $ BsmtFullBath <int> 1, 0, 1, 1, 1, 1, 1, 1, 0, 1, 1, 1, 1, 0, 1, 0, 1, 0, 1,…
## $ BsmtHalfBath <int> 0, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,…
## $ FullBath <int> 2, 2, 2, 1, 2, 1, 2, 2, 2, 1, 1, 3, 1, 2, 1, 1, 1, 2, 1,…
## $ HalfBath <int> 1, 0, 1, 0, 1, 1, 0, 1, 0, 0, 0, 0, 0, 0, 1, 0, 0, 0, 1,…
## $ BedroomAbvGr <int> 3, 3, 3, 3, 4, 1, 3, 3, 2, 2, 3, 4, 2, 3, 2, 2, 2, 2, 3,…
## $ KitchenAbvGr <int> 1, 1, 1, 1, 1, 1, 1, 1, 2, 2, 1, 1, 1, 1, 1, 1, 1, 2, 1,…
## $ KitchenQual <fct> Gd, TA, Gd, Gd, Gd, TA, Gd, TA, TA, TA, TA, Ex, TA, Gd, …
## $ TotRmsAbvGrd <int> 8, 6, 6, 7, 9, 5, 7, 7, 8, 5, 5, 11, 4, 7, 5, 5, 5, 6, 6…
## $ Functional <fct> Typ, Typ, Typ, Typ, Typ, Typ, Typ, Typ, Min1, Typ, Typ, …
## $ Fireplaces <int> 0, 1, 1, 1, 1, 0, 1, 2, 2, 2, 0, 2, 0, 1, 1, 0, 1, 0, 0,…
## $ FireplaceQu <fct> NA, TA, TA, Gd, TA, NA, Gd, TA, TA, TA, NA, Gd, NA, Gd, …
## $ GarageType <fct> Attchd, Attchd, Attchd, Detchd, Attchd, Attchd, Attchd, …
## $ GarageYrBlt <int> 2003, 1976, 2001, 1998, 2000, 1993, 2004, 1973, 1931, 19…
## $ GarageFinish <fct> RFn, RFn, RFn, Unf, RFn, Unf, RFn, RFn, Unf, RFn, Unf, F…
## $ GarageCars <int> 2, 2, 2, 3, 3, 2, 2, 2, 2, 1, 1, 3, 1, 3, 1, 2, 2, 2, 2,…
## $ GarageArea <int> 548, 460, 608, 642, 836, 480, 636, 484, 468, 205, 384, 7…
## $ GarageQual <fct> TA, TA, TA, TA, TA, TA, TA, TA, Fa, Gd, TA, TA, TA, TA, …
## $ GarageCond <fct> TA, TA, TA, TA, TA, TA, TA, TA, TA, TA, TA, TA, TA, TA, …
## $ PavedDrive <fct> Y, Y, Y, Y, Y, Y, Y, Y, Y, Y, Y, Y, Y, Y, Y, Y, Y, Y, Y,…
## $ WoodDeckSF <int> 0, 298, 0, 0, 192, 40, 255, 235, 90, 0, 0, 147, 140, 160…
## $ OpenPorchSF <int> 61, 0, 42, 35, 84, 30, 57, 204, 0, 4, 0, 21, 0, 33, 213,…
## $ EnclosedPorch <int> 0, 0, 0, 272, 0, 0, 0, 228, 205, 0, 0, 0, 0, 0, 176, 0, …
## $ X3SsnPorch <int> 0, 0, 0, 0, 0, 320, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, …
## $ ScreenPorch <int> 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 176, 0, 0, 0, 0, 0, …
## $ PoolArea <int> 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,…
## $ PoolQC <fct> NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, …
## $ Fence <fct> NA, NA, NA, NA, NA, MnPrv, NA, NA, NA, NA, NA, NA, NA, N…
## $ MiscFeature <fct> NA, NA, NA, NA, NA, Shed, NA, Shed, NA, NA, NA, NA, NA, …
## $ MiscVal <int> 0, 0, 0, 0, 0, 700, 0, 350, 0, 0, 0, 0, 0, 0, 0, 0, 700,…
## $ MoSold <int> 2, 5, 9, 2, 12, 10, 8, 11, 4, 1, 2, 7, 9, 8, 5, 7, 3, 10…
## $ YrSold <int> 2008, 2007, 2008, 2006, 2008, 2009, 2007, 2009, 2008, 20…
## $ SaleType <fct> WD, WD, WD, WD, WD, WD, WD, WD, WD, WD, WD, New, WD, New…
## $ SaleCondition <fct> Normal, Normal, Normal, Abnorml, Normal, Normal, Normal,…
## $ SalePrice <int> 208500, 181500, 223500, 140000, 250000, 143000, 307000, …
# Number of rows, and how many of them are exact duplicates of an earlier row.
nrow(house_price)## [1] 1460
sum(duplicated(house_price))## [1] 0
The data consists of 1,460 rows, none of which are duplicated.
# Count the distinct values in every column.
house_price %>%
  summarise(across(everything(), n_distinct))
# We can see the number of unique values in every column.
# Number of missing values (NA) in each column.
colSums(is.na(house_price))## Id MSSubClass MSZoning LotFrontage LotArea
## 0 0 0 259 0
## Street Alley LotShape LandContour Utilities
## 0 1369 0 0 0
## LotConfig LandSlope Neighborhood Condition1 Condition2
## 0 0 0 0 0
## BldgType HouseStyle OverallQual OverallCond YearBuilt
## 0 0 0 0 0
## YearRemodAdd RoofStyle RoofMatl Exterior1st Exterior2nd
## 0 0 0 0 0
## MasVnrType MasVnrArea ExterQual ExterCond Foundation
## 8 8 0 0 0
## BsmtQual BsmtCond BsmtExposure BsmtFinType1 BsmtFinSF1
## 37 37 38 37 0
## BsmtFinType2 BsmtFinSF2 BsmtUnfSF TotalBsmtSF Heating
## 38 0 0 0 0
## HeatingQC CentralAir Electrical X1stFlrSF X2ndFlrSF
## 0 0 1 0 0
## LowQualFinSF GrLivArea BsmtFullBath BsmtHalfBath FullBath
## 0 0 0 0 0
## HalfBath BedroomAbvGr KitchenAbvGr KitchenQual TotRmsAbvGrd
## 0 0 0 0 0
## Functional Fireplaces FireplaceQu GarageType GarageYrBlt
## 0 0 690 81 81
## GarageFinish GarageCars GarageArea GarageQual GarageCond
## 81 0 0 81 81
## PavedDrive WoodDeckSF OpenPorchSF EnclosedPorch X3SsnPorch
## 0 0 0 0 0
## ScreenPorch PoolArea PoolQC Fence MiscFeature
## 0 0 1453 1179 1406
## MiscVal MoSold YrSold SaleType SaleCondition
## 0 0 0 0 0
## SalePrice
## 0
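Let’s drop the columns in which more than 5% of the rows (73 of 1,460) are missing: LotFrontage, Alley, FireplaceQu, PoolQC, Fence, and MiscFeature. (The garage columns, with 81 missing values each, are also slightly above this threshold but are kept; their incomplete rows are removed by na.omit() further below.)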
# Remove the columns that are mostly missing.
house_price <- house_price %>%
  select(-c(LotFrontage, Alley, FireplaceQu, PoolQC, Fence, MiscFeature))
# We also need to drop unused columns and columns that would harm the
# prediction step. Id is of no use to the model. Condition2 and Exterior1st are
# categorical columns that become a problem at test time: when the data is
# split into train and test sets with random seed 100, some of their categories
# appear only in the test set, so predicting on the test set raises an error.
# Drop the identifier and the two categorical columns excluded from modelling.
house_price <- house_price %>%
  select(-c(Id, Condition2, Exterior1st))
# Let's drop rows with NA values.
# Remove every row that still contains a missing value, then confirm that no
# NA is left anywhere in the data frame.
house_price <- house_price %>%
  na.omit()
sum(is.na(house_price))## [1] 0
# Select the "Rounding" sampling algorithm; R warns that this sampler is
# non-uniform. NOTE(review): presumably chosen to reproduce results from an
# older R version — confirm it is still needed.
RNGkind(sample.kind = "Rounding")## Warning in RNGkind(sample.kind = "Rounding"): non-uniform 'Rounding' sampler
## used
# Fix the seed so the random train-test split below is reproducible.
set.seed(100)
library(rsample)## Warning: package 'rsample' was built under R version 4.2.2
# train-test splitting
# Draw 80% of the row numbers at random for training; the remaining rows form
# the test set. The fractional size is truncated by sample().
index <- sample(x = nrow(house_price), size = nrow(house_price) * 0.8)
hp_train <- house_price[index, ] # subset the data by the sampled row indices
hp_test <- house_price[-index, ]
nrow(hp_test)## [1] 268
nrow(hp_train)## [1] 1070
# Fit a linear regression of SalePrice on every other column of the training
# set, then print the coefficient table and fit statistics.
model_price_all <- lm(formula = SalePrice~., data = hp_train)
summary(model_price_all)##
## Call:
## lm(formula = SalePrice ~ ., data = hp_train)
##
## Residuals:
## Min 1Q Median 3Q Max
## -323837 -10454 187 10411 147699
##
## Coefficients: (3 not defined because of singularities)
## Estimate Std. Error t value Pr(>|t|)
## (Intercept) -1.987e+06 1.450e+06 -1.370 0.171115
## MSSubClass -3.622e+01 1.351e+02 -0.268 0.788770
## MSZoningFV 5.770e+04 1.627e+04 3.545 0.000413 ***
## MSZoningRH 3.915e+04 1.703e+04 2.299 0.021770 *
## MSZoningRL 3.970e+04 1.402e+04 2.833 0.004723 **
## MSZoningRM 3.538e+04 1.308e+04 2.705 0.006968 **
## LotArea 6.276e-01 1.846e-01 3.401 0.000703 ***
## StreetPave 4.319e+04 2.621e+04 1.648 0.099768 .
## LotShapeIR2 5.719e+03 5.834e+03 0.980 0.327165
## LotShapeIR3 9.069e+03 1.187e+04 0.764 0.445099
## LotShapeReg 1.944e+03 2.214e+03 0.878 0.380113
## LandContourHLS 1.785e+04 7.068e+03 2.525 0.011745 *
## LandContourLow -8.841e+03 8.962e+03 -0.987 0.324143
## LandContourLvl 1.176e+04 5.460e+03 2.155 0.031475 *
## UtilitiesNoSeWa -5.461e+04 3.245e+04 -1.683 0.092795 .
## LotConfigCulDSac 7.306e+03 4.254e+03 1.717 0.086249 .
## LotConfigFR2 -1.197e+04 5.690e+03 -2.104 0.035692 *
## LotConfigFR3 -2.285e+04 1.627e+04 -1.405 0.160514
## LotConfigInside -3.433e+03 2.388e+03 -1.437 0.150993
## LandSlopeMod 7.856e+03 5.764e+03 1.363 0.173256
## LandSlopeSev -3.667e+04 1.438e+04 -2.551 0.010925 *
## NeighborhoodBlueste -2.690e+03 2.310e+04 -0.116 0.907350
## NeighborhoodBrDale 9.045e+03 1.458e+04 0.620 0.535296
## NeighborhoodBrkSide -5.250e+02 1.311e+04 -0.040 0.968067
## NeighborhoodClearCr -1.724e+04 1.249e+04 -1.380 0.167969
## NeighborhoodCollgCr -6.873e+03 9.481e+03 -0.725 0.468701
## NeighborhoodCrawfor 1.242e+04 1.136e+04 1.093 0.274719
## NeighborhoodEdwards -3.115e+04 1.072e+04 -2.905 0.003766 **
## NeighborhoodGilbert -1.244e+04 1.002e+04 -1.241 0.214832
## NeighborhoodIDOTRR -2.448e+03 1.519e+04 -0.161 0.872023
## NeighborhoodMeadowV -1.523e+03 1.496e+04 -0.102 0.918912
## NeighborhoodMitchel -2.015e+04 1.080e+04 -1.866 0.062395 .
## NeighborhoodNAmes -1.762e+04 1.025e+04 -1.719 0.086040 .
## NeighborhoodNoRidge 3.078e+04 1.093e+04 2.817 0.004953 **
## NeighborhoodNPkVill 8.912e+03 1.711e+04 0.521 0.602553
## NeighborhoodNridgHt 2.280e+04 9.863e+03 2.311 0.021056 *
## NeighborhoodNWAmes -1.997e+04 1.061e+04 -1.882 0.060144 .
## NeighborhoodOldTown -9.811e+03 1.322e+04 -0.742 0.458281
## NeighborhoodSawyer -1.393e+04 1.085e+04 -1.284 0.199638
## NeighborhoodSawyerW -2.554e+03 1.041e+04 -0.245 0.806330
## NeighborhoodSomerst -4.281e+03 1.188e+04 -0.360 0.718708
## NeighborhoodStoneBr 4.397e+04 1.069e+04 4.113 4.28e-05 ***
## NeighborhoodSWISU -2.892e+03 1.352e+04 -0.214 0.830689
## NeighborhoodTimber -8.737e+03 1.085e+04 -0.805 0.420956
## NeighborhoodVeenker -3.286e+03 1.350e+04 -0.243 0.807749
## Condition1Feedr 7.274e+03 7.334e+03 0.992 0.321602
## Condition1Norm 1.552e+04 6.024e+03 2.576 0.010163 *
## Condition1PosA 2.019e+04 1.632e+04 1.237 0.216345
## Condition1PosN -1.053e+04 9.447e+03 -1.114 0.265416
## Condition1RRAe -1.392e+04 1.310e+04 -1.062 0.288396
## Condition1RRAn 9.213e+03 9.054e+03 1.018 0.309168
## Condition1RRNe -4.852e+03 2.110e+04 -0.230 0.818219
## Condition1RRNn 9.586e+03 2.014e+04 0.476 0.634162
## BldgType2fmCon -6.619e+03 2.156e+04 -0.307 0.758938
## BldgTypeDuplex -1.813e+04 1.195e+04 -1.517 0.129712
## BldgTypeTwnhs -2.387e+04 1.536e+04 -1.554 0.120619
## BldgTypeTwnhsE -1.635e+04 1.411e+04 -1.159 0.246902
## HouseStyle1.5Unf 9.923e+03 1.121e+04 0.885 0.376383
## HouseStyle1Story 8.713e+03 6.425e+03 1.356 0.175440
## HouseStyle2.5Fin -1.701e+04 2.071e+04 -0.821 0.411673
## HouseStyle2.5Unf -2.132e+03 1.380e+04 -0.155 0.877227
## HouseStyle2Story -5.812e+03 5.059e+03 -1.149 0.250989
## HouseStyleSFoyer 7.199e+03 9.363e+03 0.769 0.442204
## HouseStyleSLvl 5.221e+03 8.149e+03 0.641 0.521888
## OverallQual 5.644e+03 1.452e+03 3.886 0.000110 ***
## OverallCond 6.143e+03 1.249e+03 4.920 1.04e-06 ***
## YearBuilt 3.569e+02 1.160e+02 3.075 0.002168 **
## YearRemodAdd 4.802e+01 8.212e+01 0.585 0.558842
## RoofStyleGable -1.565e+04 2.254e+04 -0.694 0.487612
## RoofStyleGambrel -9.366e+03 2.485e+04 -0.377 0.706362
## RoofStyleHip -1.310e+04 2.261e+04 -0.579 0.562703
## RoofStyleMansard -4.672e+03 2.683e+04 -0.174 0.861787
## RoofStyleShed -2.807e+04 4.155e+04 -0.676 0.499373
## RoofMatlCompShg 5.933e+05 4.140e+04 14.331 < 2e-16 ***
## RoofMatlMembran 6.633e+05 5.844e+04 11.351 < 2e-16 ***
## RoofMatlMetal 6.355e+05 5.679e+04 11.190 < 2e-16 ***
## RoofMatlRoll 5.985e+05 5.179e+04 11.556 < 2e-16 ***
## RoofMatlTar&Grv 5.742e+05 4.686e+04 12.254 < 2e-16 ***
## RoofMatlWdShake 5.914e+05 4.528e+04 13.062 < 2e-16 ***
## RoofMatlWdShngl 6.825e+05 4.291e+04 15.903 < 2e-16 ***
## Exterior2ndAsphShn 1.237e+04 2.417e+04 0.512 0.609098
## Exterior2ndBrk Cmn -3.407e+03 2.387e+04 -0.143 0.886538
## Exterior2ndBrkFace 1.853e+04 1.195e+04 1.551 0.121382
## Exterior2ndCBlock -1.920e+04 4.045e+04 -0.475 0.635129
## Exterior2ndCmentBd 9.135e+02 1.110e+04 0.082 0.934411
## Exterior2ndHdBoard 7.064e+02 9.928e+03 0.071 0.943296
## Exterior2ndImStucc 2.111e+04 1.455e+04 1.451 0.147159
## Exterior2ndMetalSd 8.491e+03 9.826e+03 0.864 0.387765
## Exterior2ndOther -2.265e+04 2.986e+04 -0.759 0.448356
## Exterior2ndPlywood 2.969e+03 1.024e+04 0.290 0.771889
## Exterior2ndStone -2.969e+03 3.223e+04 -0.092 0.926632
## Exterior2ndStucco 1.265e+03 1.247e+04 0.101 0.919204
## Exterior2ndVinylSd 6.584e+03 9.827e+03 0.670 0.503026
## Exterior2ndWd Sdng 6.819e+03 9.782e+03 0.697 0.485968
## Exterior2ndWd Shng -3.954e+02 1.108e+04 -0.036 0.971541
## MasVnrTypeBrkFace 8.306e+03 8.354e+03 0.994 0.320379
## MasVnrTypeNone 9.527e+03 8.396e+03 1.135 0.256809
## MasVnrTypeStone 1.454e+04 8.893e+03 1.635 0.102358
## MasVnrArea 1.196e+01 7.599e+00 1.573 0.116009
## ExterQualFa 7.161e+03 2.648e+04 0.270 0.786873
## ExterQualGd -1.170e+04 6.195e+03 -1.888 0.059343 .
## ExterQualTA -1.181e+04 6.852e+03 -1.723 0.085202 .
## ExterCondFa 1.438e+04 3.140e+04 0.458 0.647149
## ExterCondGd 9.515e+03 2.990e+04 0.318 0.750354
## ExterCondTA 1.188e+04 2.987e+04 0.398 0.691025
## FoundationCBlock 3.624e+03 4.701e+03 0.771 0.440995
## FoundationPConc 3.204e+03 4.916e+03 0.652 0.514760
## FoundationStone 1.496e+04 1.350e+04 1.108 0.268209
## FoundationWood -3.058e+04 1.793e+04 -1.705 0.088475 .
## BsmtQualFa -1.425e+04 8.648e+03 -1.648 0.099665 .
## BsmtQualGd -1.820e+04 4.272e+03 -4.261 2.26e-05 ***
## BsmtQualTA -1.596e+04 5.405e+03 -2.952 0.003242 **
## BsmtCondGd -5.668e+02 7.785e+03 -0.073 0.941973
## BsmtCondPo 3.286e+04 4.851e+04 0.677 0.498402
## BsmtCondTA 3.302e+03 6.459e+03 0.511 0.609349
## BsmtExposureGd 1.901e+04 4.011e+03 4.740 2.50e-06 ***
## BsmtExposureMn -1.898e+03 3.984e+03 -0.476 0.633880
## BsmtExposureNo -4.490e+03 2.906e+03 -1.545 0.122740
## BsmtFinType1BLQ 5.680e+03 3.866e+03 1.469 0.142199
## BsmtFinType1GLQ 6.938e+03 3.507e+03 1.978 0.048209 *
## BsmtFinType1LwQ -4.164e+02 5.364e+03 -0.078 0.938147
## BsmtFinType1Rec 3.763e+03 4.182e+03 0.900 0.368504
## BsmtFinType1Unf 3.152e+03 4.039e+03 0.780 0.435340
## BsmtFinSF1 3.535e+01 7.585e+00 4.661 3.65e-06 ***
## BsmtFinType2BLQ -1.074e+04 1.069e+04 -1.005 0.315048
## BsmtFinType2GLQ -6.562e+03 1.244e+04 -0.528 0.597895
## BsmtFinType2LwQ -1.483e+04 9.618e+03 -1.542 0.123424
## BsmtFinType2Rec -9.269e+03 9.550e+03 -0.971 0.332032
## BsmtFinType2Unf -1.142e+04 9.910e+03 -1.152 0.249532
## BsmtFinSF2 2.340e+01 1.266e+01 1.849 0.064802 .
## BsmtUnfSF 2.074e+01 6.962e+00 2.980 0.002965 **
## TotalBsmtSF NA NA NA NA
## HeatingGasW -1.012e+03 1.026e+04 -0.099 0.921441
## HeatingGrav 1.187e+04 2.761e+04 0.430 0.667295
## HeatingOthW -2.439e+04 3.361e+04 -0.726 0.468313
## HeatingQCFa 5.196e+03 6.871e+03 0.756 0.449668
## HeatingQCGd -4.482e+03 2.833e+03 -1.582 0.114006
## HeatingQCPo -6.261e+03 3.376e+04 -0.185 0.852927
## HeatingQCTA -4.241e+03 3.007e+03 -1.411 0.158730
## CentralAirY 3.942e+03 6.478e+03 0.608 0.543044
## ElectricalFuseF 1.352e+03 9.768e+03 0.138 0.889940
## ElectricalFuseP 1.366e+04 2.898e+04 0.471 0.637531
## ElectricalMix NA NA NA NA
## ElectricalSBrkr -1.373e+03 4.293e+03 -0.320 0.749264
## X1stFlrSF 4.100e+01 8.033e+00 5.103 4.10e-07 ***
## X2ndFlrSF 6.284e+01 7.566e+00 8.306 3.80e-16 ***
## LowQualFinSF 5.533e+00 2.888e+01 0.192 0.848100
## GrLivArea NA NA NA NA
## BsmtFullBath 1.529e+03 2.736e+03 0.559 0.576453
## BsmtHalfBath 7.139e+02 4.195e+03 0.170 0.864906
## FullBath 2.340e+03 3.195e+03 0.732 0.464093
## HalfBath 2.767e+02 2.944e+03 0.094 0.925154
## BedroomAbvGr -1.571e+03 1.952e+03 -0.805 0.421089
## KitchenAbvGr -1.992e+04 8.951e+03 -2.225 0.026334 *
## KitchenQualFa -2.539e+04 9.149e+03 -2.775 0.005631 **
## KitchenQualGd -2.603e+04 4.834e+03 -5.384 9.40e-08 ***
## KitchenQualTA -2.406e+04 5.517e+03 -4.361 1.45e-05 ***
## TotRmsAbvGrd 2.876e+03 1.304e+03 2.206 0.027668 *
## FunctionalMaj2 -1.953e+04 2.374e+04 -0.823 0.410925
## FunctionalMin1 -1.993e+03 1.187e+04 -0.168 0.866734
## FunctionalMin2 -2.368e+03 1.207e+04 -0.196 0.844569
## FunctionalMod -8.219e+03 1.489e+04 -0.552 0.581110
## FunctionalSev -5.392e+04 3.670e+04 -1.469 0.142095
## FunctionalTyp 7.671e+03 1.020e+04 0.752 0.452109
## Fireplaces 3.506e+03 1.819e+03 1.927 0.054275 .
## GarageTypeAttchd 1.825e+04 1.515e+04 1.204 0.228817
## GarageTypeBasment 3.443e+04 1.745e+04 1.973 0.048769 *
## GarageTypeBuiltIn 1.451e+04 1.570e+04 0.924 0.355664
## GarageTypeCarPort 3.295e+04 2.058e+04 1.601 0.109672
## GarageTypeDetchd 2.085e+04 1.499e+04 1.391 0.164657
## GarageYrBlt -2.343e+01 8.838e+01 -0.265 0.791007
## GarageFinishRFn -2.715e+03 2.631e+03 -1.032 0.302423
## GarageFinishUnf -1.278e+03 3.238e+03 -0.394 0.693314
## GarageCars 9.008e+03 3.058e+03 2.945 0.003311 **
## GarageArea 4.193e+00 1.083e+01 0.387 0.698848
## GarageQualFa -7.619e+04 3.955e+04 -1.927 0.054351 .
## GarageQualGd -6.079e+04 4.069e+04 -1.494 0.135567
## GarageQualPo -1.038e+05 5.317e+04 -1.952 0.051280 .
## GarageQualTA -7.176e+04 3.918e+04 -1.831 0.067370 .
## GarageCondFa 6.407e+04 4.955e+04 1.293 0.196312
## GarageCondGd 5.858e+04 5.146e+04 1.138 0.255270
## GarageCondPo 7.191e+04 5.312e+04 1.354 0.176211
## GarageCondTA 6.737e+04 4.926e+04 1.368 0.171717
## PavedDriveP -3.707e+03 8.205e+03 -0.452 0.651546
## PavedDriveY 1.321e+03 5.312e+03 0.249 0.803698
## WoodDeckSF 1.353e+01 7.871e+00 1.719 0.085889 .
## OpenPorchSF -9.911e+00 1.624e+01 -0.610 0.541868
## EnclosedPorch 2.283e+00 1.732e+01 0.132 0.895177
## X3SsnPorch 5.489e+01 2.920e+01 1.880 0.060426 .
## ScreenPorch 3.234e+01 1.604e+01 2.016 0.044151 *
## PoolArea 1.035e+02 2.550e+01 4.060 5.34e-05 ***
## MiscVal -1.705e-01 1.712e+00 -0.100 0.920686
## MoSold -3.663e+02 3.387e+02 -1.082 0.279683
## YrSold 2.732e+02 7.089e+02 0.385 0.700069
## SaleTypeCon 2.542e+04 2.095e+04 1.213 0.225460
## SaleTypeConLD 2.920e+04 1.648e+04 1.772 0.076751 .
## SaleTypeConLI 6.839e+03 1.549e+04 0.442 0.658913
## SaleTypeConLw -1.225e+03 2.114e+04 -0.058 0.953810
## SaleTypeCWD 1.182e+04 1.556e+04 0.760 0.447703
## SaleTypeNew 1.988e+04 1.925e+04 1.032 0.302155
## SaleTypeOth 2.243e+04 2.765e+04 0.811 0.417486
## SaleTypeWD -2.270e+03 5.524e+03 -0.411 0.681219
## SaleConditionAdjLand 2.819e+04 3.111e+04 0.906 0.365176
## SaleConditionAlloca 9.052e+03 1.351e+04 0.670 0.502999
## SaleConditionFamily -5.259e+03 8.166e+03 -0.644 0.519705
## SaleConditionNormal 2.715e+03 4.121e+03 0.659 0.510278
## SaleConditionPartial -8.667e+03 1.845e+04 -0.470 0.638683
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## Residual standard error: 26490 on 866 degrees of freedom
## Multiple R-squared: 0.9129, Adjusted R-squared: 0.8925
## F-statistic: 44.74 on 203 and 866 DF, p-value: < 2.2e-16
# Backward stepwise selection: start from the full model and let step() remove
# predictors. trace = FALSE suppresses the per-step log.
model_backward <- step(
  object = model_price_all,
  direction = "backward",
  trace = FALSE
)
summary(model_backward)##
## Call:
## lm(formula = SalePrice ~ MSSubClass + MSZoning + LotArea + Street +
## LandContour + Utilities + LotConfig + LandSlope + Neighborhood +
## Condition1 + BldgType + OverallQual + OverallCond + YearBuilt +
## YearRemodAdd + RoofMatl + MasVnrArea + ExterQual + BsmtQual +
## BsmtExposure + BsmtFinSF1 + BsmtFinSF2 + BsmtUnfSF + X1stFlrSF +
## X2ndFlrSF + BedroomAbvGr + KitchenAbvGr + KitchenQual + TotRmsAbvGrd +
## Functional + Fireplaces + GarageCars + X3SsnPorch + ScreenPorch +
## PoolArea + SaleType, data = hp_train)
##
## Residuals:
## Min 1Q Median 3Q Max
## -336627 -10740 340 10196 154771
##
## Coefficients:
## Estimate Std. Error t value Pr(>|t|)
## (Intercept) -1.593e+06 1.999e+05 -7.970 4.45e-15 ***
## MSSubClass -1.149e+02 6.337e+01 -1.813 0.070169 .
## MSZoningFV 4.834e+04 1.471e+04 3.285 0.001057 **
## MSZoningRH 2.946e+04 1.528e+04 1.928 0.054205 .
## MSZoningRL 3.157e+04 1.257e+04 2.511 0.012202 *
## MSZoningRM 2.778e+04 1.172e+04 2.371 0.017927 *
## LotArea 5.390e-01 1.636e-01 3.295 0.001018 **
## StreetPave 4.171e+04 2.032e+04 2.053 0.040377 *
## LandContourHLS 1.711e+04 6.509e+03 2.628 0.008730 **
## LandContourLow -1.092e+04 8.245e+03 -1.324 0.185683
## LandContourLvl 1.125e+04 4.875e+03 2.307 0.021248 *
## UtilitiesNoSeWa -4.368e+04 2.837e+04 -1.540 0.123940
## LotConfigCulDSac 8.345e+03 3.906e+03 2.136 0.032900 *
## LotConfigFR2 -8.925e+03 5.362e+03 -1.664 0.096355 .
## LotConfigFR3 -1.638e+04 1.583e+04 -1.035 0.300997
## LotConfigInside -2.194e+03 2.213e+03 -0.991 0.321880
## LandSlopeMod 9.253e+03 5.263e+03 1.758 0.079055 .
## LandSlopeSev -3.642e+04 1.307e+04 -2.786 0.005441 **
## NeighborhoodBlueste -2.245e+03 2.140e+04 -0.105 0.916460
## NeighborhoodBrDale 8.289e+02 1.328e+04 0.062 0.950229
## NeighborhoodBrkSide -2.458e+02 1.153e+04 -0.021 0.982995
## NeighborhoodClearCr -1.109e+04 1.141e+04 -0.972 0.331156
## NeighborhoodCollgCr -4.247e+03 8.687e+03 -0.489 0.625053
## NeighborhoodCrawfor 1.213e+04 1.039e+04 1.167 0.243450
## NeighborhoodEdwards -2.445e+04 9.692e+03 -2.523 0.011797 *
## NeighborhoodGilbert -1.183e+04 9.172e+03 -1.290 0.197348
## NeighborhoodIDOTRR 1.128e+03 1.328e+04 0.085 0.932290
## NeighborhoodMeadowV -4.554e+03 1.258e+04 -0.362 0.717383
## NeighborhoodMitchel -2.177e+04 9.870e+03 -2.206 0.027640 *
## NeighborhoodNAmes -1.610e+04 9.321e+03 -1.727 0.084455 .
## NeighborhoodNoRidge 3.651e+04 9.956e+03 3.667 0.000259 ***
## NeighborhoodNPkVill -1.462e+03 1.291e+04 -0.113 0.909823
## NeighborhoodNridgHt 2.418e+04 8.892e+03 2.719 0.006658 **
## NeighborhoodNWAmes -2.250e+04 9.567e+03 -2.352 0.018869 *
## NeighborhoodOldTown -1.117e+04 1.184e+04 -0.944 0.345379
## NeighborhoodSawyer -1.400e+04 9.899e+03 -1.415 0.157530
## NeighborhoodSawyerW -2.795e+03 9.443e+03 -0.296 0.767261
## NeighborhoodSomerst -1.250e+03 1.075e+04 -0.116 0.907510
## NeighborhoodStoneBr 3.998e+04 9.720e+03 4.113 4.23e-05 ***
## NeighborhoodSWISU 4.768e+01 1.226e+04 0.004 0.996898
## NeighborhoodTimber -1.043e+04 1.016e+04 -1.026 0.304927
## NeighborhoodVeenker -6.469e+03 1.219e+04 -0.531 0.595673
## Condition1Feedr 4.568e+03 6.530e+03 0.700 0.484395
## Condition1Norm 1.097e+04 5.245e+03 2.092 0.036696 *
## Condition1PosA 1.616e+04 1.532e+04 1.055 0.291497
## Condition1PosN -1.296e+04 8.495e+03 -1.526 0.127370
## Condition1RRAe -1.247e+04 1.144e+04 -1.090 0.275927
## Condition1RRAn 4.957e+03 8.122e+03 0.610 0.541740
## Condition1RRNe -9.149e+03 2.020e+04 -0.453 0.650725
## Condition1RRNn 1.290e+04 1.830e+04 0.705 0.481034
## BldgType2fmCon 1.133e+04 1.187e+04 0.955 0.340060
## BldgTypeDuplex -8.537e+03 9.388e+03 -0.909 0.363401
## BldgTypeTwnhs -1.579e+04 9.517e+03 -1.659 0.097373 .
## BldgTypeTwnhsE -7.852e+03 7.511e+03 -1.045 0.296071
## OverallQual 6.235e+03 1.297e+03 4.806 1.79e-06 ***
## OverallCond 6.468e+03 1.054e+03 6.139 1.21e-09 ***
## YearBuilt 3.728e+02 8.123e+01 4.590 5.02e-06 ***
## YearRemodAdd 1.077e+02 7.211e+01 1.494 0.135417
## RoofMatlCompShg 6.056e+05 3.531e+04 17.153 < 2e-16 ***
## RoofMatlMembran 6.766e+05 4.886e+04 13.850 < 2e-16 ***
## RoofMatlMetal 6.519e+05 4.744e+04 13.740 < 2e-16 ***
## RoofMatlRoll 6.106e+05 4.480e+04 13.628 < 2e-16 ***
## RoofMatlTar&Grv 5.954e+05 3.686e+04 16.153 < 2e-16 ***
## RoofMatlWdShake 6.057e+05 3.883e+04 15.599 < 2e-16 ***
## RoofMatlWdShngl 7.078e+05 3.681e+04 19.230 < 2e-16 ***
## MasVnrArea 9.809e+00 5.706e+00 1.719 0.085941 .
## ExterQualFa 5.836e+03 1.819e+04 0.321 0.748373
## ExterQualGd -1.199e+04 5.702e+03 -2.103 0.035690 *
## ExterQualTA -1.323e+04 6.327e+03 -2.091 0.036757 *
## BsmtQualFa -1.671e+04 7.776e+03 -2.148 0.031928 *
## BsmtQualGd -2.066e+04 3.989e+03 -5.178 2.72e-07 ***
## BsmtQualTA -1.809e+04 4.932e+03 -3.668 0.000258 ***
## BsmtExposureGd 1.964e+04 3.799e+03 5.168 2.87e-07 ***
## BsmtExposureMn -2.286e+03 3.731e+03 -0.613 0.540191
## BsmtExposureNo -5.322e+03 2.679e+03 -1.987 0.047211 *
## BsmtFinSF1 3.600e+01 6.245e+00 5.764 1.10e-08 ***
## BsmtFinSF2 2.777e+01 7.854e+00 3.537 0.000425 ***
## BsmtUnfSF 1.998e+01 6.102e+00 3.274 0.001096 **
## X1stFlrSF 4.557e+01 6.784e+00 6.717 3.17e-11 ***
## X2ndFlrSF 5.213e+01 4.427e+00 11.775 < 2e-16 ***
## BedroomAbvGr -3.095e+03 1.723e+03 -1.796 0.072818 .
## KitchenAbvGr -2.278e+04 7.451e+03 -3.057 0.002296 **
## KitchenQualFa -2.467e+04 8.254e+03 -2.988 0.002876 **
## KitchenQualGd -2.594e+04 4.438e+03 -5.845 6.91e-09 ***
## KitchenQualTA -2.435e+04 5.028e+03 -4.844 1.48e-06 ***
## TotRmsAbvGrd 3.259e+03 1.186e+03 2.748 0.006113 **
## FunctionalMaj2 -1.942e+04 1.888e+04 -1.029 0.303886
## FunctionalMin1 -3.995e+03 1.104e+04 -0.362 0.717497
## FunctionalMin2 -3.603e+03 1.109e+04 -0.325 0.745249
## FunctionalMod -6.284e+03 1.371e+04 -0.458 0.646721
## FunctionalSev -6.389e+04 3.060e+04 -2.088 0.037070 *
## FunctionalTyp 5.596e+03 9.455e+03 0.592 0.554055
## Fireplaces 4.459e+03 1.670e+03 2.670 0.007721 **
## GarageCars 9.609e+03 1.938e+03 4.959 8.39e-07 ***
## X3SsnPorch 4.407e+01 2.731e+01 1.614 0.106949
## ScreenPorch 3.291e+01 1.461e+01 2.253 0.024499 *
## PoolArea 1.139e+02 2.181e+01 5.221 2.18e-07 ***
## SaleTypeCon 3.639e+04 2.016e+04 1.806 0.071301 .
## SaleTypeConLD 2.067e+04 1.433e+04 1.443 0.149420
## SaleTypeConLI -1.091e+03 1.466e+04 -0.074 0.940702
## SaleTypeConLw 1.230e+04 2.006e+04 0.613 0.540117
## SaleTypeCWD 7.326e+03 1.454e+04 0.504 0.614579
## SaleTypeNew 9.258e+03 6.148e+03 1.506 0.132405
## SaleTypeOth 2.444e+04 2.687e+04 0.910 0.363194
## SaleTypeWD -7.366e+02 5.026e+03 -0.147 0.883512
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## Residual standard error: 26240 on 965 degrees of freedom
## Multiple R-squared: 0.9049, Adjusted R-squared: 0.8946
## F-statistic: 88.25 on 104 and 965 DF, p-value: < 2.2e-16
The adjusted R-squared of model_backward is 0.8946, which is a fairly good value and is higher than that of model_price_all (0.8925).
# Predict sale prices for the held-out test rows with the reduced model and
# store them next to the true prices.
hp_test$price_predict <- predict(model_backward, newdata = hp_test)
library(MLmetrics)## Warning: package 'MLmetrics' was built under R version 4.2.2
##
## Attaching package: 'MLmetrics'
## The following object is masked from 'package:base':
##
## Recall
# Root-mean-squared error of the test-set predictions.
rms_error <- RMSE(
  y_true = hp_test$SalePrice,
  y_pred = hp_test$price_predict
)
rms_error## [1] 20605.21
# Mean absolute error of the test-set predictions.
absolute_error <- MAE(
  y_true = hp_test$SalePrice,
  y_pred = hp_test$price_predict
)
absolute_error## [1] 14280.77
RMSE = 20,605.21 MAE = 14,280.77
Let’s compare error values with average house price
# Mean sale price over all rows of house_price, used as the baseline against
# which the error values are compared.
average_price <- mean(house_price$SalePrice)
average_price## [1] 186761.8
Average house price = 186,761.8
# Express each error as a percentage of the average sale price.
rms_error/average_price*100## [1] 11.03288
absolute_error/average_price*100## [1] 7.646516
RMSE as a percentage of the average price ≈ 11%; MAE as a percentage of the average price ≈ 7.6%.