library(caret)
## Loading required package: ggplot2
## Loading required package: lattice
# Load the West Roxbury housing data from the mlba package, drop its second
# column by position, and inspect the structure of what remains.
housing01.df <- mlba::WestRoxbury
housing.df <- housing01.df[-2]
utils::str(housing.df)
## 'data.frame': 5802 obs. of 13 variables:
## $ TOTAL.VALUE: num 344 413 330 499 332 ...
## $ LOT.SQFT : int 9965 6590 7500 13773 5000 5142 5000 10000 6835 5093 ...
## $ YR.BUILT : int 1880 1945 1890 1957 1910 1950 1954 1950 1958 1900 ...
## $ GROSS.AREA : int 2436 3108 2294 5032 2370 2124 3220 2208 2582 4818 ...
## $ LIVING.AREA: int 1352 1976 1371 2608 1438 1060 1916 1200 1092 2992 ...
## $ FLOORS : num 2 2 2 1 2 1 2 1 1 2 ...
## $ ROOMS : int 6 10 8 9 7 6 7 6 5 8 ...
## $ BEDROOMS : int 3 4 4 5 3 3 3 3 3 4 ...
## $ FULL.BATH : int 1 2 1 1 2 1 1 1 1 2 ...
## $ HALF.BATH : int 1 1 1 1 0 0 1 0 0 0 ...
## $ KITCHEN : int 1 1 1 1 1 1 1 1 1 1 ...
## $ FIREPLACE : int 0 0 0 1 0 1 0 0 1 0 ...
## $ REMODEL : chr "None" "Recent" "None" "None" ...
# Name of the response column; used below when the modelling columns are
# selected.
outcom <- "TOTAL.VALUE"
# Show the column types once more before the predictors are chosen.
utils::str(object = housing.df)
## 'data.frame': 5802 obs. of 13 variables:
## $ TOTAL.VALUE: num 344 413 330 499 332 ...
## $ LOT.SQFT : int 9965 6590 7500 13773 5000 5142 5000 10000 6835 5093 ...
## $ YR.BUILT : int 1880 1945 1890 1957 1910 1950 1954 1950 1958 1900 ...
## $ GROSS.AREA : int 2436 3108 2294 5032 2370 2124 3220 2208 2582 4818 ...
## $ LIVING.AREA: int 1352 1976 1371 2608 1438 1060 1916 1200 1092 2992 ...
## $ FLOORS : num 2 2 2 1 2 1 2 1 1 2 ...
## $ ROOMS : int 6 10 8 9 7 6 7 6 5 8 ...
## $ BEDROOMS : int 3 4 4 5 3 3 3 3 3 4 ...
## $ FULL.BATH : int 1 2 1 1 2 1 1 1 1 2 ...
## $ HALF.BATH : int 1 1 1 1 0 0 1 0 0 0 ...
## $ KITCHEN : int 1 1 1 1 1 1 1 1 1 1 ...
## $ FIREPLACE : int 0 0 0 1 0 1 0 0 1 0 ...
## $ REMODEL : chr "None" "Recent" "None" "None" ...
# Candidate predictor columns for the regression.
predictors <- c(
  "LOT.SQFT", "YR.BUILT", "GROSS.AREA", "LIVING.AREA", "FLOORS", "ROOMS",
  "BEDROOMS", "FULL.BATH", "HALF.BATH", "KITCHEN", "FIREPLACE", "REMODEL"
)
# Keep only the first 1000 rows, with the outcome column placed first.
housing.df <- housing.df[seq_len(1000), c(outcom, predictors)]

# Partition on the outcome with p = 0.6; the seed is fixed immediately before
# the call so the split is reproducible.
set.seed(1)
idx <- caret::createDataPartition(
  housing.df$TOTAL.VALUE,
  p = 0.6,
  list = FALSE
)
train.df <- housing.df[idx, ]
holdout.df <- housing.df[-idx, ]

# Fit the linear model on the training rows using every remaining column as a
# predictor, then print its summary without scientific notation.
housing.lm <- lm(TOTAL.VALUE ~ ., data = train.df)
options(scipen = 999)
summary(housing.lm)
##
## Call:
## lm(formula = TOTAL.VALUE ~ ., data = train.df)
##
## Residuals:
## Min 1Q Median 3Q Max
## -95.727 -18.289 -1.575 18.677 90.987
##
## Coefficients:
## Estimate Std. Error t value Pr(>|t|)
## (Intercept) -1271.4347418 120.4002636 -10.560 < 0.0000000000000002 ***
## LOT.SQFT 0.0062462 0.0005326 11.728 < 0.0000000000000002 ***
## YR.BUILT 0.6845956 0.0615966 11.114 < 0.0000000000000002 ***
## GROSS.AREA 0.0286970 0.0030623 9.371 < 0.0000000000000002 ***
## LIVING.AREA 0.0391289 0.0052851 7.404 0.0000000000004623 ***
## FLOORS 24.7398816 3.1436691 7.870 0.0000000000000172 ***
## ROOMS 0.6787551 1.4763820 0.460 0.645871
## BEDROOMS 1.1239237 2.2794705 0.493 0.622152
## FULL.BATH 17.2673662 3.2432597 5.324 0.0000001446830437 ***
## HALF.BATH 10.2177861 2.8843747 3.542 0.000428 ***
## KITCHEN 8.4066697 5.9528691 1.412 0.158420
## FIREPLACE 1.2343088 2.2050379 0.560 0.575851
## REMODELOld 6.7309615 4.5966832 1.464 0.143645
## REMODELRecent 28.9446681 4.0690805 7.113 0.0000000000033110 ***
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## Residual standard error: 28.97 on 587 degrees of freedom
## Multiple R-squared: 0.8012, Adjusted R-squared: 0.7968
## F-statistic: 181.9 on 13 and 587 DF, p-value: < 0.00000000000000022
# Predict the holdout rows and show predicted value, actual value and residual
# (actual minus predicted) for the first 20 of them, printed with one
# significant digit.
pred <- predict(housing.lm, newdata = holdout.df)
options(scipen = 999, digits = 1)
data.frame(
  Predicted = pred[seq_len(20)],
  Actual = holdout.df$TOTAL.VALUE[seq_len(20)],
  Residual = (holdout.df$TOTAL.VALUE - pred)[seq_len(20)]
)
## Predicted Actual Residual
## 1 293 344 51
## 11 324 313 -11
## 13 249 316 66
## 16 259 298 40
## 17 278 313 35
## 19 277 331 53
## 21 242 318 75
## 22 275 331 55
## 26 287 346 59
## 28 261 317 56
## 29 220 247 27
## 31 289 329 40
## 33 324 280 -44
## 36 298 336 38
## 39 263 240 -23
## 41 404 432 27
## 42 304 350 46
## 44 306 358 52
## 45 312 346 34
## 46 474 491 17
# Compare the error summaries of the linear model on the training rows and on
# the holdout rows, printed with three significant digits.
options(scipen = 999, digits = 3)
rbind(
  Training = mlba::regressionSummary(
    predict(housing.lm, newdata = train.df),
    train.df$TOTAL.VALUE
  ),
  Holdout = mlba::regressionSummary(pred, holdout.df$TOTAL.VALUE)
)
## ME RMSE MAE
## Training 0.00000000000352 28.6 22.5
## Holdout -2.05 33.5 25.8
library(ggplot2)
library(caret)
# Residuals (actual minus predicted) of the linear model on the holdout rows.
pred <- predict(housing.lm, newdata = holdout.df)
all.residuals <- holdout.df$TOTAL.VALUE - pred
# Histogram of the holdout residuals. bins = 30 is the value stat_bin() was
# silently falling back to; stating it explicitly draws the same histogram
# without the "Pick better value with `binwidth`" message.
ggplot() +
  geom_histogram(
    aes(x = all.residuals),
    bins = 30,
    fill = "lightgray",
    color = "grey"
  ) +
  labs(x = "Residuals", y = "Frequency")

# Linear regression on all predictors, evaluated with 5-fold cross-validation.
# NOTE(review): the model is fit on housing.df, which still contains the rows
# of holdout.df, so holdout metrics later computed for this model via
# collectMetrics() are not out-of-sample -- confirm whether train.df was
# intended here.
set.seed(1)
trControl <- caret::trainControl(
  method = "cv",
  number = 5,
  allowParallel = TRUE
)
model <- caret::train(
  TOTAL.VALUE ~ .,
  data = housing.df,
  method = "lm",
  trControl = trControl
)
model
## Linear Regression
##
## 1000 samples
## 12 predictor
##
## No pre-processing
## Resampling: Cross-Validated (5 fold)
## Summary of sample sizes: 801, 799, 800, 800, 800
## Resampling results:
##
## RMSE Rsquared MAE
## 31.1 0.765 24.1
##
## Tuning parameter 'intercept' was held constant at a value of TRUE
# Coefficients of the final model stored in the caret fit.
stats::coef(model$finalModel)
## (Intercept) LOT.SQFT YR.BUILT GROSS.AREA LIVING.AREA
## -1212.50467 0.00626 0.65851 0.02625 0.03854
## FLOORS ROOMS BEDROOMS FULL.BATH HALF.BATH
## 25.06836 0.73489 1.80020 16.18717 9.56406
## KITCHEN FIREPLACE REMODELOld REMODELRecent
## 5.66075 1.18755 8.40823 28.06191
library(tidyverse)
## ── Attaching core tidyverse packages ──────────────────────── tidyverse 2.0.0 ──
## ✔ dplyr 1.1.2 ✔ readr 2.1.4
## ✔ forcats 1.0.0 ✔ stringr 1.5.0
## ✔ lubridate 1.9.2 ✔ tibble 3.2.1
## ✔ purrr 1.0.1 ✔ tidyr 1.3.0
## ── Conflicts ────────────────────────────────────────── tidyverse_conflicts() ──
## ✖ dplyr::filter() masks stats::filter()
## ✖ dplyr::lag() masks stats::lag()
## ✖ purrr::lift() masks caret::lift()
## ℹ Use the conflicted package (<http://conflicted.r-lib.org/>) to force all conflicts to become errors
# Collect cross-validation, training and holdout error metrics for one model.
#
# Arguments:
#   model       Fitted caret model; `model$results` must contain RMSE and MAE
#               columns and `model$finalModel` must support coef().
#   train.df    Data frame the "Training" metrics are computed on.
#   holdout.df  Data frame the "Holdout" metrics are computed on.
#   nPredictors Optional count reported in the result. When omitted it is the
#               number of coefficients of `model$finalModel` minus one, so
#               each dummy-coded level counts separately.
#   outcome     Name of the outcome column in both data frames; defaults to
#               "TOTAL.VALUE", the column this script models.
#
# Returns the column-wise combination (cbind) of the best cross-validation
# RMSE/MAE, the mlba::regressionSummary() results for the training and holdout
# data, and nPredictors.
collectMetrics <- function(model, train.df, holdout.df, nPredictors,
                           outcome = "TOTAL.VALUE") {
  if (missing(nPredictors)) {
    nPredictors <- length(coef(model$finalModel)) - 1
  }
  # with_ties = FALSE guarantees a single row even when several tuning rows
  # share the minimum RMSE, so the combined result stays one row.
  best.cv <- dplyr::slice_min(model$results, RMSE, with_ties = FALSE)
  cbind(
    CV = dplyr::select(best.cv, c(RMSE, MAE)),
    Training = mlba::regressionSummary(
      predict(model, train.df),
      train.df[[outcome]]
    ),
    Holdout = mlba::regressionSummary(
      predict(model, holdout.df),
      holdout.df[[outcome]]
    ),
    nPredictors = nPredictors
  )
}
# Store the metrics of the all-predictor model, then predict the first three
# rows of housing.df as a quick check.
metric.full <- collectMetrics(
  model = model,
  train.df = train.df,
  holdout.df = holdout.df
)
predict(model, newdata = housing.df[seq_len(3), ])
## 1 2 3
## 295 408 287
library(leaps)
library(fastDummies)
## Thank you for using fastDummies!
## To acknowledge our work, please cite the package:
## Kaplan, J. & Schlegel, B. (2023). fastDummies: Fast Creation of Dummy (Binary) Columns and Rows from Categorical Variables. Version 1.7.1. URL: https://github.com/jacobkap/fastDummies, https://jacobkap.github.io/fastDummies/.
# Replace the character column of train.df with dummy columns (first level
# dropped, original column removed) so that regsubsets() sees one column per
# term.
leaps.train.df <- fastDummies::dummy_cols(
  train.df,
  remove_first_dummy = TRUE,
  remove_selected_columns = TRUE
)
# Exhaustive search keeping the single best subset of each size.
search <- leaps::regsubsets(
  TOTAL.VALUE ~ .,
  data = leaps.train.df,
  nbest = 1,
  nvmax = ncol(leaps.train.df),
  method = "exhaustive"
)
# Note: this variable shares its name with base::sum(); later lines read it as
# `sum$...`, so the name is kept.
sum <- summary(search)
# One row per subset size; TRUE marks the terms included in that subset.
sum$which
## (Intercept) LOT.SQFT YR.BUILT GROSS.AREA LIVING.AREA FLOORS ROOMS BEDROOMS
## 1 TRUE FALSE FALSE FALSE TRUE FALSE FALSE FALSE
## 2 TRUE TRUE FALSE FALSE TRUE FALSE FALSE FALSE
## 3 TRUE FALSE TRUE TRUE TRUE FALSE FALSE FALSE
## 4 TRUE TRUE TRUE TRUE TRUE FALSE FALSE FALSE
## 5 TRUE TRUE TRUE TRUE TRUE TRUE FALSE FALSE
## 6 TRUE TRUE TRUE TRUE TRUE TRUE FALSE FALSE
## 7 TRUE TRUE TRUE TRUE TRUE TRUE FALSE FALSE
## 8 TRUE TRUE TRUE TRUE TRUE TRUE FALSE FALSE
## 9 TRUE TRUE TRUE TRUE TRUE TRUE FALSE FALSE
## 10 TRUE TRUE TRUE TRUE TRUE TRUE FALSE FALSE
## 11 TRUE TRUE TRUE TRUE TRUE TRUE TRUE FALSE
## 12 TRUE TRUE TRUE TRUE TRUE TRUE FALSE TRUE
## 13 TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE
## FULL.BATH HALF.BATH KITCHEN FIREPLACE REMODEL_Old REMODEL_Recent
## 1 FALSE FALSE FALSE FALSE FALSE FALSE
## 2 FALSE FALSE FALSE FALSE FALSE FALSE
## 3 FALSE FALSE FALSE FALSE FALSE FALSE
## 4 FALSE FALSE FALSE FALSE FALSE FALSE
## 5 FALSE FALSE FALSE FALSE FALSE FALSE
## 6 FALSE FALSE FALSE FALSE FALSE TRUE
## 7 TRUE FALSE FALSE FALSE FALSE TRUE
## 8 TRUE TRUE FALSE FALSE FALSE TRUE
## 9 TRUE TRUE TRUE FALSE FALSE TRUE
## 10 TRUE TRUE TRUE FALSE TRUE TRUE
## 11 TRUE TRUE TRUE FALSE TRUE TRUE
## 12 TRUE TRUE TRUE TRUE TRUE TRUE
## 13 TRUE TRUE TRUE TRUE TRUE TRUE
# R-squared of the best subset of each size.
sum[["rsq"]]
## [1] 0.596 0.646 0.700 0.736 0.767 0.787 0.794 0.799 0.800 0.801 0.801 0.801
## [13] 0.801
# Adjusted R-squared of the best subset of each size.
sum[["adjr2"]]
## [1] 0.595 0.645 0.698 0.734 0.765 0.785 0.791 0.797 0.797 0.797 0.797 0.797
## [13] 0.797
# Cp statistic of the best subset of each size.
sum[["cp"]]
## [1] 597.07 449.74 293.98 189.41 99.19 42.56 23.46 9.73 9.37 9.20
## [11] 10.54 12.21 14.00
# Pick the subset size with the smallest Cp and read off which terms it uses
# (the intercept column is dropped from the inclusion matrix).
optimal <- which.min(sum$cp)
X <- summary(search)$which[, -1]
# Derive the predictor columns from the search result instead of hard-coding
# them. The selected terms use the dummy names produced for the search (e.g.
# "REMODEL_Old"), which do not exist in housing.df, so every dummy term is
# mapped back to the train.df column it was generated from and duplicates are
# removed.
xvars <- local({
  chosen <- colnames(X)[X[optimal, ]]
  source.cols <- setdiff(names(train.df), "TOTAL.VALUE")
  to.source <- function(term) {
    if (term %in% source.cols) {
      return(term)
    }
    # A dummy column is named "<source column>_<level>".
    parents <- source.cols[startsWith(term, paste0(source.cols, "_"))]
    if (length(parents) == 0) {
      stop(
        "Cannot map selected term '", term, "' to a column of train.df",
        call. = FALSE
      )
    }
    # Prefer the longest matching column name if several prefixes match.
    parents[which.max(nchar(parents))]
  }
  unique(vapply(chosen, to.source, character(1), USE.NAMES = FALSE))
})
# Refit the linear regression with 5-fold cross-validation, restricted to the
# predictors chosen by the exhaustive search.
# NOTE(review): as with the all-predictor model, the fit uses housing.df,
# which contains the holdout.df rows -- confirm whether train.df was intended.
set.seed(1)
trControl <- caret::trainControl(
  method = "cv",
  number = 5,
  allowParallel = TRUE
)
model <- caret::train(
  TOTAL.VALUE ~ .,
  data = housing.df[, c("TOTAL.VALUE", xvars)],
  method = "lm",
  trControl = trControl
)
model
## Linear Regression
##
## 1000 samples
## 9 predictor
##
## No pre-processing
## Resampling: Cross-Validated (5 fold)
## Summary of sample sizes: 801, 799, 800, 800, 800
## Resampling results:
##
## RMSE Rsquared MAE
## 31 0.766 24.1
##
## Tuning parameter 'intercept' was held constant at a value of TRUE
# Coefficients of the reduced model.
stats::coef(model$finalModel)
## (Intercept) LOT.SQFT YR.BUILT GROSS.AREA LIVING.AREA
## -1216.68115 0.00627 0.66325 0.02650 0.04007
## FLOORS FULL.BATH HALF.BATH KITCHEN REMODELOld
## 25.40125 17.11530 10.24455 6.39764 8.65626
## REMODELRecent
## 28.26329
# Store the metrics of the reduced model.
metric.exhaustive <- collectMetrics(
  model = model,
  train.df = train.df,
  holdout.df = holdout.df
)
# Backward stepwise selection by AIC on the training rows. Resampling is
# switched off (method = "none"), so only the single stepwise fit is produced.
trControl <- caret::trainControl(method = "none")
model <- caret::train(
  TOTAL.VALUE ~ .,
  data = train.df,
  trControl = trControl,
  method = "glmStepAIC",
  direction = "backward"
)
## Start: AIC=5768
## .outcome ~ LOT.SQFT + YR.BUILT + GROSS.AREA + LIVING.AREA + FLOORS +
## ROOMS + BEDROOMS + FULL.BATH + HALF.BATH + KITCHEN + FIREPLACE +
## REMODELOld + REMODELRecent
##
## Df Deviance AIC
## - ROOMS 1 492967 5766
## - BEDROOMS 1 492994 5766
## - FIREPLACE 1 493053 5766
## <none> 492790 5768
## - KITCHEN 1 494464 5768
## - REMODELOld 1 494590 5768
## - HALF.BATH 1 503325 5779
## - FULL.BATH 1 516586 5794
## - REMODELRecent 1 535268 5816
## - LIVING.AREA 1 538806 5819
## - FLOORS 1 544783 5826
## - GROSS.AREA 1 566511 5850
## - YR.BUILT 1 596490 5881
## - LOT.SQFT 1 608258 5892
##
## Step: AIC=5766
## .outcome ~ LOT.SQFT + YR.BUILT + GROSS.AREA + LIVING.AREA + FLOORS +
## BEDROOMS + FULL.BATH + HALF.BATH + KITCHEN + FIREPLACE +
## REMODELOld + REMODELRecent
##
## Df Deviance AIC
## - FIREPLACE 1 493257 5764
## - BEDROOMS 1 493504 5765
## <none> 492967 5766
## - REMODELOld 1 494782 5766
## - KITCHEN 1 494793 5766
## - HALF.BATH 1 504128 5777
## - FULL.BATH 1 517506 5793
## - REMODELRecent 1 535768 5814
## - FLOORS 1 544955 5824
## - LIVING.AREA 1 545135 5824
## - GROSS.AREA 1 566566 5848
## - YR.BUILT 1 597340 5879
## - LOT.SQFT 1 609497 5892
##
## Step: AIC=5764
## .outcome ~ LOT.SQFT + YR.BUILT + GROSS.AREA + LIVING.AREA + FLOORS +
## BEDROOMS + FULL.BATH + HALF.BATH + KITCHEN + REMODELOld +
## REMODELRecent
##
## Df Deviance AIC
## - BEDROOMS 1 493796 5763
## <none> 493257 5764
## - KITCHEN 1 495047 5765
## - REMODELOld 1 495098 5765
## - HALF.BATH 1 504283 5776
## - FULL.BATH 1 517681 5791
## - REMODELRecent 1 536070 5812
## - FLOORS 1 545339 5823
## - LIVING.AREA 1 545743 5823
## - GROSS.AREA 1 572134 5852
## - YR.BUILT 1 611275 5891
## - LOT.SQFT 1 613496 5893
##
## Step: AIC=5763
## .outcome ~ LOT.SQFT + YR.BUILT + GROSS.AREA + LIVING.AREA + FLOORS +
## FULL.BATH + HALF.BATH + KITCHEN + REMODELOld + REMODELRecent
##
## Df Deviance AIC
## <none> 493796 5763
## - REMODELOld 1 495617 5763
## - KITCHEN 1 495715 5763
## - HALF.BATH 1 505628 5775
## - FULL.BATH 1 520435 5793
## - REMODELRecent 1 536747 5811
## - FLOORS 1 547545 5823
## - LIVING.AREA 1 553653 5830
## - GROSS.AREA 1 572364 5850
## - YR.BUILT 1 611602 5890
## - LOT.SQFT 1 613570 5892
# Coefficients of the model kept by backward elimination.
stats::coef(model$finalModel)
## (Intercept) LOT.SQFT YR.BUILT GROSS.AREA LIVING.AREA
## -1279.70645 0.00629 0.69082 0.02897 0.04039
## FLOORS FULL.BATH HALF.BATH KITCHEN REMODELOld
## 25.01876 17.85302 10.62199 8.92141 6.76882
## REMODELRecent
## 29.07483
# Forward stepwise selection by AIC on the training rows, reusing the
# no-resampling trControl defined earlier in the script.
model <- caret::train(
  TOTAL.VALUE ~ .,
  data = train.df,
  trControl = trControl,
  method = "glmStepAIC",
  direction = "forward"
)
## Start: AIC=6713
## .outcome ~ 1
##
## Df Deviance AIC
## + LIVING.AREA 1 1002430 6171
## + GROSS.AREA 1 1210022 6284
## + ROOMS 1 1742812 6503
## + FULL.BATH 1 1803972 6524
## + BEDROOMS 1 1956883 6573
## + YR.BUILT 1 2003950 6587
## + LOT.SQFT 1 2041754 6598
## + HALF.BATH 1 2320169 6675
## + FLOORS 1 2338215 6680
## + REMODELRecent 1 2400231 6695
## + FIREPLACE 1 2407421 6697
## + KITCHEN 1 2410160 6698
## <none> 2478386 6713
## + REMODELOld 1 2478067 6715
##
## Step: AIC=6171
## .outcome ~ LIVING.AREA
##
## Df Deviance AIC
## + LOT.SQFT 1 877061 6092
## + YR.BUILT 1 883869 6097
## + GROSS.AREA 1 897905 6106
## + FULL.BATH 1 927387 6126
## + FIREPLACE 1 949137 6140
## + REMODELRecent 1 970678 6153
## + FLOORS 1 979511 6159
## + HALF.BATH 1 980085 6159
## + BEDROOMS 1 994459 6168
## + ROOMS 1 995184 6168
## + KITCHEN 1 995690 6169
## <none> 1002430 6171
## + REMODELOld 1 1002057 6172
##
## Step: AIC=6092
## .outcome ~ LIVING.AREA + LOT.SQFT
##
## Df Deviance AIC
## + YR.BUILT 1 769374 6016
## + GROSS.AREA 1 793847 6034
## + FULL.BATH 1 807860 6045
## + FLOORS 1 820328 6054
## + REMODELRecent 1 843127 6071
## + FIREPLACE 1 846139 6073
## + HALF.BATH 1 857659 6081
## + BEDROOMS 1 864533 6086
## + ROOMS 1 871610 6091
## + KITCHEN 1 872310 6091
## <none> 877061 6092
## + REMODELOld 1 877016 6094
##
## Step: AIC=6016
## .outcome ~ LIVING.AREA + LOT.SQFT + YR.BUILT
##
## Df Deviance AIC
## + GROSS.AREA 1 655163 5921
## + FLOORS 1 678253 5942
## + REMODELRecent 1 712846 5972
## + FULL.BATH 1 719045 5977
## + BEDROOMS 1 759199 6010
## + KITCHEN 1 760858 6011
## + ROOMS 1 761111 6011
## + FIREPLACE 1 761892 6012
## + HALF.BATH 1 765194 6014
## <none> 769374 6016
## + REMODELOld 1 767633 6016
##
## Step: AIC=5921
## .outcome ~ LIVING.AREA + LOT.SQFT + YR.BUILT + GROSS.AREA
##
## Df Deviance AIC
## + FLOORS 1 577741 5847
## + REMODELRecent 1 608598 5879
## + FULL.BATH 1 622639 5892
## + BEDROOMS 1 644248 5913
## + ROOMS 1 645373 5914
## + HALF.BATH 1 649490 5918
## <none> 655163 5921
## + REMODELOld 1 654384 5922
## + KITCHEN 1 654939 5923
## + FIREPLACE 1 655006 5923
##
## Step: AIC=5847
## .outcome ~ LIVING.AREA + LOT.SQFT + YR.BUILT + GROSS.AREA + FLOORS
##
## Df Deviance AIC
## + REMODELRecent 1 528519 5796
## + FULL.BATH 1 552707 5823
## + ROOMS 1 571739 5843
## + BEDROOMS 1 573374 5845
## + KITCHEN 1 574827 5846
## <none> 577741 5847
## + REMODELOld 1 577139 5849
## + HALF.BATH 1 577326 5849
## + FIREPLACE 1 577625 5849
##
## Step: AIC=5796
## .outcome ~ LIVING.AREA + LOT.SQFT + YR.BUILT + GROSS.AREA + FLOORS +
## REMODELRecent
##
## Df Deviance AIC
## + FULL.BATH 1 510808 5777
## + KITCHEN 1 523764 5792
## + ROOMS 1 524196 5793
## + BEDROOMS 1 525027 5794
## + REMODELOld 1 525681 5795
## <none> 528519 5796
## + HALF.BATH 1 527771 5797
## + FIREPLACE 1 528400 5798
##
## Step: AIC=5777
## .outcome ~ LIVING.AREA + LOT.SQFT + YR.BUILT + GROSS.AREA + FLOORS +
## REMODELRecent + FULL.BATH
##
## Df Deviance AIC
## + HALF.BATH 1 497600 5764
## + KITCHEN 1 507995 5776
## + ROOMS 1 508227 5776
## + REMODELOld 1 508334 5776
## <none> 510808 5777
## + BEDROOMS 1 509195 5778
## + FIREPLACE 1 510675 5779
##
## Step: AIC=5764
## .outcome ~ LIVING.AREA + LOT.SQFT + YR.BUILT + GROSS.AREA + FLOORS +
## REMODELRecent + FULL.BATH + HALF.BATH
##
## Df Deviance AIC
## + KITCHEN 1 495617 5763
## + REMODELOld 1 495715 5763
## <none> 497600 5764
## + ROOMS 1 496739 5765
## + BEDROOMS 1 496951 5765
## + FIREPLACE 1 497322 5765
##
## Step: AIC=5763
## .outcome ~ LIVING.AREA + LOT.SQFT + YR.BUILT + GROSS.AREA + FLOORS +
## REMODELRecent + FULL.BATH + HALF.BATH + KITCHEN
##
## Df Deviance AIC
## + REMODELOld 1 493796 5763
## <none> 495617 5763
## + ROOMS 1 495051 5765
## + BEDROOMS 1 495098 5765
## + FIREPLACE 1 495300 5765
##
## Step: AIC=5763
## .outcome ~ LIVING.AREA + LOT.SQFT + YR.BUILT + GROSS.AREA + FLOORS +
## REMODELRecent + FULL.BATH + HALF.BATH + KITCHEN + REMODELOld
##
## Df Deviance AIC
## <none> 493796 5763
## + ROOMS 1 493245 5764
## + BEDROOMS 1 493257 5764
## + FIREPLACE 1 493504 5765
# Coefficients of the model reached by forward selection.
stats::coef(model$finalModel)
## (Intercept) LIVING.AREA LOT.SQFT YR.BUILT GROSS.AREA
## -1279.70645 0.04039 0.00629 0.69082 0.02897
## FLOORS REMODELRecent FULL.BATH HALF.BATH KITCHEN
## 25.01876 29.07483 17.85302 10.62199 8.92141
## REMODELOld
## 6.76882
# Stepwise selection by AIC in both directions on the training rows, reusing
# the no-resampling trControl defined earlier in the script.
model <- caret::train(
  TOTAL.VALUE ~ .,
  data = train.df,
  trControl = trControl,
  method = "glmStepAIC",
  direction = "both"
)
## Start: AIC=5768
## .outcome ~ LOT.SQFT + YR.BUILT + GROSS.AREA + LIVING.AREA + FLOORS +
## ROOMS + BEDROOMS + FULL.BATH + HALF.BATH + KITCHEN + FIREPLACE +
## REMODELOld + REMODELRecent
##
## Df Deviance AIC
## - ROOMS 1 492967 5766
## - BEDROOMS 1 492994 5766
## - FIREPLACE 1 493053 5766
## <none> 492790 5768
## - KITCHEN 1 494464 5768
## - REMODELOld 1 494590 5768
## - HALF.BATH 1 503325 5779
## - FULL.BATH 1 516586 5794
## - REMODELRecent 1 535268 5816
## - LIVING.AREA 1 538806 5819
## - FLOORS 1 544783 5826
## - GROSS.AREA 1 566511 5850
## - YR.BUILT 1 596490 5881
## - LOT.SQFT 1 608258 5892
##
## Step: AIC=5766
## .outcome ~ LOT.SQFT + YR.BUILT + GROSS.AREA + LIVING.AREA + FLOORS +
## BEDROOMS + FULL.BATH + HALF.BATH + KITCHEN + FIREPLACE +
## REMODELOld + REMODELRecent
##
## Df Deviance AIC
## - FIREPLACE 1 493257 5764
## - BEDROOMS 1 493504 5765
## <none> 492967 5766
## - REMODELOld 1 494782 5766
## - KITCHEN 1 494793 5766
## + ROOMS 1 492790 5768
## - HALF.BATH 1 504128 5777
## - FULL.BATH 1 517506 5793
## - REMODELRecent 1 535768 5814
## - FLOORS 1 544955 5824
## - LIVING.AREA 1 545135 5824
## - GROSS.AREA 1 566566 5848
## - YR.BUILT 1 597340 5879
## - LOT.SQFT 1 609497 5892
##
## Step: AIC=5764
## .outcome ~ LOT.SQFT + YR.BUILT + GROSS.AREA + LIVING.AREA + FLOORS +
## BEDROOMS + FULL.BATH + HALF.BATH + KITCHEN + REMODELOld +
## REMODELRecent
##
## Df Deviance AIC
## - BEDROOMS 1 493796 5763
## <none> 493257 5764
## - KITCHEN 1 495047 5765
## - REMODELOld 1 495098 5765
## + FIREPLACE 1 492967 5766
## + ROOMS 1 493053 5766
## - HALF.BATH 1 504283 5776
## - FULL.BATH 1 517681 5791
## - REMODELRecent 1 536070 5812
## - FLOORS 1 545339 5823
## - LIVING.AREA 1 545743 5823
## - GROSS.AREA 1 572134 5852
## - YR.BUILT 1 611275 5891
## - LOT.SQFT 1 613496 5893
##
## Step: AIC=5763
## .outcome ~ LOT.SQFT + YR.BUILT + GROSS.AREA + LIVING.AREA + FLOORS +
## FULL.BATH + HALF.BATH + KITCHEN + REMODELOld + REMODELRecent
##
## Df Deviance AIC
## <none> 493796 5763
## - REMODELOld 1 495617 5763
## - KITCHEN 1 495715 5763
## + ROOMS 1 493245 5764
## + BEDROOMS 1 493257 5764
## + FIREPLACE 1 493504 5765
## - HALF.BATH 1 505628 5775
## - FULL.BATH 1 520435 5793
## - REMODELRecent 1 536747 5811
## - FLOORS 1 547545 5823
## - LIVING.AREA 1 553653 5830
## - GROSS.AREA 1 572364 5850
## - YR.BUILT 1 611602 5890
## - LOT.SQFT 1 613570 5892
# Coefficients of the model reached by bidirectional stepwise selection.
stats::coef(model$finalModel)
## (Intercept) LOT.SQFT YR.BUILT GROSS.AREA LIVING.AREA
## -1279.70645 0.00629 0.69082 0.02897 0.04039
## FLOORS FULL.BATH HALF.BATH KITCHEN REMODELOld
## 25.01876 17.85302 10.62199 8.92141 6.76882
## REMODELRecent
## 29.07483
# Error summaries of the last stepwise model on the training and holdout rows.
rbind(
  Training = mlba::regressionSummary(
    predict(model, newdata = train.df),
    train.df$TOTAL.VALUE
  ),
  Holdout = mlba::regressionSummary(
    predict(model, newdata = holdout.df),
    holdout.df$TOTAL.VALUE
  )
)
## ME RMSE MAE
## Training 0.00000000000335 28.7 22.5
## Holdout -2.25 33.5 26
# The stepwise searches keep the same ten terms as the exhaustive search, so
# its metrics are reused for the stepwise entry.
# NOTE(review): metric.exhaustive comes from a model fit on housing.df, while
# the stepwise models are fit on train.df (their printed coefficients differ),
# so these are not the stepwise model's own metrics -- confirm that this
# shortcut is intended.
metric.stepwise <- metric.exhaustive