# NOTE(review): machine-specific absolute path. The relative read.csv() path
# used below is resolved against this directory, so the script only runs on a
# machine where this folder exists.
setwd("C:/Users/Sam/Documents/MATH_624/Module_10")
library(dplyr)
library(glmnet)
library(caTools)
library(olsrr)
# Load the training data; character columns are read as factors so that lm()
# and model.matrix() expand them into dummy variables.
train_df <- read.csv("train_reg_features-1.csv", stringsAsFactors = TRUE)
# Drop the identifier and the listed predictors, then keep only complete rows.
train_df <- train_df %>%
  select(-c(
    Id, Alley, FireplaceQu, PoolQC, Fence, MiscFeature, LotFrontage, Utilities
  ))
train_df <- na.omit(train_df)
This differs slightly from the midterm because na.omit() is used above to drop rows with missing values.
# Fit the full linear model: SalePrice regressed on every other column of
# train_df.
model <- lm(SalePrice ~ ., data = train_df)
# Backward elimination by p-value (olsrr), starting from the full model. The
# result is printed below and its `removed` element is used to drop columns.
varsToRemove <- ols_step_backward_p(model)
## Note: model has aliased coefficients
## sums of squares computed by model comparison
## Note: model has aliased coefficients
## sums of squares computed by model comparison
## Note: model has aliased coefficients
## sums of squares computed by model comparison
## Note: model has aliased coefficients
## sums of squares computed by model comparison
## Note: model has aliased coefficients
## sums of squares computed by model comparison
## Note: model has aliased coefficients
## sums of squares computed by model comparison
## Note: model has aliased coefficients
## sums of squares computed by model comparison
## Note: model has aliased coefficients
## sums of squares computed by model comparison
## Note: model has aliased coefficients
## sums of squares computed by model comparison
## Note: model has aliased coefficients
## sums of squares computed by model comparison
## Note: model has aliased coefficients
## sums of squares computed by model comparison
## Note: model has aliased coefficients
## sums of squares computed by model comparison
## Note: model has aliased coefficients
## sums of squares computed by model comparison
## Note: model has aliased coefficients
## sums of squares computed by model comparison
## Note: model has aliased coefficients
## sums of squares computed by model comparison
## Note: model has aliased coefficients
## sums of squares computed by model comparison
## Note: model has aliased coefficients
## sums of squares computed by model comparison
## Note: model has aliased coefficients
## sums of squares computed by model comparison
## Note: model has aliased coefficients
## sums of squares computed by model comparison
## Note: model has aliased coefficients
## sums of squares computed by model comparison
## Note: model has aliased coefficients
## sums of squares computed by model comparison
# Display the backward-elimination summary.
varsToRemove
##
##
## Elimination Summary
## -------------------------------------------------------------------------------------
## Variable Adj.
## Step Removed R-Square R-Square C(p) AIC RMSE
## -------------------------------------------------------------------------------------
## 1 BsmtHalfBath 0.9283 0.9141 -81.9941 30897.8223 23125.4447
## 2 Electrical 0.9282 0.9143 -83.5521 30890.3522 23099.0255
## 3 EnclosedPorch 0.9282 0.9144 -85.5475 30888.3576 23088.7675
## 4 MiscVal 0.9282 0.9145 -87.5384 30886.3686 23078.5703
## 5 OpenPorchSF 0.9282 0.9145 -89.5234 30884.3865 23068.4476
## 6 HalfBath 0.9282 0.9146 -91.4976 30882.4174 23058.4496
## 7 MSSubClass 0.9282 0.9147 -93.4654 30880.4561 23048.5323
## 8 PavedDrive 0.9282 0.9148 -94.9681 30877.0519 23033.1997
## 9 LotShape 0.9281 0.915 -95.8811 30872.3533 23013.7936
## 10 BsmtFullBath 0.9281 0.915 -97.7867 30870.4662 23004.5881
## 11 Exterior2nd 0.9275 0.9153 -89.6386 30852.5542 22967.0285
## 12 GarageYrBlt 0.9275 0.9154 -91.5587 30850.6489 22957.8184
## 13 GarageType 0.9273 0.9155 -90.5449 30844.2177 22938.4696
## 14 YrSold 0.9273 0.9156 -92.4765 30842.2985 22929.2039
## 15 Heating 0.9272 0.9157 -92.9139 30838.1449 22915.2126
## 16 BsmtCond 0.927 0.9158 -93.0968 30834.2890 22903.8542
## 17 BsmtFinType2 0.9268 0.9158 -91.3215 30828.7325 22892.5481
## 18 CentralAir 0.9268 0.9159 -92.7015 30827.4608 22888.9412
## 19 SaleCondition 0.9264 0.9158 -89.4062 30823.6653 22892.9561
## 20 HeatingQC 0.9262 0.9158 -87.5529 30820.1621 22892.3259
## -------------------------------------------------------------------------------------
# Names of the predictors dropped by the backward elimination above.
removed <- varsToRemove$removed
# all_of() marks `removed` as an external character vector of column names.
# This avoids tidyselect's "external vector is ambiguous" message and fails
# loudly if any listed column is missing from train_df.
cleanTrain2 <- select(train_df, -all_of(removed))
## Note: Using an external vector in selections is ambiguous.
## i Use `all_of(removed)` instead of `removed` to silence this message.
## i See <https://tidyselect.r-lib.org/reference/faq-external-vector.html>.
## This message is displayed once per session.
# Refit the linear model on the data left after backward elimination and
# display its coefficient summary.
model2 <- lm(formula = SalePrice ~ ., data = cleanTrain2)
results <- summary(object = model2)
results
##
## Call:
## lm(formula = SalePrice ~ ., data = cleanTrain2)
##
## Residuals:
## Min 1Q Median 3Q Max
## -185075 -9599 395 9541 185075
##
## Coefficients: (2 not defined because of singularities)
## Estimate Std. Error t value Pr(>|t|)
## (Intercept) -1.619e+06 1.869e+05 -8.662 < 2e-16 ***
## MSZoningFV 4.102e+04 1.262e+04 3.251 0.001183 **
## MSZoningRH 2.446e+04 1.312e+04 1.865 0.062491 .
## MSZoningRL 3.197e+04 1.098e+04 2.912 0.003655 **
## MSZoningRM 2.690e+04 1.027e+04 2.620 0.008918 **
## LotArea 8.261e-01 1.018e-01 8.116 1.21e-15 ***
## StreetPave 4.953e+04 1.442e+04 3.435 0.000612 ***
## LandContourHLS 9.621e+03 5.370e+03 1.792 0.073441 .
## LandContourLow -1.280e+04 6.688e+03 -1.915 0.055782 .
## LandContourLvl 5.319e+03 3.928e+03 1.354 0.175885
## LotConfigCulDSac 6.344e+03 3.141e+03 2.020 0.043640 *
## LotConfigFR2 -9.598e+03 4.081e+03 -2.352 0.018851 *
## LotConfigFR3 -1.308e+04 1.258e+04 -1.039 0.298874
## LotConfigInside -1.707e+03 1.770e+03 -0.965 0.334882
## LandSlopeMod 6.243e+03 4.033e+03 1.548 0.121871
## LandSlopeSev -4.805e+04 1.099e+04 -4.370 1.35e-05 ***
## NeighborhoodBlueste 2.199e+03 1.877e+04 0.117 0.906768
## NeighborhoodBrDale 5.601e+03 1.103e+04 0.508 0.611675
## NeighborhoodBrkSide 6.989e+02 9.727e+03 0.072 0.942733
## NeighborhoodClearCr -1.374e+04 9.261e+03 -1.484 0.138075
## NeighborhoodCollgCr -8.755e+03 7.252e+03 -1.207 0.227589
## NeighborhoodCrawfor 1.135e+04 8.543e+03 1.328 0.184375
## NeighborhoodEdwards -2.036e+04 8.027e+03 -2.536 0.011342 *
## NeighborhoodGilbert -1.180e+04 7.601e+03 -1.553 0.120664
## NeighborhoodIDOTRR -1.453e+03 1.102e+04 -0.132 0.895167
## NeighborhoodMeadowV -9.340e+03 1.159e+04 -0.806 0.420364
## NeighborhoodMitchel -1.912e+04 8.206e+03 -2.330 0.019980 *
## NeighborhoodNAmes -1.753e+04 7.789e+03 -2.250 0.024623 *
## NeighborhoodNoRidge 2.558e+04 8.381e+03 3.053 0.002320 **
## NeighborhoodNPkVill 1.277e+04 1.096e+04 1.165 0.244105
## NeighborhoodNridgHt 1.641e+04 7.429e+03 2.208 0.027403 *
## NeighborhoodNWAmes -1.964e+04 7.969e+03 -2.465 0.013858 *
## NeighborhoodOldTown -9.344e+03 9.812e+03 -0.952 0.341103
## NeighborhoodSawyer -9.701e+03 8.136e+03 -1.192 0.233388
## NeighborhoodSawyerW -2.554e+03 7.844e+03 -0.326 0.744766
## NeighborhoodSomerst -2.371e+03 9.002e+03 -0.263 0.792313
## NeighborhoodStoneBr 3.307e+04 8.167e+03 4.048 5.50e-05 ***
## NeighborhoodSWISU -6.000e+03 9.982e+03 -0.601 0.547878
## NeighborhoodTimber -1.339e+04 8.060e+03 -1.662 0.096871 .
## NeighborhoodVeenker 1.410e+03 1.043e+04 0.135 0.892451
## Condition1Feedr 4.858e+03 5.431e+03 0.894 0.371239
## Condition1Norm 1.421e+04 4.398e+03 3.230 0.001272 **
## Condition1PosA 8.287e+03 9.967e+03 0.831 0.405916
## Condition1PosN 1.283e+04 7.486e+03 1.714 0.086742 .
## Condition1RRAe -1.405e+04 9.083e+03 -1.546 0.122287
## Condition1RRAn 1.040e+04 6.975e+03 1.491 0.136324
## Condition1RRNe -2.728e+03 1.766e+04 -0.154 0.877258
## Condition1RRNn 1.367e+04 1.265e+04 1.081 0.279900
## Condition2Feedr -2.188e+04 2.338e+04 -0.936 0.349514
## Condition2Norm -1.284e+04 1.992e+04 -0.645 0.519280
## Condition2PosA 3.881e+04 4.133e+04 0.939 0.347869
## Condition2PosN -2.514e+05 2.732e+04 -9.200 < 2e-16 ***
## Condition2RRAe -1.220e+05 4.377e+04 -2.788 0.005394 **
## Condition2RRAn -1.205e+04 3.098e+04 -0.389 0.697448
## Condition2RRNn -8.761e+03 2.670e+04 -0.328 0.742869
## BldgType2fmCon -1.271e+04 6.674e+03 -1.905 0.057035 .
## BldgTypeDuplex -1.191e+04 7.758e+03 -1.536 0.124897
## BldgTypeTwnhs -2.475e+04 5.548e+03 -4.462 8.92e-06 ***
## BldgTypeTwnhsE -2.025e+04 3.771e+03 -5.370 9.47e-08 ***
## HouseStyle1.5Unf 1.211e+04 8.445e+03 1.434 0.151820
## HouseStyle1Story 1.044e+04 3.942e+03 2.647 0.008220 **
## HouseStyle2.5Fin -2.450e+04 1.379e+04 -1.777 0.075790 .
## HouseStyle2.5Unf -1.014e+04 9.209e+03 -1.101 0.270971
## HouseStyle2Story -4.799e+03 3.299e+03 -1.455 0.145980
## HouseStyleSFoyer 4.066e+03 6.306e+03 0.645 0.519239
## HouseStyleSLvl 3.868e+03 4.867e+03 0.795 0.426888
## OverallQual 6.592e+03 1.068e+03 6.171 9.33e-10 ***
## OverallCond 6.583e+03 8.765e+02 7.510 1.17e-13 ***
## YearBuilt 3.532e+02 7.643e+01 4.621 4.24e-06 ***
## YearRemodAdd 9.334e+01 5.768e+01 1.618 0.105841
## RoofStyleGable 2.955e+03 1.799e+04 0.164 0.869568
## RoofStyleGambrel 7.884e+03 1.982e+04 0.398 0.690925
## RoofStyleHip 4.096e+03 1.805e+04 0.227 0.820462
## RoofStyleMansard 1.623e+04 2.118e+04 0.766 0.443614
## RoofStyleShed 9.222e+04 3.396e+04 2.715 0.006721 **
## RoofMatlCompShg 6.699e+05 3.181e+04 21.060 < 2e-16 ***
## RoofMatlMembran 7.671e+05 4.616e+04 16.619 < 2e-16 ***
## RoofMatlMetal 7.410e+05 4.558e+04 16.258 < 2e-16 ***
## RoofMatlRoll 6.767e+05 4.075e+04 16.605 < 2e-16 ***
## RoofMatlTar&Grv 6.729e+05 3.672e+04 18.324 < 2e-16 ***
## RoofMatlWdShake 6.626e+05 3.519e+04 18.830 < 2e-16 ***
## RoofMatlWdShngl 7.206e+05 3.283e+04 21.948 < 2e-16 ***
## Exterior1stBrkComm -3.091e+04 2.679e+04 -1.154 0.248874
## Exterior1stBrkFace 1.452e+04 7.884e+03 1.842 0.065798 .
## Exterior1stCBlock -2.146e+04 2.855e+04 -0.752 0.452451
## Exterior1stCemntBd 4.301e+03 8.183e+03 0.526 0.599262
## Exterior1stHdBoard -4.472e+03 7.153e+03 -0.625 0.531985
## Exterior1stImStucc -1.680e+04 2.457e+04 -0.684 0.494143
## Exterior1stMetalSd 2.888e+03 7.045e+03 0.410 0.681956
## Exterior1stPlywood -7.264e+03 7.498e+03 -0.969 0.332830
## Exterior1stStone -1.143e+04 2.007e+04 -0.569 0.569247
## Exterior1stStucco -2.245e+03 8.958e+03 -0.251 0.802142
## Exterior1stVinylSd -1.235e+01 7.074e+03 -0.002 0.998607
## Exterior1stWd Sdng -4.546e+02 7.036e+03 -0.065 0.948493
## Exterior1stWdShing -3.327e+03 8.755e+03 -0.380 0.703997
## MasVnrTypeBrkFace 9.084e+03 6.728e+03 1.350 0.177221
## MasVnrTypeNone 1.256e+04 6.779e+03 1.853 0.064076 .
## MasVnrTypeStone 1.356e+04 7.104e+03 1.909 0.056536 .
## MasVnrArea 2.015e+01 5.709e+00 3.530 0.000432 ***
## ExterQualFa 9.807e+02 1.412e+04 0.069 0.944632
## ExterQualGd -1.843e+04 4.791e+03 -3.846 0.000127 ***
## ExterQualTA -1.901e+04 5.320e+03 -3.573 0.000367 ***
## ExterCondFa 7.435e+03 2.608e+04 0.285 0.775643
## ExterCondGd -1.135e+03 2.509e+04 -0.045 0.963915
## ExterCondTA 3.453e+03 2.507e+04 0.138 0.890481
## FoundationCBlock 3.867e+03 3.385e+03 1.142 0.253554
## FoundationPConc 6.118e+03 3.634e+03 1.684 0.092490 .
## FoundationStone 5.709e+03 1.080e+04 0.529 0.597103
## FoundationWood -2.607e+04 1.458e+04 -1.789 0.073930 .
## BsmtQualFa -1.213e+04 6.305e+03 -1.924 0.054601 .
## BsmtQualGd -1.887e+04 3.287e+03 -5.742 1.19e-08 ***
## BsmtQualTA -1.611e+04 4.087e+03 -3.940 8.62e-05 ***
## BsmtExposureGd 1.425e+04 3.033e+03 4.699 2.92e-06 ***
## BsmtExposureMn -4.982e+03 3.059e+03 -1.629 0.103629
## BsmtExposureNo -6.728e+03 2.194e+03 -3.067 0.002214 **
## BsmtFinType1BLQ 3.082e+03 2.727e+03 1.130 0.258535
## BsmtFinType1GLQ 6.279e+03 2.523e+03 2.488 0.012976 *
## BsmtFinType1LwQ -4.698e+02 3.624e+03 -0.130 0.896885
## BsmtFinType1Rec 1.845e+03 2.966e+03 0.622 0.533880
## BsmtFinType1Unf 3.567e+03 2.927e+03 1.219 0.223193
## BsmtFinSF1 3.991e+01 5.329e+00 7.490 1.35e-13 ***
## BsmtFinSF2 3.068e+01 6.494e+00 4.725 2.58e-06 ***
## BsmtUnfSF 2.123e+01 5.080e+00 4.178 3.16e-05 ***
## TotalBsmtSF NA NA NA NA
## X1stFlrSF 4.835e+01 5.859e+00 8.252 4.15e-16 ***
## X2ndFlrSF 6.938e+01 5.187e+00 13.377 < 2e-16 ***
## LowQualFinSF 1.875e+01 2.212e+01 0.848 0.396701
## GrLivArea NA NA NA NA
## FullBath 2.585e+03 2.110e+03 1.225 0.220745
## BedroomAbvGr -4.219e+03 1.426e+03 -2.960 0.003141 **
## KitchenAbvGr -1.963e+04 6.927e+03 -2.834 0.004679 **
## KitchenQualFa -1.852e+04 6.592e+03 -2.809 0.005050 **
## KitchenQualGd -2.397e+04 3.485e+03 -6.878 9.83e-12 ***
## KitchenQualTA -2.136e+04 3.952e+03 -5.406 7.78e-08 ***
## TotRmsAbvGrd 1.426e+03 9.730e+02 1.466 0.142952
## FunctionalMaj2 -8.756e+03 1.540e+04 -0.568 0.569859
## FunctionalMin1 -1.167e+03 9.211e+03 -0.127 0.899189
## FunctionalMin2 1.303e+03 9.218e+03 0.141 0.887589
## FunctionalMod -8.855e+03 1.147e+04 -0.772 0.440403
## FunctionalSev -5.108e+04 2.743e+04 -1.862 0.062847 .
## FunctionalTyp 1.064e+04 8.075e+03 1.318 0.187922
## Fireplaces 2.401e+03 1.351e+03 1.777 0.075827 .
## GarageFinishRFn -2.471e+03 1.954e+03 -1.265 0.206212
## GarageFinishUnf 9.321e+02 2.335e+03 0.399 0.689759
## GarageCars 3.952e+03 2.263e+03 1.747 0.080970 .
## GarageArea 1.766e+01 7.508e+00 2.352 0.018831 *
## GarageQualFa -1.220e+05 3.031e+04 -4.025 6.07e-05 ***
## GarageQualGd -1.155e+05 3.116e+04 -3.706 0.000220 ***
## GarageQualPo -1.316e+05 3.639e+04 -3.617 0.000311 ***
## GarageQualTA -1.196e+05 3.000e+04 -3.987 7.10e-05 ***
## GarageCondFa 1.092e+05 3.478e+04 3.139 0.001738 **
## GarageCondGd 1.097e+05 3.604e+04 3.045 0.002375 **
## GarageCondPo 1.147e+05 3.712e+04 3.091 0.002043 **
## GarageCondTA 1.147e+05 3.444e+04 3.331 0.000892 ***
## WoodDeckSF 9.383e+00 5.850e+00 1.604 0.109010
## X3SsnPorch 2.806e+01 2.246e+01 1.249 0.211826
## ScreenPorch 1.992e+01 1.200e+01 1.659 0.097369 .
## PoolArea 8.433e+01 1.759e+01 4.795 1.84e-06 ***
## MoSold -3.679e+02 2.491e+02 -1.477 0.139982
## SaleTypeCon 2.648e+04 1.757e+04 1.507 0.132002
## SaleTypeConLD 2.138e+04 1.126e+04 1.899 0.057797 .
## SaleTypeConLI 1.162e+03 1.275e+04 0.091 0.927428
## SaleTypeConLw 4.895e+03 1.278e+04 0.383 0.701851
## SaleTypeCWD 1.475e+04 1.259e+04 1.171 0.241802
## SaleTypeNew 1.483e+04 4.861e+03 3.051 0.002330 **
## SaleTypeOth 2.243e+04 2.346e+04 0.956 0.339236
## SaleTypeWD 1.123e+03 3.971e+03 0.283 0.777296
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## Residual standard error: 22890 on 1173 degrees of freedom
## Multiple R-squared: 0.9262, Adjusted R-squared: 0.9158
## F-statistic: 89.72 on 164 and 1173 DF, p-value: < 2.2e-16
# Coefficient table of model2: one row per estimated term, with the p-value in
# the "Pr(>|t|)" column.
coef_table <- results$coefficients
pvals <- data.frame(coef_table)
# Keep the terms significant at the 5% level. The p-value column is addressed
# by its real name on the matrix, because data.frame() mangles "Pr(>|t|)" into
# "Pr...t..". which() drops any NA comparison, matching filter()'s behavior.
pvals <- pvals[which(coef_table[, "Pr(>|t|)"] < 0.05), , drop = FALSE]
print(rownames(pvals))
## [1] "(Intercept)" "MSZoningFV" "MSZoningRL"
## [4] "MSZoningRM" "LotArea" "StreetPave"
## [7] "LotConfigCulDSac" "LotConfigFR2" "LandSlopeSev"
## [10] "NeighborhoodEdwards" "NeighborhoodMitchel" "NeighborhoodNAmes"
## [13] "NeighborhoodNoRidge" "NeighborhoodNridgHt" "NeighborhoodNWAmes"
## [16] "NeighborhoodStoneBr" "Condition1Norm" "Condition2PosN"
## [19] "Condition2RRAe" "BldgTypeTwnhs" "BldgTypeTwnhsE"
## [22] "HouseStyle1Story" "OverallQual" "OverallCond"
## [25] "YearBuilt" "RoofStyleShed" "RoofMatlCompShg"
## [28] "RoofMatlMembran" "RoofMatlMetal" "RoofMatlRoll"
## [31] "RoofMatlTar&Grv" "RoofMatlWdShake" "RoofMatlWdShngl"
## [34] "MasVnrArea" "ExterQualGd" "ExterQualTA"
## [37] "BsmtQualGd" "BsmtQualTA" "BsmtExposureGd"
## [40] "BsmtExposureNo" "BsmtFinType1GLQ" "BsmtFinSF1"
## [43] "BsmtFinSF2" "BsmtUnfSF" "X1stFlrSF"
## [46] "X2ndFlrSF" "BedroomAbvGr" "KitchenAbvGr"
## [49] "KitchenQualFa" "KitchenQualGd" "KitchenQualTA"
## [52] "GarageArea" "GarageQualFa" "GarageQualGd"
## [55] "GarageQualPo" "GarageQualTA" "GarageCondFa"
## [58] "GarageCondGd" "GarageCondPo" "GarageCondTA"
## [61] "PoolArea" "SaleTypeNew"
# Reduce the data to a hand-picked set of predictors plus the response.
cleanTrain3 <- cleanTrain2 %>%
  select(
    LotArea, LandContour, Neighborhood, Condition1,
    Condition2, HouseStyle, OverallCond, YearBuilt,
    RoofMatl, ExterQual, BsmtQual, X1stFlrSF, X2ndFlrSF,
    KitchenAbvGr, TotRmsAbvGrd, ScreenPorch, PoolArea,
    SaleType, SalePrice
  )
# Refit the linear model on the reduced data and display its summary.
model3 <- lm(formula = SalePrice ~ ., data = cleanTrain3)
results2 <- summary(object = model3)
results2
##
## Call:
## lm(formula = SalePrice ~ ., data = cleanTrain3)
##
## Residuals:
## Min 1Q Median 3Q Max
## -182697 -13460 0 12406 183338
##
## Coefficients:
## Estimate Std. Error t value Pr(>|t|)
## (Intercept) -1.702e+06 1.539e+05 -11.061 < 2e-16 ***
## LotArea 6.612e-01 9.458e-02 6.991 4.42e-12 ***
## LandContourHLS 2.005e+04 6.318e+03 3.173 0.001547 **
## LandContourLow 6.790e+03 7.623e+03 0.891 0.373186
## LandContourLvl 7.292e+03 4.562e+03 1.598 0.110246
## NeighborhoodBlueste 5.434e+03 2.189e+04 0.248 0.803995
## NeighborhoodBrDale -3.877e+03 1.106e+04 -0.351 0.725921
## NeighborhoodBrkSide 1.685e+04 9.704e+03 1.737 0.082664 .
## NeighborhoodClearCr 1.046e+04 1.011e+04 1.034 0.301214
## NeighborhoodCollgCr 1.468e+04 7.452e+03 1.970 0.049047 *
## NeighborhoodCrawfor 2.813e+04 9.238e+03 3.045 0.002376 **
## NeighborhoodEdwards -9.856e+03 8.483e+03 -1.162 0.245504
## NeighborhoodGilbert 6.418e+03 8.047e+03 0.798 0.425228
## NeighborhoodIDOTRR 7.884e+03 1.023e+04 0.771 0.440903
## NeighborhoodMeadowV -1.805e+04 1.133e+04 -1.593 0.111486
## NeighborhoodMitchel -1.721e+03 8.765e+03 -0.196 0.844356
## NeighborhoodNAmes -4.731e+02 8.075e+03 -0.059 0.953287
## NeighborhoodNoRidge 6.075e+04 8.733e+03 6.956 5.60e-12 ***
## NeighborhoodNPkVill 6.607e+03 1.223e+04 0.540 0.589212
## NeighborhoodNridgHt 4.269e+04 8.023e+03 5.321 1.22e-07 ***
## NeighborhoodNWAmes -5.323e+03 8.344e+03 -0.638 0.523570
## NeighborhoodOldTown 5.492e+03 9.240e+03 0.594 0.552374
## NeighborhoodSawyer 1.220e+02 8.569e+03 0.014 0.988643
## NeighborhoodSawyerW 1.293e+04 8.297e+03 1.558 0.119461
## NeighborhoodSomerst 2.439e+04 7.803e+03 3.126 0.001815 **
## NeighborhoodStoneBr 6.144e+04 9.224e+03 6.661 4.06e-11 ***
## NeighborhoodSWISU 6.919e+03 1.101e+04 0.628 0.529933
## NeighborhoodTimber 1.093e+04 8.836e+03 1.238 0.216101
## NeighborhoodVeenker 2.637e+04 1.159e+04 2.275 0.023046 *
## Condition1Feedr -3.674e+03 6.395e+03 -0.574 0.565778
## Condition1Norm 3.726e+03 5.095e+03 0.731 0.464690
## Condition1PosA -2.128e+03 1.181e+04 -0.180 0.856981
## Condition1PosN -6.218e+02 8.863e+03 -0.070 0.944082
## Condition1RRAe -2.304e+04 1.094e+04 -2.107 0.035336 *
## Condition1RRAn 4.137e+03 8.163e+03 0.507 0.612415
## Condition1RRNe -8.448e+03 2.133e+04 -0.396 0.692115
## Condition1RRNn 3.492e+02 1.452e+04 0.024 0.980822
## Condition2Feedr -3.181e+04 2.613e+04 -1.217 0.223747
## Condition2Norm -1.894e+04 2.207e+04 -0.858 0.390973
## Condition2PosA 7.454e+03 3.824e+04 0.195 0.845504
## Condition2PosN -2.085e+05 3.125e+04 -6.671 3.79e-11 ***
## Condition2RRAe -3.404e+04 3.673e+04 -0.927 0.354186
## Condition2RRAn -3.460e+04 3.619e+04 -0.956 0.339220
## Condition2RRNn -1.381e+04 3.047e+04 -0.453 0.650355
## HouseStyle1.5Unf 1.898e+04 9.783e+03 1.940 0.052626 .
## HouseStyle1Story 1.253e+04 4.518e+03 2.774 0.005619 **
## HouseStyle2.5Fin -3.848e+03 1.312e+04 -0.293 0.769329
## HouseStyle2.5Unf 6.186e+03 1.063e+04 0.582 0.560790
## HouseStyle2Story 2.060e+03 3.821e+03 0.539 0.589906
## HouseStyleSFoyer 2.821e+04 7.025e+03 4.016 6.28e-05 ***
## HouseStyleSLvl 9.184e+03 5.336e+03 1.721 0.085453 .
## OverallCond 8.538e+03 8.713e+02 9.799 < 2e-16 ***
## YearBuilt 6.173e+02 7.213e+01 8.558 < 2e-16 ***
## RoofMatlCompShg 5.623e+05 3.362e+04 16.722 < 2e-16 ***
## RoofMatlMembran 6.130e+05 4.471e+04 13.709 < 2e-16 ***
## RoofMatlMetal 6.096e+05 4.507e+04 13.527 < 2e-16 ***
## RoofMatlRoll 5.771e+05 4.451e+04 12.965 < 2e-16 ***
## RoofMatlTar&Grv 5.396e+05 3.434e+04 15.715 < 2e-16 ***
## RoofMatlWdShake 5.499e+05 3.612e+04 15.225 < 2e-16 ***
## RoofMatlWdShngl 6.503e+05 3.528e+04 18.435 < 2e-16 ***
## ExterQualFa -5.197e+04 1.327e+04 -3.915 9.54e-05 ***
## ExterQualGd -4.058e+04 5.293e+03 -7.667 3.52e-14 ***
## ExterQualTA -4.647e+04 5.837e+03 -7.962 3.77e-15 ***
## BsmtQualFa -3.699e+04 7.330e+03 -5.046 5.17e-07 ***
## BsmtQualGd -4.064e+04 3.776e+03 -10.764 < 2e-16 ***
## BsmtQualTA -3.829e+04 4.649e+03 -8.237 4.39e-16 ***
## X1stFlrSF 9.725e+01 4.113e+00 23.645 < 2e-16 ***
## X2ndFlrSF 7.238e+01 5.709e+00 12.677 < 2e-16 ***
## KitchenAbvGr -3.921e+04 5.540e+03 -7.077 2.44e-12 ***
## TotRmsAbvGrd 1.228e+03 9.981e+02 1.231 0.218713
## ScreenPorch 4.991e+01 1.402e+01 3.560 0.000385 ***
## PoolArea 8.788e+01 2.062e+01 4.262 2.17e-05 ***
## SaleTypeCon 6.631e+04 2.136e+04 3.104 0.001954 **
## SaleTypeConLD 1.854e+04 1.279e+04 1.449 0.147507
## SaleTypeConLI 2.100e+04 1.543e+04 1.361 0.173664
## SaleTypeConLw 1.789e+04 1.554e+04 1.151 0.249800
## SaleTypeCWD 1.082e+04 1.521e+04 0.711 0.477023
## SaleTypeNew 2.492e+04 5.713e+03 4.362 1.39e-05 ***
## SaleTypeOth 2.026e+04 2.905e+04 0.697 0.485822
## SaleTypeWD 1.153e+04 4.680e+03 2.463 0.013909 *
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## Residual standard error: 28610 on 1258 degrees of freedom
## Multiple R-squared: 0.8763, Adjusted R-squared: 0.8686
## F-statistic: 112.9 on 79 and 1258 DF, p-value: < 2.2e-16
# Design matrix for glmnet: factors are expanded into dummy columns, and the
# first column (the intercept) is dropped with [, -1].
x <- model.matrix(SalePrice ~ ., data = cleanTrain3)[, -1]
# Peek at the first three rows.
x[seq_len(3), ]
## LotArea LandContourHLS LandContourLow LandContourLvl NeighborhoodBlueste
## 1 8450 0 0 1 0
## 2 9600 0 0 1 0
## 3 11250 0 0 1 0
## NeighborhoodBrDale NeighborhoodBrkSide NeighborhoodClearCr
## 1 0 0 0
## 2 0 0 0
## 3 0 0 0
## NeighborhoodCollgCr NeighborhoodCrawfor NeighborhoodEdwards
## 1 1 0 0
## 2 0 0 0
## 3 1 0 0
## NeighborhoodGilbert NeighborhoodIDOTRR NeighborhoodMeadowV
## 1 0 0 0
## 2 0 0 0
## 3 0 0 0
## NeighborhoodMitchel NeighborhoodNAmes NeighborhoodNoRidge NeighborhoodNPkVill
## 1 0 0 0 0
## 2 0 0 0 0
## 3 0 0 0 0
## NeighborhoodNridgHt NeighborhoodNWAmes NeighborhoodOldTown NeighborhoodSawyer
## 1 0 0 0 0
## 2 0 0 0 0
## 3 0 0 0 0
## NeighborhoodSawyerW NeighborhoodSomerst NeighborhoodStoneBr NeighborhoodSWISU
## 1 0 0 0 0
## 2 0 0 0 0
## 3 0 0 0 0
## NeighborhoodTimber NeighborhoodVeenker Condition1Feedr Condition1Norm
## 1 0 0 0 1
## 2 0 1 1 0
## 3 0 0 0 1
## Condition1PosA Condition1PosN Condition1RRAe Condition1RRAn Condition1RRNe
## 1 0 0 0 0 0
## 2 0 0 0 0 0
## 3 0 0 0 0 0
## Condition1RRNn Condition2Feedr Condition2Norm Condition2PosA Condition2PosN
## 1 0 0 1 0 0
## 2 0 0 1 0 0
## 3 0 0 1 0 0
## Condition2RRAe Condition2RRAn Condition2RRNn HouseStyle1.5Unf
## 1 0 0 0 0
## 2 0 0 0 0
## 3 0 0 0 0
## HouseStyle1Story HouseStyle2.5Fin HouseStyle2.5Unf HouseStyle2Story
## 1 0 0 0 1
## 2 1 0 0 0
## 3 0 0 0 1
## HouseStyleSFoyer HouseStyleSLvl OverallCond YearBuilt RoofMatlCompShg
## 1 0 0 5 2003 1
## 2 0 0 8 1976 1
## 3 0 0 5 2001 1
## RoofMatlMembran RoofMatlMetal RoofMatlRoll RoofMatlTar&Grv RoofMatlWdShake
## 1 0 0 0 0 0
## 2 0 0 0 0 0
## 3 0 0 0 0 0
## RoofMatlWdShngl ExterQualFa ExterQualGd ExterQualTA BsmtQualFa BsmtQualGd
## 1 0 0 1 0 0 1
## 2 0 0 0 1 0 1
## 3 0 0 1 0 0 1
## BsmtQualTA X1stFlrSF X2ndFlrSF KitchenAbvGr TotRmsAbvGrd ScreenPorch PoolArea
## 1 0 856 854 1 8 0 0
## 2 0 1262 0 1 6 0 0
## 3 0 920 866 1 6 0 0
## SaleTypeCon SaleTypeConLD SaleTypeConLI SaleTypeConLw SaleTypeCWD SaleTypeNew
## 1 0 0 0 0 0 0
## 2 0 0 0 0 0 0
## 3 0 0 0 0 0 0
## SaleTypeOth SaleTypeWD
## 1 0 1
## 2 0 1
## 3 0 1
y <- cleanTrain3$SalePrice # response vector
set.seed(1) # fix the RNG seed so the train/test split is reproducible
# Hold out half of the observations: sample floor(n / 2) row indices for
# training; the remaining rows form the test set. Sizes are derived from the
# data rather than hard-coded.
n_obs <- nrow(x)
train <- sample(n_obs, n_obs %/% 2)
test <- -train
set.seed(1) # reseed before cross-validation so its results are reproducible
# Ridge regression (alpha = 0) over glmnet's default lambda path on the
# training rows.
ridge.mod <- glmnet(x[train, ], y[train], alpha = 0)
# Cross-validate on the training rows to find the best lambda.
cv.out <- cv.glmnet(x[train, ], y[train], alpha = 0)
plot(cv.out)
names(cv.out)
## [1] "lambda" "cvm" "cvsd" "cvup" "cvlo"
## [6] "nzero" "call" "name" "glmnet.fit" "lambda.min"
## [11] "lambda.1se" "index"
# Lambda stored by cv.glmnet as `lambda.min`.
bestlam <- cv.out[["lambda.min"]]
bestlam
## [1] 5067.486
# Refit ridge on the training rows at the selected lambda only.
ridge.mod <- glmnet(x[train, ], y[train], alpha = 0, lambda = bestlam)
# Response values of the held-out rows.
y.test <- y[test]
# Predict the held-out rows and report the test mean squared error.
ridge.pred <- predict(ridge.mod, s = bestlam, newx = x[test, ])
mean((ridge.pred - y.test)^2)
## [1] 1522891631
# Ridge path on all observations, then the coefficients at the selected lambda.
ridge.out <- glmnet(x, y, alpha = 0)
ridge_results <- predict(
  ridge.out,
  type = "coefficients",
  s = bestlam
) # [1:20, ]
ridge_results
## 80 x 1 sparse Matrix of class "dgCMatrix"
## s1
## (Intercept) -1.006564e+06
## LotArea 5.722062e-01
## LandContourHLS 2.523322e+04
## LandContourLow 1.159704e+04
## LandContourLvl 1.029205e+04
## NeighborhoodBlueste -7.230541e+03
## NeighborhoodBrDale -2.193363e+04
## NeighborhoodBrkSide -6.864774e+02
## NeighborhoodClearCr 4.756808e+03
## NeighborhoodCollgCr 4.966337e+03
## NeighborhoodCrawfor 1.735575e+04
## NeighborhoodEdwards -2.616106e+04
## NeighborhoodGilbert -3.766336e+03
## NeighborhoodIDOTRR -1.325003e+04
## NeighborhoodMeadowV -3.057519e+04
## NeighborhoodMitchel -8.687550e+03
## NeighborhoodNAmes -1.211760e+04
## NeighborhoodNoRidge 6.364784e+04
## NeighborhoodNPkVill -7.998634e+03
## NeighborhoodNridgHt 4.695948e+04
## NeighborhoodNWAmes -1.141217e+04
## NeighborhoodOldTown -1.048017e+04
## NeighborhoodSawyer -1.140436e+04
## NeighborhoodSawyerW 4.319545e+03
## NeighborhoodSomerst 1.726918e+04
## NeighborhoodStoneBr 6.004745e+04
## NeighborhoodSWISU -7.229427e+03
## NeighborhoodTimber 7.693418e+03
## NeighborhoodVeenker 2.099372e+04
## Condition1Feedr -9.656329e+03
## Condition1Norm 4.435823e+03
## Condition1PosA 4.510080e+03
## Condition1PosN 1.586097e+03
## Condition1RRAe -2.167743e+04
## Condition1RRAn 3.802436e+03
## Condition1RRNe -9.295008e+03
## Condition1RRNn 8.765171e+03
## Condition2Feedr -1.958147e+04
## Condition2Norm -7.975861e+03
## Condition2PosA 5.556761e+04
## Condition2PosN -1.250836e+05
## Condition2RRAe -1.565302e+04
## Condition2RRAn -1.972465e+04
## Condition2RRNn 5.474364e+02
## HouseStyle1.5Unf 9.960760e+03
## HouseStyle1Story 5.434164e+03
## HouseStyle2.5Fin -2.134020e+03
## HouseStyle2.5Unf 4.822878e+03
## HouseStyle2Story 1.689879e+03
## HouseStyleSFoyer 1.828867e+04
## HouseStyleSLvl 3.573160e+03
## OverallCond 7.380703e+03
## YearBuilt 4.966175e+02
## RoofMatlCompShg 9.478364e+04
## RoofMatlMembran 1.381204e+05
## RoofMatlMetal 1.284230e+05
## RoofMatlRoll 9.866734e+04
## RoofMatlTar&Grv 7.820345e+04
## RoofMatlWdShake 8.109457e+04
## RoofMatlWdShngl 1.905016e+05
## ExterQualFa -2.920519e+04
## ExterQualGd -1.765741e+04
## ExterQualTA -2.726266e+04
## BsmtQualFa -3.088418e+04
## BsmtQualGd -3.123064e+04
## BsmtQualTA -3.007073e+04
## X1stFlrSF 7.159439e+01
## X2ndFlrSF 4.445320e+01
## KitchenAbvGr -3.808014e+04
## TotRmsAbvGrd 6.218870e+03
## ScreenPorch 6.370165e+01
## PoolArea 3.345285e+01
## SaleTypeCon 5.712067e+04
## SaleTypeConLD 1.027885e+04
## SaleTypeConLI 1.603913e+04
## SaleTypeConLw 9.004632e+03
## SaleTypeCWD 1.141126e+04
## SaleTypeNew 2.074691e+04
## SaleTypeOth 9.050317e+03
## SaleTypeWD 6.303251e+03
# Best Subset Selection Results:
## (Intercept) MSSubClass OverallQual BsmtFinType2Rec KitchenQualTA
## -71322.4474 -226.3198 43437.7309 9200.0694 -16200.1357
## GarageCondTA
## 8445.4309
# Best Forward Selection Results:
## (Intercept) MSSubClass OverallQual BsmtFinType2Rec KitchenQualTA
## -71322.4455 -226.3198 43437.7307 9200.0700 -16200.1363
## GarageCondTA
## 8445.4308
# Best Backward Selection Results:
## (Intercept) OverallQual RoofMatlWdShngl BsmtFinType2Rec KitchenAbvGr
## -97279.147 46025.185 105235.823 8693.319 -2698.769
## KitchenQualFa
## -15133.953
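Coefficient estimates appear considerably smaller than those from the subset selection methods: the leading digits of the ridge estimates are all single digits, whereas the subset selection estimates are printed with three or more digits. Note, however, that the ridge output is in scientific notation (e.g., 6.364784e+04 is about 63,648), so the magnitudes should be compared only after accounting for the exponents. As noted in the last homework, the difference could also be due to computational limitations of the subset selection methods used.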
# Lasso (alpha = 1) on the training rows, with the seed fixed before
# cross-validation.
set.seed(1)
lasso.mod <- glmnet(x[train, ], y[train], alpha = 1)
cv.out <- cv.glmnet(x[train, ], y[train], alpha = 1)
plot(cv.out)
# Lambda stored by cv.glmnet as `lambda.min`.
bestlam <- cv.out[["lambda.min"]]
bestlam
## [1] 109.1757
# Refit the lasso on the training rows at the selected lambda, predict the
# held-out rows, and report the test mean squared error.
lasso.mod <- glmnet(x[train, ], y[train], alpha = 1, lambda = bestlam)
lasso.pred <- predict(lasso.mod, s = bestlam, newx = x[test, ])
mean((lasso.pred - y.test)^2)
## [1] 1617807839
# Lasso path on all observations, followed by a plot of the fitted path.
out <- glmnet(x, y, alpha = 1)
plot(out)
# Best Subset Selection Results:
## (Intercept) MSSubClass OverallQual BsmtFinType2Rec KitchenQualTA
## -71322.4474 -226.3198 43437.7309 9200.0694 -16200.1357
## GarageCondTA
## 8445.4309
# Best Forward Selection Results:
## (Intercept) MSSubClass OverallQual BsmtFinType2Rec KitchenQualTA
## -71322.4455 -226.3198 43437.7307 9200.0700 -16200.1363
## GarageCondTA
## 8445.4308
# Best Backward Selection Results:
## (Intercept) OverallQual RoofMatlWdShngl BsmtFinType2Rec KitchenAbvGr
## -97279.147 46025.185 105235.823 8693.319 -2698.769
## KitchenQualFa
## -15133.953
# Lasso coefficients from the full-data path at the selected lambda.
lasso.coef <- predict(
  out,
  type = "coefficients",
  s = bestlam
) # [1:20, ]
lasso.coef
## 80 x 1 sparse Matrix of class "dgCMatrix"
## s1
## (Intercept) -1.600965e+06
## LotArea 6.408578e-01
## LandContourHLS 1.947706e+04
## LandContourLow 6.289072e+03
## LandContourLvl 6.359078e+03
## NeighborhoodBlueste .
## NeighborhoodBrDale -9.448638e+03
## NeighborhoodBrkSide 9.628668e+03
## NeighborhoodClearCr 5.003764e+03
## NeighborhoodCollgCr 9.084922e+03
## NeighborhoodCrawfor 2.215560e+04
## NeighborhoodEdwards -1.615491e+04
## NeighborhoodGilbert 6.458136e+02
## NeighborhoodIDOTRR .
## NeighborhoodMeadowV -2.260729e+04
## NeighborhoodMitchel -5.960986e+03
## NeighborhoodNAmes -5.414477e+03
## NeighborhoodNoRidge 5.674575e+04
## NeighborhoodNPkVill .
## NeighborhoodNridgHt 3.948625e+04
## NeighborhoodNWAmes -9.493616e+03
## NeighborhoodOldTown -5.073392e+02
## NeighborhoodSawyer -4.507915e+03
## NeighborhoodSawyerW 6.803745e+03
## NeighborhoodSomerst 1.910646e+04
## NeighborhoodStoneBr 5.682144e+04
## NeighborhoodSWISU .
## NeighborhoodTimber 5.931797e+03
## NeighborhoodVeenker 2.089761e+04
## Condition1Feedr -4.526058e+03
## Condition1Norm 3.636494e+03
## Condition1PosA .
## Condition1PosN .
## Condition1RRAe -2.121727e+04
## Condition1RRAn 3.159832e+03
## Condition1RRNe -5.352478e+03
## Condition1RRNn .
## Condition2Feedr -1.312279e+04
## Condition2Norm -3.462222e+03
## Condition2PosA 2.678506e+04
## Condition2PosN -1.821386e+05
## Condition2RRAe -1.722697e+04
## Condition2RRAn -1.582039e+04
## Condition2RRNn 3.482886e+02
## HouseStyle1.5Unf 1.664660e+04
## HouseStyle1Story 9.334597e+03
## HouseStyle2.5Fin -2.711347e+03
## HouseStyle2.5Unf 3.219928e+03
## HouseStyle2Story 1.699449e+00
## HouseStyleSFoyer 2.369156e+04
## HouseStyleSLvl 6.158927e+03
## OverallCond 8.394707e+03
## YearBuilt 6.085841e+02
## RoofMatlCompShg 4.727020e+05
## RoofMatlMembran 5.204445e+05
## RoofMatlMetal 5.163634e+05
## RoofMatlRoll 4.821994e+05
## RoofMatlTar&Grv 4.510800e+05
## RoofMatlWdShake 4.584546e+05
## RoofMatlWdShngl 5.616317e+05
## ExterQualFa -4.631848e+04
## ExterQualGd -3.641636e+04
## ExterQualTA -4.294984e+04
## BsmtQualFa -3.650803e+04
## BsmtQualGd -3.988183e+04
## BsmtQualTA -3.816837e+04
## X1stFlrSF 9.435456e+01
## X2ndFlrSF 6.934499e+01
## KitchenAbvGr -3.764537e+04
## TotRmsAbvGrd 1.599474e+03
## ScreenPorch 5.045328e+01
## PoolArea 7.267543e+01
## SaleTypeCon 6.036500e+04
## SaleTypeConLD 1.315759e+04
## SaleTypeConLI 1.513381e+04
## SaleTypeConLw 1.140835e+04
## SaleTypeCWD 6.083039e+03
## SaleTypeNew 2.116910e+04
## SaleTypeOth 1.223097e+04
## SaleTypeWD 8.103810e+03
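As before, coefficient estimates appear considerably smaller than those from the subset selection methods: the leading digits of the LASSO estimates are all single digits, whereas the subset selection estimates are printed with three or more digits. Note, however, that the LASSO output is in scientific notation (e.g., 5.674575e+04 is about 56,746), so the magnitudes should be compared only after accounting for the exponents. As noted in the last homework, the difference could also be due to computational limitations of the subset selection methods used.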