Code I used for Kaggle Home Prices
library(dplyr)
## Warning: package 'dplyr' was built under R version 4.0.4
##
## Attaching package: 'dplyr'
## The following objects are masked from 'package:stats':
##
## filter, lag
## The following objects are masked from 'package:base':
##
## intersect, setdiff, setequal, union
##Importing Data
train <- read.csv("C:\\Users\\17814\\Downloads\\train.csv", header=TRUE)
test <- read.csv("C:\\Users\\17814\\Downloads\\test.csv", header=TRUE)
## Creating one training dataset with categorical variable and one with numeric variable for data visualization. Source that helped me here https://www.kaggle.com/pradeeptripathi/predicting-house-prices-using-r/code
cat_var <- names(train)[which(sapply(train, is.character))]
cat_car <- c(cat_var, 'BedroomAbvGr', 'HalfBath', ' KitchenAbvGr','BsmtFullBath', 'BsmtHalfBath', 'MSSubClass')
numeric_var <- names(train)[which(sapply(train, is.numeric))]
cat<-data.frame(train[cat_var])
num<- data.frame(train[numeric_var])
## Scatterplots for numeric variables
plot(num[,2], num[,38], xlab = "MSSubClass" , ylab = "SalePrice")
plot(num[,3], num[,38], xlab = "LotFrontage" , ylab = "SalePrice")
plot(num[,4], num[,38], xlab = "LotArea", ylab = "SalePrice")
plot(num[,5], num[,38], xlab = "OverallQual", ylab = "SalePrice")
plot(num[,6], num[,38], xlab = "OverallCond", ylab = "SalePrice")
plot(num[,7], num[,38], xlab = "YearBuilt", ylab = "SalePrice")
plot(num[,8], num[,38], xlab = "YearRemodAdd", ylab = "SalePrice")
plot(num[,9], num[,38], xlab = "MasVnrArea", ylab = "SalePrice")
plot(num[,10], num[,38], xlab = "BsmtFinSF1", ylab = "SalePrice")
plot(num[,11], num[,38], xlab = "BsmtFinSf2", ylab = "SalePrice")
plot(num[,12], num[,38], xlab = "BsmtUnfSF", ylab = "SalePrice")
plot(num[,13], num[,38], xlab = "TotalBsmtSF", ylab = "SalePrice")
plot(num[,14], num[,38], xlab = "X1stFlrSF", ylab = "SalePrice")
plot(num[,15], num[,38], xlab = "X2ndFlrSF", ylab = "SalePrice")
plot(num[,16], num[,38], xlab = "LowQualFinSF", ylab = "SalePrice")
plot(num[,17], num[,38], xlab = "GrLivArea", ylab = "SalePrice")
plot(num[,18], num[,38], xlab = "BsmtFullBath", ylab = "SalePrice")
plot(num[,19], num[,38], xlab = "BsmtHalfBath", ylab = "SalePrice")
plot(num[,20], num[,38], xlab = "FullBath", ylab = "SalePrice")
plot(num[,21], num[,38], xlab = "HalfBath", ylab = "SalePrice")
plot(num[,22], num[,38], xlab = "BedroomAbvGr", ylab = "SalePrice")
plot(num[,23], num[,38], xlab = "KitchenAbvGr", ylab = "SalePrice")
plot(num[,24], num[,38], xlab = "TotRmsAbvGrd", ylab = "SalePrice")
plot(num[,25], num[,38], xlab = "Fireplaces", ylab = "SalePrice")
plot(num[,26], num[,38], xlab = "GarageYrBlt", ylab = "SalePrice")
plot(num[,27], num[,38], xlab = "GarageCars", ylab = "SalePrice")
plot(num[,28], num[,38], xlab = "GarageArea", ylab = "SalePrice")
plot(num[,29], num[,38], xlab = "WoodDeckSF", ylab = "SalePrice")
plot(num[,30], num[,38], xlab = "OpenPorchSF", ylab = "SalePrice")
plot(num[,31], num[,38], xlab = "EnclosedPorch", ylab = "SalePrice")
plot(num[,32], num[,38], xlab = "X3SsnPorch", ylab = "SalePrice")
plot(num[,33], num[,38], xlab = "ScreenPorch", ylab = "SalePrice")
plot(num[,34], num[,38], xlab = "PoolArea", ylab = "SalePrice")
plot(num[,35], num[,38], xlab = "MiscVal", ylab = "SalePrice")
plot(num[,36], num[,38], xlab = "MoSold", ylab = "SalePrice")
plot(num[,37], num[,38], xlab = "YrSold", ylab = "SalePrice")
##Changing the NAs displayed in the categorical variables to None just to clean up data.
train$Alley[is.na(train$Alley)] <- "None"
test$Alley[is.na(test$Alley)] <- "None"
train$BsmtQual[is.na(train$BsmtQual)] <- "None"
test$BsmtQual[is.na(test$BsmtQual)] <- "None"
train$BsmtCond[is.na(train$BsmtCond)] <- "None"
test$BsmtCond[is.na(test$BsmtCond)] <- "None"
train$BsmtExposure[is.na(train$BsmtExposure)] <- "None"
test$BsmtExposure[is.na(test$BsmtExposure)] <- "None"
train$BsmtFinType1[is.na(train$BsmtFinType1)] <- "None"
test$BsmtFinType1[is.na(test$BsmtFinType1)] <- "None"
train$BsmtFinType2[is.na(train$BsmtFinType2)] <- "None"
test$BsmtFinType2[is.na(test$BsmtFinType2)] <- "None"
train$FireplaceQu[is.na(train$FireplaceQu)] <- "None"
test$FireplaceQu[is.na(test$FireplaceQu)] <- "None"
train$GarageType[is.na(train$GarageType)] <- "None"
test$GarageType[is.na(test$GarageType)] <- "None"
train$GarageFinish[is.na(train$GarageFinish)] <- "None"
test$GarageFinish[is.na(test$GarageFinish)] <- "None"
train$GarageQual[is.na(train$GarageQual)] <- "None"
test$GarageQual[is.na(test$GarageQual)] <- "None"
train$GarageCond[is.na(train$GarageCond)] <- "None"
test$GarageCond[is.na(test$GarageCond)] <- "None"
train$PoolQC[is.na(train$PoolQC)] <- "None"
test$PoolQC[is.na(test$PoolQC)] <- "None"
train$Fence[is.na(train$Fence)] <- "None"
test$Fence[is.na(test$Fence)] <- "None"
train$MiscFeature[is.na(train$MiscFeature)] <- "None"
test$MiscFeature[is.na(test$MiscFeature)] <- "None"
##Cleaning up data with medians
train$LotFrontage[is.na(train$LotFrontage)] <- median(train$LotFrontage, na.rm = TRUE)
test$LotFrontage[is.na(test$LotFrontage)] <- median(test$LotFrontage, na.rm = TRUE)
train$MasVnrArea[is.na(train$MasVnrArea)] <- median(train$MasVnrArea, na.rm = TRUE)
test$MasVnrArea[is.na(test$MasVnrArea)] <- median(test$MasVnrArea, na.rm = TRUE)
##Unique year for garage year built
train$GarageYrBlt[is.na(train$GarageYrBlt)] <- -1000
test$GarageYrBlt[is.na(test$GarageYrBlt)] <- -1000
##Cleaning up data with no basement and garage with 0
test$BsmtFinSF1[is.na(test$BsmtFinSF1)] <- 0
test$BsmtFinSF2[is.na(test$BsmtFinSF2)] <- 0
test$BsmtUnfSF[is.na(test$BsmtUnfSF)] <- 0
test$TotalBsmtSF[is.na(test$TotalBsmtSF)] <- 0
test$BsmtFullBath[is.na(test$BsmtFullBath)] <- 0
test$BsmtHalfBath[is.na(test$BsmtHalfBath)] <- 0
test$GarageCars[is.na(test$GarageCars)] <- 0
test$GarageArea[is.na(test$GarageArea)] <- 0
## "Most commonly used" assumption method for other missing variables
train$MasVnrType[is.na(train$MasVnrType)] <- "None"
test$MasVnrType[is.na(test$MasVnrType)] <- "None"
train$Electrical[is.na(train$Electrical)] <- "SBrkr"
test$MSZoning[is.na(test$MSZoning)] <- "RL"
test$Utilities[is.na(test$Utilities)] <- "AllPub"
test$Exterior1st[is.na(test$Exterior1st)] <- "VinylSd"
test$Exterior2nd[is.na(test$Exterior2nd)] <- "VinylSd"
test$KitchenQual[is.na(test$KitchenQual)] <- "TA"
test$Functional[is.na(test$Functional)] <- "Min2"
test$SaleType[is.na(test$SaleType)] <- "WD"
## Factoring Categorical and Ordinal Variables
train$MSZoning<- factor(train$MSZoning)
test$MSZoning<- factor(test$MSZoning)
train$Street <- factor(train$Street)
test$Street <- factor(test$Street)
train$LotShape <-factor(train$LotShape)
test$LotShape <-factor(test$LotShape)
train$LandContour<-factor(train$LandContour)
test$LandContour<-factor(test$LandContour)
train$Utilities<-factor(train$Utilities)
test$Utilities<-factor(test$Utilities)
train$LotConfig<-factor(train$LotConfig)
test$LotConfig<-factor(test$LotConfig)
train$LandSlope<-factor(train$LandSlope)
test$LandSlope<-factor(test$LandSlope)
train$Neighborhood<-factor(train$Neighborhood)
test$Neighborhood<-factor(test$Neighborhood)
train$Condition1<-factor(train$Condition1)
test$Condition1<-factor(test$Condition1)
train$Condition2<-factor(train$Condition2)
test$Condition2<-factor(test$Condition2)
train$BldgType<-factor(train$BldgType)
test$BldgType<-factor(test$BldgType)
train$HouseStyle<-factor(train$HouseStyle)
test$HouseStyle<-factor(test$HouseStyle)
train$RoofStyle<-factor(train$RoofStyle)
test$RoofStyle<-factor(test$RoofStyle)
train$RoofMatl<-factor(train$RoofMatl)
test$RoofMatl<-factor(test$RoofMatl)
train$Exterior1st<-factor(train$Exterior1st)
test$Exterior1st<-factor(test$Exterior1st)
train$Exterior2nd<-factor(train$Exterior2nd)
test$Exterior2nd<-factor(test$Exterior2nd)
train$ExterQual<-factor(train$ExterQual)
test$ExterQual<-factor(test$ExterQual)
train$ExterCond<-factor(train$ExterCond)
test$ExterCond<-factor(test$ExterCond)
train$Foundation<-factor(train$Foundation)
test$Foundation<-factor(test$Foundation)
train$Heating<-factor(train$Heating)
test$Heating<-factor(test$Heating)
train$HeatingQC<-factor(train$HeatingQC)
test$HeatingQC<-factor(test$HeatingQC)
train$CentralAir<-factor(train$CentralAir)
test$CentralAir<-factor(test$CentralAir)
train$KitchenQual<-factor(train$KitchenQual)
test$KitchenQual<-factor(test$KitchenQual)
train$Functional<-factor(train$Functional)
test$Functional<-factor(test$Functional)
train$PavedDrive<-factor(train$PavedDrive)
test$PavedDrive<-factor(test$PavedDrive)
train$SaleType<-factor(train$SaleType)
test$SaleType<-factor(test$SaleType)
train$SaleCondition<-factor(train$SaleCondition)
test$SaleCondition<-factor(test$SaleCondition)
train$Alley <- factor(train$Alley)
test$Alley <- factor(test$Alley)
train$BsmtQual <- factor(train$BsmtQual)
test$BsmtQual <- factor(test$BsmtQual)
train$BsmtCond <- factor(train$BsmtCond)
test$BsmtCond <- factor(test$BsmtCond)
train$BsmtExposure <- factor(train$BsmtExposure)
test$BsmtExposure <- factor(test$BsmtExposure)
train$BsmtFinType1 <- factor(train$BsmtFinType1)
test$BsmtFinType1 <- factor(test$BsmtFinType1)
train$BsmtFinType2 <- factor(train$BsmtFinType2)
test$BsmtFinType2 <- factor(test$BsmtFinType2)
train$FireplaceQu <- factor(train$FireplaceQu)
test$FireplaceQu <- factor(test$FireplaceQu)
train$GarageType <- factor(train$GarageType)
test$GarageType <- factor(test$GarageType)
train$GarageFinish <- factor(train$GarageFinish)
test$GarageFinish <- factor(test$GarageFinish)
train$GarageQual <- factor(train$GarageQual)
test$GarageQual <- factor(test$GarageQual)
train$GarageCond <- factor(train$GarageCond)
test$GarageCond <- factor(test$GarageCond)
train$PoolQC <- factor(train$PoolQC)
test$PoolQC <- factor(test$PoolQC)
train$Fence <- factor(train$Fence)
test$Fence <- factor(test$Fence)
train$MiscFeature <- factor(train$MiscFeature)
test$MiscFeature <- factor(test$MiscFeature)
train$MasVnrType <- factor(train$MasVnrType)
test$MasVnrType <- factor(test$MasVnrType)
train$Electrical <- factor(train$Electrical)
test$Electrical <- factor(test$Electrical)
## Final Summary Statistics
str(test)
## 'data.frame': 1459 obs. of 80 variables:
## $ Id : int 1461 1462 1463 1464 1465 1466 1467 1468 1469 1470 ...
## $ MSSubClass : int 20 20 60 60 120 60 20 60 20 20 ...
## $ MSZoning : Factor w/ 5 levels "C (all)","FV",..: 3 4 4 4 4 4 4 4 4 4 ...
## $ LotFrontage : num 80 81 74 78 43 75 67 63 85 70 ...
## $ LotArea : int 11622 14267 13830 9978 5005 10000 7980 8402 10176 8400 ...
## $ Street : Factor w/ 2 levels "Grvl","Pave": 2 2 2 2 2 2 2 2 2 2 ...
## $ Alley : Factor w/ 3 levels "Grvl","None",..: 2 2 2 2 2 2 2 2 2 2 ...
## $ LotShape : Factor w/ 4 levels "IR1","IR2","IR3",..: 4 1 1 1 1 1 1 1 4 4 ...
## $ LandContour : Factor w/ 4 levels "Bnk","HLS","Low",..: 4 4 4 4 2 4 4 4 4 4 ...
## $ Utilities : Factor w/ 1 level "AllPub": 1 1 1 1 1 1 1 1 1 1 ...
## $ LotConfig : Factor w/ 5 levels "Corner","CulDSac",..: 5 1 5 5 5 1 5 5 5 1 ...
## $ LandSlope : Factor w/ 3 levels "Gtl","Mod","Sev": 1 1 1 1 1 1 1 1 1 1 ...
## $ Neighborhood : Factor w/ 25 levels "Blmngtn","Blueste",..: 13 13 9 9 22 9 9 9 9 13 ...
## $ Condition1 : Factor w/ 9 levels "Artery","Feedr",..: 2 3 3 3 3 3 3 3 3 3 ...
## $ Condition2 : Factor w/ 5 levels "Artery","Feedr",..: 3 3 3 3 3 3 3 3 3 3 ...
## $ BldgType : Factor w/ 5 levels "1Fam","2fmCon",..: 1 1 1 1 5 1 1 1 1 1 ...
## $ HouseStyle : Factor w/ 7 levels "1.5Fin","1.5Unf",..: 3 3 5 5 3 5 3 5 3 3 ...
## $ OverallQual : int 5 6 5 6 8 6 6 6 7 4 ...
## $ OverallCond : int 6 6 5 6 5 5 7 5 5 5 ...
## $ YearBuilt : int 1961 1958 1997 1998 1992 1993 1992 1998 1990 1970 ...
## $ YearRemodAdd : int 1961 1958 1998 1998 1992 1994 2007 1998 1990 1970 ...
## $ RoofStyle : Factor w/ 6 levels "Flat","Gable",..: 2 4 2 2 2 2 2 2 2 2 ...
## $ RoofMatl : Factor w/ 4 levels "CompShg","Tar&Grv",..: 1 1 1 1 1 1 1 1 1 1 ...
## $ Exterior1st : Factor w/ 13 levels "AsbShng","AsphShn",..: 11 12 11 11 7 7 7 11 7 9 ...
## $ Exterior2nd : Factor w/ 15 levels "AsbShng","AsphShn",..: 13 14 13 13 7 7 7 13 7 10 ...
## $ MasVnrType : Factor w/ 4 levels "BrkCmn","BrkFace",..: 3 2 3 2 3 3 3 3 3 3 ...
## $ MasVnrArea : num 0 108 0 20 0 0 0 0 0 0 ...
## $ ExterQual : Factor w/ 4 levels "Ex","Fa","Gd",..: 4 4 4 4 3 4 4 4 4 4 ...
## $ ExterCond : Factor w/ 5 levels "Ex","Fa","Gd",..: 5 5 5 5 5 5 3 5 5 5 ...
## $ Foundation : Factor w/ 6 levels "BrkTil","CBlock",..: 2 2 3 3 3 3 3 3 3 2 ...
## $ BsmtQual : Factor w/ 5 levels "Ex","Fa","Gd",..: 5 5 3 5 3 3 3 3 3 5 ...
## $ BsmtCond : Factor w/ 5 levels "Fa","Gd","None",..: 5 5 5 5 5 5 5 5 5 5 ...
## $ BsmtExposure : Factor w/ 5 levels "Av","Gd","Mn",..: 4 4 4 4 4 4 4 4 2 4 ...
## $ BsmtFinType1 : Factor w/ 7 levels "ALQ","BLQ","GLQ",..: 6 1 3 3 1 7 1 7 3 1 ...
## $ BsmtFinSF1 : num 468 923 791 602 263 0 935 0 637 804 ...
## $ BsmtFinType2 : Factor w/ 7 levels "ALQ","BLQ","GLQ",..: 4 7 7 7 7 7 7 7 7 6 ...
## $ BsmtFinSF2 : num 144 0 0 0 0 0 0 0 0 78 ...
## $ BsmtUnfSF : num 270 406 137 324 1017 ...
## $ TotalBsmtSF : num 882 1329 928 926 1280 ...
## $ Heating : Factor w/ 4 levels "GasA","GasW",..: 1 1 1 1 1 1 1 1 1 1 ...
## $ HeatingQC : Factor w/ 5 levels "Ex","Fa","Gd",..: 5 5 3 1 1 3 1 3 3 5 ...
## $ CentralAir : Factor w/ 2 levels "N","Y": 2 2 2 2 2 2 2 2 2 2 ...
## $ Electrical : Factor w/ 4 levels "FuseA","FuseF",..: 4 4 4 4 4 4 4 4 4 4 ...
## $ X1stFlrSF : int 896 1329 928 926 1280 763 1187 789 1341 882 ...
## $ X2ndFlrSF : int 0 0 701 678 0 892 0 676 0 0 ...
## $ LowQualFinSF : int 0 0 0 0 0 0 0 0 0 0 ...
## $ GrLivArea : int 896 1329 1629 1604 1280 1655 1187 1465 1341 882 ...
## $ BsmtFullBath : num 0 0 0 0 0 0 1 0 1 1 ...
## $ BsmtHalfBath : num 0 0 0 0 0 0 0 0 0 0 ...
## $ FullBath : int 1 1 2 2 2 2 2 2 1 1 ...
## $ HalfBath : int 0 1 1 1 0 1 0 1 1 0 ...
## $ BedroomAbvGr : int 2 3 3 3 2 3 3 3 2 2 ...
## $ KitchenAbvGr : int 1 1 1 1 1 1 1 1 1 1 ...
## $ KitchenQual : Factor w/ 4 levels "Ex","Fa","Gd",..: 4 3 4 3 3 4 4 4 3 4 ...
## $ TotRmsAbvGrd : int 5 6 6 7 5 7 6 7 5 4 ...
## $ Functional : Factor w/ 7 levels "Maj1","Maj2",..: 7 7 7 7 7 7 7 7 7 7 ...
## $ Fireplaces : int 0 0 1 1 0 1 0 1 1 0 ...
## $ FireplaceQu : Factor w/ 6 levels "Ex","Fa","Gd",..: 4 4 6 3 4 6 4 3 5 4 ...
## $ GarageType : Factor w/ 7 levels "2Types","Attchd",..: 2 2 2 2 2 2 2 2 2 2 ...
## $ GarageYrBlt : num 1961 1958 1997 1998 1992 ...
## $ GarageFinish : Factor w/ 4 levels "Fin","None","RFn",..: 4 4 1 1 3 1 1 1 4 1 ...
## $ GarageCars : num 1 1 2 2 2 2 2 2 2 2 ...
## $ GarageArea : num 730 312 482 470 506 440 420 393 506 525 ...
## $ GarageQual : Factor w/ 5 levels "Fa","Gd","None",..: 5 5 5 5 5 5 5 5 5 5 ...
## $ GarageCond : Factor w/ 6 levels "Ex","Fa","Gd",..: 6 6 6 6 6 6 6 6 6 6 ...
## $ PavedDrive : Factor w/ 3 levels "N","P","Y": 3 3 3 3 3 3 3 3 3 3 ...
## $ WoodDeckSF : int 140 393 212 360 0 157 483 0 192 240 ...
## $ OpenPorchSF : int 0 36 34 36 82 84 21 75 0 0 ...
## $ EnclosedPorch: int 0 0 0 0 0 0 0 0 0 0 ...
## $ X3SsnPorch : int 0 0 0 0 0 0 0 0 0 0 ...
## $ ScreenPorch : int 120 0 0 0 144 0 0 0 0 0 ...
## $ PoolArea : int 0 0 0 0 0 0 0 0 0 0 ...
## $ PoolQC : Factor w/ 3 levels "Ex","Gd","None": 3 3 3 3 3 3 3 3 3 3 ...
## $ Fence : Factor w/ 5 levels "GdPrv","GdWo",..: 3 5 3 5 5 5 1 5 5 3 ...
## $ MiscFeature : Factor w/ 4 levels "Gar2","None",..: 2 1 2 2 2 2 4 2 2 2 ...
## $ MiscVal : int 0 12500 0 0 0 0 500 0 0 0 ...
## $ MoSold : int 6 6 3 6 1 4 3 5 2 4 ...
## $ YrSold : int 2010 2010 2010 2010 2010 2010 2010 2010 2010 2010 ...
## $ SaleType : Factor w/ 9 levels "COD","Con","ConLD",..: 9 9 9 9 9 9 9 9 9 9 ...
## $ SaleCondition: Factor w/ 6 levels "Abnorml","AdjLand",..: 5 5 5 5 5 5 5 5 5 5 ...
str(train)
## 'data.frame': 1460 obs. of 81 variables:
## $ Id : int 1 2 3 4 5 6 7 8 9 10 ...
## $ MSSubClass : int 60 20 60 70 60 50 20 60 50 190 ...
## $ MSZoning : Factor w/ 5 levels "C (all)","FV",..: 4 4 4 4 4 4 4 4 5 4 ...
## $ LotFrontage : int 65 80 68 60 84 85 75 69 51 50 ...
## $ LotArea : int 8450 9600 11250 9550 14260 14115 10084 10382 6120 7420 ...
## $ Street : Factor w/ 2 levels "Grvl","Pave": 2 2 2 2 2 2 2 2 2 2 ...
## $ Alley : Factor w/ 3 levels "Grvl","None",..: 2 2 2 2 2 2 2 2 2 2 ...
## $ LotShape : Factor w/ 4 levels "IR1","IR2","IR3",..: 4 4 1 1 1 1 4 1 4 4 ...
## $ LandContour : Factor w/ 4 levels "Bnk","HLS","Low",..: 4 4 4 4 4 4 4 4 4 4 ...
## $ Utilities : Factor w/ 2 levels "AllPub","NoSeWa": 1 1 1 1 1 1 1 1 1 1 ...
## $ LotConfig : Factor w/ 5 levels "Corner","CulDSac",..: 5 3 5 1 3 5 5 1 5 1 ...
## $ LandSlope : Factor w/ 3 levels "Gtl","Mod","Sev": 1 1 1 1 1 1 1 1 1 1 ...
## $ Neighborhood : Factor w/ 25 levels "Blmngtn","Blueste",..: 6 25 6 7 14 12 21 17 18 4 ...
## $ Condition1 : Factor w/ 9 levels "Artery","Feedr",..: 3 2 3 3 3 3 3 5 1 1 ...
## $ Condition2 : Factor w/ 8 levels "Artery","Feedr",..: 3 3 3 3 3 3 3 3 3 1 ...
## $ BldgType : Factor w/ 5 levels "1Fam","2fmCon",..: 1 1 1 1 1 1 1 1 1 2 ...
## $ HouseStyle : Factor w/ 8 levels "1.5Fin","1.5Unf",..: 6 3 6 6 6 1 3 6 1 2 ...
## $ OverallQual : int 7 6 7 7 8 5 8 7 7 5 ...
## $ OverallCond : int 5 8 5 5 5 5 5 6 5 6 ...
## $ YearBuilt : int 2003 1976 2001 1915 2000 1993 2004 1973 1931 1939 ...
## $ YearRemodAdd : int 2003 1976 2002 1970 2000 1995 2005 1973 1950 1950 ...
## $ RoofStyle : Factor w/ 6 levels "Flat","Gable",..: 2 2 2 2 2 2 2 2 2 2 ...
## $ RoofMatl : Factor w/ 8 levels "ClyTile","CompShg",..: 2 2 2 2 2 2 2 2 2 2 ...
## $ Exterior1st : Factor w/ 15 levels "AsbShng","AsphShn",..: 13 9 13 14 13 13 13 7 4 9 ...
## $ Exterior2nd : Factor w/ 16 levels "AsbShng","AsphShn",..: 14 9 14 16 14 14 14 7 16 9 ...
## $ MasVnrType : Factor w/ 4 levels "BrkCmn","BrkFace",..: 2 3 2 3 2 3 4 4 3 3 ...
## $ MasVnrArea : num 196 0 162 0 350 0 186 240 0 0 ...
## $ ExterQual : Factor w/ 4 levels "Ex","Fa","Gd",..: 3 4 3 4 3 4 3 4 4 4 ...
## $ ExterCond : Factor w/ 5 levels "Ex","Fa","Gd",..: 5 5 5 5 5 5 5 5 5 5 ...
## $ Foundation : Factor w/ 6 levels "BrkTil","CBlock",..: 3 2 3 1 3 6 3 2 1 1 ...
## $ BsmtQual : Factor w/ 5 levels "Ex","Fa","Gd",..: 3 3 3 5 3 3 1 3 5 5 ...
## $ BsmtCond : Factor w/ 5 levels "Fa","Gd","None",..: 5 5 5 2 5 5 5 5 5 5 ...
## $ BsmtExposure : Factor w/ 5 levels "Av","Gd","Mn",..: 4 2 3 4 1 4 1 3 4 4 ...
## $ BsmtFinType1 : Factor w/ 7 levels "ALQ","BLQ","GLQ",..: 3 1 3 1 3 3 3 1 7 3 ...
## $ BsmtFinSF1 : int 706 978 486 216 655 732 1369 859 0 851 ...
## $ BsmtFinType2 : Factor w/ 7 levels "ALQ","BLQ","GLQ",..: 7 7 7 7 7 7 7 2 7 7 ...
## $ BsmtFinSF2 : int 0 0 0 0 0 0 0 32 0 0 ...
## $ BsmtUnfSF : int 150 284 434 540 490 64 317 216 952 140 ...
## $ TotalBsmtSF : int 856 1262 920 756 1145 796 1686 1107 952 991 ...
## $ Heating : Factor w/ 6 levels "Floor","GasA",..: 2 2 2 2 2 2 2 2 2 2 ...
## $ HeatingQC : Factor w/ 5 levels "Ex","Fa","Gd",..: 1 1 1 3 1 1 1 1 3 1 ...
## $ CentralAir : Factor w/ 2 levels "N","Y": 2 2 2 2 2 2 2 2 2 2 ...
## $ Electrical : Factor w/ 5 levels "FuseA","FuseF",..: 5 5 5 5 5 5 5 5 2 5 ...
## $ X1stFlrSF : int 856 1262 920 961 1145 796 1694 1107 1022 1077 ...
## $ X2ndFlrSF : int 854 0 866 756 1053 566 0 983 752 0 ...
## $ LowQualFinSF : int 0 0 0 0 0 0 0 0 0 0 ...
## $ GrLivArea : int 1710 1262 1786 1717 2198 1362 1694 2090 1774 1077 ...
## $ BsmtFullBath : int 1 0 1 1 1 1 1 1 0 1 ...
## $ BsmtHalfBath : int 0 1 0 0 0 0 0 0 0 0 ...
## $ FullBath : int 2 2 2 1 2 1 2 2 2 1 ...
## $ HalfBath : int 1 0 1 0 1 1 0 1 0 0 ...
## $ BedroomAbvGr : int 3 3 3 3 4 1 3 3 2 2 ...
## $ KitchenAbvGr : int 1 1 1 1 1 1 1 1 2 2 ...
## $ KitchenQual : Factor w/ 4 levels "Ex","Fa","Gd",..: 3 4 3 3 3 4 3 4 4 4 ...
## $ TotRmsAbvGrd : int 8 6 6 7 9 5 7 7 8 5 ...
## $ Functional : Factor w/ 7 levels "Maj1","Maj2",..: 7 7 7 7 7 7 7 7 3 7 ...
## $ Fireplaces : int 0 1 1 1 1 0 1 2 2 2 ...
## $ FireplaceQu : Factor w/ 6 levels "Ex","Fa","Gd",..: 4 6 6 3 6 4 3 6 6 6 ...
## $ GarageType : Factor w/ 7 levels "2Types","Attchd",..: 2 2 2 6 2 2 2 2 6 2 ...
## $ GarageYrBlt : num 2003 1976 2001 1998 2000 ...
## $ GarageFinish : Factor w/ 4 levels "Fin","None","RFn",..: 3 3 3 4 3 4 3 3 4 3 ...
## $ GarageCars : int 2 2 2 3 3 2 2 2 2 1 ...
## $ GarageArea : int 548 460 608 642 836 480 636 484 468 205 ...
## $ GarageQual : Factor w/ 6 levels "Ex","Fa","Gd",..: 6 6 6 6 6 6 6 6 2 3 ...
## $ GarageCond : Factor w/ 6 levels "Ex","Fa","Gd",..: 6 6 6 6 6 6 6 6 6 6 ...
## $ PavedDrive : Factor w/ 3 levels "N","P","Y": 3 3 3 3 3 3 3 3 3 3 ...
## $ WoodDeckSF : int 0 298 0 0 192 40 255 235 90 0 ...
## $ OpenPorchSF : int 61 0 42 35 84 30 57 204 0 4 ...
## $ EnclosedPorch: int 0 0 0 272 0 0 0 228 205 0 ...
## $ X3SsnPorch : int 0 0 0 0 0 320 0 0 0 0 ...
## $ ScreenPorch : int 0 0 0 0 0 0 0 0 0 0 ...
## $ PoolArea : int 0 0 0 0 0 0 0 0 0 0 ...
## $ PoolQC : Factor w/ 4 levels "Ex","Fa","Gd",..: 4 4 4 4 4 4 4 4 4 4 ...
## $ Fence : Factor w/ 5 levels "GdPrv","GdWo",..: 5 5 5 5 5 3 5 5 5 5 ...
## $ MiscFeature : Factor w/ 5 levels "Gar2","None",..: 2 2 2 2 2 4 2 4 2 2 ...
## $ MiscVal : int 0 0 0 0 0 700 0 350 0 0 ...
## $ MoSold : int 2 5 9 2 12 10 8 11 4 1 ...
## $ YrSold : int 2008 2007 2008 2006 2008 2009 2007 2009 2008 2008 ...
## $ SaleType : Factor w/ 9 levels "COD","Con","ConLD",..: 9 9 9 9 9 9 9 9 9 9 ...
## $ SaleCondition: Factor w/ 6 levels "Abnorml","AdjLand",..: 5 5 5 1 5 5 5 5 1 5 ...
## $ SalePrice : int 208500 181500 223500 140000 250000 143000 307000 200000 129900 118000 ...
summary(test)
## Id MSSubClass MSZoning LotFrontage
## Min. :1461 Min. : 20.00 C (all): 15 Min. : 21.00
## 1st Qu.:1826 1st Qu.: 20.00 FV : 74 1st Qu.: 60.00
## Median :2190 Median : 50.00 RH : 10 Median : 67.00
## Mean :2190 Mean : 57.38 RL :1118 Mean : 68.33
## 3rd Qu.:2554 3rd Qu.: 70.00 RM : 242 3rd Qu.: 78.00
## Max. :2919 Max. :190.00 Max. :200.00
##
## LotArea Street Alley LotShape LandContour Utilities
## Min. : 1470 Grvl: 6 Grvl: 70 IR1:484 Bnk: 54 AllPub:1459
## 1st Qu.: 7391 Pave:1453 None:1352 IR2: 35 HLS: 70
## Median : 9399 Pave: 37 IR3: 6 Low: 24
## Mean : 9819 Reg:934 Lvl:1311
## 3rd Qu.:11518
## Max. :56600
##
## LotConfig LandSlope Neighborhood Condition1 Condition2
## Corner : 248 Gtl:1396 NAmes :218 Norm :1251 Artery: 3
## CulDSac: 82 Mod: 60 OldTown:126 Feedr : 83 Feedr : 7
## FR2 : 38 Sev: 3 CollgCr:117 Artery : 44 Norm :1444
## FR3 : 10 Somerst: 96 RRAn : 24 PosA : 3
## Inside :1081 Edwards: 94 PosN : 20 PosN : 2
## NridgHt: 89 RRAe : 17
## (Other):719 (Other): 20
## BldgType HouseStyle OverallQual OverallCond YearBuilt
## 1Fam :1205 1.5Fin:160 Min. : 1.000 Min. :1.000 Min. :1879
## 2fmCon: 31 1.5Unf: 5 1st Qu.: 5.000 1st Qu.:5.000 1st Qu.:1953
## Duplex: 57 1Story:745 Median : 6.000 Median :5.000 Median :1973
## Twnhs : 53 2.5Unf: 13 Mean : 6.079 Mean :5.554 Mean :1971
## TwnhsE: 113 2Story:427 3rd Qu.: 7.000 3rd Qu.:6.000 3rd Qu.:2001
## SFoyer: 46 Max. :10.000 Max. :9.000 Max. :2010
## SLvl : 63
## YearRemodAdd RoofStyle RoofMatl Exterior1st Exterior2nd
## Min. :1950 Flat : 7 CompShg:1442 VinylSd:511 VinylSd:511
## 1st Qu.:1963 Gable :1169 Tar&Grv: 12 MetalSd:230 MetalSd:233
## Median :1992 Gambrel: 11 WdShake: 4 HdBoard:220 HdBoard:199
## Mean :1984 Hip : 265 WdShngl: 1 Wd Sdng:205 Wd Sdng:194
## 3rd Qu.:2004 Mansard: 4 Plywood:113 Plywood:128
## Max. :2010 Shed : 3 CemntBd: 65 CmentBd: 66
## (Other):115 (Other):128
## MasVnrType MasVnrArea ExterQual ExterCond Foundation BsmtQual
## BrkCmn : 10 Min. : 0.00 Ex: 55 Ex: 9 BrkTil:165 Ex :137
## BrkFace:434 1st Qu.: 0.00 Fa: 21 Fa: 39 CBlock:601 Fa : 53
## None :894 Median : 0.00 Gd:491 Gd: 153 PConc :661 Gd :591
## Stone :121 Mean : 99.67 TA:892 Po: 2 Slab : 25 None: 44
## 3rd Qu.: 162.00 TA:1256 Stone : 5 TA :634
## Max. :1290.00 Wood : 2
##
## BsmtCond BsmtExposure BsmtFinType1 BsmtFinSF1 BsmtFinType2
## Fa : 59 Av :197 ALQ :209 Min. : 0.0 ALQ : 33
## Gd : 57 Gd :142 BLQ :121 1st Qu.: 0.0 BLQ : 35
## None: 45 Mn :125 GLQ :431 Median : 350.0 GLQ : 20
## Po : 3 No :951 LwQ : 80 Mean : 438.9 LwQ : 41
## TA :1295 None: 44 None: 42 3rd Qu.: 752.0 None: 42
## Rec :155 Max. :4010.0 Rec : 51
## Unf :421 Unf :1237
## BsmtFinSF2 BsmtUnfSF TotalBsmtSF Heating HeatingQC
## Min. : 0.00 Min. : 0.0 Min. : 0 GasA:1446 Ex:752
## 1st Qu.: 0.00 1st Qu.: 219.0 1st Qu.: 784 GasW: 9 Fa: 43
## Median : 0.00 Median : 460.0 Median : 988 Grav: 2 Gd:233
## Mean : 52.58 Mean : 553.9 Mean :1045 Wall: 2 Po: 2
## 3rd Qu.: 0.00 3rd Qu.: 797.5 3rd Qu.:1304 TA:429
## Max. :1526.00 Max. :2140.0 Max. :5095
##
## CentralAir Electrical X1stFlrSF X2ndFlrSF LowQualFinSF
## N: 101 FuseA: 94 Min. : 407.0 Min. : 0 Min. : 0.000
## Y:1358 FuseF: 23 1st Qu.: 873.5 1st Qu.: 0 1st Qu.: 0.000
## FuseP: 5 Median :1079.0 Median : 0 Median : 0.000
## SBrkr:1337 Mean :1156.5 Mean : 326 Mean : 3.543
## 3rd Qu.:1382.5 3rd Qu.: 676 3rd Qu.: 0.000
## Max. :5095.0 Max. :1862 Max. :1064.000
##
## GrLivArea BsmtFullBath BsmtHalfBath FullBath
## Min. : 407 Min. :0.0000 Min. :0.00000 Min. :0.000
## 1st Qu.:1118 1st Qu.:0.0000 1st Qu.:0.00000 1st Qu.:1.000
## Median :1432 Median :0.0000 Median :0.00000 Median :2.000
## Mean :1486 Mean :0.4339 Mean :0.06511 Mean :1.571
## 3rd Qu.:1721 3rd Qu.:1.0000 3rd Qu.:0.00000 3rd Qu.:2.000
## Max. :5095 Max. :3.0000 Max. :2.00000 Max. :4.000
##
## HalfBath BedroomAbvGr KitchenAbvGr KitchenQual TotRmsAbvGrd
## Min. :0.0000 Min. :0.000 Min. :0.000 Ex:105 Min. : 3.000
## 1st Qu.:0.0000 1st Qu.:2.000 1st Qu.:1.000 Fa: 31 1st Qu.: 5.000
## Median :0.0000 Median :3.000 Median :1.000 Gd:565 Median : 6.000
## Mean :0.3777 Mean :2.854 Mean :1.042 TA:758 Mean : 6.385
## 3rd Qu.:1.0000 3rd Qu.:3.000 3rd Qu.:1.000 3rd Qu.: 7.000
## Max. :2.0000 Max. :6.000 Max. :2.000 Max. :15.000
##
## Functional Fireplaces FireplaceQu GarageType GarageYrBlt
## Maj1: 5 Min. :0.0000 Ex : 19 2Types : 17 Min. :-1000
## Maj2: 4 1st Qu.:0.0000 Fa : 41 Attchd :853 1st Qu.: 1956
## Min1: 34 Median :0.0000 Gd :364 Basment: 17 Median : 1977
## Min2: 38 Mean :0.5812 None:730 BuiltIn: 98 Mean : 1819
## Mod : 20 3rd Qu.:1.0000 Po : 26 CarPort: 6 3rd Qu.: 2001
## Sev : 1 Max. :4.0000 TA :279 Detchd :392 Max. : 2207
## Typ :1357 None : 76
## GarageFinish GarageCars GarageArea GarageQual GarageCond
## Fin :367 Min. :0.000 Min. : 0.0 Fa : 76 Ex : 1
## None: 78 1st Qu.:1.000 1st Qu.: 317.5 Gd : 10 Fa : 39
## RFn :389 Median :2.000 Median : 480.0 None: 78 Gd : 6
## Unf :625 Mean :1.765 Mean : 472.4 Po : 2 None: 78
## 3rd Qu.:2.000 3rd Qu.: 576.0 TA :1293 Po : 7
## Max. :5.000 Max. :1488.0 TA :1328
##
## PavedDrive WoodDeckSF OpenPorchSF EnclosedPorch
## N: 126 Min. : 0.00 Min. : 0.00 Min. : 0.00
## P: 32 1st Qu.: 0.00 1st Qu.: 0.00 1st Qu.: 0.00
## Y:1301 Median : 0.00 Median : 28.00 Median : 0.00
## Mean : 93.17 Mean : 48.31 Mean : 24.24
## 3rd Qu.: 168.00 3rd Qu.: 72.00 3rd Qu.: 0.00
## Max. :1424.00 Max. :742.00 Max. :1012.00
##
## X3SsnPorch ScreenPorch PoolArea PoolQC Fence
## Min. : 0.000 Min. : 0.00 Min. : 0.000 Ex : 2 GdPrv: 59
## 1st Qu.: 0.000 1st Qu.: 0.00 1st Qu.: 0.000 Gd : 1 GdWo : 58
## Median : 0.000 Median : 0.00 Median : 0.000 None:1456 MnPrv: 172
## Mean : 1.794 Mean : 17.06 Mean : 1.744 MnWw : 1
## 3rd Qu.: 0.000 3rd Qu.: 0.00 3rd Qu.: 0.000 None :1169
## Max. :360.000 Max. :576.00 Max. :800.000
##
## MiscFeature MiscVal MoSold YrSold SaleType
## Gar2: 3 Min. : 0.00 Min. : 1.000 Min. :2006 WD :1259
## None:1408 1st Qu.: 0.00 1st Qu.: 4.000 1st Qu.:2007 New : 117
## Othr: 2 Median : 0.00 Median : 6.000 Median :2008 COD : 44
## Shed: 46 Mean : 58.17 Mean : 6.104 Mean :2008 ConLD : 17
## 3rd Qu.: 0.00 3rd Qu.: 8.000 3rd Qu.:2009 CWD : 8
## Max. :17000.00 Max. :12.000 Max. :2010 ConLI : 4
## (Other): 10
## SaleCondition
## Abnorml: 89
## AdjLand: 8
## Alloca : 12
## Family : 26
## Normal :1204
## Partial: 120
##
summary(train)
## Id MSSubClass MSZoning LotFrontage
## Min. : 1.0 Min. : 20.0 C (all): 10 Min. : 21.00
## 1st Qu.: 365.8 1st Qu.: 20.0 FV : 65 1st Qu.: 60.00
## Median : 730.5 Median : 50.0 RH : 16 Median : 69.00
## Mean : 730.5 Mean : 56.9 RL :1151 Mean : 69.86
## 3rd Qu.:1095.2 3rd Qu.: 70.0 RM : 218 3rd Qu.: 79.00
## Max. :1460.0 Max. :190.0 Max. :313.00
##
## LotArea Street Alley LotShape LandContour Utilities
## Min. : 1300 Grvl: 6 Grvl: 50 IR1:484 Bnk: 63 AllPub:1459
## 1st Qu.: 7554 Pave:1454 None:1369 IR2: 41 HLS: 50 NoSeWa: 1
## Median : 9478 Pave: 41 IR3: 10 Low: 36
## Mean : 10517 Reg:925 Lvl:1311
## 3rd Qu.: 11602
## Max. :215245
##
## LotConfig LandSlope Neighborhood Condition1 Condition2
## Corner : 263 Gtl:1382 NAmes :225 Norm :1260 Norm :1445
## CulDSac: 94 Mod: 65 CollgCr:150 Feedr : 81 Feedr : 6
## FR2 : 47 Sev: 13 OldTown:113 Artery : 48 Artery : 2
## FR3 : 4 Edwards:100 RRAn : 26 PosN : 2
## Inside :1052 Somerst: 86 PosN : 19 RRNn : 2
## Gilbert: 79 RRAe : 11 PosA : 1
## (Other):707 (Other): 15 (Other): 2
## BldgType HouseStyle OverallQual OverallCond YearBuilt
## 1Fam :1220 1Story :726 Min. : 1.000 Min. :1.000 Min. :1872
## 2fmCon: 31 2Story :445 1st Qu.: 5.000 1st Qu.:5.000 1st Qu.:1954
## Duplex: 52 1.5Fin :154 Median : 6.000 Median :5.000 Median :1973
## Twnhs : 43 SLvl : 65 Mean : 6.099 Mean :5.575 Mean :1971
## TwnhsE: 114 SFoyer : 37 3rd Qu.: 7.000 3rd Qu.:6.000 3rd Qu.:2000
## 1.5Unf : 14 Max. :10.000 Max. :9.000 Max. :2010
## (Other): 19
## YearRemodAdd RoofStyle RoofMatl Exterior1st Exterior2nd
## Min. :1950 Flat : 13 CompShg:1434 VinylSd:515 VinylSd:504
## 1st Qu.:1967 Gable :1141 Tar&Grv: 11 HdBoard:222 MetalSd:214
## Median :1994 Gambrel: 11 WdShngl: 6 MetalSd:220 HdBoard:207
## Mean :1985 Hip : 286 WdShake: 5 Wd Sdng:206 Wd Sdng:197
## 3rd Qu.:2004 Mansard: 7 ClyTile: 1 Plywood:108 Plywood:142
## Max. :2010 Shed : 2 Membran: 1 CemntBd: 61 CmentBd: 60
## (Other): 2 (Other):128 (Other):136
## MasVnrType MasVnrArea ExterQual ExterCond Foundation BsmtQual
## BrkCmn : 15 Min. : 0.0 Ex: 52 Ex: 3 BrkTil:146 Ex :121
## BrkFace:445 1st Qu.: 0.0 Fa: 14 Fa: 28 CBlock:634 Fa : 35
## None :872 Median : 0.0 Gd:488 Gd: 146 PConc :647 Gd :618
## Stone :128 Mean : 103.1 TA:906 Po: 1 Slab : 24 None: 37
## 3rd Qu.: 164.2 TA:1282 Stone : 6 TA :649
## Max. :1600.0 Wood : 3
##
## BsmtCond BsmtExposure BsmtFinType1 BsmtFinSF1 BsmtFinType2
## Fa : 45 Av :221 ALQ :220 Min. : 0.0 ALQ : 19
## Gd : 65 Gd :134 BLQ :148 1st Qu.: 0.0 BLQ : 33
## None: 37 Mn :114 GLQ :418 Median : 383.5 GLQ : 14
## Po : 2 No :953 LwQ : 74 Mean : 443.6 LwQ : 46
## TA :1311 None: 38 None: 37 3rd Qu.: 712.2 None: 38
## Rec :133 Max. :5644.0 Rec : 54
## Unf :430 Unf :1256
## BsmtFinSF2 BsmtUnfSF TotalBsmtSF Heating HeatingQC
## Min. : 0.00 Min. : 0.0 Min. : 0.0 Floor: 1 Ex:741
## 1st Qu.: 0.00 1st Qu.: 223.0 1st Qu.: 795.8 GasA :1428 Fa: 49
## Median : 0.00 Median : 477.5 Median : 991.5 GasW : 18 Gd:241
## Mean : 46.55 Mean : 567.2 Mean :1057.4 Grav : 7 Po: 1
## 3rd Qu.: 0.00 3rd Qu.: 808.0 3rd Qu.:1298.2 OthW : 2 TA:428
## Max. :1474.00 Max. :2336.0 Max. :6110.0 Wall : 4
##
## CentralAir Electrical X1stFlrSF X2ndFlrSF LowQualFinSF
## N: 95 FuseA: 94 Min. : 334 Min. : 0 Min. : 0.000
## Y:1365 FuseF: 27 1st Qu.: 882 1st Qu.: 0 1st Qu.: 0.000
## FuseP: 3 Median :1087 Median : 0 Median : 0.000
## Mix : 1 Mean :1163 Mean : 347 Mean : 5.845
## SBrkr:1335 3rd Qu.:1391 3rd Qu.: 728 3rd Qu.: 0.000
## Max. :4692 Max. :2065 Max. :572.000
##
## GrLivArea BsmtFullBath BsmtHalfBath FullBath
## Min. : 334 Min. :0.0000 Min. :0.00000 Min. :0.000
## 1st Qu.:1130 1st Qu.:0.0000 1st Qu.:0.00000 1st Qu.:1.000
## Median :1464 Median :0.0000 Median :0.00000 Median :2.000
## Mean :1515 Mean :0.4253 Mean :0.05753 Mean :1.565
## 3rd Qu.:1777 3rd Qu.:1.0000 3rd Qu.:0.00000 3rd Qu.:2.000
## Max. :5642 Max. :3.0000 Max. :2.00000 Max. :3.000
##
## HalfBath BedroomAbvGr KitchenAbvGr KitchenQual TotRmsAbvGrd
## Min. :0.0000 Min. :0.000 Min. :0.000 Ex:100 Min. : 2.000
## 1st Qu.:0.0000 1st Qu.:2.000 1st Qu.:1.000 Fa: 39 1st Qu.: 5.000
## Median :0.0000 Median :3.000 Median :1.000 Gd:586 Median : 6.000
## Mean :0.3829 Mean :2.866 Mean :1.047 TA:735 Mean : 6.518
## 3rd Qu.:1.0000 3rd Qu.:3.000 3rd Qu.:1.000 3rd Qu.: 7.000
## Max. :2.0000 Max. :8.000 Max. :3.000 Max. :14.000
##
## Functional Fireplaces FireplaceQu GarageType GarageYrBlt
## Maj1: 14 Min. :0.000 Ex : 24 2Types : 6 Min. :-1000
## Maj2: 5 1st Qu.:0.000 Fa : 33 Attchd :870 1st Qu.: 1958
## Min1: 31 Median :1.000 Gd :380 Basment: 19 Median : 1977
## Min2: 34 Mean :0.613 None:690 BuiltIn: 88 Mean : 1813
## Mod : 15 3rd Qu.:1.000 Po : 20 CarPort: 9 3rd Qu.: 2001
## Sev : 1 Max. :3.000 TA :313 Detchd :387 Max. : 2010
## Typ :1360 None : 81
## GarageFinish GarageCars GarageArea GarageQual GarageCond
## Fin :352 Min. :0.000 Min. : 0.0 Ex : 3 Ex : 2
## None: 81 1st Qu.:1.000 1st Qu.: 334.5 Fa : 48 Fa : 35
## RFn :422 Median :2.000 Median : 480.0 Gd : 14 Gd : 9
## Unf :605 Mean :1.767 Mean : 473.0 None: 81 None: 81
## 3rd Qu.:2.000 3rd Qu.: 576.0 Po : 3 Po : 7
## Max. :4.000 Max. :1418.0 TA :1311 TA :1326
##
## PavedDrive WoodDeckSF OpenPorchSF EnclosedPorch X3SsnPorch
## N: 90 Min. : 0.00 Min. : 0.00 Min. : 0.00 Min. : 0.00
## P: 30 1st Qu.: 0.00 1st Qu.: 0.00 1st Qu.: 0.00 1st Qu.: 0.00
## Y:1340 Median : 0.00 Median : 25.00 Median : 0.00 Median : 0.00
## Mean : 94.24 Mean : 46.66 Mean : 21.95 Mean : 3.41
## 3rd Qu.:168.00 3rd Qu.: 68.00 3rd Qu.: 0.00 3rd Qu.: 0.00
## Max. :857.00 Max. :547.00 Max. :552.00 Max. :508.00
##
## ScreenPorch PoolArea PoolQC Fence MiscFeature
## Min. : 0.00 Min. : 0.000 Ex : 2 GdPrv: 59 Gar2: 2
## 1st Qu.: 0.00 1st Qu.: 0.000 Fa : 2 GdWo : 54 None:1406
## Median : 0.00 Median : 0.000 Gd : 3 MnPrv: 157 Othr: 2
## Mean : 15.06 Mean : 2.759 None:1453 MnWw : 11 Shed: 49
## 3rd Qu.: 0.00 3rd Qu.: 0.000 None :1179 TenC: 1
## Max. :480.00 Max. :738.000
##
## MiscVal MoSold YrSold SaleType
## Min. : 0.00 Min. : 1.000 Min. :2006 WD :1267
## 1st Qu.: 0.00 1st Qu.: 5.000 1st Qu.:2007 New : 122
## Median : 0.00 Median : 6.000 Median :2008 COD : 43
## Mean : 43.49 Mean : 6.322 Mean :2008 ConLD : 9
## 3rd Qu.: 0.00 3rd Qu.: 8.000 3rd Qu.:2009 ConLI : 5
## Max. :15500.00 Max. :12.000 Max. :2010 ConLw : 5
## (Other): 9
## SaleCondition SalePrice
## Abnorml: 101 Min. : 34900
## AdjLand: 4 1st Qu.:129975
## Alloca : 12 Median :163000
## Family : 20 Mean :180921
## Normal :1198 3rd Qu.:214000
## Partial: 125 Max. :755000
##
##Histogram of SalePrice
hist(train$SalePrice/1000)
hist(log(train$SalePrice/1000))
##Models
set.seed(100)
#Caret package for train() function, useful tutorial from Cran https://cran.r-project.org/web/packages/caret/vignettes/caret.html#:~:text=The%20caret%20package%20(short%20for%20Classification%20And%20REgression,the%20package%20startup%20time%20can%20be%20greatly%20decreased).
library(caret)
## Warning: package 'caret' was built under R version 4.0.4
## Loading required package: lattice
## Loading required package: ggplot2
#Final clean up of data for model.
train_new <- train %>% select(-Id)
control = trainControl(method = "cv", number = 5, verboseIter = FALSE)
# GLM Regression
glm <- train(SalePrice~ ., data=train_new, method="glm", trControl = control)
## Warning in predict.lm(object, newdata, se.fit, scale = 1, type = if (type == :
## prediction from a rank-deficient fit may be misleading
## Warning in predict.lm(object, newdata, se.fit, scale = 1, type = if (type == :
## prediction from a rank-deficient fit may be misleading
## Warning in predict.lm(object, newdata, se.fit, scale = 1, type = if (type == :
## prediction from a rank-deficient fit may be misleading
## Warning in predict.lm(object, newdata, se.fit, scale = 1, type = if (type == :
## prediction from a rank-deficient fit may be misleading
## Warning in predict.lm(object, newdata, se.fit, scale = 1, type = if (type == :
## prediction from a rank-deficient fit may be misleading
glm
## Generalized Linear Model
##
## 1460 samples
## 79 predictor
##
## No pre-processing
## Resampling: Cross-Validated (5 fold)
## Summary of sample sizes: 1168, 1168, 1167, 1169, 1168
## Resampling results:
##
## RMSE Rsquared MAE
## 58871.56 0.6052654 20704.41
summary(glm)
##
## Call:
## NULL
##
## Deviance Residuals:
## Min 1Q Median 3Q Max
## -177392 -9070 0 9648 177392
##
## Coefficients: (8 not defined because of singularities)
## Estimate Std. Error t value Pr(>|t|)
## (Intercept) -5.785e+05 1.057e+06 -0.547 0.584242
## MSSubClass -5.501e+01 8.253e+01 -0.667 0.505199
## MSZoningFV 3.239e+04 1.198e+04 2.703 0.006970 **
## MSZoningRH 2.254e+04 1.187e+04 1.898 0.057889 .
## MSZoningRL 2.515e+04 1.021e+04 2.464 0.013890 *
## MSZoningRM 2.183e+04 9.571e+03 2.281 0.022735 *
## LotFrontage 4.296e+01 4.386e+01 0.980 0.327477
## LotArea 7.042e-01 1.092e-01 6.448 1.64e-10 ***
## StreetPave 3.326e+04 1.217e+04 2.733 0.006361 **
## AlleyNone -1.432e+03 4.207e+03 -0.340 0.733595
## AlleyPave -5.308e+02 6.016e+03 -0.088 0.929711
## LotShapeIR2 4.998e+03 4.206e+03 1.188 0.234915
## LotShapeIR3 5.419e+03 8.840e+03 0.613 0.539996
## LotShapeReg 1.812e+03 1.598e+03 1.134 0.257103
## LandContourHLS 7.518e+03 5.112e+03 1.471 0.141635
## LandContourLow -1.122e+04 6.377e+03 -1.759 0.078818 .
## LandContourLvl 5.441e+03 3.696e+03 1.472 0.141236
## UtilitiesNoSeWa -3.785e+04 2.629e+04 -1.439 0.150289
## LotConfigCulDSac 8.583e+03 3.296e+03 2.604 0.009324 **
## LotConfigFR2 -7.337e+03 4.009e+03 -1.830 0.067476 .
## LotConfigFR3 -1.681e+04 1.253e+04 -1.341 0.180014
## LotConfigInside -1.209e+03 1.783e+03 -0.678 0.497919
## LandSlopeMod 7.408e+03 3.968e+03 1.867 0.062181 .
## LandSlopeSev -4.115e+04 1.140e+04 -3.611 0.000317 ***
## NeighborhoodBlueste 7.870e+03 1.920e+04 0.410 0.681999
## NeighborhoodBrDale -2.257e+03 1.094e+04 -0.206 0.836619
## NeighborhoodBrkSide -5.649e+03 9.461e+03 -0.597 0.550537
## NeighborhoodClearCr -1.454e+04 9.190e+03 -1.583 0.113789
## NeighborhoodCollgCr -1.023e+04 7.238e+03 -1.413 0.157906
## NeighborhoodCrawfor 1.185e+04 8.528e+03 1.389 0.165048
## NeighborhoodEdwards -2.151e+04 7.976e+03 -2.697 0.007089 **
## NeighborhoodGilbert -1.152e+04 7.653e+03 -1.505 0.132463
## NeighborhoodIDOTRR -1.204e+04 1.072e+04 -1.123 0.261536
## NeighborhoodMeadowV -6.905e+03 1.117e+04 -0.618 0.536625
## NeighborhoodMitchel -2.106e+04 8.152e+03 -2.583 0.009903 **
## NeighborhoodNAmes -1.739e+04 7.817e+03 -2.225 0.026274 *
## NeighborhoodNoRidge 2.542e+04 8.395e+03 3.028 0.002511 **
## NeighborhoodNPkVill 1.305e+04 1.401e+04 0.932 0.351715
## NeighborhoodNridgHt 1.782e+04 7.493e+03 2.378 0.017560 *
## NeighborhoodNWAmes -1.740e+04 7.985e+03 -2.179 0.029512 *
## NeighborhoodOldTown -1.431e+04 9.636e+03 -1.485 0.137899
## NeighborhoodSawyer -1.124e+04 8.100e+03 -1.387 0.165582
## NeighborhoodSawyerW -2.982e+03 7.762e+03 -0.384 0.700912
## NeighborhoodSomerst -2.465e+03 8.986e+03 -0.274 0.783904
## NeighborhoodStoneBr 3.920e+04 8.269e+03 4.741 2.38e-06 ***
## NeighborhoodSWISU -8.380e+03 9.682e+03 -0.866 0.386929
## NeighborhoodTimber -9.554e+03 8.080e+03 -1.182 0.237306
## NeighborhoodVeenker -3.557e+02 1.047e+04 -0.034 0.972900
## Condition1Feedr 7.161e+03 5.006e+03 1.430 0.152879
## Condition1Norm 1.633e+04 4.179e+03 3.907 9.84e-05 ***
## Condition1PosA 9.238e+03 9.986e+03 0.925 0.355077
## Condition1PosN 1.507e+04 7.421e+03 2.030 0.042552 *
## Condition1RRAe -1.536e+04 9.049e+03 -1.697 0.089959 .
## Condition1RRAn 1.321e+04 6.938e+03 1.904 0.057130 .
## Condition1RRNe -3.573e+03 1.744e+04 -0.205 0.837681
## Condition1RRNn 1.145e+04 1.281e+04 0.894 0.371622
## Condition2Feedr -6.016e+03 2.336e+04 -0.258 0.796827
## Condition2Norm -1.018e+04 2.025e+04 -0.503 0.615069
## Condition2PosA 4.223e+04 3.694e+04 1.143 0.253148
## Condition2PosN -2.391e+05 2.758e+04 -8.669 < 2e-16 ***
## Condition2RRAe -1.272e+05 6.495e+04 -1.959 0.050391 .
## Condition2RRAn -2.308e+04 3.143e+04 -0.734 0.462942
## Condition2RRNn -2.762e+03 2.702e+04 -0.102 0.918581
## BldgType2fmCon -3.090e+03 1.246e+04 -0.248 0.804241
## BldgTypeDuplex -6.982e+03 7.399e+03 -0.944 0.345553
## BldgTypeTwnhs -1.837e+04 9.993e+03 -1.838 0.066303 .
## BldgTypeTwnhsE -1.451e+04 9.006e+03 -1.611 0.107385
## HouseStyle1.5Unf 1.186e+04 7.924e+03 1.496 0.134855
## HouseStyle1Story 5.143e+03 4.376e+03 1.175 0.240094
## HouseStyle2.5Fin -1.749e+04 1.236e+04 -1.415 0.157306
## HouseStyle2.5Unf -9.411e+03 9.221e+03 -1.021 0.307661
## HouseStyle2Story -6.061e+03 3.490e+03 -1.736 0.082747 .
## HouseStyleSFoyer 1.230e+03 6.244e+03 0.197 0.843881
## HouseStyleSLvl 3.820e+03 5.543e+03 0.689 0.490888
## OverallQual 6.773e+03 1.011e+03 6.701 3.16e-11 ***
## OverallCond 5.800e+03 8.703e+02 6.664 4.05e-11 ***
## YearBuilt 3.208e+02 7.687e+01 4.174 3.21e-05 ***
## YearRemodAdd 1.048e+02 5.569e+01 1.883 0.059983 .
## RoofStyleGable 9.507e+03 1.841e+04 0.516 0.605684
## RoofStyleGambrel 1.280e+04 2.016e+04 0.635 0.525569
## RoofStyleHip 9.300e+03 1.848e+04 0.503 0.614927
## RoofStyleMansard 1.980e+04 2.137e+04 0.927 0.354340
## RoofStyleShed 9.950e+04 3.446e+04 2.887 0.003954 **
## RoofMatlCompShg 5.742e+05 5.264e+04 10.907 < 2e-16 ***
## RoofMatlMembran 6.691e+05 6.248e+04 10.708 < 2e-16 ***
## RoofMatlMetal 6.370e+05 6.208e+04 10.261 < 2e-16 ***
## RoofMatlRoll 5.615e+05 5.825e+04 9.639 < 2e-16 ***
## `RoofMatlTar&Grv` 5.750e+05 5.643e+04 10.189 < 2e-16 ***
## RoofMatlWdShake 5.659e+05 5.497e+04 10.295 < 2e-16 ***
## RoofMatlWdShngl 6.290e+05 5.359e+04 11.736 < 2e-16 ***
## Exterior1stAsphShn -2.396e+04 3.293e+04 -0.728 0.467036
## Exterior1stBrkComm -3.523e+03 2.773e+04 -0.127 0.898910
## Exterior1stBrkFace 7.899e+03 1.275e+04 0.620 0.535677
## Exterior1stCBlock -1.468e+04 2.722e+04 -0.539 0.589707
## Exterior1stCemntBd -1.143e+04 1.901e+04 -0.601 0.547743
## Exterior1stHdBoard -1.289e+04 1.293e+04 -0.997 0.319076
## Exterior1stImStucc -2.201e+04 2.811e+04 -0.783 0.433694
## Exterior1stMetalSd -5.752e+03 1.458e+04 -0.395 0.693245
## Exterior1stPlywood -1.363e+04 1.276e+04 -1.068 0.285533
## Exterior1stStone -1.015e+03 2.426e+04 -0.042 0.966636
## Exterior1stStucco -7.053e+03 1.407e+04 -0.501 0.616207
## Exterior1stVinylSd -1.379e+04 1.332e+04 -1.035 0.300880
## `Exterior1stWd Sdng` -1.373e+04 1.237e+04 -1.110 0.267084
## Exterior1stWdShing -9.311e+03 1.335e+04 -0.697 0.485711
## Exterior2ndAsphShn 1.126e+04 2.217e+04 0.508 0.611543
## `Exterior2ndBrk Cmn` 5.616e+03 2.004e+04 0.280 0.779326
## Exterior2ndBrkFace 3.866e+03 1.320e+04 0.293 0.769730
## Exterior2ndCBlock NA NA NA NA
## Exterior2ndCmentBd 1.196e+04 1.869e+04 0.640 0.522382
## Exterior2ndHdBoard 8.066e+03 1.241e+04 0.650 0.515847
## Exterior2ndImStucc 1.675e+04 1.433e+04 1.169 0.242711
## Exterior2ndMetalSd 5.623e+03 1.419e+04 0.396 0.691918
## Exterior2ndOther -1.798e+04 2.704e+04 -0.665 0.506211
## Exterior2ndPlywood 6.325e+03 1.205e+04 0.525 0.599748
## Exterior2ndStone -1.132e+04 1.711e+04 -0.661 0.508463
## Exterior2ndStucco 5.401e+03 1.360e+04 0.397 0.691365
## Exterior2ndVinylSd 1.276e+04 1.280e+04 0.996 0.319257
## `Exterior2ndWd Sdng` 1.173e+04 1.194e+04 0.983 0.325832
## `Exterior2ndWd Shng` 5.316e+03 1.245e+04 0.427 0.669472
## MasVnrTypeBrkFace 4.135e+03 6.823e+03 0.606 0.544638
## MasVnrTypeNone 7.230e+03 6.894e+03 1.049 0.294568
## MasVnrTypeStone 9.385e+03 7.223e+03 1.299 0.194107
## MasVnrArea 2.082e+01 5.777e+00 3.604 0.000326 ***
## ExterQualFa -7.399e+03 1.107e+04 -0.668 0.503958
## ExterQualGd -2.081e+04 4.770e+03 -4.362 1.40e-05 ***
## ExterQualTA -2.001e+04 5.287e+03 -3.786 0.000161 ***
## ExterCondFa -2.930e+03 1.804e+04 -0.162 0.870993
## ExterCondGd -7.366e+03 1.720e+04 -0.428 0.668623
## ExterCondPo 7.985e+03 3.161e+04 0.253 0.800589
## ExterCondTA -4.372e+03 1.717e+04 -0.255 0.799049
## FoundationCBlock 2.791e+03 3.166e+03 0.882 0.378199
## FoundationPConc 4.015e+03 3.412e+03 1.177 0.239544
## FoundationSlab -7.081e+03 1.002e+04 -0.706 0.480055
## FoundationStone 9.910e+03 1.138e+04 0.871 0.383997
## FoundationWood -2.752e+04 1.475e+04 -1.866 0.062241 .
## BsmtQualFa -1.119e+04 6.339e+03 -1.766 0.077697 .
## BsmtQualGd -1.785e+04 3.328e+03 -5.365 9.70e-08 ***
## BsmtQualNone 3.741e+04 3.656e+04 1.023 0.306370
## BsmtQualTA -1.402e+04 4.142e+03 -3.386 0.000733 ***
## BsmtCondGd -5.998e+01 5.268e+03 -0.011 0.990917
## BsmtCondNone NA NA NA NA
## BsmtCondPo 6.657e+04 2.977e+04 2.236 0.025531 *
## BsmtCondTA 2.632e+03 4.239e+03 0.621 0.534789
## BsmtExposureGd 1.422e+04 2.992e+03 4.752 2.26e-06 ***
## BsmtExposureMn -3.518e+03 3.012e+03 -1.168 0.243068
## BsmtExposureNo -5.155e+03 2.173e+03 -2.373 0.017816 *
## BsmtExposureNone -1.070e+04 2.295e+04 -0.466 0.641243
## BsmtFinType1BLQ 2.939e+03 2.794e+03 1.052 0.292988
## BsmtFinType1GLQ 5.617e+03 2.516e+03 2.232 0.025780 *
## BsmtFinType1LwQ -3.205e+03 3.737e+03 -0.857 0.391341
## BsmtFinType1None NA NA NA NA
## BsmtFinType1Rec 1.675e+02 2.995e+03 0.056 0.955399
## BsmtFinType1Unf 2.820e+03 2.907e+03 0.970 0.332124
## BsmtFinSF1 3.849e+01 5.317e+00 7.240 7.99e-13 ***
## BsmtFinType2BLQ -1.294e+04 7.552e+03 -1.713 0.086879 .
## BsmtFinType2GLQ -2.497e+03 9.333e+03 -0.268 0.789092
## BsmtFinType2LwQ -1.396e+04 7.379e+03 -1.892 0.058727 .
## BsmtFinType2None -2.845e+04 2.493e+04 -1.141 0.253946
## BsmtFinType2Rec -9.945e+03 7.094e+03 -1.402 0.161177
## BsmtFinType2Unf -8.042e+03 7.557e+03 -1.064 0.287508
## BsmtFinSF2 3.161e+01 9.043e+00 3.495 0.000491 ***
## BsmtUnfSF 2.091e+01 4.873e+00 4.291 1.92e-05 ***
## TotalBsmtSF NA NA NA NA
## HeatingGasA 9.184e+03 2.550e+04 0.360 0.718816
## HeatingGasW 6.875e+03 2.630e+04 0.261 0.793802
## HeatingGrav 1.061e+03 2.798e+04 0.038 0.969773
## HeatingOthW -1.115e+04 3.143e+04 -0.355 0.722917
## HeatingWall 2.237e+04 2.966e+04 0.754 0.450822
## HeatingQCFa 7.830e+02 4.706e+03 0.166 0.867877
## HeatingQCGd -3.923e+03 2.061e+03 -1.904 0.057188 .
## HeatingQCPo 2.213e+03 2.651e+04 0.083 0.933493
## HeatingQCTA -3.199e+03 2.065e+03 -1.549 0.121667
## CentralAirY -1.720e+02 3.860e+03 -0.045 0.964454
## ElectricalFuseF 1.886e+01 5.741e+03 0.003 0.997380
## ElectricalFuseP -8.184e+03 1.857e+04 -0.441 0.659487
## ElectricalMix -4.139e+04 4.439e+04 -0.932 0.351323
## ElectricalSBrkr -2.123e+03 2.943e+03 -0.721 0.470804
## X1stFlrSF 4.425e+01 5.633e+00 7.856 8.70e-15 ***
## X2ndFlrSF 6.223e+01 5.686e+00 10.945 < 2e-16 ***
## LowQualFinSF -3.801e+00 1.901e+01 -0.200 0.841556
## GrLivArea NA NA NA NA
## BsmtFullBath 1.593e+03 1.976e+03 0.806 0.420379
## BsmtHalfBath -4.180e+02 3.022e+03 -0.138 0.889992
## FullBath 3.705e+03 2.196e+03 1.687 0.091816 .
## HalfBath 1.893e+03 2.090e+03 0.905 0.365390
## BedroomAbvGr -3.689e+03 1.362e+03 -2.708 0.006873 **
## KitchenAbvGr -1.377e+04 5.675e+03 -2.427 0.015376 *
## KitchenQualFa -2.000e+04 6.189e+03 -3.232 0.001263 **
## KitchenQualGd -2.356e+04 3.473e+03 -6.783 1.84e-11 ***
## KitchenQualTA -2.258e+04 3.917e+03 -5.766 1.03e-08 ***
## TotRmsAbvGrd 1.811e+03 9.535e+02 1.899 0.057825 .
## FunctionalMaj2 -1.417e+03 1.435e+04 -0.099 0.921333
## FunctionalMin1 7.270e+03 8.584e+03 0.847 0.397189
## FunctionalMin2 8.529e+03 8.611e+03 0.990 0.322140
## FunctionalMod -5.141e+03 1.053e+04 -0.488 0.625624
## FunctionalSev -3.934e+04 2.951e+04 -1.333 0.182706
## FunctionalTyp 1.823e+04 7.443e+03 2.450 0.014431 *
## Fireplaces 6.223e+03 2.550e+03 2.441 0.014795 *
## FireplaceQuFa -9.088e+02 6.868e+03 -0.132 0.894760
## FireplaceQuGd 2.683e+03 5.309e+03 0.505 0.613432
## FireplaceQuNone 8.714e+03 6.215e+03 1.402 0.161113
## FireplaceQuPo 1.222e+04 7.898e+03 1.548 0.121921
## FireplaceQuTA 3.615e+03 5.520e+03 0.655 0.512690
## GarageTypeAttchd 1.959e+04 1.100e+04 1.781 0.075128 .
## GarageTypeBasment 2.415e+04 1.275e+04 1.895 0.058375 .
## GarageTypeBuiltIn 1.948e+04 1.147e+04 1.699 0.089634 .
## GarageTypeCarPort 2.431e+04 1.467e+04 1.657 0.097784 .
## GarageTypeDetchd 2.263e+04 1.101e+04 2.056 0.039963 *
## GarageTypeNone -3.112e+04 1.824e+05 -0.171 0.864530
## GarageYrBlt -1.828e+01 6.117e+01 -0.299 0.765094
## GarageFinishNone NA NA NA NA
## GarageFinishRFn -2.404e+03 1.957e+03 -1.229 0.219451
## GarageFinishUnf -6.045e+02 2.423e+03 -0.249 0.803033
## GarageCars 3.928e+03 2.274e+03 1.727 0.084339 .
## GarageArea 1.826e+01 7.879e+00 2.317 0.020651 *
## GarageQualFa -1.249e+05 3.010e+04 -4.149 3.58e-05 ***
## GarageQualGd -1.199e+05 3.090e+04 -3.880 0.000110 ***
## GarageQualNone NA NA NA NA
## GarageQualPo -1.425e+05 3.837e+04 -3.713 0.000214 ***
## GarageQualTA -1.188e+05 2.981e+04 -3.987 7.10e-05 ***
## GarageCondFa 1.120e+05 3.472e+04 3.224 0.001296 **
## GarageCondGd 1.109e+05 3.607e+04 3.073 0.002164 **
## GarageCondNone NA NA NA NA
## GarageCondPo 1.179e+05 3.726e+04 3.163 0.001600 **
## GarageCondTA 1.136e+05 3.442e+04 3.300 0.000996 ***
## PavedDriveP -3.573e+03 5.543e+03 -0.645 0.519329
## PavedDriveY -2.236e+02 3.455e+03 -0.065 0.948404
## WoodDeckSF 1.522e+01 5.864e+00 2.596 0.009539 **
## OpenPorchSF 7.639e-01 1.155e+01 0.066 0.947289
## EnclosedPorch 2.852e+00 1.246e+01 0.229 0.818970
## X3SsnPorch 3.351e+01 2.233e+01 1.501 0.133613
## ScreenPorch 3.600e+01 1.248e+01 2.886 0.003977 **
## PoolArea 6.874e+02 2.264e+02 3.037 0.002443 **
## PoolQCFa -1.574e+05 4.082e+04 -3.855 0.000122 ***
## PoolQCGd -1.274e+05 3.680e+04 -3.462 0.000554 ***
## PoolQCNone 2.571e+05 1.225e+05 2.099 0.036011 *
## FenceGdWo 7.929e+03 4.898e+03 1.619 0.105790
## FenceMnPrv 9.422e+03 3.997e+03 2.357 0.018582 *
## FenceMnWw 3.096e+03 8.200e+03 0.378 0.705828
## FenceNone 8.893e+03 3.665e+03 2.427 0.015384 *
## MiscFeatureNone 2.126e+03 9.705e+04 0.022 0.982524
## MiscFeatureOthr 1.648e+04 9.061e+04 0.182 0.855718
## MiscFeatureShed 4.506e+03 9.298e+04 0.048 0.961353
## MiscFeatureTenC 3.432e+04 9.642e+04 0.356 0.721952
## MiscVal 1.035e-01 6.105e+00 0.017 0.986474
## MoSold -4.646e+02 2.446e+02 -1.900 0.057720 .
## YrSold -5.706e+02 5.143e+02 -1.110 0.267386
## SaleTypeCon 2.567e+04 1.751e+04 1.466 0.143004
## SaleTypeConLD 1.617e+04 9.669e+03 1.673 0.094637 .
## SaleTypeConLI 4.119e+03 1.152e+04 0.358 0.720660
## SaleTypeConLw 1.183e+03 1.213e+04 0.098 0.922342
## SaleTypeCWD 1.524e+04 1.283e+04 1.188 0.235095
## SaleTypeNew 2.104e+04 1.539e+04 1.367 0.171787
## SaleTypeOth 7.476e+03 1.445e+04 0.517 0.605018
## SaleTypeWD -3.614e+02 4.169e+03 -0.087 0.930935
## SaleConditionAdjLand 9.586e+03 1.458e+04 0.658 0.510929
## SaleConditionAlloca 8.413e+02 8.844e+03 0.095 0.924224
## SaleConditionFamily 7.417e+02 6.078e+03 0.122 0.902906
## SaleConditionNormal 6.695e+03 2.899e+03 2.309 0.021119 *
## SaleConditionPartial -1.023e+02 1.481e+04 -0.007 0.994493
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## (Dispersion parameter for gaussian family taken to be 508918991)
##
## Null deviance: 9.2079e+12 on 1459 degrees of freedom
## Residual deviance: 6.1477e+11 on 1208 degrees of freedom
## AIC: 33642
##
## Number of Fisher Scoring iterations: 2
prediction1 <- predict(glm,test)
## Warning in predict.lm(object, newdata, se.fit, scale = 1, type = if (type == :
## prediction from a rank-deficient fit may be misleading
s <- data.frame(Id=test$Id,SalePrice=prediction1)
write.csv(s,file="Kevin Clifford_Kaggle House Prices_GLM.csv",row.names=F)
#GLM Net- Uses Lasso and Ridge regressions for better fit. This site helped for finding root mean squared error using resamples() function: https://www.rdocumentation.org/packages/caret/versions/6.0-84/topics/resamples
library(glmnet)
## Warning: package 'glmnet' was built under R version 4.0.4
## Loading required package: Matrix
## Loaded glmnet 4.1-1
glmnet <- train(SalePrice~ ., data=train_new, method="glmnet", trControl = control)
glmnet
## glmnet
##
## 1460 samples
## 79 predictor
##
## No pre-processing
## Resampling: Cross-Validated (5 fold)
## Summary of sample sizes: 1168, 1169, 1168, 1168, 1167
## Resampling results across tuning parameters:
##
## alpha lambda RMSE Rsquared MAE
## 0.10 125.6321 37770.01 0.7913462 18742.80
## 0.10 1256.3207 33775.47 0.8255678 17895.84
## 0.10 12563.2069 32835.42 0.8357409 18041.20
## 0.55 125.6321 37774.91 0.7907672 18257.10
## 0.55 1256.3207 33247.50 0.8308132 17743.08
## 0.55 12563.2069 38151.43 0.7938050 22454.99
## 1.00 125.6321 37906.46 0.7890542 18058.30
## 1.00 1256.3207 33306.75 0.8307887 18118.35
## 1.00 12563.2069 42116.76 0.7577761 25966.44
##
## RMSE was used to select the optimal model using the smallest value.
## The final values used for the model were alpha = 0.1 and lambda = 12563.21.
summary(glmnet)
## Length Class Mode
## a0 100 -none- numeric
## beta 25900 dgCMatrix S4
## df 100 -none- numeric
## dim 2 -none- numeric
## lambda 100 -none- numeric
## dev.ratio 100 -none- numeric
## nulldev 1 -none- numeric
## npasses 1 -none- numeric
## jerr 1 -none- numeric
## offset 1 -none- logical
## call 5 -none- call
## nobs 1 -none- numeric
## lambdaOpt 1 -none- numeric
## xNames 259 -none- character
## problemType 1 -none- character
## tuneValue 2 data.frame list
## obsLevels 1 -none- logical
## param 0 -none- list
prediction2 <- predict(glmnet,test)
list1 <- list(linear = glm, net = glmnet)
resamps <- resamples(list1)
summary(resamps)
##
## Call:
## summary.resamples(object = resamps)
##
## Models: linear, net
## Number of resamples: 5
##
## MAE
## Min. 1st Qu. Median Mean 3rd Qu. Max. NA's
## linear 18896.47 20143.32 20584.08 20704.41 21254.16 22644.03 0
## net 15350.36 17610.88 17769.81 18041.20 18761.07 20713.86 0
##
## RMSE
## Min. 1st Qu. Median Mean 3rd Qu. Max. NA's
## linear 44993.10 48625.97 60919.28 58871.56 61546.54 78272.89 0
## net 22922.41 26233.40 34354.68 32835.42 38361.63 42305.00 0
##
## Rsquared
## Min. 1st Qu. Median Mean 3rd Qu. Max. NA's
## linear 0.4349752 0.5752046 0.6193942 0.6052654 0.6776039 0.7191493 0
## net 0.7097667 0.8026265 0.8813028 0.8357409 0.8896727 0.8953357 0
bwplot(resamps, metric = "RMSE")
s2 <- data.frame(Id=test$Id,SalePrice=prediction2)
write.csv(s2,file="Kevin Clifford_Kaggle House Prices_GLMNet.csv",row.names=F)
##Stochastic Gradient Boosting
library(xgboost)
## Warning: package 'xgboost' was built under R version 4.0.4
##
## Attaching package: 'xgboost'
## The following object is masked from 'package:dplyr':
##
## slice
library(gbm)
## Warning: package 'gbm' was built under R version 4.0.4
## Loaded gbm 2.1.8
gbm <- train(SalePrice ~ ., data = train_new, method = "gbm", trControl = control)
## Warning in (function (x, y, offset = NULL, misc = NULL, distribution =
## "bernoulli", : variable 60: Condition2RRAe has no variation.
## Warning in (function (x, y, offset = NULL, misc = NULL, distribution =
## "bernoulli", : variable 84: RoofMatlMembran has no variation.
## Warning in (function (x, y, offset = NULL, misc = NULL, distribution =
## "bernoulli", : variable 93: Exterior1stCBlock has no variation.
## Warning in (function (x, y, offset = NULL, misc = NULL, distribution =
## "bernoulli", : variable 96: Exterior1stImStucc has no variation.
## Warning in (function (x, y, offset = NULL, misc = NULL, distribution =
## "bernoulli", : variable 107: Exterior2ndCBlock has no variation.
## Warning in (function (x, y, offset = NULL, misc = NULL, distribution =
## "bernoulli", : variable 175: ElectricalMix has no variation.
## Warning in (function (x, y, offset = NULL, misc = NULL, distribution =
## "bernoulli", : variable 218: GarageQualPo has no variation.
## Iter TrainDeviance ValidDeviance StepSize Improve
## 1 5735085969.9644 nan 0.1000 540551052.1650
## 2 5242092906.7147 nan 0.1000 445674619.4666
## 3 4879452064.5736 nan 0.1000 363892692.5585
## 4 4525423223.7691 nan 0.1000 324515661.0092
## 5 4273503200.7931 nan 0.1000 260404852.9228
## 6 3999388422.6271 nan 0.1000 254277037.8376
## 7 3789273493.6055 nan 0.1000 214787451.1403
## 8 3581666078.3444 nan 0.1000 150958845.9049
## 9 3403863447.8444 nan 0.1000 195875699.1030
## 10 3227245674.5831 nan 0.1000 150347549.8288
## 20 2121157308.6483 nan 0.1000 67847700.0207
## 40 1321104831.9636 nan 0.1000 17157182.3076
## 60 1061645823.9684 nan 0.1000 -1753327.2132
## 80 930282770.5020 nan 0.1000 -9470683.3229
## 100 863311632.9214 nan 0.1000 -1801814.4903
## 120 813952615.1381 nan 0.1000 -3507356.2712
## 140 788629582.7734 nan 0.1000 -14973979.0935
## 150 777018099.5310 nan 0.1000 -6937440.5553
## Warning in (function (x, y, offset = NULL, misc = NULL, distribution =
## "bernoulli", : variable 60: Condition2RRAe has no variation.
## Warning in (function (x, y, offset = NULL, misc = NULL, distribution =
## "bernoulli", : variable 84: RoofMatlMembran has no variation.
## Warning in (function (x, y, offset = NULL, misc = NULL, distribution =
## "bernoulli", : variable 93: Exterior1stCBlock has no variation.
## Warning in (function (x, y, offset = NULL, misc = NULL, distribution =
## "bernoulli", : variable 96: Exterior1stImStucc has no variation.
## Warning in (function (x, y, offset = NULL, misc = NULL, distribution =
## "bernoulli", : variable 107: Exterior2ndCBlock has no variation.
## Warning in (function (x, y, offset = NULL, misc = NULL, distribution =
## "bernoulli", : variable 175: ElectricalMix has no variation.
## Warning in (function (x, y, offset = NULL, misc = NULL, distribution =
## "bernoulli", : variable 218: GarageQualPo has no variation.
## Iter TrainDeviance ValidDeviance StepSize Improve
## 1 5631925813.1281 nan 0.1000 688451234.8898
## 2 5070056968.1068 nan 0.1000 569951322.9763
## 3 4596580686.4983 nan 0.1000 474143818.1535
## 4 4202012658.4982 nan 0.1000 445268061.1316
## 5 3843267477.1552 nan 0.1000 336553187.2505
## 6 3527174243.7892 nan 0.1000 299204648.8761
## 7 3251597839.6771 nan 0.1000 221922690.1699
## 8 2997039657.3797 nan 0.1000 170792220.4556
## 9 2755514930.7748 nan 0.1000 203785739.9076
## 10 2558478982.1460 nan 0.1000 159118057.8050
## 20 1504129938.4532 nan 0.1000 47021641.4144
## 40 920777964.0722 nan 0.1000 -6618999.7672
## 60 745400030.4891 nan 0.1000 -2410407.4379
## 80 652675605.5061 nan 0.1000 282634.2527
## 100 592029893.3364 nan 0.1000 -4816558.3284
## 120 546500185.5147 nan 0.1000 -9047082.4504
## 140 508636475.6179 nan 0.1000 -2720081.8511
## 150 492368631.6117 nan 0.1000 -3779927.9265
## Warning in (function (x, y, offset = NULL, misc = NULL, distribution =
## "bernoulli", : variable 60: Condition2RRAe has no variation.
## Warning in (function (x, y, offset = NULL, misc = NULL, distribution =
## "bernoulli", : variable 84: RoofMatlMembran has no variation.
## Warning in (function (x, y, offset = NULL, misc = NULL, distribution =
## "bernoulli", : variable 93: Exterior1stCBlock has no variation.
## Warning in (function (x, y, offset = NULL, misc = NULL, distribution =
## "bernoulli", : variable 96: Exterior1stImStucc has no variation.
## Warning in (function (x, y, offset = NULL, misc = NULL, distribution =
## "bernoulli", : variable 107: Exterior2ndCBlock has no variation.
## Warning in (function (x, y, offset = NULL, misc = NULL, distribution =
## "bernoulli", : variable 175: ElectricalMix has no variation.
## Warning in (function (x, y, offset = NULL, misc = NULL, distribution =
## "bernoulli", : variable 218: GarageQualPo has no variation.
## Iter TrainDeviance ValidDeviance StepSize Improve
## 1 5539860787.2579 nan 0.1000 758239638.8446
## 2 4892500275.5947 nan 0.1000 701053990.4923
## 3 4329825669.1303 nan 0.1000 516905646.8839
## 4 3890772583.7589 nan 0.1000 401165229.0170
## 5 3518070362.4241 nan 0.1000 342627406.3212
## 6 3219627903.2779 nan 0.1000 275023945.9648
## 7 2923091195.7944 nan 0.1000 279516168.8032
## 8 2679776068.2033 nan 0.1000 234402899.8719
## 9 2487487417.2729 nan 0.1000 191146809.5755
## 10 2312690417.5453 nan 0.1000 172656389.4276
## 20 1281760796.8006 nan 0.1000 53279297.7841
## 40 792453169.0777 nan 0.1000 -3805892.7632
## 60 624734920.1878 nan 0.1000 2372887.0470
## 80 536677099.3430 nan 0.1000 -3359792.7226
## 100 473692094.6267 nan 0.1000 -669640.8972
## 120 427316562.5209 nan 0.1000 -902416.0856
## 140 392554205.9336 nan 0.1000 -2580624.4870
## 150 373583859.5398 nan 0.1000 -6829582.1155
## Warning in (function (x, y, offset = NULL, misc = NULL, distribution =
## "bernoulli", : variable 17: UtilitiesNoSeWa has no variation.
## Warning in (function (x, y, offset = NULL, misc = NULL, distribution =
## "bernoulli", : variable 90: Exterior1stAsphShn has no variation.
## Iter TrainDeviance ValidDeviance StepSize Improve
## 1 5860843363.7897 nan 0.1000 572929996.3762
## 2 5397424244.3934 nan 0.1000 476071043.7548
## 3 4993883483.8235 nan 0.1000 382103546.7528
## 4 4651974743.8990 nan 0.1000 346844731.5422
## 5 4379821541.6335 nan 0.1000 269833259.1090
## 6 4119921800.7176 nan 0.1000 278572285.6848
## 7 3862033376.1286 nan 0.1000 231296420.2186
## 8 3627848725.0780 nan 0.1000 237862514.5383
## 9 3441738607.9588 nan 0.1000 201999926.0850
## 10 3276926814.1022 nan 0.1000 177927741.7477
## 20 2154682499.0756 nan 0.1000 77089755.2286
## 40 1319173925.8511 nan 0.1000 18233936.4385
## 60 1055661679.9958 nan 0.1000 2841543.4117
## 80 955659438.4360 nan 0.1000 -17140949.7165
## 100 884236557.5684 nan 0.1000 -787555.8630
## 120 833576905.6381 nan 0.1000 -6691138.6290
## 140 798015992.9121 nan 0.1000 -2893891.5012
## 150 784551646.9327 nan 0.1000 -380377.8621
## Warning in (function (x, y, offset = NULL, misc = NULL, distribution =
## "bernoulli", : variable 17: UtilitiesNoSeWa has no variation.
## Warning in (function (x, y, offset = NULL, misc = NULL, distribution =
## "bernoulli", : variable 90: Exterior1stAsphShn has no variation.
## Iter TrainDeviance ValidDeviance StepSize Improve
## 1 5748350089.5269 nan 0.1000 721347136.2611
## 2 5156473644.2479 nan 0.1000 550050288.1747
## 3 4632553320.8081 nan 0.1000 559504288.9371
## 4 4219543103.2699 nan 0.1000 431958152.7142
## 5 3857989613.7920 nan 0.1000 240091574.2058
## 6 3582663607.6691 nan 0.1000 271110368.4775
## 7 3324503809.6543 nan 0.1000 148236510.5080
## 8 3045097363.8065 nan 0.1000 261614714.8290
## 9 2821907424.9766 nan 0.1000 161898236.5895
## 10 2627932947.6677 nan 0.1000 203107119.8300
## 20 1500014011.8231 nan 0.1000 53250634.2203
## 40 927676750.7551 nan 0.1000 11096694.7285
## 60 762241361.1557 nan 0.1000 2324151.5148
## 80 686116236.0731 nan 0.1000 -9294387.6734
## 100 610995887.8219 nan 0.1000 -1393446.9216
## 120 573699778.5983 nan 0.1000 -261375.1234
## 140 527523996.8383 nan 0.1000 2973627.5455
## 150 508852684.1709 nan 0.1000 -4975400.5693
## Warning in (function (x, y, offset = NULL, misc = NULL, distribution =
## "bernoulli", : variable 17: UtilitiesNoSeWa has no variation.
## Warning in (function (x, y, offset = NULL, misc = NULL, distribution =
## "bernoulli", : variable 90: Exterior1stAsphShn has no variation.
## Iter TrainDeviance ValidDeviance StepSize Improve
## 1 5641805140.1116 nan 0.1000 762300482.6885
## 2 5037319026.9308 nan 0.1000 636323598.1574
## 3 4487875416.4027 nan 0.1000 567457124.1782
## 4 4003955125.7946 nan 0.1000 474125270.8806
## 5 3587094736.1357 nan 0.1000 307494251.3743
## 6 3230367200.4779 nan 0.1000 349567377.1446
## 7 2937624818.4935 nan 0.1000 254585980.5248
## 8 2725092096.0483 nan 0.1000 228290748.2757
## 9 2511991718.5936 nan 0.1000 181566487.6826
## 10 2308181115.0350 nan 0.1000 187726224.0299
## 20 1302346792.1534 nan 0.1000 36250900.4603
## 40 781499671.2768 nan 0.1000 -1101467.2305
## 60 617649978.5409 nan 0.1000 -2648973.8836
## 80 544796529.1957 nan 0.1000 -2252767.7835
## 100 480909916.6241 nan 0.1000 -589845.2667
## 120 437885117.7539 nan 0.1000 -1678595.5235
## 140 397866370.6075 nan 0.1000 -2155201.4290
## 150 384324348.0346 nan 0.1000 -61419.7617
## Warning in (function (x, y, offset = NULL, misc = NULL, distribution =
## "bernoulli", : variable 58: Condition2PosA has no variation.
## Warning in (function (x, y, offset = NULL, misc = NULL, distribution =
## "bernoulli", : variable 61: Condition2RRAn has no variation.
## Iter TrainDeviance ValidDeviance StepSize Improve
## 1 5696848855.3840 nan 0.1000 532066080.9138
## 2 5210081407.5169 nan 0.1000 429316472.7731
## 3 4802967115.1632 nan 0.1000 395111226.5573
## 4 4479570845.7675 nan 0.1000 335119571.8133
## 5 4194810851.1995 nan 0.1000 244116494.2885
## 6 3912278160.0564 nan 0.1000 280882840.0484
## 7 3686865715.4238 nan 0.1000 223199396.5223
## 8 3459150626.3256 nan 0.1000 199677122.4393
## 9 3290282530.8311 nan 0.1000 175314398.4624
## 10 3125537636.2177 nan 0.1000 157533996.6740
## 20 1972251758.8003 nan 0.1000 54993055.2971
## 40 1160824375.2555 nan 0.1000 10279190.1143
## 60 875191446.4765 nan 0.1000 9576152.7362
## 80 734103765.6859 nan 0.1000 3654053.3294
## 100 660654292.1881 nan 0.1000 -528991.2536
## 120 610750222.6760 nan 0.1000 -2992503.3424
## 140 577271868.3089 nan 0.1000 -4320060.6029
## 150 565941014.4374 nan 0.1000 -1420912.7909
## Warning in (function (x, y, offset = NULL, misc = NULL, distribution =
## "bernoulli", : variable 58: Condition2PosA has no variation.
## Warning in (function (x, y, offset = NULL, misc = NULL, distribution =
## "bernoulli", : variable 61: Condition2RRAn has no variation.
## Iter TrainDeviance ValidDeviance StepSize Improve
## 1 5575051373.5681 nan 0.1000 672835978.8004
## 2 4984636421.3406 nan 0.1000 584620091.1767
## 3 4500599686.5056 nan 0.1000 493603494.8248
## 4 4040729995.3954 nan 0.1000 458301912.1614
## 5 3672076549.0848 nan 0.1000 264644586.2293
## 6 3386648849.1627 nan 0.1000 264751296.6457
## 7 3128996785.1380 nan 0.1000 246002881.4797
## 8 2878308855.9908 nan 0.1000 225177900.3968
## 9 2670907026.0388 nan 0.1000 211509609.2690
## 10 2480020493.5433 nan 0.1000 171235700.7846
## 20 1381820640.0998 nan 0.1000 61864097.9246
## 40 753526660.0241 nan 0.1000 4871669.1348
## 60 560846511.2954 nan 0.1000 -2155947.1644
## 80 479080928.4428 nan 0.1000 -1365659.5398
## 100 431875224.3046 nan 0.1000 -2026744.8811
## 120 399560846.2650 nan 0.1000 -1744440.4656
## 140 375201061.4404 nan 0.1000 -1324778.0782
## 150 363234158.1593 nan 0.1000 -1381142.8848
## Warning in (function (x, y, offset = NULL, misc = NULL, distribution =
## "bernoulli", : variable 58: Condition2PosA has no variation.
## Warning in (function (x, y, offset = NULL, misc = NULL, distribution =
## "bernoulli", : variable 61: Condition2RRAn has no variation.
## Iter TrainDeviance ValidDeviance StepSize Improve
## 1 5499269842.9941 nan 0.1000 788208745.0401
## 2 4859482369.9768 nan 0.1000 641426217.2596
## 3 4301727550.0944 nan 0.1000 553661137.0581
## 4 3849863494.3693 nan 0.1000 419627058.3433
## 5 3454742712.8109 nan 0.1000 378471329.9826
## 6 3148424709.3397 nan 0.1000 314958742.2167
## 7 2859146328.4671 nan 0.1000 308203973.7660
## 8 2595628670.1896 nan 0.1000 212944571.1861
## 9 2401786471.0026 nan 0.1000 192252356.2253
## 10 2209910503.0802 nan 0.1000 183990137.1782
## 20 1161846084.1230 nan 0.1000 49625030.7119
## 40 593507419.9170 nan 0.1000 7491348.5962
## 60 452036977.3561 nan 0.1000 2661872.0009
## 80 391380487.0148 nan 0.1000 -3151052.5409
## 100 350233739.4903 nan 0.1000 -1741705.5999
## 120 320594387.3725 nan 0.1000 -2835345.4606
## 140 300329537.7407 nan 0.1000 -1928856.1144
## 150 287575216.5143 nan 0.1000 -1910945.4289
## Warning in (function (x, y, offset = NULL, misc = NULL, distribution =
## "bernoulli", : variable 85: RoofMatlMetal has no variation.
## Warning in (function (x, y, offset = NULL, misc = NULL, distribution =
## "bernoulli", : variable 112: Exterior2ndOther has no variation.
## Warning in (function (x, y, offset = NULL, misc = NULL, distribution =
## "bernoulli", : variable 128: ExterCondPo has no variation.
## Warning in (function (x, y, offset = NULL, misc = NULL, distribution =
## "bernoulli", : variable 195: FunctionalSev has no variation.
## Warning in (function (x, y, offset = NULL, misc = NULL, distribution =
## "bernoulli", : variable 243: MiscFeatureTenC has no variation.
## Iter TrainDeviance ValidDeviance StepSize Improve
## 1 5852487693.9677 nan 0.1000 559330226.5789
## 2 5397849127.7163 nan 0.1000 433476106.5704
## 3 4973396208.3584 nan 0.1000 401756275.0230
## 4 4632037779.1534 nan 0.1000 329840378.4331
## 5 4333691616.5374 nan 0.1000 248967045.3791
## 6 4063122697.5475 nan 0.1000 240287034.1756
## 7 3823114738.5578 nan 0.1000 220568228.2871
## 8 3585369832.8827 nan 0.1000 222531818.1444
## 9 3389554547.6553 nan 0.1000 197351774.8507
## 10 3208066980.2321 nan 0.1000 155257819.8869
## 20 2088668433.7011 nan 0.1000 62494674.0581
## 40 1302607080.4428 nan 0.1000 22675137.3514
## 60 1039267353.8627 nan 0.1000 8011153.8342
## 80 948075614.8636 nan 0.1000 -2201020.0051
## 100 873083718.5833 nan 0.1000 -9009475.0947
## 120 808062559.7145 nan 0.1000 2717618.5822
## 140 776465972.1903 nan 0.1000 -4822082.9284
## 150 758358412.0828 nan 0.1000 80973.0635
## Warning in (function (x, y, offset = NULL, misc = NULL, distribution =
## "bernoulli", : variable 85: RoofMatlMetal has no variation.
## Warning in (function (x, y, offset = NULL, misc = NULL, distribution =
## "bernoulli", : variable 112: Exterior2ndOther has no variation.
## Warning in (function (x, y, offset = NULL, misc = NULL, distribution =
## "bernoulli", : variable 128: ExterCondPo has no variation.
## Warning in (function (x, y, offset = NULL, misc = NULL, distribution =
## "bernoulli", : variable 195: FunctionalSev has no variation.
## Warning in (function (x, y, offset = NULL, misc = NULL, distribution =
## "bernoulli", : variable 243: MiscFeatureTenC has no variation.
## Iter TrainDeviance ValidDeviance StepSize Improve
## 1 5681415207.4317 nan 0.1000 688981320.2185
## 2 5103541208.3973 nan 0.1000 642209189.7869
## 3 4650026467.3467 nan 0.1000 477421803.8418
## 4 4206728961.4220 nan 0.1000 395695694.5014
## 5 3854061461.4970 nan 0.1000 341790042.5346
## 6 3554076864.9321 nan 0.1000 306089027.7455
## 7 3285191677.4589 nan 0.1000 227641391.8009
## 8 3033507924.0050 nan 0.1000 213139282.2546
## 9 2850442366.4386 nan 0.1000 141549384.8010
## 10 2648557304.8449 nan 0.1000 150556228.3905
## 20 1515176284.1114 nan 0.1000 54231202.9691
## 40 939435707.5222 nan 0.1000 12333525.6529
## 60 759824900.9872 nan 0.1000 -3945177.8840
## 80 671776808.4002 nan 0.1000 -5852677.4296
## 100 617587757.5397 nan 0.1000 -2479636.9517
## 120 567955017.0431 nan 0.1000 -1724029.7303
## 140 528113304.0727 nan 0.1000 -6493139.0109
## 150 509393452.5726 nan 0.1000 -3742275.1838
## Warning in (function (x, y, offset = NULL, misc = NULL, distribution =
## "bernoulli", : variable 85: RoofMatlMetal has no variation.
## Warning in (function (x, y, offset = NULL, misc = NULL, distribution =
## "bernoulli", : variable 112: Exterior2ndOther has no variation.
## Warning in (function (x, y, offset = NULL, misc = NULL, distribution =
## "bernoulli", : variable 128: ExterCondPo has no variation.
## Warning in (function (x, y, offset = NULL, misc = NULL, distribution =
## "bernoulli", : variable 195: FunctionalSev has no variation.
## Warning in (function (x, y, offset = NULL, misc = NULL, distribution =
## "bernoulli", : variable 243: MiscFeatureTenC has no variation.
## Iter TrainDeviance ValidDeviance StepSize Improve
## 1 5622120067.2629 nan 0.1000 760516769.6642
## 2 5005985602.7723 nan 0.1000 613306256.5100
## 3 4467410929.0090 nan 0.1000 542182347.4592
## 4 4035705502.0023 nan 0.1000 485035732.7380
## 5 3661743964.1401 nan 0.1000 429025388.2809
## 6 3344615022.5268 nan 0.1000 312366274.6931
## 7 3056801345.5585 nan 0.1000 319592084.8836
## 8 2802597378.1046 nan 0.1000 253116871.4306
## 9 2590493991.4368 nan 0.1000 215454397.2287
## 10 2364391279.3595 nan 0.1000 170662672.4147
## 20 1297293833.2871 nan 0.1000 45638247.3830
## 40 786335638.3644 nan 0.1000 -4375076.7726
## 60 638210389.1741 nan 0.1000 -7992718.4287
## 80 547075510.4179 nan 0.1000 2013356.6594
## 100 484499839.3458 nan 0.1000 -2712147.5091
## 120 436988675.1762 nan 0.1000 -3708618.8964
## 140 401953673.6800 nan 0.1000 -2714534.4280
## 150 388596868.1888 nan 0.1000 -2173626.2004
## Warning in (function (x, y, offset = NULL, misc = NULL, distribution =
## "bernoulli", : variable 86: RoofMatlRoll has no variation.
## Warning in (function (x, y, offset = NULL, misc = NULL, distribution =
## "bernoulli", : variable 170: HeatingQCPo has no variation.
## Iter TrainDeviance ValidDeviance StepSize Improve
## 1 5662885048.8750 nan 0.1000 495566815.5317
## 2 5216335697.3520 nan 0.1000 443204152.1425
## 3 4802639870.4909 nan 0.1000 389587844.8188
## 4 4498075936.6207 nan 0.1000 309728235.5377
## 5 4217724867.4502 nan 0.1000 250265978.2480
## 6 3992607440.7157 nan 0.1000 235960822.4097
## 7 3741183065.2479 nan 0.1000 204274392.1405
## 8 3517028500.4030 nan 0.1000 147968127.6745
## 9 3325375975.7747 nan 0.1000 183826173.9089
## 10 3181419889.4924 nan 0.1000 138138130.1901
## 20 2146576558.7401 nan 0.1000 72961422.9982
## 40 1338148931.2260 nan 0.1000 18095447.5657
## 60 1066000104.3873 nan 0.1000 1550422.7874
## 80 948281115.8763 nan 0.1000 4541181.0675
## 100 878675553.1601 nan 0.1000 -7222015.7570
## 120 832523040.8585 nan 0.1000 48066.3387
## 140 797391677.3059 nan 0.1000 -5373513.6872
## 150 787670824.5424 nan 0.1000 -6496872.6942
## Warning in (function (x, y, offset = NULL, misc = NULL, distribution =
## "bernoulli", : variable 86: RoofMatlRoll has no variation.
## Warning in (function (x, y, offset = NULL, misc = NULL, distribution =
## "bernoulli", : variable 170: HeatingQCPo has no variation.
## Iter TrainDeviance ValidDeviance StepSize Improve
## 1 5460938589.0926 nan 0.1000 649704601.4175
## 2 4962246297.8212 nan 0.1000 412633837.9716
## 3 4529820165.3055 nan 0.1000 427503763.0116
## 4 4123449057.1885 nan 0.1000 417838297.9802
## 5 3757488051.7089 nan 0.1000 386096560.7135
## 6 3465498322.2079 nan 0.1000 290483546.8708
## 7 3196629191.7525 nan 0.1000 236621759.2026
## 8 2982923600.6426 nan 0.1000 245865382.8500
## 9 2789981204.0470 nan 0.1000 210921594.2272
## 10 2601321587.7280 nan 0.1000 138754424.0352
## 20 1589794810.4896 nan 0.1000 53034190.7485
## 40 989956655.1313 nan 0.1000 4709352.0749
## 60 796642274.9895 nan 0.1000 -3339873.8692
## 80 723994254.9558 nan 0.1000 -12637153.9650
## 100 647849370.3910 nan 0.1000 -8063378.9095
## 120 590350371.4689 nan 0.1000 -5590516.9528
## 140 552799721.2884 nan 0.1000 -7928102.9892
## 150 531232238.6672 nan 0.1000 -3524502.0065
## Warning in (function (x, y, offset = NULL, misc = NULL, distribution =
## "bernoulli", : variable 86: RoofMatlRoll has no variation.
## Warning in (function (x, y, offset = NULL, misc = NULL, distribution =
## "bernoulli", : variable 170: HeatingQCPo has no variation.
## Iter TrainDeviance ValidDeviance StepSize Improve
## 1 5434858714.2949 nan 0.1000 726310816.2850
## 2 4877486197.8246 nan 0.1000 575803918.8800
## 3 4370239114.3693 nan 0.1000 504374527.5252
## 4 3965054064.8667 nan 0.1000 443903780.4099
## 5 3621635593.0799 nan 0.1000 367427812.6173
## 6 3274203978.4147 nan 0.1000 304669345.6223
## 7 2971496351.7006 nan 0.1000 235410981.4763
## 8 2740115181.0141 nan 0.1000 184062193.8387
## 9 2507401017.4131 nan 0.1000 209366548.6786
## 10 2331265965.3121 nan 0.1000 173085264.8086
## 20 1361429735.1856 nan 0.1000 51528871.5630
## 40 827282915.7420 nan 0.1000 -886512.8103
## 60 684411670.5939 nan 0.1000 -1907265.4823
## 80 592450783.0612 nan 0.1000 -2964265.7917
## 100 529530637.1863 nan 0.1000 -2219777.4919
## 120 475414847.1548 nan 0.1000 -3108190.4210
## 140 432873836.1028 nan 0.1000 -1437244.9993
## 150 414243230.5084 nan 0.1000 -5241870.5232
##
## Iter TrainDeviance ValidDeviance StepSize Improve
## 1 5545628029.2140 nan 0.1000 735302236.3829
## 2 4900410948.9771 nan 0.1000 585782991.1821
## 3 4330148893.2325 nan 0.1000 580196567.6990
## 4 3886669158.7787 nan 0.1000 495512939.1733
## 5 3502686234.8608 nan 0.1000 388623613.1038
## 6 3192797457.3015 nan 0.1000 290697015.1415
## 7 2924791580.7005 nan 0.1000 256103888.0530
## 8 2666184164.0378 nan 0.1000 224803156.3987
## 9 2452073064.6760 nan 0.1000 196925019.4767
## 10 2267581640.0371 nan 0.1000 186836142.7000
## 20 1256177180.8557 nan 0.1000 32963672.0561
## 40 774121833.2204 nan 0.1000 2399322.0574
## 60 626284662.5430 nan 0.1000 891304.7498
## 80 550936071.6012 nan 0.1000 -7692125.3266
## 100 492662362.9109 nan 0.1000 -5548559.9168
gbm
## Stochastic Gradient Boosting
##
## 1460 samples
## 79 predictor
##
## No pre-processing
## Resampling: Cross-Validated (5 fold)
## Summary of sample sizes: 1168, 1169, 1167, 1168, 1168
## Resampling results across tuning parameters:
##
## interaction.depth n.trees RMSE Rsquared MAE
## 1 50 35147.97 0.8151006 23225.98
## 1 100 31913.67 0.8370412 20474.54
## 1 150 31920.23 0.8373932 19886.22
## 2 50 32177.03 0.8384646 20397.17
## 2 100 30674.50 0.8493836 18501.91
## 2 150 30283.39 0.8526504 17839.56
## 3 50 31024.51 0.8466650 18949.65
## 3 100 29849.51 0.8560883 17354.96
## 3 150 29885.80 0.8553373 16990.12
##
## Tuning parameter 'shrinkage' was held constant at a value of 0.1
##
## Tuning parameter 'n.minobsinnode' was held constant at a value of 10
## RMSE was used to select the optimal model using the smallest value.
## The final values used for the model were n.trees = 100, interaction.depth =
## 3, shrinkage = 0.1 and n.minobsinnode = 10.
summary(gbm)
## var rel.inf
## OverallQual OverallQual 43.11219936
## GrLivArea GrLivArea 15.13528277
## TotalBsmtSF TotalBsmtSF 7.61707331
## GarageCars GarageCars 7.32774626
## BsmtFinSF1 BsmtFinSF1 5.56281580
## LotArea LotArea 2.08359914
## YearBuilt YearBuilt 1.93767036
## X1stFlrSF X1stFlrSF 1.90035299
## Fireplaces Fireplaces 1.57148898
## FullBath FullBath 1.31207345
## TotRmsAbvGrd TotRmsAbvGrd 1.17146205
## YearRemodAdd YearRemodAdd 1.16908886
## X2ndFlrSF X2ndFlrSF 1.01542349
## OpenPorchSF OpenPorchSF 0.95190180
## GarageArea GarageArea 0.70736518
## OverallCond OverallCond 0.64138906
## LotFrontage LotFrontage 0.63079704
## BedroomAbvGr BedroomAbvGr 0.60263414
## GarageTypeAttchd GarageTypeAttchd 0.53203165
## CentralAirY CentralAirY 0.48918088
## NeighborhoodEdwards NeighborhoodEdwards 0.47970061
## GarageYrBlt GarageYrBlt 0.43494218
## MSZoningRM MSZoningRM 0.39309776
## BsmtExposureGd BsmtExposureGd 0.31831066
## NeighborhoodCrawfor NeighborhoodCrawfor 0.29936290
## KitchenQualTA KitchenQualTA 0.29750562
## MasVnrArea MasVnrArea 0.29105472
## SaleTypeNew SaleTypeNew 0.27841284
## Condition2Norm Condition2Norm 0.21738030
## FireplaceQuNone FireplaceQuNone 0.21672513
## ScreenPorch ScreenPorch 0.19610618
## HalfBath HalfBath 0.17566936
## KitchenQualGd KitchenQualGd 0.17264499
## NeighborhoodNoRidge NeighborhoodNoRidge 0.11182506
## NeighborhoodStoneBr NeighborhoodStoneBr 0.09427246
## RoofMatlCompShg RoofMatlCompShg 0.09414928
## GarageTypeBuiltIn GarageTypeBuiltIn 0.09383143
## KitchenAbvGr KitchenAbvGr 0.08149295
## WoodDeckSF WoodDeckSF 0.07994454
## BsmtUnfSF BsmtUnfSF 0.04567379
## Exterior2ndStucco Exterior2ndStucco 0.03783563
## LowQualFinSF LowQualFinSF 0.03455218
## FireplaceQuGd FireplaceQuGd 0.02931821
## BsmtFullBath BsmtFullBath 0.02838737
## BsmtFinSF2 BsmtFinSF2 0.02622727
## MSSubClass MSSubClass 0.00000000
## MSZoningFV MSZoningFV 0.00000000
## MSZoningRH MSZoningRH 0.00000000
## MSZoningRL MSZoningRL 0.00000000
## StreetPave StreetPave 0.00000000
## AlleyNone AlleyNone 0.00000000
## AlleyPave AlleyPave 0.00000000
## LotShapeIR2 LotShapeIR2 0.00000000
## LotShapeIR3 LotShapeIR3 0.00000000
## LotShapeReg LotShapeReg 0.00000000
## LandContourHLS LandContourHLS 0.00000000
## LandContourLow LandContourLow 0.00000000
## LandContourLvl LandContourLvl 0.00000000
## UtilitiesNoSeWa UtilitiesNoSeWa 0.00000000
## LotConfigCulDSac LotConfigCulDSac 0.00000000
## LotConfigFR2 LotConfigFR2 0.00000000
## LotConfigFR3 LotConfigFR3 0.00000000
## LotConfigInside LotConfigInside 0.00000000
## LandSlopeMod LandSlopeMod 0.00000000
## LandSlopeSev LandSlopeSev 0.00000000
## NeighborhoodBlueste NeighborhoodBlueste 0.00000000
## NeighborhoodBrDale NeighborhoodBrDale 0.00000000
## NeighborhoodBrkSide NeighborhoodBrkSide 0.00000000
## NeighborhoodClearCr NeighborhoodClearCr 0.00000000
## NeighborhoodCollgCr NeighborhoodCollgCr 0.00000000
## NeighborhoodGilbert NeighborhoodGilbert 0.00000000
## NeighborhoodIDOTRR NeighborhoodIDOTRR 0.00000000
## NeighborhoodMeadowV NeighborhoodMeadowV 0.00000000
## NeighborhoodMitchel NeighborhoodMitchel 0.00000000
## NeighborhoodNAmes NeighborhoodNAmes 0.00000000
## NeighborhoodNPkVill NeighborhoodNPkVill 0.00000000
## NeighborhoodNridgHt NeighborhoodNridgHt 0.00000000
## NeighborhoodNWAmes NeighborhoodNWAmes 0.00000000
## NeighborhoodOldTown NeighborhoodOldTown 0.00000000
## NeighborhoodSawyer NeighborhoodSawyer 0.00000000
## NeighborhoodSawyerW NeighborhoodSawyerW 0.00000000
## NeighborhoodSomerst NeighborhoodSomerst 0.00000000
## NeighborhoodSWISU NeighborhoodSWISU 0.00000000
## NeighborhoodTimber NeighborhoodTimber 0.00000000
## NeighborhoodVeenker NeighborhoodVeenker 0.00000000
## Condition1Feedr Condition1Feedr 0.00000000
## Condition1Norm Condition1Norm 0.00000000
## Condition1PosA Condition1PosA 0.00000000
## Condition1PosN Condition1PosN 0.00000000
## Condition1RRAe Condition1RRAe 0.00000000
## Condition1RRAn Condition1RRAn 0.00000000
## Condition1RRNe Condition1RRNe 0.00000000
## Condition1RRNn Condition1RRNn 0.00000000
## Condition2Feedr Condition2Feedr 0.00000000
## Condition2PosA Condition2PosA 0.00000000
## Condition2PosN Condition2PosN 0.00000000
## Condition2RRAe Condition2RRAe 0.00000000
## Condition2RRAn Condition2RRAn 0.00000000
## Condition2RRNn Condition2RRNn 0.00000000
## BldgType2fmCon BldgType2fmCon 0.00000000
## BldgTypeDuplex BldgTypeDuplex 0.00000000
## BldgTypeTwnhs BldgTypeTwnhs 0.00000000
## BldgTypeTwnhsE BldgTypeTwnhsE 0.00000000
## HouseStyle1.5Unf HouseStyle1.5Unf 0.00000000
## HouseStyle1Story HouseStyle1Story 0.00000000
## HouseStyle2.5Fin HouseStyle2.5Fin 0.00000000
## HouseStyle2.5Unf HouseStyle2.5Unf 0.00000000
## HouseStyle2Story HouseStyle2Story 0.00000000
## HouseStyleSFoyer HouseStyleSFoyer 0.00000000
## HouseStyleSLvl HouseStyleSLvl 0.00000000
## RoofStyleGable RoofStyleGable 0.00000000
## RoofStyleGambrel RoofStyleGambrel 0.00000000
## RoofStyleHip RoofStyleHip 0.00000000
## RoofStyleMansard RoofStyleMansard 0.00000000
## RoofStyleShed RoofStyleShed 0.00000000
## RoofMatlMembran RoofMatlMembran 0.00000000
## RoofMatlMetal RoofMatlMetal 0.00000000
## RoofMatlRoll RoofMatlRoll 0.00000000
## RoofMatlTar&Grv RoofMatlTar&Grv 0.00000000
## RoofMatlWdShake RoofMatlWdShake 0.00000000
## RoofMatlWdShngl RoofMatlWdShngl 0.00000000
## Exterior1stAsphShn Exterior1stAsphShn 0.00000000
## Exterior1stBrkComm Exterior1stBrkComm 0.00000000
## Exterior1stBrkFace Exterior1stBrkFace 0.00000000
## Exterior1stCBlock Exterior1stCBlock 0.00000000
## Exterior1stCemntBd Exterior1stCemntBd 0.00000000
## Exterior1stHdBoard Exterior1stHdBoard 0.00000000
## Exterior1stImStucc Exterior1stImStucc 0.00000000
## Exterior1stMetalSd Exterior1stMetalSd 0.00000000
## Exterior1stPlywood Exterior1stPlywood 0.00000000
## Exterior1stStone Exterior1stStone 0.00000000
## Exterior1stStucco Exterior1stStucco 0.00000000
## Exterior1stVinylSd Exterior1stVinylSd 0.00000000
## Exterior1stWd Sdng Exterior1stWd Sdng 0.00000000
## Exterior1stWdShing Exterior1stWdShing 0.00000000
## Exterior2ndAsphShn Exterior2ndAsphShn 0.00000000
## Exterior2ndBrk Cmn Exterior2ndBrk Cmn 0.00000000
## Exterior2ndBrkFace Exterior2ndBrkFace 0.00000000
## Exterior2ndCBlock Exterior2ndCBlock 0.00000000
## Exterior2ndCmentBd Exterior2ndCmentBd 0.00000000
## Exterior2ndHdBoard Exterior2ndHdBoard 0.00000000
## Exterior2ndImStucc Exterior2ndImStucc 0.00000000
## Exterior2ndMetalSd Exterior2ndMetalSd 0.00000000
## Exterior2ndOther Exterior2ndOther 0.00000000
## Exterior2ndPlywood Exterior2ndPlywood 0.00000000
## Exterior2ndStone Exterior2ndStone 0.00000000
## Exterior2ndVinylSd Exterior2ndVinylSd 0.00000000
## Exterior2ndWd Sdng Exterior2ndWd Sdng 0.00000000
## Exterior2ndWd Shng Exterior2ndWd Shng 0.00000000
## MasVnrTypeBrkFace MasVnrTypeBrkFace 0.00000000
## MasVnrTypeNone MasVnrTypeNone 0.00000000
## MasVnrTypeStone MasVnrTypeStone 0.00000000
## ExterQualFa ExterQualFa 0.00000000
## ExterQualGd ExterQualGd 0.00000000
## ExterQualTA ExterQualTA 0.00000000
## ExterCondFa ExterCondFa 0.00000000
## ExterCondGd ExterCondGd 0.00000000
## ExterCondPo ExterCondPo 0.00000000
## ExterCondTA ExterCondTA 0.00000000
## FoundationCBlock FoundationCBlock 0.00000000
## FoundationPConc FoundationPConc 0.00000000
## FoundationSlab FoundationSlab 0.00000000
## FoundationStone FoundationStone 0.00000000
## FoundationWood FoundationWood 0.00000000
## BsmtQualFa BsmtQualFa 0.00000000
## BsmtQualGd BsmtQualGd 0.00000000
## BsmtQualNone BsmtQualNone 0.00000000
## BsmtQualTA BsmtQualTA 0.00000000
## BsmtCondGd BsmtCondGd 0.00000000
## BsmtCondNone BsmtCondNone 0.00000000
## BsmtCondPo BsmtCondPo 0.00000000
## BsmtCondTA BsmtCondTA 0.00000000
## BsmtExposureMn BsmtExposureMn 0.00000000
## BsmtExposureNo BsmtExposureNo 0.00000000
## BsmtExposureNone BsmtExposureNone 0.00000000
## BsmtFinType1BLQ BsmtFinType1BLQ 0.00000000
## BsmtFinType1GLQ BsmtFinType1GLQ 0.00000000
## BsmtFinType1LwQ BsmtFinType1LwQ 0.00000000
## BsmtFinType1None BsmtFinType1None 0.00000000
## BsmtFinType1Rec BsmtFinType1Rec 0.00000000
## BsmtFinType1Unf BsmtFinType1Unf 0.00000000
## BsmtFinType2BLQ BsmtFinType2BLQ 0.00000000
## BsmtFinType2GLQ BsmtFinType2GLQ 0.00000000
## BsmtFinType2LwQ BsmtFinType2LwQ 0.00000000
## BsmtFinType2None BsmtFinType2None 0.00000000
## BsmtFinType2Rec BsmtFinType2Rec 0.00000000
## BsmtFinType2Unf BsmtFinType2Unf 0.00000000
## HeatingGasA HeatingGasA 0.00000000
## HeatingGasW HeatingGasW 0.00000000
## HeatingGrav HeatingGrav 0.00000000
## HeatingOthW HeatingOthW 0.00000000
## HeatingWall HeatingWall 0.00000000
## HeatingQCFa HeatingQCFa 0.00000000
## HeatingQCGd HeatingQCGd 0.00000000
## HeatingQCPo HeatingQCPo 0.00000000
## HeatingQCTA HeatingQCTA 0.00000000
## ElectricalFuseF ElectricalFuseF 0.00000000
## ElectricalFuseP ElectricalFuseP 0.00000000
## ElectricalMix ElectricalMix 0.00000000
## ElectricalSBrkr ElectricalSBrkr 0.00000000
## BsmtHalfBath BsmtHalfBath 0.00000000
## KitchenQualFa KitchenQualFa 0.00000000
## FunctionalMaj2 FunctionalMaj2 0.00000000
## FunctionalMin1 FunctionalMin1 0.00000000
## FunctionalMin2 FunctionalMin2 0.00000000
## FunctionalMod FunctionalMod 0.00000000
## FunctionalSev FunctionalSev 0.00000000
## FunctionalTyp FunctionalTyp 0.00000000
## FireplaceQuFa FireplaceQuFa 0.00000000
## FireplaceQuPo FireplaceQuPo 0.00000000
## FireplaceQuTA FireplaceQuTA 0.00000000
## GarageTypeBasment GarageTypeBasment 0.00000000
## GarageTypeCarPort GarageTypeCarPort 0.00000000
## GarageTypeDetchd GarageTypeDetchd 0.00000000
## GarageTypeNone GarageTypeNone 0.00000000
## GarageFinishNone GarageFinishNone 0.00000000
## GarageFinishRFn GarageFinishRFn 0.00000000
## GarageFinishUnf GarageFinishUnf 0.00000000
## GarageQualFa GarageQualFa 0.00000000
## GarageQualGd GarageQualGd 0.00000000
## GarageQualNone GarageQualNone 0.00000000
## GarageQualPo GarageQualPo 0.00000000
## GarageQualTA GarageQualTA 0.00000000
## GarageCondFa GarageCondFa 0.00000000
## GarageCondGd GarageCondGd 0.00000000
## GarageCondNone GarageCondNone 0.00000000
## GarageCondPo GarageCondPo 0.00000000
## GarageCondTA GarageCondTA 0.00000000
## PavedDriveP PavedDriveP 0.00000000
## PavedDriveY PavedDriveY 0.00000000
## EnclosedPorch EnclosedPorch 0.00000000
## X3SsnPorch X3SsnPorch 0.00000000
## PoolArea PoolArea 0.00000000
## PoolQCFa PoolQCFa 0.00000000
## PoolQCGd PoolQCGd 0.00000000
## PoolQCNone PoolQCNone 0.00000000
## FenceGdWo FenceGdWo 0.00000000
## FenceMnPrv FenceMnPrv 0.00000000
## FenceMnWw FenceMnWw 0.00000000
## FenceNone FenceNone 0.00000000
## MiscFeatureNone MiscFeatureNone 0.00000000
## MiscFeatureOthr MiscFeatureOthr 0.00000000
## MiscFeatureShed MiscFeatureShed 0.00000000
## MiscFeatureTenC MiscFeatureTenC 0.00000000
## MiscVal MiscVal 0.00000000
## MoSold MoSold 0.00000000
## YrSold YrSold 0.00000000
## SaleTypeCon SaleTypeCon 0.00000000
## SaleTypeConLD SaleTypeConLD 0.00000000
## SaleTypeConLI SaleTypeConLI 0.00000000
## SaleTypeConLw SaleTypeConLw 0.00000000
## SaleTypeCWD SaleTypeCWD 0.00000000
## SaleTypeOth SaleTypeOth 0.00000000
## SaleTypeWD SaleTypeWD 0.00000000
## SaleConditionAdjLand SaleConditionAdjLand 0.00000000
## SaleConditionAlloca SaleConditionAlloca 0.00000000
## SaleConditionFamily SaleConditionFamily 0.00000000
## SaleConditionNormal SaleConditionNormal 0.00000000
## SaleConditionPartial SaleConditionPartial 0.00000000
prediction3 <- predict(gbm,test)
list2 <- list(glm1 = glm, glmnet1 = glmnet, gbm1= gbm)
resamps2 <- resamples(list2)
summary(resamps2)
##
## Call:
## summary.resamples(object = resamps2)
##
## Models: glm1, glmnet1, gbm1
## Number of resamples: 5
##
## MAE
## Min. 1st Qu. Median Mean 3rd Qu. Max. NA's
## glm1 18896.47 20143.32 20584.08 20704.41 21254.16 22644.03 0
## glmnet1 15350.36 17610.88 17769.81 18041.20 18761.07 20713.86 0
## gbm1 15945.55 15980.61 18139.53 17354.96 18162.45 18546.68 0
##
## RMSE
## Min. 1st Qu. Median Mean 3rd Qu. Max. NA's
## glm1 44993.10 48625.97 60919.28 58871.56 61546.54 78272.89 0
## glmnet1 22922.41 26233.40 34354.68 32835.42 38361.63 42305.00 0
## gbm1 23666.12 24710.75 26860.84 29849.51 29169.68 44840.19 0
##
## Rsquared
## Min. 1st Qu. Median Mean 3rd Qu. Max. NA's
## glm1 0.4349752 0.5752046 0.6193942 0.6052654 0.6776039 0.7191493 0
## glmnet1 0.7097667 0.8026265 0.8813028 0.8357409 0.8896727 0.8953357 0
## gbm1 0.7195253 0.8662022 0.8749075 0.8560883 0.8979867 0.9218197 0
bwplot(resamps2, metric = "RMSE")
s3 <- data.frame(Id=test$Id,SalePrice=prediction3)
write.csv(s3,file="Kevin Clifford_Kaggle House Prices_GBM.csv",row.names=F)
# Random Forest
library(randomForest)
## Warning: package 'randomForest' was built under R version 4.0.4
## randomForest 4.6-14
## Type rfNews() to see new features/changes/bug fixes.
##
## Attaching package: 'randomForest'
## The following object is masked from 'package:ggplot2':
##
## margin
## The following object is masked from 'package:dplyr':
##
## combine
library(ranger)
## Warning: package 'ranger' was built under R version 4.0.4
##
## Attaching package: 'ranger'
## The following object is masked from 'package:randomForest':
##
## importance
rf <- train(SalePrice ~ ., data = train, method = "ranger", importance = 'impurity', trControl = control)
## Growing trees.. Progress: 85%. Estimated remaining time: 5 seconds.
## Growing trees.. Progress: 81%. Estimated remaining time: 7 seconds.
## Growing trees.. Progress: 98%. Estimated remaining time: 0 seconds.
rf
## Random Forest
##
## 1460 samples
## 80 predictor
##
## No pre-processing
## Resampling: Cross-Validated (5 fold)
## Summary of sample sizes: 1169, 1168, 1167, 1168, 1168
## Resampling results across tuning parameters:
##
## mtry splitrule RMSE Rsquared MAE
## 2 variance 47285.05 0.7823763 29901.49
## 2 extratrees 51304.19 0.7366241 33315.51
## 131 variance 28666.69 0.8741177 16960.58
## 131 extratrees 29731.42 0.8673496 17428.16
## 260 variance 29369.28 0.8656443 17638.92
## 260 extratrees 29146.18 0.8711159 17402.65
##
## Tuning parameter 'min.node.size' was held constant at a value of 5
## RMSE was used to select the optimal model using the smallest value.
## The final values used for the model were mtry = 131, splitrule = variance
## and min.node.size = 5.
summary(rf)
## Length Class Mode
## predictions 1460 -none- numeric
## num.trees 1 -none- numeric
## num.independent.variables 1 -none- numeric
## mtry 1 -none- numeric
## min.node.size 1 -none- numeric
## variable.importance 260 -none- numeric
## prediction.error 1 -none- numeric
## forest 7 ranger.forest list
## splitrule 1 -none- character
## treetype 1 -none- character
## r.squared 1 -none- numeric
## call 9 -none- call
## importance.mode 1 -none- character
## num.samples 1 -none- numeric
## replace 1 -none- logical
## xNames 260 -none- character
## problemType 1 -none- character
## tuneValue 3 data.frame list
## obsLevels 1 -none- logical
## param 1 -none- list
prediction4 <- predict(rf,test)
list3 <- list(glm1 = glm, glmnet1 = glmnet, gbm1= gbm, rf1=rf)
resamps3 <- resamples(list3)
summary(resamps3)
##
## Call:
## summary.resamples(object = resamps3)
##
## Models: glm1, glmnet1, gbm1, rf1
## Number of resamples: 5
##
## MAE
## Min. 1st Qu. Median Mean 3rd Qu. Max. NA's
## glm1 18896.47 20143.32 20584.08 20704.41 21254.16 22644.03 0
## glmnet1 15350.36 17610.88 17769.81 18041.20 18761.07 20713.86 0
## gbm1 15945.55 15980.61 18139.53 17354.96 18162.45 18546.68 0
## rf1 16611.97 16726.21 16871.56 16960.58 17050.04 17543.13 0
##
## RMSE
## Min. 1st Qu. Median Mean 3rd Qu. Max. NA's
## glm1 44993.10 48625.97 60919.28 58871.56 61546.54 78272.89 0
## glmnet1 22922.41 26233.40 34354.68 32835.42 38361.63 42305.00 0
## gbm1 23666.12 24710.75 26860.84 29849.51 29169.68 44840.19 0
## rf1 25895.67 27330.67 27635.50 28666.69 30323.21 32148.39 0
##
## Rsquared
## Min. 1st Qu. Median Mean 3rd Qu. Max. NA's
## glm1 0.4349752 0.5752046 0.6193942 0.6052654 0.6776039 0.7191493 0
## glmnet1 0.7097667 0.8026265 0.8813028 0.8357409 0.8896727 0.8953357 0
## gbm1 0.7195253 0.8662022 0.8749075 0.8560883 0.8979867 0.9218197 0
## rf1 0.8384291 0.8655175 0.8817581 0.8741177 0.8914742 0.8934095 0
bwplot(resamps3, metric = "RMSE")
s4 <- data.frame(Id=test$Id,SalePrice=prediction4)
write.csv(s4,file="Kevin Clifford_Kaggle House Prices_RF.csv", row.names=F)