pacman::p_load(pacman,tidyr,ggthemes,ggplot2,plotly,GGally,rio,
stringr,shiny,rmarkdown,lubridate,psych,ipred,caret,ROCR,pROC,
DT,dummies,rpart,rpart.plot,httr,randomForest,readr,doParallel,
xgboost,truncnorm,DMwR)
# additional packages
library(tidyverse)
## -- Attaching packages --------------------------------------- tidyverse 1.3.0 --
## v tibble 3.0.4 v dplyr 1.0.2
## v purrr 0.3.4 v forcats 0.5.0
## -- Conflicts ------------------------------------------ tidyverse_conflicts() --
## x psych::%+%() masks ggplot2::%+%()
## x purrr::accumulate() masks foreach::accumulate()
## x psych::alpha() masks ggplot2::alpha()
## x lubridate::as.difftime() masks base::as.difftime()
## x dplyr::combine() masks randomForest::combine()
## x httr::config() masks plotly::config()
## x lubridate::date() masks base::date()
## x dplyr::filter() masks plotly::filter(), stats::filter()
## x lubridate::intersect() masks base::intersect()
## x dplyr::lag() masks stats::lag()
## x purrr::lift() masks caret::lift()
## x randomForest::margin() masks ggplot2::margin()
## x httr::progress() masks caret::progress()
## x lubridate::setdiff() masks base::setdiff()
## x dplyr::slice() masks xgboost::slice(), plotly::slice()
## x lubridate::union() masks base::union()
## x purrr::when() masks foreach::when()
library("ggpubr")
library(MASS)
##
## Attaching package: 'MASS'
## The following object is masked from 'package:dplyr':
##
## select
## The following object is masked from 'package:plotly':
##
## select
library(fastDummies)
#set working directory
setwd("C:\\Users\\Xholi\\Downloads")
#load the data
train<-read.csv("House Prices\\train.csv")
test<-read.csv("House Prices\\test.csv")
final<-read.csv("House Prices\\sample_submission.csv")
set.seed(1992)
#Target analysis
theme_set(
theme_bw() +
theme(legend.position = "top")
)
#Visualize the target
#boxplots
#outliers
#Normal distribution
#skew and kurtosis
#1stly for all prices, then we compare by price groups
#add bell curve onto ds
#reload train data set
set.seed(1992)
train<-read.csv("House Prices\\train.csv")
#for all prices
Target=function(txt)
{ds=ggplot(train,aes(SalePrice))+
geom_histogram(color='black',fill=txt,bins = 80)
bxp=ggplot(train,aes(SalePrice))+
geom_boxplot(color='black',fill=txt)
qq= ggplot(train, aes(sample=SalePrice))+
stat_qq() +
stat_qq_line()
ggarrange(ds, qq, bxp,
labels = c("1", "2", "3"),
ncol = 2, nrow = 2)
}
Target('pink')

#houses greater $700k
train<-read.csv("House Prices\\train.csv")
train=train%>%filter(SalePrice>7e5) #$700 000
train$SalePrice=log(train$SalePrice) #or box cox transformation
Target1=function(txt)
{dp=ggplot(train,aes(SalePrice))+
geom_histogram(color='black',fill=txt,bins = 80) #Skew? Kurtosis?
bxp=ggplot(train,aes(SalePrice))+
geom_boxplot(color='black',fill=txt) #Outliers?
qq= ggplot(train, aes(sample=SalePrice))+
stat_qq() +
stat_qq_line() #Normal distribution?
ggarrange(dp, qq, bxp,
labels = c("1", "2", "3"),
ncol = 2, nrow = 2)
}
Target1('red')

#Houses less than or equal to $500k but greater than $100k
#reload train data set
set.seed(1992)
train<-read.csv("House Prices\\train.csv")
#houses greater than 100k but less or equal to 500k
train=train%>%filter(SalePrice>1e5&SalePrice<=5e5) #$700 000
train$SalePrice=log(train$SalePrice) #or box cox transformation
Target2=function(txt)
{dz=ggplot(train,aes(SalePrice))+
geom_histogram(color='black',fill=txt,bins = 80)
bxp=ggplot(train,aes(SalePrice))+
geom_boxplot(color='black',fill=txt)
qq= ggplot(train, aes(sample=SalePrice))+
stat_qq() +
stat_qq_line()
ggarrange(dz, qq, bxp,
labels = c("1", "2", "3"),
ncol = 2, nrow = 2)
}
Target2('blue')

#reload train data set
set.seed(1992)
train<-read.csv("House Prices\\train.csv")
#houses greater than 700k but less or equal to $2M
train=train%>%filter(SalePrice>7e5&SalePrice<=2e6) #$700 000
train$SalePrice=log(train$SalePrice) #or box cox transformation
Target3=function(txt)
{ds=ggplot(train,aes(SalePrice))+
geom_histogram(color='black',fill=txt,bins = 80) #Skew, Kurtosis
bxp=ggplot(train,aes(SalePrice))+
geom_boxplot(color='black',fill=txt) #Outliers
qq= ggplot(train, aes(sample=SalePrice))+
stat_qq() +
stat_qq_line() #Normal distribution?
ggarrange(ds, qq, bxp,
labels = c("1", "2", "3"),
ncol = 2, nrow = 2)
}
Target3('green')

#Data Exploration
set.seed(1992)
train<-read.csv("House Prices\\train.csv")
#identifies the type of dwelling involved
x<-train$MSSubClass[train$SalePrice>=7e5]
x
## [1] 60 60
summary(x)
## Min. 1st Qu. Median Mean 3rd Qu. Max.
## 60 60 60 60 60 60
#2-STORY 1946 & NEWER this is the type of dwelling priced at $700k and more
#Identifies the general zoning classification of the
x<-train$MSZoning[train$SalePrice>=7e5]
x
## [1] "RL" "RL"
#and are of zoning type Residential Low Density
#Linear feet of street connected to property
x<-train$LotFrontage[train$SalePrice>=7e5]
x
## [1] 104 160
summary(x)
## Min. 1st Qu. Median Mean 3rd Qu. Max.
## 104 118 132 132 146 160
#from 104 to 160 feet
#Lot size in square feet
x<-train$LotArea[train$SalePrice>=7e5]
x
## [1] 21535 15623
summary(x)
## Min. 1st Qu. Median Mean 3rd Qu. Max.
## 15623 17101 18579 18579 20057 21535
# and has lot area of ranges 15623 square feet to 21535 square feet
#Type of road access to property
x<-train$Street[train$SalePrice>=7e5]
x
## [1] "Pave" "Pave"
#and the streets are paved
#year built
x<-train$YearBuilt[train$SalePrice>=7e5]
x
## [1] 1994 1996
summary(x)
## Min. 1st Qu. Median Mean 3rd Qu. Max.
## 1994 1994 1995 1995 1996 1996
#and are built in the years 1994-1996
SalePrice=train$SalePrice #Save the target. At the moment it's not used
train=dplyr::select(train,-SalePrice) #The features, variables, predictors. Whatever you want to call it..
Sales_data=rbind(train,test)
Sales_data=Sales_data[-1] # Id out
#data manupilation
#Define the regressors by their type
cont=select_if(Sales_data,function(x) is.numeric(x))
catg=select_if(Sales_data,function(x) !is.numeric(x))
#check missing data
sum(is.na(Sales_data))
## [1] 13965
#there are 13965 missing vaules
#plot missing values
t(lapply(Sales_data, function(x) sum(is.na(x))))
## MSSubClass MSZoning LotFrontage LotArea Street Alley LotShape LandContour
## [1,] 0 4 486 0 0 2721 0 0
## Utilities LotConfig LandSlope Neighborhood Condition1 Condition2 BldgType
## [1,] 2 0 0 0 0 0 0
## HouseStyle OverallQual OverallCond YearBuilt YearRemodAdd RoofStyle
## [1,] 0 0 0 0 0 0
## RoofMatl Exterior1st Exterior2nd MasVnrType MasVnrArea ExterQual ExterCond
## [1,] 0 1 1 24 23 0 0
## Foundation BsmtQual BsmtCond BsmtExposure BsmtFinType1 BsmtFinSF1
## [1,] 0 81 82 82 79 1
## BsmtFinType2 BsmtFinSF2 BsmtUnfSF TotalBsmtSF Heating HeatingQC CentralAir
## [1,] 80 1 1 1 0 0 0
## Electrical X1stFlrSF X2ndFlrSF LowQualFinSF GrLivArea BsmtFullBath
## [1,] 1 0 0 0 0 2
## BsmtHalfBath FullBath HalfBath BedroomAbvGr KitchenAbvGr KitchenQual
## [1,] 2 0 0 0 0 1
## TotRmsAbvGrd Functional Fireplaces FireplaceQu GarageType GarageYrBlt
## [1,] 0 2 0 1420 157 159
## GarageFinish GarageCars GarageArea GarageQual GarageCond PavedDrive
## [1,] 159 1 1 159 159 0
## WoodDeckSF OpenPorchSF EnclosedPorch X3SsnPorch ScreenPorch PoolArea
## [1,] 0 0 0 0 0 0
## PoolQC Fence MiscFeature MiscVal MoSold YrSold SaleType SaleCondition
## [1,] 2909 2348 2814 0 0 0 1 0
#address the missing values
#1
summary(Sales_data$MasVnrArea)
## Min. 1st Qu. Median Mean 3rd Qu. Max. NA's
## 0.0 0.0 0.0 102.2 164.0 1600.0 23
Sales_data$MasVnrArea=sapply(Sales_data$MasVnrArea, function(x) ifelse(is.na(x),0,x))
#2
summary(Sales_data$GarageYrBlt)#Max value: 2207?
## Min. 1st Qu. Median Mean 3rd Qu. Max. NA's
## 1895 1960 1979 1978 2002 2207 159
Sales_data$GarageYrBlt=sapply(Sales_data$GarageYrBlt,
function(x) ifelse(is.na(x),1978,x))
Sales_data$GarageYrBlt=sapply(Sales_data$GarageYrBlt,
function(x) ifelse(x>2020,1978,x))
#3
summary(Sales_data$LotFrontage)
## Min. 1st Qu. Median Mean 3rd Qu. Max. NA's
## 21.00 59.00 68.00 69.31 80.00 313.00 486
Sales_data$LotFrontage=sapply(Sales_data$LotFrontage, function(x) ifelse(is.na(x),68,x))
#specifically
cont=lapply(cont, function(x) ifelse(is.na(x),mean(x,na.rm = TRUE),x))%>%data.frame
sum(is.na(Sales_data))
## [1] 13297
#there are still nas to remove
#CATEGORY MISSING VALUES OR NAS
getmode <- function(v) #return the mode of the variable
{
v=na.omit(v)
uniqv <- unique(v)
uniqv[which.max(tabulate(match(v, uniqv)))]
}
df1=dplyr::select(catg,-Fence,-Alley,-MiscFeature,-PoolQC)
df2=dplyr::select(catg,Fence,Alley,MiscFeature,PoolQC)
df1=lapply(df1,function(x) ifelse(is.na(x),getmode(x),x))%>%data.frame()
df2=lapply(df2,function(x) ifelse(is.na(x),'NG',x))%>%data.frame() #create a new group and not alter distributions
Clean_data.cat=cbind(df1,df2)
sum(is.na(Clean_data.cat))
## [1] 0
str(Clean_data.cat)
## 'data.frame': 2919 obs. of 43 variables:
## $ MSZoning : chr "RL" "RL" "RL" "RL" ...
## $ Street : chr "Pave" "Pave" "Pave" "Pave" ...
## $ LotShape : chr "Reg" "Reg" "IR1" "IR1" ...
## $ LandContour : chr "Lvl" "Lvl" "Lvl" "Lvl" ...
## $ Utilities : chr "AllPub" "AllPub" "AllPub" "AllPub" ...
## $ LotConfig : chr "Inside" "FR2" "Inside" "Corner" ...
## $ LandSlope : chr "Gtl" "Gtl" "Gtl" "Gtl" ...
## $ Neighborhood : chr "CollgCr" "Veenker" "CollgCr" "Crawfor" ...
## $ Condition1 : chr "Norm" "Feedr" "Norm" "Norm" ...
## $ Condition2 : chr "Norm" "Norm" "Norm" "Norm" ...
## $ BldgType : chr "1Fam" "1Fam" "1Fam" "1Fam" ...
## $ HouseStyle : chr "2Story" "1Story" "2Story" "2Story" ...
## $ RoofStyle : chr "Gable" "Gable" "Gable" "Gable" ...
## $ RoofMatl : chr "CompShg" "CompShg" "CompShg" "CompShg" ...
## $ Exterior1st : chr "VinylSd" "MetalSd" "VinylSd" "Wd Sdng" ...
## $ Exterior2nd : chr "VinylSd" "MetalSd" "VinylSd" "Wd Shng" ...
## $ MasVnrType : chr "BrkFace" "None" "BrkFace" "None" ...
## $ ExterQual : chr "Gd" "TA" "Gd" "TA" ...
## $ ExterCond : chr "TA" "TA" "TA" "TA" ...
## $ Foundation : chr "PConc" "CBlock" "PConc" "BrkTil" ...
## $ BsmtQual : chr "Gd" "Gd" "Gd" "TA" ...
## $ BsmtCond : chr "TA" "TA" "TA" "Gd" ...
## $ BsmtExposure : chr "No" "Gd" "Mn" "No" ...
## $ BsmtFinType1 : chr "GLQ" "ALQ" "GLQ" "ALQ" ...
## $ BsmtFinType2 : chr "Unf" "Unf" "Unf" "Unf" ...
## $ Heating : chr "GasA" "GasA" "GasA" "GasA" ...
## $ HeatingQC : chr "Ex" "Ex" "Ex" "Gd" ...
## $ CentralAir : chr "Y" "Y" "Y" "Y" ...
## $ Electrical : chr "SBrkr" "SBrkr" "SBrkr" "SBrkr" ...
## $ KitchenQual : chr "Gd" "TA" "Gd" "Gd" ...
## $ Functional : chr "Typ" "Typ" "Typ" "Typ" ...
## $ FireplaceQu : chr "Gd" "TA" "TA" "Gd" ...
## $ GarageType : chr "Attchd" "Attchd" "Attchd" "Detchd" ...
## $ GarageFinish : chr "RFn" "RFn" "RFn" "Unf" ...
## $ GarageQual : chr "TA" "TA" "TA" "TA" ...
## $ GarageCond : chr "TA" "TA" "TA" "TA" ...
## $ PavedDrive : chr "Y" "Y" "Y" "Y" ...
## $ SaleType : chr "WD" "WD" "WD" "WD" ...
## $ SaleCondition: chr "Normal" "Normal" "Normal" "Abnorml" ...
## $ Fence : chr "NG" "NG" "NG" "NG" ...
## $ Alley : chr "NG" "NG" "NG" "NG" ...
## $ MiscFeature : chr "NG" "NG" "NG" "NG" ...
## $ PoolQC : chr "NG" "NG" "NG" "NG" ...
#Transformations
cont$GarageYrBlt=sapply(cont$GarageYrBlt, function(x) ifelse(x>2020,1978,x))
cont$YearBuilt=2020-cont$YearBuilt #Ex: Age of house
cont$YearRemodAdd=2020-cont$YearRemodAdd
cont$GarageYrBlt=2020-cont$GarageYrBlt
cont$YrSold=2020-cont$YrSold
#Fix distributions (Float and int)
fix=function(y) #Box Cox transformation. Return the transformed variable in a dataframe
{if (sum(y==0)>10) return(y) #Float filter
else {result = boxcox(y~1, lambda = seq(-10,20,0.5))
mylambda = result$x[which.max(result$y)]
if (mylambda!=0) return((y^mylambda-1)/mylambda)
else return(log(y+0.01))}
}
str(Clean_data.cat)
## 'data.frame': 2919 obs. of 43 variables:
## $ MSZoning : chr "RL" "RL" "RL" "RL" ...
## $ Street : chr "Pave" "Pave" "Pave" "Pave" ...
## $ LotShape : chr "Reg" "Reg" "IR1" "IR1" ...
## $ LandContour : chr "Lvl" "Lvl" "Lvl" "Lvl" ...
## $ Utilities : chr "AllPub" "AllPub" "AllPub" "AllPub" ...
## $ LotConfig : chr "Inside" "FR2" "Inside" "Corner" ...
## $ LandSlope : chr "Gtl" "Gtl" "Gtl" "Gtl" ...
## $ Neighborhood : chr "CollgCr" "Veenker" "CollgCr" "Crawfor" ...
## $ Condition1 : chr "Norm" "Feedr" "Norm" "Norm" ...
## $ Condition2 : chr "Norm" "Norm" "Norm" "Norm" ...
## $ BldgType : chr "1Fam" "1Fam" "1Fam" "1Fam" ...
## $ HouseStyle : chr "2Story" "1Story" "2Story" "2Story" ...
## $ RoofStyle : chr "Gable" "Gable" "Gable" "Gable" ...
## $ RoofMatl : chr "CompShg" "CompShg" "CompShg" "CompShg" ...
## $ Exterior1st : chr "VinylSd" "MetalSd" "VinylSd" "Wd Sdng" ...
## $ Exterior2nd : chr "VinylSd" "MetalSd" "VinylSd" "Wd Shng" ...
## $ MasVnrType : chr "BrkFace" "None" "BrkFace" "None" ...
## $ ExterQual : chr "Gd" "TA" "Gd" "TA" ...
## $ ExterCond : chr "TA" "TA" "TA" "TA" ...
## $ Foundation : chr "PConc" "CBlock" "PConc" "BrkTil" ...
## $ BsmtQual : chr "Gd" "Gd" "Gd" "TA" ...
## $ BsmtCond : chr "TA" "TA" "TA" "Gd" ...
## $ BsmtExposure : chr "No" "Gd" "Mn" "No" ...
## $ BsmtFinType1 : chr "GLQ" "ALQ" "GLQ" "ALQ" ...
## $ BsmtFinType2 : chr "Unf" "Unf" "Unf" "Unf" ...
## $ Heating : chr "GasA" "GasA" "GasA" "GasA" ...
## $ HeatingQC : chr "Ex" "Ex" "Ex" "Gd" ...
## $ CentralAir : chr "Y" "Y" "Y" "Y" ...
## $ Electrical : chr "SBrkr" "SBrkr" "SBrkr" "SBrkr" ...
## $ KitchenQual : chr "Gd" "TA" "Gd" "Gd" ...
## $ Functional : chr "Typ" "Typ" "Typ" "Typ" ...
## $ FireplaceQu : chr "Gd" "TA" "TA" "Gd" ...
## $ GarageType : chr "Attchd" "Attchd" "Attchd" "Detchd" ...
## $ GarageFinish : chr "RFn" "RFn" "RFn" "Unf" ...
## $ GarageQual : chr "TA" "TA" "TA" "TA" ...
## $ GarageCond : chr "TA" "TA" "TA" "TA" ...
## $ PavedDrive : chr "Y" "Y" "Y" "Y" ...
## $ SaleType : chr "WD" "WD" "WD" "WD" ...
## $ SaleCondition: chr "Normal" "Normal" "Normal" "Abnorml" ...
## $ Fence : chr "NG" "NG" "NG" "NG" ...
## $ Alley : chr "NG" "NG" "NG" "NG" ...
## $ MiscFeature : chr "NG" "NG" "NG" "NG" ...
## $ PoolQC : chr "NG" "NG" "NG" "NG" ...
cont1=cont%>%dplyr::select( MSSubClass,LotFrontage,LotArea,
YearBuilt,TotalBsmtSF,X1stFlrSF, MoSold,
GrLivArea,GarageArea,GarageYrBlt,YearRemodAdd,OverallQual,
OverallCond,TotRmsAbvGrd) #Select only floats
cont2=cont%>%dplyr::select( -MSSubClass,-LotFrontage,-LotArea,
-BsmtUnfSF,-BsmtFinSF1,-YearBuilt,-TotalBsmtSF,-X1stFlrSF,
-GrLivArea,-GarageArea,-GarageYrBlt,-MoSold,-YearRemodAdd ) #Maybe ordinal variables or special case. See the below annotation
cont1=lapply(cont1, function(x) ifelse(x>quantile(x,0.999),mean(x,na.rm = TRUE),x))%>%data.frame #Remove some outliers
boxplot(scale(cont1))

#removing out-liers
cont1$LotArea=sapply(cont1, function(x) ifelse(x>quantile(x,0.998),mean(x,na.rm = TRUE),x))
cont1=lapply(cont1,
function(x) ifelse(x>quantile(x,0.999),
mean(x,na.rm = TRUE),x))%>%data.frame
boxplot(scale(cont1))

#data exploration continuation
{
p=ggplot(cont1,aes(LotFrontage))+geom_histogram(fill='brown',alpha=1,bins = 80)
l=ggplot(cont2,aes(OverallCond))+geom_histogram(fill='blue',alpha=1,bins=80)
o= ggplot(cont2, aes(MasVnrArea))+geom_histogram(fill='pink',alpha=1,bins=80)
t= ggplot(cont2, aes(PoolArea))+geom_histogram(fill='thistle1',alpha=1,bins=80)
ggarrange(p, l, o,t,
labels = c("P", "L", "O","T"),
ncol = 2, nrow = 2)
}

{
p2=ggplot(cont1,aes(YearBuilt))+geom_histogram(fill='red',alpha=1,bins = 80)
l2=ggplot(cont1,aes(MSSubClass))+geom_histogram(fill='blue',alpha=1,bins=80)
o2= ggplot(cont1, aes(MoSold))+geom_histogram(fill='pink',alpha=1,bins=80)
t2= ggplot(cont1, aes(GrLivArea))+geom_histogram(fill='magenta',alpha=1,bins=80)
ggarrange(p2,l2,o2,t2,
labels = c("P2","L2","O2","T2"),
ncol = 2, nrow = 2)
}

B=cont[,c('BsmtUnfSF','BsmtFinSF1')] #Look at the cell above for a moment. These variables were not considered
B$BsmtUnfSF=cut(cont$BsmtUnfSF,breaks=3)
B$BsmtFinSF1=cut(cont$BsmtFinSF1,breaks=3) #There are more case but it's not as problematic as these two cases
{
for (i in 1:15)
cont1[,i]=fix(cont1[,i])
cont1=scale(cont1)%>%data.frame()
}













#Correlation
cor.plot(cont1,
numbers=T,
upper=FALSE,
main = "Pearson's correlation",
show.legend = F)

glimpse(catg)
## Rows: 2,919
## Columns: 43
## $ MSZoning <chr> "RL", "RL", "RL", "RL", "RL", "RL", "RL", "RL", "RM",...
## $ Street <chr> "Pave", "Pave", "Pave", "Pave", "Pave", "Pave", "Pave...
## $ Alley <chr> NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, N...
## $ LotShape <chr> "Reg", "Reg", "IR1", "IR1", "IR1", "IR1", "Reg", "IR1...
## $ LandContour <chr> "Lvl", "Lvl", "Lvl", "Lvl", "Lvl", "Lvl", "Lvl", "Lvl...
## $ Utilities <chr> "AllPub", "AllPub", "AllPub", "AllPub", "AllPub", "Al...
## $ LotConfig <chr> "Inside", "FR2", "Inside", "Corner", "FR2", "Inside",...
## $ LandSlope <chr> "Gtl", "Gtl", "Gtl", "Gtl", "Gtl", "Gtl", "Gtl", "Gtl...
## $ Neighborhood <chr> "CollgCr", "Veenker", "CollgCr", "Crawfor", "NoRidge"...
## $ Condition1 <chr> "Norm", "Feedr", "Norm", "Norm", "Norm", "Norm", "Nor...
## $ Condition2 <chr> "Norm", "Norm", "Norm", "Norm", "Norm", "Norm", "Norm...
## $ BldgType <chr> "1Fam", "1Fam", "1Fam", "1Fam", "1Fam", "1Fam", "1Fam...
## $ HouseStyle <chr> "2Story", "1Story", "2Story", "2Story", "2Story", "1....
## $ RoofStyle <chr> "Gable", "Gable", "Gable", "Gable", "Gable", "Gable",...
## $ RoofMatl <chr> "CompShg", "CompShg", "CompShg", "CompShg", "CompShg"...
## $ Exterior1st <chr> "VinylSd", "MetalSd", "VinylSd", "Wd Sdng", "VinylSd"...
## $ Exterior2nd <chr> "VinylSd", "MetalSd", "VinylSd", "Wd Shng", "VinylSd"...
## $ MasVnrType <chr> "BrkFace", "None", "BrkFace", "None", "BrkFace", "Non...
## $ ExterQual <chr> "Gd", "TA", "Gd", "TA", "Gd", "TA", "Gd", "TA", "TA",...
## $ ExterCond <chr> "TA", "TA", "TA", "TA", "TA", "TA", "TA", "TA", "TA",...
## $ Foundation <chr> "PConc", "CBlock", "PConc", "BrkTil", "PConc", "Wood"...
## $ BsmtQual <chr> "Gd", "Gd", "Gd", "TA", "Gd", "Gd", "Ex", "Gd", "TA",...
## $ BsmtCond <chr> "TA", "TA", "TA", "Gd", "TA", "TA", "TA", "TA", "TA",...
## $ BsmtExposure <chr> "No", "Gd", "Mn", "No", "Av", "No", "Av", "Mn", "No",...
## $ BsmtFinType1 <chr> "GLQ", "ALQ", "GLQ", "ALQ", "GLQ", "GLQ", "GLQ", "ALQ...
## $ BsmtFinType2 <chr> "Unf", "Unf", "Unf", "Unf", "Unf", "Unf", "Unf", "BLQ...
## $ Heating <chr> "GasA", "GasA", "GasA", "GasA", "GasA", "GasA", "GasA...
## $ HeatingQC <chr> "Ex", "Ex", "Ex", "Gd", "Ex", "Ex", "Ex", "Ex", "Gd",...
## $ CentralAir <chr> "Y", "Y", "Y", "Y", "Y", "Y", "Y", "Y", "Y", "Y", "Y"...
## $ Electrical <chr> "SBrkr", "SBrkr", "SBrkr", "SBrkr", "SBrkr", "SBrkr",...
## $ KitchenQual <chr> "Gd", "TA", "Gd", "Gd", "Gd", "TA", "Gd", "TA", "TA",...
## $ Functional <chr> "Typ", "Typ", "Typ", "Typ", "Typ", "Typ", "Typ", "Typ...
## $ FireplaceQu <chr> NA, "TA", "TA", "Gd", "TA", NA, "Gd", "TA", "TA", "TA...
## $ GarageType <chr> "Attchd", "Attchd", "Attchd", "Detchd", "Attchd", "At...
## $ GarageFinish <chr> "RFn", "RFn", "RFn", "Unf", "RFn", "Unf", "RFn", "RFn...
## $ GarageQual <chr> "TA", "TA", "TA", "TA", "TA", "TA", "TA", "TA", "Fa",...
## $ GarageCond <chr> "TA", "TA", "TA", "TA", "TA", "TA", "TA", "TA", "TA",...
## $ PavedDrive <chr> "Y", "Y", "Y", "Y", "Y", "Y", "Y", "Y", "Y", "Y", "Y"...
## $ PoolQC <chr> NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, N...
## $ Fence <chr> NA, NA, NA, NA, NA, "MnPrv", NA, NA, NA, NA, NA, NA, ...
## $ MiscFeature <chr> NA, NA, NA, NA, NA, "Shed", NA, "Shed", NA, NA, NA, N...
## $ SaleType <chr> "WD", "WD", "WD", "WD", "WD", "WD", "WD", "WD", "WD",...
## $ SaleCondition <chr> "Normal", "Normal", "Normal", "Abnorml", "Normal", "N...
BACK_UP.catg=catg
catg=catg%>%dplyr::select(-Utilities)
#Transforming the characters to 1 and 0
{
catg$Condition1=ifelse(catg$Condition1=='Norm',1,0)
catg$Condition2=ifelse(catg$Condition2=='Norm',1,0)
catg$LandSlope=ifelse(catg$LandSlope=='Gtl',1,0)
catg$RoofStyle=ifelse(catg$RoofStyle=='Gable',1,0)
catg$RoofMatl=ifelse(catg$RoofMatl=='CompShg',1,0)
catg$Electrical=ifelse(catg$Electrical=='SBrkr',1,0)
catg$Heating=ifelse(catg$Heating=='GasA',1,0)
catg$Functional=ifelse(catg$Functional=='Typ',1,0)
catg$GarageQual=ifelse(catg$GarageQual=='TA',1,0)
catg$GarageCond=ifelse(catg$GarageCond=='TA',1,0)
catg$MiscFeature=ifelse(catg$MiscFeature=='NA',1,0)
catg$SaleType=ifelse(catg$SaleType=='WD',1,0)
catg$PoolQC=ifelse(catg$PoolQC=='NG',1,0)
}
glimpse(catg)
## Rows: 2,919
## Columns: 42
## $ MSZoning <chr> "RL", "RL", "RL", "RL", "RL", "RL", "RL", "RL", "RM",...
## $ Street <chr> "Pave", "Pave", "Pave", "Pave", "Pave", "Pave", "Pave...
## $ Alley <chr> NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, N...
## $ LotShape <chr> "Reg", "Reg", "IR1", "IR1", "IR1", "IR1", "Reg", "IR1...
## $ LandContour <chr> "Lvl", "Lvl", "Lvl", "Lvl", "Lvl", "Lvl", "Lvl", "Lvl...
## $ LotConfig <chr> "Inside", "FR2", "Inside", "Corner", "FR2", "Inside",...
## $ LandSlope <dbl> 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,...
## $ Neighborhood <chr> "CollgCr", "Veenker", "CollgCr", "Crawfor", "NoRidge"...
## $ Condition1 <dbl> 1, 0, 1, 1, 1, 1, 1, 0, 0, 0, 1, 1, 1, 1, 1, 1, 1, 1,...
## $ Condition2 <dbl> 1, 1, 1, 1, 1, 1, 1, 1, 1, 0, 1, 1, 1, 1, 1, 1, 1, 1,...
## $ BldgType <chr> "1Fam", "1Fam", "1Fam", "1Fam", "1Fam", "1Fam", "1Fam...
## $ HouseStyle <chr> "2Story", "1Story", "2Story", "2Story", "2Story", "1....
## $ RoofStyle <dbl> 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 0, 0, 0, 1, 0, 1, 1, 1,...
## $ RoofMatl <dbl> 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,...
## $ Exterior1st <chr> "VinylSd", "MetalSd", "VinylSd", "Wd Sdng", "VinylSd"...
## $ Exterior2nd <chr> "VinylSd", "MetalSd", "VinylSd", "Wd Shng", "VinylSd"...
## $ MasVnrType <chr> "BrkFace", "None", "BrkFace", "None", "BrkFace", "Non...
## $ ExterQual <chr> "Gd", "TA", "Gd", "TA", "Gd", "TA", "Gd", "TA", "TA",...
## $ ExterCond <chr> "TA", "TA", "TA", "TA", "TA", "TA", "TA", "TA", "TA",...
## $ Foundation <chr> "PConc", "CBlock", "PConc", "BrkTil", "PConc", "Wood"...
## $ BsmtQual <chr> "Gd", "Gd", "Gd", "TA", "Gd", "Gd", "Ex", "Gd", "TA",...
## $ BsmtCond <chr> "TA", "TA", "TA", "Gd", "TA", "TA", "TA", "TA", "TA",...
## $ BsmtExposure <chr> "No", "Gd", "Mn", "No", "Av", "No", "Av", "Mn", "No",...
## $ BsmtFinType1 <chr> "GLQ", "ALQ", "GLQ", "ALQ", "GLQ", "GLQ", "GLQ", "ALQ...
## $ BsmtFinType2 <chr> "Unf", "Unf", "Unf", "Unf", "Unf", "Unf", "Unf", "BLQ...
## $ Heating <dbl> 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,...
## $ HeatingQC <chr> "Ex", "Ex", "Ex", "Gd", "Ex", "Ex", "Ex", "Ex", "Gd",...
## $ CentralAir <chr> "Y", "Y", "Y", "Y", "Y", "Y", "Y", "Y", "Y", "Y", "Y"...
## $ Electrical <dbl> 1, 1, 1, 1, 1, 1, 1, 1, 0, 1, 1, 1, 1, 1, 1, 0, 1, 1,...
## $ KitchenQual <chr> "Gd", "TA", "Gd", "Gd", "Gd", "TA", "Gd", "TA", "TA",...
## $ Functional <dbl> 1, 1, 1, 1, 1, 1, 1, 1, 0, 1, 1, 1, 1, 1, 1, 1, 1, 1,...
## $ FireplaceQu <chr> NA, "TA", "TA", "Gd", "TA", NA, "Gd", "TA", "TA", "TA...
## $ GarageType <chr> "Attchd", "Attchd", "Attchd", "Detchd", "Attchd", "At...
## $ GarageFinish <chr> "RFn", "RFn", "RFn", "Unf", "RFn", "Unf", "RFn", "RFn...
## $ GarageQual <dbl> 1, 1, 1, 1, 1, 1, 1, 1, 0, 0, 1, 1, 1, 1, 1, 1, 1, 1,...
## $ GarageCond <dbl> 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,...
## $ PavedDrive <chr> "Y", "Y", "Y", "Y", "Y", "Y", "Y", "Y", "Y", "Y", "Y"...
## $ PoolQC <dbl> NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, N...
## $ Fence <chr> NA, NA, NA, NA, NA, "MnPrv", NA, NA, NA, NA, NA, NA, ...
## $ MiscFeature <dbl> NA, NA, NA, NA, NA, 0, NA, 0, NA, NA, NA, NA, NA, NA,...
## $ SaleType <dbl> 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 0, 1, 0, 1, 1, 1, 1,...
## $ SaleCondition <chr> "Normal", "Normal", "Normal", "Abnorml", "Normal", "N...
#because of the weird continuous values in cont2 we will analyze in another way
#CATEGORIC
cont2a=cont2%>%dplyr::select( OverallQual,OverallCond,FullBath,BedroomAbvGr,KitchenAbvGr,TotRmsAbvGrd,Fireplaces,GarageCars,YrSold )
cont2b=cont2%>%dplyr::select(-OverallQual,-YrSold,-OverallCond,-FullBath,-BedroomAbvGr,-KitchenAbvGr,-TotRmsAbvGrd,-Fireplaces,-GarageCars )
cont2b=lapply(cont2b,function(x) ifelse(x==0,0,1))%>%data.frame() #0 and 1. Many 0's
catg1=cbind(catg,cont2a,cont2b,B)
catg1=lapply(catg1,factor)%>%data.frame()
catg_dummy=dummy_cols(catg1,remove_first_dummy = T,remove_selected_columns = T)%>%data.frame()
catg_dummy=lapply(catg_dummy,factor)%>%data.frame()
#Last details to modeling
DF=cbind(cont1,catg_dummy)
DF1<-DF[,-c(28:279)] #removed all the non-frequent values
training=DF1[c(1:1460),] #SPLIT THE DATA INTO TRAIN AND TEST
testing=DF1[c(1459:2917),]
Y_train=cbind(training,SalePrice)
str(Y_train)
## 'data.frame': 1460 obs. of 28 variables:
## $ MSSubClass : num 0.516 -1.173 0.516 0.712 0.516 ...
## $ LotFrontage : num -0.14204 0.58601 0.00847 -0.39912 0.77079 ...
## $ LotArea.MSSubClass : num 0.516 -1.173 0.516 0.712 0.516 ...
## $ LotArea.LotFrontage : num -0.1856 0.5728 -0.0327 -0.4418 0.7728 ...
## $ LotArea.LotArea : num -0.2283 0.0752 0.5051 0.0621 1.2751 ...
## $ LotArea.YearBuilt : num -1.1587 0.0794 -1.0316 1.5706 -0.9715 ...
## $ LotArea.TotalBsmtSF : num -0.455 0.527 -0.3 -0.697 0.244 ...
## $ LotArea.X1stFlrSF : num -0.782 0.444 -0.554 -0.417 0.136 ...
## $ LotArea.MoSold : num -1.588 -0.427 1.024 -1.588 2.072 ...
## $ LotArea.GrLivArea : num 0.585 -0.367 0.721 0.598 1.372 ...
## $ LotArea.GarageArea : num 0.3706 -0.0487 0.6564 0.8184 1.7427 ...
## $ LotArea.GarageYrBlt : num -1.116 0.296 -0.971 -0.773 -0.903 ...
## $ LotArea.YearRemodAdd: num -0.898 0.633 -0.806 0.839 -0.637 ...
## $ LotArea.OverallQual : num 0.6519 -0.0524 0.6519 0.6519 1.3472 ...
## $ LotArea.OverallCond : num -0.478 2.068 -0.478 -0.478 -0.478 ...
## $ LotArea.TotRmsAbvGrd: num 1.004 -0.286 -0.286 0.359 1.649 ...
## $ YearBuilt : num -1.048 -0.152 -0.982 1.872 -0.949 ...
## $ TotalBsmtSF : num -0.455 0.519 -0.302 -0.695 0.238 ...
## $ X1stFlrSF : num -0.8023 0.2903 -0.6301 -0.5198 -0.0246 ...
## $ MoSold : num -1.552 -0.447 1.027 -1.552 2.132 ...
## $ GrLivArea : num 0.445 -0.479 0.602 0.46 1.453 ...
## $ GarageArea : num 0.364 -0.052 0.648 0.809 1.727 ...
## $ GarageYrBlt : num -1.0205 0.0857 -0.9385 -0.8156 -0.8976 ...
## $ YearRemodAdd : num -0.897 0.396 -0.849 0.683 -0.753 ...
## $ OverallQual : num 0.6461 -0.0632 0.6461 0.6461 1.3553 ...
## $ OverallCond : num -0.507 2.188 -0.507 -0.507 -0.507 ...
## $ TotRmsAbvGrd : num 1.004 -0.286 -0.286 0.359 1.649 ...
## $ SalePrice : int 208500 181500 223500 140000 250000 143000 307000 200000 129900 118000 ...
#Modeling
#MODEL
#LASSO Regression analysis-k-fold cross validation
ctrl_specs <-trainControl(method = "cv",
savePredictions = "all",
number = 10,
classProbs = T)
#we have to create a vector for potential lambdas
lambda_vector <-10^seq(-5,5,length=500)
set.seed(1992)
Model1 <-train(SalePrice~.,data =Y_train,
method="glmnet",
tuneGrid=expand.grid(alpha=1,lambda=lambda_vector),
trControl=ctrl_specs,
preProcess=c("center","scale"),
na.action = na.omit)
## Warning in train.default(x, y, weights = w, ...): cannnot compute class
## probabilities for regression
## Warning in nominalTrainWorkflow(x = x, y = y, wts = weights, info = trainInfo, :
## There were missing values in resampled performance measures.
Model1
## glmnet
##
## 1460 samples
## 27 predictor
##
## Pre-processing: centered (27), scaled (27)
## Resampling: Cross-Validated (10 fold)
## Summary of sample sizes: 1315, 1314, 1314, 1314, 1314, 1313, ...
## Resampling results across tuning parameters:
##
## lambda RMSE Rsquared MAE
## 1.000000e-05 31546.09 0.8454893 20624.82
## 1.047225e-05 31546.09 0.8454893 20624.82
## 1.096681e-05 31546.09 0.8454893 20624.82
## 1.148472e-05 31546.09 0.8454893 20624.82
## 1.202708e-05 31546.09 0.8454893 20624.82
## 1.259506e-05 31546.09 0.8454893 20624.82
## 1.318987e-05 31546.09 0.8454893 20624.82
## 1.381276e-05 31546.09 0.8454893 20624.82
## 1.446507e-05 31546.09 0.8454893 20624.82
## 1.514819e-05 31546.09 0.8454893 20624.82
## 1.586357e-05 31546.09 0.8454893 20624.82
## 1.661273e-05 31546.09 0.8454893 20624.82
## 1.739726e-05 31546.09 0.8454893 20624.82
## 1.821885e-05 31546.09 0.8454893 20624.82
## 1.907924e-05 31546.09 0.8454893 20624.82
## 1.998026e-05 31546.09 0.8454893 20624.82
## 2.092383e-05 31546.09 0.8454893 20624.82
## 2.191197e-05 31546.09 0.8454893 20624.82
## 2.294676e-05 31546.09 0.8454893 20624.82
## 2.403043e-05 31546.09 0.8454893 20624.82
## 2.516527e-05 31546.09 0.8454893 20624.82
## 2.635371e-05 31546.09 0.8454893 20624.82
## 2.759826e-05 31546.09 0.8454893 20624.82
## 2.890160e-05 31546.09 0.8454893 20624.82
## 3.026648e-05 31546.09 0.8454893 20624.82
## 3.169582e-05 31546.09 0.8454893 20624.82
## 3.319266e-05 31546.09 0.8454893 20624.82
## 3.476019e-05 31546.09 0.8454893 20624.82
## 3.640175e-05 31546.09 0.8454893 20624.82
## 3.812083e-05 31546.09 0.8454893 20624.82
## 3.992109e-05 31546.09 0.8454893 20624.82
## 4.180637e-05 31546.09 0.8454893 20624.82
## 4.378069e-05 31546.09 0.8454893 20624.82
## 4.584824e-05 31546.09 0.8454893 20624.82
## 4.801343e-05 31546.09 0.8454893 20624.82
## 5.028087e-05 31546.09 0.8454893 20624.82
## 5.265540e-05 31546.09 0.8454893 20624.82
## 5.514206e-05 31546.09 0.8454893 20624.82
## 5.774615e-05 31546.09 0.8454893 20624.82
## 6.047322e-05 31546.09 0.8454893 20624.82
## 6.332908e-05 31546.09 0.8454893 20624.82
## 6.631981e-05 31546.09 0.8454893 20624.82
## 6.945178e-05 31546.09 0.8454893 20624.82
## 7.273165e-05 31546.09 0.8454893 20624.82
## 7.616642e-05 31546.09 0.8454893 20624.82
## 7.976339e-05 31546.09 0.8454893 20624.82
## 8.353023e-05 31546.09 0.8454893 20624.82
## 8.747496e-05 31546.09 0.8454893 20624.82
## 9.160598e-05 31546.09 0.8454893 20624.82
## 9.593209e-05 31546.09 0.8454893 20624.82
## 1.004625e-04 31546.09 0.8454893 20624.82
## 1.052069e-04 31546.09 0.8454893 20624.82
## 1.101753e-04 31546.09 0.8454893 20624.82
## 1.153783e-04 31546.09 0.8454893 20624.82
## 1.208271e-04 31546.09 0.8454893 20624.82
## 1.265332e-04 31546.09 0.8454893 20624.82
## 1.325087e-04 31546.09 0.8454893 20624.82
## 1.387665e-04 31546.09 0.8454893 20624.82
## 1.453198e-04 31546.09 0.8454893 20624.82
## 1.521825e-04 31546.09 0.8454893 20624.82
## 1.593694e-04 31546.09 0.8454893 20624.82
## 1.668956e-04 31546.09 0.8454893 20624.82
## 1.747773e-04 31546.09 0.8454893 20624.82
## 1.830312e-04 31546.09 0.8454893 20624.82
## 1.916748e-04 31546.09 0.8454893 20624.82
## 2.007267e-04 31546.09 0.8454893 20624.82
## 2.102061e-04 31546.09 0.8454893 20624.82
## 2.201331e-04 31546.09 0.8454893 20624.82
## 2.305289e-04 31546.09 0.8454893 20624.82
## 2.414157e-04 31546.09 0.8454893 20624.82
## 2.528166e-04 31546.09 0.8454893 20624.82
## 2.647559e-04 31546.09 0.8454893 20624.82
## 2.772591e-04 31546.09 0.8454893 20624.82
## 2.903527e-04 31546.09 0.8454893 20624.82
## 3.040646e-04 31546.09 0.8454893 20624.82
## 3.184242e-04 31546.09 0.8454893 20624.82
## 3.334618e-04 31546.09 0.8454893 20624.82
## 3.492096e-04 31546.09 0.8454893 20624.82
## 3.657011e-04 31546.09 0.8454893 20624.82
## 3.829714e-04 31546.09 0.8454893 20624.82
## 4.010573e-04 31546.09 0.8454893 20624.82
## 4.199973e-04 31546.09 0.8454893 20624.82
## 4.398317e-04 31546.09 0.8454893 20624.82
## 4.606029e-04 31546.09 0.8454893 20624.82
## 4.823549e-04 31546.09 0.8454893 20624.82
## 5.051342e-04 31546.09 0.8454893 20624.82
## 5.289893e-04 31546.09 0.8454893 20624.82
## 5.539709e-04 31546.09 0.8454893 20624.82
## 5.801323e-04 31546.09 0.8454893 20624.82
## 6.075292e-04 31546.09 0.8454893 20624.82
## 6.362198e-04 31546.09 0.8454893 20624.82
## 6.662655e-04 31546.09 0.8454893 20624.82
## 6.977300e-04 31546.09 0.8454893 20624.82
## 7.306804e-04 31546.09 0.8454893 20624.82
## 7.651869e-04 31546.09 0.8454893 20624.82
## 8.013230e-04 31546.09 0.8454893 20624.82
## 8.391656e-04 31546.09 0.8454893 20624.82
## 8.787954e-04 31546.09 0.8454893 20624.82
## 9.202967e-04 31546.09 0.8454893 20624.82
## 9.637579e-04 31546.09 0.8454893 20624.82
## 1.009272e-03 31546.09 0.8454893 20624.82
## 1.056935e-03 31546.09 0.8454893 20624.82
## 1.106848e-03 31546.09 0.8454893 20624.82
## 1.159120e-03 31546.09 0.8454893 20624.82
## 1.213859e-03 31546.09 0.8454893 20624.82
## 1.271184e-03 31546.09 0.8454893 20624.82
## 1.331216e-03 31546.09 0.8454893 20624.82
## 1.394083e-03 31546.09 0.8454893 20624.82
## 1.459919e-03 31546.09 0.8454893 20624.82
## 1.528864e-03 31546.09 0.8454893 20624.82
## 1.601064e-03 31546.09 0.8454893 20624.82
## 1.676675e-03 31546.09 0.8454893 20624.82
## 1.755856e-03 31546.09 0.8454893 20624.82
## 1.838777e-03 31546.09 0.8454893 20624.82
## 1.925614e-03 31546.09 0.8454893 20624.82
## 2.016551e-03 31546.09 0.8454893 20624.82
## 2.111783e-03 31546.09 0.8454893 20624.82
## 2.211512e-03 31546.09 0.8454893 20624.82
## 2.315951e-03 31546.09 0.8454893 20624.82
## 2.425323e-03 31546.09 0.8454893 20624.82
## 2.539859e-03 31546.09 0.8454893 20624.82
## 2.659804e-03 31546.09 0.8454893 20624.82
## 2.785414e-03 31546.09 0.8454893 20624.82
## 2.916956e-03 31546.09 0.8454893 20624.82
## 3.054710e-03 31546.09 0.8454893 20624.82
## 3.198969e-03 31546.09 0.8454893 20624.82
## 3.350041e-03 31546.09 0.8454893 20624.82
## 3.508247e-03 31546.09 0.8454893 20624.82
## 3.673925e-03 31546.09 0.8454893 20624.82
## 3.847427e-03 31546.09 0.8454893 20624.82
## 4.029122e-03 31546.09 0.8454893 20624.82
## 4.219398e-03 31546.09 0.8454893 20624.82
## 4.418660e-03 31546.09 0.8454893 20624.82
## 4.627332e-03 31546.09 0.8454893 20624.82
## 4.845859e-03 31546.09 0.8454893 20624.82
## 5.074705e-03 31546.09 0.8454893 20624.82
## 5.314359e-03 31546.09 0.8454893 20624.82
## 5.565331e-03 31546.09 0.8454893 20624.82
## 5.828155e-03 31546.09 0.8454893 20624.82
## 6.103390e-03 31546.09 0.8454893 20624.82
## 6.391624e-03 31546.09 0.8454893 20624.82
## 6.693470e-03 31546.09 0.8454893 20624.82
## 7.009570e-03 31546.09 0.8454893 20624.82
## 7.340598e-03 31546.09 0.8454893 20624.82
## 7.687260e-03 31546.09 0.8454893 20624.82
## 8.050292e-03 31546.09 0.8454893 20624.82
## 8.430468e-03 31546.09 0.8454893 20624.82
## 8.828599e-03 31546.09 0.8454893 20624.82
## 9.245531e-03 31546.09 0.8454893 20624.82
## 9.682153e-03 31546.09 0.8454893 20624.82
## 1.013939e-02 31546.09 0.8454893 20624.82
## 1.061823e-02 31546.09 0.8454893 20624.82
## 1.111968e-02 31546.09 0.8454893 20624.82
## 1.164481e-02 31546.09 0.8454893 20624.82
## 1.219473e-02 31546.09 0.8454893 20624.82
## 1.277063e-02 31546.09 0.8454893 20624.82
## 1.337373e-02 31546.09 0.8454893 20624.82
## 1.400531e-02 31546.09 0.8454893 20624.82
## 1.466671e-02 31546.09 0.8454893 20624.82
## 1.535935e-02 31546.09 0.8454893 20624.82
## 1.608469e-02 31546.09 0.8454893 20624.82
## 1.684430e-02 31546.09 0.8454893 20624.82
## 1.763977e-02 31546.09 0.8454893 20624.82
## 1.847281e-02 31546.09 0.8454893 20624.82
## 1.934520e-02 31546.09 0.8454893 20624.82
## 2.025878e-02 31546.09 0.8454893 20624.82
## 2.121550e-02 31546.09 0.8454893 20624.82
## 2.221741e-02 31546.09 0.8454893 20624.82
## 2.326663e-02 31546.09 0.8454893 20624.82
## 2.436540e-02 31546.09 0.8454893 20624.82
## 2.551606e-02 31546.09 0.8454893 20624.82
## 2.672106e-02 31546.09 0.8454893 20624.82
## 2.798297e-02 31546.09 0.8454893 20624.82
## 2.930447e-02 31546.09 0.8454893 20624.82
## 3.068838e-02 31546.09 0.8454893 20624.82
## 3.213764e-02 31546.09 0.8454893 20624.82
## 3.365535e-02 31546.09 0.8454893 20624.82
## 3.524473e-02 31546.09 0.8454893 20624.82
## 3.690917e-02 31546.09 0.8454893 20624.82
## 3.865221e-02 31546.09 0.8454893 20624.82
## 4.047757e-02 31546.09 0.8454893 20624.82
## 4.238913e-02 31546.09 0.8454893 20624.82
## 4.439097e-02 31546.09 0.8454893 20624.82
## 4.648734e-02 31546.09 0.8454893 20624.82
## 4.868271e-02 31546.09 0.8454893 20624.82
## 5.098176e-02 31546.09 0.8454893 20624.82
## 5.338938e-02 31546.09 0.8454893 20624.82
## 5.591071e-02 31546.09 0.8454893 20624.82
## 5.855110e-02 31546.09 0.8454893 20624.82
## 6.131619e-02 31546.09 0.8454893 20624.82
## 6.421186e-02 31546.09 0.8454893 20624.82
## 6.724427e-02 31546.09 0.8454893 20624.82
## 7.041990e-02 31546.09 0.8454893 20624.82
## 7.374549e-02 31546.09 0.8454893 20624.82
## 7.722814e-02 31546.09 0.8454893 20624.82
## 8.087525e-02 31546.09 0.8454893 20624.82
## 8.469460e-02 31546.09 0.8454893 20624.82
## 8.869432e-02 31546.09 0.8454893 20624.82
## 9.288292e-02 31546.09 0.8454893 20624.82
## 9.726934e-02 31546.09 0.8454893 20624.82
## 1.018629e-01 31546.09 0.8454893 20624.82
## 1.066734e-01 31546.09 0.8454893 20624.82
## 1.117111e-01 31546.09 0.8454893 20624.82
## 1.169866e-01 31546.09 0.8454893 20624.82
## 1.225114e-01 31546.09 0.8454893 20624.82
## 1.282970e-01 31546.09 0.8454893 20624.82
## 1.343558e-01 31546.09 0.8454893 20624.82
## 1.407008e-01 31546.09 0.8454893 20624.82
## 1.473454e-01 31546.09 0.8454893 20624.82
## 1.543038e-01 31546.09 0.8454893 20624.82
## 1.615909e-01 31546.09 0.8454893 20624.82
## 1.692220e-01 31546.09 0.8454893 20624.82
## 1.772136e-01 31546.09 0.8454893 20624.82
## 1.855825e-01 31546.09 0.8454893 20624.82
## 1.943467e-01 31546.09 0.8454893 20624.82
## 2.035248e-01 31546.09 0.8454893 20624.82
## 2.131362e-01 31546.09 0.8454893 20624.82
## 2.232016e-01 31546.09 0.8454893 20624.82
## 2.337424e-01 31546.09 0.8454893 20624.82
## 2.447809e-01 31546.09 0.8454893 20624.82
## 2.563407e-01 31546.09 0.8454893 20624.82
## 2.684465e-01 31546.09 0.8454893 20624.82
## 2.811239e-01 31546.09 0.8454893 20624.82
## 2.944000e-01 31546.09 0.8454893 20624.82
## 3.083031e-01 31546.09 0.8454893 20624.82
## 3.228628e-01 31546.09 0.8454893 20624.82
## 3.381101e-01 31546.09 0.8454893 20624.82
## 3.540774e-01 31546.09 0.8454893 20624.82
## 3.707988e-01 31546.09 0.8454893 20624.82
## 3.883098e-01 31546.09 0.8454893 20624.82
## 4.066478e-01 31546.09 0.8454893 20624.82
## 4.258518e-01 31546.09 0.8454893 20624.82
## 4.459628e-01 31546.09 0.8454893 20624.82
## 4.670234e-01 31546.09 0.8454893 20624.82
## 4.890787e-01 31546.09 0.8454893 20624.82
## 5.121755e-01 31546.09 0.8454893 20624.82
## 5.363631e-01 31546.09 0.8454893 20624.82
## 5.616930e-01 31546.09 0.8454893 20624.82
## 5.882190e-01 31546.09 0.8454893 20624.82
## 6.159978e-01 31546.09 0.8454893 20624.82
## 6.450884e-01 31546.09 0.8454893 20624.82
## 6.755528e-01 31546.09 0.8454893 20624.82
## 7.074559e-01 31546.09 0.8454893 20624.82
## 7.408657e-01 31546.09 0.8454893 20624.82
## 7.758532e-01 31546.09 0.8454893 20624.82
## 8.124930e-01 31546.09 0.8454893 20624.82
## 8.508632e-01 31546.09 0.8454893 20624.82
## 8.910453e-01 31546.09 0.8454893 20624.82
## 9.331251e-01 31546.09 0.8454893 20624.82
## 9.771921e-01 31546.09 0.8454893 20624.82
## 1.023340e+00 31546.09 0.8454893 20624.82
## 1.071668e+00 31546.09 0.8454893 20624.82
## 1.122277e+00 31546.09 0.8454893 20624.82
## 1.175277e+00 31546.09 0.8454893 20624.82
## 1.230780e+00 31546.09 0.8454893 20624.82
## 1.288904e+00 31546.09 0.8454893 20624.82
## 1.349772e+00 31546.09 0.8454893 20624.82
## 1.413516e+00 31546.09 0.8454893 20624.82
## 1.480269e+00 31546.09 0.8454893 20624.82
## 1.550175e+00 31546.09 0.8454893 20624.82
## 1.623382e+00 31546.09 0.8454893 20624.82
## 1.700047e+00 31546.09 0.8454893 20624.82
## 1.780332e+00 31546.09 0.8454893 20624.82
## 1.864409e+00 31546.09 0.8454893 20624.82
## 1.952456e+00 31546.09 0.8454893 20624.82
## 2.044661e+00 31546.09 0.8454893 20624.82
## 2.141220e+00 31546.09 0.8454893 20624.82
## 2.242340e+00 31546.09 0.8454893 20624.82
## 2.348235e+00 31546.09 0.8454893 20624.82
## 2.459130e+00 31546.09 0.8454893 20624.82
## 2.575263e+00 31546.09 0.8454893 20624.82
## 2.696881e+00 31546.09 0.8454893 20624.82
## 2.824241e+00 31546.09 0.8454893 20624.82
## 2.957617e+00 31546.09 0.8454893 20624.82
## 3.097291e+00 31546.09 0.8454893 20624.82
## 3.243561e+00 31546.09 0.8454893 20624.82
## 3.396739e+00 31546.09 0.8454893 20624.82
## 3.557150e+00 31546.09 0.8454893 20624.82
## 3.725137e+00 31546.09 0.8454893 20624.82
## 3.901058e+00 31546.09 0.8454893 20624.82
## 4.085286e+00 31546.09 0.8454893 20624.82
## 4.278214e+00 31546.09 0.8454893 20624.82
## 4.480254e+00 31546.09 0.8454893 20624.82
## 4.691835e+00 31546.09 0.8454893 20624.82
## 4.913407e+00 31546.09 0.8454893 20624.82
## 5.145444e+00 31546.09 0.8454893 20624.82
## 5.388438e+00 31546.09 0.8454893 20624.82
## 5.642908e+00 31546.09 0.8454893 20624.82
## 5.909396e+00 31546.09 0.8454893 20624.82
## 6.188468e+00 31546.13 0.8454889 20624.86
## 6.480720e+00 31548.13 0.8454667 20627.56
## 6.786773e+00 31550.84 0.8454359 20631.43
## 7.107280e+00 31553.67 0.8454036 20635.49
## 7.442922e+00 31556.74 0.8453688 20639.80
## 7.794416e+00 31560.06 0.8453313 20644.39
## 8.162509e+00 31563.66 0.8452908 20650.06
## 8.547985e+00 31567.50 0.8452479 20655.99
## 8.951665e+00 31571.63 0.8452019 20662.15
## 9.374409e+00 31576.07 0.8451526 20668.65
## 9.817117e+00 31580.84 0.8450999 20675.76
## 1.028073e+01 31586.03 0.8450428 20683.31
## 1.076624e+01 31591.72 0.8449806 20691.19
## 1.127468e+01 31597.86 0.8449133 20699.49
## 1.180713e+01 31604.56 0.8448404 20708.25
## 1.236472e+01 31611.83 0.8447614 20717.56
## 1.294865e+01 31619.74 0.8446757 20727.41
## 1.356015e+01 31628.33 0.8445831 20738.00
## 1.420053e+01 31637.61 0.8444829 20749.05
## 1.487115e+01 31647.81 0.8443734 20761.02
## 1.557345e+01 31659.04 0.8442535 20773.91
## 1.630891e+01 31671.33 0.8441229 20788.25
## 1.707910e+01 31684.77 0.8439803 20804.10
## 1.788566e+01 31698.82 0.8438332 20820.79
## 1.873032e+01 31713.65 0.8436794 20838.80
## 1.961486e+01 31729.38 0.8435181 20857.85
## 2.054117e+01 31746.59 0.8433425 20877.76
## 2.151123e+01 31765.01 0.8431554 20898.89
## 2.252711e+01 31783.74 0.8429659 20920.07
## 2.359095e+01 31804.23 0.8427592 20942.58
## 2.470504e+01 31826.42 0.8425364 20967.22
## 2.587174e+01 31851.95 0.8422791 20993.59
## 2.709354e+01 31879.80 0.8419976 21021.71
## 2.837304e+01 31910.02 0.8416931 21050.77
## 2.971296e+01 31942.82 0.8413631 21080.66
## 3.111616e+01 31978.92 0.8409997 21112.22
## 3.258562e+01 32018.45 0.8406013 21146.28
## 3.412449e+01 32062.77 0.8401536 21183.28
## 3.573602e+01 32112.93 0.8396465 21222.81
## 3.742366e+01 32167.91 0.8390902 21265.63
## 3.919100e+01 32229.15 0.8384693 21313.21
## 4.104181e+01 32296.17 0.8377898 21364.73
## 4.298001e+01 32370.34 0.8370377 21420.47
## 4.500975e+01 32450.26 0.8362296 21479.18
## 4.713535e+01 32536.60 0.8353584 21542.20
## 4.936132e+01 32626.78 0.8344542 21608.24
## 5.169242e+01 32717.95 0.8335379 21674.40
## 5.413360e+01 32803.35 0.8326921 21738.77
## 5.669007e+01 32858.63 0.8321378 21786.30
## 5.936727e+01 32904.88 0.8316714 21826.88
## 6.217090e+01 32924.28 0.8314862 21842.83
## 6.510694e+01 32941.99 0.8313147 21856.59
## 6.818162e+01 32943.65 0.8312999 21856.48
## 7.140151e+01 32944.32 0.8312915 21855.12
## 7.477346e+01 32943.70 0.8312902 21852.60
## 7.830465e+01 32943.37 0.8312855 21850.05
## 8.200261e+01 32943.42 0.8312772 21847.46
## 8.587519e+01 32943.93 0.8312652 21844.59
## 8.993067e+01 32945.64 0.8312433 21841.65
## 9.417766e+01 32947.67 0.8312179 21838.66
## 9.862522e+01 32950.02 0.8311889 21835.80
## 1.032828e+02 32951.86 0.8311646 21832.71
## 1.081604e+02 32952.52 0.8311510 21829.09
## 1.132683e+02 32954.25 0.8311273 21825.53
## 1.186174e+02 32957.91 0.8310854 21822.46
## 1.242191e+02 32961.91 0.8310404 21819.27
## 1.300854e+02 32966.05 0.8309947 21815.86
## 1.362287e+02 32970.56 0.8309450 21812.21
## 1.426621e+02 32975.06 0.8308947 21808.43
## 1.493993e+02 32980.20 0.8308380 21804.80
## 1.564548e+02 32986.29 0.8307726 21801.79
## 1.638434e+02 32993.19 0.8306987 21800.22
## 1.715809e+02 33000.71 0.8306178 21798.77
## 1.796838e+02 33009.19 0.8305272 21797.90
## 1.881694e+02 33018.64 0.8304261 21797.21
## 1.970558e+02 33029.15 0.8303141 21797.04
## 2.063618e+02 33040.65 0.8301920 21797.03
## 2.161073e+02 33053.56 0.8300555 21798.04
## 2.263130e+02 33068.16 0.8299020 21801.03
## 2.370006e+02 33084.38 0.8297319 21805.73
## 2.481930e+02 33102.11 0.8295462 21811.63
## 2.599140e+02 33121.89 0.8293388 21818.61
## 2.721885e+02 33143.99 0.8291061 21826.44
## 2.850426e+02 33168.81 0.8288439 21834.80
## 2.985038e+02 33195.48 0.8285603 21844.22
## 3.126007e+02 33223.67 0.8282602 21854.87
## 3.273634e+02 33246.30 0.8280122 21859.93
## 3.428231e+02 33270.77 0.8277458 21865.74
## 3.590130e+02 33291.14 0.8275176 21866.56
## 3.759675e+02 33312.63 0.8272750 21868.60
## 3.937226e+02 33333.36 0.8270356 21870.02
## 4.123163e+02 33357.18 0.8267620 21873.38
## 4.317880e+02 33389.59 0.8263909 21880.23
## 4.521792e+02 33425.55 0.8259817 21889.06
## 4.735335e+02 33463.65 0.8255529 21897.57
## 4.958962e+02 33505.53 0.8250848 21908.38
## 5.193150e+02 33545.00 0.8246600 21915.88
## 5.438397e+02 33589.26 0.8241851 21926.79
## 5.695227e+02 33638.52 0.8236587 21942.12
## 5.964185e+02 33692.71 0.8230808 21965.85
## 6.245845e+02 33754.13 0.8224332 21997.64
## 6.540806e+02 33822.40 0.8217154 22034.96
## 6.849697e+02 33874.61 0.8211525 22069.87
## 7.173175e+02 33931.79 0.8205362 22109.58
## 7.511929e+02 33993.64 0.8198713 22148.12
## 7.866682e+02 34056.52 0.8191924 22186.52
## 8.238187e+02 34122.02 0.8184937 22224.76
## 8.627237e+02 34195.06 0.8177166 22266.53
## 9.034660e+02 34275.82 0.8168545 22312.65
## 9.461324e+02 34364.47 0.8159048 22362.76
## 9.908137e+02 34460.77 0.8148681 22418.80
## 1.037605e+03 34566.18 0.8137281 22479.47
## 1.086606e+03 34645.47 0.8128662 22533.59
## 1.137921e+03 34733.17 0.8119111 22593.00
## 1.191660e+03 34821.44 0.8109471 22653.40
## 1.247936e+03 34917.39 0.8098965 22718.29
## 1.306870e+03 35007.54 0.8089102 22780.79
## 1.368587e+03 35087.81 0.8080361 22837.88
## 1.433219e+03 35131.59 0.8075795 22867.99
## 1.500903e+03 35165.99 0.8072275 22890.68
## 1.571784e+03 35186.71 0.8070444 22898.38
## 1.646012e+03 35201.17 0.8069358 22898.58
## 1.723745e+03 35217.37 0.8068139 22900.11
## 1.805149e+03 35235.50 0.8066769 22902.83
## 1.890397e+03 35255.15 0.8065300 22906.15
## 1.979672e+03 35276.46 0.8063722 22910.82
## 2.073162e+03 35300.25 0.8061947 22916.22
## 2.171068e+03 35326.61 0.8059973 22923.39
## 2.273597e+03 35356.14 0.8057745 22933.32
## 2.380968e+03 35388.77 0.8055278 22945.18
## 2.493409e+03 35423.89 0.8052649 22958.51
## 2.611161e+03 35462.23 0.8049783 22973.03
## 2.734474e+03 35503.04 0.8046789 22989.06
## 2.863610e+03 35546.67 0.8043627 23006.63
## 2.998844e+03 35591.43 0.8040472 23023.32
## 3.140465e+03 35637.91 0.8037291 23039.26
## 3.288774e+03 35685.21 0.8034228 23051.78
## 3.444087e+03 35733.35 0.8031291 23063.27
## 3.606735e+03 35779.33 0.8028900 23070.09
## 3.777064e+03 35824.31 0.8026897 23072.82
## 3.955436e+03 35872.63 0.8024762 23078.97
## 4.142232e+03 35924.97 0.8022470 23088.69
## 4.337850e+03 35981.89 0.8019991 23101.04
## 4.542706e+03 36044.16 0.8017273 23117.77
## 4.757236e+03 36112.64 0.8014259 23140.80
## 4.981898e+03 36187.90 0.8010910 23168.88
## 5.217169e+03 36268.46 0.8007471 23198.01
## 5.463550e+03 36356.37 0.8003711 23231.43
## 5.721568e+03 36452.60 0.7999554 23271.76
## 5.991770e+03 36557.97 0.7994954 23319.72
## 6.274732e+03 36670.86 0.7990130 23372.44
## 6.571058e+03 36790.54 0.7985235 23429.41
## 6.881377e+03 36920.57 0.7979893 23497.38
## 7.206351e+03 37061.42 0.7974080 23573.84
## 7.546673e+03 37215.10 0.7967648 23657.02
## 7.903066e+03 37382.88 0.7960518 23747.94
## 8.276289e+03 37563.74 0.7952987 23848.29
## 8.667139e+03 37757.73 0.7945101 23956.61
## 9.076446e+03 37967.30 0.7936557 24077.56
## 9.505083e+03 38184.31 0.7928531 24200.18
## 9.953962e+03 38414.71 0.7920449 24330.76
## 1.042404e+04 38656.85 0.7912648 24467.25
## 1.091632e+04 38921.15 0.7903856 24618.80
## 1.143184e+04 39208.51 0.7894133 24792.72
## 1.197171e+04 39522.74 0.7882987 24987.40
## 1.253708e+04 39864.33 0.7870473 25207.30
## 1.312915e+04 40229.63 0.7857155 25450.63
## 1.374917e+04 40611.81 0.7844334 25709.23
## 1.439848e+04 41011.56 0.7832493 25985.26
## 1.507845e+04 41416.50 0.7823969 26261.15
## 1.579053e+04 41853.68 0.7814469 26566.28
## 1.653624e+04 42319.17 0.7805153 26896.64
## 1.731717e+04 42823.95 0.7794370 27259.86
## 1.813498e+04 43371.20 0.7782010 27661.79
## 1.899141e+04 43964.17 0.7767634 28111.65
## 1.988828e+04 44605.91 0.7750857 28604.21
## 2.082751e+04 45299.81 0.7731176 29152.66
## 2.181109e+04 46049.38 0.7707961 29751.81
## 2.284112e+04 46858.25 0.7680417 30412.90
## 2.391980e+04 47730.21 0.7647535 31131.28
## 2.504942e+04 48667.83 0.7608296 31904.78
## 2.623238e+04 49675.11 0.7561088 32735.21
## 2.747121e+04 50754.39 0.7504134 33644.08
## 2.876854e+04 51899.19 0.7437629 34614.17
## 3.012714e+04 53113.92 0.7359679 35635.20
## 3.154990e+04 54372.99 0.7277441 36692.20
## 3.303985e+04 55696.59 0.7182301 37809.86
## 3.460016e+04 57056.45 0.7082136 38966.01
## 3.623416e+04 58479.63 0.6966054 40178.94
## 3.794533e+04 59874.78 0.6883613 41377.13
## 3.973730e+04 61356.65 0.6779486 42657.34
## 4.161391e+04 62915.81 0.6653806 44004.91
## 4.357913e+04 64546.26 0.6497847 45403.16
## 4.563716e+04 66075.64 0.6423422 46706.86
## 4.779239e+04 67673.11 0.6350818 48053.18
## 5.004939e+04 69287.82 0.6349388 49397.94
## 5.241298e+04 71014.68 0.6349388 50836.74
## 5.488820e+04 72861.52 0.6349388 52364.98
## 5.748030e+04 74834.58 0.6349388 53997.95
## 6.019482e+04 76940.23 0.6349388 55735.41
## 6.303753e+04 78867.75 0.6508166 57334.34
## 6.601449e+04 78994.59 NaN 57439.25
## 6.913204e+04 78994.59 NaN 57439.25
## 7.239681e+04 78994.59 NaN 57439.25
## 7.581576e+04 78994.59 NaN 57439.25
## 7.939618e+04 78994.59 NaN 57439.25
## 8.314568e+04 78994.59 NaN 57439.25
## 8.707225e+04 78994.59 NaN 57439.25
## 9.118425e+04 78994.59 NaN 57439.25
## 9.549045e+04 78994.59 NaN 57439.25
## 1.000000e+05 78994.59 NaN 57439.25
##
## Tuning parameter 'alpha' was held constant at a value of 1
## RMSE was used to select the optimal model using the smallest value.
## The final values used for the model were alpha = 1 and lambda = 5.909396.
Model1$bestTune
## alpha lambda
## 289 1 5.909396
#LASSO Regression model coefficients(Parameter Estimates)
round(coef(Model1$finalModel,Model1$bestTune$lambda),3)
## 28 x 1 sparse Matrix of class "dgCMatrix"
## 1
## (Intercept) 180921.196
## MSSubClass 1659.373
## LotFrontage 2985.564
## LotArea.MSSubClass 87.785
## LotArea.LotFrontage 93.181
## LotArea.LotArea 5591.137
## LotArea.YearBuilt 1121.894
## LotArea.TotalBsmtSF 1468.517
## LotArea.X1stFlrSF -22229.427
## LotArea.MoSold 13229.194
## LotArea.GrLivArea -26899.756
## LotArea.GarageArea 13619.219
## LotArea.GarageYrBlt .
## LotArea.YearRemodAdd -17600.400
## LotArea.OverallQual -603142.430
## LotArea.OverallCond 27681.688
## LotArea.TotRmsAbvGrd 191.230
## YearBuilt -16606.910
## TotalBsmtSF 11849.965
## X1stFlrSF 27358.116
## MoSold -13363.706
## GrLivArea 48586.863
## GarageArea -5424.190
## GarageYrBlt 3161.592
## YearRemodAdd 13298.079
## OverallQual 627601.425
## OverallCond -18734.345
## TotRmsAbvGrd 367.156
#Plot log(lambda) & RMSE
plot(log(Model1$results$lambda),Model1$results$RMSE,
xlab="Log(lambda)",
ylab="RMSE",
xlim=c(-20,20))

log(Model1$bestTune$lambda)
## [1] 1.776544
#variable importance
varImp(Model1)
## glmnet variable importance
##
## only 20 most important variables shown (out of 27)
##
## Overall
## OverallQual 100.0000
## LotArea.OverallQual 96.1028
## GrLivArea 7.7417
## LotArea.OverallCond 4.4107
## X1stFlrSF 4.3592
## LotArea.GrLivArea 4.2861
## LotArea.X1stFlrSF 3.5420
## OverallCond 2.9851
## LotArea.YearRemodAdd 2.8044
## YearBuilt 2.6461
## LotArea.GarageArea 2.1700
## MoSold 2.1293
## YearRemodAdd 2.1189
## LotArea.MoSold 2.1079
## TotalBsmtSF 1.8881
## LotArea.LotArea 0.8909
## GarageArea 0.8643
## GarageYrBlt 0.5038
## LotFrontage 0.4757
## MSSubClass 0.2644
#most important features
#OverallQual
#LotArea.OverallQual
#GrLivArea
#LotArea.OverallCond
#X1stFlrSF
#LotArea.GrLivArea
#LotArea.X1stFlrSF
#OverallCond
#LotArea.YearRemodAdd
#YearBuilt
#LotArea.GarageArea
#MoSold
#plot variable importance
ggplot(varImp(Model1))+
labs(title = "Model Variable importance Rank")

X_test=cbind(testing,final)
X_test<-X_test[,-28] #removing id
p=predict(Model1,X_test)
a=c(1461:2919)
pred=data.frame(Id=a,SalePrice=p)
#model 1 sale price prediction plot
summary(pred$SalePrice)
## Min. 1st Qu. Median Mean 3rd Qu. Max.
## 19025 125267 161730 179402 216790 551548
#the average SalePrice from model 1 is $179 402,with the minimum being
#$19 025 and the maximum SalePrice of $551 548
ggplot(pred,aes(SalePrice))+geom_histogram(fill='green',alpha=1,bins=50)+
labs(x="Sale Price",y="Frequency",
title = "Model 1 Sale Price")

#MODEL 2
#2.a with 2 repeats and 3 folds
#2.b with 2 repeats and 4 folds
#RANDOM FOREST MODELLING-RPART
#2.a
rf_ctr_specs1 <-trainControl(method = "repeatedcv",
repeats = 2,
number=3,
search = "random")
Model2.a <-train(SalePrice~.,data=Y_train,
method="rf",
trControl=rf_ctr_specs1)
plot(varImp(Model2.a,scale=F),main="Model 2.a RandomForest-3 FOLDS-2-REPEATED CV")

#the 12 features that drive Sale price of houses are as follows :
#OverallQual
#LotArea.OverallQual
#GrLivArea
#LotArea.GrLivArea
#GarageArea
#YearBuilt
#TotalBsmtSF
#X1stFlrSF
#LotArea.GarageArea
#LotArea.YearBuilt
#LotArea.TotalBsmtSF
#LotArea.X1stFlrSF
p=predict(Model2.a,X_test)
a=c(1461:2919)
pred=data.frame(Id=a,SalePrice=p)
#model2.a Sale price prediction plot
summary(pred$SalePrice)
## Min. 1st Qu. Median Mean 3rd Qu. Max.
## 57097 131211 158774 179207 209921 540113
#the average SalePrice of Model 2.a is $179 207 with a minimum of $57 097 and a
#maximum of $540 113
ggplot(pred,aes(SalePrice))+geom_histogram(fill='red',alpha=1,bins=50)+
labs(x="Sale Price",
y="Frequency",
title = "Model2.a Sale Price")

varImp(Model2.a)
## rf variable importance
##
## only 20 most important variables shown (out of 27)
##
## Overall
## LotArea.OverallQual 100.0000
## OverallQual 94.4319
## GrLivArea 27.9248
## LotArea.GrLivArea 23.8680
## LotArea.X1stFlrSF 11.0893
## TotalBsmtSF 11.0730
## X1stFlrSF 9.6366
## LotArea.TotalBsmtSF 9.5538
## GarageArea 7.9547
## YearBuilt 6.1327
## LotArea.GarageArea 6.0279
## LotArea.YearBuilt 5.7409
## LotArea.LotArea 3.2176
## TotRmsAbvGrd 2.6084
## LotArea.TotRmsAbvGrd 2.1630
## LotArea.LotFrontage 1.8774
## LotArea.YearRemodAdd 1.6506
## LotFrontage 1.5874
## YearRemodAdd 1.5059
## LotArea.GarageYrBlt 0.6117
#2.b
rf_ctr_specs <-trainControl(method = "repeatedcv",
repeats = 2,
number=4,
search = "random")
Model2.b <-train(SalePrice~.,data=Y_train,
method="rf",
trControl=rf_ctr_specs)
plot(varImp(Model2.b,scale=F),main="Model 2b RandomForest-4 FOLDS-2-REAPEATED CV")

p=predict(Model2.b,X_test)
a=c(1461:2919)
pred=data.frame(Id=a,SalePrice=p)
#Saleprice model 2.b
summary(pred$SalePrice)
## Min. 1st Qu. Median Mean 3rd Qu. Max.
## 58713 130805 158915 179138 209678 542263
#the average SalePrice of Model 2.b is $179 138 with the minimum of $58 713
#and a maximum SalePrice of $542 263
ggplot(pred,aes(SalePrice))+geom_histogram(fill='blue',alpha=1,bins=50)+
labs(x="Sales Price",
y="Frequency",
title = "Model2.b Sale price")

varImp(Model2.b)
## rf variable importance
##
## only 20 most important variables shown (out of 27)
##
## Overall
## LotArea.OverallQual 100.0000
## OverallQual 96.7313
## GrLivArea 29.2644
## LotArea.GrLivArea 26.6588
## TotalBsmtSF 11.9623
## LotArea.TotalBsmtSF 11.3418
## X1stFlrSF 11.1933
## GarageArea 10.8132
## LotArea.X1stFlrSF 10.7874
## LotArea.YearBuilt 8.2482
## LotArea.GarageArea 8.1897
## YearBuilt 6.0405
## LotArea.LotArea 3.8454
## TotRmsAbvGrd 3.4297
## LotArea.TotRmsAbvGrd 2.8296
## LotArea.YearRemodAdd 2.0389
## LotArea.LotFrontage 1.9464
## LotFrontage 1.8324
## YearRemodAdd 1.7660
## LotArea.GarageYrBlt 0.8081
Model2.b$results$Rsquared
## [1] 0.8612799 0.8615274 0.8576080
# 0.8606251 0.8541651 0.8496587
Model2.b$results$RsquaredSD
## [1] 0.01329931 0.01315689 0.01418043
#important features
# OverallQual
# LotArea.OverallQual
# GrLivArea
# LotArea.GrLivArea
# GarageArea
# TotalBsmtSF
# LotArea.GarageArea
# YearBuilt
# LotArea.X1stFlrSF
# LotArea.TotalBsmtSF
# X1stFlrSF
# LotArea.YearBuilt
#compare the 12 most important features according to diff models
#select the frequent features as the main drivers of house Prices
# the once marked with 3 asterisks appear to be important in all 3 models,
#this suggest that they might be the core influences of House Prices
# OverallQual ***
# LotArea.OverallQual ***
# GrLivArea ***
# LotArea.OverallCond ***
# X1stFlrSF ***
# LotArea.GrLivArea ***
# LotArea.X1stFlrSF ***
# YearBuilt ***
# LotArea.GarageArea ***
#full data clean
Fdata_clean<-rbind(Y_train,X_test)
summary(Fdata_clean$SalePrice)
## Min. 1st Qu. Median Mean 3rd Qu. Max.
## 34900 154795 176735 180053 191896 755000
#the data's average SalePrice is $180 053 with a minimum of $34 900
#and a maximum SalePrice of $755 000
#comparison SalePrice
#the average SalePrice from model 1 is $179 402,with the minimum being
#$19 025 and the maximum SalePrice of $551 548
#the average SalePrice of Model 2.a is $179 207 with a minimum of $57 097 and a
#maximum of $540 113
#the average SalePrice of Model 2.b is $179 138 with the minimum of $58 713
#and a maximum SalePrice of $542 263
#Model 1 estimates are the closest.
#Constructing a more accurate forecast from the 3 model results
#average
c=(179402+179207+179138)/3
c
## [1] 179249
# $179 249 actual=$180 053
#minimum
d=(19025+57097+58713)/3
d
## [1] 44945
# $44 945 actual= $34 900
#Maximum
e=(551548+542263+540113)/3
e
## [1] 544641.3
# $544 641.3 actual=$755 000