pacman::p_load(pacman,tidyr,ggthemes,ggplot2,plotly,GGally,rio,
               stringr,shiny,rmarkdown,lubridate,psych,ipred,caret,ROCR,pROC,
               DT,dummies,rpart,rpart.plot,httr,randomForest,readr,doParallel,
               xgboost,truncnorm,DMwR)



# additional packages
library(tidyverse)
## -- Attaching packages --------------------------------------- tidyverse 1.3.0 --
## v tibble  3.0.4     v dplyr   1.0.2
## v purrr   0.3.4     v forcats 0.5.0
## -- Conflicts ------------------------------------------ tidyverse_conflicts() --
## x psych::%+%()             masks ggplot2::%+%()
## x purrr::accumulate()      masks foreach::accumulate()
## x psych::alpha()           masks ggplot2::alpha()
## x lubridate::as.difftime() masks base::as.difftime()
## x dplyr::combine()         masks randomForest::combine()
## x httr::config()           masks plotly::config()
## x lubridate::date()        masks base::date()
## x dplyr::filter()          masks plotly::filter(), stats::filter()
## x lubridate::intersect()   masks base::intersect()
## x dplyr::lag()             masks stats::lag()
## x purrr::lift()            masks caret::lift()
## x randomForest::margin()   masks ggplot2::margin()
## x httr::progress()         masks caret::progress()
## x lubridate::setdiff()     masks base::setdiff()
## x dplyr::slice()           masks xgboost::slice(), plotly::slice()
## x lubridate::union()       masks base::union()
## x purrr::when()            masks foreach::when()
library("ggpubr")
library(MASS)
## 
## Attaching package: 'MASS'
## The following object is masked from 'package:dplyr':
## 
##     select
## The following object is masked from 'package:plotly':
## 
##     select
library(fastDummies)

#set working directory
setwd("C:\\Users\\Xholi\\Downloads")

#load the data
train<-read.csv("House Prices\\train.csv")
test<-read.csv("House Prices\\test.csv")
final<-read.csv("House Prices\\sample_submission.csv")



set.seed(1992)

#Target analysis
theme_set(
  theme_bw() +
    theme(legend.position = "top")
)

#Visualize the target
#boxplots
#outliers
#Normal distribution
#skew and kurtosis

#1stly for all prices, then we compare by price groups

#add bell curve onto ds

#reload train data set
set.seed(1992)
train<-read.csv("House Prices\\train.csv")

#for all prices
Target=function(txt) 
{ds=ggplot(train,aes(SalePrice))+
  geom_histogram(color='black',fill=txt,bins = 80) 
bxp=ggplot(train,aes(SalePrice))+
  geom_boxplot(color='black',fill=txt) 
qq= ggplot(train, aes(sample=SalePrice))+
  stat_qq() + 
  stat_qq_line() 
ggarrange(ds, qq, bxp,
          labels = c("1", "2", "3"),
          ncol = 2, nrow = 2)
}
Target('pink')

#houses greater $700k
train<-read.csv("House Prices\\train.csv")
train=train%>%filter(SalePrice>7e5) #$700 000
train$SalePrice=log(train$SalePrice) #or box cox transformation

Target1=function(txt) 
{dp=ggplot(train,aes(SalePrice))+
  geom_histogram(color='black',fill=txt,bins = 80) #Skew? Kurtosis?
bxp=ggplot(train,aes(SalePrice))+
  geom_boxplot(color='black',fill=txt) #Outliers?
qq= ggplot(train, aes(sample=SalePrice))+
  stat_qq() + 
  stat_qq_line() #Normal distribution?
ggarrange(dp, qq, bxp,
          labels = c("1", "2", "3"),
          ncol = 2, nrow = 2)
}
Target1('red')

#Houses less than or equal to $500k but greater than $100k

#reload train data set
set.seed(1992)
train<-read.csv("House Prices\\train.csv")
#houses greater than 100k but less or equal to 500k
train=train%>%filter(SalePrice>1e5&SalePrice<=5e5) #$700 000
train$SalePrice=log(train$SalePrice) #or box cox transformation

Target2=function(txt) 
{dz=ggplot(train,aes(SalePrice))+
  geom_histogram(color='black',fill=txt,bins = 80) 
bxp=ggplot(train,aes(SalePrice))+
  geom_boxplot(color='black',fill=txt) 
qq= ggplot(train, aes(sample=SalePrice))+
  stat_qq() + 
  stat_qq_line() 
ggarrange(dz, qq, bxp,
          labels = c("1", "2", "3"),
          ncol = 2, nrow = 2)
}
Target2('blue')

#reload train data set
set.seed(1992)
train<-read.csv("House Prices\\train.csv")
#houses greater than 700k but less or equal to $2M
train=train%>%filter(SalePrice>7e5&SalePrice<=2e6) #$700 000
train$SalePrice=log(train$SalePrice) #or box cox transformation

Target3=function(txt) 
{ds=ggplot(train,aes(SalePrice))+
  geom_histogram(color='black',fill=txt,bins = 80) #Skew, Kurtosis
bxp=ggplot(train,aes(SalePrice))+
  geom_boxplot(color='black',fill=txt) #Outliers
qq= ggplot(train, aes(sample=SalePrice))+
  stat_qq() + 
  stat_qq_line() #Normal distribution?
ggarrange(ds, qq, bxp,
          labels = c("1", "2", "3"),
          ncol = 2, nrow = 2)
}
Target3('green')

#Data Exploration

set.seed(1992)
train<-read.csv("House Prices\\train.csv")

#identifies the type of dwelling involved
x<-train$MSSubClass[train$SalePrice>=7e5]
x
## [1] 60 60
summary(x)
##    Min. 1st Qu.  Median    Mean 3rd Qu.    Max. 
##      60      60      60      60      60      60
#2-STORY 1946 & NEWER this is the type of dwelling priced at $700k and more

#Identifies the general zoning classification of the
x<-train$MSZoning[train$SalePrice>=7e5]
x
## [1] "RL" "RL"
#and are of zoning type Residential Low Density

#Linear feet of street connected to property
x<-train$LotFrontage[train$SalePrice>=7e5]
x
## [1] 104 160
summary(x)
##    Min. 1st Qu.  Median    Mean 3rd Qu.    Max. 
##     104     118     132     132     146     160
#from 104 to 160 feet

#Lot size in square feet
x<-train$LotArea[train$SalePrice>=7e5]
x
## [1] 21535 15623
summary(x)
##    Min. 1st Qu.  Median    Mean 3rd Qu.    Max. 
##   15623   17101   18579   18579   20057   21535
# and has lot area of ranges 15623 square feet to 21535 square feet

#Type of road access to property
x<-train$Street[train$SalePrice>=7e5]
x
## [1] "Pave" "Pave"
#and the streets are paved

#year built
x<-train$YearBuilt[train$SalePrice>=7e5]
x
## [1] 1994 1996
summary(x)
##    Min. 1st Qu.  Median    Mean 3rd Qu.    Max. 
##    1994    1994    1995    1995    1996    1996
#and are built in the years 1994-1996

SalePrice=train$SalePrice #Save the target. At the moment it's not used
train=dplyr::select(train,-SalePrice) #The features, variables, predictors. Whatever you want to call it..
Sales_data=rbind(train,test)
Sales_data=Sales_data[-1] # Id out


#data manupilation
#Define the regressors by their type
cont=select_if(Sales_data,function(x) is.numeric(x)) 
catg=select_if(Sales_data,function(x) !is.numeric(x))

#check missing data
sum(is.na(Sales_data))
## [1] 13965
#there are 13965 missing vaules

#plot missing values
t(lapply(Sales_data, function(x) sum(is.na(x))))
##      MSSubClass MSZoning LotFrontage LotArea Street Alley LotShape LandContour
## [1,] 0          4        486         0       0      2721  0        0          
##      Utilities LotConfig LandSlope Neighborhood Condition1 Condition2 BldgType
## [1,] 2         0         0         0            0          0          0       
##      HouseStyle OverallQual OverallCond YearBuilt YearRemodAdd RoofStyle
## [1,] 0          0           0           0         0            0        
##      RoofMatl Exterior1st Exterior2nd MasVnrType MasVnrArea ExterQual ExterCond
## [1,] 0        1           1           24         23         0         0        
##      Foundation BsmtQual BsmtCond BsmtExposure BsmtFinType1 BsmtFinSF1
## [1,] 0          81       82       82           79           1         
##      BsmtFinType2 BsmtFinSF2 BsmtUnfSF TotalBsmtSF Heating HeatingQC CentralAir
## [1,] 80           1          1         1           0       0         0         
##      Electrical X1stFlrSF X2ndFlrSF LowQualFinSF GrLivArea BsmtFullBath
## [1,] 1          0         0         0            0         2           
##      BsmtHalfBath FullBath HalfBath BedroomAbvGr KitchenAbvGr KitchenQual
## [1,] 2            0        0        0            0            1          
##      TotRmsAbvGrd Functional Fireplaces FireplaceQu GarageType GarageYrBlt
## [1,] 0            2          0          1420        157        159        
##      GarageFinish GarageCars GarageArea GarageQual GarageCond PavedDrive
## [1,] 159          1          1          159        159        0         
##      WoodDeckSF OpenPorchSF EnclosedPorch X3SsnPorch ScreenPorch PoolArea
## [1,] 0          0           0             0          0           0       
##      PoolQC Fence MiscFeature MiscVal MoSold YrSold SaleType SaleCondition
## [1,] 2909   2348  2814        0       0      0      1        0
#address the missing values
#1
summary(Sales_data$MasVnrArea)
##    Min. 1st Qu.  Median    Mean 3rd Qu.    Max.    NA's 
##     0.0     0.0     0.0   102.2   164.0  1600.0      23
Sales_data$MasVnrArea=sapply(Sales_data$MasVnrArea, function(x) ifelse(is.na(x),0,x))
#2
summary(Sales_data$GarageYrBlt)#Max value: 2207?
##    Min. 1st Qu.  Median    Mean 3rd Qu.    Max.    NA's 
##    1895    1960    1979    1978    2002    2207     159
Sales_data$GarageYrBlt=sapply(Sales_data$GarageYrBlt,
                              function(x) ifelse(is.na(x),1978,x))
Sales_data$GarageYrBlt=sapply(Sales_data$GarageYrBlt,
                              function(x) ifelse(x>2020,1978,x)) 

#3
summary(Sales_data$LotFrontage) 
##    Min. 1st Qu.  Median    Mean 3rd Qu.    Max.    NA's 
##   21.00   59.00   68.00   69.31   80.00  313.00     486
Sales_data$LotFrontage=sapply(Sales_data$LotFrontage, function(x) ifelse(is.na(x),68,x))



#specifically
cont=lapply(cont, function(x) ifelse(is.na(x),mean(x,na.rm = TRUE),x))%>%data.frame 

sum(is.na(Sales_data))
## [1] 13297
#there are still nas to remove

#CATEGORY MISSING VALUES OR NAS
getmode <- function(v) #return the mode of the variable
{
  v=na.omit(v)
  uniqv <- unique(v)
  uniqv[which.max(tabulate(match(v, uniqv)))]
}
df1=dplyr::select(catg,-Fence,-Alley,-MiscFeature,-PoolQC) 
df2=dplyr::select(catg,Fence,Alley,MiscFeature,PoolQC) 

df1=lapply(df1,function(x) ifelse(is.na(x),getmode(x),x))%>%data.frame() 

df2=lapply(df2,function(x) ifelse(is.na(x),'NG',x))%>%data.frame() #create a new group and not alter distributions

Clean_data.cat=cbind(df1,df2)

sum(is.na(Clean_data.cat))
## [1] 0
str(Clean_data.cat)
## 'data.frame':    2919 obs. of  43 variables:
##  $ MSZoning     : chr  "RL" "RL" "RL" "RL" ...
##  $ Street       : chr  "Pave" "Pave" "Pave" "Pave" ...
##  $ LotShape     : chr  "Reg" "Reg" "IR1" "IR1" ...
##  $ LandContour  : chr  "Lvl" "Lvl" "Lvl" "Lvl" ...
##  $ Utilities    : chr  "AllPub" "AllPub" "AllPub" "AllPub" ...
##  $ LotConfig    : chr  "Inside" "FR2" "Inside" "Corner" ...
##  $ LandSlope    : chr  "Gtl" "Gtl" "Gtl" "Gtl" ...
##  $ Neighborhood : chr  "CollgCr" "Veenker" "CollgCr" "Crawfor" ...
##  $ Condition1   : chr  "Norm" "Feedr" "Norm" "Norm" ...
##  $ Condition2   : chr  "Norm" "Norm" "Norm" "Norm" ...
##  $ BldgType     : chr  "1Fam" "1Fam" "1Fam" "1Fam" ...
##  $ HouseStyle   : chr  "2Story" "1Story" "2Story" "2Story" ...
##  $ RoofStyle    : chr  "Gable" "Gable" "Gable" "Gable" ...
##  $ RoofMatl     : chr  "CompShg" "CompShg" "CompShg" "CompShg" ...
##  $ Exterior1st  : chr  "VinylSd" "MetalSd" "VinylSd" "Wd Sdng" ...
##  $ Exterior2nd  : chr  "VinylSd" "MetalSd" "VinylSd" "Wd Shng" ...
##  $ MasVnrType   : chr  "BrkFace" "None" "BrkFace" "None" ...
##  $ ExterQual    : chr  "Gd" "TA" "Gd" "TA" ...
##  $ ExterCond    : chr  "TA" "TA" "TA" "TA" ...
##  $ Foundation   : chr  "PConc" "CBlock" "PConc" "BrkTil" ...
##  $ BsmtQual     : chr  "Gd" "Gd" "Gd" "TA" ...
##  $ BsmtCond     : chr  "TA" "TA" "TA" "Gd" ...
##  $ BsmtExposure : chr  "No" "Gd" "Mn" "No" ...
##  $ BsmtFinType1 : chr  "GLQ" "ALQ" "GLQ" "ALQ" ...
##  $ BsmtFinType2 : chr  "Unf" "Unf" "Unf" "Unf" ...
##  $ Heating      : chr  "GasA" "GasA" "GasA" "GasA" ...
##  $ HeatingQC    : chr  "Ex" "Ex" "Ex" "Gd" ...
##  $ CentralAir   : chr  "Y" "Y" "Y" "Y" ...
##  $ Electrical   : chr  "SBrkr" "SBrkr" "SBrkr" "SBrkr" ...
##  $ KitchenQual  : chr  "Gd" "TA" "Gd" "Gd" ...
##  $ Functional   : chr  "Typ" "Typ" "Typ" "Typ" ...
##  $ FireplaceQu  : chr  "Gd" "TA" "TA" "Gd" ...
##  $ GarageType   : chr  "Attchd" "Attchd" "Attchd" "Detchd" ...
##  $ GarageFinish : chr  "RFn" "RFn" "RFn" "Unf" ...
##  $ GarageQual   : chr  "TA" "TA" "TA" "TA" ...
##  $ GarageCond   : chr  "TA" "TA" "TA" "TA" ...
##  $ PavedDrive   : chr  "Y" "Y" "Y" "Y" ...
##  $ SaleType     : chr  "WD" "WD" "WD" "WD" ...
##  $ SaleCondition: chr  "Normal" "Normal" "Normal" "Abnorml" ...
##  $ Fence        : chr  "NG" "NG" "NG" "NG" ...
##  $ Alley        : chr  "NG" "NG" "NG" "NG" ...
##  $ MiscFeature  : chr  "NG" "NG" "NG" "NG" ...
##  $ PoolQC       : chr  "NG" "NG" "NG" "NG" ...
#Transformations
cont$GarageYrBlt=sapply(cont$GarageYrBlt, function(x) ifelse(x>2020,1978,x))
cont$YearBuilt=2020-cont$YearBuilt   #Ex: Age of house
cont$YearRemodAdd=2020-cont$YearRemodAdd
cont$GarageYrBlt=2020-cont$GarageYrBlt
cont$YrSold=2020-cont$YrSold

#Fix distributions (Float and int)

fix=function(y) #Box Cox transformation. Return the transformed variable in a dataframe
{if (sum(y==0)>10) return(y) #Float filter
  else {result = boxcox(y~1, lambda = seq(-10,20,0.5))
  mylambda = result$x[which.max(result$y)]
  if (mylambda!=0) return((y^mylambda-1)/mylambda)
  else return(log(y+0.01))}
}
str(Clean_data.cat)
## 'data.frame':    2919 obs. of  43 variables:
##  $ MSZoning     : chr  "RL" "RL" "RL" "RL" ...
##  $ Street       : chr  "Pave" "Pave" "Pave" "Pave" ...
##  $ LotShape     : chr  "Reg" "Reg" "IR1" "IR1" ...
##  $ LandContour  : chr  "Lvl" "Lvl" "Lvl" "Lvl" ...
##  $ Utilities    : chr  "AllPub" "AllPub" "AllPub" "AllPub" ...
##  $ LotConfig    : chr  "Inside" "FR2" "Inside" "Corner" ...
##  $ LandSlope    : chr  "Gtl" "Gtl" "Gtl" "Gtl" ...
##  $ Neighborhood : chr  "CollgCr" "Veenker" "CollgCr" "Crawfor" ...
##  $ Condition1   : chr  "Norm" "Feedr" "Norm" "Norm" ...
##  $ Condition2   : chr  "Norm" "Norm" "Norm" "Norm" ...
##  $ BldgType     : chr  "1Fam" "1Fam" "1Fam" "1Fam" ...
##  $ HouseStyle   : chr  "2Story" "1Story" "2Story" "2Story" ...
##  $ RoofStyle    : chr  "Gable" "Gable" "Gable" "Gable" ...
##  $ RoofMatl     : chr  "CompShg" "CompShg" "CompShg" "CompShg" ...
##  $ Exterior1st  : chr  "VinylSd" "MetalSd" "VinylSd" "Wd Sdng" ...
##  $ Exterior2nd  : chr  "VinylSd" "MetalSd" "VinylSd" "Wd Shng" ...
##  $ MasVnrType   : chr  "BrkFace" "None" "BrkFace" "None" ...
##  $ ExterQual    : chr  "Gd" "TA" "Gd" "TA" ...
##  $ ExterCond    : chr  "TA" "TA" "TA" "TA" ...
##  $ Foundation   : chr  "PConc" "CBlock" "PConc" "BrkTil" ...
##  $ BsmtQual     : chr  "Gd" "Gd" "Gd" "TA" ...
##  $ BsmtCond     : chr  "TA" "TA" "TA" "Gd" ...
##  $ BsmtExposure : chr  "No" "Gd" "Mn" "No" ...
##  $ BsmtFinType1 : chr  "GLQ" "ALQ" "GLQ" "ALQ" ...
##  $ BsmtFinType2 : chr  "Unf" "Unf" "Unf" "Unf" ...
##  $ Heating      : chr  "GasA" "GasA" "GasA" "GasA" ...
##  $ HeatingQC    : chr  "Ex" "Ex" "Ex" "Gd" ...
##  $ CentralAir   : chr  "Y" "Y" "Y" "Y" ...
##  $ Electrical   : chr  "SBrkr" "SBrkr" "SBrkr" "SBrkr" ...
##  $ KitchenQual  : chr  "Gd" "TA" "Gd" "Gd" ...
##  $ Functional   : chr  "Typ" "Typ" "Typ" "Typ" ...
##  $ FireplaceQu  : chr  "Gd" "TA" "TA" "Gd" ...
##  $ GarageType   : chr  "Attchd" "Attchd" "Attchd" "Detchd" ...
##  $ GarageFinish : chr  "RFn" "RFn" "RFn" "Unf" ...
##  $ GarageQual   : chr  "TA" "TA" "TA" "TA" ...
##  $ GarageCond   : chr  "TA" "TA" "TA" "TA" ...
##  $ PavedDrive   : chr  "Y" "Y" "Y" "Y" ...
##  $ SaleType     : chr  "WD" "WD" "WD" "WD" ...
##  $ SaleCondition: chr  "Normal" "Normal" "Normal" "Abnorml" ...
##  $ Fence        : chr  "NG" "NG" "NG" "NG" ...
##  $ Alley        : chr  "NG" "NG" "NG" "NG" ...
##  $ MiscFeature  : chr  "NG" "NG" "NG" "NG" ...
##  $ PoolQC       : chr  "NG" "NG" "NG" "NG" ...
cont1=cont%>%dplyr::select( MSSubClass,LotFrontage,LotArea, 
                            YearBuilt,TotalBsmtSF,X1stFlrSF, MoSold,   
                            GrLivArea,GarageArea,GarageYrBlt,YearRemodAdd,OverallQual,
                            OverallCond,TotRmsAbvGrd) #Select only floats
cont2=cont%>%dplyr::select( -MSSubClass,-LotFrontage,-LotArea, 
                            -BsmtUnfSF,-BsmtFinSF1,-YearBuilt,-TotalBsmtSF,-X1stFlrSF,    
                            -GrLivArea,-GarageArea,-GarageYrBlt,-MoSold,-YearRemodAdd ) #Maybe ordinal variables or special case. See the below annotation
cont1=lapply(cont1, function(x) ifelse(x>quantile(x,0.999),mean(x,na.rm = TRUE),x))%>%data.frame #Remove some outliers


boxplot(scale(cont1))

#removing out-liers
cont1$LotArea=sapply(cont1, function(x) ifelse(x>quantile(x,0.998),mean(x,na.rm = TRUE),x))
cont1=lapply(cont1, 
             function(x) ifelse(x>quantile(x,0.999),
                                mean(x,na.rm = TRUE),x))%>%data.frame 

boxplot(scale(cont1))

#data exploration continuation
{
p=ggplot(cont1,aes(LotFrontage))+geom_histogram(fill='brown',alpha=1,bins = 80)
l=ggplot(cont2,aes(OverallCond))+geom_histogram(fill='blue',alpha=1,bins=80) 
o= ggplot(cont2, aes(MasVnrArea))+geom_histogram(fill='pink',alpha=1,bins=80)
t= ggplot(cont2, aes(PoolArea))+geom_histogram(fill='thistle1',alpha=1,bins=80) 
ggarrange(p, l, o,t,
          labels = c("P", "L", "O","T"),
          ncol = 2, nrow = 2)
}

{
p2=ggplot(cont1,aes(YearBuilt))+geom_histogram(fill='red',alpha=1,bins = 80) 
l2=ggplot(cont1,aes(MSSubClass))+geom_histogram(fill='blue',alpha=1,bins=80) 
o2= ggplot(cont1, aes(MoSold))+geom_histogram(fill='pink',alpha=1,bins=80)
t2= ggplot(cont1, aes(GrLivArea))+geom_histogram(fill='magenta',alpha=1,bins=80) 

ggarrange(p2,l2,o2,t2,
          labels = c("P2","L2","O2","T2"),
          ncol = 2, nrow = 2)
  
}

B=cont[,c('BsmtUnfSF','BsmtFinSF1')]  #Look at the cell above for a moment. These variables were not considered
B$BsmtUnfSF=cut(cont$BsmtUnfSF,breaks=3) 
B$BsmtFinSF1=cut(cont$BsmtFinSF1,breaks=3) #There are more case but it's not as problematic as these two cases
{
for (i in 1:15)
  cont1[,i]=fix(cont1[,i])

cont1=scale(cont1)%>%data.frame() 
}

#Correlation
cor.plot(cont1,
         numbers=T,
         upper=FALSE,
         main = "Pearson's correlation",
         show.legend = F) 

glimpse(catg)
## Rows: 2,919
## Columns: 43
## $ MSZoning      <chr> "RL", "RL", "RL", "RL", "RL", "RL", "RL", "RL", "RM",...
## $ Street        <chr> "Pave", "Pave", "Pave", "Pave", "Pave", "Pave", "Pave...
## $ Alley         <chr> NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, N...
## $ LotShape      <chr> "Reg", "Reg", "IR1", "IR1", "IR1", "IR1", "Reg", "IR1...
## $ LandContour   <chr> "Lvl", "Lvl", "Lvl", "Lvl", "Lvl", "Lvl", "Lvl", "Lvl...
## $ Utilities     <chr> "AllPub", "AllPub", "AllPub", "AllPub", "AllPub", "Al...
## $ LotConfig     <chr> "Inside", "FR2", "Inside", "Corner", "FR2", "Inside",...
## $ LandSlope     <chr> "Gtl", "Gtl", "Gtl", "Gtl", "Gtl", "Gtl", "Gtl", "Gtl...
## $ Neighborhood  <chr> "CollgCr", "Veenker", "CollgCr", "Crawfor", "NoRidge"...
## $ Condition1    <chr> "Norm", "Feedr", "Norm", "Norm", "Norm", "Norm", "Nor...
## $ Condition2    <chr> "Norm", "Norm", "Norm", "Norm", "Norm", "Norm", "Norm...
## $ BldgType      <chr> "1Fam", "1Fam", "1Fam", "1Fam", "1Fam", "1Fam", "1Fam...
## $ HouseStyle    <chr> "2Story", "1Story", "2Story", "2Story", "2Story", "1....
## $ RoofStyle     <chr> "Gable", "Gable", "Gable", "Gable", "Gable", "Gable",...
## $ RoofMatl      <chr> "CompShg", "CompShg", "CompShg", "CompShg", "CompShg"...
## $ Exterior1st   <chr> "VinylSd", "MetalSd", "VinylSd", "Wd Sdng", "VinylSd"...
## $ Exterior2nd   <chr> "VinylSd", "MetalSd", "VinylSd", "Wd Shng", "VinylSd"...
## $ MasVnrType    <chr> "BrkFace", "None", "BrkFace", "None", "BrkFace", "Non...
## $ ExterQual     <chr> "Gd", "TA", "Gd", "TA", "Gd", "TA", "Gd", "TA", "TA",...
## $ ExterCond     <chr> "TA", "TA", "TA", "TA", "TA", "TA", "TA", "TA", "TA",...
## $ Foundation    <chr> "PConc", "CBlock", "PConc", "BrkTil", "PConc", "Wood"...
## $ BsmtQual      <chr> "Gd", "Gd", "Gd", "TA", "Gd", "Gd", "Ex", "Gd", "TA",...
## $ BsmtCond      <chr> "TA", "TA", "TA", "Gd", "TA", "TA", "TA", "TA", "TA",...
## $ BsmtExposure  <chr> "No", "Gd", "Mn", "No", "Av", "No", "Av", "Mn", "No",...
## $ BsmtFinType1  <chr> "GLQ", "ALQ", "GLQ", "ALQ", "GLQ", "GLQ", "GLQ", "ALQ...
## $ BsmtFinType2  <chr> "Unf", "Unf", "Unf", "Unf", "Unf", "Unf", "Unf", "BLQ...
## $ Heating       <chr> "GasA", "GasA", "GasA", "GasA", "GasA", "GasA", "GasA...
## $ HeatingQC     <chr> "Ex", "Ex", "Ex", "Gd", "Ex", "Ex", "Ex", "Ex", "Gd",...
## $ CentralAir    <chr> "Y", "Y", "Y", "Y", "Y", "Y", "Y", "Y", "Y", "Y", "Y"...
## $ Electrical    <chr> "SBrkr", "SBrkr", "SBrkr", "SBrkr", "SBrkr", "SBrkr",...
## $ KitchenQual   <chr> "Gd", "TA", "Gd", "Gd", "Gd", "TA", "Gd", "TA", "TA",...
## $ Functional    <chr> "Typ", "Typ", "Typ", "Typ", "Typ", "Typ", "Typ", "Typ...
## $ FireplaceQu   <chr> NA, "TA", "TA", "Gd", "TA", NA, "Gd", "TA", "TA", "TA...
## $ GarageType    <chr> "Attchd", "Attchd", "Attchd", "Detchd", "Attchd", "At...
## $ GarageFinish  <chr> "RFn", "RFn", "RFn", "Unf", "RFn", "Unf", "RFn", "RFn...
## $ GarageQual    <chr> "TA", "TA", "TA", "TA", "TA", "TA", "TA", "TA", "Fa",...
## $ GarageCond    <chr> "TA", "TA", "TA", "TA", "TA", "TA", "TA", "TA", "TA",...
## $ PavedDrive    <chr> "Y", "Y", "Y", "Y", "Y", "Y", "Y", "Y", "Y", "Y", "Y"...
## $ PoolQC        <chr> NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, N...
## $ Fence         <chr> NA, NA, NA, NA, NA, "MnPrv", NA, NA, NA, NA, NA, NA, ...
## $ MiscFeature   <chr> NA, NA, NA, NA, NA, "Shed", NA, "Shed", NA, NA, NA, N...
## $ SaleType      <chr> "WD", "WD", "WD", "WD", "WD", "WD", "WD", "WD", "WD",...
## $ SaleCondition <chr> "Normal", "Normal", "Normal", "Abnorml", "Normal", "N...
BACK_UP.catg=catg 
catg=catg%>%dplyr::select(-Utilities) 
#Transforming the characters to 1 and 0
{
catg$Condition1=ifelse(catg$Condition1=='Norm',1,0)
catg$Condition2=ifelse(catg$Condition2=='Norm',1,0)

catg$LandSlope=ifelse(catg$LandSlope=='Gtl',1,0)

catg$RoofStyle=ifelse(catg$RoofStyle=='Gable',1,0)
catg$RoofMatl=ifelse(catg$RoofMatl=='CompShg',1,0)
catg$Electrical=ifelse(catg$Electrical=='SBrkr',1,0)
catg$Heating=ifelse(catg$Heating=='GasA',1,0)
catg$Functional=ifelse(catg$Functional=='Typ',1,0)
catg$GarageQual=ifelse(catg$GarageQual=='TA',1,0)
catg$GarageCond=ifelse(catg$GarageCond=='TA',1,0)
catg$MiscFeature=ifelse(catg$MiscFeature=='NA',1,0)
catg$SaleType=ifelse(catg$SaleType=='WD',1,0)

catg$PoolQC=ifelse(catg$PoolQC=='NG',1,0)
}
glimpse(catg)
## Rows: 2,919
## Columns: 42
## $ MSZoning      <chr> "RL", "RL", "RL", "RL", "RL", "RL", "RL", "RL", "RM",...
## $ Street        <chr> "Pave", "Pave", "Pave", "Pave", "Pave", "Pave", "Pave...
## $ Alley         <chr> NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, N...
## $ LotShape      <chr> "Reg", "Reg", "IR1", "IR1", "IR1", "IR1", "Reg", "IR1...
## $ LandContour   <chr> "Lvl", "Lvl", "Lvl", "Lvl", "Lvl", "Lvl", "Lvl", "Lvl...
## $ LotConfig     <chr> "Inside", "FR2", "Inside", "Corner", "FR2", "Inside",...
## $ LandSlope     <dbl> 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,...
## $ Neighborhood  <chr> "CollgCr", "Veenker", "CollgCr", "Crawfor", "NoRidge"...
## $ Condition1    <dbl> 1, 0, 1, 1, 1, 1, 1, 0, 0, 0, 1, 1, 1, 1, 1, 1, 1, 1,...
## $ Condition2    <dbl> 1, 1, 1, 1, 1, 1, 1, 1, 1, 0, 1, 1, 1, 1, 1, 1, 1, 1,...
## $ BldgType      <chr> "1Fam", "1Fam", "1Fam", "1Fam", "1Fam", "1Fam", "1Fam...
## $ HouseStyle    <chr> "2Story", "1Story", "2Story", "2Story", "2Story", "1....
## $ RoofStyle     <dbl> 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 0, 0, 0, 1, 0, 1, 1, 1,...
## $ RoofMatl      <dbl> 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,...
## $ Exterior1st   <chr> "VinylSd", "MetalSd", "VinylSd", "Wd Sdng", "VinylSd"...
## $ Exterior2nd   <chr> "VinylSd", "MetalSd", "VinylSd", "Wd Shng", "VinylSd"...
## $ MasVnrType    <chr> "BrkFace", "None", "BrkFace", "None", "BrkFace", "Non...
## $ ExterQual     <chr> "Gd", "TA", "Gd", "TA", "Gd", "TA", "Gd", "TA", "TA",...
## $ ExterCond     <chr> "TA", "TA", "TA", "TA", "TA", "TA", "TA", "TA", "TA",...
## $ Foundation    <chr> "PConc", "CBlock", "PConc", "BrkTil", "PConc", "Wood"...
## $ BsmtQual      <chr> "Gd", "Gd", "Gd", "TA", "Gd", "Gd", "Ex", "Gd", "TA",...
## $ BsmtCond      <chr> "TA", "TA", "TA", "Gd", "TA", "TA", "TA", "TA", "TA",...
## $ BsmtExposure  <chr> "No", "Gd", "Mn", "No", "Av", "No", "Av", "Mn", "No",...
## $ BsmtFinType1  <chr> "GLQ", "ALQ", "GLQ", "ALQ", "GLQ", "GLQ", "GLQ", "ALQ...
## $ BsmtFinType2  <chr> "Unf", "Unf", "Unf", "Unf", "Unf", "Unf", "Unf", "BLQ...
## $ Heating       <dbl> 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,...
## $ HeatingQC     <chr> "Ex", "Ex", "Ex", "Gd", "Ex", "Ex", "Ex", "Ex", "Gd",...
## $ CentralAir    <chr> "Y", "Y", "Y", "Y", "Y", "Y", "Y", "Y", "Y", "Y", "Y"...
## $ Electrical    <dbl> 1, 1, 1, 1, 1, 1, 1, 1, 0, 1, 1, 1, 1, 1, 1, 0, 1, 1,...
## $ KitchenQual   <chr> "Gd", "TA", "Gd", "Gd", "Gd", "TA", "Gd", "TA", "TA",...
## $ Functional    <dbl> 1, 1, 1, 1, 1, 1, 1, 1, 0, 1, 1, 1, 1, 1, 1, 1, 1, 1,...
## $ FireplaceQu   <chr> NA, "TA", "TA", "Gd", "TA", NA, "Gd", "TA", "TA", "TA...
## $ GarageType    <chr> "Attchd", "Attchd", "Attchd", "Detchd", "Attchd", "At...
## $ GarageFinish  <chr> "RFn", "RFn", "RFn", "Unf", "RFn", "Unf", "RFn", "RFn...
## $ GarageQual    <dbl> 1, 1, 1, 1, 1, 1, 1, 1, 0, 0, 1, 1, 1, 1, 1, 1, 1, 1,...
## $ GarageCond    <dbl> 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,...
## $ PavedDrive    <chr> "Y", "Y", "Y", "Y", "Y", "Y", "Y", "Y", "Y", "Y", "Y"...
## $ PoolQC        <dbl> NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, N...
## $ Fence         <chr> NA, NA, NA, NA, NA, "MnPrv", NA, NA, NA, NA, NA, NA, ...
## $ MiscFeature   <dbl> NA, NA, NA, NA, NA, 0, NA, 0, NA, NA, NA, NA, NA, NA,...
## $ SaleType      <dbl> 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 0, 1, 0, 1, 1, 1, 1,...
## $ SaleCondition <chr> "Normal", "Normal", "Normal", "Abnorml", "Normal", "N...
#because of the weird continuous values in cont2 we will analyze in another way
#CATEGORIC 
cont2a=cont2%>%dplyr::select( OverallQual,OverallCond,FullBath,BedroomAbvGr,KitchenAbvGr,TotRmsAbvGrd,Fireplaces,GarageCars,YrSold )
cont2b=cont2%>%dplyr::select(-OverallQual,-YrSold,-OverallCond,-FullBath,-BedroomAbvGr,-KitchenAbvGr,-TotRmsAbvGrd,-Fireplaces,-GarageCars )

cont2b=lapply(cont2b,function(x) ifelse(x==0,0,1))%>%data.frame()   #0 and 1. Many 0's
catg1=cbind(catg,cont2a,cont2b,B)
catg1=lapply(catg1,factor)%>%data.frame()
catg_dummy=dummy_cols(catg1,remove_first_dummy = T,remove_selected_columns = T)%>%data.frame()
catg_dummy=lapply(catg_dummy,factor)%>%data.frame()

#Last details to modeling
DF=cbind(cont1,catg_dummy)
DF1<-DF[,-c(28:279)] #removed all the non-frequent values
training=DF1[c(1:1460),]   #SPLIT THE DATA INTO TRAIN AND TEST
testing=DF1[c(1459:2917),]
Y_train=cbind(training,SalePrice) 

str(Y_train)
## 'data.frame':    1460 obs. of  28 variables:
##  $ MSSubClass          : num  0.516 -1.173 0.516 0.712 0.516 ...
##  $ LotFrontage         : num  -0.14204 0.58601 0.00847 -0.39912 0.77079 ...
##  $ LotArea.MSSubClass  : num  0.516 -1.173 0.516 0.712 0.516 ...
##  $ LotArea.LotFrontage : num  -0.1856 0.5728 -0.0327 -0.4418 0.7728 ...
##  $ LotArea.LotArea     : num  -0.2283 0.0752 0.5051 0.0621 1.2751 ...
##  $ LotArea.YearBuilt   : num  -1.1587 0.0794 -1.0316 1.5706 -0.9715 ...
##  $ LotArea.TotalBsmtSF : num  -0.455 0.527 -0.3 -0.697 0.244 ...
##  $ LotArea.X1stFlrSF   : num  -0.782 0.444 -0.554 -0.417 0.136 ...
##  $ LotArea.MoSold      : num  -1.588 -0.427 1.024 -1.588 2.072 ...
##  $ LotArea.GrLivArea   : num  0.585 -0.367 0.721 0.598 1.372 ...
##  $ LotArea.GarageArea  : num  0.3706 -0.0487 0.6564 0.8184 1.7427 ...
##  $ LotArea.GarageYrBlt : num  -1.116 0.296 -0.971 -0.773 -0.903 ...
##  $ LotArea.YearRemodAdd: num  -0.898 0.633 -0.806 0.839 -0.637 ...
##  $ LotArea.OverallQual : num  0.6519 -0.0524 0.6519 0.6519 1.3472 ...
##  $ LotArea.OverallCond : num  -0.478 2.068 -0.478 -0.478 -0.478 ...
##  $ LotArea.TotRmsAbvGrd: num  1.004 -0.286 -0.286 0.359 1.649 ...
##  $ YearBuilt           : num  -1.048 -0.152 -0.982 1.872 -0.949 ...
##  $ TotalBsmtSF         : num  -0.455 0.519 -0.302 -0.695 0.238 ...
##  $ X1stFlrSF           : num  -0.8023 0.2903 -0.6301 -0.5198 -0.0246 ...
##  $ MoSold              : num  -1.552 -0.447 1.027 -1.552 2.132 ...
##  $ GrLivArea           : num  0.445 -0.479 0.602 0.46 1.453 ...
##  $ GarageArea          : num  0.364 -0.052 0.648 0.809 1.727 ...
##  $ GarageYrBlt         : num  -1.0205 0.0857 -0.9385 -0.8156 -0.8976 ...
##  $ YearRemodAdd        : num  -0.897 0.396 -0.849 0.683 -0.753 ...
##  $ OverallQual         : num  0.6461 -0.0632 0.6461 0.6461 1.3553 ...
##  $ OverallCond         : num  -0.507 2.188 -0.507 -0.507 -0.507 ...
##  $ TotRmsAbvGrd        : num  1.004 -0.286 -0.286 0.359 1.649 ...
##  $ SalePrice           : int  208500 181500 223500 140000 250000 143000 307000 200000 129900 118000 ...
#Modeling
#MODEL 
#LASSO Regression analysis-k-fold cross validation

ctrl_specs <-trainControl(method = "cv",
                          savePredictions = "all",
                          number = 10,
                          classProbs = T)
#we have to create a vector for potential lambdas
lambda_vector <-10^seq(-5,5,length=500)
set.seed(1992)
Model1 <-train(SalePrice~.,data =Y_train,
               method="glmnet",
               tuneGrid=expand.grid(alpha=1,lambda=lambda_vector),
               trControl=ctrl_specs,
               preProcess=c("center","scale"),
               na.action = na.omit)
## Warning in train.default(x, y, weights = w, ...): cannnot compute class
## probabilities for regression
## Warning in nominalTrainWorkflow(x = x, y = y, wts = weights, info = trainInfo, :
## There were missing values in resampled performance measures.
Model1
## glmnet 
## 
## 1460 samples
##   27 predictor
## 
## Pre-processing: centered (27), scaled (27) 
## Resampling: Cross-Validated (10 fold) 
## Summary of sample sizes: 1315, 1314, 1314, 1314, 1314, 1313, ... 
## Resampling results across tuning parameters:
## 
##   lambda        RMSE      Rsquared   MAE     
##   1.000000e-05  31546.09  0.8454893  20624.82
##   1.047225e-05  31546.09  0.8454893  20624.82
##   1.096681e-05  31546.09  0.8454893  20624.82
##   1.148472e-05  31546.09  0.8454893  20624.82
##   1.202708e-05  31546.09  0.8454893  20624.82
##   1.259506e-05  31546.09  0.8454893  20624.82
##   1.318987e-05  31546.09  0.8454893  20624.82
##   1.381276e-05  31546.09  0.8454893  20624.82
##   1.446507e-05  31546.09  0.8454893  20624.82
##   1.514819e-05  31546.09  0.8454893  20624.82
##   1.586357e-05  31546.09  0.8454893  20624.82
##   1.661273e-05  31546.09  0.8454893  20624.82
##   1.739726e-05  31546.09  0.8454893  20624.82
##   1.821885e-05  31546.09  0.8454893  20624.82
##   1.907924e-05  31546.09  0.8454893  20624.82
##   1.998026e-05  31546.09  0.8454893  20624.82
##   2.092383e-05  31546.09  0.8454893  20624.82
##   2.191197e-05  31546.09  0.8454893  20624.82
##   2.294676e-05  31546.09  0.8454893  20624.82
##   2.403043e-05  31546.09  0.8454893  20624.82
##   2.516527e-05  31546.09  0.8454893  20624.82
##   2.635371e-05  31546.09  0.8454893  20624.82
##   2.759826e-05  31546.09  0.8454893  20624.82
##   2.890160e-05  31546.09  0.8454893  20624.82
##   3.026648e-05  31546.09  0.8454893  20624.82
##   3.169582e-05  31546.09  0.8454893  20624.82
##   3.319266e-05  31546.09  0.8454893  20624.82
##   3.476019e-05  31546.09  0.8454893  20624.82
##   3.640175e-05  31546.09  0.8454893  20624.82
##   3.812083e-05  31546.09  0.8454893  20624.82
##   3.992109e-05  31546.09  0.8454893  20624.82
##   4.180637e-05  31546.09  0.8454893  20624.82
##   4.378069e-05  31546.09  0.8454893  20624.82
##   4.584824e-05  31546.09  0.8454893  20624.82
##   4.801343e-05  31546.09  0.8454893  20624.82
##   5.028087e-05  31546.09  0.8454893  20624.82
##   5.265540e-05  31546.09  0.8454893  20624.82
##   5.514206e-05  31546.09  0.8454893  20624.82
##   5.774615e-05  31546.09  0.8454893  20624.82
##   6.047322e-05  31546.09  0.8454893  20624.82
##   6.332908e-05  31546.09  0.8454893  20624.82
##   6.631981e-05  31546.09  0.8454893  20624.82
##   6.945178e-05  31546.09  0.8454893  20624.82
##   7.273165e-05  31546.09  0.8454893  20624.82
##   7.616642e-05  31546.09  0.8454893  20624.82
##   7.976339e-05  31546.09  0.8454893  20624.82
##   8.353023e-05  31546.09  0.8454893  20624.82
##   8.747496e-05  31546.09  0.8454893  20624.82
##   9.160598e-05  31546.09  0.8454893  20624.82
##   9.593209e-05  31546.09  0.8454893  20624.82
##   1.004625e-04  31546.09  0.8454893  20624.82
##   1.052069e-04  31546.09  0.8454893  20624.82
##   1.101753e-04  31546.09  0.8454893  20624.82
##   1.153783e-04  31546.09  0.8454893  20624.82
##   1.208271e-04  31546.09  0.8454893  20624.82
##   1.265332e-04  31546.09  0.8454893  20624.82
##   1.325087e-04  31546.09  0.8454893  20624.82
##   1.387665e-04  31546.09  0.8454893  20624.82
##   1.453198e-04  31546.09  0.8454893  20624.82
##   1.521825e-04  31546.09  0.8454893  20624.82
##   1.593694e-04  31546.09  0.8454893  20624.82
##   1.668956e-04  31546.09  0.8454893  20624.82
##   1.747773e-04  31546.09  0.8454893  20624.82
##   1.830312e-04  31546.09  0.8454893  20624.82
##   1.916748e-04  31546.09  0.8454893  20624.82
##   2.007267e-04  31546.09  0.8454893  20624.82
##   2.102061e-04  31546.09  0.8454893  20624.82
##   2.201331e-04  31546.09  0.8454893  20624.82
##   2.305289e-04  31546.09  0.8454893  20624.82
##   2.414157e-04  31546.09  0.8454893  20624.82
##   2.528166e-04  31546.09  0.8454893  20624.82
##   2.647559e-04  31546.09  0.8454893  20624.82
##   2.772591e-04  31546.09  0.8454893  20624.82
##   2.903527e-04  31546.09  0.8454893  20624.82
##   3.040646e-04  31546.09  0.8454893  20624.82
##   3.184242e-04  31546.09  0.8454893  20624.82
##   3.334618e-04  31546.09  0.8454893  20624.82
##   3.492096e-04  31546.09  0.8454893  20624.82
##   3.657011e-04  31546.09  0.8454893  20624.82
##   3.829714e-04  31546.09  0.8454893  20624.82
##   4.010573e-04  31546.09  0.8454893  20624.82
##   4.199973e-04  31546.09  0.8454893  20624.82
##   4.398317e-04  31546.09  0.8454893  20624.82
##   4.606029e-04  31546.09  0.8454893  20624.82
##   4.823549e-04  31546.09  0.8454893  20624.82
##   5.051342e-04  31546.09  0.8454893  20624.82
##   5.289893e-04  31546.09  0.8454893  20624.82
##   5.539709e-04  31546.09  0.8454893  20624.82
##   5.801323e-04  31546.09  0.8454893  20624.82
##   6.075292e-04  31546.09  0.8454893  20624.82
##   6.362198e-04  31546.09  0.8454893  20624.82
##   6.662655e-04  31546.09  0.8454893  20624.82
##   6.977300e-04  31546.09  0.8454893  20624.82
##   7.306804e-04  31546.09  0.8454893  20624.82
##   7.651869e-04  31546.09  0.8454893  20624.82
##   8.013230e-04  31546.09  0.8454893  20624.82
##   8.391656e-04  31546.09  0.8454893  20624.82
##   8.787954e-04  31546.09  0.8454893  20624.82
##   9.202967e-04  31546.09  0.8454893  20624.82
##   9.637579e-04  31546.09  0.8454893  20624.82
##   1.009272e-03  31546.09  0.8454893  20624.82
##   1.056935e-03  31546.09  0.8454893  20624.82
##   1.106848e-03  31546.09  0.8454893  20624.82
##   1.159120e-03  31546.09  0.8454893  20624.82
##   1.213859e-03  31546.09  0.8454893  20624.82
##   1.271184e-03  31546.09  0.8454893  20624.82
##   1.331216e-03  31546.09  0.8454893  20624.82
##   1.394083e-03  31546.09  0.8454893  20624.82
##   1.459919e-03  31546.09  0.8454893  20624.82
##   1.528864e-03  31546.09  0.8454893  20624.82
##   1.601064e-03  31546.09  0.8454893  20624.82
##   1.676675e-03  31546.09  0.8454893  20624.82
##   1.755856e-03  31546.09  0.8454893  20624.82
##   1.838777e-03  31546.09  0.8454893  20624.82
##   1.925614e-03  31546.09  0.8454893  20624.82
##   2.016551e-03  31546.09  0.8454893  20624.82
##   2.111783e-03  31546.09  0.8454893  20624.82
##   2.211512e-03  31546.09  0.8454893  20624.82
##   2.315951e-03  31546.09  0.8454893  20624.82
##   2.425323e-03  31546.09  0.8454893  20624.82
##   2.539859e-03  31546.09  0.8454893  20624.82
##   2.659804e-03  31546.09  0.8454893  20624.82
##   2.785414e-03  31546.09  0.8454893  20624.82
##   2.916956e-03  31546.09  0.8454893  20624.82
##   3.054710e-03  31546.09  0.8454893  20624.82
##   3.198969e-03  31546.09  0.8454893  20624.82
##   3.350041e-03  31546.09  0.8454893  20624.82
##   3.508247e-03  31546.09  0.8454893  20624.82
##   3.673925e-03  31546.09  0.8454893  20624.82
##   3.847427e-03  31546.09  0.8454893  20624.82
##   4.029122e-03  31546.09  0.8454893  20624.82
##   4.219398e-03  31546.09  0.8454893  20624.82
##   4.418660e-03  31546.09  0.8454893  20624.82
##   4.627332e-03  31546.09  0.8454893  20624.82
##   4.845859e-03  31546.09  0.8454893  20624.82
##   5.074705e-03  31546.09  0.8454893  20624.82
##   5.314359e-03  31546.09  0.8454893  20624.82
##   5.565331e-03  31546.09  0.8454893  20624.82
##   5.828155e-03  31546.09  0.8454893  20624.82
##   6.103390e-03  31546.09  0.8454893  20624.82
##   6.391624e-03  31546.09  0.8454893  20624.82
##   6.693470e-03  31546.09  0.8454893  20624.82
##   7.009570e-03  31546.09  0.8454893  20624.82
##   7.340598e-03  31546.09  0.8454893  20624.82
##   7.687260e-03  31546.09  0.8454893  20624.82
##   8.050292e-03  31546.09  0.8454893  20624.82
##   8.430468e-03  31546.09  0.8454893  20624.82
##   8.828599e-03  31546.09  0.8454893  20624.82
##   9.245531e-03  31546.09  0.8454893  20624.82
##   9.682153e-03  31546.09  0.8454893  20624.82
##   1.013939e-02  31546.09  0.8454893  20624.82
##   1.061823e-02  31546.09  0.8454893  20624.82
##   1.111968e-02  31546.09  0.8454893  20624.82
##   1.164481e-02  31546.09  0.8454893  20624.82
##   1.219473e-02  31546.09  0.8454893  20624.82
##   1.277063e-02  31546.09  0.8454893  20624.82
##   1.337373e-02  31546.09  0.8454893  20624.82
##   1.400531e-02  31546.09  0.8454893  20624.82
##   1.466671e-02  31546.09  0.8454893  20624.82
##   1.535935e-02  31546.09  0.8454893  20624.82
##   1.608469e-02  31546.09  0.8454893  20624.82
##   1.684430e-02  31546.09  0.8454893  20624.82
##   1.763977e-02  31546.09  0.8454893  20624.82
##   1.847281e-02  31546.09  0.8454893  20624.82
##   1.934520e-02  31546.09  0.8454893  20624.82
##   2.025878e-02  31546.09  0.8454893  20624.82
##   2.121550e-02  31546.09  0.8454893  20624.82
##   2.221741e-02  31546.09  0.8454893  20624.82
##   2.326663e-02  31546.09  0.8454893  20624.82
##   2.436540e-02  31546.09  0.8454893  20624.82
##   2.551606e-02  31546.09  0.8454893  20624.82
##   2.672106e-02  31546.09  0.8454893  20624.82
##   2.798297e-02  31546.09  0.8454893  20624.82
##   2.930447e-02  31546.09  0.8454893  20624.82
##   3.068838e-02  31546.09  0.8454893  20624.82
##   3.213764e-02  31546.09  0.8454893  20624.82
##   3.365535e-02  31546.09  0.8454893  20624.82
##   3.524473e-02  31546.09  0.8454893  20624.82
##   3.690917e-02  31546.09  0.8454893  20624.82
##   3.865221e-02  31546.09  0.8454893  20624.82
##   4.047757e-02  31546.09  0.8454893  20624.82
##   4.238913e-02  31546.09  0.8454893  20624.82
##   4.439097e-02  31546.09  0.8454893  20624.82
##   4.648734e-02  31546.09  0.8454893  20624.82
##   4.868271e-02  31546.09  0.8454893  20624.82
##   5.098176e-02  31546.09  0.8454893  20624.82
##   5.338938e-02  31546.09  0.8454893  20624.82
##   5.591071e-02  31546.09  0.8454893  20624.82
##   5.855110e-02  31546.09  0.8454893  20624.82
##   6.131619e-02  31546.09  0.8454893  20624.82
##   6.421186e-02  31546.09  0.8454893  20624.82
##   6.724427e-02  31546.09  0.8454893  20624.82
##   7.041990e-02  31546.09  0.8454893  20624.82
##   7.374549e-02  31546.09  0.8454893  20624.82
##   7.722814e-02  31546.09  0.8454893  20624.82
##   8.087525e-02  31546.09  0.8454893  20624.82
##   8.469460e-02  31546.09  0.8454893  20624.82
##   8.869432e-02  31546.09  0.8454893  20624.82
##   9.288292e-02  31546.09  0.8454893  20624.82
##   9.726934e-02  31546.09  0.8454893  20624.82
##   1.018629e-01  31546.09  0.8454893  20624.82
##   1.066734e-01  31546.09  0.8454893  20624.82
##   1.117111e-01  31546.09  0.8454893  20624.82
##   1.169866e-01  31546.09  0.8454893  20624.82
##   1.225114e-01  31546.09  0.8454893  20624.82
##   1.282970e-01  31546.09  0.8454893  20624.82
##   1.343558e-01  31546.09  0.8454893  20624.82
##   1.407008e-01  31546.09  0.8454893  20624.82
##   1.473454e-01  31546.09  0.8454893  20624.82
##   1.543038e-01  31546.09  0.8454893  20624.82
##   1.615909e-01  31546.09  0.8454893  20624.82
##   1.692220e-01  31546.09  0.8454893  20624.82
##   1.772136e-01  31546.09  0.8454893  20624.82
##   1.855825e-01  31546.09  0.8454893  20624.82
##   1.943467e-01  31546.09  0.8454893  20624.82
##   2.035248e-01  31546.09  0.8454893  20624.82
##   2.131362e-01  31546.09  0.8454893  20624.82
##   2.232016e-01  31546.09  0.8454893  20624.82
##   2.337424e-01  31546.09  0.8454893  20624.82
##   2.447809e-01  31546.09  0.8454893  20624.82
##   2.563407e-01  31546.09  0.8454893  20624.82
##   2.684465e-01  31546.09  0.8454893  20624.82
##   2.811239e-01  31546.09  0.8454893  20624.82
##   2.944000e-01  31546.09  0.8454893  20624.82
##   3.083031e-01  31546.09  0.8454893  20624.82
##   3.228628e-01  31546.09  0.8454893  20624.82
##   3.381101e-01  31546.09  0.8454893  20624.82
##   3.540774e-01  31546.09  0.8454893  20624.82
##   3.707988e-01  31546.09  0.8454893  20624.82
##   3.883098e-01  31546.09  0.8454893  20624.82
##   4.066478e-01  31546.09  0.8454893  20624.82
##   4.258518e-01  31546.09  0.8454893  20624.82
##   4.459628e-01  31546.09  0.8454893  20624.82
##   4.670234e-01  31546.09  0.8454893  20624.82
##   4.890787e-01  31546.09  0.8454893  20624.82
##   5.121755e-01  31546.09  0.8454893  20624.82
##   5.363631e-01  31546.09  0.8454893  20624.82
##   5.616930e-01  31546.09  0.8454893  20624.82
##   5.882190e-01  31546.09  0.8454893  20624.82
##   6.159978e-01  31546.09  0.8454893  20624.82
##   6.450884e-01  31546.09  0.8454893  20624.82
##   6.755528e-01  31546.09  0.8454893  20624.82
##   7.074559e-01  31546.09  0.8454893  20624.82
##   7.408657e-01  31546.09  0.8454893  20624.82
##   7.758532e-01  31546.09  0.8454893  20624.82
##   8.124930e-01  31546.09  0.8454893  20624.82
##   8.508632e-01  31546.09  0.8454893  20624.82
##   8.910453e-01  31546.09  0.8454893  20624.82
##   9.331251e-01  31546.09  0.8454893  20624.82
##   9.771921e-01  31546.09  0.8454893  20624.82
##   1.023340e+00  31546.09  0.8454893  20624.82
##   1.071668e+00  31546.09  0.8454893  20624.82
##   1.122277e+00  31546.09  0.8454893  20624.82
##   1.175277e+00  31546.09  0.8454893  20624.82
##   1.230780e+00  31546.09  0.8454893  20624.82
##   1.288904e+00  31546.09  0.8454893  20624.82
##   1.349772e+00  31546.09  0.8454893  20624.82
##   1.413516e+00  31546.09  0.8454893  20624.82
##   1.480269e+00  31546.09  0.8454893  20624.82
##   1.550175e+00  31546.09  0.8454893  20624.82
##   1.623382e+00  31546.09  0.8454893  20624.82
##   1.700047e+00  31546.09  0.8454893  20624.82
##   1.780332e+00  31546.09  0.8454893  20624.82
##   1.864409e+00  31546.09  0.8454893  20624.82
##   1.952456e+00  31546.09  0.8454893  20624.82
##   2.044661e+00  31546.09  0.8454893  20624.82
##   2.141220e+00  31546.09  0.8454893  20624.82
##   2.242340e+00  31546.09  0.8454893  20624.82
##   2.348235e+00  31546.09  0.8454893  20624.82
##   2.459130e+00  31546.09  0.8454893  20624.82
##   2.575263e+00  31546.09  0.8454893  20624.82
##   2.696881e+00  31546.09  0.8454893  20624.82
##   2.824241e+00  31546.09  0.8454893  20624.82
##   2.957617e+00  31546.09  0.8454893  20624.82
##   3.097291e+00  31546.09  0.8454893  20624.82
##   3.243561e+00  31546.09  0.8454893  20624.82
##   3.396739e+00  31546.09  0.8454893  20624.82
##   3.557150e+00  31546.09  0.8454893  20624.82
##   3.725137e+00  31546.09  0.8454893  20624.82
##   3.901058e+00  31546.09  0.8454893  20624.82
##   4.085286e+00  31546.09  0.8454893  20624.82
##   4.278214e+00  31546.09  0.8454893  20624.82
##   4.480254e+00  31546.09  0.8454893  20624.82
##   4.691835e+00  31546.09  0.8454893  20624.82
##   4.913407e+00  31546.09  0.8454893  20624.82
##   5.145444e+00  31546.09  0.8454893  20624.82
##   5.388438e+00  31546.09  0.8454893  20624.82
##   5.642908e+00  31546.09  0.8454893  20624.82
##   5.909396e+00  31546.09  0.8454893  20624.82
##   6.188468e+00  31546.13  0.8454889  20624.86
##   6.480720e+00  31548.13  0.8454667  20627.56
##   6.786773e+00  31550.84  0.8454359  20631.43
##   7.107280e+00  31553.67  0.8454036  20635.49
##   7.442922e+00  31556.74  0.8453688  20639.80
##   7.794416e+00  31560.06  0.8453313  20644.39
##   8.162509e+00  31563.66  0.8452908  20650.06
##   8.547985e+00  31567.50  0.8452479  20655.99
##   8.951665e+00  31571.63  0.8452019  20662.15
##   9.374409e+00  31576.07  0.8451526  20668.65
##   9.817117e+00  31580.84  0.8450999  20675.76
##   1.028073e+01  31586.03  0.8450428  20683.31
##   1.076624e+01  31591.72  0.8449806  20691.19
##   1.127468e+01  31597.86  0.8449133  20699.49
##   1.180713e+01  31604.56  0.8448404  20708.25
##   1.236472e+01  31611.83  0.8447614  20717.56
##   1.294865e+01  31619.74  0.8446757  20727.41
##   1.356015e+01  31628.33  0.8445831  20738.00
##   1.420053e+01  31637.61  0.8444829  20749.05
##   1.487115e+01  31647.81  0.8443734  20761.02
##   1.557345e+01  31659.04  0.8442535  20773.91
##   1.630891e+01  31671.33  0.8441229  20788.25
##   1.707910e+01  31684.77  0.8439803  20804.10
##   1.788566e+01  31698.82  0.8438332  20820.79
##   1.873032e+01  31713.65  0.8436794  20838.80
##   1.961486e+01  31729.38  0.8435181  20857.85
##   2.054117e+01  31746.59  0.8433425  20877.76
##   2.151123e+01  31765.01  0.8431554  20898.89
##   2.252711e+01  31783.74  0.8429659  20920.07
##   2.359095e+01  31804.23  0.8427592  20942.58
##   2.470504e+01  31826.42  0.8425364  20967.22
##   2.587174e+01  31851.95  0.8422791  20993.59
##   2.709354e+01  31879.80  0.8419976  21021.71
##   2.837304e+01  31910.02  0.8416931  21050.77
##   2.971296e+01  31942.82  0.8413631  21080.66
##   3.111616e+01  31978.92  0.8409997  21112.22
##   3.258562e+01  32018.45  0.8406013  21146.28
##   3.412449e+01  32062.77  0.8401536  21183.28
##   3.573602e+01  32112.93  0.8396465  21222.81
##   3.742366e+01  32167.91  0.8390902  21265.63
##   3.919100e+01  32229.15  0.8384693  21313.21
##   4.104181e+01  32296.17  0.8377898  21364.73
##   4.298001e+01  32370.34  0.8370377  21420.47
##   4.500975e+01  32450.26  0.8362296  21479.18
##   4.713535e+01  32536.60  0.8353584  21542.20
##   4.936132e+01  32626.78  0.8344542  21608.24
##   5.169242e+01  32717.95  0.8335379  21674.40
##   5.413360e+01  32803.35  0.8326921  21738.77
##   5.669007e+01  32858.63  0.8321378  21786.30
##   5.936727e+01  32904.88  0.8316714  21826.88
##   6.217090e+01  32924.28  0.8314862  21842.83
##   6.510694e+01  32941.99  0.8313147  21856.59
##   6.818162e+01  32943.65  0.8312999  21856.48
##   7.140151e+01  32944.32  0.8312915  21855.12
##   7.477346e+01  32943.70  0.8312902  21852.60
##   7.830465e+01  32943.37  0.8312855  21850.05
##   8.200261e+01  32943.42  0.8312772  21847.46
##   8.587519e+01  32943.93  0.8312652  21844.59
##   8.993067e+01  32945.64  0.8312433  21841.65
##   9.417766e+01  32947.67  0.8312179  21838.66
##   9.862522e+01  32950.02  0.8311889  21835.80
##   1.032828e+02  32951.86  0.8311646  21832.71
##   1.081604e+02  32952.52  0.8311510  21829.09
##   1.132683e+02  32954.25  0.8311273  21825.53
##   1.186174e+02  32957.91  0.8310854  21822.46
##   1.242191e+02  32961.91  0.8310404  21819.27
##   1.300854e+02  32966.05  0.8309947  21815.86
##   1.362287e+02  32970.56  0.8309450  21812.21
##   1.426621e+02  32975.06  0.8308947  21808.43
##   1.493993e+02  32980.20  0.8308380  21804.80
##   1.564548e+02  32986.29  0.8307726  21801.79
##   1.638434e+02  32993.19  0.8306987  21800.22
##   1.715809e+02  33000.71  0.8306178  21798.77
##   1.796838e+02  33009.19  0.8305272  21797.90
##   1.881694e+02  33018.64  0.8304261  21797.21
##   1.970558e+02  33029.15  0.8303141  21797.04
##   2.063618e+02  33040.65  0.8301920  21797.03
##   2.161073e+02  33053.56  0.8300555  21798.04
##   2.263130e+02  33068.16  0.8299020  21801.03
##   2.370006e+02  33084.38  0.8297319  21805.73
##   2.481930e+02  33102.11  0.8295462  21811.63
##   2.599140e+02  33121.89  0.8293388  21818.61
##   2.721885e+02  33143.99  0.8291061  21826.44
##   2.850426e+02  33168.81  0.8288439  21834.80
##   2.985038e+02  33195.48  0.8285603  21844.22
##   3.126007e+02  33223.67  0.8282602  21854.87
##   3.273634e+02  33246.30  0.8280122  21859.93
##   3.428231e+02  33270.77  0.8277458  21865.74
##   3.590130e+02  33291.14  0.8275176  21866.56
##   3.759675e+02  33312.63  0.8272750  21868.60
##   3.937226e+02  33333.36  0.8270356  21870.02
##   4.123163e+02  33357.18  0.8267620  21873.38
##   4.317880e+02  33389.59  0.8263909  21880.23
##   4.521792e+02  33425.55  0.8259817  21889.06
##   4.735335e+02  33463.65  0.8255529  21897.57
##   4.958962e+02  33505.53  0.8250848  21908.38
##   5.193150e+02  33545.00  0.8246600  21915.88
##   5.438397e+02  33589.26  0.8241851  21926.79
##   5.695227e+02  33638.52  0.8236587  21942.12
##   5.964185e+02  33692.71  0.8230808  21965.85
##   6.245845e+02  33754.13  0.8224332  21997.64
##   6.540806e+02  33822.40  0.8217154  22034.96
##   6.849697e+02  33874.61  0.8211525  22069.87
##   7.173175e+02  33931.79  0.8205362  22109.58
##   7.511929e+02  33993.64  0.8198713  22148.12
##   7.866682e+02  34056.52  0.8191924  22186.52
##   8.238187e+02  34122.02  0.8184937  22224.76
##   8.627237e+02  34195.06  0.8177166  22266.53
##   9.034660e+02  34275.82  0.8168545  22312.65
##   9.461324e+02  34364.47  0.8159048  22362.76
##   9.908137e+02  34460.77  0.8148681  22418.80
##   1.037605e+03  34566.18  0.8137281  22479.47
##   1.086606e+03  34645.47  0.8128662  22533.59
##   1.137921e+03  34733.17  0.8119111  22593.00
##   1.191660e+03  34821.44  0.8109471  22653.40
##   1.247936e+03  34917.39  0.8098965  22718.29
##   1.306870e+03  35007.54  0.8089102  22780.79
##   1.368587e+03  35087.81  0.8080361  22837.88
##   1.433219e+03  35131.59  0.8075795  22867.99
##   1.500903e+03  35165.99  0.8072275  22890.68
##   1.571784e+03  35186.71  0.8070444  22898.38
##   1.646012e+03  35201.17  0.8069358  22898.58
##   1.723745e+03  35217.37  0.8068139  22900.11
##   1.805149e+03  35235.50  0.8066769  22902.83
##   1.890397e+03  35255.15  0.8065300  22906.15
##   1.979672e+03  35276.46  0.8063722  22910.82
##   2.073162e+03  35300.25  0.8061947  22916.22
##   2.171068e+03  35326.61  0.8059973  22923.39
##   2.273597e+03  35356.14  0.8057745  22933.32
##   2.380968e+03  35388.77  0.8055278  22945.18
##   2.493409e+03  35423.89  0.8052649  22958.51
##   2.611161e+03  35462.23  0.8049783  22973.03
##   2.734474e+03  35503.04  0.8046789  22989.06
##   2.863610e+03  35546.67  0.8043627  23006.63
##   2.998844e+03  35591.43  0.8040472  23023.32
##   3.140465e+03  35637.91  0.8037291  23039.26
##   3.288774e+03  35685.21  0.8034228  23051.78
##   3.444087e+03  35733.35  0.8031291  23063.27
##   3.606735e+03  35779.33  0.8028900  23070.09
##   3.777064e+03  35824.31  0.8026897  23072.82
##   3.955436e+03  35872.63  0.8024762  23078.97
##   4.142232e+03  35924.97  0.8022470  23088.69
##   4.337850e+03  35981.89  0.8019991  23101.04
##   4.542706e+03  36044.16  0.8017273  23117.77
##   4.757236e+03  36112.64  0.8014259  23140.80
##   4.981898e+03  36187.90  0.8010910  23168.88
##   5.217169e+03  36268.46  0.8007471  23198.01
##   5.463550e+03  36356.37  0.8003711  23231.43
##   5.721568e+03  36452.60  0.7999554  23271.76
##   5.991770e+03  36557.97  0.7994954  23319.72
##   6.274732e+03  36670.86  0.7990130  23372.44
##   6.571058e+03  36790.54  0.7985235  23429.41
##   6.881377e+03  36920.57  0.7979893  23497.38
##   7.206351e+03  37061.42  0.7974080  23573.84
##   7.546673e+03  37215.10  0.7967648  23657.02
##   7.903066e+03  37382.88  0.7960518  23747.94
##   8.276289e+03  37563.74  0.7952987  23848.29
##   8.667139e+03  37757.73  0.7945101  23956.61
##   9.076446e+03  37967.30  0.7936557  24077.56
##   9.505083e+03  38184.31  0.7928531  24200.18
##   9.953962e+03  38414.71  0.7920449  24330.76
##   1.042404e+04  38656.85  0.7912648  24467.25
##   1.091632e+04  38921.15  0.7903856  24618.80
##   1.143184e+04  39208.51  0.7894133  24792.72
##   1.197171e+04  39522.74  0.7882987  24987.40
##   1.253708e+04  39864.33  0.7870473  25207.30
##   1.312915e+04  40229.63  0.7857155  25450.63
##   1.374917e+04  40611.81  0.7844334  25709.23
##   1.439848e+04  41011.56  0.7832493  25985.26
##   1.507845e+04  41416.50  0.7823969  26261.15
##   1.579053e+04  41853.68  0.7814469  26566.28
##   1.653624e+04  42319.17  0.7805153  26896.64
##   1.731717e+04  42823.95  0.7794370  27259.86
##   1.813498e+04  43371.20  0.7782010  27661.79
##   1.899141e+04  43964.17  0.7767634  28111.65
##   1.988828e+04  44605.91  0.7750857  28604.21
##   2.082751e+04  45299.81  0.7731176  29152.66
##   2.181109e+04  46049.38  0.7707961  29751.81
##   2.284112e+04  46858.25  0.7680417  30412.90
##   2.391980e+04  47730.21  0.7647535  31131.28
##   2.504942e+04  48667.83  0.7608296  31904.78
##   2.623238e+04  49675.11  0.7561088  32735.21
##   2.747121e+04  50754.39  0.7504134  33644.08
##   2.876854e+04  51899.19  0.7437629  34614.17
##   3.012714e+04  53113.92  0.7359679  35635.20
##   3.154990e+04  54372.99  0.7277441  36692.20
##   3.303985e+04  55696.59  0.7182301  37809.86
##   3.460016e+04  57056.45  0.7082136  38966.01
##   3.623416e+04  58479.63  0.6966054  40178.94
##   3.794533e+04  59874.78  0.6883613  41377.13
##   3.973730e+04  61356.65  0.6779486  42657.34
##   4.161391e+04  62915.81  0.6653806  44004.91
##   4.357913e+04  64546.26  0.6497847  45403.16
##   4.563716e+04  66075.64  0.6423422  46706.86
##   4.779239e+04  67673.11  0.6350818  48053.18
##   5.004939e+04  69287.82  0.6349388  49397.94
##   5.241298e+04  71014.68  0.6349388  50836.74
##   5.488820e+04  72861.52  0.6349388  52364.98
##   5.748030e+04  74834.58  0.6349388  53997.95
##   6.019482e+04  76940.23  0.6349388  55735.41
##   6.303753e+04  78867.75  0.6508166  57334.34
##   6.601449e+04  78994.59        NaN  57439.25
##   6.913204e+04  78994.59        NaN  57439.25
##   7.239681e+04  78994.59        NaN  57439.25
##   7.581576e+04  78994.59        NaN  57439.25
##   7.939618e+04  78994.59        NaN  57439.25
##   8.314568e+04  78994.59        NaN  57439.25
##   8.707225e+04  78994.59        NaN  57439.25
##   9.118425e+04  78994.59        NaN  57439.25
##   9.549045e+04  78994.59        NaN  57439.25
##   1.000000e+05  78994.59        NaN  57439.25
## 
## Tuning parameter 'alpha' was held constant at a value of 1
## RMSE was used to select the optimal model using the smallest value.
## The final values used for the model were alpha = 1 and lambda = 5.909396.
Model1$bestTune
##     alpha   lambda
## 289     1 5.909396
#LASSO Regression model coefficients(Parameter Estimates)
round(coef(Model1$finalModel,Model1$bestTune$lambda),3)
## 28 x 1 sparse Matrix of class "dgCMatrix"
##                                1
## (Intercept)           180921.196
## MSSubClass              1659.373
## LotFrontage             2985.564
## LotArea.MSSubClass        87.785
## LotArea.LotFrontage       93.181
## LotArea.LotArea         5591.137
## LotArea.YearBuilt       1121.894
## LotArea.TotalBsmtSF     1468.517
## LotArea.X1stFlrSF     -22229.427
## LotArea.MoSold         13229.194
## LotArea.GrLivArea     -26899.756
## LotArea.GarageArea     13619.219
## LotArea.GarageYrBlt        .    
## LotArea.YearRemodAdd  -17600.400
## LotArea.OverallQual  -603142.430
## LotArea.OverallCond    27681.688
## LotArea.TotRmsAbvGrd     191.230
## YearBuilt             -16606.910
## TotalBsmtSF            11849.965
## X1stFlrSF              27358.116
## MoSold                -13363.706
## GrLivArea              48586.863
## GarageArea             -5424.190
## GarageYrBlt             3161.592
## YearRemodAdd           13298.079
## OverallQual           627601.425
## OverallCond           -18734.345
## TotRmsAbvGrd             367.156
#Plot log(lambda) & RMSE
plot(log(Model1$results$lambda),Model1$results$RMSE,
     xlab="Log(lambda)",
     ylab="RMSE",
     xlim=c(-20,20))

log(Model1$bestTune$lambda)
## [1] 1.776544
#variable importance
varImp(Model1)
## glmnet variable importance
## 
##   only 20 most important variables shown (out of 27)
## 
##                       Overall
## OverallQual          100.0000
## LotArea.OverallQual   96.1028
## GrLivArea              7.7417
## LotArea.OverallCond    4.4107
## X1stFlrSF              4.3592
## LotArea.GrLivArea      4.2861
## LotArea.X1stFlrSF      3.5420
## OverallCond            2.9851
## LotArea.YearRemodAdd   2.8044
## YearBuilt              2.6461
## LotArea.GarageArea     2.1700
## MoSold                 2.1293
## YearRemodAdd           2.1189
## LotArea.MoSold         2.1079
## TotalBsmtSF            1.8881
## LotArea.LotArea        0.8909
## GarageArea             0.8643
## GarageYrBlt            0.5038
## LotFrontage            0.4757
## MSSubClass             0.2644
#most important features 
  #OverallQual          
  #LotArea.OverallQual
  #GrLivArea              
  #LotArea.OverallCond 
  #X1stFlrSF             
  #LotArea.GrLivArea      
  #LotArea.X1stFlrSF      
  #OverallCond            
  #LotArea.YearRemodAdd   
  #YearBuilt              
  #LotArea.GarageArea     
  #MoSold                 


#plot variable importance
ggplot(varImp(Model1))+
  labs(title = "Model  Variable importance Rank")

X_test=cbind(testing,final)
X_test<-X_test[,-28] #removing id

p=predict(Model1,X_test)
a=c(1461:2919)
pred=data.frame(Id=a,SalePrice=p)

#model 1 sale price prediction plot
summary(pred$SalePrice)
##    Min. 1st Qu.  Median    Mean 3rd Qu.    Max. 
##   19025  125267  161730  179402  216790  551548
#the average SalePrice from model 1 is $179 402,with the minimum being
#$19 025 and the maximum SalePrice of $551 548
ggplot(pred,aes(SalePrice))+geom_histogram(fill='green',alpha=1,bins=50)+
  labs(x="Sale Price",y="Frequency",
       title = "Model 1 Sale Price")

#MODEL 2
#2.a  with 2 repeats and 3 folds
#2.b with 2 repeats and 4 folds
#RANDOM FOREST MODELLING-RPART

#2.a
rf_ctr_specs1 <-trainControl(method = "repeatedcv",
                             repeats = 2,
                             number=3,
                             search = "random")
Model2.a <-train(SalePrice~.,data=Y_train,
                 method="rf",
                 trControl=rf_ctr_specs1)
plot(varImp(Model2.a,scale=F),main="Model 2.a RandomForest-3 FOLDS-2-REPEATED CV")

#the 12 features that drive Sale price of houses are as follows :
#OverallQual        
#LotArea.OverallQual   
#GrLivArea             
#LotArea.GrLivArea     
#GarageArea            
#YearBuilt             
#TotalBsmtSF           
#X1stFlrSF             
#LotArea.GarageArea    
#LotArea.YearBuilt     
#LotArea.TotalBsmtSF   
#LotArea.X1stFlrSF     

p=predict(Model2.a,X_test)
a=c(1461:2919)
pred=data.frame(Id=a,SalePrice=p)

#model2.a Sale price prediction plot
summary(pred$SalePrice)
##    Min. 1st Qu.  Median    Mean 3rd Qu.    Max. 
##   57097  131211  158774  179207  209921  540113
#the average SalePrice of Model 2.a is $179 207 with a minimum of $57 097 and a 
#maximum of $540 113
ggplot(pred,aes(SalePrice))+geom_histogram(fill='red',alpha=1,bins=50)+
  labs(x="Sale Price",
       y="Frequency",
       title = "Model2.a Sale Price")

varImp(Model2.a)
## rf variable importance
## 
##   only 20 most important variables shown (out of 27)
## 
##                       Overall
## LotArea.OverallQual  100.0000
## OverallQual           94.4319
## GrLivArea             27.9248
## LotArea.GrLivArea     23.8680
## LotArea.X1stFlrSF     11.0893
## TotalBsmtSF           11.0730
## X1stFlrSF              9.6366
## LotArea.TotalBsmtSF    9.5538
## GarageArea             7.9547
## YearBuilt              6.1327
## LotArea.GarageArea     6.0279
## LotArea.YearBuilt      5.7409
## LotArea.LotArea        3.2176
## TotRmsAbvGrd           2.6084
## LotArea.TotRmsAbvGrd   2.1630
## LotArea.LotFrontage    1.8774
## LotArea.YearRemodAdd   1.6506
## LotFrontage            1.5874
## YearRemodAdd           1.5059
## LotArea.GarageYrBlt    0.6117
#2.b
rf_ctr_specs <-trainControl(method = "repeatedcv",
                            repeats = 2,
                            number=4,
                            search = "random")
Model2.b <-train(SalePrice~.,data=Y_train,
                 method="rf",
                 trControl=rf_ctr_specs)
plot(varImp(Model2.b,scale=F),main="Model 2b RandomForest-4 FOLDS-2-REAPEATED CV")

p=predict(Model2.b,X_test)
a=c(1461:2919)
pred=data.frame(Id=a,SalePrice=p)

#Saleprice model 2.b
summary(pred$SalePrice)
##    Min. 1st Qu.  Median    Mean 3rd Qu.    Max. 
##   58713  130805  158915  179138  209678  542263
#the average SalePrice of Model 2.b is $179 138 with the minimum of $58 713
#and a maximum SalePrice of $542 263
ggplot(pred,aes(SalePrice))+geom_histogram(fill='blue',alpha=1,bins=50)+
  labs(x="Sales Price",
       y="Frequency",
       title = "Model2.b Sale price")

varImp(Model2.b)
## rf variable importance
## 
##   only 20 most important variables shown (out of 27)
## 
##                       Overall
## LotArea.OverallQual  100.0000
## OverallQual           96.7313
## GrLivArea             29.2644
## LotArea.GrLivArea     26.6588
## TotalBsmtSF           11.9623
## LotArea.TotalBsmtSF   11.3418
## X1stFlrSF             11.1933
## GarageArea            10.8132
## LotArea.X1stFlrSF     10.7874
## LotArea.YearBuilt      8.2482
## LotArea.GarageArea     8.1897
## YearBuilt              6.0405
## LotArea.LotArea        3.8454
## TotRmsAbvGrd           3.4297
## LotArea.TotRmsAbvGrd   2.8296
## LotArea.YearRemodAdd   2.0389
## LotArea.LotFrontage    1.9464
## LotFrontage            1.8324
## YearRemodAdd           1.7660
## LotArea.GarageYrBlt    0.8081
Model2.b$results$Rsquared
## [1] 0.8612799 0.8615274 0.8576080
# 0.8606251 0.8541651 0.8496587
Model2.b$results$RsquaredSD
## [1] 0.01329931 0.01315689 0.01418043
#important features
  # OverallQual          
  # LotArea.OverallQual 
  # GrLivArea             
  # LotArea.GrLivArea     
  # GarageArea            
  # TotalBsmtSF           
  # LotArea.GarageArea    
  # YearBuilt             
  # LotArea.X1stFlrSF     
  # LotArea.TotalBsmtSF   
  # X1stFlrSF             
  # LotArea.YearBuilt     


#compare the 12 most important features according to diff models
#select the frequent features as the main drivers of house Prices
# the once marked with 3 asterisks appear to be important in all 3 models,
#this suggest that they might be the core influences of House Prices
 
  # OverallQual         ***
  # LotArea.OverallQual ***
  # GrLivArea           ***
  # LotArea.OverallCond ***
  # X1stFlrSF           ***
  # LotArea.GrLivArea   ***
  # LotArea.X1stFlrSF   ***
  # YearBuilt           ***
  # LotArea.GarageArea  ***
                  
#full data clean
Fdata_clean<-rbind(Y_train,X_test)
summary(Fdata_clean$SalePrice)
##    Min. 1st Qu.  Median    Mean 3rd Qu.    Max. 
##   34900  154795  176735  180053  191896  755000
#the data's average SalePrice is $180 053 with a minimum of $34 900
#and a maximum SalePrice of $755 000

#comparison SalePrice
  #the average SalePrice from model 1 is $179 402,with the minimum being
  #$19 025 and the maximum SalePrice of $551 548

  #the average SalePrice of Model 2.a is $179 207 with a minimum of $57 097 and a 
  #maximum of $540 113

  #the average SalePrice of Model 2.b is $179 138 with the minimum of $58 713
  #and a maximum SalePrice of $542 263

#Model 1 estimates are the closest.

#Constructing a more accurate forecast from the 3 model results

  #average
  c=(179402+179207+179138)/3
  c
## [1] 179249
  # $179 249 actual=$180 053
  #minimum
  d=(19025+57097+58713)/3
  d
## [1] 44945
  # $44 945 actual= $34 900
  #Maximum
  e=(551548+542263+540113)/3
  e
## [1] 544641.3
  # $544 641.3 actual=$755 000