In this project, we are presented with a train and test set of the Ames Iowa house data from Kaggle. The goal is to create a model that is capable of predicting house prices using the available variables in the dataset. The Mulitple Linear Regression model will be explored and utilized in this analysis.

Data Exploration & Cleaning

# Importing the necessary libraries and packages
library(ggplot2)
library(data.table)
library(mice)
library(caTools)
library(dplyr)
library(MASS)
library(glmnet)
library(factoextra)
library(randomForest)
# Importing the data
data <- read.csv("training.csv")
# Exploring basic information about the data
colnames(data)
 [1] "Id"            "MSSubClass"    "MSZoning"      "LotFrontage"   "LotArea"      
 [6] "Street"        "Alley"         "LotShape"      "LandContour"   "Utilities"    
[11] "LotConfig"     "LandSlope"     "Neighborhood"  "Condition1"    "Condition2"   
[16] "BldgType"      "HouseStyle"    "OverallQual"   "OverallCond"   "YearBuilt"    
[21] "YearRemodAdd"  "RoofStyle"     "RoofMatl"      "Exterior1st"   "Exterior2nd"  
[26] "MasVnrType"    "MasVnrArea"    "ExterQual"     "ExterCond"     "Foundation"   
[31] "BsmtQual"      "BsmtCond"      "BsmtExposure"  "BsmtFinType1"  "BsmtFinSF1"   
[36] "BsmtFinType2"  "BsmtFinSF2"    "BsmtUnfSF"     "TotalBsmtSF"   "Heating"      
[41] "HeatingQC"     "CentralAir"    "Electrical"    "X1stFlrSF"     "X2ndFlrSF"    
[46] "LowQualFinSF"  "GrLivArea"     "BsmtFullBath"  "BsmtHalfBath"  "FullBath"     
[51] "HalfBath"      "BedroomAbvGr"  "KitchenAbvGr"  "KitchenQual"   "TotRmsAbvGrd" 
[56] "Functional"    "Fireplaces"    "FireplaceQu"   "GarageType"    "GarageYrBlt"  
[61] "GarageFinish"  "GarageCars"    "GarageArea"    "GarageQual"    "GarageCond"   
[66] "PavedDrive"    "WoodDeckSF"    "OpenPorchSF"   "EnclosedPorch" "X3SsnPorch"   
[71] "ScreenPorch"   "PoolArea"      "PoolQC"        "Fence"         "MiscFeature"  
[76] "MiscVal"       "MoSold"        "YrSold"        "SaleType"      "SaleCondition"
[81] "SalePrice"    
str(data)
'data.frame':   1060 obs. of  81 variables:
 $ Id           : int  1 3 4 5 8 10 11 12 14 15 ...
 $ MSSubClass   : int  60 60 70 60 60 190 20 60 20 20 ...
 $ MSZoning     : Factor w/ 5 levels "C (all)","FV",..: 4 4 4 4 4 4 4 4 4 4 ...
 $ LotFrontage  : int  65 68 60 84 NA 50 70 85 91 NA ...
 $ LotArea      : int  8450 11250 9550 14260 10382 7420 11200 11924 10652 10920 ...
 $ Street       : Factor w/ 2 levels "Grvl","Pave": 2 2 2 2 2 2 2 2 2 2 ...
 $ Alley        : Factor w/ 2 levels "Grvl","Pave": NA NA NA NA NA NA NA NA NA NA ...
 $ LotShape     : Factor w/ 4 levels "IR1","IR2","IR3",..: 4 1 1 1 1 4 4 1 1 1 ...
 $ LandContour  : Factor w/ 4 levels "Bnk","HLS","Low",..: 4 4 4 4 4 4 4 4 4 4 ...
 $ Utilities    : Factor w/ 1 level "AllPub": 1 1 1 1 1 1 1 1 1 1 ...
 $ LotConfig    : Factor w/ 5 levels "Corner","CulDSac",..: 5 5 1 3 1 1 5 5 5 1 ...
 $ LandSlope    : Factor w/ 3 levels "Gtl","Mod","Sev": 1 1 1 1 1 1 1 1 1 1 ...
 $ Neighborhood : Factor w/ 25 levels "Blmngtn","Blueste",..: 6 6 7 14 17 4 19 16 6 13 ...
 $ Condition1   : Factor w/ 9 levels "Artery","Feedr",..: 3 3 3 3 5 1 3 3 3 3 ...
 $ Condition2   : Factor w/ 6 levels "Artery","Feedr",..: 3 3 3 3 3 1 3 3 3 3 ...
 $ BldgType     : Factor w/ 5 levels "1Fam","2fmCon",..: 1 1 1 1 1 2 1 1 1 1 ...
 $ HouseStyle   : Factor w/ 8 levels "1.5Fin","1.5Unf",..: 6 6 6 6 6 2 3 6 3 3 ...
 $ OverallQual  : int  7 7 7 8 7 5 5 9 7 6 ...
 $ OverallCond  : int  5 5 5 5 6 6 5 5 5 5 ...
 $ YearBuilt    : int  2003 2001 1915 2000 1973 1939 1965 2005 2006 1960 ...
 $ YearRemodAdd : int  2003 2002 1970 2000 1973 1950 1965 2006 2007 1960 ...
 $ RoofStyle    : Factor w/ 6 levels "Flat","Gable",..: 2 2 2 2 2 2 4 4 2 4 ...
 $ RoofMatl     : Factor w/ 7 levels "ClyTile","CompShg",..: 2 2 2 2 2 2 2 2 2 2 ...
 $ Exterior1st  : Factor w/ 14 levels "AsbShng","AsphShn",..: 12 12 13 12 7 8 7 14 12 8 ...
 $ Exterior2nd  : Factor w/ 15 levels "AsbShng","AsphShn",..: 13 13 15 13 7 9 7 15 13 9 ...
 $ MasVnrType   : Factor w/ 4 levels "BrkCmn","BrkFace",..: 2 2 3 2 4 3 3 4 4 2 ...
 $ MasVnrArea   : int  196 162 0 350 240 0 0 286 306 212 ...
 $ ExterQual    : Factor w/ 4 levels "Ex","Fa","Gd",..: 3 3 4 3 4 4 4 1 3 4 ...
 $ ExterCond    : Factor w/ 5 levels "Ex","Fa","Gd",..: 5 5 5 5 5 5 5 5 5 5 ...
 $ Foundation   : Factor w/ 6 levels "BrkTil","CBlock",..: 3 3 1 3 2 1 2 3 3 2 ...
 $ BsmtQual     : Factor w/ 4 levels "Ex","Fa","Gd",..: 3 3 4 3 3 4 4 1 3 4 ...
 $ BsmtCond     : Factor w/ 4 levels "Fa","Gd","Po",..: 4 4 2 4 4 4 4 4 4 4 ...
 $ BsmtExposure : Factor w/ 4 levels "Av","Gd","Mn",..: 4 3 4 1 3 4 4 4 1 4 ...
 $ BsmtFinType1 : Factor w/ 6 levels "ALQ","BLQ","GLQ",..: 3 3 1 3 1 3 5 3 6 2 ...
 $ BsmtFinSF1   : int  706 486 216 655 859 851 906 998 0 733 ...
 $ BsmtFinType2 : Factor w/ 6 levels "ALQ","BLQ","GLQ",..: 6 6 6 6 2 6 6 6 6 6 ...
 $ BsmtFinSF2   : int  0 0 0 0 32 0 0 0 0 0 ...
 $ BsmtUnfSF    : int  150 434 540 490 216 140 134 177 1494 520 ...
 $ TotalBsmtSF  : int  856 920 756 1145 1107 991 1040 1175 1494 1253 ...
 $ Heating      : Factor w/ 6 levels "Floor","GasA",..: 2 2 2 2 2 2 2 2 2 2 ...
 $ HeatingQC    : Factor w/ 5 levels "Ex","Fa","Gd",..: 1 1 3 1 1 1 1 1 1 5 ...
 $ CentralAir   : Factor w/ 2 levels "N","Y": 2 2 2 2 2 2 2 2 2 2 ...
 $ Electrical   : Factor w/ 5 levels "FuseA","FuseF",..: 5 5 5 5 5 5 5 5 5 5 ...
 $ X1stFlrSF    : int  856 920 961 1145 1107 1077 1040 1182 1494 1253 ...
 $ X2ndFlrSF    : int  854 866 756 1053 983 0 0 1142 0 0 ...
 $ LowQualFinSF : int  0 0 0 0 0 0 0 0 0 0 ...
 $ GrLivArea    : int  1710 1786 1717 2198 2090 1077 1040 2324 1494 1253 ...
 $ BsmtFullBath : int  1 1 1 1 1 1 1 1 0 1 ...
 $ BsmtHalfBath : int  0 0 0 0 0 0 0 0 0 0 ...
 $ FullBath     : int  2 2 1 2 2 1 1 3 2 1 ...
 $ HalfBath     : int  1 1 0 1 1 0 0 0 0 1 ...
 $ BedroomAbvGr : int  3 3 3 4 3 2 3 4 3 2 ...
 $ KitchenAbvGr : int  1 1 1 1 1 2 1 1 1 1 ...
 $ KitchenQual  : Factor w/ 4 levels "Ex","Fa","Gd",..: 3 3 3 3 4 4 4 1 3 4 ...
 $ TotRmsAbvGrd : int  8 6 7 9 7 5 5 11 7 5 ...
 $ Functional   : Factor w/ 7 levels "Maj1","Maj2",..: 7 7 7 7 7 7 7 7 7 7 ...
 $ Fireplaces   : int  0 1 1 1 2 2 0 2 1 1 ...
 $ FireplaceQu  : Factor w/ 5 levels "Ex","Fa","Gd",..: NA 5 3 5 5 5 NA 3 3 2 ...
 $ GarageType   : Factor w/ 6 levels "2Types","Attchd",..: 2 2 6 2 2 2 6 4 2 2 ...
 $ GarageYrBlt  : int  2003 2001 1998 2000 1973 1939 1965 2005 2006 1960 ...
 $ GarageFinish : Factor w/ 3 levels "Fin","RFn","Unf": 2 2 3 2 2 2 3 1 2 2 ...
 $ GarageCars   : int  2 2 3 3 2 1 1 3 3 1 ...
 $ GarageArea   : int  548 608 642 836 484 205 384 736 840 352 ...
 $ GarageQual   : Factor w/ 5 levels "Ex","Fa","Gd",..: 5 5 5 5 5 3 5 5 5 5 ...
 $ GarageCond   : Factor w/ 5 levels "Ex","Fa","Gd",..: 5 5 5 5 5 5 5 5 5 5 ...
 $ PavedDrive   : Factor w/ 3 levels "N","P","Y": 3 3 3 3 3 3 3 3 3 3 ...
 $ WoodDeckSF   : int  0 0 0 192 235 0 0 147 160 0 ...
 $ OpenPorchSF  : int  61 42 35 84 204 4 0 21 33 213 ...
 $ EnclosedPorch: int  0 0 272 0 228 0 0 0 0 176 ...
 $ X3SsnPorch   : int  0 0 0 0 0 0 0 0 0 0 ...
 $ ScreenPorch  : int  0 0 0 0 0 0 0 0 0 0 ...
 $ PoolArea     : int  0 0 0 0 0 0 0 0 0 0 ...
 $ PoolQC       : Factor w/ 3 levels "Ex","Fa","Gd": NA NA NA NA NA NA NA NA NA NA ...
 $ Fence        : Factor w/ 4 levels "GdPrv","GdWo",..: NA NA NA NA NA NA NA NA NA 2 ...
 $ MiscFeature  : Factor w/ 4 levels "Gar2","Othr",..: NA NA NA NA 3 NA NA NA NA NA ...
 $ MiscVal      : int  0 0 0 0 350 0 0 0 0 0 ...
 $ MoSold       : int  2 9 2 12 11 1 2 7 8 5 ...
 $ YrSold       : int  2008 2008 2006 2008 2009 2008 2008 2006 2007 2008 ...
 $ SaleType     : Factor w/ 9 levels "COD","Con","ConLD",..: 9 9 9 9 9 9 9 7 7 9 ...
 $ SaleCondition: Factor w/ 6 levels "Abnorml","AdjLand",..: 5 5 1 5 5 5 5 6 6 5 ...
 $ SalePrice    : int  208500 223500 140000 250000 200000 118000 129500 345000 279500 157000 ...

It looks like we have 1060 observations and 80 variables, excluding target variables ‘SalePrice’; 23 nominal, 23 ordinal, 14 discrete, and 20 continuous. Also, it is clear that we are missing values in our dataset. Let’s determine the distribution of the mising data.

# Checking what prcent of all the data is composed of missing values 
(sum(is.na(data))/(nrow(data)*ncol(data)))*100
[1] 5.885162
# Let's get a break down of the number of missing values in each column
missing.col <- (colSums(is.na(data))[colSums(is.na(data)) > 0]/nrow(data))*100
missing.col.names <- row.names(data.frame(missing.col))
missing.col.percent <- data.frame(missing.col)
missing.col.percent <- setorder(data.frame(missing.col), missing.col)
colnames(missing.col.percent) <- 'percent.missing'
missing.col.percent

It seems that ‘LotFrontage’, ‘FireplaceQu’, ‘Fence’, ‘Alley’, ‘MiscFeature’, and ‘PoolQC’ possess significant proportions of missing data, over 5%. More pressing is the fact that some of the variables, such as ‘PoolQC’, contain no data at all and even ‘LotFrontage’, which has the least missing data of the variables previously mentioned, is missing almost 20% of its data.

After consulting the data documentation, it seems that many of the variables use ‘NA’ as respresenting the absence of a future. For example, ‘FireplaceQu’ represents fireplace quality of a home. Now, although this variable is missing almost half of it’s data, that is due to the fact that a missing value denotes the absence of a fireplace in the home and not a missing piece of infromation of data. So, before continuing, let’s add ‘NA’ as a feature level where it is appropriate based on the data documentation.

# Adding 'NA' as a factor level
col_list <- c('Alley', 'BsmtQual', 'BsmtCond', 'BsmtExposure', 'BsmtFinType1', 'BsmtFinType2', 'FireplaceQu', 'GarageType', 'GarageFinish', 'GarageQual', 'GarageCond', 'PoolQC', 'Fence', 'MiscFeature')
data[col_list] <- lapply(data[col_list], addNA)

Now, let’s see how much missing data remains in the dataset.

(sum(is.na(data))/(nrow(data)*ncol(data)))*100
[1] 0.3004892
missing.col <- (colSums(is.na(data))[colSums(is.na(data)) > 0]/nrow(data))*100
missing.col.names <- row.names(data.frame(missing.col))
missing.col.percent <- data.frame(missing.col)
missing.col.percent <- setorder(data.frame(missing.col), missing.col)
colnames(missing.col.percent) <- 'percent.missing'
missing.col.percent
str(data[,c('MasVnrType','MasVnrArea','GarageYrBlt','LotFrontage')])
'data.frame':   1060 obs. of  4 variables:
 $ MasVnrType : Factor w/ 4 levels "BrkCmn","BrkFace",..: 2 2 3 2 4 3 3 4 4 2 ...
 $ MasVnrArea : int  196 162 0 350 240 0 0 286 306 212 ...
 $ GarageYrBlt: int  2003 2001 1998 2000 1973 1939 1965 2005 2006 1960 ...
 $ LotFrontage: int  65 68 60 84 NA 50 70 85 91 NA ...

Wow, what a difference. We have reduced the total proportion of missing data by almost half, ~6% versus ~3%, and where we had 18 variables with missing values, we now only have 4.

Now, we will use the ‘mice’ package to impute any missing values that remain. In order to prevent the injection of any personal bias, we will preemptively split the data into a train and test set first and impute the two data sets independently. Before that, let’s see if there are any columns we can, and should, remove all together because of a lack of additional predictability or information.

# Checking which variables have only a single value for all observations
for(x in colnames(data)){
  if(length(unique(data[,x])) == 1){
    print(x)
  }
}
[1] "Utilities"

We see that ‘Utilities’ gives us no additional predicitve power as the only value present for all observations is ‘AllPub’. Further more, the ‘Id’ variable serves no purpose other than identification and should o be included in the model. For these reasons, both of these columns will be removed from the dataset.

# Dropping the 'Id' and 'Utilities' columns
data <- data[, !(names(data) %in% c('Id','Utilities'))]

Let’s continue with the imputation of the reamining missing values.

# Creating a train and test split
set.seed(2019)
split <- sample.split(data$SalePrice, SplitRatio = 0.8)
train <- subset(data, split == TRUE)
test <- subset(data, split == FALSE)
test.x <- test[ ,!(names(test) %in% c("SalePrice"))]
test.y <- test$SalePrice
# Imputing missing values
train.mids <- mice(train, m=1, method='cart', seed=2019)
test.mids <- mice(test.x, m=1, method='cart', seed=2019)
train.imputed <- complete(train.mids,1)
test.imputed <- complete(test.mids,1)
# Confirming that there are no more missing values in the dataset
train.imp.miss <- (sum(is.na(train.imputed))/(nrow(train.imputed)*ncol(train.imputed)))*100
test.imp.miss <- (sum(is.na(test.imputed))/(nrow(test.imputed)*ncol(test.imputed)))*100
print("Percent missing data in imputed train set: ")
[1] "Percent missing data in imputed train set: "
print(train.imp.miss)
[1] 0
print("Percent missing data in imputed test set: ")
[1] "Percent missing data in imputed test set: "
print(test.imp.miss)
[1] 0

Now, let’s continue by exploring the correlations between the variables in the dataset

# Creating a dataframe of correlations between SalePrice and numeric vars.
train.numeric <- dplyr::select_if(data, is.numeric)
corr.matrix <- cor(train.numeric, use = 'pairwise.complete.obs')
corr.df <- data.frame(row=rownames(corr.matrix)[row(corr.matrix)[upper.tri(corr.matrix)]], col=colnames(corr.matrix)[col(corr.matrix)[upper.tri(corr.matrix)]],corr=corr.matrix[upper.tri(corr.matrix)])
corr.df <- corr.df[order(corr.df$'corr'),]
sale.price.corr <- corr.df[corr.df$col == 'SalePrice',]
rbind(head(sale.price.corr,5), tail(sale.price.corr,5))

It looks like ‘OverallQual’, ‘GrLivArea’, ‘GarageCars’, and ‘GarageArea’ all have “significant” correlations with the response varaible, SalePrice. By “significant”, we mean a correlation above 0.6. These may be important predictors of SalePrice. Let us also visualize the distribution of SalePrice.

# Plot of SalePrice distribution
sp.plot <- ggplot(data = train.imputed, aes(x=train.imputed$SalePrice)) + geom_histogram(bins = 50, color = 'white', fill = 'dodgerblue') + 
  labs(title = "Histogram of 'SalePrice'") +
  xlab("SalePrice")
log.sp.plot <- ggplot(data = train.imputed, aes(x=log(train.imputed$SalePrice))) + geom_histogram(bins = 50, color = 'white', fill = 'dodgerblue') +
  labs(title = "Histogram of log('SalePrice')") +
  xlab("log('SalePrice')")
sp.plot

log.sp.plot

The distribution does seem to become more normal when a long transform is applied. This may improve the performance of a linear model. Will need to come back later and determine if the log trandform really has a beneficial effect.

Let’s now utilize methodical approaches to select the features that will provide us with the most predictability in our model. We will explore forward, backward, and stepwise selection as well as Lasso Regression.

# Some levels present in test set that are not accounted for in train set
 train.imputed <- train.imputed[ ,!(names(train.imputed) %in% c("Exterior1st"))]
 test.imputed <- test.imputed[ ,!(names(test.imputed) %in% c("Exterior1st"))]
# Creating a dataframe with the log transform of SalePrice
train.imputed.log <- train.imputed
train.imputed.log$SalePrice <- log(train.imputed.log$SalePrice)
# Selection results (on log(SalePrice))
step.f.log$call # Forward results
lm(formula = SalePrice ~ OverallQual + Neighborhood + GrLivArea + 
    GarageCars + OverallCond + BsmtFullBath + RoofMatl + TotalBsmtSF + 
    YearBuilt + Condition2 + MSZoning + Fireplaces + BsmtFinSF1 + 
    LotArea + Functional + SaleType + SaleCondition + Heating + 
    BldgType + ScreenPorch + YearRemodAdd + CentralAir + WoodDeckSF + 
    EnclosedPorch + KitchenQual + GarageArea + PoolQC + HeatingQC + 
    TotRmsAbvGrd + BsmtExposure + LotConfig + LotFrontage + X3SsnPorch + 
    Condition1 + GarageCond + GarageYrBlt + HouseStyle + X1stFlrSF + 
    Fence + LandSlope + Foundation, data = train.imputed.log)
print("######################")
[1] "######################"
step.b.log$call # Backward results
lm(formula = SalePrice ~ MSSubClass + MSZoning + LotFrontage + 
    LotArea + LotConfig + LandSlope + Neighborhood + Condition1 + 
    Condition2 + HouseStyle + OverallQual + OverallCond + YearBuilt + 
    YearRemodAdd + RoofMatl + MasVnrArea + Foundation + BsmtFinSF1 + 
    Heating + HeatingQC + CentralAir + X1stFlrSF + X2ndFlrSF + 
    BsmtFullBath + KitchenQual + TotRmsAbvGrd + Functional + 
    Fireplaces + GarageYrBlt + GarageCars + GarageArea + GarageCond + 
    WoodDeckSF + EnclosedPorch + X3SsnPorch + ScreenPorch + PoolQC + 
    Fence + SaleType + SaleCondition, data = train.imputed.log)
print("######################")
[1] "######################"
step.s.log$call # Stepwise results
lm(formula = SalePrice ~ MSSubClass + MSZoning + LotFrontage + 
    LotArea + LotConfig + LandSlope + Neighborhood + Condition1 + 
    Condition2 + HouseStyle + OverallQual + OverallCond + YearBuilt + 
    YearRemodAdd + RoofMatl + Foundation + BsmtFinSF1 + Heating + 
    HeatingQC + CentralAir + X1stFlrSF + X2ndFlrSF + BsmtFullBath + 
    KitchenQual + TotRmsAbvGrd + Functional + Fireplaces + GarageYrBlt + 
    GarageCars + GarageArea + GarageCond + WoodDeckSF + EnclosedPorch + 
    X3SsnPorch + ScreenPorch + PoolQC + Fence + SaleType + SaleCondition + 
    TotalBsmtSF + KitchenAbvGr, data = train.imputed.log)

Principal Component Analysis

# Conducting PCA on the stepwise selected feature sets using log SalePrice
# Scaling the subset of the predictors
train.imputed.log.pca <- train.imputed.log[,c('MSSubClass', 'MSZoning', 'LotFrontage', 
    'LotArea' , 'LotConfig' , 'LandSlope' , 'Neighborhood' , 'Condition1' , 
    'Condition2' , 'HouseStyle' , 'OverallQual' , 'OverallCond' , 'YearBuilt' , 
    'YearRemodAdd' , 'RoofMatl' , 'Foundation' , 'BsmtFinSF1' , 'Heating' , 
    'HeatingQC' , 'CentralAir' , 'X1stFlrSF' , 'X2ndFlrSF' , 'BsmtFullBath' , 
    'KitchenQual' , 'TotRmsAbvGrd' , 'Functional' , 'Fireplaces' , 'GarageYrBlt' , 
    'GarageCars' , 'GarageArea' , 'GarageCond' , 'WoodDeckSF' , 'EnclosedPorch' , 
    'X3SsnPorch' , 'ScreenPorch' , 'PoolQC' , 'Fence' , 'SaleType' , 'SaleCondition' , 
    'TotalBsmtSF' , 'KitchenAbvGr')]
test.imputed.log.pca <- test.x[,c('MSSubClass', 'MSZoning', 'LotFrontage', 
    'LotArea' , 'LotConfig' , 'LandSlope' , 'Neighborhood' , 'Condition1' , 
    'Condition2' , 'HouseStyle' , 'OverallQual' , 'OverallCond' , 'YearBuilt' , 
    'YearRemodAdd' , 'RoofMatl' , 'Foundation' , 'BsmtFinSF1' , 'Heating' , 
    'HeatingQC' , 'CentralAir' , 'X1stFlrSF' , 'X2ndFlrSF' , 'BsmtFullBath' , 
    'KitchenQual' , 'TotRmsAbvGrd' , 'Functional' , 'Fireplaces' , 'GarageYrBlt' , 
    'GarageCars' , 'GarageArea' , 'GarageCond' , 'WoodDeckSF' , 'EnclosedPorch' , 
    'X3SsnPorch' , 'ScreenPorch' , 'PoolQC' , 'Fence' , 'SaleType' , 'SaleCondition' , 
    'TotalBsmtSF' , 'KitchenAbvGr')]
unusable.cols <- c("Neighborhood", "Condition1", "HouseStyle", "RoofMatl", "Heating", "GarageCond", "SaleCondition", "LandSlope", "Condition2", "Foundation", "HeatingQC", "Functional", "PoolQC", "SaleType")
pca.matrix.train <- train.imputed.log.pca[,!(names(train.imputed.log.pca) %in% unusable.cols)]
pca.matrix.train <- model.matrix( ~.-1, data=pca.matrix.train )
pca.matrix.train <- scale(pca.matrix.train)
pca.matrix.test <- test.imputed.log.pca[,!(names(test.imputed.log.pca) %in% unusable.cols)]
pca.matrix.test <- model.matrix( ~.-1, data=pca.matrix.test )
pca.matrix.test <- scale(pca.matrix.test)
# PCA
all.pca <- prcomp(pca.matrix.train, scale = FALSE)
summary(all.pca)
Importance of components:
                          PC1     PC2     PC3     PC4     PC5     PC6     PC7     PC8     PC9
Standard deviation     2.6543 1.74991 1.49577 1.40279 1.29090 1.24162 1.20703 1.18736 1.14632
Proportion of Variance 0.1807 0.07852 0.05737 0.05046 0.04273 0.03953 0.03736 0.03615 0.03369
Cumulative Proportion  0.1807 0.25917 0.31654 0.36700 0.40973 0.44925 0.48661 0.52276 0.55645
                         PC10    PC11    PC12    PC13    PC14    PC15    PC16    PC17    PC18
Standard deviation     1.0672 1.05741 1.04090 1.02670 1.00043 0.99244 0.98070 0.96227 0.93821
Proportion of Variance 0.0292 0.02867 0.02778 0.02703 0.02566 0.02525 0.02466 0.02374 0.02257
Cumulative Proportion  0.5857 0.61433 0.64211 0.66914 0.69480 0.72005 0.74472 0.76846 0.79103
                          PC19    PC20    PC21    PC22   PC23   PC24    PC25   PC26    PC27
Standard deviation     0.91036 0.90483 0.88163 0.84133 0.8189 0.7876 0.75323 0.7441 0.69868
Proportion of Variance 0.02125 0.02099 0.01993 0.01815 0.0172 0.0159 0.01455 0.0142 0.01252
Cumulative Proportion  0.81228 0.83327 0.85320 0.87135 0.8885 0.9044 0.91900 0.9332 0.94571
                          PC28    PC29    PC30    PC31    PC32    PC33    PC34    PC35    PC36
Standard deviation     0.64126 0.57725 0.51007 0.50374 0.44622 0.39704 0.36157 0.33215 0.30779
Proportion of Variance 0.01054 0.00854 0.00667 0.00651 0.00511 0.00404 0.00335 0.00283 0.00243
Cumulative Proportion  0.95626 0.96480 0.97147 0.97798 0.98308 0.98713 0.99048 0.99331 0.99574
                          PC37   PC38      PC39
Standard deviation     0.30342 0.2724 9.809e-15
Proportion of Variance 0.00236 0.0019 0.000e+00
Cumulative Proportion  0.99810 1.0000 1.000e+00
# Visulize Scree Plot
fviz_eig(all.pca)

# Plot of individual groupings
#fviz_pca_ind(all.pca, col.ind = "cos2", gradient.cols = c("#00AFBB", "#E7B800", "#FC4E07"), repel = TRUE)
# Variable contribution to each PC
fviz_pca_var(all.pca, axes = c(1, 2), col.var = "contrib", gradient.cols = c("#00AFBB", "#E7B800", "#FC4E07"), repel = FALSE)

fviz_pca_var(all.pca, axes = c(2, 3), col.var = "contrib", gradient.cols = c("#00AFBB", "#E7B800", "#FC4E07"), repel = FALSE)

fviz_pca_var(all.pca, axes = c(3, 4), col.var = "contrib", gradient.cols = c("#00AFBB", "#E7B800", "#FC4E07"), repel = FALSE)

# Biplot
#fviz_pca_biplot(all.pca, repel = TRUE, col.var = "#2E9FDF", col.ind = "#696969")
# Predicting PCA values for each observation
pca.pred.train <- predict(all.pca, newdata = pca.matrix.train)
train.pca <- pca.pred.train
train.pca <- data.frame(train.pca)
train.pca$SalePrice <- train.imputed.log$SalePrice
pca.pred.test <- predict(all.pca, newdata = pca.matrix.test)
test.pca <- pca.pred.test
test.pca <- data.frame(test.pca)

Model Building

Let’s predict the ‘SalePrice’, of each test observation, to be the average of all the train ‘SalePrice’ values, and use the resulting metrics as a benchmark against which all following model performances will be compared.

# Let's predict the average for each test observation and use it as a benchmark
sale.price.mean <- mean(train.imputed$SalePrice)
avg.vector <- rep(c(sale.price.mean), times = 212)
cat("Bias: ", mean(avg.vector-test.y))
Bias:  7299.447
cat("\nMaximum Deviation: ", max(avg.vector-test.y))

Maximum Deviation:  146626.7
cat("\nMean Absolute Deviation: ", mean(abs(avg.vector-test.y)))

Mean Absolute Deviation:  53682.19
cat("\nMean Square Error: ", mean((avg.vector-test.y)**2))

Mean Square Error:  4959006479
cat("\nRoot Mean Square Error: ", sqrt(mean((avg.vector-test.y)**2)))

Root Mean Square Error:  70420.21

Moving on to actually building the models.

# Linear model with forward selected features (log SalePrice)
forward.lm.log <- lm(formula(step.f.log$call), data = train.imputed.log)
forward.predictions.log <- exp(predict(forward.lm.log, test.imputed))
summary(forward.lm.log)$adj.r.squared
[1] 0.9268791
cat("Bias: ", mean(forward.predictions.log-test.y))
Bias:  -522.4962
cat("\nMaximum Deviation: ", max(forward.predictions.log-test.y))

Maximum Deviation:  94900.78
cat("\nMean Absolute Deviation: ", mean(abs(forward.predictions.log-test.y)))

Mean Absolute Deviation:  15715.34
cat("\nMean Square Error: ", mean((forward.predictions.log-test.y)**2))

Mean Square Error:  516572433
cat("\nRoot Mean Square Error: ", sqrt(mean((forward.predictions.log-test.y)**2)))

Root Mean Square Error:  22728.23

Linear model with backward selected features

# Linear model with backward selected features (log SalePrice)
backward.lm.log <- lm(formula(step.b.log$call), data = train.imputed.log)
backward.predictions.log <- exp(predict(backward.lm.log, test.imputed))
summary(backward.lm.log)$adj.r.squared
[1] 0.9265143
cat("Bias: ", mean(backward.predictions.log-test.y))
Bias:  -448.4182
cat("\nMaximum Deviation: ", max(backward.predictions.log-test.y))

Maximum Deviation:  77974.29
cat("\nMean Absolute Deviation: ", mean(abs(backward.predictions.log-test.y)))

Mean Absolute Deviation:  15836.28
cat("\nMean Square Error: ", mean((backward.predictions.log-test.y)**2))

Mean Square Error:  527659567
cat("\nRoot Mean Square Error: ", sqrt(mean((backward.predictions.log-test.y)**2)))

Root Mean Square Error:  22970.84

Linear model with stepwise selected features

# Linear model with stepwise selected features (log SalePrice)
stepwise.lm.log <- lm(formula(step.s.log$call), data = train.imputed.log)
stepwise.predictions.log <- exp(predict(stepwise.lm.log, test.imputed))
summary(stepwise.lm.log)$adj.r.squared
[1] 0.9266278
cat("Bias: ", mean(stepwise.predictions.log-test.y))
Bias:  -475.9706
cat("\nMaximum Deviation: ", max(stepwise.predictions.log-test.y))

Maximum Deviation:  88851.67
cat("\nMean Absolute Deviation: ", mean(abs(stepwise.predictions.log-test.y)))

Mean Absolute Deviation:  15491.4
cat("\nMean Square Error: ", mean((stepwise.predictions.log-test.y)**2))

Mean Square Error:  507986943
cat("\nRoot Mean Square Error: ", sqrt(mean((stepwise.predictions.log-test.y)**2)))

Root Mean Square Error:  22538.57

PCA

# Linear model with PCA terms from log SalePrice and stepwise variables
pca.fit <- lm(SalePrice~., data = train.pca)
pca.predictions <- exp(predict(pca.fit, test.pca))
cat("Bias: ", mean(pca.predictions-test.y))
Bias:  2950.563
cat("\nMaximum Deviation: ", max(pca.predictions-test.y))

Maximum Deviation:  264325.1
cat("\nMean Absolute Deviation: ", mean(abs(pca.predictions-test.y)))

Mean Absolute Deviation:  71223.44
cat("\nMean Square Error: ", mean((pca.predictions-test.y)**2))

Mean Square Error:  9501566441
cat("\nRoot Mean Square Error: ", sqrt(mean((pca.predictions-test.y)**2)))

Root Mean Square Error:  97475.98

LASSO Regression

# LASSO Regression (log SalePrice)
lr.train.log <- model.matrix(SalePrice~., data = train.imputed.log)
test.imputed$SalePrice <- test.y
lr.test <- model.matrix(SalePrice~., data = test.imputed)
grid <- 10^seq(4, -2, length = 100)
set.seed (2019)
cv.lasso.log <- cv.glmnet(lr.train.log, log(train$SalePrice), alpha = 1, lambda = grid)
bestlam.log <- cv.lasso.log$lambda.min
bestlam.log # Best lambda = 0.03511192
[1] 0.03511192
set.seed(2019)
# Fitting the LASSO (log SalePrice)
fit.lasso.log <- glmnet(lr.train.log, train.imputed.log$SalePrice, alpha = 1, lambda = 0.03511192)
lasso.predict.log <- exp(predict(fit.lasso.log, lr.test))
cat("Bias: ", mean(lasso.predict.log-test.y))
Bias:  -7266.865
cat("\nMaximum Deviation: ", max(lasso.predict.log-test.y))

Maximum Deviation:  62073.74
cat("\nMean Absolute Deviation: ", mean(abs(lasso.predict.log-test.y)))

Mean Absolute Deviation:  19777
cat("\nMean Square Error: ", mean((lasso.predict.log-test.y)**2))

Mean Square Error:  868229681
cat("\nRoot Mean Square Error: ", sqrt(mean((lasso.predict.log-test.y)**2)))

Root Mean Square Error:  29465.74

A random Forest was used to determine the most significant interaction terms of the stepwise feature set.

# Linear model with stepwise selected features (log SalePrice) (with intereactions)
test.imputed$SalePrice <- NULL
stepwise.int.lm <- lm(SalePrice ~  MSZoning + LotFrontage + LotConfig +
    HouseStyle + OverallCond + YearBuilt + + LandSlope + Condition1 + Condition2 +
    YearRemodAdd + Foundation + BsmtFinSF1 + Heating + RoofMatl +
    HeatingQC + CentralAir  + X2ndFlrSF + BsmtFullBath + 
    KitchenQual + TotRmsAbvGrd + Functional + Fireplaces + GarageYrBlt + 
    GarageCars + GarageArea + GarageCond + WoodDeckSF + EnclosedPorch + 
    X3SsnPorch + ScreenPorch + PoolQC + Fence + SaleType + SaleCondition + 
    TotalBsmtSF + KitchenAbvGr + OverallQual:X1stFlrSF + OverallQual:Neighborhood +    OverallQual:LotArea  + OverallQual:BsmtUnfSF + OverallQual:BsmtFinSF1 + OverallQual:YearBuilt + OverallQual:MoSold + OverallQual:YearRemodAdd + OverallQual:BsmtExposure + OverallQual:MSSubClass, data = train.imputed.log)
stepwise.int.lm.pred <- exp(predict(stepwise.int.lm, test.imputed))
cat("Bias: ", mean(stepwise.int.lm.pred-test.y))
Bias:  -1018.832
cat("\nMaximum Deviation: ", max(stepwise.int.lm.pred-test.y))

Maximum Deviation:  79370.68
cat("\nMean Absolute Deviation: ", mean(abs(stepwise.int.lm.pred-test.y)))

Mean Absolute Deviation:  15081.78
cat("\nMean Square Error: ", mean((stepwise.int.lm.pred-test.y)**2))

Mean Square Error:  456506704
cat("\nRoot Mean Square Error: ", sqrt(mean((stepwise.int.lm.pred-test.y)**2)))

Root Mean Square Error:  21366.02
summary(stepwise.int.lm)

Call:
lm(formula = interaction.fit <- lm(data = train.imputed, SalePrice ~ 
    MSZoning + LotFrontage + LotConfig + LandSlope + Condition1 + 
        Condition2 + HouseStyle + OverallCond + YearBuilt + YearRemodAdd + 
        RoofMatl + Foundation + BsmtFinSF1 + Heating + HeatingQC + 
        CentralAir + X2ndFlrSF + BsmtFullBath + KitchenQual + 
        TotRmsAbvGrd + Functional + Fireplaces + GarageYrBlt + 
        GarageCars + GarageArea + GarageCond + WoodDeckSF + EnclosedPorch + 
        X3SsnPorch + ScreenPorch + PoolQC + Fence + SaleType + 
        SaleCondition + TotalBsmtSF + KitchenAbvGr + OverallQual:X1stFlrSF + 
        OverallQual:Neighborhood + OverallQual:LotArea + OverallQual:BsmtUnfSF + 
        OverallQual:BsmtFinSF1 + OverallQual:YearBuilt + OverallQual:MoSold + 
        OverallQual:YearRemodAdd + OverallQual:BsmtExposure + 
        OverallQual:MSSubClass), data = train.imputed.log)

Residuals:
     Min       1Q   Median       3Q      Max 
-0.62615 -0.05029  0.00291  0.05886  0.53931 

Coefficients:
                                  Estimate Std. Error t value Pr(>|t|)    
(Intercept)                      1.804e+00  1.122e+00   1.608 0.108238    
MSZoningFV                       5.828e-01  7.264e-02   8.023 4.28e-15 ***
MSZoningRH                       4.677e-01  6.827e-02   6.851 1.59e-11 ***
MSZoningRL                       4.861e-01  5.644e-02   8.613  < 2e-16 ***
MSZoningRM                       4.571e-01  5.489e-02   8.328 4.24e-16 ***
LotFrontage                      7.537e-04  2.656e-04   2.837 0.004680 ** 
LotConfigCulDSac                 6.280e-02  2.189e-02   2.868 0.004248 ** 
LotConfigFR2                    -3.125e-02  2.674e-02  -1.169 0.242913    
LotConfigFR3                    -3.626e-02  9.594e-02  -0.378 0.705616    
LotConfigInside                 -9.526e-03  1.176e-02  -0.810 0.418086    
LandSlopeMod                     4.208e-02  2.493e-02   1.688 0.091833 .  
LandSlopeSev                     7.570e-02  7.489e-02   1.011 0.312409    
Condition1Feedr                  4.735e-02  3.047e-02   1.554 0.120578    
Condition1Norm                   7.628e-02  2.363e-02   3.228 0.001302 ** 
Condition1PosA                   4.644e-02  6.314e-02   0.736 0.462256    
Condition1PosN                   9.158e-02  4.546e-02   2.015 0.044318 *  
Condition1RRAe                  -2.485e-02  5.215e-02  -0.476 0.633911    
Condition1RRAn                   4.030e-02  4.480e-02   0.900 0.368650    
Condition1RRNe                  -3.793e-02  8.888e-02  -0.427 0.669672    
Condition1RRNn                  -2.185e-02  8.157e-02  -0.268 0.788872    
Condition2Feedr                  1.631e-01  1.507e-01   1.083 0.279397    
Condition2Norm                   1.797e-02  1.303e-01   0.138 0.890400    
Condition2PosN                  -7.096e-01  1.678e-01  -4.228 2.66e-05 ***
Condition2RRAn                  -1.027e-01  1.753e-01  -0.586 0.558187    
Condition2RRNn                   3.957e-02  1.777e-01   0.223 0.823873    
HouseStyle1.5Unf                -6.340e-02  4.882e-02  -1.299 0.194510    
HouseStyle1Story                -9.156e-02  2.358e-02  -3.883 0.000113 ***
HouseStyle2.5Fin                 1.014e-01  7.576e-02   1.338 0.181206    
HouseStyle2.5Unf                 1.896e-02  5.703e-02   0.332 0.739673    
HouseStyle2Story                -5.203e-02  2.065e-02  -2.520 0.011964 *  
HouseStyleSFoyer                -9.202e-02  3.739e-02  -2.461 0.014078 *  
HouseStyleSLvl                  -6.694e-02  3.114e-02  -2.150 0.031894 *  
OverallCond                      3.352e-02  5.734e-03   5.846 7.68e-09 ***
YearBuilt                        4.374e-03  1.092e-03   4.006 6.83e-05 ***
YearRemodAdd                    -5.485e-04  1.033e-03  -0.531 0.595484    
RoofMatlCompShg                  2.731e+00  2.656e-01  10.279  < 2e-16 ***
RoofMatlMetal                    2.637e+00  3.105e-01   8.495  < 2e-16 ***
RoofMatlRoll                     2.783e+00  2.902e-01   9.590  < 2e-16 ***
RoofMatlTar&Grv                  2.797e+00  2.588e-01  10.807  < 2e-16 ***
RoofMatlWdShake                  2.733e+00  2.852e-01   9.583  < 2e-16 ***
RoofMatlWdShngl                  2.856e+00  2.650e-01  10.779  < 2e-16 ***
FoundationCBlock                 1.121e-02  1.996e-02   0.562 0.574601    
FoundationPConc                  4.915e-02  2.147e-02   2.289 0.022369 *  
FoundationSlab                  -5.521e-02  6.023e-02  -0.917 0.359700    
FoundationStone                  6.305e-02  6.269e-02   1.006 0.314913    
FoundationWood                   1.653e-02  1.236e-01   0.134 0.893606    
BsmtFinSF1                       1.910e-04  5.380e-05   3.550 0.000410 ***
HeatingGasA                      4.213e-02  1.283e-01   0.328 0.742658    
HeatingGasW                      1.314e-01  1.355e-01   0.970 0.332463    
HeatingGrav                     -2.163e-01  1.425e-01  -1.518 0.129398    
HeatingOthW                      2.348e-02  1.574e-01   0.149 0.881458    
HeatingWall                      1.238e-01  1.477e-01   0.838 0.402259    
HeatingQCFa                     -1.794e-02  3.231e-02  -0.555 0.578856    
HeatingQCGd                     -3.347e-02  1.361e-02  -2.459 0.014173 *  
HeatingQCPo                     -1.086e-01  1.368e-01  -0.794 0.427545    
HeatingQCTA                     -3.837e-02  1.346e-02  -2.851 0.004485 ** 
CentralAirY                      4.524e-02  2.247e-02   2.014 0.044413 *  
X2ndFlrSF                        1.420e-04  3.133e-05   4.531 6.89e-06 ***
BsmtFullBath                     2.236e-02  1.221e-02   1.831 0.067451 .  
KitchenQualFa                   -6.009e-02  4.521e-02  -1.329 0.184220    
KitchenQualGd                   -5.779e-02  2.135e-02  -2.707 0.006957 ** 
KitchenQualTA                   -4.373e-02  2.483e-02  -1.761 0.078618 .  
TotRmsAbvGrd                     2.300e-02  5.174e-03   4.446 1.02e-05 ***
FunctionalMaj2                  -2.407e-01  1.071e-01  -2.246 0.024981 *  
FunctionalMin1                  -4.132e-02  5.234e-02  -0.789 0.430170    
FunctionalMin2                   3.638e-03  5.152e-02   0.071 0.943723    
FunctionalMod                   -1.492e-01  6.144e-02  -2.429 0.015387 *  
FunctionalSev                   -3.773e-01  1.432e-01  -2.634 0.008613 ** 
FunctionalTyp                    1.786e-02  4.308e-02   0.415 0.678499    
Fireplaces                       3.624e-02  8.538e-03   4.245 2.48e-05 ***
GarageYrBlt                     -7.612e-04  3.821e-04  -1.992 0.046707 *  
GarageCars                       4.111e-02  1.436e-02   2.863 0.004319 ** 
GarageArea                       1.056e-04  5.136e-05   2.055 0.040245 *  
GarageCondFa                    -1.749e-01  1.278e-01  -1.368 0.171716    
GarageCondGd                    -9.761e-02  1.339e-01  -0.729 0.466126    
GarageCondPo                     3.666e-02  1.456e-01   0.252 0.801312    
GarageCondTA                    -7.495e-02  1.223e-01  -0.613 0.540099    
GarageCondNA                    -8.263e-02  1.259e-01  -0.656 0.511842    
WoodDeckSF                       1.112e-04  3.778e-05   2.944 0.003346 ** 
EnclosedPorch                    2.256e-04  8.202e-05   2.751 0.006095 ** 
X3SsnPorch                       2.743e-04  1.433e-04   1.915 0.055900 .  
ScreenPorch                      3.235e-04  7.871e-05   4.110 4.41e-05 ***
PoolQCFa                        -3.727e-01  1.813e-01  -2.056 0.040188 *  
PoolQCGd                         3.161e-01  2.030e-01   1.557 0.119977    
PoolQCNA                        -1.970e-01  1.327e-01  -1.484 0.138257    
FenceGdWo                       -5.835e-02  3.185e-02  -1.832 0.067358 .  
FenceMnPrv                      -1.243e-02  2.666e-02  -0.466 0.641242    
FenceMnWw                       -6.348e-02  5.800e-02  -1.095 0.274078    
FenceNA                          7.583e-04  2.432e-02   0.031 0.975135    
SaleTypeCon                      5.351e-02  1.198e-01   0.447 0.655369    
SaleTypeConLD                    1.975e-01  5.999e-02   3.293 0.001042 ** 
SaleTypeConLI                   -8.785e-02  9.067e-02  -0.969 0.332929    
SaleTypeConLw                    6.888e-02  7.949e-02   0.867 0.386508    
SaleTypeCWD                      1.408e-01  7.390e-02   1.906 0.057070 .  
SaleTypeNew                      6.309e-01  1.586e-01   3.978 7.66e-05 ***
SaleTypeOth                      5.875e-02  7.512e-02   0.782 0.434434    
SaleTypeWD                      -1.272e-02  2.870e-02  -0.443 0.657699    
SaleConditionAdjLand             8.640e-02  8.882e-02   0.973 0.331001    
SaleConditionAlloca             -1.780e-02  5.885e-02  -0.302 0.762403    
SaleConditionFamily              9.193e-02  5.226e-02   1.759 0.078993 .  
SaleConditionNormal              6.864e-02  1.885e-02   3.642 0.000290 ***
SaleConditionPartial            -5.287e-01  1.563e-01  -3.382 0.000759 ***
TotalBsmtSF                      1.588e-04  3.468e-05   4.577 5.57e-06 ***
KitchenAbvGr                    -2.530e-02  2.795e-02  -0.905 0.365684    
OverallQual:X1stFlrSF            2.739e-05  4.921e-06   5.567 3.69e-08 ***
OverallQual:NeighborhoodBlueste -6.528e-03  2.119e-02  -0.308 0.758133    
OverallQual:NeighborhoodBrDale  -1.859e-02  9.835e-03  -1.890 0.059163 .  
OverallQual:NeighborhoodBrkSide  8.487e-04  8.478e-03   0.100 0.920288    
OverallQual:NeighborhoodClearCr  4.721e-03  7.753e-03   0.609 0.542770    
OverallQual:NeighborhoodCollgCr -4.921e-04  5.552e-03  -0.089 0.929389    
OverallQual:NeighborhoodCrawfor  1.738e-02  7.112e-03   2.444 0.014777 *  
OverallQual:NeighborhoodEdwards -2.295e-02  6.507e-03  -3.527 0.000447 ***
OverallQual:NeighborhoodGilbert -1.160e-03  6.092e-03  -0.190 0.848984    
OverallQual:NeighborhoodIDOTRR  -9.217e-03  9.888e-03  -0.932 0.351555    
OverallQual:NeighborhoodMeadowV -4.061e-02  1.113e-02  -3.649 0.000283 ***
OverallQual:NeighborhoodMitchel -5.325e-03  6.960e-03  -0.765 0.444462    
OverallQual:NeighborhoodNAmes   -3.707e-03  6.309e-03  -0.588 0.556943    
OverallQual:NeighborhoodNoRidge  6.057e-03  6.319e-03   0.959 0.338115    
OverallQual:NeighborhoodNPkVill -2.408e-03  1.022e-02  -0.235 0.813900    
OverallQual:NeighborhoodNridgHt  1.338e-02  5.534e-03   2.418 0.015879 *  
OverallQual:NeighborhoodNWAmes  -1.625e-03  6.304e-03  -0.258 0.796600    
OverallQual:NeighborhoodOldTown -5.875e-03  8.291e-03  -0.709 0.478810    
OverallQual:NeighborhoodSawyer  -9.485e-03  6.922e-03  -1.370 0.171020    
OverallQual:NeighborhoodSawyerW -2.044e-03  6.092e-03  -0.336 0.737331    
OverallQual:NeighborhoodSomerst -2.759e-03  7.837e-03  -0.352 0.724872    
OverallQual:NeighborhoodStoneBr  2.228e-02  7.037e-03   3.166 0.001611 ** 
OverallQual:NeighborhoodSWISU    2.798e-04  8.639e-03   0.032 0.974171    
OverallQual:NeighborhoodTimber   4.355e-03  6.181e-03   0.705 0.481243    
OverallQual:NeighborhoodVeenker  5.475e-03  1.279e-02   0.428 0.668703    
OverallQual:LotArea              1.384e-07  9.581e-08   1.444 0.149109    
OverallQual:BsmtUnfSF           -1.495e-05  4.824e-06  -3.099 0.002019 ** 
BsmtFinSF1:OverallQual          -2.948e-05  8.880e-06  -3.320 0.000948 ***
YearBuilt:OverallQual           -3.135e-04  1.802e-04  -1.740 0.082318 .  
OverallQual:MoSold              -1.338e-04  2.374e-04  -0.563 0.573296    
YearRemodAdd:OverallQual         3.313e-04  1.783e-04   1.859 0.063480 .  
OverallQual:BsmtExposureGd       4.287e-03  2.830e-03   1.515 0.130277    
OverallQual:BsmtExposureMn      -1.859e-03  3.017e-03  -0.616 0.537908    
OverallQual:BsmtExposureNo      -1.124e-03  2.067e-03  -0.544 0.586821    
OverallQual:BsmtExposureNA       1.607e-02  1.072e-02   1.499 0.134331    
OverallQual:MSSubClass          -5.146e-05  2.815e-05  -1.828 0.067943 .  
---
Signif. codes:  0 ‘***’ 0.001 ‘**’ 0.01 ‘*’ 0.05 ‘.’ 0.1 ‘ ’ 1

Residual standard error: 0.1126 on 708 degrees of freedom
Multiple R-squared:  0.9382,    Adjusted R-squared:  0.9261 
F-statistic: 77.34 on 139 and 708 DF,  p-value: < 2.2e-16

Based on our findings, we see a couple things. The cross validation selection of lambda degrades the performance of the LASSO Regression, LASSO performs worse than any of our other models. Training on the log of SalePrice improves model performances the most. there are only a few ordianl variables, so there is little to be gained from cahnging them from numeric to categorical variables. In the end, we went with the multiple linear regression model with the stepwise feature set including interaction terms as it exhibited the most favorable performance metrics on the validation set.

Results

Now let’s check the diagnostics for the forward selected linear model as it seems to be the ‘best’ model because it contains the variable with the strongest correlation with the response variable.

#Residual Plots
plot(stepwise.int.lm)
not plotting observations with leverage one:
  6, 76, 157, 194, 330, 351, 357, 397, 454, 592, 687, 689, 739, 755, 766, 807, 831

not plotting observations with leverage one:
  6, 76, 157, 194, 330, 351, 357, 397, 454, 592, 687, 689, 739, 755, 766, 807, 831

# Plot of predicted versus true reponse test values
plot(test.y,stepwise.int.lm.pred, xlab="Observed Values", ylab="Predicted Values") 
title("Observed vs. Predicted Values")
abline(0,1, col = 'red')

Conclusions

Final model performnace on test set

# Importing the data
test.data <- read.csv("test.csv")
#cols.keep <- c("MSZoning", "LotFrontage" , "LotConfig" , "LandSlope" , "Condition1" , 
#         "HouseStyle" , "OverallCond" , "YearBuilt" , "YearRemodAdd" , 
#         "Foundation" , "BsmtFinSF1",  "Heating" , "HeatingQC" , 
#        "CentralAir" , "X2ndFlrSF" , "BsmtFullBath" , "KitchenQual" , 
#        "TotRmsAbvGrd" , "Functional" , "Fireplaces" , "GarageYrBlt" ,
#        "GarageCars" , "GarageArea" , "GarageCond" , "WoodDeckSF" , "EnclosedPorch" , 
#        "X3SsnPorch" , "ScreenPorch" , "PoolQC" , "Fence" , "SaleType" , 
#        "SaleCondition" , "TotalBsmtSF" , "KitchenAbvGr", "OverallQual", "X1stFlrSF", #"Neighborhood",         "LotArea", "BsmtUnfSF", "BsmtFinSF1", "YearBuilt", "MoSold", "YearRemodAdd", #"BsmtExposure",         "MSSubClass")
# Leave these out: 
test.data.x <- test.data[,-81]
test.data.y <- test.data[,81]

Any missing values?

(sum(is.na(test.data.x))/(nrow(test.data.x)*ncol(test.data.x)))*100
[1] 5.975
# Adding 'NA' as a factor level
col_list <- c('Alley', 'BsmtQual', 'BsmtCond', 'BsmtExposure', 'BsmtFinType1', 'BsmtFinType2', 'FireplaceQu', 'GarageType', 'GarageFinish', 'GarageQual', 'GarageCond', 'PoolQC', 'Fence', 'MiscFeature')
test.data.x[col_list] <- lapply(test.data.x[col_list], addNA)
(sum(is.na(test.data.x))/(nrow(test.data.x)*ncol(test.data.x)))*100
[1] 0.309375
# Imputing missing values
final.train.mids <- mice(test.data.x, m=1, method='cart', seed=2019)

 iter imp variable
  1   1  LotFrontage  MasVnrType  MasVnrArea  Electrical  GarageYrBlt
  2   1  LotFrontage  MasVnrType  MasVnrArea  Electrical  GarageYrBlt
  3   1  LotFrontage  MasVnrType  MasVnrArea  Electrical  GarageYrBlt
  4   1  LotFrontage  MasVnrType  MasVnrArea  Electrical  GarageYrBlt
  5   1  LotFrontage  MasVnrType  MasVnrArea  Electrical  GarageYrBlt
Number of logged events: 26
test.data.x <- complete(final.train.mids,1)
(sum(is.na(test.data.x))/(nrow(test.data.x)*ncol(test.data.x)))*100
[1] 0
test.data <- cbind(test.data.x, test.data.y)
#test.data[test.data$Condition2=="PosA" | test.data$Condition2=="RRAe",]
#test.data[test.data$RoofMatl=="Membran",]
test.data <- test.data[-c(77,158,337),]
new.test.x <- test.data[,-81]
new.test.y <- test.data[,81]
final.pred <- exp(predict(stepwise.int.lm, new.test.x))
cat("Bias: ", mean(final.pred-new.test.y))
Bias:  1628.562
cat("\nMaximum Deviation: ", max(final.pred-new.test.y))

Maximum Deviation:  98258.15
cat("\nMean Absolute Deviation: ", mean(abs(final.pred-new.test.y)))

Mean Absolute Deviation:  14952.98
cat("\nMean Square Error: ", mean((final.pred-new.test.y)**2))

Mean Square Error:  476726595
cat("\nRoot Mean Square Error: ", sqrt(mean((final.pred-new.test.y)**2)))

Root Mean Square Error:  21834.07

3 observations removed from the final test set; new levels in “Condition2” and “RoofMatl” not accounted for by model.

LS0tCnRpdGxlOiAiR0xNIFByb2plY3QiCm91dHB1dDogaHRtbF9ub3RlYm9vawplZGl0b3Jfb3B0aW9uczogCiAgY2h1bmtfb3V0cHV0X3R5cGU6IGlubGluZQotLS0KCkluIHRoaXMgcHJvamVjdCwgd2UgYXJlIHByZXNlbnRlZCB3aXRoIGEgdHJhaW4gYW5kIHRlc3Qgc2V0IG9mIHRoZSBBbWVzIElvd2EgaG91c2UgZGF0YSBmcm9tIEthZ2dsZS4gVGhlIGdvYWwgaXMgdG8gY3JlYXRlIGEgbW9kZWwgdGhhdCBpcyBjYXBhYmxlIG9mIHByZWRpY3RpbmcgaG91c2UgcHJpY2VzIHVzaW5nIHRoZSBhdmFpbGFibGUgdmFyaWFibGVzIGluIHRoZSBkYXRhc2V0LiBUaGUgTXVsaXRwbGUgTGluZWFyIFJlZ3Jlc3Npb24gbW9kZWwgd2lsbCBiZSBleHBsb3JlZCBhbmQgdXRpbGl6ZWQgaW4gdGhpcyBhbmFseXNpcy4gCgoqIFtEYXRhIEV4cGxvcmF0aW9uICYgQ2xlYW5pbmddKCNkYXRhZXhwbG9yYXRpb25hbmRjbGVhbmluZykKKiBbUHJpbmNpcGFsIENvbXBvbmVudCBBbmFseXNpc10oI3ByaW5jaXBhbGNvbXBvbmVudGFuYWx5c2lzKQoqIFtNb2RlbCBCdWlsZGluZ10oI21vZGVsYnVpbGRpbmcpCiogW1Jlc3VsdHNdKCNyZXN1bHRzKQoqIFtDb25jbHVzaW9uc10oI2NvbmNsdXNpb25zKQoKIyMgRGF0YSBFeHBsb3JhdGlvbiAmIENsZWFuaW5nIHsjZGF0YWV4cGxvcmF0aW9uYW5kY2xlYW5pbmd9CgpgYGB7cixtZXNzYWdlPUZBTFNFfQojIEltcG9ydGluZyB0aGUgbmVjZXNzYXJ5IGxpYnJhcmllcyBhbmQgcGFja2FnZXMKbGlicmFyeShnZ3Bsb3QyKQpsaWJyYXJ5KGRhdGEudGFibGUpCmxpYnJhcnkobWljZSkKbGlicmFyeShjYVRvb2xzKQpsaWJyYXJ5KGRwbHlyKQpsaWJyYXJ5KE1BU1MpCmxpYnJhcnkoZ2xtbmV0KQpsaWJyYXJ5KGZhY3RvZXh0cmEpCmxpYnJhcnkocmFuZG9tRm9yZXN0KQpgYGAKCmBgYHtyfQojIEltcG9ydGluZyB0aGUgZGF0YQpkYXRhIDwtIHJlYWQuY3N2KCJ0cmFpbmluZy5jc3YiKQpgYGAKCmBgYHtyfQojIEV4cGxvcmluZyBiYXNpYyBpbmZvcm1hdGlvbiBhYm91dCB0aGUgZGF0YQpjb2xuYW1lcyhkYXRhKQpzdHIoZGF0YSkKYGBgCgpJdCBsb29rcyBsaWtlIHdlIGhhdmUgMTA2MCBvYnNlcnZhdGlvbnMgYW5kIDgwIHZhcmlhYmxlcywgZXhjbHVkaW5nIHRhcmdldCB2YXJpYWJsZXMgJ1NhbGVQcmljZSc7IDIzIG5vbWluYWwsIDIzIG9yZGluYWwsIDE0IGRpc2NyZXRlLCBhbmQgMjAgY29udGludW91cy4gQWxzbywgaXQgaXMgY2xlYXIgdGhhdCB3ZSBhcmUgbWlzc2luZyB2YWx1ZXMgaW4gb3VyIGRhdGFzZXQuIExldCdzIGRldGVybWluZSB0aGUgZGlzdHJpYnV0aW9uIG9mIHRoZSBtaXNpbmcgZGF0YS4KCmBgYHtyfQojIENoZWNraW5nIHdoYXQgcHJjZW50IG9mIGFsbCB0aGUgZGF0YSBpcyBjb21wb3NlZCBvZiBtaXNzaW5nIHZhbHVlcyAKKHN1bShpcy5uYShkYXRhKSkvKG5yb3coZGF0YSkqbmNvbChkYXRhKSkpKjEwMApgYGAKCmBgYHtyfQojIExldCdzIGdldCBhIGJyZWFrIGRvd24gb2YgdGhlIG51bWJlciBvZiBtaXNzaW5nIHZhbHVlcyBpbiBlYWNoIGNvbHVtbgoKbWlzc2luZy5jb2wgPC0gKGNvbFN1bXMoaXMubmEoZGF0YSkpW2NvbFN1bXMoaXMubmEoZGF0YSkpID4gMF0vbnJvdyhkYXRhKSkqMTAwCm1pc3NpbmcuY29sLm5hbWVzIDwtIHJvdy5uYW1lcyhkYXRhLmZyYW1lKG1pc3NpbmcuY29sKSkKbWlzc2luZy5jb2wucGVyY2VudCA8LSBkYXRhLmZyYW1lKG1pc3NpbmcuY29sKQptaXNzaW5nLmNvbC5wZXJjZW50IDwtIHNldG9yZGVyKGRhdGEuZnJhbWUobWlzc2luZy5jb2wpLCBtaXNzaW5nLmNvbCkKY29sbmFtZXMobWlzc2luZy5jb2wucGVyY2VudCkgPC0gJ3BlcmNlbnQubWlzc2luZycKbWlzc2luZy5jb2wucGVyY2VudApgYGAKCkl0IHNlZW1zIHRoYXQgJ0xvdEZyb250YWdlJywgJ0ZpcmVwbGFjZVF1JywgJ0ZlbmNlJywgJ0FsbGV5JywgJ01pc2NGZWF0dXJlJywgYW5kICdQb29sUUMnIHBvc3Nlc3Mgc2lnbmlmaWNhbnQgcHJvcG9ydGlvbnMgb2YgbWlzc2luZyBkYXRhLCBvdmVyIDUlLiBNb3JlIHByZXNzaW5nIGlzIHRoZSBmYWN0IHRoYXQgc29tZSBvZiB0aGUgdmFyaWFibGVzLCBzdWNoIGFzICdQb29sUUMnLCBjb250YWluIG5vIGRhdGEgYXQgYWxsIGFuZCBldmVuICdMb3RGcm9udGFnZScsIHdoaWNoIGhhcyB0aGUgbGVhc3QgbWlzc2luZyBkYXRhIG9mIHRoZSB2YXJpYWJsZXMgcHJldmlvdXNseSBtZW50aW9uZWQsIGlzIG1pc3NpbmcgYWxtb3N0IDIwJSBvZiBpdHMgZGF0YS4KCkFmdGVyIGNvbnN1bHRpbmcgdGhlIGRhdGEgZG9jdW1lbnRhdGlvbiwgaXQgc2VlbXMgdGhhdCBtYW55IG9mIHRoZSB2YXJpYWJsZXMgdXNlICdOQScgYXMgcmVzcHJlc2VudGluZyB0aGUgYWJzZW5jZSBvZiBhIGZ1dHVyZS4gRm9yIGV4YW1wbGUsICdGaXJlcGxhY2VRdScgcmVwcmVzZW50cyBmaXJlcGxhY2UgcXVhbGl0eSBvZiBhIGhvbWUuIE5vdywgYWx0aG91Z2ggdGhpcyB2YXJpYWJsZSBpcyBtaXNzaW5nIGFsbW9zdCBoYWxmIG9mIGl0J3MgZGF0YSwgdGhhdCBpcyBkdWUgdG8gdGhlIGZhY3QgdGhhdCBhIG1pc3NpbmcgdmFsdWUgZGVub3RlcyB0aGUgYWJzZW5jZSBvZiBhIGZpcmVwbGFjZSBpbiB0aGUgaG9tZSBhbmQgbm90IGEgbWlzc2luZyBwaWVjZSBvZiBpbmZyb21hdGlvbiBvZiBkYXRhLiBTbywgYmVmb3JlIGNvbnRpbnVpbmcsIGxldCdzIGFkZCAnTkEnIGFzIGEgZmVhdHVyZSBsZXZlbCB3aGVyZSBpdCBpcyBhcHByb3ByaWF0ZSBiYXNlZCBvbiB0aGUgZGF0YSBkb2N1bWVudGF0aW9uLgoKYGBge3J9CiMgQWRkaW5nICdOQScgYXMgYSBmYWN0b3IgbGV2ZWwKCmNvbF9saXN0IDwtIGMoJ0FsbGV5JywgJ0JzbXRRdWFsJywgJ0JzbXRDb25kJywgJ0JzbXRFeHBvc3VyZScsICdCc210RmluVHlwZTEnLCAnQnNtdEZpblR5cGUyJywgJ0ZpcmVwbGFjZVF1JywgJ0dhcmFnZVR5cGUnLCAnR2FyYWdlRmluaXNoJywgJ0dhcmFnZVF1YWwnLCAnR2FyYWdlQ29uZCcsICdQb29sUUMnLCAnRmVuY2UnLCAnTWlzY0ZlYXR1cmUnKQoKZGF0YVtjb2xfbGlzdF0gPC0gbGFwcGx5KGRhdGFbY29sX2xpc3RdLCBhZGROQSkKYGBgCgpOb3csIGxldCdzIHNlZSBob3cgbXVjaCBtaXNzaW5nIGRhdGEgcmVtYWlucyBpbiB0aGUgZGF0YXNldC4KCmBgYHtyfQooc3VtKGlzLm5hKGRhdGEpKS8obnJvdyhkYXRhKSpuY29sKGRhdGEpKSkqMTAwCmBgYAoKYGBge3J9Cm1pc3NpbmcuY29sIDwtIChjb2xTdW1zKGlzLm5hKGRhdGEpKVtjb2xTdW1zKGlzLm5hKGRhdGEpKSA+IDBdL25yb3coZGF0YSkpKjEwMAptaXNzaW5nLmNvbC5uYW1lcyA8LSByb3cubmFtZXMoZGF0YS5mcmFtZShtaXNzaW5nLmNvbCkpCm1pc3NpbmcuY29sLnBlcmNlbnQgPC0gZGF0YS5mcmFtZShtaXNzaW5nLmNvbCkKbWlzc2luZy5jb2wucGVyY2VudCA8LSBzZXRvcmRlcihkYXRhLmZyYW1lKG1pc3NpbmcuY29sKSwgbWlzc2luZy5jb2wpCmNvbG5hbWVzKG1pc3NpbmcuY29sLnBlcmNlbnQpIDwtICdwZXJjZW50Lm1pc3NpbmcnCm1pc3NpbmcuY29sLnBlcmNlbnQKYGBgCgpgYGB7cn0Kc3RyKGRhdGFbLGMoJ01hc1ZuclR5cGUnLCdNYXNWbnJBcmVhJywnR2FyYWdlWXJCbHQnLCdMb3RGcm9udGFnZScpXSkKYGBgCgpXb3csIHdoYXQgYSBkaWZmZXJlbmNlLiBXZSBoYXZlIHJlZHVjZWQgdGhlIHRvdGFsIHByb3BvcnRpb24gb2YgbWlzc2luZyBkYXRhIGJ5IGFsbW9zdCBoYWxmLCB+NiUgdmVyc3VzIH4zJSwgYW5kIHdoZXJlIHdlIGhhZCAxOCB2YXJpYWJsZXMgd2l0aCBtaXNzaW5nIHZhbHVlcywgd2Ugbm93IG9ubHkgaGF2ZSA0LiAKCk5vdywgd2Ugd2lsbCB1c2UgdGhlICdtaWNlJyBwYWNrYWdlIHRvIGltcHV0ZSBhbnkgbWlzc2luZyB2YWx1ZXMgdGhhdCByZW1haW4uIEluIG9yZGVyIHRvIHByZXZlbnQgdGhlIGluamVjdGlvbiBvZiBhbnkgcGVyc29uYWwgYmlhcywgd2Ugd2lsbCBwcmVlbXB0aXZlbHkgc3BsaXQgdGhlIGRhdGEgaW50byBhIHRyYWluIGFuZCB0ZXN0IHNldCBmaXJzdCBhbmQgaW1wdXRlIHRoZSB0d28gZGF0YSBzZXRzIGluZGVwZW5kZW50bHkuIEJlZm9yZSB0aGF0LCBsZXQncyBzZWUgaWYgdGhlcmUgYXJlIGFueSBjb2x1bW5zIHdlIGNhbiwgYW5kIHNob3VsZCwgcmVtb3ZlIGFsbCB0b2dldGhlciBiZWNhdXNlIG9mIGEgbGFjayBvZiBhZGRpdGlvbmFsIHByZWRpY3RhYmlsaXR5IG9yIGluZm9ybWF0aW9uLgoKYGBge3J9CiMgQ2hlY2tpbmcgd2hpY2ggdmFyaWFibGVzIGhhdmUgb25seSBhIHNpbmdsZSB2YWx1ZSBmb3IgYWxsIG9ic2VydmF0aW9ucwpmb3IoeCBpbiBjb2xuYW1lcyhkYXRhKSl7CiAgaWYobGVuZ3RoKHVuaXF1ZShkYXRhWyx4XSkpID09IDEpewogICAgcHJpbnQoeCkKICB9Cn0KYGBgCgpXZSBzZWUgdGhhdCAnVXRpbGl0aWVzJyBnaXZlcyB1cyBubyBhZGRpdGlvbmFsIHByZWRpY2l0dmUgcG93ZXIgYXMgdGhlIG9ubHkgdmFsdWUgcHJlc2VudCBmb3IgYWxsIG9ic2VydmF0aW9ucyBpcyAnQWxsUHViJy4gRnVydGhlciBtb3JlLCB0aGUgJ0lkJyB2YXJpYWJsZSBzZXJ2ZXMgbm8gcHVycG9zZSBvdGhlciB0aGFuIGlkZW50aWZpY2F0aW9uIGFuZCBzaG91bGQgbyBiZSBpbmNsdWRlZCBpbiB0aGUgbW9kZWwuIEZvciB0aGVzZSByZWFzb25zLCBib3RoIG9mIHRoZXNlIGNvbHVtbnMgd2lsbCBiZSByZW1vdmVkIGZyb20gdGhlIGRhdGFzZXQuCgpgYGB7cn0KIyBEcm9wcGluZyB0aGUgJ0lkJyBhbmQgJ1V0aWxpdGllcycgY29sdW1ucwpkYXRhIDwtIGRhdGFbLCAhKG5hbWVzKGRhdGEpICVpbiUgYygnSWQnLCdVdGlsaXRpZXMnKSldCmBgYAoKTGV0J3MgY29udGludWUgd2l0aCB0aGUgaW1wdXRhdGlvbiBvZiB0aGUgcmVhbWluaW5nIG1pc3NpbmcgdmFsdWVzLgoKYGBge3IscmVzdWx0cz0naGlkZSd9CiMgQ3JlYXRpbmcgYSB0cmFpbiBhbmQgdGVzdCBzcGxpdApzZXQuc2VlZCgyMDE5KQpzcGxpdCA8LSBzYW1wbGUuc3BsaXQoZGF0YSRTYWxlUHJpY2UsIFNwbGl0UmF0aW8gPSAwLjgpCnRyYWluIDwtIHN1YnNldChkYXRhLCBzcGxpdCA9PSBUUlVFKQp0ZXN0IDwtIHN1YnNldChkYXRhLCBzcGxpdCA9PSBGQUxTRSkKdGVzdC54IDwtIHRlc3RbICwhKG5hbWVzKHRlc3QpICVpbiUgYygiU2FsZVByaWNlIikpXQp0ZXN0LnkgPC0gdGVzdCRTYWxlUHJpY2UKCiMgSW1wdXRpbmcgbWlzc2luZyB2YWx1ZXMKdHJhaW4ubWlkcyA8LSBtaWNlKHRyYWluLCBtPTEsIG1ldGhvZD0nY2FydCcsIHNlZWQ9MjAxOSkKdGVzdC5taWRzIDwtIG1pY2UodGVzdC54LCBtPTEsIG1ldGhvZD0nY2FydCcsIHNlZWQ9MjAxOSkKCnRyYWluLmltcHV0ZWQgPC0gY29tcGxldGUodHJhaW4ubWlkcywxKQp0ZXN0LmltcHV0ZWQgPC0gY29tcGxldGUodGVzdC5taWRzLDEpCmBgYCAKCmBgYHtyfQojIENvbmZpcm1pbmcgdGhhdCB0aGVyZSBhcmUgbm8gbW9yZSBtaXNzaW5nIHZhbHVlcyBpbiB0aGUgZGF0YXNldAp0cmFpbi5pbXAubWlzcyA8LSAoc3VtKGlzLm5hKHRyYWluLmltcHV0ZWQpKS8obnJvdyh0cmFpbi5pbXB1dGVkKSpuY29sKHRyYWluLmltcHV0ZWQpKSkqMTAwCnRlc3QuaW1wLm1pc3MgPC0gKHN1bShpcy5uYSh0ZXN0LmltcHV0ZWQpKS8obnJvdyh0ZXN0LmltcHV0ZWQpKm5jb2wodGVzdC5pbXB1dGVkKSkpKjEwMApwcmludCgiUGVyY2VudCBtaXNzaW5nIGRhdGEgaW4gaW1wdXRlZCB0cmFpbiBzZXQ6ICIpCnByaW50KHRyYWluLmltcC5taXNzKQpwcmludCgiUGVyY2VudCBtaXNzaW5nIGRhdGEgaW4gaW1wdXRlZCB0ZXN0IHNldDogIikKcHJpbnQodGVzdC5pbXAubWlzcykKYGBgCgpOb3csIGxldCdzIGNvbnRpbnVlIGJ5IGV4cGxvcmluZyB0aGUgY29ycmVsYXRpb25zIGJldHdlZW4gdGhlIHZhcmlhYmxlcyBpbiB0aGUgZGF0YXNldCAKCmBgYHtyfQojIENyZWF0aW5nIGEgZGF0YWZyYW1lIG9mIGNvcnJlbGF0aW9ucyBiZXR3ZWVuIFNhbGVQcmljZSBhbmQgbnVtZXJpYyB2YXJzLgp0cmFpbi5udW1lcmljIDwtIGRwbHlyOjpzZWxlY3RfaWYoZGF0YSwgaXMubnVtZXJpYykKY29yci5tYXRyaXggPC0gY29yKHRyYWluLm51bWVyaWMsIHVzZSA9ICdwYWlyd2lzZS5jb21wbGV0ZS5vYnMnKQpjb3JyLmRmIDwtIGRhdGEuZnJhbWUocm93PXJvd25hbWVzKGNvcnIubWF0cml4KVtyb3coY29yci5tYXRyaXgpW3VwcGVyLnRyaShjb3JyLm1hdHJpeCldXSwgY29sPWNvbG5hbWVzKGNvcnIubWF0cml4KVtjb2woY29yci5tYXRyaXgpW3VwcGVyLnRyaShjb3JyLm1hdHJpeCldXSxjb3JyPWNvcnIubWF0cml4W3VwcGVyLnRyaShjb3JyLm1hdHJpeCldKQpjb3JyLmRmIDwtIGNvcnIuZGZbb3JkZXIoY29yci5kZiQnY29ycicpLF0Kc2FsZS5wcmljZS5jb3JyIDwtIGNvcnIuZGZbY29yci5kZiRjb2wgPT0gJ1NhbGVQcmljZScsXQpyYmluZChoZWFkKHNhbGUucHJpY2UuY29yciw1KSwgdGFpbChzYWxlLnByaWNlLmNvcnIsNSkpCmBgYAoKSXQgbG9va3MgbGlrZSAnT3ZlcmFsbFF1YWwnLCAnR3JMaXZBcmVhJywgJ0dhcmFnZUNhcnMnLCBhbmQgJ0dhcmFnZUFyZWEnIGFsbCBoYXZlICJzaWduaWZpY2FudCIgY29ycmVsYXRpb25zIHdpdGggdGhlIHJlc3BvbnNlIHZhcmFpYmxlLCBTYWxlUHJpY2UuIEJ5ICJzaWduaWZpY2FudCIsIHdlIG1lYW4gYSBjb3JyZWxhdGlvbiBhYm92ZSAwLjYuIFRoZXNlIG1heSBiZSBpbXBvcnRhbnQgcHJlZGljdG9ycyBvZiBTYWxlUHJpY2UuIExldCB1cyBhbHNvIHZpc3VhbGl6ZSB0aGUgZGlzdHJpYnV0aW9uIG9mIFNhbGVQcmljZS4KCmBgYHtyfQojIFBsb3Qgb2YgU2FsZVByaWNlIGRpc3RyaWJ1dGlvbgpzcC5wbG90IDwtIGdncGxvdChkYXRhID0gdHJhaW4uaW1wdXRlZCwgYWVzKHg9dHJhaW4uaW1wdXRlZCRTYWxlUHJpY2UpKSArIGdlb21faGlzdG9ncmFtKGJpbnMgPSA1MCwgY29sb3IgPSAnd2hpdGUnLCBmaWxsID0gJ2RvZGdlcmJsdWUnKSArIAogIGxhYnModGl0bGUgPSAiSGlzdG9ncmFtIG9mICdTYWxlUHJpY2UnIikgKwogIHhsYWIoIlNhbGVQcmljZSIpCgpsb2cuc3AucGxvdCA8LSBnZ3Bsb3QoZGF0YSA9IHRyYWluLmltcHV0ZWQsIGFlcyh4PWxvZyh0cmFpbi5pbXB1dGVkJFNhbGVQcmljZSkpKSArIGdlb21faGlzdG9ncmFtKGJpbnMgPSA1MCwgY29sb3IgPSAnd2hpdGUnLCBmaWxsID0gJ2RvZGdlcmJsdWUnKSArCiAgbGFicyh0aXRsZSA9ICJIaXN0b2dyYW0gb2YgbG9nKCdTYWxlUHJpY2UnKSIpICsKICB4bGFiKCJsb2coJ1NhbGVQcmljZScpIikKCnNwLnBsb3QKbG9nLnNwLnBsb3QKYGBgCgpUaGUgZGlzdHJpYnV0aW9uIGRvZXMgc2VlbSB0byBiZWNvbWUgbW9yZSBub3JtYWwgd2hlbiBhIGxvbmcgdHJhbnNmb3JtIGlzIGFwcGxpZWQuIFRoaXMgbWF5IGltcHJvdmUgdGhlIHBlcmZvcm1hbmNlIG9mIGEgbGluZWFyIG1vZGVsLiBXaWxsIG5lZWQgdG8gY29tZSBiYWNrIGxhdGVyIGFuZCBkZXRlcm1pbmUgaWYgdGhlIGxvZyB0cmFuZGZvcm0gcmVhbGx5IGhhcyBhIGJlbmVmaWNpYWwgZWZmZWN0LiAKCkxldCdzIG5vdyB1dGlsaXplIG1ldGhvZGljYWwgYXBwcm9hY2hlcyB0byBzZWxlY3QgdGhlIGZlYXR1cmVzIHRoYXQgd2lsbCBwcm92aWRlIHVzIHdpdGggdGhlIG1vc3QgcHJlZGljdGFiaWxpdHkgaW4gb3VyIG1vZGVsLiBXZSB3aWxsIGV4cGxvcmUgZm9yd2FyZCwgYmFja3dhcmQsIGFuZCBzdGVwd2lzZSBzZWxlY3Rpb24gYXMgd2VsbCBhcyBMYXNzbyBSZWdyZXNzaW9uLgoKYGBge3J9CiMgU29tZSBsZXZlbHMgcHJlc2VudCBpbiB0ZXN0IHNldCB0aGF0IGFyZSBub3QgYWNjb3VudGVkIGZvciBpbiB0cmFpbiBzZXQKIHRyYWluLmltcHV0ZWQgPC0gdHJhaW4uaW1wdXRlZFsgLCEobmFtZXModHJhaW4uaW1wdXRlZCkgJWluJSBjKCJFeHRlcmlvcjFzdCIpKV0KIHRlc3QuaW1wdXRlZCA8LSB0ZXN0LmltcHV0ZWRbICwhKG5hbWVzKHRlc3QuaW1wdXRlZCkgJWluJSBjKCJFeHRlcmlvcjFzdCIpKV0KYGBgCgpgYGB7cn0KIyBDcmVhdGluZyBhIGRhdGFmcmFtZSB3aXRoIHRoZSBsb2cgdHJhbnNmb3JtIG9mIFNhbGVQcmljZQp0cmFpbi5pbXB1dGVkLmxvZyA8LSB0cmFpbi5pbXB1dGVkCnRyYWluLmltcHV0ZWQubG9nJFNhbGVQcmljZSA8LSBsb2codHJhaW4uaW1wdXRlZC5sb2ckU2FsZVByaWNlKQpgYGAKCmBgYHtyLCByZXN1bHRzPSdoaWRlJ30KIyBPbiBsb2coU2FsZVByaWNlKQoKIyBGb3J3YXJkIFNlbGVjdGlvbgpmb3J3YXJkLmZpdC5sb2cgPC0gbG0oU2FsZVByaWNlfjEsIGRhdGEgPSB0cmFpbi5pbXB1dGVkLmxvZykKc3RlcC5mLmxvZyA8LSBzdGVwQUlDKGZvcndhcmQuZml0LmxvZywgZGlyZWN0aW9uPSJmb3J3YXJkIiwgc2NvcGUgPSBmb3JtdWxhKGxtKFNhbGVQcmljZX4uLCBkYXRhID0gdHJhaW4uaW1wdXRlZC5sb2cpKSkKCiMgQmFja3dhcmQgU2VsZWN0aW9uCmJhY2t3YXJkLmZpdC5sb2cgPC0gbG0oU2FsZVByaWNlfi4sZGF0YSA9IHRyYWluLmltcHV0ZWQubG9nKQpzdGVwLmIubG9nIDwtIHN0ZXBBSUMoYmFja3dhcmQuZml0LmxvZywgZGlyZWN0aW9uPSJiYWNrd2FyZCIpCgojIFN0ZXB3aXNlIFNlbGVjdGlvbgpzdGVwd2lzZS5maXQubG9nIDwtIGxtKFNhbGVQcmljZX4uLGRhdGEgPSB0cmFpbi5pbXB1dGVkLmxvZykKc3RlcC5zLmxvZyA8LSBzdGVwQUlDKHN0ZXB3aXNlLmZpdC5sb2csIGRpcmVjdGlvbj0iYm90aCIpCmBgYAoKYGBge3J9CiMgU2VsZWN0aW9uIHJlc3VsdHMgKG9uIGxvZyhTYWxlUHJpY2UpKQpzdGVwLmYubG9nJGNhbGwgIyBGb3J3YXJkIHJlc3VsdHMKcHJpbnQoIiMjIyMjIyMjIyMjIyMjIyMjIyMjIyMiKQpzdGVwLmIubG9nJGNhbGwgIyBCYWNrd2FyZCByZXN1bHRzCnByaW50KCIjIyMjIyMjIyMjIyMjIyMjIyMjIyMjIikKc3RlcC5zLmxvZyRjYWxsICMgU3RlcHdpc2UgcmVzdWx0cwpgYGAKCiMjIFByaW5jaXBhbCBDb21wb25lbnQgQW5hbHlzaXMgeyNQcmluY2lwYWxDb21wb25lbnRBbmFseXNpc30KCmBgYHtyfQojIENvbmR1Y3RpbmcgUENBIG9uIHRoZSBzdGVwd2lzZSBzZWxlY3RlZCBmZWF0dXJlIHNldHMgdXNpbmcgbG9nIFNhbGVQcmljZQoKIyBTY2FsaW5nIHRoZSBzdWJzZXQgb2YgdGhlIHByZWRpY3RvcnMKdHJhaW4uaW1wdXRlZC5sb2cucGNhIDwtIHRyYWluLmltcHV0ZWQubG9nWyxjKCdNU1N1YkNsYXNzJywgJ01TWm9uaW5nJywgJ0xvdEZyb250YWdlJywgCiAgICAnTG90QXJlYScgLCAnTG90Q29uZmlnJyAsICdMYW5kU2xvcGUnICwgJ05laWdoYm9yaG9vZCcgLCAnQ29uZGl0aW9uMScgLCAKICAgICdDb25kaXRpb24yJyAsICdIb3VzZVN0eWxlJyAsICdPdmVyYWxsUXVhbCcgLCAnT3ZlcmFsbENvbmQnICwgJ1llYXJCdWlsdCcgLCAKICAgICdZZWFyUmVtb2RBZGQnICwgJ1Jvb2ZNYXRsJyAsICdGb3VuZGF0aW9uJyAsICdCc210RmluU0YxJyAsICdIZWF0aW5nJyAsIAogICAgJ0hlYXRpbmdRQycgLCAnQ2VudHJhbEFpcicgLCAnWDFzdEZsclNGJyAsICdYMm5kRmxyU0YnICwgJ0JzbXRGdWxsQmF0aCcgLCAKICAgICdLaXRjaGVuUXVhbCcgLCAnVG90Um1zQWJ2R3JkJyAsICdGdW5jdGlvbmFsJyAsICdGaXJlcGxhY2VzJyAsICdHYXJhZ2VZckJsdCcgLCAKICAgICdHYXJhZ2VDYXJzJyAsICdHYXJhZ2VBcmVhJyAsICdHYXJhZ2VDb25kJyAsICdXb29kRGVja1NGJyAsICdFbmNsb3NlZFBvcmNoJyAsIAogICAgJ1gzU3NuUG9yY2gnICwgJ1NjcmVlblBvcmNoJyAsICdQb29sUUMnICwgJ0ZlbmNlJyAsICdTYWxlVHlwZScgLCAnU2FsZUNvbmRpdGlvbicgLCAKICAgICdUb3RhbEJzbXRTRicgLCAnS2l0Y2hlbkFidkdyJyldCgp0ZXN0LmltcHV0ZWQubG9nLnBjYSA8LSB0ZXN0LnhbLGMoJ01TU3ViQ2xhc3MnLCAnTVNab25pbmcnLCAnTG90RnJvbnRhZ2UnLCAKICAgICdMb3RBcmVhJyAsICdMb3RDb25maWcnICwgJ0xhbmRTbG9wZScgLCAnTmVpZ2hib3Job29kJyAsICdDb25kaXRpb24xJyAsIAogICAgJ0NvbmRpdGlvbjInICwgJ0hvdXNlU3R5bGUnICwgJ092ZXJhbGxRdWFsJyAsICdPdmVyYWxsQ29uZCcgLCAnWWVhckJ1aWx0JyAsIAogICAgJ1llYXJSZW1vZEFkZCcgLCAnUm9vZk1hdGwnICwgJ0ZvdW5kYXRpb24nICwgJ0JzbXRGaW5TRjEnICwgJ0hlYXRpbmcnICwgCiAgICAnSGVhdGluZ1FDJyAsICdDZW50cmFsQWlyJyAsICdYMXN0RmxyU0YnICwgJ1gybmRGbHJTRicgLCAnQnNtdEZ1bGxCYXRoJyAsIAogICAgJ0tpdGNoZW5RdWFsJyAsICdUb3RSbXNBYnZHcmQnICwgJ0Z1bmN0aW9uYWwnICwgJ0ZpcmVwbGFjZXMnICwgJ0dhcmFnZVlyQmx0JyAsIAogICAgJ0dhcmFnZUNhcnMnICwgJ0dhcmFnZUFyZWEnICwgJ0dhcmFnZUNvbmQnICwgJ1dvb2REZWNrU0YnICwgJ0VuY2xvc2VkUG9yY2gnICwgCiAgICAnWDNTc25Qb3JjaCcgLCAnU2NyZWVuUG9yY2gnICwgJ1Bvb2xRQycgLCAnRmVuY2UnICwgJ1NhbGVUeXBlJyAsICdTYWxlQ29uZGl0aW9uJyAsIAogICAgJ1RvdGFsQnNtdFNGJyAsICdLaXRjaGVuQWJ2R3InKV0KCnVudXNhYmxlLmNvbHMgPC0gYygiTmVpZ2hib3Job29kIiwgIkNvbmRpdGlvbjEiLCAiSG91c2VTdHlsZSIsICJSb29mTWF0bCIsICJIZWF0aW5nIiwgIkdhcmFnZUNvbmQiLCAiU2FsZUNvbmRpdGlvbiIsICJMYW5kU2xvcGUiLCAiQ29uZGl0aW9uMiIsICJGb3VuZGF0aW9uIiwgIkhlYXRpbmdRQyIsICJGdW5jdGlvbmFsIiwgIlBvb2xRQyIsICJTYWxlVHlwZSIpCgpwY2EubWF0cml4LnRyYWluIDwtIHRyYWluLmltcHV0ZWQubG9nLnBjYVssIShuYW1lcyh0cmFpbi5pbXB1dGVkLmxvZy5wY2EpICVpbiUgdW51c2FibGUuY29scyldCnBjYS5tYXRyaXgudHJhaW4gPC0gbW9kZWwubWF0cml4KCB+Li0xLCBkYXRhPXBjYS5tYXRyaXgudHJhaW4gKQpwY2EubWF0cml4LnRyYWluIDwtIHNjYWxlKHBjYS5tYXRyaXgudHJhaW4pCgpwY2EubWF0cml4LnRlc3QgPC0gdGVzdC5pbXB1dGVkLmxvZy5wY2FbLCEobmFtZXModGVzdC5pbXB1dGVkLmxvZy5wY2EpICVpbiUgdW51c2FibGUuY29scyldCnBjYS5tYXRyaXgudGVzdCA8LSBtb2RlbC5tYXRyaXgoIH4uLTEsIGRhdGE9cGNhLm1hdHJpeC50ZXN0ICkKcGNhLm1hdHJpeC50ZXN0IDwtIHNjYWxlKHBjYS5tYXRyaXgudGVzdCkKYGBgCgoKYGBge3J9CiMgUENBCmFsbC5wY2EgPC0gcHJjb21wKHBjYS5tYXRyaXgudHJhaW4sIHNjYWxlID0gRkFMU0UpCnN1bW1hcnkoYWxsLnBjYSkKYGBgCgpgYGB7cn0KIyBWaXN1bGl6ZSBTY3JlZSBQbG90CmZ2aXpfZWlnKGFsbC5wY2EpCgojIFBsb3Qgb2YgaW5kaXZpZHVhbCBncm91cGluZ3MKI2Z2aXpfcGNhX2luZChhbGwucGNhLCBjb2wuaW5kID0gImNvczIiLCBncmFkaWVudC5jb2xzID0gYygiIzAwQUZCQiIsICIjRTdCODAwIiwgIiNGQzRFMDciKSwgcmVwZWwgPSBUUlVFKQoKIyBWYXJpYWJsZSBjb250cmlidXRpb24gdG8gZWFjaCBQQwpmdml6X3BjYV92YXIoYWxsLnBjYSwgYXhlcyA9IGMoMSwgMiksIGNvbC52YXIgPSAiY29udHJpYiIsIGdyYWRpZW50LmNvbHMgPSBjKCIjMDBBRkJCIiwgIiNFN0I4MDAiLCAiI0ZDNEUwNyIpLCByZXBlbCA9IEZBTFNFKQpmdml6X3BjYV92YXIoYWxsLnBjYSwgYXhlcyA9IGMoMiwgMyksIGNvbC52YXIgPSAiY29udHJpYiIsIGdyYWRpZW50LmNvbHMgPSBjKCIjMDBBRkJCIiwgIiNFN0I4MDAiLCAiI0ZDNEUwNyIpLCByZXBlbCA9IEZBTFNFKQpmdml6X3BjYV92YXIoYWxsLnBjYSwgYXhlcyA9IGMoMywgNCksIGNvbC52YXIgPSAiY29udHJpYiIsIGdyYWRpZW50LmNvbHMgPSBjKCIjMDBBRkJCIiwgIiNFN0I4MDAiLCAiI0ZDNEUwNyIpLCByZXBlbCA9IEZBTFNFKQoKIyBCaXBsb3QKI2Z2aXpfcGNhX2JpcGxvdChhbGwucGNhLCByZXBlbCA9IFRSVUUsIGNvbC52YXIgPSAiIzJFOUZERiIsIGNvbC5pbmQgPSAiIzY5Njk2OSIpCgojIFByZWRpY3RpbmcgUENBIHZhbHVlcyBmb3IgZWFjaCBvYnNlcnZhdGlvbgpwY2EucHJlZC50cmFpbiA8LSBwcmVkaWN0KGFsbC5wY2EsIG5ld2RhdGEgPSBwY2EubWF0cml4LnRyYWluKQp0cmFpbi5wY2EgPC0gcGNhLnByZWQudHJhaW4KdHJhaW4ucGNhIDwtIGRhdGEuZnJhbWUodHJhaW4ucGNhKQp0cmFpbi5wY2EkU2FsZVByaWNlIDwtIHRyYWluLmltcHV0ZWQubG9nJFNhbGVQcmljZQpwY2EucHJlZC50ZXN0IDwtIHByZWRpY3QoYWxsLnBjYSwgbmV3ZGF0YSA9IHBjYS5tYXRyaXgudGVzdCkKdGVzdC5wY2EgPC0gcGNhLnByZWQudGVzdAp0ZXN0LnBjYSA8LSBkYXRhLmZyYW1lKHRlc3QucGNhKQpgYGAKCiMjIE1vZGVsIEJ1aWxkaW5nIHsjbW9kZWxidWlsZGluZ30KCkxldCdzIHByZWRpY3QgdGhlICdTYWxlUHJpY2UnLCBvZiBlYWNoIHRlc3Qgb2JzZXJ2YXRpb24sIHRvIGJlIHRoZSBhdmVyYWdlIG9mIGFsbCB0aGUgdHJhaW4gJ1NhbGVQcmljZScgdmFsdWVzLCBhbmQgdXNlIHRoZSByZXN1bHRpbmcgbWV0cmljcyBhcyBhIGJlbmNobWFyayBhZ2FpbnN0IHdoaWNoIGFsbCBmb2xsb3dpbmcgbW9kZWwgcGVyZm9ybWFuY2VzIHdpbGwgYmUgY29tcGFyZWQuCgpgYGB7cn0KIyBMZXQncyBwcmVkaWN0IHRoZSBhdmVyYWdlIGZvciBlYWNoIHRlc3Qgb2JzZXJ2YXRpb24gYW5kIHVzZSBpdCBhcyBhIGJlbmNobWFyawpzYWxlLnByaWNlLm1lYW4gPC0gbWVhbih0cmFpbi5pbXB1dGVkJFNhbGVQcmljZSkKYXZnLnZlY3RvciA8LSByZXAoYyhzYWxlLnByaWNlLm1lYW4pLCB0aW1lcyA9IDIxMikKCmNhdCgiQmlhczogIiwgbWVhbihhdmcudmVjdG9yLXRlc3QueSkpCmNhdCgiXG5NYXhpbXVtIERldmlhdGlvbjogIiwgbWF4KGF2Zy52ZWN0b3ItdGVzdC55KSkKY2F0KCJcbk1lYW4gQWJzb2x1dGUgRGV2aWF0aW9uOiAiLCBtZWFuKGFicyhhdmcudmVjdG9yLXRlc3QueSkpKQpjYXQoIlxuTWVhbiBTcXVhcmUgRXJyb3I6ICIsIG1lYW4oKGF2Zy52ZWN0b3ItdGVzdC55KSoqMikpCmNhdCgiXG5Sb290IE1lYW4gU3F1YXJlIEVycm9yOiAiLCBzcXJ0KG1lYW4oKGF2Zy52ZWN0b3ItdGVzdC55KSoqMikpKQpgYGAKCk1vdmluZyBvbiB0byBhY3R1YWxseSBidWlsZGluZyB0aGUgbW9kZWxzLgoKYGBge3J9CiMgTGluZWFyIG1vZGVsIHdpdGggZm9yd2FyZCBzZWxlY3RlZCBmZWF0dXJlcyAobG9nIFNhbGVQcmljZSkKZm9yd2FyZC5sbS5sb2cgPC0gbG0oZm9ybXVsYShzdGVwLmYubG9nJGNhbGwpLCBkYXRhID0gdHJhaW4uaW1wdXRlZC5sb2cpCmZvcndhcmQucHJlZGljdGlvbnMubG9nIDwtIGV4cChwcmVkaWN0KGZvcndhcmQubG0ubG9nLCB0ZXN0LmltcHV0ZWQpKQoKc3VtbWFyeShmb3J3YXJkLmxtLmxvZykkYWRqLnIuc3F1YXJlZApjYXQoIkJpYXM6ICIsIG1lYW4oZm9yd2FyZC5wcmVkaWN0aW9ucy5sb2ctdGVzdC55KSkKY2F0KCJcbk1heGltdW0gRGV2aWF0aW9uOiAiLCBtYXgoZm9yd2FyZC5wcmVkaWN0aW9ucy5sb2ctdGVzdC55KSkKY2F0KCJcbk1lYW4gQWJzb2x1dGUgRGV2aWF0aW9uOiAiLCBtZWFuKGFicyhmb3J3YXJkLnByZWRpY3Rpb25zLmxvZy10ZXN0LnkpKSkKY2F0KCJcbk1lYW4gU3F1YXJlIEVycm9yOiAiLCBtZWFuKChmb3J3YXJkLnByZWRpY3Rpb25zLmxvZy10ZXN0LnkpKioyKSkKY2F0KCJcblJvb3QgTWVhbiBTcXVhcmUgRXJyb3I6ICIsIHNxcnQobWVhbigoZm9yd2FyZC5wcmVkaWN0aW9ucy5sb2ctdGVzdC55KSoqMikpKQpgYGAKCkxpbmVhciBtb2RlbCB3aXRoIGJhY2t3YXJkIHNlbGVjdGVkIGZlYXR1cmVzCgpgYGB7cn0KIyBMaW5lYXIgbW9kZWwgd2l0aCBiYWNrd2FyZCBzZWxlY3RlZCBmZWF0dXJlcyAobG9nIFNhbGVQcmljZSkKYmFja3dhcmQubG0ubG9nIDwtIGxtKGZvcm11bGEoc3RlcC5iLmxvZyRjYWxsKSwgZGF0YSA9IHRyYWluLmltcHV0ZWQubG9nKQpiYWNrd2FyZC5wcmVkaWN0aW9ucy5sb2cgPC0gZXhwKHByZWRpY3QoYmFja3dhcmQubG0ubG9nLCB0ZXN0LmltcHV0ZWQpKQoKc3VtbWFyeShiYWNrd2FyZC5sbS5sb2cpJGFkai5yLnNxdWFyZWQKY2F0KCJCaWFzOiAiLCBtZWFuKGJhY2t3YXJkLnByZWRpY3Rpb25zLmxvZy10ZXN0LnkpKQpjYXQoIlxuTWF4aW11bSBEZXZpYXRpb246ICIsIG1heChiYWNrd2FyZC5wcmVkaWN0aW9ucy5sb2ctdGVzdC55KSkKY2F0KCJcbk1lYW4gQWJzb2x1dGUgRGV2aWF0aW9uOiAiLCBtZWFuKGFicyhiYWNrd2FyZC5wcmVkaWN0aW9ucy5sb2ctdGVzdC55KSkpCmNhdCgiXG5NZWFuIFNxdWFyZSBFcnJvcjogIiwgbWVhbigoYmFja3dhcmQucHJlZGljdGlvbnMubG9nLXRlc3QueSkqKjIpKQpjYXQoIlxuUm9vdCBNZWFuIFNxdWFyZSBFcnJvcjogIiwgc3FydChtZWFuKChiYWNrd2FyZC5wcmVkaWN0aW9ucy5sb2ctdGVzdC55KSoqMikpKQpgYGAKCkxpbmVhciBtb2RlbCB3aXRoIHN0ZXB3aXNlIHNlbGVjdGVkIGZlYXR1cmVzCgpgYGB7cn0KIyBMaW5lYXIgbW9kZWwgd2l0aCBzdGVwd2lzZSBzZWxlY3RlZCBmZWF0dXJlcyAobG9nIFNhbGVQcmljZSkKc3RlcHdpc2UubG0ubG9nIDwtIGxtKGZvcm11bGEoc3RlcC5zLmxvZyRjYWxsKSwgZGF0YSA9IHRyYWluLmltcHV0ZWQubG9nKQpzdGVwd2lzZS5wcmVkaWN0aW9ucy5sb2cgPC0gZXhwKHByZWRpY3Qoc3RlcHdpc2UubG0ubG9nLCB0ZXN0LmltcHV0ZWQpKQoKc3VtbWFyeShzdGVwd2lzZS5sbS5sb2cpJGFkai5yLnNxdWFyZWQKY2F0KCJCaWFzOiAiLCBtZWFuKHN0ZXB3aXNlLnByZWRpY3Rpb25zLmxvZy10ZXN0LnkpKQpjYXQoIlxuTWF4aW11bSBEZXZpYXRpb246ICIsIG1heChzdGVwd2lzZS5wcmVkaWN0aW9ucy5sb2ctdGVzdC55KSkKY2F0KCJcbk1lYW4gQWJzb2x1dGUgRGV2aWF0aW9uOiAiLCBtZWFuKGFicyhzdGVwd2lzZS5wcmVkaWN0aW9ucy5sb2ctdGVzdC55KSkpCmNhdCgiXG5NZWFuIFNxdWFyZSBFcnJvcjogIiwgbWVhbigoc3RlcHdpc2UucHJlZGljdGlvbnMubG9nLXRlc3QueSkqKjIpKQpjYXQoIlxuUm9vdCBNZWFuIFNxdWFyZSBFcnJvcjogIiwgc3FydChtZWFuKChzdGVwd2lzZS5wcmVkaWN0aW9ucy5sb2ctdGVzdC55KSoqMikpKQpgYGAKClBDQQoKYGBge3Isd2FybmluZz1GQUxTRX0KIyBMaW5lYXIgbW9kZWwgd2l0aCBQQ0EgdGVybXMgZnJvbSBsb2cgU2FsZVByaWNlIGFuZCBzdGVwd2lzZSB2YXJpYWJsZXMKCnBjYS5maXQgPC0gbG0oU2FsZVByaWNlfi4sIGRhdGEgPSB0cmFpbi5wY2EpCnBjYS5wcmVkaWN0aW9ucyA8LSBleHAocHJlZGljdChwY2EuZml0LCB0ZXN0LnBjYSkpCgpjYXQoIkJpYXM6ICIsIG1lYW4ocGNhLnByZWRpY3Rpb25zLXRlc3QueSkpCmNhdCgiXG5NYXhpbXVtIERldmlhdGlvbjogIiwgbWF4KHBjYS5wcmVkaWN0aW9ucy10ZXN0LnkpKQpjYXQoIlxuTWVhbiBBYnNvbHV0ZSBEZXZpYXRpb246ICIsIG1lYW4oYWJzKHBjYS5wcmVkaWN0aW9ucy10ZXN0LnkpKSkKY2F0KCJcbk1lYW4gU3F1YXJlIEVycm9yOiAiLCBtZWFuKChwY2EucHJlZGljdGlvbnMtdGVzdC55KSoqMikpCmNhdCgiXG5Sb290IE1lYW4gU3F1YXJlIEVycm9yOiAiLCBzcXJ0KG1lYW4oKHBjYS5wcmVkaWN0aW9ucy10ZXN0LnkpKioyKSkpCmBgYAoKTEFTU08gUmVncmVzc2lvbgoKYGBge3J9CiMgTEFTU08gUmVncmVzc2lvbiAobG9nIFNhbGVQcmljZSkKbHIudHJhaW4ubG9nIDwtIG1vZGVsLm1hdHJpeChTYWxlUHJpY2V+LiwgZGF0YSA9IHRyYWluLmltcHV0ZWQubG9nKQp0ZXN0LmltcHV0ZWQkU2FsZVByaWNlIDwtIHRlc3QueQpsci50ZXN0IDwtIG1vZGVsLm1hdHJpeChTYWxlUHJpY2V+LiwgZGF0YSA9IHRlc3QuaW1wdXRlZCkKZ3JpZCA8LSAxMF5zZXEoNCwgLTIsIGxlbmd0aCA9IDEwMCkKCnNldC5zZWVkICgyMDE5KQpjdi5sYXNzby5sb2cgPC0gY3YuZ2xtbmV0KGxyLnRyYWluLmxvZywgbG9nKHRyYWluJFNhbGVQcmljZSksIGFscGhhID0gMSwgbGFtYmRhID0gZ3JpZCkKYmVzdGxhbS5sb2cgPC0gY3YubGFzc28ubG9nJGxhbWJkYS5taW4KYmVzdGxhbS5sb2cgIyBCZXN0IGxhbWJkYSA9IDAuMDM1MTExOTIKYGBgCgpgYGB7cn0Kc2V0LnNlZWQoMjAxOSkKIyBGaXR0aW5nIHRoZSBMQVNTTyAobG9nIFNhbGVQcmljZSkKZml0Lmxhc3NvLmxvZyA8LSBnbG1uZXQobHIudHJhaW4ubG9nLCB0cmFpbi5pbXB1dGVkLmxvZyRTYWxlUHJpY2UsIGFscGhhID0gMSwgbGFtYmRhID0gMC4wMzUxMTE5MikKbGFzc28ucHJlZGljdC5sb2cgPC0gZXhwKHByZWRpY3QoZml0Lmxhc3NvLmxvZywgbHIudGVzdCkpCgpjYXQoIkJpYXM6ICIsIG1lYW4obGFzc28ucHJlZGljdC5sb2ctdGVzdC55KSkKY2F0KCJcbk1heGltdW0gRGV2aWF0aW9uOiAiLCBtYXgobGFzc28ucHJlZGljdC5sb2ctdGVzdC55KSkKY2F0KCJcbk1lYW4gQWJzb2x1dGUgRGV2aWF0aW9uOiAiLCBtZWFuKGFicyhsYXNzby5wcmVkaWN0LmxvZy10ZXN0LnkpKSkKY2F0KCJcbk1lYW4gU3F1YXJlIEVycm9yOiAiLCBtZWFuKChsYXNzby5wcmVkaWN0LmxvZy10ZXN0LnkpKioyKSkKY2F0KCJcblJvb3QgTWVhbiBTcXVhcmUgRXJyb3I6ICIsIHNxcnQobWVhbigobGFzc28ucHJlZGljdC5sb2ctdGVzdC55KSoqMikpKQpgYGAKCkEgcmFuZG9tIEZvcmVzdCB3YXMgdXNlZCB0byBkZXRlcm1pbmUgdGhlIG1vc3Qgc2lnbmlmaWNhbnQgaW50ZXJhY3Rpb24gdGVybXMgb2YgdGhlIHN0ZXB3aXNlIGZlYXR1cmUgc2V0LgoKYGBge3J9CiMgTGluZWFyIG1vZGVsIHdpdGggc3RlcHdpc2Ugc2VsZWN0ZWQgZmVhdHVyZXMgKGxvZyBTYWxlUHJpY2UpICh3aXRoIGludGVyZWFjdGlvbnMpCnRlc3QuaW1wdXRlZCRTYWxlUHJpY2UgPC0gTlVMTApzdGVwd2lzZS5pbnQubG0gPC0gbG0oU2FsZVByaWNlIH4gIE1TWm9uaW5nICsgTG90RnJvbnRhZ2UgKyBMb3RDb25maWcgKwogICAgSG91c2VTdHlsZSArIE92ZXJhbGxDb25kICsgWWVhckJ1aWx0ICsgKyBMYW5kU2xvcGUgKyBDb25kaXRpb24xICsgQ29uZGl0aW9uMiArCiAgICBZZWFyUmVtb2RBZGQgKyBGb3VuZGF0aW9uICsgQnNtdEZpblNGMSArIEhlYXRpbmcgKyBSb29mTWF0bCArCiAgICBIZWF0aW5nUUMgKyBDZW50cmFsQWlyICArIFgybmRGbHJTRiArIEJzbXRGdWxsQmF0aCArIAogICAgS2l0Y2hlblF1YWwgKyBUb3RSbXNBYnZHcmQgKyBGdW5jdGlvbmFsICsgRmlyZXBsYWNlcyArIEdhcmFnZVlyQmx0ICsgCiAgICBHYXJhZ2VDYXJzICsgR2FyYWdlQXJlYSArIEdhcmFnZUNvbmQgKyBXb29kRGVja1NGICsgRW5jbG9zZWRQb3JjaCArIAogICAgWDNTc25Qb3JjaCArIFNjcmVlblBvcmNoICsgUG9vbFFDICsgRmVuY2UgKyBTYWxlVHlwZSArIFNhbGVDb25kaXRpb24gKyAKICAgIFRvdGFsQnNtdFNGICsgS2l0Y2hlbkFidkdyICsgT3ZlcmFsbFF1YWw6WDFzdEZsclNGICsgT3ZlcmFsbFF1YWw6TmVpZ2hib3Job29kICsgICAgT3ZlcmFsbFF1YWw6TG90QXJlYQkrIE92ZXJhbGxRdWFsOkJzbXRVbmZTRiArIE92ZXJhbGxRdWFsOkJzbXRGaW5TRjEgKyBPdmVyYWxsUXVhbDpZZWFyQnVpbHQgKyBPdmVyYWxsUXVhbDpNb1NvbGQgKyBPdmVyYWxsUXVhbDpZZWFyUmVtb2RBZGQgKyBPdmVyYWxsUXVhbDpCc210RXhwb3N1cmUgKyBPdmVyYWxsUXVhbDpNU1N1YkNsYXNzLCBkYXRhID0gdHJhaW4uaW1wdXRlZC5sb2cpCgpzdGVwd2lzZS5pbnQubG0ucHJlZCA8LSBleHAocHJlZGljdChzdGVwd2lzZS5pbnQubG0sIHRlc3QuaW1wdXRlZCkpCgpjYXQoIkJpYXM6ICIsIG1lYW4oc3RlcHdpc2UuaW50LmxtLnByZWQtdGVzdC55KSkKY2F0KCJcbk1heGltdW0gRGV2aWF0aW9uOiAiLCBtYXgoc3RlcHdpc2UuaW50LmxtLnByZWQtdGVzdC55KSkKY2F0KCJcbk1lYW4gQWJzb2x1dGUgRGV2aWF0aW9uOiAiLCBtZWFuKGFicyhzdGVwd2lzZS5pbnQubG0ucHJlZC10ZXN0LnkpKSkKY2F0KCJcbk1lYW4gU3F1YXJlIEVycm9yOiAiLCBtZWFuKChzdGVwd2lzZS5pbnQubG0ucHJlZC10ZXN0LnkpKioyKSkKY2F0KCJcblJvb3QgTWVhbiBTcXVhcmUgRXJyb3I6ICIsIHNxcnQobWVhbigoc3RlcHdpc2UuaW50LmxtLnByZWQtdGVzdC55KSoqMikpKQpgYGAKCmBgYHtyfQpzdW1tYXJ5KHN0ZXB3aXNlLmludC5sbSkKYGBgCgpCYXNlZCBvbiBvdXIgZmluZGluZ3MsIHdlIHNlZSBhIGNvdXBsZSB0aGluZ3MuIFRoZSBjcm9zcyB2YWxpZGF0aW9uIHNlbGVjdGlvbiBvZiBsYW1iZGEgZGVncmFkZXMgdGhlIHBlcmZvcm1hbmNlIG9mIHRoZSBMQVNTTyBSZWdyZXNzaW9uLCBMQVNTTyBwZXJmb3JtcyB3b3JzZSB0aGFuIGFueSBvZiBvdXIgb3RoZXIgbW9kZWxzLiBUcmFpbmluZyBvbiB0aGUgbG9nIG9mIFNhbGVQcmljZSBpbXByb3ZlcyBtb2RlbCBwZXJmb3JtYW5jZXMgdGhlIG1vc3QuIHRoZXJlIGFyZSBvbmx5IGEgZmV3IG9yZGlhbmwgdmFyaWFibGVzLCBzbyB0aGVyZSBpcyBsaXR0bGUgdG8gYmUgZ2FpbmVkIGZyb20gY2FobmdpbmcgdGhlbSBmcm9tIG51bWVyaWMgdG8gY2F0ZWdvcmljYWwgdmFyaWFibGVzLiBJbiB0aGUgZW5kLCB3ZSB3ZW50IHdpdGggdGhlIG11bHRpcGxlIGxpbmVhciByZWdyZXNzaW9uIG1vZGVsIHdpdGggdGhlIHN0ZXB3aXNlIGZlYXR1cmUgc2V0IGluY2x1ZGluZyBpbnRlcmFjdGlvbiB0ZXJtcyBhcyBpdCBleGhpYml0ZWQgdGhlIG1vc3QgZmF2b3JhYmxlIHBlcmZvcm1hbmNlIG1ldHJpY3Mgb24gdGhlIHZhbGlkYXRpb24gc2V0LgoKIyMgUmVzdWx0cyB7I3Jlc3VsdHN9CgpOb3cgbGV0J3MgY2hlY2sgdGhlIGRpYWdub3N0aWNzIGZvciB0aGUgZm9yd2FyZCBzZWxlY3RlZCBsaW5lYXIgbW9kZWwgYXMgaXQgc2VlbXMgdG8gYmUgdGhlICdiZXN0JyBtb2RlbCBiZWNhdXNlIGl0IGNvbnRhaW5zIHRoZSB2YXJpYWJsZSB3aXRoIHRoZSBzdHJvbmdlc3QgY29ycmVsYXRpb24gd2l0aCB0aGUgcmVzcG9uc2UgdmFyaWFibGUuIAoKYGBge3J9CiNSZXNpZHVhbCBQbG90cwpwbG90KHN0ZXB3aXNlLmludC5sbSkKYGBgCgpgYGB7cn0KI1Jlc2lkdWFsIFBsb3RzCmhpc3Qoc3RlcHdpc2UuaW50LmxtJHJlc2lkdWFscywgYnJlYWtzPTUwLCBtYWluID0gIkhpc3RvZ3JhbSBvZiBGaW5hbCBNb2RlbCBSZXNpZHVhbHMiLCB4bGFiID0gIlJlc2lkdWFscyIpCmBgYAoKYGBge3J9CiMgUGxvdCBvZiBwcmVkaWN0ZWQgdmVyc3VzIHRydWUgcmVwb25zZSB0ZXN0IHZhbHVlcwpwbG90KHRlc3QueSxzdGVwd2lzZS5pbnQubG0ucHJlZCwgeGxhYj0iT2JzZXJ2ZWQgVmFsdWVzIiwgeWxhYj0iUHJlZGljdGVkIFZhbHVlcyIpIAp0aXRsZSgiT2JzZXJ2ZWQgdnMuIFByZWRpY3RlZCBWYWx1ZXMiKQphYmxpbmUoMCwxLCBjb2wgPSAncmVkJykKYGBgCgojIyBDb25jbHVzaW9ucyB7I2NvbmNsdXNpb25zfQoKRmluYWwgbW9kZWwgcGVyZm9ybW5hY2Ugb24gdGVzdCBzZXQKCmBgYHtyfQojIEltcG9ydGluZyB0aGUgZGF0YQp0ZXN0LmRhdGEgPC0gcmVhZC5jc3YoInRlc3QuY3N2IikKCiNjb2xzLmtlZXAgPC0gYygiTVNab25pbmciLCAiTG90RnJvbnRhZ2UiICwgIkxvdENvbmZpZyIgLCAiTGFuZFNsb3BlIiAsICJDb25kaXRpb24xIiAsIAojICAgICAgICAgIkhvdXNlU3R5bGUiICwgIk92ZXJhbGxDb25kIiAsICJZZWFyQnVpbHQiICwgIlllYXJSZW1vZEFkZCIgLCAKIyAgICAgICAgICJGb3VuZGF0aW9uIiAsICJCc210RmluU0YxIiwgICJIZWF0aW5nIiAsICJIZWF0aW5nUUMiICwgCiMgICAgICAgICJDZW50cmFsQWlyIiAsICJYMm5kRmxyU0YiICwgIkJzbXRGdWxsQmF0aCIgLCAiS2l0Y2hlblF1YWwiICwgCiMgICAgICAgICJUb3RSbXNBYnZHcmQiICwgIkZ1bmN0aW9uYWwiICwgIkZpcmVwbGFjZXMiICwgIkdhcmFnZVlyQmx0IiAsCiMgICAgICAgICJHYXJhZ2VDYXJzIiAsICJHYXJhZ2VBcmVhIiAsICJHYXJhZ2VDb25kIiAsICJXb29kRGVja1NGIiAsICJFbmNsb3NlZFBvcmNoIiAsIAojICAgICAgICAiWDNTc25Qb3JjaCIgLCAiU2NyZWVuUG9yY2giICwgIlBvb2xRQyIgLCAiRmVuY2UiICwgIlNhbGVUeXBlIiAsIAojICAgICAgICAiU2FsZUNvbmRpdGlvbiIgLCAiVG90YWxCc210U0YiICwgIktpdGNoZW5BYnZHciIsICJPdmVyYWxsUXVhbCIsICJYMXN0RmxyU0YiLCAjIk5laWdoYm9yaG9vZCIsICAgICAgICAgIkxvdEFyZWEiLCAiQnNtdFVuZlNGIiwgIkJzbXRGaW5TRjEiLCAiWWVhckJ1aWx0IiwgIk1vU29sZCIsICJZZWFyUmVtb2RBZGQiLCAjIkJzbXRFeHBvc3VyZSIsICAgICAgICAgIk1TU3ViQ2xhc3MiKQoKIyBMZWF2ZSB0aGVzZSBvdXQ6IAoKdGVzdC5kYXRhLnggPC0gdGVzdC5kYXRhWywtODFdCnRlc3QuZGF0YS55IDwtIHRlc3QuZGF0YVssODFdCmBgYAoKQW55IG1pc3NpbmcgdmFsdWVzPwoKYGBge3J9CihzdW0oaXMubmEodGVzdC5kYXRhLngpKS8obnJvdyh0ZXN0LmRhdGEueCkqbmNvbCh0ZXN0LmRhdGEueCkpKSoxMDAKYGBgCgpgYGB7cn0KIyBBZGRpbmcgJ05BJyBhcyBhIGZhY3RvciBsZXZlbAoKY29sX2xpc3QgPC0gYygnQWxsZXknLCAnQnNtdFF1YWwnLCAnQnNtdENvbmQnLCAnQnNtdEV4cG9zdXJlJywgJ0JzbXRGaW5UeXBlMScsICdCc210RmluVHlwZTInLCAnRmlyZXBsYWNlUXUnLCAnR2FyYWdlVHlwZScsICdHYXJhZ2VGaW5pc2gnLCAnR2FyYWdlUXVhbCcsICdHYXJhZ2VDb25kJywgJ1Bvb2xRQycsICdGZW5jZScsICdNaXNjRmVhdHVyZScpCgp0ZXN0LmRhdGEueFtjb2xfbGlzdF0gPC0gbGFwcGx5KHRlc3QuZGF0YS54W2NvbF9saXN0XSwgYWRkTkEpCmBgYAoKYGBge3J9CihzdW0oaXMubmEodGVzdC5kYXRhLngpKS8obnJvdyh0ZXN0LmRhdGEueCkqbmNvbCh0ZXN0LmRhdGEueCkpKSoxMDAKYGBgCgpgYGB7cn0KIyBJbXB1dGluZyBtaXNzaW5nIHZhbHVlcwpmaW5hbC50cmFpbi5taWRzIDwtIG1pY2UodGVzdC5kYXRhLngsIG09MSwgbWV0aG9kPSdjYXJ0Jywgc2VlZD0yMDE5KQp0ZXN0LmRhdGEueCA8LSBjb21wbGV0ZShmaW5hbC50cmFpbi5taWRzLDEpCmBgYAoKYGBge3J9CihzdW0oaXMubmEodGVzdC5kYXRhLngpKS8obnJvdyh0ZXN0LmRhdGEueCkqbmNvbCh0ZXN0LmRhdGEueCkpKSoxMDAKYGBgCgpgYGB7cn0KdGVzdC5kYXRhIDwtIGNiaW5kKHRlc3QuZGF0YS54LCB0ZXN0LmRhdGEueSkKI3Rlc3QuZGF0YVt0ZXN0LmRhdGEkQ29uZGl0aW9uMj09IlBvc0EiIHwgdGVzdC5kYXRhJENvbmRpdGlvbjI9PSJSUkFlIixdCiN0ZXN0LmRhdGFbdGVzdC5kYXRhJFJvb2ZNYXRsPT0iTWVtYnJhbiIsXQp0ZXN0LmRhdGEgPC0gdGVzdC5kYXRhWy1jKDc3LDE1OCwzMzcpLF0KYGBgCgpgYGB7cn0KbmV3LnRlc3QueCA8LSB0ZXN0LmRhdGFbLC04MV0KbmV3LnRlc3QueSA8LSB0ZXN0LmRhdGFbLDgxXQpgYGAKCmBgYHtyfQoKZmluYWwucHJlZCA8LSBleHAocHJlZGljdChzdGVwd2lzZS5pbnQubG0sIG5ldy50ZXN0LngpKQoKY2F0KCJCaWFzOiAiLCBtZWFuKGZpbmFsLnByZWQtbmV3LnRlc3QueSkpCmNhdCgiXG5NYXhpbXVtIERldmlhdGlvbjogIiwgbWF4KGZpbmFsLnByZWQtbmV3LnRlc3QueSkpCmNhdCgiXG5NZWFuIEFic29sdXRlIERldmlhdGlvbjogIiwgbWVhbihhYnMoZmluYWwucHJlZC1uZXcudGVzdC55KSkpCmNhdCgiXG5NZWFuIFNxdWFyZSBFcnJvcjogIiwgbWVhbigoZmluYWwucHJlZC1uZXcudGVzdC55KSoqMikpCmNhdCgiXG5Sb290IE1lYW4gU3F1YXJlIEVycm9yOiAiLCBzcXJ0KG1lYW4oKGZpbmFsLnByZWQtbmV3LnRlc3QueSkqKjIpKSkKYGBgCgozIG9ic2VydmF0aW9ucyByZW1vdmVkIGZyb20gdGhlIGZpbmFsIHRlc3Qgc2V0OyBuZXcgbGV2ZWxzIGluIOKAnENvbmRpdGlvbjLigJ0gYW5kIOKAnFJvb2ZNYXRs4oCdIG5vdCBhY2NvdW50ZWQgZm9yIGJ5IG1vZGVsLgo=