1. Data Exploration
a. Read Data In
The dataset of interest for this report is the real estate prices from kaggle.com.
The dataset is stored in a github repostiory and read into R programing environment directly. The first 6 rows of the data given are as below. The target variable is under “SalePrice” column that shows the dollar values of the houses last sold.
## Id MSSubClass MSZoning LotFrontage LotArea Street
## 1 1 60 RL 65 8450 Pave
## 2 2 20 RL 80 9600 Pave
## 3 3 60 RL 68 11250 Pave
## 4 4 70 RL 60 9550 Pave
## 5 5 60 RL 84 14260 Pave
## 6 6 50 RL 85 14115 Pave
A corresponding short description of the predictors variables are given as below.
## # A tibble: 81 x 2
## Columns_Name Short_Desc
## <fct> <fct>
## 1 Id RowINdex
## 2 MSSubClass Building Class
## 3 MSZoning Zoning Classification
## 4 LotFrontage Linear feet of street connnected to property
## 5 LotArea Lot size in square feet
## 6 Street Type of road access
## 7 Alley Type of alley access
## 8 LotShape General shape of property
## 9 LandContour Flatness of the property
## 10 Utilities Type of utilities available
## 11 LotConfig Lot Configuration
## 12 LandSlope Slope of property
## 13 Neighborhood Physical locations within Ames city limits
## # ... with 68 more rows
b. Check for missing and NA
The data is checked to see if it contains any blank or NA values.
## na_count col_names
## 17 1453 PoolQC
## 19 1406 MiscFeature
## 2 1369 Alley
## 18 1179 Fence
## 11 690 FireplaceQu
## 1 259 LotFrontage
## 12 81 GarageType
## 13 81 GarageYrBlt
## 14 81 GarageFinish
## 15 81 GarageQual
## 16 81 GarageCond
## 7 38 BsmtExposure
## 9 38 BsmtFinType2
## 5 37 BsmtQual
## 6 37 BsmtCond
## 8 37 BsmtFinType1
## 3 8 MasVnrType
## 4 8 MasVnrArea
## 10 1 Electrical
There are quite a few categorical variables as shown in below examples.
#unique(housetrain$BldgType)
#unique(housetrain$MiscFeature)
#unique(housetrain$PoolQC)
#unique(housetrain$MiscFeature)
#unique(housetrain$Alley)
#unique(housetrain$Fence)
#unique(housetrain$FireplaceQu)
#unique(housetrain$GarageType)
unique(housetrain$GarageYrBlt)
## [1] 2003 1976 2001 1998 2000 1993 2004 1973 1931 1939 1965 2005 1962 2006
## [15] 1960 1991 1970 1967 1958 1930 2002 1968 2007 2008 1957 1920 1966 1959
## [29] 1995 1954 1953 NA 1983 1977 1997 1985 1963 1981 1964 1999 1935 1990
## [43] 1945 1987 1989 1915 1956 1948 1974 2009 1950 1961 1921 1900 1979 1951
## [57] 1969 1936 1975 1971 1923 1984 1926 1955 1986 1988 1916 1932 1972 1918
## [71] 1980 1924 1996 1940 1949 1994 1910 1978 1982 1992 1925 1941 2010 1927
## [85] 1947 1937 1942 1938 1952 1928 1922 1934 1906 1914 1946 1908 1929 1933
unique(housetrain$GarageFinish)
## [1] RFn Unf Fin <NA>
## Levels: Fin RFn Unf
unique(housetrain$GarageQual)
## [1] TA Fa Gd <NA> Ex Po
## Levels: Ex Fa Gd Po TA
#unique(crimetrain$LotFrontage)
c. Separate categorical data and numerical data
We will separate out the categorical data columns with missing variables and create a new data frame.
#Id has nothing to do house value
housetrain.asis$Id <- NULL
#Convert int columns to factor, as these are fixed values and choice is made from the list
housetrain.asis$MSSubClass <- factor(housetrain.asis$MSSubClass, levels = c(20,30,40,45,50,60,70,75,80,85,90,120,150,160,180,190))
housetrain.asis$OverallQual <- factor(housetrain.asis$OverallQual, levels = c(1,2,3,4,5,6,7,8,9,10))
housetrain.asis$OverallCond <- factor(housetrain.asis$OverallCond, levels = c(1,2,3,4,5,6,7,8,9,10))
housetrain.asis$BsmtFullBath <- factor(housetrain.asis$BsmtFullBath, levels = c(0,1,2,3))
housetrain.asis$BsmtHalfBath <- factor(housetrain.asis$BsmtHalfBath, levels = c(0,1,2))
housetrain.asis$FullBath <- factor(housetrain.asis$FullBath, levels = c(0,1,2,3))
housetrain.asis$HalfBath <- factor(housetrain.asis$HalfBath, levels = c(0,1,2))
housetrain.asis$BedroomAbvGr <- factor(housetrain.asis$BedroomAbvGr, levels = c(0,1,2,3,4,5,6,7,8))
housetrain.asis$KitchenAbvGr <- factor(housetrain.asis$KitchenAbvGr, levels = c(0,1,2,3))
housetrain.asis$Fireplaces <- factor(housetrain.asis$Fireplaces, levels = c(0,1,2,3))
housetrain.asis$GarageCars <- factor(housetrain.asis$GarageCars, levels = c(0,1,2,3))
housetrain.asis$MSZoning <- as.character(housetrain.asis$MSZoning)
housetrain.asis$MSZoning <- ifelse(housetrain.asis$MSZoning == 'C (all)', 'C', housetrain.asis$MSZoning)
housetrain.asis$MSZoning <- factor(housetrain.asis$MSZoning)
housetrain.asis$RoofMatl <- as.character(housetrain.asis$RoofMatl)
housetrain.asis$RoofMatl <- ifelse(housetrain.asis$RoofMatl == 'Tar&Grv', 'TarGrv', housetrain.asis$RoofMatl)
housetrain.asis$RoofMatl <- factor(housetrain.asis$RoofMatl)
#Based on the documentation, following columns have 'NA' values and they are correct
#Values are not really missing but they are legitimately not availabel for particular property
#Created 'NA' as one of the category
housetrain.asis$Alley <- as.character(housetrain.asis$Alley)
housetrain.asis[c("Alley")][is.na(housetrain.asis[c("Alley")])] <- 'NAly'
housetrain.asis$Alley <- factor(housetrain.asis$Alley)
housetrain.asis$PoolQC <- as.character(housetrain.asis$PoolQC)
housetrain.asis$PoolQC <- ifelse(housetrain.asis$PoolArea == 0, 'NP', housetrain.asis$PoolQC) #NP - No Pool
housetrain.asis$PoolQC <- factor(housetrain.asis$PoolQC)
housetrain.asis$GarageType <- as.character(housetrain.asis$GarageType)
housetrain.asis$GarageType <- ifelse(housetrain.asis$GarageCars == 0, 'NG', housetrain.asis$GarageType) #NG - No Garage
housetrain.asis$GarageType <- factor(housetrain.asis$GarageType)
housetrain.asis$GarageQual <- as.character(housetrain.asis$GarageQual)
housetrain.asis$GarageQual <- ifelse(housetrain.asis$GarageCars == 0, 'NG', housetrain.asis$GarageQual)
housetrain.asis$GarageQual <- factor(housetrain.asis$GarageQual)
housetrain.asis$GarageCond <- as.character(housetrain.asis$GarageCond)
housetrain.asis$GarageCond <- ifelse(housetrain.asis$GarageCars == 0, 'NG', housetrain.asis$GarageCond)
housetrain.asis$GarageCond <- factor(housetrain.asis$GarageCond)
housetrain.asis$GarageYrBlt <- housetrain.asis$YrSold - housetrain.asis$GarageYrBlt
housetrain.asis[c("GarageYrBlt")][is.na(housetrain.asis[c("GarageYrBlt")])] <- 0
housetrain.asis$GarageYrBlt <- as.integer(housetrain.asis$GarageYrBlt)
housetrain.asis$GarageFinish <- as.character(housetrain.asis$GarageFinish)
housetrain.asis$GarageFinish <- ifelse(housetrain.asis$GarageCars == 0, 'NG', housetrain.asis$GarageFinish)
housetrain.asis$GarageFinish <- factor(housetrain.asis$GarageFinish)
#Time duration year build to sold, this helps us to understand if old houses values are lower than newer ones
housetrain.asis$YearBuilt <- housetrain.asis$YrSold - housetrain.asis$YearBuilt
housetrain.asis$YearRemodAdd <- housetrain.asis$YrSold - housetrain.asis$YearRemodAdd
housetrain.asis$FireplaceQu <- as.character(housetrain.asis$FireplaceQu)
housetrain.asis$FireplaceQu <- ifelse(housetrain.asis$Fireplaces == 0, 'NF', housetrain.asis$FireplaceQu) #NF - No Fireplace
housetrain.asis$FireplaceQu <- factor(housetrain.asis$FireplaceQu)
housetrain.asis$Fence <- as.character(housetrain.asis$Fence)
housetrain.asis[c("Fence")][is.na(housetrain.asis[c("Fence")])] <- 'NF' #NF - No Fence
housetrain.asis$Fence <- factor(housetrain.asis$Fence)
housetrain.asis$BsmtQual <- as.character(housetrain.asis$BsmtQual)
housetrain.asis$BsmtQual <- ifelse(housetrain.asis$TotalBsmtSF == 0, 'NB', housetrain.asis$BsmtQual) #NB - No Basement
housetrain.asis$BsmtQual <- factor(housetrain.asis$BsmtQual)
housetrain.asis$BsmtCond <- as.character(housetrain.asis$BsmtCond)
housetrain.asis$BsmtCond <- ifelse(housetrain.asis$TotalBsmtSF == 0, 'NB', housetrain.asis$BsmtCond)
housetrain.asis$BsmtCond <- factor(housetrain.asis$BsmtCond)
housetrain.asis$BsmtExposure <- as.character(housetrain.asis$BsmtExposure)
housetrain.asis$BsmtExposure <- ifelse(housetrain.asis$TotalBsmtSF == 0, 'NB', housetrain.asis$BsmtExposure)
housetrain.asis$BsmtExposure <- factor(housetrain.asis$BsmtExposure)
housetrain.asis$BsmtFinType1 <- as.character(housetrain.asis$BsmtFinType1)
housetrain.asis$BsmtFinType1 <- ifelse(housetrain.asis$TotalBsmtSF == 0, 'NB', housetrain.asis$BsmtFinType1)
housetrain.asis$BsmtFinType1 <- factor(housetrain.asis$BsmtFinType1)
housetrain.asis$BsmtFinType2 <- as.character(housetrain.asis$BsmtFinType2)
housetrain.asis$BsmtFinType2 <- ifelse(housetrain.asis$TotalBsmtSF == 0, 'NB', housetrain.asis$BsmtFinType2)
housetrain.asis$BsmtFinType2 <- factor(housetrain.asis$BsmtFinType2)
housetrain.asis$MiscFeature <- as.character(housetrain.asis$MiscFeature)
housetrain.asis[c("MiscFeature")][is.na(housetrain.asis[c("MiscFeature")])] <- 'NM' #NM - No additional features
housetrain.asis$MiscFeature <- factor(housetrain.asis$MiscFeature)
#Year and Month sold are also fixed
housetrain.asis$MoSold <- factor(housetrain.asis$MoSold)
housetrain.asis$YrSold <- factor(housetrain.asis$YrSold)
d. Check for missing values and zeros
#Get datatypes of each column
colTypes <- sapply(housetrain.asis, class) %>% data.frame(stringsAsFactors = F)
colnames(colTypes) = c("colType")
colTypes <- tibble::rownames_to_column(colTypes, "colNames")
#Seperate them into integer and non integer datatypes
colIntNames <- colTypes %>% dplyr::filter(colType=="integer")
colCharNames <- colTypes %>% dplyr::filter(colType != "integer")
#Let list of missing values 'NA'
aggr_plot <- aggr(housetrain.asis, numbers=F, sortVars=F, labels=names(house.df), cex.axis=.45, gap=3, ylab=c("Missing data","Pattern"), plot=F)
#Summarize the output calculate missing percentages per column
summary(aggr_plot)$missings %>%
data.frame() %>% filter(Count > 0) %>%
mutate(Percentage = Count*100/nrow(housetrain.asis)) %>%
mutate(Percentage = paste0(round(Percentage,2),'%')) %>%
kable("html",caption = "Variables With Missing Values", row.names = F, digits = 2) %>%
kable_styling(bootstrap_options = c("striped", "hover", "condensed", "responsive"), full_width = F, position = "left", font_size = 12)
Variables With Missing Values
|
Variable
|
Count
|
Percentage
|
|
LotFrontage
|
259
|
17.74%
|
|
MasVnrType
|
8
|
0.55%
|
|
MasVnrArea
|
8
|
0.55%
|
|
BsmtExposure
|
1
|
0.07%
|
|
BsmtFinType2
|
1
|
0.07%
|
|
Electrical
|
1
|
0.07%
|
|
GarageType
|
5
|
0.34%
|
|
GarageFinish
|
5
|
0.34%
|
|
GarageCars
|
5
|
0.34%
|
|
GarageQual
|
5
|
0.34%
|
|
GarageCond
|
5
|
0.34%
|
#Get percentage of ZERO values in each numeric column
colString <- paste(colIntNames$colNames, collapse=", ")
colString <- as.list(strsplit(colString, ",")[[1]])
house.zero.var <- housetrain.asis %>% dplyr::select_(.dots = colString)
colZeros <- house.zero.var %>% melt()
colZeros <- colZeros %>% dplyr::filter(value==0)
colZeros <- colZeros %>% dplyr::group_by(variable) %>% dplyr::summarize(Total = n())
#Generate output
colZeros %>%
mutate(Percentage = Total*100/nrow(housetrain.asis)) %>%
mutate(Percentage = paste0(round(Percentage,2),'%')) %>%
kable("html",caption = "Variables With Zero Values", row.names = F, digits = 2) %>%
kable_styling(bootstrap_options = c("striped", "hover", "condensed", "responsive"), full_width = F, position = "left", font_size = 12)
Variables With Zero Values
|
variable
|
Total
|
Percentage
|
|
YearBuilt
|
64
|
4.38%
|
|
YearRemodAdd
|
124
|
8.49%
|
|
MasVnrArea
|
861
|
58.97%
|
|
BsmtFinSF1
|
467
|
31.99%
|
|
BsmtFinSF2
|
1293
|
88.56%
|
|
BsmtUnfSF
|
118
|
8.08%
|
|
TotalBsmtSF
|
37
|
2.53%
|
|
X2ndFlrSF
|
829
|
56.78%
|
|
LowQualFinSF
|
1434
|
98.22%
|
|
GarageYrBlt
|
165
|
11.3%
|
|
GarageArea
|
81
|
5.55%
|
|
WoodDeckSF
|
761
|
52.12%
|
|
OpenPorchSF
|
656
|
44.93%
|
|
EnclosedPorch
|
1252
|
85.75%
|
|
X3SsnPorch
|
1436
|
98.36%
|
|
ScreenPorch
|
1344
|
92.05%
|
|
PoolArea
|
1453
|
99.52%
|
|
MiscVal
|
1408
|
96.44%
|
f. Data imputation
#Generate sample test data
housetrain.comp.cases <- housetrain.asis[complete.cases(housetrain.asis), ]
housetrain.comp.test <- housetrain.comp.cases
#Remove sales price column as it won't be available while impuuting actual test data
housetrain.comp.test$SalePrice <- NULL
set.seed(101)
housetrain.comp.test[sample(1:nrow(housetrain.comp.test), 200), "LotFrontage"] <- NA
housetrain.comp.test[sample(1:nrow(housetrain.comp.test), 10), "MasVnrType"] <- NA
housetrain.comp.test[sample(1:nrow(housetrain.comp.test), 10), "MasVnrArea"] <- NA
housetrain.comp.test[sample(1:nrow(housetrain.comp.test), 5), "BsmtExposure"] <- NA
housetrain.comp.test[sample(1:nrow(housetrain.comp.test), 5), "BsmtFinType2"] <- NA
housetrain.comp.test[sample(1:nrow(housetrain.comp.test), 5), "Electrical"] <- NA
housetrain.comp.test[sample(1:nrow(housetrain.comp.test), 5), "GarageType"] <- NA
housetrain.comp.test[sample(1:nrow(housetrain.comp.test), 5), "GarageFinish"] <- NA
housetrain.comp.test[sample(1:nrow(housetrain.comp.test), 5), "GarageCars"] <- NA
housetrain.comp.test[sample(1:nrow(housetrain.comp.test), 5), "GarageQual"] <- NA
housetrain.comp.test[sample(1:nrow(housetrain.comp.test), 5), "GarageCond"] <- NA
for(i in 5:20){
housetrain.comp.sample <- housetrain.comp.test
#Imputation using kNN
housetrain.comp.imp <- knnImputation(housetrain.comp.sample, i, meth='weighAvg')
actual <- housetrain.comp.cases$LotFrontage[is.na(housetrain.comp.test$LotFrontage)]
predicts <- housetrain.comp.imp$LotFrontage[is.na(housetrain.comp.test$LotFrontage)]
error.rate <- regr.eval(actual, predicts)
if (i==5){
Accuracy.df <- data.frame(error.rate, stringsAsFactors = F) %>%
t() %>%
data.frame()
row.names(Accuracy.df) <- paste0("kNN-",i)
}else{
A <- data.frame(error.rate, stringsAsFactors = F) %>%
t() %>%
data.frame()
row.names(A) <- paste0("kNN-",i)
Accuracy.df <- rbind(Accuracy.df, A)
}
}
Accuracy.df <- tibble::rownames_to_column(Accuracy.df, "kNN")
Accuracy.df$Neighbors <- 5:20
Accuracy.df %>%
reshape2::melt(id.vars = c('kNN','Neighbors')) %>%
ggplot(aes(x = Neighbors, y = value)) +
geom_point() +
geom_line() +
facet_wrap(~variable, scales = "free", nrow = 2, ncol = 2) +
scale_x_continuous(breaks=seq(5,20,2))

#Impute actual data
housetrain.asis.noSalePrice <- housetrain.asis
housetrain.asis.noSalePrice$SalePrice <- NULL
housetrain.knn <- knnImputation(housetrain.asis.noSalePrice, 11, meth='weighAvg')
housetrain.knn <- cbind(housetrain.knn, SalePrice = housetrain.asis$SalePrice)
Plots show 11 or 13 is optimal number of neighbors. I selected 11.
g. Build models
#Build LM
housetrain.lm <- lm(SalePrice ~ . , data = housetrain.knn)
summary(housetrain.lm)
##
## Call:
## lm(formula = SalePrice ~ ., data = housetrain.knn)
##
## Residuals:
## Min 1Q Median 3Q Max
## -175318 -8247 0 8403 175318
##
## Coefficients: (12 not defined because of singularities)
## Estimate Std. Error t value Pr(>|t|)
## (Intercept) -856585.8319 157716.3303 -5.431 6.82e-08 ***
## MSSubClass30 312.5315 4749.6819 0.066 0.947548
## MSSubClass40 -409.5717 17178.9075 -0.024 0.980983
## MSSubClass45 -8448.1143 24905.5631 -0.339 0.734516
## MSSubClass50 -7789.8938 8883.9598 -0.877 0.380751
## MSSubClass60 -3455.2813 7988.2812 -0.433 0.665427
## MSSubClass70 1407.6420 8632.4992 0.163 0.870497
## MSSubClass75 -16035.4407 17178.6156 -0.933 0.350781
## MSSubClass80 -12511.4539 12767.3952 -0.980 0.327315
## MSSubClass85 -19952.9352 11440.4242 -1.744 0.081413 .
## MSSubClass90 -13673.4045 8666.1110 -1.578 0.114885
## MSSubClass120 -16420.7858 14393.4399 -1.141 0.254168
## MSSubClass160 -23288.8672 17582.6446 -1.325 0.185587
## MSSubClass180 -27703.9733 19593.5634 -1.414 0.157652
## MSSubClass190 -11951.5154 28135.6002 -0.425 0.671074
## MSZoningFV 42969.1076 12180.5092 3.528 0.000436 ***
## MSZoningRH 35748.6420 12351.1849 2.894 0.003871 **
## MSZoningRL 37363.5453 10587.6176 3.529 0.000434 ***
## MSZoningRM 32768.1718 9986.3092 3.281 0.001064 **
## LotFrontage 20.9988 45.9370 0.457 0.647669
## LotArea 0.6401 0.1119 5.721 1.35e-08 ***
## StreetPave 26130.3168 12945.7496 2.018 0.043776 *
## AlleyNAly -2668.2968 4213.4032 -0.633 0.526671
## AlleyPave -1682.7633 6157.6373 -0.273 0.784686
## LotShapeIR2 6358.7594 4106.9414 1.548 0.121826
## LotShapeIR3 9541.4823 8621.2235 1.107 0.268636
## LotShapeReg 1894.3697 1575.0553 1.203 0.229327
## LandContourHLS 7728.6638 5115.8398 1.511 0.131131
## LandContourLow -3788.3299 6471.8550 -0.585 0.558424
## LandContourLvl 6462.7773 3699.0781 1.747 0.080881 .
## UtilitiesNoSeWa -54405.5796 27880.6483 -1.951 0.051254 .
## LotConfigCulDSac 7679.3814 3250.9841 2.362 0.018334 *
## LotConfigFR2 -6874.7706 3926.6985 -1.751 0.080250 .
## LotConfigFR3 -16419.4216 12253.8526 -1.340 0.180529
## LotConfigInside -1253.4217 1772.8608 -0.707 0.479706
## LandSlopeMod 8787.3664 3979.4485 2.208 0.027427 *
## LandSlopeSev -35399.1020 11398.8698 -3.105 0.001946 **
## NeighborhoodBlueste 7572.3432 19589.2186 0.387 0.699156
## NeighborhoodBrDale 222.9895 11597.3364 0.019 0.984663
## NeighborhoodBrkSide -2714.4237 9603.5291 -0.283 0.777497
## NeighborhoodClearCr -9383.2870 9400.8443 -0.998 0.318425
## NeighborhoodCollgCr -6779.0451 7339.0932 -0.924 0.355841
## NeighborhoodCrawfor 14877.3462 8737.1968 1.703 0.088883 .
## NeighborhoodEdwards -20142.1236 8117.2991 -2.481 0.013229 *
## NeighborhoodGilbert -5301.0633 7745.8229 -0.684 0.493875
## NeighborhoodIDOTRR -9914.5639 10845.9875 -0.914 0.360843
## NeighborhoodMeadowV -12908.5356 12283.7122 -1.051 0.293540
## NeighborhoodMitchel -15578.3349 8289.1501 -1.879 0.060447 .
## NeighborhoodNAmes -13625.8055 7993.8666 -1.705 0.088551 .
## NeighborhoodNoRidge 20006.9890 8449.2484 2.368 0.018054 *
## NeighborhoodNPkVill 14385.0367 13742.7571 1.047 0.295441
## NeighborhoodNridgHt 14416.5586 7548.3604 1.910 0.056395 .
## NeighborhoodNWAmes -11363.7422 8167.2857 -1.391 0.164381
## NeighborhoodOldTown -11497.1115 9740.5410 -1.180 0.238110
## NeighborhoodSawyer -7551.0614 8228.7155 -0.918 0.358995
## NeighborhoodSawyerW -1267.9200 7857.6183 -0.161 0.871837
## NeighborhoodSomerst -143.1670 9062.2290 -0.016 0.987398
## NeighborhoodStoneBr 34444.5007 8497.2360 4.054 5.38e-05 ***
## NeighborhoodSWISU -7443.5025 9823.7496 -0.758 0.448783
## NeighborhoodTimber -6187.1498 8176.1157 -0.757 0.449363
## NeighborhoodVeenker 3661.4979 10550.4316 0.347 0.728619
## Condition1Feedr 4936.5026 4941.8952 0.999 0.318048
## Condition1Norm 14939.4485 4128.3378 3.619 0.000309 ***
## Condition1PosA 12556.2385 9770.1399 1.285 0.198993
## Condition1PosN 13904.7262 7324.1268 1.898 0.057882 .
## Condition1RRAe -15239.3626 8854.0985 -1.721 0.085489 .
## Condition1RRAn 13984.8142 6793.2366 2.059 0.039753 *
## Condition1RRNe -1202.2300 16928.8206 -0.071 0.943397
## Condition1RRNn 13839.8090 12546.4842 1.103 0.270221
## Condition2Feedr 8776.1690 26428.8248 0.332 0.739898
## Condition2Norm 2378.5998 24002.3491 0.099 0.921077
## Condition2PosA -18834.5971 39170.0381 -0.481 0.630720
## Condition2PosN -258489.4013 30430.3119 -8.494 < 2e-16 ***
## Condition2RRAe -107562.5019 73291.8879 -1.468 0.142488
## Condition2RRAn -4699.2716 33637.6551 -0.140 0.888919
## Condition2RRNn 10020.6099 29937.9729 0.335 0.737903
## BldgType2fmCon -2212.3330 27346.4174 -0.081 0.935535
## BldgTypeDuplex NA NA NA NA
## BldgTypeTwnhs -2088.1771 15296.6039 -0.137 0.891440
## BldgTypeTwnhsE 554.7325 14557.6446 0.038 0.969610
## HouseStyle1.5Unf 7316.6389 24498.4576 0.299 0.765255
## HouseStyle1Story -7828.3626 9029.7557 -0.867 0.386149
## HouseStyle2.5Fin -6561.5638 19009.3043 -0.345 0.730025
## HouseStyle2.5Unf 5641.4270 17247.6215 0.327 0.743663
## HouseStyle2Story -10516.5652 8237.3960 -1.277 0.201970
## HouseStyleSFoyer 2310.0048 12573.1599 0.184 0.854261
## HouseStyleSLvl 2065.1615 14037.0544 0.147 0.883061
## OverallQual2 26840.6553 31267.5125 0.858 0.390839
## OverallQual3 11242.8658 28835.4037 0.390 0.696684
## OverallQual4 10677.5264 28585.6786 0.374 0.708825
## OverallQual5 10001.3553 28733.4023 0.348 0.727848
## OverallQual6 13383.2947 28803.0126 0.465 0.642270
## OverallQual7 20047.1855 28836.3697 0.695 0.487067
## OverallQual8 32971.8738 28978.0187 1.138 0.255431
## OverallQual9 63180.6625 29527.6733 2.140 0.032588 *
## OverallQual10 101560.1520 30421.2659 3.338 0.000869 ***
## OverallCond2 -23026.8519 52799.2337 -0.436 0.662831
## OverallCond3 -51538.9146 54972.2965 -0.938 0.348675
## OverallCond4 -40842.2151 55222.5517 -0.740 0.459698
## OverallCond5 -33476.7489 55321.9175 -0.605 0.545214
## OverallCond6 -27577.4167 55313.3528 -0.499 0.618179
## OverallCond7 -21069.9957 55309.5363 -0.381 0.703313
## OverallCond8 -17420.5720 55363.9373 -0.315 0.753080
## OverallCond9 -9748.6585 55477.2757 -0.176 0.860542
## YearBuilt -398.0932 83.3638 -4.775 2.02e-06 ***
## YearRemodAdd -75.4919 55.8749 -1.351 0.176933
## RoofStyleGable -3871.8558 18240.0897 -0.212 0.831933
## RoofStyleGambrel -3081.4416 20092.1772 -0.153 0.878137
## RoofStyleHip -4582.1507 18303.3675 -0.250 0.802365
## RoofStyleMansard 1227.4811 21859.3903 0.056 0.955229
## RoofStyleShed 85967.0079 40024.9183 2.148 0.031935 *
## RoofMatlCompShg 585529.3349 54166.5268 10.810 < 2e-16 ***
## RoofMatlMembran 661166.2063 63596.3180 10.396 < 2e-16 ***
## RoofMatlMetal 635156.8076 63237.2752 10.044 < 2e-16 ***
## RoofMatlRoll 587564.7093 59654.3825 9.849 < 2e-16 ***
## RoofMatlTarGrv 574270.6328 57660.1437 9.960 < 2e-16 ***
## RoofMatlWdShake 578481.5889 56417.7754 10.254 < 2e-16 ***
## RoofMatlWdShngl 630849.0006 55216.2901 11.425 < 2e-16 ***
## Exterior1stAsphShn -12740.4988 32393.4394 -0.393 0.694167
## Exterior1stBrkComm 9229.6717 28221.9229 0.327 0.743698
## Exterior1stBrkFace 19898.7413 12885.3882 1.544 0.122793
## Exterior1stCBlock -6262.5973 27440.6493 -0.228 0.819513
## Exterior1stCemntBd 4142.8709 19080.6403 0.217 0.828150
## Exterior1stHdBoard 478.0806 13067.8272 0.037 0.970823
## Exterior1stImStucc -15290.1013 27565.1749 -0.555 0.579215
## Exterior1stMetalSd 7396.1988 14788.9052 0.500 0.617087
## Exterior1stPlywood -594.1876 12859.6453 -0.046 0.963154
## Exterior1stStone 6864.0167 23986.0633 0.286 0.774802
## Exterior1stStucco 4875.4891 14525.0416 0.336 0.737188
## Exterior1stVinylSd 792.7966 13587.0974 0.058 0.953481
## Exterior1stWd Sdng 95.3849 12583.1559 0.008 0.993953
## Exterior1stWdShing 3269.5070 13659.5921 0.239 0.810872
## Exterior2ndAsphShn 10495.9599 22184.0199 0.473 0.636209
## Exterior2ndBrk Cmn 2109.6978 20304.2606 0.104 0.917263
## Exterior2ndBrkFace -3872.1856 13257.6970 -0.292 0.770285
## Exterior2ndCBlock NA NA NA NA
## Exterior2ndCmentBd 4310.1523 18665.2459 0.231 0.817419
## Exterior2ndHdBoard 1071.2443 12513.4361 0.086 0.931793
## Exterior2ndImStucc 4670.4074 14451.5051 0.323 0.746619
## Exterior2ndMetalSd -1449.7693 14371.1190 -0.101 0.919663
## Exterior2ndOther -21576.2416 26520.0761 -0.814 0.416053
## Exterior2ndPlywood 774.8445 12156.8698 0.064 0.949191
## Exterior2ndStone -13676.9448 17235.1148 -0.794 0.427620
## Exterior2ndStucco -1879.4273 13861.3453 -0.136 0.892171
## Exterior2ndVinylSd 4633.7771 13043.7155 0.355 0.722467
## Exterior2ndWd Sdng 5956.6046 12106.0224 0.492 0.622787
## Exterior2ndWd Shng 681.3413 12719.7997 0.054 0.957291
## MasVnrTypeBrkFace 7664.6659 6208.2553 1.235 0.217234
## MasVnrTypeNone 9208.9986 6246.7646 1.474 0.140700
## MasVnrTypeStone 10195.0205 6570.4481 1.552 0.121021
## MasVnrArea 18.0445 5.6607 3.188 0.001473 **
## ExterQualFa -2204.5097 12989.7205 -0.170 0.865267
## ExterQualGd -5895.7940 5137.4725 -1.148 0.251369
## ExterQualTA -6570.6929 5565.0549 -1.181 0.237963
## ExterCondFa -9579.9182 18315.4165 -0.523 0.601038
## ExterCondGd -14087.9576 17453.0756 -0.807 0.419723
## ExterCondPo -42296.7164 35966.5800 -1.176 0.239837
## ExterCondTA -11641.5717 17500.0714 -0.665 0.506036
## FoundationCBlock 1115.6569 3196.5688 0.349 0.727140
## FoundationPConc 3774.7042 3407.6568 1.108 0.268217
## FoundationSlab -3844.9456 9952.4991 -0.386 0.699324
## FoundationStone 4080.2253 12316.4734 0.331 0.740492
## FoundationWood -33236.7575 14480.1387 -2.295 0.021893 *
## BsmtQualFa -4372.2416 6420.5463 -0.681 0.496023
## BsmtQualGd -11284.2298 3388.1880 -3.330 0.000894 ***
## BsmtQualNB 732.5989 13512.1403 0.054 0.956771
## BsmtQualTA -9243.4600 4149.4311 -2.228 0.026097 *
## BsmtCondGd 3164.3826 5295.5566 0.598 0.550255
## BsmtCondNB NA NA NA NA
## BsmtCondPo -10843.0162 37529.5570 -0.289 0.772695
## BsmtCondTA 5816.9029 4295.9220 1.354 0.175985
## BsmtExposureGd 10706.9710 3004.4654 3.564 0.000381 ***
## BsmtExposureMn -2003.0047 2977.6050 -0.673 0.501279
## BsmtExposureNB NA NA NA NA
## BsmtExposureNo -4166.6345 2156.1283 -1.932 0.053547 .
## BsmtFinType1BLQ 1506.4290 2748.5311 0.548 0.583740
## BsmtFinType1GLQ 5177.3063 2473.9355 2.093 0.036590 *
## BsmtFinType1LwQ -2403.3611 3662.5666 -0.656 0.511829
## BsmtFinType1NB NA NA NA NA
## BsmtFinType1Rec 173.7837 2946.8354 0.059 0.952984
## BsmtFinType1Unf 2749.8198 2900.7502 0.948 0.343344
## BsmtFinSF1 33.8308 5.1443 6.576 7.29e-11 ***
## BsmtFinType2BLQ -8805.3928 7282.6886 -1.209 0.226878
## BsmtFinType2GLQ -3284.9591 9100.3017 -0.361 0.718186
## BsmtFinType2LwQ -8467.4203 7133.7992 -1.187 0.235494
## BsmtFinType2NB NA NA NA NA
## BsmtFinType2Rec -6382.6716 6802.4816 -0.938 0.348294
## BsmtFinType2Unf -3855.7317 7250.4149 -0.532 0.594971
## BsmtFinSF2 29.0478 8.8092 3.297 0.001005 **
## BsmtUnfSF 14.9952 4.7291 3.171 0.001560 **
## TotalBsmtSF NA NA NA NA
## HeatingGasA 22192.1664 25410.0888 0.873 0.382648
## HeatingGasW 22970.5173 26232.4959 0.876 0.381402
## HeatingGrav 7560.5202 27647.0722 0.273 0.784544
## HeatingOthW 8770.9961 31266.3951 0.281 0.779125
## HeatingWall 33611.6671 29252.1490 1.149 0.250781
## HeatingQCFa -1341.0831 4734.4495 -0.283 0.777028
## HeatingQCGd -2938.4171 2024.0437 -1.452 0.146842
## HeatingQCPo 9488.6207 25966.2178 0.365 0.714864
## HeatingQCTA -2589.3972 2037.7375 -1.271 0.204084
## CentralAirY -416.1486 3868.0706 -0.108 0.914343
## ElectricalFuseF -2745.2904 5841.1398 -0.470 0.638449
## ElectricalFuseP -10212.7563 18649.6849 -0.548 0.584066
## ElectricalMix NA NA NA NA
## ElectricalSBrkr -2825.2548 2950.5700 -0.958 0.338501
## X1stFlrSF 48.4013 5.5595 8.706 < 2e-16 ***
## X2ndFlrSF 52.5641 6.0736 8.654 < 2e-16 ***
## LowQualFinSF -4.6670 19.2784 -0.242 0.808759
## GrLivArea NA NA NA NA
## BsmtFullBath1 1177.8488 1997.6882 0.590 0.555570
## BsmtFullBath2 5505.5181 9962.1301 0.553 0.580614
## BsmtFullBath3 29366.4887 27298.1445 1.076 0.282256
## BsmtHalfBath1 2649.5711 3118.1774 0.850 0.395658
## BsmtHalfBath2 -23464.8224 29650.6071 -0.791 0.428887
## FullBath1 -7464.1327 17478.7877 -0.427 0.669430
## FullBath2 -7047.3866 17772.4836 -0.397 0.691785
## FullBath3 16637.4679 18535.7249 0.898 0.369592
## HalfBath1 3307.8658 2207.3000 1.499 0.134250
## HalfBath2 -1763.0251 9266.7583 -0.190 0.849145
## BedroomAbvGr1 18147.7194 16784.4248 1.081 0.279824
## BedroomAbvGr2 22185.8470 16541.3568 1.341 0.180108
## BedroomAbvGr3 15933.9648 16679.2490 0.955 0.339618
## BedroomAbvGr4 17253.2694 16911.5135 1.020 0.307844
## BedroomAbvGr5 8833.1865 18068.8629 0.489 0.625032
## BedroomAbvGr6 20038.4118 20235.5406 0.990 0.322256
## BedroomAbvGr8 37862.6508 35315.2674 1.072 0.283885
## KitchenAbvGr1 -5630.6399 44118.1705 -0.128 0.898467
## KitchenAbvGr2 -15664.0709 44590.5941 -0.351 0.725438
## KitchenAbvGr3 -26808.1967 48140.5373 -0.557 0.577722
## KitchenQualFa -17447.3368 6256.0149 -2.789 0.005376 **
## KitchenQualGd -17430.3145 3516.6326 -4.957 8.25e-07 ***
## KitchenQualTA -17969.5274 3933.3294 -4.569 5.44e-06 ***
## TotRmsAbvGrd 1317.5771 950.2140 1.387 0.165828
## FunctionalMaj2 -9876.0786 14324.0639 -0.689 0.490663
## FunctionalMin1 3385.2590 8540.6642 0.396 0.691906
## FunctionalMin2 3180.4629 8724.4960 0.365 0.715519
## FunctionalMod -7104.6055 10488.3939 -0.677 0.498302
## FunctionalSev -34338.6173 29070.0654 -1.181 0.237752
## FunctionalTyp 13063.0907 7564.5761 1.727 0.084458 .
## Fireplaces1 -6582.4482 5483.7242 -1.200 0.230246
## Fireplaces2 127.2270 6014.5918 0.021 0.983127
## Fireplaces3 1869.1869 12866.5352 0.145 0.884519
## FireplaceQuFa 4549.1504 6786.9507 0.670 0.502814
## FireplaceQuGd 8208.6063 5316.1123 1.544 0.122839
## FireplaceQuNF NA NA NA NA
## FireplaceQuPo 13969.8680 7798.4762 1.791 0.073498 .
## FireplaceQuTA 9360.7897 5505.9034 1.700 0.089375 .
## GarageTypeAttchd 33119.6708 11809.5244 2.804 0.005124 **
## GarageTypeBasment 39938.5136 13369.5144 2.987 0.002874 **
## GarageTypeBuiltIn 30354.1390 12287.1570 2.470 0.013640 *
## GarageTypeCarPort 38737.7148 15411.6653 2.514 0.012088 *
## GarageTypeDetchd 34919.9286 11803.0625 2.959 0.003154 **
## GarageTypeNG -99198.6001 49654.0658 -1.998 0.045974 *
## GarageYrBlt 2.2244 60.7921 0.037 0.970818
## GarageFinishNG 114505.6266 47214.7084 2.425 0.015452 *
## GarageFinishRFn -701.9726 1929.9356 -0.364 0.716127
## GarageFinishUnf -1033.6589 2384.3036 -0.434 0.664713
## GarageCars1 -15045.6134 19130.8915 -0.786 0.431762
## GarageCars2 -15940.7516 19061.8343 -0.836 0.403179
## GarageCars3 -6305.7140 19187.2391 -0.329 0.742487
## GarageArea 20.6652 7.8375 2.637 0.008484 **
## GarageQualFa -73062.0313 31793.5482 -2.298 0.021739 *
## GarageQualGd -68166.6546 32566.1915 -2.093 0.036552 *
## GarageQualNG NA NA NA NA
## GarageQualPo -78037.5768 40775.8366 -1.914 0.055891 .
## GarageQualTA -68294.4160 31429.2263 -2.173 0.029986 *
## GarageCondFa 63157.9279 35385.9877 1.785 0.074552 .
## GarageCondGd 63734.0725 36764.4691 1.734 0.083260 .
## GarageCondNG NA NA NA NA
## GarageCondPo 69100.6168 38774.4055 1.782 0.074993 .
## GarageCondTA 64411.1056 35167.4126 1.832 0.067275 .
## PavedDriveP -4946.4662 5489.7861 -0.901 0.367760
## PavedDriveY -703.1284 3461.6202 -0.203 0.839076
## WoodDeckSF 10.1153 5.7623 1.755 0.079453 .
## OpenPorchSF 5.0249 11.4393 0.439 0.660553
## EnclosedPorch 7.8812 12.4620 0.632 0.527237
## X3SsnPorch 52.1939 21.7640 2.398 0.016635 *
## ScreenPorch 46.3312 12.2859 3.771 0.000171 ***
## PoolArea 651.3711 226.5377 2.875 0.004110 **
## PoolQCFa -143233.9603 40580.7579 -3.530 0.000433 ***
## PoolQCGd -108692.0229 36466.6736 -2.981 0.002937 **
## PoolQCNP 252148.3295 123801.7614 2.037 0.041907 *
## FenceGdWo 3602.4174 4875.0861 0.739 0.460091
## FenceMnPrv 6919.0492 3968.7734 1.743 0.081535 .
## FenceMnWw 617.7255 8009.5008 0.077 0.938538
## FenceNF 5291.5861 3626.2491 1.459 0.144770
## MiscFeatureNM -452.7031 98780.6709 -0.005 0.996344
## MiscFeatureOthr 14837.5813 90648.6409 0.164 0.870010
## MiscFeatureShed 1399.1800 94548.0229 0.015 0.988195
## MiscFeatureTenC 6595.4966 97756.4521 0.067 0.946220
## MiscVal 0.2458 6.2278 0.039 0.968524
## MoSold2 -7731.8508 4640.6001 -1.666 0.095959 .
## MoSold3 -2669.7935 4084.7448 -0.654 0.513499
## MoSold4 -2592.0432 3873.1388 -0.669 0.503479
## MoSold5 -952.1368 3706.0580 -0.257 0.797291
## MoSold6 -2476.5190 3655.8633 -0.677 0.498282
## MoSold7 -347.0836 3713.8167 -0.093 0.925556
## MoSold8 -6521.0467 3919.2241 -1.664 0.096412 .
## MoSold9 -5516.9199 4484.2655 -1.230 0.218842
## MoSold10 -7657.0159 4256.0910 -1.799 0.072269 .
## MoSold11 -4929.3721 4292.5573 -1.148 0.251061
## MoSold12 -4544.2673 4605.6940 -0.987 0.324015
## YrSold2007 86.9338 1923.1899 0.045 0.963953
## YrSold2008 2545.6042 2015.5626 1.263 0.206854
## YrSold2009 -3.1244 1957.9171 -0.002 0.998727
## YrSold2010 3058.1111 2448.0367 1.249 0.211842
## SaleTypeCon 25106.9899 17511.8883 1.434 0.151926
## SaleTypeConLD 14126.8993 9891.0703 1.428 0.153491
## SaleTypeConLI 547.0299 11431.8229 0.048 0.961843
## SaleTypeConLw 2737.2419 12140.6171 0.225 0.821660
## SaleTypeCWD 11139.1104 12647.6958 0.881 0.378652
## SaleTypeNew 25491.2792 15371.7619 1.658 0.097525 .
## SaleTypeOth 7346.2181 14491.6217 0.507 0.612302
## SaleTypeWD 145.1526 4109.8105 0.035 0.971832
## SaleConditionAdjLand 27076.3022 16205.4457 1.671 0.095030 .
## SaleConditionAlloca -3537.0955 9952.9400 -0.355 0.722368
## SaleConditionFamily 753.0752 6013.8156 0.125 0.900368
## SaleConditionNormal 7021.8337 2873.5884 2.444 0.014692 *
## SaleConditionPartial -2789.4755 14754.9403 -0.189 0.850084
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## Residual standard error: 21670 on 1153 degrees of freedom
## Multiple R-squared: 0.9412, Adjusted R-squared: 0.9256
## F-statistic: 60.31 on 306 and 1153 DF, p-value: < 2.2e-16
lm.formula <- formula(housetrain.lm)
Above model has coefficients with ‘NA’ values. This is because variable is linearly dependent on other variables.
i. Build model using wide form dataset
#Generate wide form lm
housetrain.knn.long.lm <- lm(SalePrice ~., data = housetrain.knn.long)
summary(housetrain.knn.long.lm)
##
## Call:
## lm(formula = SalePrice ~ ., data = housetrain.knn.long)
##
## Residuals:
## Min 1Q Median 3Q Max
## -175318 -8247 0 8403 175318
##
## Coefficients: (73 not defined because of singularities)
## Estimate Std. Error t value Pr(>|t|)
## (Intercept) 91281.2250 112837.8369 0.809 0.418705
## LotFrontage 20.9988 45.9370 0.457 0.647669
## LotArea 0.6401 0.1119 5.721 1.35e-08 ***
## YearBuilt -398.0932 83.3638 -4.775 2.02e-06 ***
## YearRemodAdd -75.4919 55.8749 -1.351 0.176933
## MasVnrArea 18.0445 5.6607 3.188 0.001473 **
## BsmtFinSF1 33.8308 5.1443 6.576 7.29e-11 ***
## BsmtFinSF2 29.0478 8.8092 3.297 0.001005 **
## BsmtUnfSF 14.9952 4.7291 3.171 0.001560 **
## TotalBsmtSF NA NA NA NA
## X1stFlrSF 48.4013 5.5595 8.706 < 2e-16 ***
## X2ndFlrSF 52.5641 6.0736 8.654 < 2e-16 ***
## LowQualFinSF -4.6670 19.2784 -0.242 0.808759
## GrLivArea NA NA NA NA
## TotRmsAbvGrd 1317.5771 950.2140 1.387 0.165828
## GarageYrBlt 2.2244 60.7921 0.037 0.970818
## GarageArea 20.6652 7.8375 2.637 0.008484 **
## WoodDeckSF 10.1153 5.7623 1.755 0.079453 .
## OpenPorchSF 5.0249 11.4393 0.439 0.660553
## EnclosedPorch 7.8812 12.4620 0.632 0.527237
## X3SsnPorch 52.1939 21.7640 2.398 0.016635 *
## ScreenPorch 46.3312 12.2859 3.771 0.000171 ***
## PoolArea 651.3711 226.5377 2.875 0.004110 **
## MiscVal 0.2458 6.2278 0.039 0.968524
## MSSubClass120 -2192.6488 9556.7809 -0.229 0.818572
## MSSubClass160 -9060.7302 13071.3566 -0.693 0.488340
## MSSubClass180 -13475.8363 13369.2279 -1.008 0.313678
## MSSubClass190 2276.6216 31511.4702 0.072 0.942418
## MSSubClass20 14228.1370 16858.9677 0.844 0.398872
## MSSubClass30 14540.6684 17550.9590 0.828 0.407569
## MSSubClass40 13818.5652 24434.4326 0.566 0.571819
## MSSubClass45 5780.0226 29938.6387 0.193 0.846944
## MSSubClass50 6438.2431 18633.6330 0.346 0.729769
## MSSubClass60 10772.8557 18459.3225 0.584 0.559604
## MSSubClass70 15635.7790 18744.1143 0.834 0.404358
## MSSubClass75 -1807.3038 24363.5779 -0.074 0.940880
## MSSubClass80 1716.6831 19580.4775 0.088 0.930152
## MSSubClass85 -5724.7982 18167.3100 -0.315 0.752731
## MSSubClass90 NA NA NA NA
## MSZoningC -32768.1718 9986.3092 -3.281 0.001064 **
## MSZoningFV 10200.9358 7507.7876 1.359 0.174503
## MSZoningRH 2980.4702 7667.0216 0.389 0.697541
## MSZoningRL 4595.3735 3769.7640 1.219 0.223090
## MSZoningRM NA NA NA NA
## StreetGrvl -26130.3168 12945.7496 -2.018 0.043776 *
## StreetPave NA NA NA NA
## AlleyGrvl 1682.7633 6157.6373 0.273 0.784686
## AlleyNAly -985.5335 4862.0378 -0.203 0.839406
## AlleyPave NA NA NA NA
## LotShapeIR1 -1894.3697 1575.0553 -1.203 0.229327
## LotShapeIR2 4464.3897 4215.5860 1.059 0.289812
## LotShapeIR3 7647.1126 8671.9968 0.882 0.378060
## LotShapeReg NA NA NA NA
## LandContourBnk -6462.7773 3699.0781 -1.747 0.080881 .
## LandContourHLS 1265.8865 4051.2050 0.312 0.754739
## LandContourLow -10251.1072 5685.6718 -1.803 0.071654 .
## LandContourLvl NA NA NA NA
## UtilitiesAllPub 54405.5796 27880.6483 1.951 0.051254 .
## UtilitiesNoSeWa NA NA NA NA
## LotConfigCorner 1253.4217 1772.8608 0.707 0.479706
## LotConfigCulDSac 8932.8031 2943.1467 3.035 0.002458 **
## LotConfigFR2 -5621.3489 3679.9160 -1.528 0.126892
## LotConfigFR3 -15165.9999 12160.2044 -1.247 0.212584
## LotConfigInside NA NA NA NA
## LandSlopeGtl 35399.1020 11398.8698 3.105 0.001946 **
## LandSlopeMod 44186.4683 11437.6541 3.863 0.000118 ***
## LandSlopeSev NA NA NA NA
## NeighborhoodBlmngtn -3661.4979 10550.4316 -0.347 0.728619
## NeighborhoodBlueste 3910.8453 19447.3066 0.201 0.840656
## NeighborhoodBrDale -3438.5084 11868.0633 -0.290 0.772077
## NeighborhoodBrkSide -6375.9217 9350.6634 -0.682 0.495459
## NeighborhoodClearCr -13044.7849 9558.1637 -1.365 0.172589
## NeighborhoodCollgCr -10440.5431 8084.4655 -1.291 0.196813
## NeighborhoodCrawfor 11215.8483 8852.5156 1.267 0.205423
## NeighborhoodEdwards -23803.6215 8408.2460 -2.831 0.004721 **
## NeighborhoodGilbert -8962.5612 8460.1943 -1.059 0.289649
## NeighborhoodIDOTRR -13576.0619 10521.4714 -1.290 0.197198
## NeighborhoodMeadowV -16570.0335 12360.8348 -1.341 0.180338
## NeighborhoodMitchel -19239.8328 8613.7730 -2.234 0.025700 *
## NeighborhoodNAmes -17287.3034 8053.4015 -2.147 0.032035 *
## NeighborhoodNoRidge 16345.4911 8960.3767 1.824 0.068381 .
## NeighborhoodNPkVill 10723.5387 14150.8060 0.758 0.448723
## NeighborhoodNridgHt 10755.0607 8643.5579 1.244 0.213647
## NeighborhoodNWAmes -15025.2401 8244.4653 -1.822 0.068644 .
## NeighborhoodOldTown -15158.6094 9430.3112 -1.607 0.108233
## NeighborhoodSawyer -11212.5593 8287.9927 -1.353 0.176363
## NeighborhoodSawyerW -4929.4179 8331.1018 -0.592 0.554175
## NeighborhoodSomerst -3804.6649 9597.4242 -0.396 0.691864
## NeighborhoodStoneBr 30783.0027 9394.8460 3.277 0.001082 **
## NeighborhoodSWISU -11105.0004 10047.0721 -1.105 0.269261
## NeighborhoodTimber -9848.6477 8860.3936 -1.112 0.266569
## NeighborhoodVeenker NA NA NA NA
## Condition1Artery -13839.8090 12546.4842 -1.103 0.270221
## Condition1Feedr -8903.3064 12145.6781 -0.733 0.463681
## Condition1Norm 1099.6395 11829.1766 0.093 0.925952
## Condition1PosA -1283.5705 14798.8891 -0.087 0.930898
## Condition1PosN 64.9172 13180.4523 0.005 0.996071
## Condition1RRAe -29079.1716 14270.4176 -2.038 0.041805 *
## Condition1RRAn 145.0052 12412.5224 0.012 0.990681
## Condition1RRNe -15042.0390 20088.9880 -0.749 0.454148
## Condition1RRNn NA NA NA NA
## Condition2Artery -10020.6099 29937.9729 -0.335 0.737903
## Condition2Feedr -1244.4410 21187.5245 -0.059 0.953174
## Condition2Norm -7642.0102 17303.2692 -0.442 0.658824
## Condition2PosA -28855.2071 36854.4533 -0.783 0.433817
## Condition2PosN -268510.0113 25785.9918 -10.413 < 2e-16 ***
## Condition2RRAe -117583.1118 71913.5227 -1.635 0.102309
## Condition2RRAn -14719.8815 28617.0338 -0.514 0.607089
## Condition2RRNn NA NA NA NA
## BldgType1Fam -554.7325 14557.6446 -0.038 0.969610
## BldgType2fmCon -2767.0655 30700.6306 -0.090 0.928199
## BldgTypeDuplex NA NA NA NA
## BldgTypeTwnhs -2642.9096 5087.4779 -0.519 0.603517
## BldgTypeTwnhsE NA NA NA NA
## HouseStyle1.5Fin -2065.1615 14037.0544 -0.147 0.883061
## HouseStyle1.5Unf 5251.4774 26536.7409 0.198 0.843162
## HouseStyle1Story -9893.5242 12618.7988 -0.784 0.433183
## HouseStyle2.5Fin -8626.7253 22935.5566 -0.376 0.706890
## HouseStyle2.5Unf 3576.2654 21093.2130 0.170 0.865397
## HouseStyle2Story -12581.7267 13578.4271 -0.927 0.354330
## HouseStyleSFoyer 244.8433 11994.0679 0.020 0.983717
## HouseStyleSLvl NA NA NA NA
## OverallQual1 -63180.6625 29527.6733 -2.140 0.032588 *
## OverallQual10 38379.4895 7805.8578 4.917 1.01e-06 ***
## OverallQual2 -36340.0072 19375.4512 -1.876 0.060967 .
## OverallQual3 -51937.7967 9143.0709 -5.681 1.70e-08 ***
## OverallQual4 -52503.1361 6936.1502 -7.569 7.65e-14 ***
## OverallQual5 -53179.3072 6379.8406 -8.336 < 2e-16 ***
## OverallQual6 -49797.3678 6127.8338 -8.126 1.13e-15 ***
## OverallQual7 -43133.4769 5770.9796 -7.474 1.53e-13 ***
## OverallQual8 -30208.7887 5232.8309 -5.773 1.00e-08 ***
## OverallQual9 NA NA NA NA
## OverallCond1 6923.4036 55533.1151 0.125 0.900805
## OverallCond2 -13278.1935 18211.4584 -0.729 0.466081
## OverallCond3 -41790.2561 8779.7514 -4.760 2.18e-06 ***
## OverallCond4 -31093.5566 7518.2502 -4.136 3.79e-05 ***
## OverallCond5 -23728.0904 6715.4371 -3.533 0.000427 ***
## OverallCond6 -17828.7583 6610.9671 -2.697 0.007102 **
## OverallCond7 -11321.3372 6401.6358 -1.769 0.077241 .
## OverallCond8 -7671.9136 6554.2786 -1.171 0.242034
## OverallCond9 NA NA NA NA
## RoofStyleFlat -85967.0079 40024.9183 -2.148 0.031935 *
## RoofStyleGable -89838.8636 36591.7101 -2.455 0.014229 *
## RoofStyleGambrel -89048.4495 37547.4770 -2.372 0.017874 *
## RoofStyleHip -90549.1585 36627.5045 -2.472 0.013573 *
## RoofStyleMansard -84739.5268 36715.4743 -2.308 0.021175 *
## RoofStyleShed NA NA NA NA
## RoofMatlClyTile -630849.0006 55216.2901 -11.425 < 2e-16 ***
## RoofMatlCompShg -45319.6657 11256.2043 -4.026 6.04e-05 ***
## RoofMatlMembran 30317.2057 33869.7816 0.895 0.370915
## RoofMatlMetal 4307.8070 32931.0396 0.131 0.895946
## RoofMatlRoll -43284.2913 27745.8663 -1.560 0.119028
## RoofMatlTarGrv -56578.3678 20759.1835 -2.725 0.006518 **
## RoofMatlWdShake -52367.4117 18483.1516 -2.833 0.004688 **
## RoofMatlWdShngl NA NA NA NA
## Exterior1stAsbShng -3269.5070 13659.5921 -0.239 0.810872
## Exterior1stAsphShn -16010.0057 31212.9533 -0.513 0.608100
## Exterior1stBrkComm 5960.1648 26091.1307 0.228 0.819347
## Exterior1stBrkFace 16629.2344 7806.0671 2.130 0.033358 *
## Exterior1stCBlock -10213.4455 27241.2461 -0.375 0.707785
## Exterior1stCemntBd 873.3639 15418.0622 0.057 0.954837
## Exterior1stHdBoard -2791.4263 7280.5940 -0.383 0.701489
## Exterior1stImStucc -18559.6083 25580.8665 -0.726 0.468276
## Exterior1stMetalSd 4126.6919 9846.0262 0.419 0.675205
## Exterior1stPlywood -3863.6946 7356.0403 -0.525 0.599517
## Exterior1stStone 3594.5097 21310.7214 0.169 0.866085
## Exterior1stStucco 1605.9821 9926.1383 0.162 0.871497
## Exterior1stVinylSd -2476.7103 8775.2519 -0.282 0.777812
## `Exterior1stWd Sdng` -3174.1221 6713.5850 -0.473 0.636452
## Exterior1stWdShing NA NA NA NA
## Exterior2ndAsbShng -681.3413 12719.7997 -0.054 0.957291
## Exterior2ndAsphShn 9814.6186 19346.1785 0.507 0.612030
## `Exterior2ndBrk Cmn` 1428.3565 17418.5976 0.082 0.934660
## Exterior2ndBrkFace -4553.5269 8429.2107 -0.540 0.589158
## Exterior2ndCBlock NA NA NA NA
## Exterior2ndCmentBd 3628.8110 14955.1124 0.243 0.808322
## Exterior2ndHdBoard 389.9030 6795.3035 0.057 0.954254
## Exterior2ndImStucc 3989.0661 10044.3952 0.397 0.691335
## Exterior2ndMetalSd -2131.1106 9463.0961 -0.225 0.821862
## Exterior2ndOther -22257.5829 24553.4687 -0.906 0.364864
## Exterior2ndPlywood 93.5032 6514.7961 0.014 0.988551
## Exterior2ndStone -14358.2861 13246.9299 -1.084 0.278638
## Exterior2ndStucco -2560.7686 9124.2334 -0.281 0.779025
## Exterior2ndVinylSd 3952.4358 7839.5612 0.504 0.614241
## `Exterior2ndWd Sdng` 5275.2633 5821.9507 0.906 0.365073
## `Exterior2ndWd Shng` NA NA NA NA
## MasVnrTypeBrkCmn -10195.0205 6570.4481 -1.552 0.121021
## MasVnrTypeBrkFace -2530.3546 2860.1578 -0.885 0.376508
## MasVnrTypeNone -986.0219 3079.8523 -0.320 0.748911
## MasVnrTypeStone NA NA NA NA
## ExterQualEx 6570.6929 5565.0549 1.181 0.237963
## ExterQualFa 4366.1831 11763.1398 0.371 0.710575
## ExterQualGd 674.8988 2468.0565 0.273 0.784554
## ExterQualTA NA NA NA NA
## ExterCondEx 11641.5717 17500.0714 0.665 0.506036
## ExterCondFa 2061.6535 5810.0878 0.355 0.722774
## ExterCondGd -2446.3859 2372.7094 -1.031 0.302733
## ExterCondPo -30655.1447 31182.9493 -0.983 0.325777
## ExterCondTA NA NA NA NA
## FoundationBrkTil 33236.7575 14480.1387 2.295 0.021893 *
## FoundationCBlock 34352.4144 14249.0085 2.411 0.016070 *
## FoundationPConc 37011.4617 14153.7878 2.615 0.009040 **
## FoundationSlab 29391.8118 17021.1402 1.727 0.084475 .
## FoundationStone 37316.9828 18480.3966 2.019 0.043690 *
## FoundationWood NA NA NA NA
## BsmtQualEx 9243.4600 4149.4311 2.228 0.026097 *
## BsmtQualFa 4871.2185 4956.4788 0.983 0.325913
## BsmtQualGd -2040.7698 2543.3278 -0.802 0.422486
## BsmtQualNB 9431.7024 8950.9335 1.054 0.292236
## BsmtQualTA NA NA NA NA
## BsmtCondFa -5816.9029 4295.9220 -1.354 0.175985
## BsmtCondGd -2652.5204 3194.6308 -0.830 0.406538
## BsmtCondNB NA NA NA NA
## BsmtCondPo -13834.6643 37556.2376 -0.368 0.712664
## BsmtCondTA NA NA NA NA
## BsmtExposureAv 4166.6345 2156.1283 1.932 0.053547 .
## BsmtExposureGd 14873.6055 2987.3181 4.979 7.37e-07 ***
## BsmtExposureMn 2163.6298 2470.0907 0.876 0.381250
## BsmtExposureNB NA NA NA NA
## BsmtExposureNo NA NA NA NA
## BsmtFinType1ALQ -2749.8198 2900.7502 -0.948 0.343344
## BsmtFinType1BLQ -1243.3909 3098.2913 -0.401 0.688263
## BsmtFinType1GLQ 2427.4865 2663.2827 0.911 0.362241
## BsmtFinType1LwQ -5153.1810 3733.1790 -1.380 0.167739
## BsmtFinType1NB NA NA NA NA
## BsmtFinType1Rec -2576.0362 3155.3934 -0.816 0.414445
## BsmtFinType1Unf NA NA NA NA
## BsmtFinType2ALQ 3855.7317 7250.4149 0.532 0.594971
## BsmtFinType2BLQ -4949.6611 4979.9522 -0.994 0.320472
## BsmtFinType2GLQ 570.7726 8714.8278 0.065 0.947792
## BsmtFinType2LwQ -4611.6886 4513.6202 -1.022 0.307124
## BsmtFinType2NB NA NA NA NA
## BsmtFinType2Rec -2526.9399 4723.0670 -0.535 0.592739
## BsmtFinType2Unf NA NA NA NA
## HeatingFloor -33611.6671 29252.1490 -1.149 0.250781
## HeatingGasA -11419.5007 14428.8982 -0.791 0.428854
## HeatingGasW -10641.1497 15534.0274 -0.685 0.493468
## HeatingGrav -26051.1469 17645.2020 -1.476 0.140113
## HeatingOthW -24840.6709 23288.2992 -1.067 0.286349
## HeatingWall NA NA NA NA
## HeatingQCEx0 -2589.3972 2037.7375 -1.271 0.204084
## HeatingQCEx1 NA NA NA NA
## HeatingQCFa0 -1248.3142 4535.0645 -0.275 0.783167
## HeatingQCFa1 NA NA NA NA
## HeatingQCGd0 349.0199 2132.2739 0.164 0.870008
## HeatingQCGd1 NA NA NA NA
## HeatingQCPo0 -12078.0180 25951.3119 -0.465 0.641725
## HeatingQCPo1 NA NA NA NA
## HeatingQCTA0 NA NA NA NA
## HeatingQCTA1 NA NA NA NA
## CentralAirN 416.1486 3868.0706 0.108 0.914343
## CentralAirY NA NA NA NA
## ElectricalFuseA 2825.2548 2950.5700 0.958 0.338501
## ElectricalFuseF 79.9644 5638.3831 0.014 0.988687
## ElectricalFuseP -7387.5015 18479.2951 -0.400 0.689399
## ElectricalMix NA NA NA NA
## ElectricalSBrkr NA NA NA NA
## BsmtFullBath0 -29366.4887 27298.1445 -1.076 0.282256
## BsmtFullBath1 -28188.6399 27205.4178 -1.036 0.300354
## BsmtFullBath2 -23860.9706 28267.7751 -0.844 0.398786
## BsmtFullBath3 NA NA NA NA
## BsmtHalfBath0 23464.8224 29650.6071 0.791 0.428887
## BsmtHalfBath1 26114.3935 29679.6142 0.880 0.379110
## BsmtHalfBath2 NA NA NA NA
## FullBath0 -16637.4679 18535.7249 -0.898 0.369592
## FullBath1 -24101.6005 6143.4004 -3.923 9.26e-05 ***
## FullBath2 -23684.8545 5411.1329 -4.377 1.31e-05 ***
## FullBath3 NA NA NA NA
## HalfBath0 1763.0251 9266.7583 0.190 0.849145
## HalfBath1 5070.8908 9223.9527 0.550 0.582596
## HalfBath2 NA NA NA NA
## BedroomAbvGr0 -37862.6508 35315.2674 -1.072 0.283885
## BedroomAbvGr1 -19714.9314 31878.4752 -0.618 0.536407
## BedroomAbvGr2 -15676.8038 31439.1037 -0.499 0.618128
## BedroomAbvGr3 -21928.6860 31340.3285 -0.700 0.484259
## BedroomAbvGr4 -20609.3814 31156.4896 -0.661 0.508437
## BedroomAbvGr5 -29029.4643 30967.6224 -0.937 0.348742
## BedroomAbvGr6 -17824.2390 32381.1855 -0.550 0.582117
## BedroomAbvGr8 NA NA NA NA
## KitchenAbvGr0 26808.1967 48140.5373 0.557 0.577722
## KitchenAbvGr1 21177.5568 19201.1847 1.103 0.270288
## KitchenAbvGr2 11144.1258 19184.2961 0.581 0.561422
## KitchenAbvGr3 NA NA NA NA
## KitchenQualEx 17969.5274 3933.3294 4.569 5.44e-06 ***
## KitchenQualFa 522.1906 4966.6996 0.105 0.916284
## KitchenQualGd 539.2129 2109.8451 0.256 0.798329
## KitchenQualTA NA NA NA NA
## FunctionalMaj1 -13063.0907 7564.5761 -1.727 0.084458 .
## FunctionalMaj2 -22939.1693 12333.8364 -1.860 0.063160 .
## FunctionalMin1 -9677.8317 4682.5648 -2.067 0.038978 *
## FunctionalMin2 -9882.6279 4648.2360 -2.126 0.033707 *
## FunctionalMod -20167.6963 7603.6856 -2.652 0.008103 **
## FunctionalSev -47401.7081 28347.4748 -1.672 0.094763 .
## FunctionalTyp NA NA NA NA
## Fireplaces0 -11229.9766 12177.0951 -0.922 0.356606
## Fireplaces1 -8451.6352 11961.1421 -0.707 0.479963
## Fireplaces2 -1741.9600 12135.7908 -0.144 0.885890
## Fireplaces3 NA NA NA NA
## FireplaceQuEx -9360.7897 5505.9034 -1.700 0.089375 .
## FireplaceQuFa -4811.6393 4502.5939 -1.069 0.285457
## FireplaceQuGd -1152.1834 2170.8249 -0.531 0.595688
## FireplaceQuNF NA NA NA NA
## FireplaceQuPo 4609.0783 5940.0973 0.776 0.437952
## FireplaceQuTA NA NA NA NA
## GarageType2Types 95315.2897 54178.6428 1.759 0.078796 .
## GarageTypeAttchd 128434.9605 53011.8770 2.423 0.015556 *
## GarageTypeBasment 135253.8033 53535.4705 2.526 0.011656 *
## GarageTypeBuiltIn 125669.4287 53236.4964 2.361 0.018412 *
## GarageTypeCarPort 134053.0045 53590.3909 2.501 0.012507 *
## GarageTypeDetchd 130235.2184 53150.2134 2.450 0.014421 *
## GarageTypeNG NA NA NA NA
## GarageFinishFin 1033.6589 2384.3036 0.434 0.664713
## GarageFinishNG 115539.2855 47304.6610 2.442 0.014737 *
## GarageFinishRFn 331.6863 2117.6271 0.157 0.875563
## GarageFinishUnf NA NA NA NA
## GarageCars0 6305.7140 19187.2391 0.329 0.742487
## GarageCars1 -8739.8994 4815.0742 -1.815 0.069766 .
## GarageCars2 -9635.0376 3347.2610 -2.878 0.004070 **
## GarageCars3 NA NA NA NA
## GarageQualEx 68294.4160 31429.2263 2.173 0.029986 *
## GarageQualFa -4767.6153 4740.1206 -1.006 0.314723
## GarageQualGd 127.7614 7595.3743 0.017 0.986582
## GarageQualNG NA NA NA NA
## GarageQualPo -9743.1608 25676.5935 -0.379 0.704418
## GarageQualTA NA NA NA NA
## GarageCondEx -64411.1056 35167.4126 -1.832 0.067275 .
## GarageCondFa -1253.1777 5218.0920 -0.240 0.810249
## GarageCondGd -677.0331 9110.2013 -0.074 0.940772
## GarageCondNG NA NA NA NA
## GarageCondPo 4689.5112 15496.1608 0.303 0.762231
## GarageCondTA NA NA NA NA
## PavedDriveN 703.1284 3461.6202 0.203 0.839076
## PavedDriveP -4243.3379 4783.9314 -0.887 0.375265
## PavedDriveY NA NA NA NA
## PoolQCEx -252148.3295 123801.7614 -2.037 0.041907 *
## PoolQCFa -395382.2898 149157.0426 -2.651 0.008141 **
## PoolQCGd -360840.3524 146168.8202 -2.469 0.013706 *
## PoolQCNP NA NA NA NA
## FenceGdPrv -5291.5861 3626.2491 -1.459 0.144770
## FenceGdWo -1689.1687 3528.4250 -0.479 0.632220
## FenceMnPrv 1627.4631 2230.9960 0.729 0.465857
## FenceMnWw -4673.8607 7269.4775 -0.643 0.520389
## FenceNF NA NA NA NA
## MiscFeatureGar2 -6595.4966 97756.4521 -0.067 0.946220
## MiscFeatureNM -7048.1997 47037.9461 -0.150 0.880916
## MiscFeatureOthr 8242.0847 50658.3246 0.163 0.870784
## MiscFeatureShed -5196.3166 46316.4268 -0.112 0.910691
## MiscFeatureTenC NA NA NA NA
## MoSold1 5516.9199 4484.2655 1.230 0.218842
## MoSold10 -2140.0960 4041.2081 -0.530 0.596513
## MoSold11 587.5478 4104.8866 0.143 0.886210
## MoSold12 972.6526 4439.2980 0.219 0.826611
## MoSold2 -2214.9309 4520.8038 -0.490 0.624268
## MoSold3 2847.1264 3934.5350 0.724 0.469443
## MoSold4 2924.8767 3748.8413 0.780 0.435428
## MoSold5 4564.7832 3545.8473 1.287 0.198227
## MoSold6 3040.4009 3459.4634 0.879 0.379658
## MoSold7 5169.8363 3465.9224 1.492 0.136073
## MoSold8 -1004.1268 3741.5590 -0.268 0.788462
## MoSold9 NA NA NA NA
## YrSold2006 -3058.1111 2448.0367 -1.249 0.211842
## YrSold2007 -2971.1773 2413.8535 -1.231 0.218617
## YrSold2008 -512.5069 2401.0787 -0.213 0.831015
## YrSold2009 -3061.2356 2368.0109 -1.293 0.196358
## YrSold2010 NA NA NA NA
## SaleTypeCOD -145.1526 4109.8105 -0.035 0.971832
## SaleTypeCon 24961.8373 17002.9453 1.468 0.142353
## SaleTypeConLD 13981.7466 9092.0877 1.538 0.124374
## SaleTypeConLI 401.8772 10708.6786 0.038 0.970070
## SaleTypeConLw 2592.0892 11351.0162 0.228 0.819409
## SaleTypeCWD 10993.9577 11961.3030 0.919 0.358221
## SaleTypeNew 25346.1265 14812.6257 1.711 0.087329 .
## SaleTypeOth 7201.0655 14161.5200 0.508 0.611203
## SaleTypeWD NA NA NA NA
## SaleConditionAbnorml 2789.4755 14754.9403 0.189 0.850084
## SaleConditionAdjLand 29865.7778 21713.2637 1.375 0.169255
## SaleConditionAlloca -747.6199 17477.3159 -0.043 0.965887
## SaleConditionFamily 3542.5507 15463.7084 0.229 0.818841
## SaleConditionNormal 9811.3092 14617.5823 0.671 0.502228
## SaleConditionPartial NA NA NA NA
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## Residual standard error: 21670 on 1153 degrees of freedom
## Multiple R-squared: 0.9412, Adjusted R-squared: 0.9256
## F-statistic: 60.31 on 306 and 1153 DF, p-value: < 2.2e-16
lm.formula <- formula(housetrain.knn.long.lm)
#the linearly dependent variables, 'NA' variables
ld.vars <- attributes(alias(housetrain.knn.long.lm)$Complete)$dimnames[[1]]
#remove the linearly dependent variables variables
formula.new <- as.formula(
paste(
paste(deparse(lm.formula), collapse=""),
paste(ld.vars, collapse="-"),
sep="-"
)
)
#Generate lm model
housetrain.lm2 <- lm(formula.new, data = housetrain.knn.long)
lm.Summary <- summary(housetrain.lm2)
#Get coefficients and filter out non significant ones
coef.df <- round(data.frame(Coefs = coef(housetrain.lm2), SEs = se.coef(housetrain.lm2), pVal = coef(summary(housetrain.lm2))[,'Pr(>|t|)']),4)
coef.df <- tibble::rownames_to_column(coef.df, "colNames")
coef.df <- coef.df %>% filter(pVal <= 0.05 & colNames != '(Intercept)')
#Generate new formula
formula.new <- as.formula(paste('SalePrice ~',paste(coef.df$colNames, collapse="+")))
#Generate lm using new formula
housetrain.lm3 <- lm(formula.new, data = housetrain.knn.long)
summary(housetrain.lm3)
##
## Call:
## lm(formula = formula.new, data = housetrain.knn.long)
##
## Residuals:
## Min 1Q Median 3Q Max
## -200312 -12014 254 10912 200312
##
## Coefficients:
## Estimate Std. Error t value Pr(>|t|)
## (Intercept) 87369.76573 26041.23836 3.355 0.000815 ***
## LotArea 0.80170 0.08991 8.917 < 2e-16 ***
## YearBuilt -511.31258 46.79598 -10.926 < 2e-16 ***
## MasVnrArea 18.17881 4.41528 4.117 4.06e-05 ***
## BsmtFinSF1 36.08949 3.68436 9.795 < 2e-16 ***
## BsmtFinSF2 24.21567 5.29363 4.574 5.20e-06 ***
## BsmtUnfSF 17.11843 3.57604 4.787 1.87e-06 ***
## X1stFlrSF 55.13120 3.96721 13.897 < 2e-16 ***
## X2ndFlrSF 54.22160 2.27653 23.818 < 2e-16 ***
## GarageArea 40.36335 5.03416 8.018 2.26e-15 ***
## X3SsnPorch 45.46843 22.12262 2.055 0.040037 *
## ScreenPorch 49.50975 12.16040 4.071 4.94e-05 ***
## PoolArea 579.22222 168.86948 3.430 0.000621 ***
## MSZoningC -38682.76106 8459.75340 -4.573 5.25e-06 ***
## StreetGrvl -12860.30125 11136.48076 -1.155 0.248375
## LotConfigCulDSac 7316.96146 2767.49776 2.644 0.008288 **
## LandSlopeGtl 41772.29960 9633.63276 4.336 1.55e-05 ***
## LandSlopeMod 53539.97825 9733.66856 5.500 4.50e-08 ***
## NeighborhoodEdwards -10388.63328 2814.06777 -3.692 0.000231 ***
## NeighborhoodMitchel -15831.71779 3827.09430 -4.137 3.73e-05 ***
## NeighborhoodNAmes -6354.16556 2142.92491 -2.965 0.003077 **
## NeighborhoodStoneBr 21170.40239 5204.54409 4.068 5.01e-05 ***
## Condition1RRAe -23285.49810 7607.52671 -3.061 0.002249 **
## Condition2PosN -279995.08505 18706.28788 -14.968 < 2e-16 ***
## OverallQual1 -66915.62093 19576.19545 -3.418 0.000649 ***
## OverallQual10 52896.70383 7646.74640 6.918 6.99e-12 ***
## OverallQual3 -63082.11614 7984.93063 -7.900 5.62e-15 ***
## OverallQual4 -63978.55987 6016.00587 -10.635 < 2e-16 ***
## OverallQual5 -66165.25465 5543.02256 -11.937 < 2e-16 ***
## OverallQual6 -61209.39456 5402.30690 -11.330 < 2e-16 ***
## OverallQual7 -47838.82707 5193.20341 -9.212 < 2e-16 ***
## OverallQual8 -30721.54899 4835.31504 -6.354 2.84e-10 ***
## OverallCond3 -43006.98455 5361.29794 -8.022 2.19e-15 ***
## OverallCond4 -31718.20072 3754.63115 -8.448 < 2e-16 ***
## OverallCond5 -20172.51800 2071.51868 -9.738 < 2e-16 ***
## OverallCond6 -11243.87378 2134.99337 -5.266 1.61e-07 ***
## RoofStyleFlat 15181.48776 23077.07020 0.658 0.510737
## RoofStyleGable -1852.00244 18921.55518 -0.098 0.922043
## RoofStyleGambrel 9530.65491 20372.14755 0.468 0.639981
## RoofStyleHip -1716.17837 18984.55909 -0.090 0.927983
## RoofStyleMansard -6298.20716 20471.69112 -0.308 0.758391
## RoofMatlClyTile -676675.51138 45074.20238 -15.012 < 2e-16 ***
## RoofMatlCompShg -30122.05786 9132.66335 -3.298 0.000997 ***
## RoofMatlTarGrv -58878.02300 15133.40449 -3.891 0.000105 ***
## RoofMatlWdShake -28565.60666 15616.41846 -1.829 0.067583 .
## Exterior1stBrkFace 18303.46406 3751.42854 4.879 1.19e-06 ***
## FoundationBrkTil 241.18461 6494.69315 0.037 0.970382
## FoundationCBlock -6630.76754 5943.67669 -1.116 0.264786
## FoundationPConc 5779.56366 6082.13032 0.950 0.342149
## FoundationStone 3237.10498 12203.02595 0.265 0.790840
## BsmtQualEx 19038.43634 3261.31458 5.838 6.58e-09 ***
## BsmtExposureGd 11807.30167 2636.13379 4.479 8.11e-06 ***
## FullBath1 -14219.57714 4765.39250 -2.984 0.002895 **
## FullBath2 -19132.94224 4338.56363 -4.410 1.11e-05 ***
## KitchenQualEx 21232.66344 3453.83537 6.148 1.03e-09 ***
## FunctionalMin1 -5588.53605 4619.67124 -1.210 0.226590
## FunctionalMin2 -6880.05526 4479.00079 -1.536 0.124749
## FunctionalMod -11864.67604 6650.35359 -1.784 0.074631 .
## GarageTypeAttchd 50801.70139 10123.37385 5.018 5.89e-07 ***
## GarageTypeBasment 48679.29338 11448.45644 4.252 2.26e-05 ***
## GarageTypeBuiltIn 52895.90818 10518.35436 5.029 5.58e-07 ***
## GarageTypeCarPort 32541.64847 12934.92392 2.516 0.011988 *
## GarageTypeDetchd 48021.94491 10130.36128 4.740 2.35e-06 ***
## GarageFinishNG 51207.51566 10673.83213 4.797 1.78e-06 ***
## GarageCars2 -8369.21594 1612.36421 -5.191 2.41e-07 ***
## GarageQualEx 14818.11980 13071.53513 1.134 0.257150
## PoolQCEx -242700.32488 91891.98050 -2.641 0.008355 **
## PoolQCFa -357996.38637 100496.46136 -3.562 0.000380 ***
## PoolQCGd -317046.58280 111516.06888 -2.843 0.004534 **
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## Residual standard error: 24330 on 1391 degrees of freedom
## Multiple R-squared: 0.9106, Adjusted R-squared: 0.9062
## F-statistic: 208.2 on 68 and 1391 DF, p-value: < 2.2e-16
#Get coefficients and filter out non significant ones
coef.df <- round(data.frame(Coefs = coef(housetrain.lm3), SEs = se.coef(housetrain.lm3), pVal = coef(summary(housetrain.lm3))[,'Pr(>|t|)']),4)
coef.df <- tibble::rownames_to_column(coef.df, "colNames")
coef.df <- coef.df %>% filter(pVal <= 0.05 & colNames != '(Intercept)')
#Generate new formula
formula.new <- as.formula(paste('SalePrice ~',paste(coef.df$colNames, collapse="+")))
housetrain.lm4 <- lm(formula.new, data = housetrain.knn.long)
summary(housetrain.lm4)
##
## Call:
## lm(formula = formula.new, data = housetrain.knn.long)
##
## Residuals:
## Min 1Q Median 3Q Max
## -198432 -12021 225 11012 198432
##
## Coefficients:
## Estimate Std. Error t value Pr(>|t|)
## (Intercept) 79741.29576 16604.63696 4.802 1.74e-06 ***
## LotArea 0.73875 0.08708 8.484 < 2e-16 ***
## YearBuilt -534.33818 40.16772 -13.303 < 2e-16 ***
## MasVnrArea 15.00114 4.39244 3.415 0.000655 ***
## BsmtFinSF1 37.29505 3.23541 11.527 < 2e-16 ***
## BsmtFinSF2 23.71627 4.98520 4.757 2.16e-06 ***
## BsmtUnfSF 19.44294 3.08565 6.301 3.95e-10 ***
## X1stFlrSF 51.76253 3.63840 14.227 < 2e-16 ***
## X2ndFlrSF 54.71985 2.28314 23.967 < 2e-16 ***
## GarageArea 42.00562 4.98816 8.421 < 2e-16 ***
## X3SsnPorch 47.76385 22.43356 2.129 0.033417 *
## ScreenPorch 46.58730 12.27270 3.796 0.000153 ***
## PoolArea 593.60768 171.27792 3.466 0.000545 ***
## MSZoningC -41402.64905 8248.73004 -5.019 5.85e-07 ***
## LotConfigCulDSac 6962.32624 2791.92523 2.494 0.012755 *
## LandSlopeGtl 37883.92056 9068.75994 4.177 3.13e-05 ***
## LandSlopeMod 48610.33223 9231.67508 5.266 1.61e-07 ***
## NeighborhoodEdwards -10402.66281 2853.17803 -3.646 0.000276 ***
## NeighborhoodMitchel -16147.10678 3859.28395 -4.184 3.04e-05 ***
## NeighborhoodNAmes -9525.40925 2083.19726 -4.572 5.24e-06 ***
## NeighborhoodStoneBr 20552.16505 5280.78386 3.892 0.000104 ***
## Condition1RRAe -24540.47840 7689.26388 -3.192 0.001447 **
## Condition2PosN -278434.52986 18986.64337 -14.665 < 2e-16 ***
## OverallQual1 -69226.77110 19615.24765 -3.529 0.000430 ***
## OverallQual10 55316.14684 7687.74318 7.195 1.01e-12 ***
## OverallQual3 -67572.31523 8045.82901 -8.398 < 2e-16 ***
## OverallQual4 -69045.74711 6034.35338 -11.442 < 2e-16 ***
## OverallQual5 -71495.33850 5561.77149 -12.855 < 2e-16 ***
## OverallQual6 -65223.88309 5448.01160 -11.972 < 2e-16 ***
## OverallQual7 -49158.06210 5254.40214 -9.356 < 2e-16 ***
## OverallQual8 -30490.80224 4898.76007 -6.224 6.38e-10 ***
## OverallCond3 -42956.36840 5366.07016 -8.005 2.48e-15 ***
## OverallCond4 -32410.95502 3790.24918 -8.551 < 2e-16 ***
## OverallCond5 -18545.46317 2070.17018 -8.958 < 2e-16 ***
## OverallCond6 -11748.19494 2157.46649 -5.445 6.09e-08 ***
## RoofMatlClyTile -656348.30081 45487.81244 -14.429 < 2e-16 ***
## RoofMatlCompShg -18721.29602 7059.92587 -2.652 0.008097 **
## RoofMatlTarGrv -33422.51142 10485.48958 -3.188 0.001467 **
## Exterior1stBrkFace 19219.45581 3781.95504 5.082 4.24e-07 ***
## BsmtQualEx 21089.01480 3279.21116 6.431 1.73e-10 ***
## BsmtExposureGd 12736.14806 2660.49667 4.787 1.87e-06 ***
## FullBath1 -13349.86341 4792.96640 -2.785 0.005420 **
## FullBath2 -17285.16656 4361.41002 -3.963 7.77e-05 ***
## KitchenQualEx 21675.98270 3509.57484 6.176 8.58e-10 ***
## GarageTypeAttchd 52477.11496 9752.18973 5.381 8.66e-08 ***
## GarageTypeBasment 49556.26555 11208.47631 4.421 1.06e-05 ***
## GarageTypeBuiltIn 54713.00477 10188.51111 5.370 9.20e-08 ***
## GarageTypeCarPort 32859.69487 12815.43357 2.564 0.010448 *
## GarageTypeDetchd 50460.26139 9807.46974 5.145 3.05e-07 ***
## GarageFinishNG 53226.13940 10357.81245 5.139 3.15e-07 ***
## GarageCars2 -8161.07600 1628.17840 -5.012 6.06e-07 ***
## PoolQCEx -246545.39280 93218.22635 -2.645 0.008264 **
## PoolQCFa -369751.08160 101944.17169 -3.627 0.000297 ***
## PoolQCGd -328217.03555 113164.09710 -2.900 0.003785 **
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## Residual standard error: 24750 on 1406 degrees of freedom
## Multiple R-squared: 0.9064, Adjusted R-squared: 0.9029
## F-statistic: 257 on 53 and 1406 DF, p-value: < 2.2e-16
#Compare with step functions
stepOutputF <- step(housetrain.lm4, trace = 0, direction = "forward")
stepOutputB <- step(housetrain.lm4, trace = 0, direction = "backward")
summary(stepOutputB)
##
## Call:
## lm(formula = SalePrice ~ LotArea + YearBuilt + MasVnrArea + BsmtFinSF1 +
## BsmtFinSF2 + BsmtUnfSF + X1stFlrSF + X2ndFlrSF + GarageArea +
## X3SsnPorch + ScreenPorch + PoolArea + MSZoningC + LotConfigCulDSac +
## LandSlopeGtl + LandSlopeMod + NeighborhoodEdwards + NeighborhoodMitchel +
## NeighborhoodNAmes + NeighborhoodStoneBr + Condition1RRAe +
## Condition2PosN + OverallQual1 + OverallQual10 + OverallQual3 +
## OverallQual4 + OverallQual5 + OverallQual6 + OverallQual7 +
## OverallQual8 + OverallCond3 + OverallCond4 + OverallCond5 +
## OverallCond6 + RoofMatlClyTile + RoofMatlCompShg + RoofMatlTarGrv +
## Exterior1stBrkFace + BsmtQualEx + BsmtExposureGd + FullBath1 +
## FullBath2 + KitchenQualEx + GarageTypeAttchd + GarageTypeBasment +
## GarageTypeBuiltIn + GarageTypeCarPort + GarageTypeDetchd +
## GarageFinishNG + GarageCars2 + PoolQCEx + PoolQCFa + PoolQCGd,
## data = housetrain.knn.long)
##
## Residuals:
## Min 1Q Median 3Q Max
## -198432 -12021 225 11012 198432
##
## Coefficients:
## Estimate Std. Error t value Pr(>|t|)
## (Intercept) 79741.29576 16604.63696 4.802 1.74e-06 ***
## LotArea 0.73875 0.08708 8.484 < 2e-16 ***
## YearBuilt -534.33818 40.16772 -13.303 < 2e-16 ***
## MasVnrArea 15.00114 4.39244 3.415 0.000655 ***
## BsmtFinSF1 37.29505 3.23541 11.527 < 2e-16 ***
## BsmtFinSF2 23.71627 4.98520 4.757 2.16e-06 ***
## BsmtUnfSF 19.44294 3.08565 6.301 3.95e-10 ***
## X1stFlrSF 51.76253 3.63840 14.227 < 2e-16 ***
## X2ndFlrSF 54.71985 2.28314 23.967 < 2e-16 ***
## GarageArea 42.00562 4.98816 8.421 < 2e-16 ***
## X3SsnPorch 47.76385 22.43356 2.129 0.033417 *
## ScreenPorch 46.58730 12.27270 3.796 0.000153 ***
## PoolArea 593.60768 171.27792 3.466 0.000545 ***
## MSZoningC -41402.64905 8248.73004 -5.019 5.85e-07 ***
## LotConfigCulDSac 6962.32624 2791.92523 2.494 0.012755 *
## LandSlopeGtl 37883.92056 9068.75994 4.177 3.13e-05 ***
## LandSlopeMod 48610.33223 9231.67508 5.266 1.61e-07 ***
## NeighborhoodEdwards -10402.66281 2853.17803 -3.646 0.000276 ***
## NeighborhoodMitchel -16147.10678 3859.28395 -4.184 3.04e-05 ***
## NeighborhoodNAmes -9525.40925 2083.19726 -4.572 5.24e-06 ***
## NeighborhoodStoneBr 20552.16505 5280.78386 3.892 0.000104 ***
## Condition1RRAe -24540.47840 7689.26388 -3.192 0.001447 **
## Condition2PosN -278434.52986 18986.64337 -14.665 < 2e-16 ***
## OverallQual1 -69226.77110 19615.24765 -3.529 0.000430 ***
## OverallQual10 55316.14684 7687.74318 7.195 1.01e-12 ***
## OverallQual3 -67572.31523 8045.82901 -8.398 < 2e-16 ***
## OverallQual4 -69045.74711 6034.35338 -11.442 < 2e-16 ***
## OverallQual5 -71495.33850 5561.77149 -12.855 < 2e-16 ***
## OverallQual6 -65223.88309 5448.01160 -11.972 < 2e-16 ***
## OverallQual7 -49158.06210 5254.40214 -9.356 < 2e-16 ***
## OverallQual8 -30490.80224 4898.76007 -6.224 6.38e-10 ***
## OverallCond3 -42956.36840 5366.07016 -8.005 2.48e-15 ***
## OverallCond4 -32410.95502 3790.24918 -8.551 < 2e-16 ***
## OverallCond5 -18545.46317 2070.17018 -8.958 < 2e-16 ***
## OverallCond6 -11748.19494 2157.46649 -5.445 6.09e-08 ***
## RoofMatlClyTile -656348.30081 45487.81244 -14.429 < 2e-16 ***
## RoofMatlCompShg -18721.29602 7059.92587 -2.652 0.008097 **
## RoofMatlTarGrv -33422.51142 10485.48958 -3.188 0.001467 **
## Exterior1stBrkFace 19219.45581 3781.95504 5.082 4.24e-07 ***
## BsmtQualEx 21089.01480 3279.21116 6.431 1.73e-10 ***
## BsmtExposureGd 12736.14806 2660.49667 4.787 1.87e-06 ***
## FullBath1 -13349.86341 4792.96640 -2.785 0.005420 **
## FullBath2 -17285.16656 4361.41002 -3.963 7.77e-05 ***
## KitchenQualEx 21675.98270 3509.57484 6.176 8.58e-10 ***
## GarageTypeAttchd 52477.11496 9752.18973 5.381 8.66e-08 ***
## GarageTypeBasment 49556.26555 11208.47631 4.421 1.06e-05 ***
## GarageTypeBuiltIn 54713.00477 10188.51111 5.370 9.20e-08 ***
## GarageTypeCarPort 32859.69487 12815.43357 2.564 0.010448 *
## GarageTypeDetchd 50460.26139 9807.46974 5.145 3.05e-07 ***
## GarageFinishNG 53226.13940 10357.81245 5.139 3.15e-07 ***
## GarageCars2 -8161.07600 1628.17840 -5.012 6.06e-07 ***
## PoolQCEx -246545.39280 93218.22635 -2.645 0.008264 **
## PoolQCFa -369751.08160 101944.17169 -3.627 0.000297 ***
## PoolQCGd -328217.03555 113164.09710 -2.900 0.003785 **
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## Residual standard error: 24750 on 1406 degrees of freedom
## Multiple R-squared: 0.9064, Adjusted R-squared: 0.9029
## F-statistic: 257 on 53 and 1406 DF, p-value: < 2.2e-16
summary(stepOutputF)
##
## Call:
## lm(formula = SalePrice ~ LotArea + YearBuilt + MasVnrArea + BsmtFinSF1 +
## BsmtFinSF2 + BsmtUnfSF + X1stFlrSF + X2ndFlrSF + GarageArea +
## X3SsnPorch + ScreenPorch + PoolArea + MSZoningC + LotConfigCulDSac +
## LandSlopeGtl + LandSlopeMod + NeighborhoodEdwards + NeighborhoodMitchel +
## NeighborhoodNAmes + NeighborhoodStoneBr + Condition1RRAe +
## Condition2PosN + OverallQual1 + OverallQual10 + OverallQual3 +
## OverallQual4 + OverallQual5 + OverallQual6 + OverallQual7 +
## OverallQual8 + OverallCond3 + OverallCond4 + OverallCond5 +
## OverallCond6 + RoofMatlClyTile + RoofMatlCompShg + RoofMatlTarGrv +
## Exterior1stBrkFace + BsmtQualEx + BsmtExposureGd + FullBath1 +
## FullBath2 + KitchenQualEx + GarageTypeAttchd + GarageTypeBasment +
## GarageTypeBuiltIn + GarageTypeCarPort + GarageTypeDetchd +
## GarageFinishNG + GarageCars2 + PoolQCEx + PoolQCFa + PoolQCGd,
## data = housetrain.knn.long)
##
## Residuals:
## Min 1Q Median 3Q Max
## -198432 -12021 225 11012 198432
##
## Coefficients:
## Estimate Std. Error t value Pr(>|t|)
## (Intercept) 79741.29576 16604.63696 4.802 1.74e-06 ***
## LotArea 0.73875 0.08708 8.484 < 2e-16 ***
## YearBuilt -534.33818 40.16772 -13.303 < 2e-16 ***
## MasVnrArea 15.00114 4.39244 3.415 0.000655 ***
## BsmtFinSF1 37.29505 3.23541 11.527 < 2e-16 ***
## BsmtFinSF2 23.71627 4.98520 4.757 2.16e-06 ***
## BsmtUnfSF 19.44294 3.08565 6.301 3.95e-10 ***
## X1stFlrSF 51.76253 3.63840 14.227 < 2e-16 ***
## X2ndFlrSF 54.71985 2.28314 23.967 < 2e-16 ***
## GarageArea 42.00562 4.98816 8.421 < 2e-16 ***
## X3SsnPorch 47.76385 22.43356 2.129 0.033417 *
## ScreenPorch 46.58730 12.27270 3.796 0.000153 ***
## PoolArea 593.60768 171.27792 3.466 0.000545 ***
## MSZoningC -41402.64905 8248.73004 -5.019 5.85e-07 ***
## LotConfigCulDSac 6962.32624 2791.92523 2.494 0.012755 *
## LandSlopeGtl 37883.92056 9068.75994 4.177 3.13e-05 ***
## LandSlopeMod 48610.33223 9231.67508 5.266 1.61e-07 ***
## NeighborhoodEdwards -10402.66281 2853.17803 -3.646 0.000276 ***
## NeighborhoodMitchel -16147.10678 3859.28395 -4.184 3.04e-05 ***
## NeighborhoodNAmes -9525.40925 2083.19726 -4.572 5.24e-06 ***
## NeighborhoodStoneBr 20552.16505 5280.78386 3.892 0.000104 ***
## Condition1RRAe -24540.47840 7689.26388 -3.192 0.001447 **
## Condition2PosN -278434.52986 18986.64337 -14.665 < 2e-16 ***
## OverallQual1 -69226.77110 19615.24765 -3.529 0.000430 ***
## OverallQual10 55316.14684 7687.74318 7.195 1.01e-12 ***
## OverallQual3 -67572.31523 8045.82901 -8.398 < 2e-16 ***
## OverallQual4 -69045.74711 6034.35338 -11.442 < 2e-16 ***
## OverallQual5 -71495.33850 5561.77149 -12.855 < 2e-16 ***
## OverallQual6 -65223.88309 5448.01160 -11.972 < 2e-16 ***
## OverallQual7 -49158.06210 5254.40214 -9.356 < 2e-16 ***
## OverallQual8 -30490.80224 4898.76007 -6.224 6.38e-10 ***
## OverallCond3 -42956.36840 5366.07016 -8.005 2.48e-15 ***
## OverallCond4 -32410.95502 3790.24918 -8.551 < 2e-16 ***
## OverallCond5 -18545.46317 2070.17018 -8.958 < 2e-16 ***
## OverallCond6 -11748.19494 2157.46649 -5.445 6.09e-08 ***
## RoofMatlClyTile -656348.30081 45487.81244 -14.429 < 2e-16 ***
## RoofMatlCompShg -18721.29602 7059.92587 -2.652 0.008097 **
## RoofMatlTarGrv -33422.51142 10485.48958 -3.188 0.001467 **
## Exterior1stBrkFace 19219.45581 3781.95504 5.082 4.24e-07 ***
## BsmtQualEx 21089.01480 3279.21116 6.431 1.73e-10 ***
## BsmtExposureGd 12736.14806 2660.49667 4.787 1.87e-06 ***
## FullBath1 -13349.86341 4792.96640 -2.785 0.005420 **
## FullBath2 -17285.16656 4361.41002 -3.963 7.77e-05 ***
## KitchenQualEx 21675.98270 3509.57484 6.176 8.58e-10 ***
## GarageTypeAttchd 52477.11496 9752.18973 5.381 8.66e-08 ***
## GarageTypeBasment 49556.26555 11208.47631 4.421 1.06e-05 ***
## GarageTypeBuiltIn 54713.00477 10188.51111 5.370 9.20e-08 ***
## GarageTypeCarPort 32859.69487 12815.43357 2.564 0.010448 *
## GarageTypeDetchd 50460.26139 9807.46974 5.145 3.05e-07 ***
## GarageFinishNG 53226.13940 10357.81245 5.139 3.15e-07 ***
## GarageCars2 -8161.07600 1628.17840 -5.012 6.06e-07 ***
## PoolQCEx -246545.39280 93218.22635 -2.645 0.008264 **
## PoolQCFa -369751.08160 101944.17169 -3.627 0.000297 ***
## PoolQCGd -328217.03555 113164.09710 -2.900 0.003785 **
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## Residual standard error: 24750 on 1406 degrees of freedom
## Multiple R-squared: 0.9064, Adjusted R-squared: 0.9029
## F-statistic: 257 on 53 and 1406 DF, p-value: < 2.2e-16
k. Model-6
#These values are linearly dependent on other variables
housetrain.lm6 <- lm(SalePrice~BsmtQual+BsmtExposure, data = housetrain.knn)
summary(housetrain.lm6)
##
## Call:
## lm(formula = SalePrice ~ BsmtQual + BsmtExposure, data = housetrain.knn)
##
## Residuals:
## Min 1Q Median 3Q Max
## -203132 -27933 -5195 23101 427292
##
## Coefficients: (1 not defined because of singularities)
## Estimate Std. Error t value Pr(>|t|)
## (Intercept) 317708 6052 52.496 < 2e-16 ***
## BsmtQualFa -198553 11074 -17.930 < 2e-16 ***
## BsmtQualGd -116190 5730 -20.279 < 2e-16 ***
## BsmtQualNB -212055 11119 -19.072 < 2e-16 ***
## BsmtQualTA -173718 5876 -29.562 < 2e-16 ***
## BsmtExposureGd 39820 6221 6.401 2.08e-10 ***
## BsmtExposureMn 8924 6592 1.354 0.176
## BsmtExposureNB NA NA NA NA
## BsmtExposureNo -7092 4393 -1.614 0.107
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## Residual standard error: 56740 on 1452 degrees of freedom
## Multiple R-squared: 0.4924, Adjusted R-squared: 0.49
## F-statistic: 201.2 on 7 and 1452 DF, p-value: < 2.2e-16
l. Model-7
formula.new <- as.formula(paste('SalePrice ~',paste(coef.df$ActualName, collapse="+"),"-BsmtExposure"))
housetrain.lm7 <- lm(formula.new, data = housetrain.knn)
summary(housetrain.lm7)
##
## Call:
## lm(formula = formula.new, data = housetrain.knn)
##
## Residuals:
## Min 1Q Median 3Q Max
## -134981 -15337 -13 13405 235406
##
## Coefficients:
## Estimate Std. Error t value Pr(>|t|)
## (Intercept) -95884.8 69835.7 -1.373 0.169979
## MSZoningFV 47691.4 14817.1 3.219 0.001319 **
## MSZoningRH 36321.6 14944.1 2.430 0.015208 *
## MSZoningRL 44031.9 12405.3 3.549 0.000399 ***
## MSZoningRM 30817.1 11638.9 2.648 0.008197 **
## LotConfigCulDSac 12335.0 3972.1 3.105 0.001940 **
## LotConfigFR2 -14323.1 5093.8 -2.812 0.004997 **
## LotConfigFR3 -19738.9 16470.8 -1.198 0.230964
## LotConfigInside -837.3 2248.2 -0.372 0.709624
## LandSlopeMod 20012.9 4339.8 4.612 4.38e-06 ***
## LandSlopeSev 32924.2 10850.2 3.034 0.002456 **
## NeighborhoodBlueste -2602.5 23804.7 -0.109 0.912961
## NeighborhoodBrDale 13629.2 12640.5 1.078 0.281129
## NeighborhoodBrkSide 30014.5 10115.9 2.967 0.003060 **
## NeighborhoodClearCr 44755.4 10422.3 4.294 1.88e-05 ***
## NeighborhoodCollgCr 28144.0 8172.2 3.444 0.000591 ***
## NeighborhoodCrawfor 53712.6 9479.8 5.666 1.78e-08 ***
## NeighborhoodEdwards 15640.0 9002.3 1.737 0.082557 .
## NeighborhoodGilbert 21117.6 8565.6 2.465 0.013810 *
## NeighborhoodIDOTRR 21349.2 11470.8 1.861 0.062937 .
## NeighborhoodMeadowV -9785.4 13387.3 -0.731 0.464938
## NeighborhoodMitchel 16160.5 9397.2 1.720 0.085715 .
## NeighborhoodNAmes 23190.8 8701.3 2.665 0.007786 **
## NeighborhoodNoRidge 92243.3 9335.1 9.881 < 2e-16 ***
## NeighborhoodNPkVill -8017.4 13764.1 -0.582 0.560334
## NeighborhoodNridgHt 44646.0 8958.5 4.984 7.05e-07 ***
## NeighborhoodNWAmes 20111.9 9107.0 2.208 0.027384 *
## NeighborhoodOldTown 17989.0 10022.6 1.795 0.072903 .
## NeighborhoodSawyer 22722.5 9246.4 2.457 0.014118 *
## NeighborhoodSawyerW 33756.4 8993.6 3.753 0.000182 ***
## NeighborhoodSomerst 24428.9 10599.2 2.305 0.021330 *
## NeighborhoodStoneBr 56085.5 10299.3 5.446 6.13e-08 ***
## NeighborhoodSWISU 27342.7 10763.0 2.540 0.011183 *
## NeighborhoodTimber 32605.7 9365.5 3.481 0.000515 ***
## NeighborhoodVeenker 48502.5 12741.8 3.807 0.000147 ***
## Condition1Feedr 8226.9 6236.2 1.319 0.187319
## Condition1Norm 9062.4 5093.2 1.779 0.075411 .
## Condition1PosA 27101.9 12522.9 2.164 0.030625 *
## Condition1PosN 20324.0 9245.4 2.198 0.028098 *
## Condition1RRAe 1683.1 11044.4 0.152 0.878899
## Condition1RRAn 10081.6 8587.9 1.174 0.240632
## Condition1RRNe -6019.3 22902.2 -0.263 0.792723
## Condition1RRNn 10376.6 15938.4 0.651 0.515128
## Condition2Feedr 5050.6 27836.4 0.181 0.856050
## Condition2Norm 14159.8 23864.9 0.593 0.553059
## Condition2PosA -20789.0 40994.6 -0.507 0.612157
## Condition2PosN -185097.8 34711.8 -5.332 1.14e-07 ***
## Condition2RRAe 39460.1 39565.7 0.997 0.318782
## Condition2RRAn -169.3 39498.2 -0.004 0.996580
## Condition2RRNn 111.9 32843.5 0.003 0.997283
## OverallQual2 26305.7 37680.0 0.698 0.485213
## OverallQual3 32280.8 33684.7 0.958 0.338071
## OverallQual4 48588.9 33304.7 1.459 0.144820
## OverallQual5 53812.8 33463.7 1.608 0.108049
## OverallQual6 64392.3 33541.0 1.920 0.055093 .
## OverallQual7 82022.2 33607.9 2.441 0.014792 *
## OverallQual8 105614.7 33698.0 3.134 0.001761 **
## OverallQual9 147794.3 34348.3 4.303 1.81e-05 ***
## OverallQual10 216534.3 35246.3 6.143 1.06e-09 ***
## OverallCond2 1334.6 50487.3 0.026 0.978915
## OverallCond3 -16256.1 47536.1 -0.342 0.732424
## OverallCond4 -7605.1 48484.6 -0.157 0.875382
## OverallCond5 -7303.6 48351.8 -0.151 0.879957
## OverallCond6 2883.3 48405.4 0.060 0.952510
## OverallCond7 8854.3 48387.1 0.183 0.854834
## OverallCond8 6690.2 48473.0 0.138 0.890246
## OverallCond9 13882.4 48806.8 0.284 0.776119
## RoofMatlCompShg 266918.1 40498.3 6.591 6.26e-11 ***
## RoofMatlMembran 254867.3 52173.1 4.885 1.16e-06 ***
## RoofMatlMetal 230770.8 52117.4 4.428 1.03e-05 ***
## RoofMatlRoll 262557.8 51124.5 5.136 3.22e-07 ***
## RoofMatlTarGrv 267840.1 40568.8 6.602 5.82e-11 ***
## RoofMatlWdShake 275285.5 43227.7 6.368 2.62e-10 ***
## RoofMatlWdShngl 345741.4 41882.5 8.255 3.58e-16 ***
## Exterior1stAsphShn 9463.1 32324.1 0.293 0.769752
## Exterior1stBrkComm -9940.7 25136.3 -0.395 0.692558
## Exterior1stBrkFace 20178.7 9010.1 2.240 0.025283 *
## Exterior1stCBlock -5027.5 32737.8 -0.154 0.877973
## Exterior1stCemntBd 13390.4 9465.1 1.415 0.157381
## Exterior1stHdBoard -2723.7 8191.6 -0.332 0.739567
## Exterior1stImStucc -57256.3 32162.2 -1.780 0.075262 .
## Exterior1stMetalSd -722.0 8003.1 -0.090 0.928130
## Exterior1stPlywood 2655.1 8582.5 0.309 0.757091
## Exterior1stStone 18020.5 24820.8 0.726 0.467951
## Exterior1stStucco -3494.5 10226.2 -0.342 0.732611
## Exterior1stVinylSd 2765.7 8045.3 0.344 0.731074
## Exterior1stWd Sdng -1517.8 7974.2 -0.190 0.849075
## Exterior1stWdShing -14650.5 9859.7 -1.486 0.137543
## BsmtQualFa -41740.0 7632.2 -5.469 5.39e-08 ***
## BsmtQualGd -28610.4 4284.8 -6.677 3.55e-11 ***
## BsmtQualNB -55113.6 7465.4 -7.383 2.71e-13 ***
## BsmtQualTA -32226.1 5039.2 -6.395 2.21e-10 ***
## FullBath1 2183.1 12120.6 0.180 0.857087
## FullBath2 20200.6 12091.7 1.671 0.095029 .
## FullBath3 80013.0 13405.5 5.969 3.05e-09 ***
## KitchenQualFa -37520.1 7469.2 -5.023 5.76e-07 ***
## KitchenQualGd -26045.9 4561.4 -5.710 1.39e-08 ***
## KitchenQualTA -29912.9 4945.4 -6.049 1.89e-09 ***
## GarageTypeAttchd 53517.1 14186.1 3.773 0.000169 ***
## GarageTypeBasment 56304.1 15751.0 3.575 0.000363 ***
## GarageTypeBuiltIn 57221.0 14557.8 3.931 8.90e-05 ***
## GarageTypeCarPort 36113.8 17969.4 2.010 0.044658 *
## GarageTypeDetchd 41606.8 14228.2 2.924 0.003511 **
## GarageTypeNG -129633.7 39691.7 -3.266 0.001118 **
## GarageFinishNG 108416.7 31879.7 3.401 0.000692 ***
## GarageFinishRFn -5827.8 2547.3 -2.288 0.022304 *
## GarageFinishUnf -8587.7 3010.7 -2.852 0.004405 **
## GarageCars1 -47290.0 23303.3 -2.029 0.042621 *
## GarageCars2 -33852.6 23249.4 -1.456 0.145607
## GarageCars3 200.5 23378.1 0.009 0.993158
## PoolQCFa -112005.0 32036.7 -3.496 0.000487 ***
## PoolQCGd -71774.9 32549.3 -2.205 0.027614 *
## PoolQCNP -123019.5 23098.5 -5.326 1.18e-07 ***
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## Residual standard error: 30640 on 1347 degrees of freedom
## Multiple R-squared: 0.8626, Adjusted R-squared: 0.8512
## F-statistic: 75.53 on 112 and 1347 DF, p-value: < 2.2e-16
vif(housetrain.lm7) %>% kable("html",caption = "VIF Model-7", row.names = T, digits = 2) %>%
kable_styling(bootstrap_options = c("striped", "hover", "condensed", "responsive"), full_width = F, position = "left", font_size = 12)
VIF Model-7
|
|
GVIF
|
Df
|
GVIF^(1/(2*Df))
|
|
MSZoning
|
29.49
|
4
|
1.53
|
|
LotConfig
|
1.68
|
4
|
1.07
|
|
LandSlope
|
2.00
|
2
|
1.19
|
|
Neighborhood
|
4553.50
|
24
|
1.19
|
|
Condition1
|
4.04
|
8
|
1.09
|
|
Condition2
|
3.29
|
7
|
1.09
|
|
OverallQual
|
84.18
|
9
|
1.28
|
|
OverallCond
|
11.89
|
8
|
1.17
|
|
RoofMatl
|
4.79
|
7
|
1.12
|
|
Exterior1st
|
26.28
|
14
|
1.12
|
|
BsmtQual
|
10.12
|
4
|
1.34
|
|
FullBath
|
4.01
|
3
|
1.26
|
|
KitchenQual
|
5.75
|
3
|
1.34
|
|
GarageType
|
602.48
|
6
|
1.70
|
|
GarageFinish
|
251.62
|
3
|
2.51
|
|
GarageCars
|
164.35
|
3
|
2.34
|
|
PoolQC
|
2.03
|
3
|
1.13
|
par(mfrow = c(2, 2)) # Split the plotting panel into a 2 x 2 grid
plot(housetrain.lm7)

m. Model-8
formula.new <- as.formula(paste('SalePrice ~',paste(coef.df$ActualName, collapse="+"),"-BsmtQual"))
housetrain.lm8 <- lm(formula.new, data = housetrain.knn)
summary(housetrain.lm8)
##
## Call:
## lm(formula = formula.new, data = housetrain.knn)
##
## Residuals:
## Min 1Q Median 3Q Max
## -125732 -15706 0 13893 215122
##
## Coefficients:
## Estimate Std. Error t value Pr(>|t|)
## (Intercept) -140355.8 69086.6 -2.032 0.042391 *
## MSZoningFV 46384.0 14730.8 3.149 0.001676 **
## MSZoningRH 33929.9 14823.6 2.289 0.022239 *
## MSZoningRL 41831.2 12316.5 3.396 0.000703 ***
## MSZoningRM 29084.7 11577.9 2.512 0.012118 *
## LotConfigCulDSac 11900.4 3944.9 3.017 0.002604 **
## LotConfigFR2 -13120.8 5051.5 -2.597 0.009495 **
## LotConfigFR3 -25583.0 16357.1 -1.564 0.118045
## LotConfigInside -1798.3 2236.6 -0.804 0.421524
## LandSlopeMod 13422.2 4415.4 3.040 0.002413 **
## LandSlopeSev 20250.5 10854.7 1.866 0.062316 .
## NeighborhoodBlueste 3133.0 23678.6 0.132 0.894757
## NeighborhoodBrDale 16016.0 12463.5 1.285 0.199003
## NeighborhoodBrkSide 30207.1 10002.7 3.020 0.002576 **
## NeighborhoodClearCr 41956.9 10346.7 4.055 5.30e-05 ***
## NeighborhoodCollgCr 29503.1 8124.9 3.631 0.000293 ***
## NeighborhoodCrawfor 57862.5 9443.8 6.127 1.17e-09 ***
## NeighborhoodEdwards 16695.2 8937.9 1.868 0.061992 .
## NeighborhoodGilbert 23716.9 8525.2 2.782 0.005478 **
## NeighborhoodIDOTRR 21842.7 11304.0 1.932 0.053533 .
## NeighborhoodMeadowV -8571.3 13304.9 -0.644 0.519544
## NeighborhoodMitchel 21482.4 9282.5 2.314 0.020802 *
## NeighborhoodNAmes 23618.7 8573.7 2.755 0.005952 **
## NeighborhoodNoRidge 94131.8 9302.0 10.120 < 2e-16 ***
## NeighborhoodNPkVill 858.7 13732.6 0.063 0.950152
## NeighborhoodNridgHt 53856.6 8864.8 6.075 1.61e-09 ***
## NeighborhoodNWAmes 25010.3 9072.1 2.757 0.005915 **
## NeighborhoodOldTown 18064.6 9858.8 1.832 0.067123 .
## NeighborhoodSawyer 24736.4 9139.6 2.707 0.006885 **
## NeighborhoodSawyerW 37127.0 8980.4 4.134 3.78e-05 ***
## NeighborhoodSomerst 29982.4 10559.2 2.839 0.004587 **
## NeighborhoodStoneBr 59675.6 10243.6 5.826 7.11e-09 ***
## NeighborhoodSWISU 29805.4 10668.1 2.794 0.005282 **
## NeighborhoodTimber 33575.4 9292.4 3.613 0.000314 ***
## NeighborhoodVeenker 41020.2 12730.3 3.222 0.001302 **
## Condition1Feedr 9160.9 6195.6 1.479 0.139478
## Condition1Norm 10689.3 5055.3 2.114 0.034659 *
## Condition1PosA 23900.0 12451.5 1.919 0.055140 .
## Condition1PosN 23662.7 9170.5 2.580 0.009976 **
## Condition1RRAe 2230.6 10984.8 0.203 0.839115
## Condition1RRAn 12397.3 8511.9 1.456 0.145499
## Condition1RRNe -570.4 22758.4 -0.025 0.980008
## Condition1RRNn 13107.2 15815.1 0.829 0.407375
## Condition2Feedr 6054.7 27670.0 0.219 0.826825
## Condition2Norm 16415.0 23663.7 0.694 0.488005
## Condition2PosA -16585.4 40643.4 -0.408 0.683286
## Condition2PosN -179405.7 34472.1 -5.204 2.25e-07 ***
## Condition2RRAe 50721.9 39226.3 1.293 0.196213
## Condition2RRAn -1726.5 39201.8 -0.044 0.964877
## Condition2RRNn 5029.1 32552.6 0.154 0.877247
## OverallQual2 15064.6 37445.0 0.402 0.687517
## OverallQual3 25857.1 33469.7 0.773 0.439922
## OverallQual4 40859.5 33085.9 1.235 0.217064
## OverallQual5 46313.6 33250.4 1.393 0.163888
## OverallQual6 56947.6 33331.3 1.709 0.087768 .
## OverallQual7 74711.4 33401.0 2.237 0.025463 *
## OverallQual8 98966.0 33492.6 2.955 0.003182 **
## OverallQual9 150004.9 34098.6 4.399 1.17e-05 ***
## OverallQual10 209628.8 35055.0 5.980 2.85e-09 ***
## OverallCond2 39594.4 49906.2 0.793 0.427698
## OverallCond3 18853.9 46944.2 0.402 0.688024
## OverallCond4 29286.9 47779.4 0.613 0.540006
## OverallCond5 29579.0 47669.1 0.621 0.535030
## OverallCond6 39079.0 47726.7 0.819 0.413041
## OverallCond7 44659.0 47722.7 0.936 0.349544
## OverallCond8 40685.6 47833.9 0.851 0.395165
## OverallCond9 50102.1 48121.6 1.041 0.297990
## RoofMatlCompShg 273999.9 40286.5 6.801 1.55e-11 ***
## RoofMatlMembran 259977.5 51858.7 5.013 6.07e-07 ***
## RoofMatlMetal 231321.4 51708.9 4.474 8.34e-06 ***
## RoofMatlRoll 269088.3 50808.6 5.296 1.38e-07 ***
## RoofMatlTarGrv 264049.8 40306.2 6.551 8.11e-11 ***
## RoofMatlWdShake 284898.5 43015.7 6.623 5.07e-11 ***
## RoofMatlWdShngl 344371.0 41608.0 8.277 3.02e-16 ***
## Exterior1stAsphShn 8081.1 32109.4 0.252 0.801332
## Exterior1stBrkComm -5456.1 24947.3 -0.219 0.826912
## Exterior1stBrkFace 18548.0 8948.0 2.073 0.038376 *
## Exterior1stCBlock -4468.5 32470.4 -0.138 0.890563
## Exterior1stCemntBd 12273.4 9407.8 1.305 0.192254
## Exterior1stHdBoard -5045.3 8139.6 -0.620 0.535464
## Exterior1stImStucc -56209.2 31958.3 -1.759 0.078833 .
## Exterior1stMetalSd -2392.6 7958.3 -0.301 0.763735
## Exterior1stPlywood -2353.5 8536.7 -0.276 0.782828
## Exterior1stStone 11519.4 24623.5 0.468 0.639988
## Exterior1stStucco -5934.7 10159.7 -0.584 0.559222
## Exterior1stVinylSd 2243.4 7992.5 0.281 0.778992
## Exterior1stWd Sdng -3076.1 7926.8 -0.388 0.698026
## Exterior1stWdShing -14098.3 9786.4 -1.441 0.149928
## BsmtExposureGd 24073.1 3733.9 6.447 1.58e-10 ***
## BsmtExposureMn 3934.1 3703.4 1.062 0.288294
## BsmtExposureNB -25456.5 6229.3 -4.087 4.64e-05 ***
## BsmtExposureNo -3875.1 2551.9 -1.519 0.129119
## FullBath1 -3027.3 12060.3 -0.251 0.801841
## FullBath2 15608.9 12056.1 1.295 0.195647
## FullBath3 76456.8 13376.1 5.716 1.34e-08 ***
## KitchenQualFa -41723.4 7406.4 -5.633 2.15e-08 ***
## KitchenQualGd -31863.4 4510.4 -7.064 2.58e-12 ***
## KitchenQualTA -35602.1 4891.3 -7.279 5.71e-13 ***
## GarageTypeAttchd 58256.8 14033.1 4.151 3.51e-05 ***
## GarageTypeBasment 54694.9 15621.1 3.501 0.000478 ***
## GarageTypeBuiltIn 61710.5 14409.7 4.283 1.98e-05 ***
## GarageTypeCarPort 41479.0 17758.6 2.336 0.019653 *
## GarageTypeDetchd 47099.6 14074.1 3.347 0.000841 ***
## GarageTypeNG -132476.1 39401.8 -3.362 0.000795 ***
## GarageFinishNG 110174.5 31654.5 3.481 0.000516 ***
## GarageFinishRFn -7305.3 2521.5 -2.897 0.003826 **
## GarageFinishUnf -9510.1 2974.5 -3.197 0.001420 **
## GarageCars1 -51320.9 23179.1 -2.214 0.026989 *
## GarageCars2 -38673.3 23122.8 -1.673 0.094654 .
## GarageCars3 -4170.2 23253.0 -0.179 0.857697
## PoolQCFa -120859.0 31854.8 -3.794 0.000155 ***
## PoolQCGd -76285.1 32350.0 -2.358 0.018510 *
## PoolQCNP -133698.9 22993.7 -5.815 7.58e-09 ***
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## Residual standard error: 30440 on 1347 degrees of freedom
## Multiple R-squared: 0.8645, Adjusted R-squared: 0.8532
## F-statistic: 76.7 on 112 and 1347 DF, p-value: < 2.2e-16
vif(housetrain.lm8) %>% kable("html",caption = "VIF Model-8", row.names = T, digits = 2) %>%
kable_styling(bootstrap_options = c("striped", "hover", "condensed", "responsive"), full_width = F, position = "left", font_size = 12)
VIF Model-8
|
|
GVIF
|
Df
|
GVIF^(1/(2*Df))
|
|
MSZoning
|
29.28
|
4
|
1.53
|
|
LotConfig
|
1.68
|
4
|
1.07
|
|
LandSlope
|
2.12
|
2
|
1.21
|
|
Neighborhood
|
3663.20
|
24
|
1.19
|
|
Condition1
|
4.04
|
8
|
1.09
|
|
Condition2
|
3.23
|
7
|
1.09
|
|
OverallQual
|
77.10
|
9
|
1.27
|
|
OverallCond
|
11.33
|
8
|
1.16
|
|
RoofMatl
|
4.84
|
7
|
1.12
|
|
Exterior1st
|
26.03
|
14
|
1.12
|
|
BsmtExposure
|
2.52
|
4
|
1.12
|
|
FullBath
|
4.01
|
3
|
1.26
|
|
KitchenQual
|
5.66
|
3
|
1.34
|
|
GarageType
|
599.79
|
6
|
1.70
|
|
GarageFinish
|
249.09
|
3
|
2.51
|
|
GarageCars
|
161.54
|
3
|
2.33
|
|
PoolQC
|
2.06
|
3
|
1.13
|
confint(housetrain.lm8, level=0.95)
## 2.5 % 97.5 %
## (Intercept) -275884.9059 -4826.774
## MSZoningFV 17486.1784 75281.839
## MSZoningRH 4849.9560 63009.807
## MSZoningRL 17669.6471 65992.676
## MSZoningRM 6371.9829 51797.426
## LotConfigCulDSac 4161.4610 19639.245
## LotConfigFR2 -23030.4094 -3211.195
## LotConfigFR3 -57671.0497 6505.086
## LotConfigInside -6185.8800 2589.312
## LandSlopeMod 4760.3693 22083.970
## LandSlopeSev -1043.5301 41544.461
## NeighborhoodBlueste -43317.9592 49583.917
## NeighborhoodBrDale -8434.0559 40466.095
## NeighborhoodBrkSide 10584.5482 49829.577
## NeighborhoodClearCr 21659.3606 62254.345
## NeighborhoodCollgCr 13564.3320 45441.960
## NeighborhoodCrawfor 39336.2893 76388.781
## NeighborhoodEdwards -838.5449 34229.044
## NeighborhoodGilbert 6992.8182 40440.971
## NeighborhoodIDOTRR -332.6991 44018.057
## NeighborhoodMeadowV -34671.9188 17529.368
## NeighborhoodMitchel 3272.6087 39692.138
## NeighborhoodNAmes 6799.3714 40438.060
## NeighborhoodNoRidge 75883.8234 112379.742
## NeighborhoodNPkVill -26080.9340 27798.251
## NeighborhoodNridgHt 36466.2469 71246.941
## NeighborhoodNWAmes 7213.3435 42807.171
## NeighborhoodOldTown -1275.6949 37404.801
## NeighborhoodSawyer 6807.0416 42665.664
## NeighborhoodSawyerW 19509.9765 54744.089
## NeighborhoodSomerst 9268.2427 50696.589
## NeighborhoodStoneBr 39580.4549 79770.663
## NeighborhoodSWISU 8877.5846 50733.191
## NeighborhoodTimber 15346.2208 51804.483
## NeighborhoodVeenker 16046.8551 65993.508
## Condition1Feedr -2993.1840 21314.992
## Condition1Norm 772.1090 20606.534
## Condition1PosA -526.5658 48326.501
## Condition1PosN 5672.6668 41652.794
## Condition1RRAe -19318.4939 23779.739
## Condition1RRAn -4300.8008 29095.394
## Condition1RRNe -45216.2269 44075.434
## Condition1RRNn -17917.6499 44132.149
## Condition2Feedr -48226.2612 60335.667
## Condition2Norm -30006.8330 62836.756
## Condition2PosA -96316.5516 63145.781
## Condition2PosN -247030.5194 -111780.822
## Condition2RRAe -26229.4352 127673.171
## Condition2RRAn -78629.7341 75176.675
## Condition2RRNn -58830.2771 68888.385
## OverallQual2 -58392.1900 88521.463
## OverallQual3 -39801.1728 91515.441
## OverallQual4 -24045.9464 105764.874
## OverallQual5 -18914.6393 111541.894
## OverallQual6 -8439.3423 122334.445
## OverallQual7 9187.7174 140235.087
## OverallQual8 33262.5883 164669.344
## OverallQual9 83112.7198 216897.043
## OverallQual10 140860.4957 278397.042
## OverallCond2 -58307.9174 137496.776
## OverallCond3 -73237.7133 110945.538
## OverallCond4 -64443.2895 123017.099
## OverallCond5 -63934.7968 123092.752
## OverallCond6 -54547.7815 132705.754
## OverallCond7 -48960.0029 138277.953
## OverallCond8 -53151.4717 134522.771
## OverallCond9 -44299.3831 144503.530
## RoofMatlCompShg 194968.7417 353031.019
## RoofMatlMembran 158244.8764 361710.218
## RoofMatlMetal 129882.6866 332760.105
## RoofMatlRoll 169415.7424 368760.930
## RoofMatlTarGrv 184980.0346 343119.492
## RoofMatlWdShake 200513.5135 369283.575
## RoofMatlWdShngl 262747.4670 425994.630
## Exterior1stAsphShn -54908.7813 71070.981
## Exterior1stBrkComm -54395.8012 43483.572
## Exterior1stBrkFace 994.3553 36101.589
## Exterior1stCBlock -68166.5973 59229.591
## Exterior1stCemntBd -6182.1802 30729.006
## Exterior1stHdBoard -21013.0372 10922.406
## Exterior1stImStucc -118902.5184 6484.195
## Exterior1stMetalSd -18004.4912 13219.337
## Exterior1stPlywood -19100.1395 14393.160
## Exterior1stStone -36785.1989 59824.007
## Exterior1stStucco -25865.2188 13995.816
## Exterior1stVinylSd -13435.6188 17922.411
## Exterior1stWd Sdng -18626.2553 12473.986
## Exterior1stWdShing -33296.4856 5099.867
## BsmtExposureGd 16748.2798 31397.949
## BsmtExposureMn -3330.9722 11199.170
## BsmtExposureNB -37676.7150 -13236.356
## BsmtExposureNo -8881.1159 1131.012
## FullBath1 -26686.3076 20631.686
## FullBath2 -8041.7854 39259.671
## FullBath3 50216.4015 102697.102
## KitchenQualFa -56252.7410 -27194.107
## KitchenQualGd -40711.6592 -23015.125
## KitchenQualTA -45197.4458 -26006.737
## GarageTypeAttchd 30727.6369 85785.986
## GarageTypeBasment 24050.6139 85339.094
## GarageTypeBuiltIn 33442.5507 89978.384
## GarageTypeCarPort 6641.4020 76316.523
## GarageTypeDetchd 19490.0511 74709.131
## GarageTypeNG -209771.5799 -55180.581
## GarageFinishNG 48077.0371 172271.937
## GarageFinishRFn -12251.6953 -2358.819
## GarageFinishUnf -15345.1825 -3674.952
## GarageCars1 -96792.0553 -5849.789
## GarageCars2 -84033.8221 6687.245
## GarageCars3 -49786.3283 41445.880
## PoolQCFa -183349.4438 -58368.644
## PoolQCGd -139746.9596 -12823.163
## PoolQCNP -178806.1396 -88591.617
par(mfrow = c(2, 2)) # Split the plotting panel into a 2 x 2 grid
plot(housetrain.lm8)

#Compare with step functions show same coefficents as lm
stepOutputF <- step(housetrain.lm8, trace = 0, direction = "forward")
stepOutputB <- step(housetrain.lm8, trace = 0, direction = "backward")
summary(stepOutputB)
##
## Call:
## lm(formula = SalePrice ~ MSZoning + LotConfig + LandSlope + Neighborhood +
## Condition2 + OverallQual + OverallCond + RoofMatl + Exterior1st +
## BsmtExposure + FullBath + KitchenQual + GarageType + GarageFinish +
## GarageCars + PoolQC, data = housetrain.knn)
##
## Residuals:
## Min 1Q Median 3Q Max
## -125725 -16418 0 13601 214543
##
## Coefficients:
## Estimate Std. Error t value Pr(>|t|)
## (Intercept) -142016 68989 -2.059 0.039729 *
## MSZoningFV 46138 14677 3.144 0.001705 **
## MSZoningRH 33600 14776 2.274 0.023121 *
## MSZoningRL 41671 12257 3.400 0.000694 ***
## MSZoningRM 28390 11530 2.462 0.013927 *
## LotConfigCulDSac 12161 3899 3.119 0.001853 **
## LotConfigFR2 -13248 5031 -2.633 0.008559 **
## LotConfigFR3 -25320 16041 -1.578 0.114695
## LotConfigInside -1636 2220 -0.737 0.461309
## LandSlopeMod 13309 4408 3.019 0.002581 **
## LandSlopeSev 19772 10849 1.822 0.068603 .
## NeighborhoodBlueste 4698 23684 0.198 0.842777
## NeighborhoodBrDale 17665 12442 1.420 0.155916
## NeighborhoodBrkSide 31837 9944 3.202 0.001399 **
## NeighborhoodClearCr 41603 10342 4.023 6.07e-05 ***
## NeighborhoodCollgCr 30051 8125 3.699 0.000225 ***
## NeighborhoodCrawfor 59336 9431 6.292 4.23e-10 ***
## NeighborhoodEdwards 16808 8936 1.881 0.060208 .
## NeighborhoodGilbert 24081 8496 2.834 0.004662 **
## NeighborhoodIDOTRR 22323 11274 1.980 0.047908 *
## NeighborhoodMeadowV -7138 13282 -0.537 0.591083
## NeighborhoodMitchel 21980 9280 2.368 0.018002 *
## NeighborhoodNAmes 24067 8564 2.810 0.005019 **
## NeighborhoodNoRidge 94544 9300 10.166 < 2e-16 ***
## NeighborhoodNPkVill 1387 13732 0.101 0.919561
## NeighborhoodNridgHt 54112 8859 6.108 1.31e-09 ***
## NeighborhoodNWAmes 27120 8991 3.016 0.002605 **
## NeighborhoodOldTown 17491 9816 1.782 0.074981 .
## NeighborhoodSawyer 24738 9084 2.723 0.006550 **
## NeighborhoodSawyerW 36444 8919 4.086 4.64e-05 ***
## NeighborhoodSomerst 30649 10397 2.948 0.003253 **
## NeighborhoodStoneBr 59905 10242 5.849 6.20e-09 ***
## NeighborhoodSWISU 30635 10649 2.877 0.004081 **
## NeighborhoodTimber 33681 9294 3.624 0.000301 ***
## NeighborhoodVeenker 40652 12707 3.199 0.001410 **
## Condition2Feedr 17671 26572 0.665 0.506153
## Condition2Norm 26773 23186 1.155 0.248427
## Condition2PosA -13725 40638 -0.338 0.735615
## Condition2PosN -155472 33173 -4.687 3.06e-06 ***
## Condition2RRAe 59656 38821 1.537 0.124603
## Condition2RRAn 6288 38729 0.162 0.871038
## Condition2RRNn 15268 31908 0.479 0.632360
## OverallQual2 14492 37443 0.387 0.698786
## OverallQual3 25641 33463 0.766 0.443658
## OverallQual4 40938 33065 1.238 0.215891
## OverallQual5 45736 33223 1.377 0.168849
## OverallQual6 56237 33303 1.689 0.091516 .
## OverallQual7 74294 33382 2.226 0.026206 *
## OverallQual8 98261 33470 2.936 0.003383 **
## OverallQual9 149666 34073 4.393 1.21e-05 ***
## OverallQual10 208538 35034 5.952 3.36e-09 ***
## OverallCond2 37283 49875 0.748 0.454880
## OverallCond3 17099 46948 0.364 0.715755
## OverallCond4 26886 47780 0.563 0.573733
## OverallCond5 27307 47670 0.573 0.566852
## OverallCond6 36630 47721 0.768 0.442875
## OverallCond7 42858 47723 0.898 0.369312
## OverallCond8 38441 47832 0.804 0.421734
## OverallCond9 46657 48116 0.970 0.332377
## RoofMatlCompShg 274232 40107 6.837 1.22e-11 ***
## RoofMatlMembran 260790 51719 5.042 5.22e-07 ***
## RoofMatlMetal 232898 51563 4.517 6.82e-06 ***
## RoofMatlRoll 267454 50829 5.262 1.66e-07 ***
## RoofMatlTarGrv 265900 40097 6.631 4.79e-11 ***
## RoofMatlWdShake 291084 42750 6.809 1.47e-11 ***
## RoofMatlWdShngl 345220 41445 8.330 < 2e-16 ***
## Exterior1stAsphShn 8515 32125 0.265 0.791008
## Exterior1stBrkComm -6208 24955 -0.249 0.803593
## Exterior1stBrkFace 18295 8922 2.050 0.040508 *
## Exterior1stCBlock -15257 32111 -0.475 0.634758
## Exterior1stCemntBd 10742 9389 1.144 0.252795
## Exterior1stHdBoard -5926 8130 -0.729 0.466198
## Exterior1stImStucc -56772 31974 -1.776 0.076031 .
## Exterior1stMetalSd -3800 7934 -0.479 0.632026
## Exterior1stPlywood -2964 8521 -0.348 0.727976
## Exterior1stStone 6727 24555 0.274 0.784172
## Exterior1stStucco -6723 10156 -0.662 0.508094
## Exterior1stVinylSd 1235 7973 0.155 0.876938
## Exterior1stWd Sdng -3945 7913 -0.499 0.618191
## Exterior1stWdShing -15653 9753 -1.605 0.108740
## BsmtExposureGd 23973 3730 6.426 1.81e-10 ***
## BsmtExposureMn 3652 3684 0.992 0.321616
## BsmtExposureNB -26245 6208 -4.228 2.52e-05 ***
## BsmtExposureNo -4119 2548 -1.616 0.106251
## FullBath1 -2328 12044 -0.193 0.846771
## FullBath2 16561 12032 1.376 0.168912
## FullBath3 77653 13359 5.813 7.65e-09 ***
## KitchenQualFa -42920 7387 -5.810 7.76e-09 ***
## KitchenQualGd -32081 4506 -7.119 1.76e-12 ***
## KitchenQualTA -35993 4884 -7.370 2.96e-13 ***
## GarageTypeAttchd 58019 14031 4.135 3.77e-05 ***
## GarageTypeBasment 53503 15618 3.426 0.000632 ***
## GarageTypeBuiltIn 61457 14409 4.265 2.14e-05 ***
## GarageTypeCarPort 41605 17684 2.353 0.018783 *
## GarageTypeDetchd 46671 14074 3.316 0.000937 ***
## GarageTypeNG -134126 39400 -3.404 0.000683 ***
## GarageFinishNG 110881 31663 3.502 0.000477 ***
## GarageFinishRFn -7599 2511 -3.026 0.002527 **
## GarageFinishUnf -10080 2964 -3.401 0.000690 ***
## GarageCars1 -51226 23192 -2.209 0.027354 *
## GarageCars2 -38565 23136 -1.667 0.095766 .
## GarageCars3 -4316 23265 -0.186 0.852859
## PoolQCFa -116752 31641 -3.690 0.000233 ***
## PoolQCGd -72345 32268 -2.242 0.025125 *
## PoolQCNP -128467 22878 -5.615 2.38e-08 ***
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## Residual standard error: 30460 on 1355 degrees of freedom
## Multiple R-squared: 0.8635, Adjusted R-squared: 0.853
## F-statistic: 82.4 on 104 and 1355 DF, p-value: < 2.2e-16
summary(stepOutputF)
##
## Call:
## lm(formula = SalePrice ~ MSZoning + LotConfig + LandSlope + Neighborhood +
## Condition1 + Condition2 + OverallQual + OverallCond + RoofMatl +
## Exterior1st + BsmtQual + BsmtExposure + FullBath + KitchenQual +
## GarageType + GarageFinish + GarageCars + PoolQC - BsmtQual,
## data = housetrain.knn)
##
## Residuals:
## Min 1Q Median 3Q Max
## -125732 -15706 0 13893 215122
##
## Coefficients:
## Estimate Std. Error t value Pr(>|t|)
## (Intercept) -140355.8 69086.6 -2.032 0.042391 *
## MSZoningFV 46384.0 14730.8 3.149 0.001676 **
## MSZoningRH 33929.9 14823.6 2.289 0.022239 *
## MSZoningRL 41831.2 12316.5 3.396 0.000703 ***
## MSZoningRM 29084.7 11577.9 2.512 0.012118 *
## LotConfigCulDSac 11900.4 3944.9 3.017 0.002604 **
## LotConfigFR2 -13120.8 5051.5 -2.597 0.009495 **
## LotConfigFR3 -25583.0 16357.1 -1.564 0.118045
## LotConfigInside -1798.3 2236.6 -0.804 0.421524
## LandSlopeMod 13422.2 4415.4 3.040 0.002413 **
## LandSlopeSev 20250.5 10854.7 1.866 0.062316 .
## NeighborhoodBlueste 3133.0 23678.6 0.132 0.894757
## NeighborhoodBrDale 16016.0 12463.5 1.285 0.199003
## NeighborhoodBrkSide 30207.1 10002.7 3.020 0.002576 **
## NeighborhoodClearCr 41956.9 10346.7 4.055 5.30e-05 ***
## NeighborhoodCollgCr 29503.1 8124.9 3.631 0.000293 ***
## NeighborhoodCrawfor 57862.5 9443.8 6.127 1.17e-09 ***
## NeighborhoodEdwards 16695.2 8937.9 1.868 0.061992 .
## NeighborhoodGilbert 23716.9 8525.2 2.782 0.005478 **
## NeighborhoodIDOTRR 21842.7 11304.0 1.932 0.053533 .
## NeighborhoodMeadowV -8571.3 13304.9 -0.644 0.519544
## NeighborhoodMitchel 21482.4 9282.5 2.314 0.020802 *
## NeighborhoodNAmes 23618.7 8573.7 2.755 0.005952 **
## NeighborhoodNoRidge 94131.8 9302.0 10.120 < 2e-16 ***
## NeighborhoodNPkVill 858.7 13732.6 0.063 0.950152
## NeighborhoodNridgHt 53856.6 8864.8 6.075 1.61e-09 ***
## NeighborhoodNWAmes 25010.3 9072.1 2.757 0.005915 **
## NeighborhoodOldTown 18064.6 9858.8 1.832 0.067123 .
## NeighborhoodSawyer 24736.4 9139.6 2.707 0.006885 **
## NeighborhoodSawyerW 37127.0 8980.4 4.134 3.78e-05 ***
## NeighborhoodSomerst 29982.4 10559.2 2.839 0.004587 **
## NeighborhoodStoneBr 59675.6 10243.6 5.826 7.11e-09 ***
## NeighborhoodSWISU 29805.4 10668.1 2.794 0.005282 **
## NeighborhoodTimber 33575.4 9292.4 3.613 0.000314 ***
## NeighborhoodVeenker 41020.2 12730.3 3.222 0.001302 **
## Condition1Feedr 9160.9 6195.6 1.479 0.139478
## Condition1Norm 10689.3 5055.3 2.114 0.034659 *
## Condition1PosA 23900.0 12451.5 1.919 0.055140 .
## Condition1PosN 23662.7 9170.5 2.580 0.009976 **
## Condition1RRAe 2230.6 10984.8 0.203 0.839115
## Condition1RRAn 12397.3 8511.9 1.456 0.145499
## Condition1RRNe -570.4 22758.4 -0.025 0.980008
## Condition1RRNn 13107.2 15815.1 0.829 0.407375
## Condition2Feedr 6054.7 27670.0 0.219 0.826825
## Condition2Norm 16415.0 23663.7 0.694 0.488005
## Condition2PosA -16585.4 40643.4 -0.408 0.683286
## Condition2PosN -179405.7 34472.1 -5.204 2.25e-07 ***
## Condition2RRAe 50721.9 39226.3 1.293 0.196213
## Condition2RRAn -1726.5 39201.8 -0.044 0.964877
## Condition2RRNn 5029.1 32552.6 0.154 0.877247
## OverallQual2 15064.6 37445.0 0.402 0.687517
## OverallQual3 25857.1 33469.7 0.773 0.439922
## OverallQual4 40859.5 33085.9 1.235 0.217064
## OverallQual5 46313.6 33250.4 1.393 0.163888
## OverallQual6 56947.6 33331.3 1.709 0.087768 .
## OverallQual7 74711.4 33401.0 2.237 0.025463 *
## OverallQual8 98966.0 33492.6 2.955 0.003182 **
## OverallQual9 150004.9 34098.6 4.399 1.17e-05 ***
## OverallQual10 209628.8 35055.0 5.980 2.85e-09 ***
## OverallCond2 39594.4 49906.2 0.793 0.427698
## OverallCond3 18853.9 46944.2 0.402 0.688024
## OverallCond4 29286.9 47779.4 0.613 0.540006
## OverallCond5 29579.0 47669.1 0.621 0.535030
## OverallCond6 39079.0 47726.7 0.819 0.413041
## OverallCond7 44659.0 47722.7 0.936 0.349544
## OverallCond8 40685.6 47833.9 0.851 0.395165
## OverallCond9 50102.1 48121.6 1.041 0.297990
## RoofMatlCompShg 273999.9 40286.5 6.801 1.55e-11 ***
## RoofMatlMembran 259977.5 51858.7 5.013 6.07e-07 ***
## RoofMatlMetal 231321.4 51708.9 4.474 8.34e-06 ***
## RoofMatlRoll 269088.3 50808.6 5.296 1.38e-07 ***
## RoofMatlTarGrv 264049.8 40306.2 6.551 8.11e-11 ***
## RoofMatlWdShake 284898.5 43015.7 6.623 5.07e-11 ***
## RoofMatlWdShngl 344371.0 41608.0 8.277 3.02e-16 ***
## Exterior1stAsphShn 8081.1 32109.4 0.252 0.801332
## Exterior1stBrkComm -5456.1 24947.3 -0.219 0.826912
## Exterior1stBrkFace 18548.0 8948.0 2.073 0.038376 *
## Exterior1stCBlock -4468.5 32470.4 -0.138 0.890563
## Exterior1stCemntBd 12273.4 9407.8 1.305 0.192254
## Exterior1stHdBoard -5045.3 8139.6 -0.620 0.535464
## Exterior1stImStucc -56209.2 31958.3 -1.759 0.078833 .
## Exterior1stMetalSd -2392.6 7958.3 -0.301 0.763735
## Exterior1stPlywood -2353.5 8536.7 -0.276 0.782828
## Exterior1stStone 11519.4 24623.5 0.468 0.639988
## Exterior1stStucco -5934.7 10159.7 -0.584 0.559222
## Exterior1stVinylSd 2243.4 7992.5 0.281 0.778992
## Exterior1stWd Sdng -3076.1 7926.8 -0.388 0.698026
## Exterior1stWdShing -14098.3 9786.4 -1.441 0.149928
## BsmtExposureGd 24073.1 3733.9 6.447 1.58e-10 ***
## BsmtExposureMn 3934.1 3703.4 1.062 0.288294
## BsmtExposureNB -25456.5 6229.3 -4.087 4.64e-05 ***
## BsmtExposureNo -3875.1 2551.9 -1.519 0.129119
## FullBath1 -3027.3 12060.3 -0.251 0.801841
## FullBath2 15608.9 12056.1 1.295 0.195647
## FullBath3 76456.8 13376.1 5.716 1.34e-08 ***
## KitchenQualFa -41723.4 7406.4 -5.633 2.15e-08 ***
## KitchenQualGd -31863.4 4510.4 -7.064 2.58e-12 ***
## KitchenQualTA -35602.1 4891.3 -7.279 5.71e-13 ***
## GarageTypeAttchd 58256.8 14033.1 4.151 3.51e-05 ***
## GarageTypeBasment 54694.9 15621.1 3.501 0.000478 ***
## GarageTypeBuiltIn 61710.5 14409.7 4.283 1.98e-05 ***
## GarageTypeCarPort 41479.0 17758.6 2.336 0.019653 *
## GarageTypeDetchd 47099.6 14074.1 3.347 0.000841 ***
## GarageTypeNG -132476.1 39401.8 -3.362 0.000795 ***
## GarageFinishNG 110174.5 31654.5 3.481 0.000516 ***
## GarageFinishRFn -7305.3 2521.5 -2.897 0.003826 **
## GarageFinishUnf -9510.1 2974.5 -3.197 0.001420 **
## GarageCars1 -51320.9 23179.1 -2.214 0.026989 *
## GarageCars2 -38673.3 23122.8 -1.673 0.094654 .
## GarageCars3 -4170.2 23253.0 -0.179 0.857697
## PoolQCFa -120859.0 31854.8 -3.794 0.000155 ***
## PoolQCGd -76285.1 32350.0 -2.358 0.018510 *
## PoolQCNP -133698.9 22993.7 -5.815 7.58e-09 ***
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## Residual standard error: 30440 on 1347 degrees of freedom
## Multiple R-squared: 0.8645, Adjusted R-squared: 0.8532
## F-statistic: 76.7 on 112 and 1347 DF, p-value: < 2.2e-16
n. Model-9
#Eliminate variables based on VIF value
formula.new <- as.formula(paste('SalePrice ~',paste(coef.df$ActualName, collapse="+"),"-BsmtQual-Neighborhood"))
housetrain.lm9 <- lm(formula.new, data = housetrain.knn)
summary(housetrain.lm9)
##
## Call:
## lm(formula = formula.new, data = housetrain.knn)
##
## Residuals:
## Min 1Q Median 3Q Max
## -146020 -17478 -1051 13980 221347
##
## Coefficients:
## Estimate Std. Error t value Pr(>|t|)
## (Intercept) -182954.6 73308.8 -2.496 0.012689 *
## MSZoningFV 45754.9 12187.5 3.754 0.000181 ***
## MSZoningRH 47829.2 14005.4 3.415 0.000656 ***
## MSZoningRL 48892.8 11369.8 4.300 1.83e-05 ***
## MSZoningRM 28600.2 11500.6 2.487 0.013007 *
## LotConfigCulDSac 14271.3 4188.9 3.407 0.000676 ***
## LotConfigFR2 -9894.5 5354.4 -1.848 0.064830 .
## LotConfigFR3 -13364.1 17499.6 -0.764 0.445190
## LotConfigInside -2752.9 2394.1 -1.150 0.250411
## LandSlopeMod 19725.3 4611.2 4.278 2.02e-05 ***
## LandSlopeSev 29781.6 11229.8 2.652 0.008094 **
## Condition1Feedr 15009.9 6520.4 2.302 0.021485 *
## Condition1Norm 16926.9 5324.5 3.179 0.001511 **
## Condition1PosA 28094.1 13266.6 2.118 0.034383 *
## Condition1PosN 25556.2 9735.7 2.625 0.008761 **
## Condition1RRAe 12004.1 11424.7 1.051 0.293574
## Condition1RRAn 14312.3 8855.0 1.616 0.106259
## Condition1RRNe 5709.9 24203.4 0.236 0.813535
## Condition1RRNn 5705.3 16580.7 0.344 0.730830
## Condition2Feedr 12162.4 29638.9 0.410 0.681612
## Condition2Norm 16014.3 25173.4 0.636 0.524780
## Condition2PosA -54240.0 43579.5 -1.245 0.213484
## Condition2PosN -196709.2 36803.7 -5.345 1.06e-07 ***
## Condition2RRAe 51550.4 42013.2 1.227 0.220032
## Condition2RRAn -594.2 41842.1 -0.014 0.988672
## Condition2RRNn 7921.8 34911.0 0.227 0.820525
## OverallQual2 17798.7 40019.6 0.445 0.656570
## OverallQual3 21277.2 35828.1 0.594 0.552698
## OverallQual4 35776.7 35313.1 1.013 0.311178
## OverallQual5 43704.4 35471.3 1.232 0.218121
## OverallQual6 56455.1 35553.8 1.588 0.112545
## OverallQual7 78283.4 35642.8 2.196 0.028235 *
## OverallQual8 115031.0 35721.4 3.220 0.001311 **
## OverallQual9 168398.6 36308.3 4.638 3.85e-06 ***
## OverallQual10 241181.0 37316.2 6.463 1.42e-10 ***
## OverallCond2 64246.9 53058.6 1.211 0.226155
## OverallCond3 38315.4 50000.0 0.766 0.443625
## OverallCond4 49274.5 50794.2 0.970 0.332177
## OverallCond5 52168.4 50699.1 1.029 0.303670
## OverallCond6 60853.2 50696.1 1.200 0.230210
## OverallCond7 67231.0 50696.5 1.326 0.185012
## OverallCond8 62644.0 50793.3 1.233 0.217670
## OverallCond9 71446.1 51097.7 1.398 0.162272
## RoofMatlCompShg 308948.6 43305.7 7.134 1.57e-12 ***
## RoofMatlMembran 297929.4 55825.8 5.337 1.11e-07 ***
## RoofMatlMetal 273320.3 55652.7 4.911 1.01e-06 ***
## RoofMatlRoll 299552.1 54753.2 5.471 5.31e-08 ***
## RoofMatlTarGrv 294696.7 43387.0 6.792 1.64e-11 ***
## RoofMatlWdShake 311126.5 46215.7 6.732 2.45e-11 ***
## RoofMatlWdShngl 373322.1 44811.3 8.331 < 2e-16 ***
## Exterior1stAsphShn 894.4 34634.5 0.026 0.979402
## Exterior1stBrkComm -11421.5 26821.2 -0.426 0.670292
## Exterior1stBrkFace 19691.6 9571.3 2.057 0.039841 *
## Exterior1stCBlock -4443.7 34860.0 -0.127 0.898585
## Exterior1stCemntBd 2628.3 9360.4 0.281 0.778911
## Exterior1stHdBoard -5372.7 8607.6 -0.624 0.532613
## Exterior1stImStucc -19087.0 34248.6 -0.557 0.577409
## Exterior1stMetalSd -2231.0 8552.9 -0.261 0.794249
## Exterior1stPlywood -5289.2 9020.5 -0.586 0.557736
## Exterior1stStone 24914.2 26365.3 0.945 0.344845
## Exterior1stStucco -9063.3 10923.0 -0.830 0.406828
## Exterior1stVinylSd -1567.7 8477.9 -0.185 0.853323
## Exterior1stWd Sdng -2001.5 8515.4 -0.235 0.814213
## Exterior1stWdShing -15406.3 10507.1 -1.466 0.142803
## BsmtExposureGd 24213.7 3937.2 6.150 1.01e-09 ***
## BsmtExposureMn 5554.3 3954.2 1.405 0.160356
## BsmtExposureNB -23652.6 6649.5 -3.557 0.000388 ***
## BsmtExposureNo -1493.1 2652.3 -0.563 0.573570
## FullBath1 -2526.6 12879.6 -0.196 0.844506
## FullBath2 16798.7 12856.9 1.307 0.191569
## FullBath3 81214.1 14283.5 5.686 1.59e-08 ***
## KitchenQualFa -38025.7 7829.6 -4.857 1.33e-06 ***
## KitchenQualGd -29200.2 4778.2 -6.111 1.29e-09 ***
## KitchenQualTA -36945.2 5154.1 -7.168 1.24e-12 ***
## GarageTypeAttchd 59451.0 15049.2 3.950 8.20e-05 ***
## GarageTypeBasment 55111.7 16748.1 3.291 0.001025 **
## GarageTypeBuiltIn 62611.1 15443.2 4.054 5.31e-05 ***
## GarageTypeCarPort 45411.1 19051.4 2.384 0.017279 *
## GarageTypeDetchd 50032.1 15091.6 3.315 0.000940 ***
## GarageTypeNG -123015.8 42389.1 -2.902 0.003766 **
## GarageFinishNG 102915.7 34150.9 3.014 0.002629 **
## GarageFinishRFn -5269.0 2562.5 -2.056 0.039958 *
## GarageFinishUnf -8799.5 3094.4 -2.844 0.004526 **
## GarageCars1 -49211.4 24961.4 -1.971 0.048868 *
## GarageCars2 -37037.4 24917.4 -1.486 0.137401
## GarageCars3 3715.0 25040.7 0.148 0.882080
## PoolQCFa -132612.1 34166.3 -3.881 0.000109 ***
## PoolQCGd -84674.6 34601.2 -2.447 0.014523 *
## PoolQCNP -137943.0 24667.2 -5.592 2.70e-08 ***
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## Residual standard error: 32940 on 1371 degrees of freedom
## Multiple R-squared: 0.8385, Adjusted R-squared: 0.8281
## F-statistic: 80.86 on 88 and 1371 DF, p-value: < 2.2e-16
vif(housetrain.lm9) %>% kable("html",caption = "VIF Model-9", row.names = T, digits = 2) %>%
kable_styling(bootstrap_options = c("striped", "hover", "condensed", "responsive"), full_width = F, position = "left", font_size = 12)
VIF Model-9
|
|
GVIF
|
Df
|
GVIF^(1/(2*Df))
|
|
MSZoning
|
2.10
|
4
|
1.10
|
|
LotConfig
|
1.47
|
4
|
1.05
|
|
LandSlope
|
1.81
|
2
|
1.16
|
|
Condition1
|
2.74
|
8
|
1.07
|
|
Condition2
|
2.82
|
7
|
1.08
|
|
OverallQual
|
44.80
|
9
|
1.24
|
|
OverallCond
|
8.42
|
8
|
1.14
|
|
RoofMatl
|
4.29
|
7
|
1.11
|
|
Exterior1st
|
8.41
|
14
|
1.08
|
|
BsmtExposure
|
2.10
|
4
|
1.10
|
|
FullBath
|
3.44
|
3
|
1.23
|
|
KitchenQual
|
4.79
|
3
|
1.30
|
|
GarageType
|
458.61
|
6
|
1.67
|
|
GarageFinish
|
210.44
|
3
|
2.44
|
|
GarageCars
|
134.70
|
3
|
2.26
|
|
PoolQC
|
1.95
|
3
|
1.12
|
confint(housetrain.lm9, level=0.95)
## 2.5 % 97.5 %
## (Intercept) -326764.0490 -39145.0822
## MSZoningFV 21846.6745 69663.1990
## MSZoningRH 20354.8973 75303.5589
## MSZoningRL 26588.6091 71196.9115
## MSZoningRM 6039.4907 51160.9622
## LotConfigCulDSac 6054.0361 22488.6462
## LotConfigFR2 -20398.2204 609.2163
## LotConfigFR3 -47693.0615 20964.8368
## LotConfigInside -7449.4450 1943.7019
## LandSlopeMod 10679.4970 28771.1817
## LandSlopeSev 7752.0496 51811.0982
## Condition1Feedr 2218.8215 27801.0620
## Condition1Norm 6481.8687 27371.9901
## Condition1PosA 2069.1273 54119.1645
## Condition1PosN 6457.8572 44654.6387
## Condition1RRAe -10407.6376 34415.7904
## Condition1RRAn -3058.4622 31683.0055
## Condition1RRNe -41769.8083 53189.6154
## Condition1RRNn -26821.0675 38231.6759
## Condition2Feedr -45980.1651 70304.8948
## Condition2Norm -33368.3516 65396.9333
## Condition2PosA -139729.7670 31249.8119
## Condition2PosN -268906.9512 -124511.5303
## Condition2RRAe -30866.7214 133967.4259
## Condition2RRAn -82675.5925 81487.1782
## Condition2RRNn -60563.0476 76406.5731
## OverallQual2 -60707.5136 96304.9500
## OverallQual3 -49006.6575 91560.9813
## OverallQual4 -33496.9467 105050.3194
## OverallQual5 -25879.5829 113288.3755
## OverallQual6 -13290.6754 126200.9425
## OverallQual7 8363.1191 148203.6858
## OverallQual8 44956.4697 185105.6134
## OverallQual9 97172.8560 239624.3885
## OverallQual10 167978.0111 314383.9403
## OverallCond2 -39837.8459 168331.7008
## OverallCond3 -59769.4412 136400.1932
## OverallCond4 -50368.2478 148917.1556
## OverallCond5 -47287.7210 151624.6059
## OverallCond6 -38597.1304 160303.5993
## OverallCond7 -32220.0840 166682.1836
## OverallCond8 -36996.9166 162284.9051
## OverallCond9 -28792.0482 171684.2202
## RoofMatlCompShg 223996.0034 393901.1092
## RoofMatlMembran 188416.1044 407442.6564
## RoofMatlMetal 164146.6092 382493.9042
## RoofMatlRoll 192143.0601 406961.2384
## RoofMatlTarGrv 209584.7105 379808.7144
## RoofMatlWdShake 220465.3666 401787.7029
## RoofMatlWdShngl 285416.0555 461228.2384
## Exterior1stAsphShn -67048.0849 68836.8175
## Exterior1stBrkComm -64036.4522 41193.4339
## Exterior1stBrkFace 915.5879 38467.5154
## Exterior1stCBlock -72828.4495 63941.0404
## Exterior1stCemntBd -15733.9745 20990.6651
## Exterior1stHdBoard -22258.3153 11512.8654
## Exterior1stImStucc -86272.2985 48098.3911
## Exterior1stMetalSd -19009.2470 14547.2236
## Exterior1stPlywood -22984.6518 12406.2975
## Exterior1stStone -26806.5011 76634.9507
## Exterior1stStucco -30490.9307 12364.3262
## Exterior1stVinylSd -18198.8215 15063.4193
## Exterior1stWd Sdng -18706.1133 14703.1847
## Exterior1stWdShing -36018.1438 5205.4814
## BsmtExposureGd 16490.1547 31937.3374
## BsmtExposureMn -2202.7466 13311.2771
## BsmtExposureNB -36696.8286 -10608.3306
## BsmtExposureNo -6696.1830 3709.9854
## FullBath1 -27792.3459 22739.1878
## FullBath2 -8422.5381 42020.0355
## FullBath3 53194.1970 109234.0199
## KitchenQualFa -53384.9699 -22666.4904
## KitchenQualGd -38573.4588 -19826.8583
## KitchenQualTA -47056.0725 -26834.4249
## GarageTypeAttchd 29929.0320 88972.9297
## GarageTypeBasment 22257.0136 87966.4328
## GarageTypeBuiltIn 32316.3774 92905.9047
## GarageTypeCarPort 8038.0011 82784.1708
## GarageTypeDetchd 20426.9702 79637.1660
## GarageTypeNG -206170.3620 -39861.3352
## GarageFinishNG 35921.9311 169909.4043
## GarageFinishRFn -10295.8962 -242.0261
## GarageFinishUnf -14869.7542 -2729.1827
## GarageCars1 -98178.0838 -244.6269
## GarageCars2 -85917.7043 11842.9605
## GarageCars3 -45407.1159 52837.1792
## PoolQCFa -199635.9850 -65588.1774
## PoolQCGd -152551.6662 -16797.6273
## PoolQCNP -186332.5065 -89553.5818
par(mfrow = c(2, 2)) # Split the plotting panel into a 2 x 2 grid
plot(housetrain.lm9) # Plot the model information

o. Model-10
formula.new <- as.formula(paste('SalePrice ~',paste(coef.df$ActualName, collapse="+"),"-BsmtQual-Neighborhood-GarageType"))
housetrain.lm10 <- lm(formula.new, data = housetrain.knn)
summary(housetrain.lm10)
##
## Call:
## lm(formula = formula.new, data = housetrain.knn)
##
## Residuals:
## Min 1Q Median 3Q Max
## -144451 -17943 -1114 14221 221110
##
## Coefficients:
## Estimate Std. Error t value Pr(>|t|)
## (Intercept) -221624.79 71837.98 -3.085 0.002076 **
## MSZoningFV 45069.61 12272.63 3.672 0.000250 ***
## MSZoningRH 44165.29 14077.28 3.137 0.001741 **
## MSZoningRL 50831.43 11415.93 4.453 9.17e-06 ***
## MSZoningRM 28588.48 11571.89 2.471 0.013612 *
## LotConfigCulDSac 14662.65 4231.01 3.466 0.000545 ***
## LotConfigFR2 -10046.52 5412.20 -1.856 0.063628 .
## LotConfigFR3 -15557.36 17702.93 -0.879 0.379662
## LotConfigInside -3396.89 2413.22 -1.408 0.159471
## LandSlopeMod 18205.91 4630.28 3.932 8.85e-05 ***
## LandSlopeSev 30767.93 11351.25 2.711 0.006801 **
## Condition1Feedr 14670.24 6598.01 2.223 0.026348 *
## Condition1Norm 16474.25 5389.29 3.057 0.002280 **
## Condition1PosA 28970.75 13422.82 2.158 0.031075 *
## Condition1PosN 25737.89 9855.55 2.612 0.009112 **
## Condition1RRAe 9531.74 11520.52 0.827 0.408170
## Condition1RRAn 13421.18 8961.12 1.498 0.134437
## Condition1RRNe 4320.94 24506.86 0.176 0.860072
## Condition1RRNn 3113.70 16568.05 0.188 0.850956
## Condition2Feedr 16584.04 29300.58 0.566 0.571488
## Condition2Norm 19228.93 24781.88 0.776 0.437925
## Condition2PosA -56926.88 43658.98 -1.304 0.192486
## Condition2PosN -194735.52 36756.26 -5.298 1.36e-07 ***
## Condition2RRAe 59403.92 41888.96 1.418 0.156380
## Condition2RRAn -695.98 41893.43 -0.017 0.986748
## Condition2RRNn 9005.43 34812.44 0.259 0.795918
## OverallQual2 17189.57 40510.79 0.424 0.671398
## OverallQual3 19896.82 36258.40 0.549 0.583266
## OverallQual4 34486.92 35738.27 0.965 0.334721
## OverallQual5 42762.38 35903.00 1.191 0.233838
## OverallQual6 56740.87 35987.79 1.577 0.115101
## OverallQual7 78984.48 36079.12 2.189 0.028750 *
## OverallQual8 116672.95 36155.75 3.227 0.001281 **
## OverallQual9 171601.66 36744.25 4.670 3.30e-06 ***
## OverallQual10 244338.59 37763.87 6.470 1.36e-10 ***
## OverallCond2 55468.93 53621.11 1.034 0.301103
## OverallCond3 39312.18 50620.02 0.777 0.437520
## OverallCond4 50637.93 51412.51 0.985 0.324829
## OverallCond5 54324.84 51320.16 1.059 0.289991
## OverallCond6 62272.22 51318.16 1.213 0.225164
## OverallCond7 68441.99 51319.16 1.334 0.182538
## OverallCond8 63407.22 51413.85 1.233 0.217685
## OverallCond9 72927.93 51725.29 1.410 0.158793
## RoofMatlCompShg 315342.36 43819.14 7.196 1.01e-12 ***
## RoofMatlMembran 307003.50 56501.47 5.434 6.53e-08 ***
## RoofMatlMetal 281864.25 56326.94 5.004 6.34e-07 ***
## RoofMatlRoll 304408.07 55359.00 5.499 4.55e-08 ***
## RoofMatlTarGrv 300518.17 43892.52 6.847 1.14e-11 ***
## RoofMatlWdShake 317621.46 46753.54 6.794 1.62e-11 ***
## RoofMatlWdShngl 379817.86 45346.18 8.376 < 2e-16 ***
## Exterior1stAsphShn 1145.11 35057.70 0.033 0.973948
## Exterior1stBrkComm -6363.35 27075.82 -0.235 0.814228
## Exterior1stBrkFace 21683.63 9635.67 2.250 0.024584 *
## Exterior1stCBlock -11702.98 35190.57 -0.333 0.739517
## Exterior1stCemntBd 6755.24 9390.34 0.719 0.472028
## Exterior1stHdBoard -2626.79 8636.04 -0.304 0.761047
## Exterior1stImStucc -16537.46 34654.66 -0.477 0.633290
## Exterior1stMetalSd -769.87 8577.56 -0.090 0.928496
## Exterior1stPlywood -2412.41 9058.36 -0.266 0.790033
## Exterior1stStone 26944.50 26629.96 1.012 0.311806
## Exterior1stStucco -4123.90 10916.82 -0.378 0.705670
## Exterior1stVinylSd 948.23 8516.96 0.111 0.911368
## Exterior1stWd Sdng -60.34 8545.62 -0.007 0.994367
## Exterior1stWdShing -14066.06 10570.78 -1.331 0.183523
## BsmtExposureGd 23841.44 3977.18 5.995 2.60e-09 ***
## BsmtExposureMn 4877.24 4000.13 1.219 0.222950
## BsmtExposureNB -25165.36 6700.85 -3.756 0.000180 ***
## BsmtExposureNo -2118.69 2676.34 -0.792 0.428710
## FullBath1 -451.92 12951.10 -0.035 0.972169
## FullBath2 18765.05 12930.84 1.451 0.146956
## FullBath3 83576.10 14251.70 5.864 5.64e-09 ***
## KitchenQualFa -38163.79 7883.53 -4.841 1.44e-06 ***
## KitchenQualGd -27845.18 4816.65 -5.781 9.17e-09 ***
## KitchenQualTA -36446.78 5204.75 -7.003 3.92e-12 ***
## GarageFinishNG 455.80 20080.41 0.023 0.981894
## GarageFinishRFn -5724.25 2560.62 -2.235 0.025544 *
## GarageFinishUnf -13424.26 2898.90 -4.631 3.99e-06 ***
## GarageCars1 27879.62 19934.06 1.399 0.162160
## GarageCars2 38319.90 20003.90 1.916 0.055620 .
## GarageCars3 76906.88 20325.51 3.784 0.000161 ***
## PoolQCFa -127664.23 34581.20 -3.692 0.000231 ***
## PoolQCGd -78571.75 34995.20 -2.245 0.024912 *
## PoolQCNP -132696.33 24922.95 -5.324 1.18e-07 ***
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## Residual standard error: 33350 on 1377 degrees of freedom
## Multiple R-squared: 0.8336, Adjusted R-squared: 0.8237
## F-statistic: 84.15 on 82 and 1377 DF, p-value: < 2.2e-16
vif(housetrain.lm10) %>% kable("html",caption = "VIF Model-10", row.names = T, digits = 2) %>%
kable_styling(bootstrap_options = c("striped", "hover", "condensed", "responsive"), full_width = F, position = "left", font_size = 12)
VIF Model-10
|
|
GVIF
|
Df
|
GVIF^(1/(2*Df))
|
|
MSZoning
|
1.89
|
4
|
1.08
|
|
LotConfig
|
1.45
|
4
|
1.05
|
|
LandSlope
|
1.77
|
2
|
1.15
|
|
Condition1
|
2.62
|
8
|
1.06
|
|
Condition2
|
2.60
|
7
|
1.07
|
|
OverallQual
|
41.90
|
9
|
1.23
|
|
OverallCond
|
7.79
|
8
|
1.14
|
|
RoofMatl
|
4.19
|
7
|
1.11
|
|
Exterior1st
|
7.39
|
14
|
1.07
|
|
BsmtExposure
|
2.04
|
4
|
1.09
|
|
FullBath
|
3.13
|
3
|
1.21
|
|
KitchenQual
|
4.67
|
3
|
1.29
|
|
GarageFinish
|
56.40
|
3
|
1.96
|
|
GarageCars
|
80.81
|
3
|
2.08
|
|
PoolQC
|
1.94
|
3
|
1.12
|
confint(housetrain.lm10, level=0.95)
## 2.5 % 97.5 %
## (Intercept) -362548.4993 -80701.0749
## MSZoningFV 20994.5268 69144.6891
## MSZoningRH 16550.0605 71780.5261
## MSZoningRL 28436.9263 73225.9321
## MSZoningRM 5888.0265 51288.9260
## LotConfigCulDSac 6362.7256 22962.5758
## LotConfigFR2 -20663.5794 570.5357
## LotConfigFR3 -50284.9805 19170.2697
## LotConfigInside -8130.8788 1337.0993
## LandSlopeMod 9122.7367 27289.0868
## LandSlopeSev 8500.3167 53035.5408
## Condition1Feedr 1727.0030 27613.4775
## Condition1Norm 5902.1308 27046.3621
## Condition1PosA 2639.3536 55302.1419
## Condition1PosN 6404.3744 45071.4049
## Condition1RRAe -13067.9163 32131.4027
## Condition1RRAn -4157.7544 31000.1084
## Condition1RRNe -43753.8851 52395.7619
## Condition1RRNn -29387.6477 35615.0515
## Condition2Feedr -40894.5551 74062.6385
## Condition2Norm -29385.3874 67843.2399
## Condition2PosA -142572.1942 28718.4247
## Condition2PosN -266839.8506 -122631.1951
## Condition2RRAe -22769.1504 141577.0003
## Condition2RRAn -82877.8382 81485.8795
## Condition2RRNn -59285.7347 77296.5878
## OverallQual2 -62279.9783 96659.1086
## OverallQual3 -51230.8671 91024.5025
## OverallQual4 -35620.4143 104594.2589
## OverallQual5 -27668.1035 113192.8678
## OverallQual6 -13855.9616 127337.7075
## OverallQual7 8208.4923 149760.4639
## OverallQual8 45746.6352 187599.2725
## OverallQual9 99520.8922 243682.4218
## OverallQual10 170257.6440 318419.5351
## OverallCond2 -49718.9733 160656.8250
## OverallCond3 -59988.5181 138612.8726
## OverallCond4 -50217.3788 151493.2410
## OverallCond5 -46349.3117 154998.9969
## OverallCond6 -38398.0012 162942.4447
## OverallCond7 -32230.2067 169114.1838
## OverallCond8 -37450.7307 164265.1720
## OverallCond9 -28540.9638 174396.8194
## RoofMatlCompShg 229382.8793 401301.8487
## RoofMatlMembran 196165.2401 417841.7615
## RoofMatlMetal 171368.3593 392360.1445
## RoofMatlRoll 195810.9566 413005.1762
## RoofMatlTarGrv 214414.7302 386621.6007
## RoofMatlWdShake 225905.5917 409337.3258
## RoofMatlWdShngl 290862.7908 468772.9204
## Exterior1stAsphShn -67627.1617 69917.3847
## Exterior1stBrkComm -59477.6804 46750.9740
## Exterior1stBrkFace 2781.4473 40585.8051
## Exterior1stCBlock -80735.9112 57329.9608
## Exterior1stCemntBd -11665.6823 25176.1700
## Exterior1stHdBoard -19568.0158 14314.4262
## Exterior1stImStucc -84519.0923 51444.1819
## Exterior1stMetalSd -17596.3679 16056.6262
## Exterior1stPlywood -20182.0833 15357.2597
## Exterior1stStone -25295.1764 79184.1692
## Exterior1stStucco -25539.2919 17291.5011
## Exterior1stVinylSd -15759.3858 17655.8435
## Exterior1stWd Sdng -16824.1747 16703.4935
## Exterior1stWdShing -34802.6248 6670.5095
## BsmtExposureGd 16039.4589 31643.4236
## BsmtExposureMn -2969.7559 12724.2419
## BsmtExposureNB -38310.3424 -12020.3876
## BsmtExposureNo -7368.8385 3131.4654
## FullBath1 -25857.9437 24954.0969
## FullBath2 -6601.2321 44131.3412
## FullBath3 55618.7036 111533.4898
## KitchenQualFa -53628.8220 -22698.7529
## KitchenQualGd -37293.9547 -18396.4087
## KitchenQualTA -46656.8796 -26236.6881
## GarageFinishNG -38935.6978 39847.2998
## GarageFinishRFn -10747.3807 -701.1213
## GarageFinishUnf -19110.9863 -7737.5262
## GarageCars1 -11224.7790 66984.0284
## GarageCars2 -921.5126 77561.3213
## GarageCars3 37034.5580 116779.1955
## PoolQCFa -195501.7570 -59826.7040
## PoolQCGd -147221.4228 -9922.0682
## PoolQCNP -181587.3822 -83805.2819
par(mfrow = c(2, 2)) # Split the plotting panel into a 2 x 2 grid
plot(housetrain.lm10) # Plot the model information

p. Model-11
formula.new <- as.formula(paste('SalePrice ~',paste(coef.df$ActualName, collapse="+"),"-BsmtQual-Neighborhood-GarageType-GarageCars"))
housetrain.lm11 <- lm(formula.new, data = housetrain.knn)
summary(housetrain.lm11)
##
## Call:
## lm(formula = formula.new, data = housetrain.knn)
##
## Residuals:
## Min 1Q Median 3Q Max
## -148821 -19523 -1331 15216 225334
##
## Coefficients:
## Estimate Std. Error t value Pr(>|t|)
## (Intercept) -223697.2 72361.3 -3.091 0.002032 **
## MSZoningFV 36976.7 12816.1 2.885 0.003973 **
## MSZoningRH 40213.9 14726.9 2.731 0.006402 **
## MSZoningRL 46476.1 11936.2 3.894 0.000103 ***
## MSZoningRM 23078.1 12102.0 1.907 0.056732 .
## LotConfigCulDSac 15046.1 4429.0 3.397 0.000700 ***
## LotConfigFR2 -10243.3 5666.9 -1.808 0.070894 .
## LotConfigFR3 -13806.2 18536.7 -0.745 0.456517
## LotConfigInside -4322.7 2514.3 -1.719 0.085793 .
## LandSlopeMod 16255.9 4819.7 3.373 0.000765 ***
## LandSlopeSev 29359.1 11884.2 2.470 0.013615 *
## Condition1Feedr 14904.2 6908.5 2.157 0.031150 *
## Condition1Norm 14957.6 5641.4 2.651 0.008108 **
## Condition1PosA 27953.9 14052.0 1.989 0.046863 *
## Condition1PosN 24140.5 10316.2 2.340 0.019423 *
## Condition1RRAe 9073.3 12044.0 0.753 0.451372
## Condition1RRAn 12360.8 9380.3 1.318 0.187812
## Condition1RRNe 1513.4 25661.1 0.059 0.952979
## Condition1RRNn -1097.8 17341.1 -0.063 0.949534
## Condition2Feedr 18948.2 30654.1 0.618 0.536593
## Condition2Norm 23278.4 25942.5 0.897 0.369712
## Condition2PosA -88390.3 45575.4 -1.939 0.052653 .
## Condition2PosN -192718.1 38486.0 -5.007 6.23e-07 ***
## Condition2RRAe 63760.6 43857.9 1.454 0.146229
## Condition2RRAn 1563.5 43843.1 0.036 0.971558
## Condition2RRNn 17491.9 36433.8 0.480 0.631233
## OverallQual2 5756.2 42409.8 0.136 0.892057
## OverallQual3 19232.8 37966.4 0.507 0.612535
## OverallQual4 28966.8 37418.7 0.774 0.438990
## OverallQual5 38077.1 37592.9 1.013 0.311295
## OverallQual6 52792.0 37677.0 1.401 0.161388
## OverallQual7 78177.0 37775.2 2.070 0.038682 *
## OverallQual8 128144.9 37847.4 3.386 0.000730 ***
## OverallQual9 192110.8 38434.1 4.998 6.52e-07 ***
## OverallQual10 271048.6 39470.2 6.867 9.87e-12 ***
## OverallCond2 72680.2 56122.8 1.295 0.195529
## OverallCond3 47108.7 53001.0 0.889 0.374251
## OverallCond4 59990.1 53826.3 1.115 0.265253
## OverallCond5 64035.8 53731.8 1.192 0.233557
## OverallCond6 70526.5 53728.8 1.313 0.189523
## OverallCond7 76588.3 53729.4 1.425 0.154254
## OverallCond8 69772.0 53828.1 1.296 0.195123
## OverallCond9 80722.0 54160.8 1.490 0.136344
## RoofMatlCompShg 354178.7 45682.2 7.753 1.73e-14 ***
## RoofMatlMembran 345180.3 59037.9 5.847 6.25e-09 ***
## RoofMatlMetal 321617.9 58834.6 5.466 5.44e-08 ***
## RoofMatlRoll 338127.0 57875.0 5.842 6.41e-09 ***
## RoofMatlTarGrv 335263.9 45784.0 7.323 4.11e-13 ***
## RoofMatlWdShake 348852.3 48824.6 7.145 1.45e-12 ***
## RoofMatlWdShngl 416396.4 47312.2 8.801 < 2e-16 ***
## Exterior1stAsphShn -5473.8 36701.4 -0.149 0.881461
## Exterior1stBrkComm -7974.7 28349.9 -0.281 0.778525
## Exterior1stBrkFace 13588.7 10052.2 1.352 0.176654
## Exterior1stCBlock -21340.3 36825.1 -0.580 0.562344
## Exterior1stCemntBd 2875.5 9825.6 0.293 0.769826
## Exterior1stHdBoard -7574.3 9024.0 -0.839 0.401420
## Exterior1stImStucc 1370.5 36248.6 0.038 0.969845
## Exterior1stMetalSd -6810.6 8961.2 -0.760 0.447374
## Exterior1stPlywood -6560.9 9442.4 -0.695 0.487280
## Exterior1stStone 18665.4 27859.1 0.670 0.502975
## Exterior1stStucco -8536.3 11390.9 -0.749 0.453746
## Exterior1stVinylSd -922.5 8912.3 -0.104 0.917571
## Exterior1stWd Sdng -4849.2 8934.9 -0.543 0.587407
## Exterior1stWdShing -17518.9 11064.4 -1.583 0.113569
## BsmtExposureGd 26066.1 4157.7 6.269 4.84e-10 ***
## BsmtExposureMn 5564.3 4185.0 1.330 0.183874
## BsmtExposureNB -26652.0 7007.5 -3.803 0.000149 ***
## BsmtExposureNo -3577.6 2794.7 -1.280 0.200716
## FullBath1 -6914.0 13549.6 -0.510 0.609939
## FullBath2 15833.2 13528.3 1.170 0.242053
## FullBath3 87159.7 14917.3 5.843 6.39e-09 ***
## KitchenQualFa -44981.5 8225.9 -5.468 5.39e-08 ***
## KitchenQualGd -31118.8 5032.6 -6.183 8.25e-10 ***
## KitchenQualTA -41225.7 5433.3 -7.588 5.96e-14 ***
## GarageFinishNG -33793.7 5072.8 -6.662 3.90e-11 ***
## GarageFinishRFn -6210.0 2677.8 -2.319 0.020539 *
## GarageFinishUnf -13559.7 3014.6 -4.498 7.44e-06 ***
## PoolQCFa -118986.9 36202.9 -3.287 0.001039 **
## PoolQCGd -69408.8 36594.2 -1.897 0.058074 .
## PoolQCNP -121671.5 26069.8 -4.667 3.35e-06 ***
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## Residual standard error: 34930 on 1380 degrees of freedom
## Multiple R-squared: 0.8172, Adjusted R-squared: 0.8067
## F-statistic: 78.07 on 79 and 1380 DF, p-value: < 2.2e-16
vif(housetrain.lm11) %>% kable("html",caption = "VIF Model-11", row.names = T, digits = 2) %>%
kable_styling(bootstrap_options = c("striped", "hover", "condensed", "responsive"), full_width = F, position = "left", font_size = 12)
VIF Model-11
|
|
GVIF
|
Df
|
GVIF^(1/(2*Df))
|
|
MSZoning
|
1.85
|
4
|
1.08
|
|
LotConfig
|
1.43
|
4
|
1.05
|
|
LandSlope
|
1.75
|
2
|
1.15
|
|
Condition1
|
2.60
|
8
|
1.06
|
|
Condition2
|
2.57
|
7
|
1.07
|
|
OverallQual
|
33.35
|
9
|
1.22
|
|
OverallCond
|
7.56
|
8
|
1.13
|
|
RoofMatl
|
4.14
|
7
|
1.11
|
|
Exterior1st
|
6.88
|
14
|
1.07
|
|
BsmtExposure
|
2.01
|
4
|
1.09
|
|
FullBath
|
2.93
|
3
|
1.20
|
|
KitchenQual
|
4.59
|
3
|
1.29
|
|
GarageFinish
|
2.46
|
3
|
1.16
|
|
PoolQC
|
1.93
|
3
|
1.12
|
confint(housetrain.lm11, level=0.95)
## 2.5 % 97.5 %
## (Intercept) -365647.3228 -81747.1466
## MSZoningFV 11835.5556 62117.9224
## MSZoningRH 11324.3374 69103.4540
## MSZoningRL 23061.0391 69891.0968
## MSZoningRM -662.1771 46818.3229
## LotConfigCulDSac 6357.9024 23734.3449
## LotConfigFR2 -21360.0013 873.4511
## LotConfigFR3 -50169.2903 22556.9303
## LotConfigInside -9255.0380 609.5573
## LandSlopeMod 6801.1708 25710.6273
## LandSlopeSev 6046.0241 52672.1860
## Condition1Feedr 1351.8490 28456.5750
## Condition1Norm 3890.8546 26024.2668
## Condition1PosA 388.3385 55519.5586
## Condition1PosN 3903.3652 44377.5830
## Condition1RRAe -14553.2896 32699.8640
## Condition1RRAn -6040.4829 30761.9901
## Condition1RRNe -48825.4977 51852.3319
## Condition1RRNn -35115.5401 32920.0147
## Condition2Feedr -41185.5673 79081.9197
## Condition2Norm -27612.6110 74169.3163
## Condition2PosA -177794.8898 1014.3599
## Condition2PosN -268215.5423 -117220.7009
## Condition2RRAe -22274.7839 149795.9098
## Condition2RRAn -84442.8643 87569.8711
## Condition2RRNn -53979.8268 88963.5826
## OverallQual2 -77438.4470 88950.7957
## OverallQual3 -55245.3295 93710.8800
## OverallQual4 -44437.0060 102370.5288
## OverallQual5 -35668.2634 111822.4211
## OverallQual6 -21118.4917 126702.3982
## OverallQual7 4073.9662 152280.0860
## OverallQual8 53900.3128 202389.4546
## OverallQual9 116715.3042 267506.3119
## OverallQual10 193620.5166 348476.5983
## OverallCond2 -37414.9497 182775.4083
## OverallCond3 -56862.5184 151079.9686
## OverallCond4 -45600.0113 165580.2387
## OverallCond5 -41368.9540 169440.5861
## OverallCond6 -34872.4489 175925.5129
## OverallCond7 -28811.6971 181988.3844
## OverallCond8 -35821.7787 175365.7073
## OverallCond9 -25524.3248 186968.3249
## RoofMatlCompShg 264564.6038 443792.7118
## RoofMatlMembran 229366.5550 460994.1249
## RoofMatlMetal 206202.9493 437032.9316
## RoofMatlRoll 224594.5607 451659.4061
## RoofMatlTarGrv 245450.0933 425077.8050
## RoofMatlWdShake 253073.7915 444630.8078
## RoofMatlWdShngl 323584.7196 509208.0631
## Exterior1stAsphShn -77470.3219 66522.6318
## Exterior1stBrkComm -63588.1992 47638.7712
## Exterior1stBrkFace -6130.4681 33307.9648
## Exterior1stCBlock -93579.4624 50898.9141
## Exterior1stCemntBd -16399.1103 22150.1992
## Exterior1stHdBoard -25276.4508 10127.9415
## Exterior1stImStucc -69737.7585 72478.8253
## Exterior1stMetalSd -24389.6010 10768.3195
## Exterior1stPlywood -25083.9336 11962.2116
## Exterior1stStone -35985.4257 73316.2587
## Exterior1stStucco -30881.6398 13809.0413
## Exterior1stVinylSd -18405.7289 16560.6468
## Exterior1stWd Sdng -22376.5569 12678.2087
## Exterior1stWdShing -39223.8065 4185.9568
## BsmtExposureGd 17909.9928 34222.2218
## BsmtExposureMn -2645.3083 13773.9143
## BsmtExposureNB -40398.5247 -12905.4658
## BsmtExposureNo -9059.9731 1904.7706
## FullBath1 -33494.0908 19665.9915
## FullBath2 -10705.1349 42371.5483
## FullBath3 57896.5565 116422.8173
## KitchenQualFa -61118.1513 -28844.7835
## KitchenQualGd -40991.1464 -21246.3906
## KitchenQualTA -51884.0691 -30567.3118
## GarageFinishNG -43745.0243 -23842.4024
## GarageFinishRFn -11463.0199 -956.8859
## GarageFinishUnf -19473.4162 -7645.9229
## PoolQCFa -190005.4993 -47968.2254
## PoolQCGd -141195.0267 2377.4544
## PoolQCNP -172812.1534 -70530.8899
par(mfrow = c(2, 2)) # Split the plotting panel into a 2 x 2 grid
plot(housetrain.lm11) # Plot the model information

q. Model-12
formula.new <- as.formula(paste('SalePrice ~',paste(coef.df$ActualName, collapse="+"),"-BsmtQual-Neighborhood-GarageType-GarageCars-OverallQual"))
housetrain.lm12 <- lm(formula.new, data = housetrain.knn)
summary(housetrain.lm12)
##
## Call:
## lm(formula = formula.new, data = housetrain.knn)
##
## Residuals:
## Min 1Q Median 3Q Max
## -167733 -23275 -2510 20337 244314
##
## Coefficients:
## Estimate Std. Error t value Pr(>|t|)
## (Intercept) 13227.8 87668.1 0.151 0.880088
## MSZoningFV 51752.6 15576.6 3.322 0.000915 ***
## MSZoningRH 47841.5 17999.6 2.658 0.007953 **
## MSZoningRL 54816.9 14484.3 3.785 0.000161 ***
## MSZoningRM 30055.1 14716.2 2.042 0.041308 *
## LotConfigCulDSac 19962.5 5487.4 3.638 0.000285 ***
## LotConfigFR2 -12753.7 7019.6 -1.817 0.069453 .
## LotConfigFR3 -16022.8 22984.1 -0.697 0.485841
## LotConfigInside -6038.3 3114.7 -1.939 0.052749 .
## LandSlopeMod 8657.4 5887.6 1.470 0.141668
## LandSlopeSev 323.7 14662.2 0.022 0.982392
## Condition1Feedr 9082.4 8560.3 1.061 0.288877
## Condition1Norm 12517.6 6984.0 1.792 0.073299 .
## Condition1PosA 11190.4 17380.8 0.644 0.519787
## Condition1PosN 16861.4 12790.6 1.318 0.187634
## Condition1RRAe -5440.7 14927.7 -0.364 0.715565
## Condition1RRAn 6060.3 11626.8 0.521 0.602290
## Condition1RRNe -13618.7 31849.8 -0.428 0.669014
## Condition1RRNn 2767.8 21506.0 0.129 0.897615
## Condition2Feedr 52853.8 37718.1 1.401 0.161352
## Condition2Norm 46111.8 31717.0 1.454 0.146213
## Condition2PosA 105232.8 54953.5 1.915 0.055705 .
## Condition2PosN -73240.1 46154.4 -1.587 0.112775
## Condition2RRAe 75343.4 54197.1 1.390 0.164698
## Condition2RRAn 12169.8 54136.7 0.225 0.822170
## Condition2RRNn 40955.6 44918.3 0.912 0.362043
## OverallCond2 101661.0 52857.2 1.923 0.054645 .
## OverallCond3 65434.0 49596.1 1.319 0.187273
## OverallCond4 77646.3 49257.2 1.576 0.115174
## OverallCond5 94866.9 49055.6 1.934 0.053333 .
## OverallCond6 93093.6 49139.6 1.894 0.058370 .
## OverallCond7 97047.5 49091.9 1.977 0.048256 *
## OverallCond8 87778.9 49283.7 1.781 0.075116 .
## OverallCond9 112827.7 49820.2 2.265 0.023685 *
## RoofMatlCompShg 267787.1 55571.3 4.819 1.60e-06 ***
## RoofMatlMembran 316144.4 72203.0 4.379 1.28e-05 ***
## RoofMatlMetal 256825.1 71985.7 3.568 0.000372 ***
## RoofMatlRoll 237579.8 70892.0 3.351 0.000826 ***
## RoofMatlTarGrv 240284.7 55598.3 4.322 1.66e-05 ***
## RoofMatlWdShake 310538.7 59584.8 5.212 2.15e-07 ***
## RoofMatlWdShngl 388896.6 58177.9 6.685 3.34e-11 ***
## Exterior1stAsphShn 2742.1 45366.4 0.060 0.951810
## Exterior1stBrkComm 4101.6 34384.9 0.119 0.905066
## Exterior1stBrkFace 18293.6 12153.6 1.505 0.132501
## Exterior1stCBlock -39580.7 45485.9 -0.870 0.384355
## Exterior1stCemntBd 11803.3 11913.7 0.991 0.321986
## Exterior1stHdBoard -3835.8 10870.3 -0.353 0.724240
## Exterior1stImStucc 52349.5 44818.5 1.168 0.242994
## Exterior1stMetalSd -2644.4 10800.8 -0.245 0.806621
## Exterior1stPlywood -12698.7 11360.9 -1.118 0.263865
## Exterior1stStone -16106.4 34412.9 -0.468 0.639835
## Exterior1stStucco 7850.1 13618.3 0.576 0.564413
## Exterior1stVinylSd 5413.0 10703.6 0.506 0.613138
## Exterior1stWd Sdng 125.7 10751.4 0.012 0.990672
## Exterior1stWdShing -16504.4 13456.4 -1.227 0.220214
## BsmtExposureGd 41457.9 5090.1 8.145 8.37e-16 ***
## BsmtExposureMn 4997.6 5192.7 0.962 0.336003
## BsmtExposureNB -47416.1 8283.9 -5.724 1.27e-08 ***
## BsmtExposureNo -8753.1 3455.2 -2.533 0.011410 *
## FullBath1 -8225.6 16494.4 -0.499 0.618078
## FullBath2 29253.3 16463.3 1.777 0.075807 .
## FullBath3 123010.8 18156.3 6.775 1.83e-11 ***
## KitchenQualFa -122867.9 9280.1 -13.240 < 2e-16 ***
## KitchenQualGd -94696.8 5099.9 -18.568 < 2e-16 ***
## KitchenQualTA -121406.9 5455.7 -22.253 < 2e-16 ***
## GarageFinishNG -54391.3 6083.7 -8.940 < 2e-16 ***
## GarageFinishRFn -13410.0 3294.6 -4.070 4.96e-05 ***
## GarageFinishUnf -25708.0 3662.7 -7.019 3.49e-12 ***
## PoolQCFa -195940.0 44506.7 -4.402 1.15e-05 ***
## PoolQCGd -136667.0 45058.7 -3.033 0.002466 **
## PoolQCNP -192070.9 31817.2 -6.037 2.02e-09 ***
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## Residual standard error: 43380 on 1389 degrees of freedom
## Multiple R-squared: 0.7161, Adjusted R-squared: 0.7018
## F-statistic: 50.06 on 70 and 1389 DF, p-value: < 2.2e-16
vif(housetrain.lm12) %>% kable("html",caption = "VIF Model-12", row.names = T, digits = 2) %>%
kable_styling(bootstrap_options = c("striped", "hover", "condensed", "responsive"), full_width = F, position = "left", font_size = 12)
VIF Model-12
|
|
GVIF
|
Df
|
GVIF^(1/(2*Df))
|
|
MSZoning
|
1.73
|
4
|
1.07
|
|
LotConfig
|
1.40
|
4
|
1.04
|
|
LandSlope
|
1.67
|
2
|
1.14
|
|
Condition1
|
2.51
|
8
|
1.06
|
|
Condition2
|
2.06
|
7
|
1.05
|
|
OverallCond
|
3.25
|
8
|
1.08
|
|
RoofMatl
|
3.66
|
7
|
1.10
|
|
Exterior1st
|
5.26
|
14
|
1.06
|
|
BsmtExposure
|
1.70
|
4
|
1.07
|
|
FullBath
|
2.41
|
3
|
1.16
|
|
KitchenQual
|
2.44
|
3
|
1.16
|
|
GarageFinish
|
2.18
|
3
|
1.14
|
|
PoolQC
|
1.85
|
3
|
1.11
|
confint(housetrain.lm12, level=0.95)
## 2.5 % 97.5 %
## (Intercept) -158748.2846 185203.87879
## MSZoningFV 21196.4877 82308.73421
## MSZoningRH 12532.1337 83150.89643
## MSZoningRL 26403.4937 83230.28533
## MSZoningRM 1186.8535 58923.41592
## LotConfigCulDSac 9198.0875 30726.93663
## LotConfigFR2 -26523.9757 1016.49129
## LotConfigFR3 -61110.1987 29064.53690
## LotConfigInside -12148.3260 71.78195
## LandSlopeMod -2892.1738 20207.04207
## LandSlopeSev -28438.8740 29086.17721
## Condition1Feedr -7710.0548 25874.83467
## Condition1Norm -1182.7359 26217.85053
## Condition1PosA -22905.0835 45285.92642
## Condition1PosN -8229.6580 41952.35918
## Condition1RRAe -34723.9427 23842.63752
## Condition1RRAn -16747.7808 28868.28595
## Condition1RRNe -76097.5835 48860.12464
## Condition1RRNn -39420.0513 44955.61886
## Condition2Feedr -21136.7991 126844.40221
## Condition2Norm -16106.5673 108330.10349
## Condition2PosA -2567.9775 213033.51759
## Condition2PosN -163780.0775 17299.82191
## Condition2RRAe -30973.5369 181660.43265
## Condition2RRAn -94028.7716 118368.34423
## Condition2RRNn -47159.4021 129070.59723
## OverallCond2 -2027.5745 205349.48839
## OverallCond3 -31857.2808 162725.26323
## OverallCond4 -18980.1367 174272.74493
## OverallCond5 -1364.2111 191098.02358
## OverallCond6 -3302.2979 189489.50495
## OverallCond7 745.2875 193349.76292
## OverallCond8 -8899.6222 184457.50175
## OverallCond9 15096.6662 210558.70722
## RoofMatlCompShg 158774.4152 376799.84942
## RoofMatlMembran 174505.6702 457783.21911
## RoofMatlMetal 115612.5901 398037.53714
## RoofMatlRoll 98512.9157 376646.68896
## RoofMatlTarGrv 131219.0788 349350.37672
## RoofMatlWdShake 193652.8836 427424.60034
## RoofMatlWdShngl 274770.5158 503022.63378
## Exterior1stAsphShn -86251.9747 91736.26579
## Exterior1stBrkComm -63350.2875 71553.53971
## Exterior1stBrkFace -5547.8622 42135.03631
## Exterior1stCBlock -128809.2892 49647.79198
## Exterior1stCemntBd -11567.3853 35174.03782
## Exterior1stHdBoard -25159.7280 17488.14292
## Exterior1stImStucc -35569.8620 140268.83444
## Exterior1stMetalSd -23831.9755 18543.17499
## Exterior1stPlywood -34984.9964 9587.64942
## Exterior1stStone -83613.2086 51400.50663
## Exterior1stStucco -18864.5981 34564.81731
## Exterior1stVinylSd -15584.0132 26409.92220
## Exterior1stWd Sdng -20965.0998 21216.54472
## Exterior1stWdShing -42901.5520 9892.65383
## BsmtExposureGd 31472.8178 51442.93775
## BsmtExposureMn -5188.7807 15183.93742
## BsmtExposureNB -63666.3211 -31165.82789
## BsmtExposureNo -15531.0888 -1975.01306
## FullBath1 -40582.3119 24131.12262
## FullBath2 -3042.3716 61548.87738
## FullBath3 87393.9574 158627.55101
## KitchenQualFa -141072.3989 -104663.34786
## KitchenQualGd -104701.0468 -84692.49543
## KitchenQualTA -132109.0888 -110704.61472
## GarageFinishNG -66325.5926 -42457.01968
## GarageFinishRFn -19873.0174 -6947.02878
## GarageFinishUnf -32893.0406 -18522.87710
## PoolQCFa -283247.6385 -108632.31435
## PoolQCGd -225057.5221 -48276.46031
## PoolQCNP -254485.9145 -129655.97471
par(mfrow = c(2, 2)) # Split the plotting panel into a 2 x 2 grid
plot(housetrain.lm12) # Plot the model information

r. Model-13
formula.new <- as.formula(paste('SalePrice ~',paste(coef.df$ActualName, collapse="+"),"-BsmtQual-Neighborhood-GarageType-GarageCars-OverallQual-Exterior1st-LandSlope"))
housetrain.lm13 <- lm(formula.new, data = housetrain.knn)
summary(housetrain.lm13)
##
## Call:
## lm(formula = formula.new, data = housetrain.knn)
##
## Residuals:
## Min 1Q Median 3Q Max
## -168762 -23461 -2547 19697 243659
##
## Coefficients:
## Estimate Std. Error t value Pr(>|t|)
## (Intercept) 36184 85089 0.425 0.670722
## MSZoningFV 48638 15545 3.129 0.001791 **
## MSZoningRH 44947 18022 2.494 0.012744 *
## MSZoningRL 51360 14420 3.562 0.000381 ***
## MSZoningRM 29504 14631 2.016 0.043938 *
## LotConfigCulDSac 19396 5484 3.537 0.000418 ***
## LotConfigFR2 -13477 7040 -1.914 0.055795 .
## LotConfigFR3 -18877 23105 -0.817 0.414058
## LotConfigInside -6227 3118 -1.997 0.046045 *
## Condition1Feedr 7689 8499 0.905 0.365756
## Condition1Norm 12872 6892 1.868 0.062030 .
## Condition1PosA 11801 17291 0.682 0.495037
## Condition1PosN 19695 12770 1.542 0.123237
## Condition1RRAe -5675 14930 -0.380 0.703905
## Condition1RRAn 7372 11598 0.636 0.525091
## Condition1RRNe -19642 31884 -0.616 0.537955
## Condition1RRNn 5468 21556 0.254 0.799788
## Condition2Feedr 48846 37739 1.294 0.195772
## Condition2Norm 43673 31752 1.375 0.169214
## Condition2PosA 97334 55106 1.766 0.077561 .
## Condition2PosN -72399 46070 -1.571 0.116293
## Condition2RRAe 61357 54305 1.130 0.258729
## Condition2RRAn 19620 54313 0.361 0.717970
## Condition2RRNn 36010 45031 0.800 0.424033
## OverallCond2 92298 51377 1.796 0.072630 .
## OverallCond3 61007 48025 1.270 0.204183
## OverallCond4 74213 47779 1.553 0.120584
## OverallCond5 90183 47561 1.896 0.058144 .
## OverallCond6 86434 47659 1.814 0.069953 .
## OverallCond7 90979 47624 1.910 0.056290 .
## OverallCond8 82772 47856 1.730 0.083923 .
## OverallCond9 109361 48403 2.259 0.024012 *
## RoofMatlCompShg 254745 54901 4.640 3.81e-06 ***
## RoofMatlMembran 293000 70430 4.160 3.37e-05 ***
## RoofMatlMetal 232296 70283 3.305 0.000973 ***
## RoofMatlRoll 227039 69938 3.246 0.001197 **
## RoofMatlTarGrv 221468 54754 4.045 5.52e-05 ***
## RoofMatlWdShake 285712 58517 4.883 1.17e-06 ***
## RoofMatlWdShngl 369778 57483 6.433 1.71e-10 ***
## BsmtExposureGd 41533 4991 8.321 < 2e-16 ***
## BsmtExposureMn 4593 5192 0.885 0.376546
## BsmtExposureNB -46556 8136 -5.722 1.28e-08 ***
## BsmtExposureNo -9040 3437 -2.631 0.008617 **
## FullBath1 -2217 16333 -0.136 0.892066
## FullBath2 35193 16360 2.151 0.031636 *
## FullBath3 128977 18055 7.144 1.46e-12 ***
## KitchenQualFa -126518 9180 -13.781 < 2e-16 ***
## KitchenQualGd -96588 5003 -19.304 < 2e-16 ***
## KitchenQualTA -126031 5302 -23.768 < 2e-16 ***
## GarageFinishNG -55104 6038 -9.126 < 2e-16 ***
## GarageFinishRFn -13895 3290 -4.223 2.57e-05 ***
## GarageFinishUnf -27732 3589 -7.728 2.07e-14 ***
## PoolQCFa -206522 44463 -4.645 3.72e-06 ***
## PoolQCGd -145814 44903 -3.247 0.001192 **
## PoolQCNP -191517 31661 -6.049 1.87e-09 ***
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## Residual standard error: 43680 on 1405 degrees of freedom
## Multiple R-squared: 0.7089, Adjusted R-squared: 0.6977
## F-statistic: 63.36 on 54 and 1405 DF, p-value: < 2.2e-16
vif(housetrain.lm13) %>% kable("html",caption = "VIF Model-13", row.names = T, digits = 2) %>%
kable_styling(bootstrap_options = c("striped", "hover", "condensed", "responsive"), full_width = F, position = "left", font_size = 12)
VIF Model-13
|
|
GVIF
|
Df
|
GVIF^(1/(2*Df))
|
|
MSZoning
|
1.48
|
4
|
1.05
|
|
LotConfig
|
1.34
|
4
|
1.04
|
|
Condition1
|
2.23
|
8
|
1.05
|
|
Condition2
|
1.92
|
7
|
1.05
|
|
OverallCond
|
2.39
|
8
|
1.06
|
|
RoofMatl
|
2.18
|
7
|
1.06
|
|
BsmtExposure
|
1.45
|
4
|
1.05
|
|
FullBath
|
2.25
|
3
|
1.14
|
|
KitchenQual
|
2.08
|
3
|
1.13
|
|
GarageFinish
|
1.97
|
3
|
1.12
|
|
PoolQC
|
1.76
|
3
|
1.10
|
confint(housetrain.lm13, level=0.95)
## 2.5 % 97.5 %
## (Intercept) -130731.4599 203099.0356
## MSZoningFV 18144.0952 79132.0746
## MSZoningRH 9594.3868 80298.8582
## MSZoningRL 23072.2740 79648.3386
## MSZoningRM 802.4700 58206.1007
## LotConfigCulDSac 8637.8272 30153.5592
## LotConfigFR2 -27287.5019 333.9765
## LotConfigFR3 -64201.9662 26447.1908
## LotConfigInside -12343.7211 -109.4336
## Condition1Feedr -8982.2884 24360.4431
## Condition1Norm -648.4707 26392.3719
## Condition1PosA -22118.4556 45721.1502
## Condition1PosN -5355.9026 44746.4216
## Condition1RRAe -34963.3127 23612.4228
## Condition1RRAn -15378.1633 30122.7949
## Condition1RRNe -82187.5624 42902.7906
## Condition1RRNn -36817.7854 47754.1268
## Condition2Feedr -25185.3240 122878.0300
## Condition2Norm -18613.2976 105958.8276
## Condition2PosA -10764.5813 205431.6556
## Condition2PosN -162772.3446 17974.6899
## Condition2RRAe -45170.3715 167883.6190
## Condition2RRAn -86922.4500 126162.8797
## Condition2RRNn -52324.9655 124345.4263
## OverallCond2 -8485.0867 193081.1406
## OverallCond3 -33201.3820 155214.3515
## OverallCond4 -19511.9866 167938.4608
## OverallCond5 -3114.9428 183479.8969
## OverallCond6 -7056.2301 179924.4697
## OverallCond7 -2442.7482 184400.4276
## OverallCond8 -11105.5657 176649.0905
## OverallCond9 14411.4693 204310.2869
## RoofMatlCompShg 147048.7962 362440.4290
## RoofMatlMembran 154841.7321 431158.3987
## RoofMatlMetal 94426.1728 370166.6832
## RoofMatlRoll 89843.8038 364233.6067
## RoofMatlTarGrv 114059.7680 328876.8957
## RoofMatlWdShake 170921.9397 400501.7097
## RoofMatlWdShngl 257016.2129 482540.5053
## BsmtExposureGd 31741.5599 51324.3040
## BsmtExposureMn -5592.2843 14777.5586
## BsmtExposureNB -62515.3822 -30596.5212
## BsmtExposureNo -15781.4608 -2298.8094
## FullBath1 -34256.1690 29822.9284
## FullBath2 3099.8736 67285.5982
## FullBath3 93558.8835 164394.3447
## KitchenQualFa -144527.4722 -108509.3719
## KitchenQualGd -106403.0038 -86773.1005
## KitchenQualTA -136432.4315 -115629.1063
## GarageFinishNG -66948.2914 -43260.0591
## GarageFinishRFn -20349.7272 -7440.1925
## GarageFinishUnf -34771.3841 -20692.1580
## PoolQCFa -293742.2175 -119302.1342
## PoolQCGd -233897.4959 -57730.6669
## PoolQCNP -253624.5235 -129409.8357
par(mfrow = c(2, 2)) # Split the plotting panel into a 2 x 2 grid
plot(housetrain.lm13) # Plot the model information

s. Model-14
formula.new <- as.formula(paste('SalePrice ~',paste(coef.df$ActualName, collapse="+"),"-BsmtQual-GarageType-OverallCond-Condition1-Condition2-GarageCars-Exterior1st-Neighborhood"))
housetrain.lm14 <- lm(formula.new, data = housetrain.knn)
summary(housetrain.lm14)
##
## Call:
## lm(formula = formula.new, data = housetrain.knn)
##
## Residuals:
## Min 1Q Median 3Q Max
## -318023 -19232 -1438 15274 225302
##
## Coefficients:
## Estimate Std. Error t value Pr(>|t|)
## (Intercept) -140775 62134 -2.266 0.023622 *
## MSZoningFV 34349 12998 2.643 0.008316 **
## MSZoningRH 37177 15016 2.476 0.013411 *
## MSZoningRL 44948 12073 3.723 0.000205 ***
## MSZoningRM 22791 12229 1.864 0.062570 .
## LotConfigCulDSac 16576 4477 3.702 0.000222 ***
## LotConfigFR2 -10317 5794 -1.781 0.075190 .
## LotConfigFR3 -19835 18393 -1.078 0.281019
## LotConfigInside -4508 2540 -1.775 0.076149 .
## LandSlopeMod 16152 4859 3.324 0.000910 ***
## LandSlopeSev 27454 11955 2.296 0.021798 *
## OverallQual2 34139 34314 0.995 0.319952
## OverallQual3 51738 28971 1.786 0.074332 .
## OverallQual4 65595 28084 2.336 0.019649 *
## OverallQual5 76084 28182 2.700 0.007022 **
## OverallQual6 90657 28284 3.205 0.001379 **
## OverallQual7 115461 28398 4.066 5.05e-05 ***
## OverallQual8 165036 28544 5.782 9.07e-09 ***
## OverallQual9 224775 29218 7.693 2.68e-14 ***
## OverallQual10 273509 30388 9.001 < 2e-16 ***
## RoofMatlCompShg 336624 45948 7.326 3.96e-13 ***
## RoofMatlMembran 336231 59614 5.640 2.05e-08 ***
## RoofMatlMetal 299535 59643 5.022 5.76e-07 ***
## RoofMatlRoll 323504 58453 5.534 3.71e-08 ***
## RoofMatlTarGrv 314768 45905 6.857 1.05e-11 ***
## RoofMatlWdShake 343132 48706 7.045 2.89e-12 ***
## RoofMatlWdShngl 410137 47738 8.591 < 2e-16 ***
## BsmtExposureGd 26602 4239 6.275 4.64e-10 ***
## BsmtExposureMn 5477 4242 1.291 0.196882
## BsmtExposureNB -25800 6957 -3.708 0.000217 ***
## BsmtExposureNo -3088 2842 -1.086 0.277443
## FullBath1 1109 13410 0.083 0.934096
## FullBath2 22432 13446 1.668 0.095485 .
## FullBath3 90116 14961 6.024 2.17e-09 ***
## KitchenQualFa -51466 8226 -6.256 5.21e-10 ***
## KitchenQualGd -34897 5058 -6.900 7.83e-12 ***
## KitchenQualTA -47662 5432 -8.774 < 2e-16 ***
## GarageFinishNG -33688 5085 -6.625 4.93e-11 ***
## GarageFinishRFn -6378 2726 -2.340 0.019439 *
## GarageFinishUnf -14700 2984 -4.927 9.32e-07 ***
## PoolQCFa -125097 36711 -3.408 0.000674 ***
## PoolQCGd -66715 37198 -1.794 0.073105 .
## PoolQCNP -123742 26235 -4.717 2.63e-06 ***
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## Residual standard error: 36140 on 1417 degrees of freedom
## Multiple R-squared: 0.799, Adjusted R-squared: 0.7931
## F-statistic: 134.1 on 42 and 1417 DF, p-value: < 2.2e-16
vif(housetrain.lm14) %>% kable("html",caption = "VIF Model-14", row.names = T, digits = 2) %>%
kable_styling(bootstrap_options = c("striped", "hover", "condensed", "responsive"), full_width = F, position = "left", font_size = 12)
VIF Model-14
|
|
GVIF
|
Df
|
GVIF^(1/(2*Df))
|
|
MSZoning
|
1.44
|
4
|
1.05
|
|
LotConfig
|
1.16
|
4
|
1.02
|
|
LandSlope
|
1.57
|
2
|
1.12
|
|
OverallQual
|
8.39
|
9
|
1.13
|
|
RoofMatl
|
2.59
|
7
|
1.07
|
|
BsmtExposure
|
1.69
|
4
|
1.07
|
|
FullBath
|
2.36
|
3
|
1.15
|
|
KitchenQual
|
3.64
|
3
|
1.24
|
|
GarageFinish
|
2.07
|
3
|
1.13
|
|
PoolQC
|
1.73
|
3
|
1.10
|
confint(housetrain.lm14, level=0.95)
## 2.5 % 97.5 %
## (Intercept) -262660.137 -18890.1821
## MSZoningFV 8852.009 59845.4947
## MSZoningRH 7720.238 66633.7705
## MSZoningRL 21264.512 68630.6154
## MSZoningRM -1197.621 46778.6355
## LotConfigCulDSac 7793.228 25357.7934
## LotConfigFR2 -21682.200 1048.8671
## LotConfigFR3 -55915.065 16244.2402
## LotConfigInside -9490.978 474.6531
## LandSlopeMod 6619.855 25684.1365
## LandSlopeSev 4002.252 50905.9882
## OverallQual2 -33172.742 101451.5595
## OverallQual3 -5091.760 108568.0131
## OverallQual4 10503.578 120686.0866
## OverallQual5 20800.549 131367.2884
## OverallQual6 35174.390 146139.0954
## OverallQual7 59755.175 171166.3763
## OverallQual8 109043.447 221027.4587
## OverallQual9 167459.440 282091.1065
## OverallQual10 213899.310 333117.9293
## RoofMatlCompShg 246490.824 426757.1622
## RoofMatlMembran 219289.290 453172.5800
## RoofMatlMetal 182536.611 416533.3252
## RoofMatlRoll 208840.262 438167.6963
## RoofMatlTarGrv 224719.686 404815.8533
## RoofMatlWdShake 247588.507 438675.9362
## RoofMatlWdShngl 316493.412 503781.3614
## BsmtExposureGd 18285.604 34917.5636
## BsmtExposureMn -2844.525 13798.4277
## BsmtExposureNB -39447.635 -12153.0112
## BsmtExposureNo -8663.394 2487.3180
## FullBath1 -25195.780 27413.9329
## FullBath2 -3944.871 48809.1541
## FullBath3 60768.733 119463.7388
## KitchenQualFa -67602.711 -35329.3586
## KitchenQualGd -44818.303 -24975.3718
## KitchenQualTA -58318.064 -37005.7739
## GarageFinishNG -43663.992 -23712.6222
## GarageFinishRFn -11724.803 -1030.4454
## GarageFinishUnf -20552.889 -8847.8470
## PoolQCFa -197111.587 -53082.7131
## PoolQCGd -139683.743 6254.2460
## PoolQCNP -175204.955 -72279.2824
par(mfrow = c(2, 2)) # Split the plotting panel into a 2 x 2 grid
plot(housetrain.lm14) # Plot the model information

t. Model-15
formula.new <- as.formula(paste('SalePrice ~',paste(coef.df$ActualName, collapse="+"),"-BsmtQual-GarageType-OverallCond-Condition1-Condition2-GarageCars-Exterior1st-OverallQual"))
housetrain.lm15 <- lm(formula.new, data = housetrain.knn)
summary(housetrain.lm15)
##
## Call:
## lm(formula = formula.new, data = housetrain.knn)
##
## Residuals:
## Min 1Q Median 3Q Max
## -195608 -19886 3 18145 221584
##
## Coefficients:
## Estimate Std. Error t value Pr(>|t|)
## (Intercept) 151067 58039 2.603 0.009342 **
## MSZoningFV 24606 17383 1.416 0.157138
## MSZoningRH 31598 17580 1.797 0.072497 .
## MSZoningRL 46302 14478 3.198 0.001414 **
## MSZoningRM 36354 13634 2.666 0.007756 **
## LotConfigCulDSac 15981 4748 3.366 0.000783 ***
## LotConfigFR2 -18052 6129 -2.945 0.003280 **
## LotConfigFR3 -41554 19286 -2.155 0.031357 *
## LotConfigInside -3848 2661 -1.446 0.148327
## LandSlopeMod 4853 5188 0.935 0.349800
## LandSlopeSev -3173 12676 -0.250 0.802350
## NeighborhoodBlueste -3403 28687 -0.119 0.905601
## NeighborhoodBrDale -2194 14622 -0.150 0.880734
## NeighborhoodBrkSide 14921 11485 1.299 0.194108
## NeighborhoodClearCr 21280 12361 1.721 0.085387 .
## NeighborhoodCollgCr 27524 9896 2.781 0.005489 **
## NeighborhoodCrawfor 59801 11049 5.413 7.30e-08 ***
## NeighborhoodEdwards -4920 10535 -0.467 0.640580
## NeighborhoodGilbert 16086 10222 1.574 0.115793
## NeighborhoodIDOTRR 4112 13217 0.311 0.755748
## NeighborhoodMeadowV -18837 14328 -1.315 0.188848
## NeighborhoodMitchel 11263 11140 1.011 0.312164
## NeighborhoodNAmes 12671 10125 1.251 0.210976
## NeighborhoodNoRidge 126490 11092 11.403 < 2e-16 ***
## NeighborhoodNPkVill -2239 15946 -0.140 0.888362
## NeighborhoodNridgHt 89404 10504 8.512 < 2e-16 ***
## NeighborhoodNWAmes 24881 10584 2.351 0.018868 *
## NeighborhoodOldTown 6227 11383 0.547 0.584434
## NeighborhoodSawyer 8284 10729 0.772 0.440168
## NeighborhoodSawyerW 28748 10693 2.688 0.007264 **
## NeighborhoodSomerst 59646 12552 4.752 2.22e-06 ***
## NeighborhoodStoneBr 83935 12093 6.941 5.94e-12 ***
## NeighborhoodSWISU 11052 12510 0.883 0.377149
## NeighborhoodTimber 41147 11235 3.663 0.000259 ***
## NeighborhoodVeenker 39018 15052 2.592 0.009634 **
## RoofMatlCompShg 201615 47185 4.273 2.06e-05 ***
## RoofMatlMembran 248665 61530 4.041 5.60e-05 ***
## RoofMatlMetal 187165 61530 3.042 0.002395 **
## RoofMatlRoll 181592 60341 3.009 0.002664 **
## RoofMatlTarGrv 186926 46971 3.980 7.25e-05 ***
## RoofMatlWdShake 239745 50200 4.776 1.98e-06 ***
## RoofMatlWdShngl 324035 49464 6.551 8.01e-11 ***
## BsmtExposureGd 36971 4474 8.263 3.26e-16 ***
## BsmtExposureMn 3624 4498 0.806 0.420646
## BsmtExposureNB -40847 7089 -5.762 1.02e-08 ***
## BsmtExposureNo -8221 3092 -2.659 0.007931 **
## FullBath1 2924 13485 0.217 0.828366
## FullBath2 32105 13544 2.370 0.017904 *
## FullBath3 116714 15085 7.737 1.93e-14 ***
## KitchenQualFa -101027 8163 -12.377 < 2e-16 ***
## KitchenQualGd -72792 4694 -15.508 < 2e-16 ***
## KitchenQualTA -90035 5102 -17.646 < 2e-16 ***
## GarageFinishNG -44714 5424 -8.244 3.79e-16 ***
## GarageFinishRFn -16695 2978 -5.606 2.48e-08 ***
## GarageFinishUnf -24937 3310 -7.533 8.87e-14 ***
## PoolQCFa -166629 38289 -4.352 1.45e-05 ***
## PoolQCGd -101526 38729 -2.621 0.008851 **
## PoolQCNP -163544 27149 -6.024 2.17e-09 ***
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## Residual standard error: 37580 on 1402 degrees of freedom
## Multiple R-squared: 0.785, Adjusted R-squared: 0.7763
## F-statistic: 89.82 on 57 and 1402 DF, p-value: < 2.2e-16
vif(housetrain.lm14) %>% kable("html",caption = "VIF Model-15", row.names = T, digits = 2) %>%
kable_styling(bootstrap_options = c("striped", "hover", "condensed", "responsive"), full_width = F, position = "left", font_size = 12)
VIF Model-15
|
|
GVIF
|
Df
|
GVIF^(1/(2*Df))
|
|
MSZoning
|
1.44
|
4
|
1.05
|
|
LotConfig
|
1.16
|
4
|
1.02
|
|
LandSlope
|
1.57
|
2
|
1.12
|
|
OverallQual
|
8.39
|
9
|
1.13
|
|
RoofMatl
|
2.59
|
7
|
1.07
|
|
BsmtExposure
|
1.69
|
4
|
1.07
|
|
FullBath
|
2.36
|
3
|
1.15
|
|
KitchenQual
|
3.64
|
3
|
1.24
|
|
GarageFinish
|
2.07
|
3
|
1.13
|
|
PoolQC
|
1.73
|
3
|
1.10
|
confint(housetrain.lm15, level=0.95)
## 2.5 % 97.5 %
## (Intercept) 37215.033 264919.217
## MSZoningFV -9493.504 58705.467
## MSZoningRH -2888.759 66084.170
## MSZoningRL 17900.941 74703.854
## MSZoningRM 9607.867 63099.492
## LotConfigCulDSac 6668.385 25294.306
## LotConfigFR2 -30075.135 -6028.756
## LotConfigFR3 -79385.758 -3722.442
## LotConfigInside -9067.456 1371.309
## LandSlopeMod -5325.168 15030.407
## LandSlopeSev -28038.901 21692.045
## NeighborhoodBlueste -59676.586 52871.468
## NeighborhoodBrDale -30877.316 26488.821
## NeighborhoodBrkSide -7609.109 37450.937
## NeighborhoodClearCr -2969.104 45528.015
## NeighborhoodCollgCr 8110.326 46937.022
## NeighborhoodCrawfor 38127.317 81474.261
## NeighborhoodEdwards -25585.307 15746.039
## NeighborhoodGilbert -3966.253 36138.613
## NeighborhoodIDOTRR -21815.755 30040.461
## NeighborhoodMeadowV -46944.400 9270.869
## NeighborhoodMitchel -10589.412 33115.098
## NeighborhoodNAmes -7190.654 32532.239
## NeighborhoodNoRidge 104730.302 148249.841
## NeighborhoodNPkVill -33519.634 29041.880
## NeighborhoodNridgHt 68799.441 110008.583
## NeighborhoodNWAmes 4119.337 45643.368
## NeighborhoodOldTown -16102.112 28555.870
## NeighborhoodSawyer -12762.254 29330.328
## NeighborhoodSawyerW 7771.630 49724.348
## NeighborhoodSomerst 35023.838 84268.985
## NeighborhoodStoneBr 60213.123 107657.117
## NeighborhoodSWISU -13488.955 35593.440
## NeighborhoodTimber 19108.389 63185.168
## NeighborhoodVeenker 9491.703 68544.583
## RoofMatlCompShg 109053.640 294176.940
## RoofMatlMembran 127963.979 369365.301
## RoofMatlMetal 66463.979 307865.301
## RoofMatlRoll 63223.502 299959.491
## RoofMatlTarGrv 94786.169 279066.690
## RoofMatlWdShake 141270.816 338219.208
## RoofMatlWdShngl 227004.409 421065.760
## BsmtExposureGd 28193.531 45748.071
## BsmtExposureMn -5200.506 12447.525
## BsmtExposureNB -54753.328 -26939.998
## BsmtExposureNo -14286.005 -2155.540
## FullBath1 -23529.098 29377.267
## FullBath2 5535.923 58674.841
## FullBath3 87122.862 146305.640
## KitchenQualFa -117039.171 -85014.531
## KitchenQualGd -82000.388 -63584.315
## KitchenQualTA -100044.226 -80026.215
## GarageFinishNG -55354.348 -34074.456
## GarageFinishRFn -22536.289 -10853.469
## GarageFinishUnf -31431.097 -18442.898
## PoolQCFa -241738.359 -91520.156
## PoolQCGd -177499.946 -25552.197
## PoolQCNP -216802.082 -110286.324
par(mfrow = c(2, 2)) # Split the plotting panel into a 2 x 2 grid
plot(housetrain.lm15) # Plot the model information

#########
u. Apply models to test data
#Impute test data for missing values
housetest.noid <- housetest %>% dplyr::select(-Id)
#Apply factors
#Convert int columns to factor, as these are fixed values and choice is made from the list
housetest.noid$MSSubClass <- factor(housetest.noid$MSSubClass, levels = c(20,30,40,45,50,60,70,75,80,85,90,120,150,160,180,190))
housetest.noid$OverallQual <- factor(housetest.noid$OverallQual, levels = c(1,2,3,4,5,6,7,8,9,10))
housetest.noid$OverallCond <- factor(housetest.noid$OverallCond, levels = c(1,2,3,4,5,6,7,8,9,10))
housetest.noid$BsmtFullBath <- factor(housetest.noid$BsmtFullBath, levels = c(0,1,2,3))
housetest.noid$BsmtHalfBath <- factor(housetest.noid$BsmtHalfBath, levels = c(0,1,2))
housetest.noid$FullBath <- factor(housetest.noid$FullBath, levels = c(0,1,2,3))
housetest.noid$HalfBath <- factor(housetest.noid$HalfBath, levels = c(0,1,2))
housetest.noid$BedroomAbvGr <- factor(housetest.noid$BedroomAbvGr, levels = c(0,1,2,3,4,5,6,7,8))
housetest.noid$KitchenAbvGr <- factor(housetest.noid$KitchenAbvGr, levels = c(0,1,2,3))
housetest.noid$Fireplaces <- factor(housetest.noid$Fireplaces, levels = c(0,1,2,3))
housetest.noid$GarageCars <- factor(housetest.noid$GarageCars, levels = c(0,1,2,3))
housetest.noid$MSZoning <- as.character(housetest.noid$MSZoning)
housetest.noid$MSZoning <- ifelse(housetest.noid$MSZoning == 'C (all)', 'C', housetest.noid$MSZoning)
housetest.noid$MSZoning <- factor(housetest.noid$MSZoning)
housetest.noid$RoofMatl <- as.character(housetest.noid$RoofMatl)
housetest.noid$RoofMatl <- ifelse(housetest.noid$RoofMatl == 'Tar&Grv', 'TarGrv', housetest.noid$RoofMatl)
housetest.noid$RoofMatl <- factor(housetest.noid$RoofMatl)
#Based on the documentation, following columns have 'NA' values and they are correct
#Values are not really missing but they are legitimately not availabel for particular property
#Created 'NA' as one of the category
housetest.noid$Alley <- as.character(housetest.noid$Alley)
housetest.noid[c("Alley")][is.na(housetest.noid[c("Alley")])] <- 'NAly'
housetest.noid$Alley <- factor(housetest.noid$Alley)
housetest.noid$PoolQC <- as.character(housetest.noid$PoolQC)
housetest.noid$PoolQC <- ifelse(housetest.noid$PoolArea == 0, 'NP', housetest.noid$PoolQC) #NP - No Pool
housetest.noid$PoolQC <- factor(housetest.noid$PoolQC)
housetest.noid$GarageType <- as.character(housetest.noid$GarageType)
housetest.noid$GarageType <- ifelse(housetest.noid$GarageCars == 0, 'NG', housetest.noid$GarageType) #NG - No Garage
housetest.noid$GarageType <- factor(housetest.noid$GarageType)
housetest.noid$GarageQual <- as.character(housetest.noid$GarageQual)
housetest.noid$GarageQual <- ifelse(housetest.noid$GarageCars == 0, 'NG', housetest.noid$GarageQual)
housetest.noid$GarageQual <- factor(housetest.noid$GarageQual)
housetest.noid$GarageCond <- as.character(housetest.noid$GarageCond)
housetest.noid$GarageCond <- ifelse(housetest.noid$GarageCars == 0, 'NG', housetest.noid$GarageCond)
housetest.noid$GarageCond <- factor(housetest.noid$GarageCond)
housetest.noid$GarageYrBlt <- housetest.noid$YrSold - housetest.noid$GarageYrBlt
housetest.noid[c("GarageYrBlt")][is.na(housetest.noid[c("GarageYrBlt")])] <- 0
housetest.noid$GarageYrBlt <- as.integer(housetest.noid$GarageYrBlt)
housetest.noid$GarageFinish <- as.character(housetest.noid$GarageFinish)
housetest.noid$GarageFinish <- ifelse(housetest.noid$GarageCars == 0, 'NG', housetest.noid$GarageFinish)
housetest.noid$GarageFinish <- factor(housetest.noid$GarageFinish)
#Time duration year build to sold, this helps us to understand if old houses values are lower than newer ones
housetest.noid$YearBuilt <- housetest.noid$YrSold - housetest.noid$YearBuilt
housetest.noid$YearRemodAdd <- housetest.noid$YrSold - housetest.noid$YearRemodAdd
housetest.noid$FireplaceQu <- as.character(housetest.noid$FireplaceQu)
housetest.noid$FireplaceQu <- ifelse(housetest.noid$Fireplaces == 0, 'NF', housetest.noid$FireplaceQu) #NF - No Fireplace
housetest.noid$FireplaceQu <- factor(housetest.noid$FireplaceQu)
housetest.noid$Fence <- as.character(housetest.noid$Fence)
housetest.noid[c("Fence")][is.na(housetest.noid[c("Fence")])] <- 'NF' #NF - No Fence
housetest.noid$Fence <- factor(housetest.noid$Fence)
housetest.noid$BsmtQual <- as.character(housetest.noid$BsmtQual)
housetest.noid$BsmtQual <- ifelse(housetest.noid$TotalBsmtSF == 0, 'NB', housetest.noid$BsmtQual) #NB - No Basement
housetest.noid$BsmtQual <- factor(housetest.noid$BsmtQual)
housetest.noid$BsmtCond <- as.character(housetest.noid$BsmtCond)
housetest.noid$BsmtCond <- ifelse(housetest.noid$TotalBsmtSF == 0, 'NB', housetest.noid$BsmtCond)
housetest.noid$BsmtCond <- factor(housetest.noid$BsmtCond)
housetest.noid$BsmtExposure <- as.character(housetest.noid$BsmtExposure)
housetest.noid$BsmtExposure <- ifelse(housetest.noid$TotalBsmtSF == 0, 'NB', housetest.noid$BsmtExposure)
housetest.noid$BsmtExposure <- factor(housetest.noid$BsmtExposure)
housetest.noid$BsmtFinType1 <- as.character(housetest.noid$BsmtFinType1)
housetest.noid$BsmtFinType1 <- ifelse(housetest.noid$TotalBsmtSF == 0, 'NB', housetest.noid$BsmtFinType1)
housetest.noid$BsmtFinType1 <- factor(housetest.noid$BsmtFinType1)
housetest.noid$BsmtFinType2 <- as.character(housetest.noid$BsmtFinType2)
housetest.noid$BsmtFinType2 <- ifelse(housetest.noid$TotalBsmtSF == 0, 'NB', housetest.noid$BsmtFinType2)
housetest.noid$BsmtFinType2 <- factor(housetest.noid$BsmtFinType2)
housetest.noid$MiscFeature <- as.character(housetest.noid$MiscFeature)
housetest.noid[c("MiscFeature")][is.na(housetest.noid[c("MiscFeature")])] <- 'NM' #NM - No additional features
housetest.noid$MiscFeature <- factor(housetest.noid$MiscFeature)
#Year and Month sold are also fixed
housetest.noid$MoSold <- factor(housetest.noid$MoSold)
housetest.noid$YrSold <- factor(housetest.noid$YrSold)
#Impute dataset for missing values
housetest.knn <- knnImputation(housetest.noid, 11, meth='weighAvg')
housetest.knn <- cbind(Id = housetest$Id, housetest.knn)
v. Prediction-8
housetest.knn.pred8 <- cbind(housetest.knn, predict(housetrain.lm8, housetest.knn, interval="predict", level=.95) %>% data.frame())
housetest.knn.pred8 %>% arrange(fit) %>% head(10) %>% dplyr::select(fit, lwr, upr) %>%
kable("html",caption = "Top 10 Rows Prediction Using Model-8", row.names = T, digits = 2) %>%
kable_styling(bootstrap_options = c("striped", "hover", "condensed", "responsive"), full_width = F, position = "left", font_size = 12)
Top 10 Rows Prediction Using Model-8
|
|
fit
|
lwr
|
upr
|
|
1
|
11537.87
|
-54293.97
|
77369.71
|
|
2
|
19599.66
|
-47171.58
|
86370.90
|
|
3
|
28033.83
|
-85935.58
|
142003.24
|
|
4
|
28797.12
|
-38749.81
|
96344.06
|
|
5
|
30153.52
|
-77718.34
|
138025.37
|
|
6
|
32288.34
|
-34975.26
|
99551.94
|
|
7
|
35331.33
|
-29525.27
|
100187.93
|
|
8
|
38340.33
|
-35238.25
|
111918.91
|
|
9
|
42118.46
|
-32085.87
|
116322.78
|
|
10
|
43876.84
|
-73913.55
|
161667.22
|
w. Prediction-9
housetest.knn.pred9 <- cbind(housetest.knn, predict(housetrain.lm9, housetest.knn, interval="predict", level=.95) %>% data.frame())
housetest.knn.pred9 %>% arrange(fit) %>% head(10) %>% dplyr::select(fit, lwr, upr) %>%
kable("html",caption = "Top 10 Rows Prediction Using Model-9", row.names = T, digits = 2) %>%
kable_styling(bootstrap_options = c("striped", "hover", "condensed", "responsive"), full_width = F, position = "left", font_size = 12)
Top 10 Rows Prediction Using Model-9
|
|
fit
|
lwr
|
upr
|
|
1
|
-9050.59
|
-129702.34
|
111601.16
|
|
2
|
4541.86
|
-66614.21
|
75697.92
|
|
3
|
16437.65
|
-55704.25
|
88579.55
|
|
4
|
19254.72
|
-94764.09
|
133273.54
|
|
5
|
19519.60
|
-53449.37
|
92488.56
|
|
6
|
22202.95
|
-49356.22
|
93762.11
|
|
7
|
24145.89
|
-95026.96
|
143318.74
|
|
8
|
27868.54
|
-98062.90
|
153799.98
|
|
9
|
28388.78
|
-41718.84
|
98496.41
|
|
10
|
30323.87
|
-90381.49
|
151029.24
|
x. Prediction-10
housetest.knn.pred10 <- cbind(housetest.knn, predict(housetrain.lm10, housetest.knn, interval="predict", level=.95) %>% data.frame())
housetest.knn.pred10 %>% arrange(fit) %>% head(10) %>% dplyr::select(fit, lwr, upr) %>%
kable("html",caption = "Top 10 Rows Prediction Using Model-10", row.names = T, digits = 2) %>%
kable_styling(bootstrap_options = c("striped", "hover", "condensed", "responsive"), full_width = F, position = "left", font_size = 12)
Top 10 Rows Prediction Using Model-10
|
|
fit
|
lwr
|
upr
|
|
1
|
-10807.48
|
-132920.28
|
111305.31
|
|
2
|
1923.39
|
-113212.93
|
117059.70
|
|
3
|
4076.57
|
-67828.73
|
75981.87
|
|
4
|
15594.48
|
-57272.02
|
88460.97
|
|
5
|
17371.82
|
-56382.23
|
91125.87
|
|
6
|
21698.34
|
-98938.62
|
142335.29
|
|
7
|
24441.14
|
-47841.68
|
96723.96
|
|
8
|
24983.82
|
-102478.96
|
152446.60
|
|
9
|
28217.72
|
-42700.22
|
99135.67
|
|
10
|
30684.52
|
-91468.75
|
152837.80
|
y. Prediction-11
housetest.knn.pred11 <- cbind(housetest.knn, predict(housetrain.lm11, housetest.knn, interval="predict", level=.95) %>% data.frame())
housetest.knn.pred11 %>% arrange(fit) %>% head(10) %>% dplyr::select(fit, lwr, upr) %>%
kable("html",caption = "Top 10 Rows Prediction Using Model-11", row.names = T, digits = 2) %>%
kable_styling(bootstrap_options = c("striped", "hover", "condensed", "responsive"), full_width = F, position = "left", font_size = 12)
Top 10 Rows Prediction Using Model-11
|
|
fit
|
lwr
|
upr
|
|
1
|
-18670.39
|
-146535.77
|
109194.98
|
|
2
|
2159.30
|
-118408.49
|
122727.09
|
|
3
|
6280.31
|
-127117.78
|
139678.40
|
|
4
|
12503.69
|
-62766.78
|
87774.17
|
|
5
|
15459.48
|
-110850.09
|
141769.06
|
|
6
|
19271.31
|
-57019.44
|
95562.06
|
|
7
|
24751.21
|
-50894.45
|
100396.86
|
|
8
|
27876.26
|
-49325.73
|
105078.25
|
|
9
|
33421.22
|
-94438.62
|
161281.07
|
|
10
|
33669.97
|
-40584.53
|
107924.48
|
z. Prediction-12
housetest.knn.pred12 <- cbind(housetest.knn, predict(housetrain.lm12, housetest.knn, interval="predict", level=.95) %>% data.frame())
housetest.knn.pred12 %>% arrange(fit) %>% head(10) %>% dplyr::select(fit, lwr, upr) %>%
kable("html",caption = "Top 10 Rows Prediction Using Model-12", row.names = T, digits = 2) %>%
kable_styling(bootstrap_options = c("striped", "hover", "condensed", "responsive"), full_width = F, position = "left", font_size = 12)
Top 10 Rows Prediction Using Model-12
|
|
fit
|
lwr
|
upr
|
|
1
|
-51116.04
|
-183144.01
|
80911.93
|
|
2
|
1652.67
|
-90335.15
|
93640.50
|
|
3
|
3866.85
|
-119577.88
|
127311.59
|
|
4
|
7496.72
|
-123238.68
|
138232.11
|
|
5
|
13647.82
|
-114854.48
|
142150.12
|
|
6
|
13647.82
|
-114854.48
|
142150.12
|
|
7
|
15046.15
|
-117373.15
|
147465.45
|
|
8
|
18895.20
|
-73514.72
|
111305.11
|
|
9
|
20230.50
|
-72954.04
|
113415.04
|
|
10
|
22042.34
|
-71135.23
|
115219.90
|
aa. Prediction-13
housetest.knn.pred13 <- cbind(housetest.knn, predict(housetrain.lm13, housetest.knn, interval="predict", level=.95) %>% data.frame())
housetest.knn.pred13 %>% arrange(fit) %>% head(10) %>% dplyr::select(fit, lwr, upr) %>%
kable("html",caption = "Top 10 Rows Prediction Using Model-13", row.names = T, digits = 2) %>%
kable_styling(bootstrap_options = c("striped", "hover", "condensed", "responsive"), full_width = F, position = "left", font_size = 12)
Top 10 Rows Prediction Using Model-13
|
|
fit
|
lwr
|
upr
|
|
1
|
-42662.34
|
-173146.67
|
87821.99
|
|
2
|
4821.45
|
-87382.85
|
97025.74
|
|
3
|
14214.35
|
-112758.44
|
141187.14
|
|
4
|
18344.14
|
-73598.46
|
110286.75
|
|
5
|
22119.42
|
-108746.23
|
152985.07
|
|
6
|
24083.07
|
-68524.68
|
116690.82
|
|
7
|
24251.30
|
-102416.63
|
150919.23
|
|
8
|
24251.30
|
-102416.63
|
150919.23
|
|
9
|
24570.72
|
-67513.12
|
116654.56
|
|
10
|
42774.62
|
-64717.78
|
150267.02
|
ab. Prediction-14
housetest.knn.pred14 <- cbind(housetest.knn, predict(housetrain.lm14, housetest.knn, interval="predict", level=.95) %>% data.frame())
housetest.knn.pred14 %>% arrange(fit) %>% head(10) %>% dplyr::select(fit, lwr, upr) %>%
kable("html",caption = "Top 10 Rows Prediction Using Model-14", row.names = T, digits = 2) %>%
kable_styling(bootstrap_options = c("striped", "hover", "condensed", "responsive"), full_width = F, position = "left", font_size = 12)
Top 10 Rows Prediction Using Model-14
|
|
fit
|
lwr
|
upr
|
|
1
|
13295.21
|
-63472.64
|
90063.05
|
|
2
|
14893.77
|
-75459.91
|
105247.46
|
|
3
|
30069.90
|
-60865.03
|
121004.84
|
|
4
|
36007.49
|
-40097.59
|
112112.57
|
|
5
|
36711.54
|
-39901.62
|
113324.69
|
|
6
|
40515.65
|
-35617.42
|
116648.73
|
|
7
|
41348.10
|
-42557.20
|
125253.40
|
|
8
|
41983.10
|
-42200.06
|
126166.25
|
|
9
|
41983.10
|
-42200.06
|
126166.25
|
|
10
|
49033.18
|
-35486.96
|
133553.32
|
ab. Prediction-15
housetest.knn.pred15 <- cbind(housetest.knn, predict(housetrain.lm15, housetest.knn, interval="predict", level=.95) %>% data.frame())
housetest.knn.pred15 %>% arrange(fit) %>% head(10) %>% dplyr::select(fit, lwr, upr) %>%
kable("html",caption = "Top 10 Rows Prediction Using Model-15", row.names = T, digits = 2) %>%
kable_styling(bootstrap_options = c("striped", "hover", "condensed", "responsive"), full_width = F, position = "left", font_size = 12)
Top 10 Rows Prediction Using Model-15
|
|
fit
|
lwr
|
upr
|
|
1
|
16730.29
|
-62142.65
|
95603.23
|
|
2
|
42212.62
|
-36216.17
|
120641.42
|
|
3
|
49356.18
|
-28528.26
|
127240.62
|
|
4
|
51470.71
|
-27662.11
|
130603.53
|
|
5
|
53204.26
|
-24775.29
|
131183.80
|
|
6
|
54000.70
|
-21314.92
|
129316.32
|
|
7
|
56296.07
|
-24745.20
|
137337.34
|
|
8
|
60060.81
|
-16489.22
|
136610.85
|
|
9
|
61113.68
|
-17867.17
|
140094.52
|
|
10
|
61966.39
|
-18281.28
|
142214.06
|