Our approach began by computing the t-score of each individual variable. We also found correlations for each of our numeric variables. With more time, we would have used ANOVA or ANCOVA to look for interactions between categorical variables. We then created a score that weighted t-scores and correlations equally. Because this score penalized numeric variables but not categorical variables, categorical variables may be overrepresented among the variables we selected. We applied a Box-Cox transformation to the response variable because house prices are naturally right-skewed, roughly on a logarithmic scale: a house can cost many times the average, but it cannot be worth a large negative amount. Working from our penalized list of variables, we built each model by adding the variables highest on the list and removing those with low t-scores within the model. We continued building models with increasing R² until we had 18 models. We chose our 17th model to submit.
## # A tibble: 81 x 2
## t.values.continuous tester
## <dbl> <dbl>
## 1 18 54.1
## 2 47 37.5
## 3 62 48.0
## 4 63 46.2
## 5 39 43.9
## 6 44 41.6
## 7 50 38.1
## 8 20 47.3
## 9 21 44.2
## 10 55 32.2
## 11 60 43.3
## 12 57 39.0
## 13 27 37.1
## 14 30 55.8
## 15 35 42.7
## 16 42 52.5
## 17 67 40.0
## 18 4 35.6
## 19 80 51.3
## 20 68 35.8
## 21 45 27.4
## 22 51 34.2
## 23 34 50.0
## 24 66 49.8
## 25 17 48.6
## 26 5 39.5
## 27 43 48.0
## 28 79 47.6
## 29 48 43.9
## 30 3 47.3
## 31 38 33.7
## 32 24 46.4
## 33 52 31.4
## 34 25 45.8
## 35 32 45.8
## 36 13 44.8
## 37 9 44.2
## 38 26 44.0
## 39 14 43.4
## 40 7 43.4
## # ... with 41 more rows
# Baseline fit: log(SalePrice) regressed on OverallQual and GrLivArea,
# followed by the printed model summary.
first.model <- lm(
  log(train_house_prices$SalePrice) ~
    train_house_prices$OverallQual + train_house_prices$GrLivArea
)
summary(first.model)
# Pick out, by column position, the columns of train_house_prices that feed
# the correlation matrix.
numeric.variables <- train_house_prices[
  ,
  c(
    2, 4, 5, 18, 19, 20, 21, 27, 35, 37, 38, 39, 44, 45, 46, 47, 48, 49, 50,
    51, 52, 53, 55, 57, 60, 62, 63, 67, 68, 69, 70, 71, 72, 76, 77, 78, 81
  )
]
# Pairwise correlations computed on complete observations only.
correlation.matrix <- cor(numeric.variables, use = "complete.obs")
# Left commented out so the full matrix is not printed.
#correlation.matrix
# Run boxcox() on SalePrice ~ OverallQual and keep the lambda (bc.1$x) at which
# the returned curve (bc.1$y) is largest.
bc.1 <- boxcox(train_house_prices$SalePrice ~ train_house_prices$OverallQual)
lambda.1 <- bc.1$x[which.max(bc.1$y)]
# Scatter plot of SalePrice (x) against GrLivArea (y), drawn before SalePrice
# is transformed below.
ggplot() +
  geom_point(
    aes(x = train_house_prices$SalePrice, y = train_house_prices$GrLivArea)
  )
lambda.1
# Box-Cox transform SalePrice in place: (y^lambda - 1) / lambda.
# Vectorised over the whole column so every row is transformed whatever the
# row count; the earlier element-wise loop was hard-coded to rows 1:1460.
# NOTE(review): the lambda applied here (.090606) is a hand-typed constant, not
# lambda.1 -- confirm it is the intended value before changing it, since any
# back-transformation elsewhere must use the same number.
# NOTE(review): this overwrites SalePrice, so re-running the statement would
# transform the already-transformed values again.
train_house_prices$SalePrice <-
  (train_house_prices$SalePrice^.090606 - 1) / .090606
#------------------------------------------------------
# Add the derived column `tester` to t.values.continuous and store the result
# back under the same name.
# NOTE(review): the constants 24 and 1.59 are not explained in this file --
# presumably the weighting behind the variable-ranking score; confirm.
t.values.continuous <- mutate(
  t.values.continuous,
  tester = t.values.continuous1 +
    24 * (1.59 - t.values.continuous2 - t.values.continuous3)
)
##
## Call:
## lm(formula = log(train_house_prices$SalePrice) ~ train_house_prices$OverallQual +
## train_house_prices$GrLivArea)
##
## Residuals:
## Min 1Q Median 3Q Max
## -1.78553 -0.09638 0.02084 0.12482 0.76698
##
## Coefficients:
## Estimate Std. Error t value Pr(>|t|)
## (Intercept) 1.055e+01 2.420e-02 435.95 <2e-16 ***
## train_house_prices$OverallQual 1.789e-01 4.792e-03 37.33 <2e-16 ***
## train_house_prices$GrLivArea 2.536e-04 1.261e-05 20.11 <2e-16 ***
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## Residual standard error: 0.2038 on 1457 degrees of freedom
## Multiple R-squared: 0.74, Adjusted R-squared: 0.7396
## F-statistic: 2073 on 2 and 1457 DF, p-value: < 2.2e-16


## [1] 0.06060606


##
## Call:
## lm(formula = train_house_prices$SalePrice ~ train_house_prices$OverallQual)
##
## Residuals:
## Min 1Q Median 3Q Max
## -2.89568 -0.39473 0.02582 0.39219 2.75968
##
## Coefficients:
## Estimate Std. Error t value Pr(>|t|)
## (Intercept) 17.48976 0.08099 215.95 <2e-16 ***
## train_house_prices$OverallQual 0.70556 0.01295 54.48 <2e-16 ***
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## Residual standard error: 0.6841 on 1458 degrees of freedom
## Multiple R-squared: 0.6706, Adjusted R-squared: 0.6704
## F-statistic: 2968 on 1 and 1458 DF, p-value: < 2.2e-16
##
## Call:
## lm(formula = train_house_prices$SalePrice ~ train_house_prices[,
## 18] + train_house_prices[, 47] + train_house_prices[, 62] +
## train_house_prices[, 63] + train_house_prices[, 39] + train_house_prices[,
## 44] + train_house_prices[, 50] + train_house_prices[, 20] +
## train_house_prices[, 21] + train_house_prices[, 55])
##
## Residuals:
## Min 1Q Median 3Q Max
## -6.6144 -0.2260 0.0187 0.2735 1.6102
##
## Coefficients:
## Estimate Std. Error t value Pr(>|t|)
## (Intercept) -5.664e+00 1.708e+00 -3.316 0.000937 ***
## train_house_prices[, 18] 2.863e-01 1.575e-02 18.181 < 2e-16 ***
## train_house_prices[, 47] 6.616e-04 5.599e-05 11.815 < 2e-16 ***
## train_house_prices[, 62] 2.094e-01 4.027e-02 5.199 2.29e-07 ***
## train_house_prices[, 63] 1.669e-04 1.364e-04 1.224 0.221267
## train_house_prices[, 39] 2.711e-04 5.683e-05 4.771 2.02e-06 ***
## train_house_prices[, 44] 1.756e-04 6.522e-05 2.693 0.007156 **
## train_house_prices[, 50] -6.177e-02 3.549e-02 -1.741 0.081946 .
## train_house_prices[, 20] 5.537e-03 6.660e-04 8.313 < 2e-16 ***
## train_house_prices[, 21] 6.463e-03 8.418e-04 7.678 2.96e-14 ***
## train_house_prices[, 55] 1.857e-02 1.480e-02 1.255 0.209835
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## Residual standard error: 0.5017 on 1449 degrees of freedom
## Multiple R-squared: 0.8239, Adjusted R-squared: 0.8227
## F-statistic: 678.1 on 10 and 1449 DF, p-value: < 2.2e-16
##
## Call:
## lm(formula = train_house_prices$SalePrice ~ train_house_prices[,
## 18] + train_house_prices[, 47] + train_house_prices[, 62] +
## train_house_prices[, 60] + train_house_prices[, 39] + train_house_prices[,
## 44] + train_house_prices[, 50] + train_house_prices[, 20] +
## train_house_prices[, 21] + train_house_prices[, 55])
##
## Residuals:
## Min 1Q Median 3Q Max
## -6.4622 -0.2267 0.0155 0.2823 1.6218
##
## Coefficients:
## Estimate Std. Error t value Pr(>|t|)
## (Intercept) -4.230e+00 1.861e+00 -2.273 0.02318 *
## train_house_prices[, 18] 2.939e-01 1.626e-02 18.072 < 2e-16 ***
## train_house_prices[, 47] 6.651e-04 5.635e-05 11.804 < 2e-16 ***
## train_house_prices[, 62] 2.317e-01 3.030e-02 7.647 3.86e-14 ***
## train_house_prices[, 60] -2.716e-03 1.055e-03 -2.574 0.01015 *
## train_house_prices[, 39] 2.607e-04 5.864e-05 4.445 9.50e-06 ***
## train_house_prices[, 44] 1.793e-04 6.660e-05 2.692 0.00719 **
## train_house_prices[, 50] -7.175e-02 3.638e-02 -1.972 0.04878 *
## train_house_prices[, 20] 6.862e-03 8.807e-04 7.791 1.30e-14 ***
## train_house_prices[, 21] 7.135e-03 9.121e-04 7.823 1.02e-14 ***
## train_house_prices[, 55] 2.040e-02 1.518e-02 1.344 0.17919
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## Residual standard error: 0.4947 on 1368 degrees of freedom
## (81 observations deleted due to missingness)
## Multiple R-squared: 0.8131, Adjusted R-squared: 0.8118
## F-statistic: 595.2 on 10 and 1368 DF, p-value: < 2.2e-16
##
## Call:
## lm(formula = train_house_prices$SalePrice ~ train_house_prices[,
## 18] + train_house_prices[, 47] + train_house_prices[, 62] +
## train_house_prices[, 60] + train_house_prices[, 39] + train_house_prices[,
## 44] + train_house_prices[, 57] + train_house_prices[, 20] +
## train_house_prices[, 21] + train_house_prices[, 55])
##
## Residuals:
## Min 1Q Median 3Q Max
## -6.2942 -0.2435 0.0104 0.2756 1.5452
##
## Coefficients:
## Estimate Std. Error t value Pr(>|t|)
## (Intercept) -5.383e+00 1.734e+00 -3.104 0.00195 **
## train_house_prices[, 18] 2.688e-01 1.607e-02 16.728 < 2e-16 ***
## train_house_prices[, 47] 5.430e-04 5.399e-05 10.057 < 2e-16 ***
## train_house_prices[, 62] 2.244e-01 2.949e-02 7.608 5.14e-14 ***
## train_house_prices[, 60] -1.478e-03 1.038e-03 -1.424 0.15474
## train_house_prices[, 39] 2.920e-04 5.685e-05 5.136 3.22e-07 ***
## train_house_prices[, 44] 9.738e-05 6.541e-05 1.489 0.13677
## train_house_prices[, 57] 2.088e-01 2.386e-02 8.754 < 2e-16 ***
## train_house_prices[, 20] 5.919e-03 8.435e-04 7.017 3.55e-12 ***
## train_house_prices[, 21] 7.489e-03 8.880e-04 8.433 < 2e-16 ***
## train_house_prices[, 55] 2.522e-02 1.471e-02 1.714 0.08669 .
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## Residual standard error: 0.4821 on 1368 degrees of freedom
## (81 observations deleted due to missingness)
## Multiple R-squared: 0.8225, Adjusted R-squared: 0.8212
## F-statistic: 634.1 on 10 and 1368 DF, p-value: < 2.2e-16
##
## Call:
## lm(formula = train_house_prices$SalePrice ~ train_house_prices[,
## 18] + train_house_prices[, 47] + train_house_prices[, 62] +
## train_house_prices[, 60] + train_house_prices[, 39] + train_house_prices[,
## 44] + train_house_prices[, 57] + train_house_prices[, 20] +
## train_house_prices[, 21] + train_house_prices[, 27])
##
## Residuals:
## Min 1Q Median 3Q Max
## -6.3961 -0.2389 0.0091 0.2731 1.5879
##
## Coefficients:
## Estimate Std. Error t value Pr(>|t|)
## (Intercept) -5.240e+00 1.734e+00 -3.022 0.00256 **
## train_house_prices[, 18] 2.642e-01 1.623e-02 16.272 < 2e-16 ***
## train_house_prices[, 47] 6.074e-04 3.739e-05 16.245 < 2e-16 ***
## train_house_prices[, 62] 2.243e-01 2.957e-02 7.585 6.15e-14 ***
## train_house_prices[, 60] -1.395e-03 1.041e-03 -1.340 0.18051
## train_house_prices[, 39] 2.758e-04 5.675e-05 4.859 1.31e-06 ***
## train_house_prices[, 44] 1.033e-04 6.577e-05 1.571 0.11650
## train_house_prices[, 57] 2.074e-01 2.396e-02 8.656 < 2e-16 ***
## train_house_prices[, 20] 5.783e-03 8.531e-04 6.779 1.80e-11 ***
## train_house_prices[, 21] 7.520e-03 8.943e-04 8.408 < 2e-16 ***
## train_house_prices[, 27] 8.479e-05 8.159e-05 1.039 0.29890
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## Residual standard error: 0.483 on 1360 degrees of freedom
## (89 observations deleted due to missingness)
## Multiple R-squared: 0.8218, Adjusted R-squared: 0.8205
## F-statistic: 627.1 on 10 and 1360 DF, p-value: < 2.2e-16
##
## Call:
## lm(formula = train_house_prices$SalePrice ~ train_house_prices[,
## 18] + train_house_prices[, 47] + train_house_prices[, 62] +
## train_house_prices[, 63] + train_house_prices[, 39] + train_house_prices[,
## 44] + train_house_prices[, 57] + train_house_prices[, 20] +
## train_house_prices[, 21] + train_house_prices[, 27])
##
## Residuals:
## Min 1Q Median 3Q Max
## -6.6390 -0.2354 0.0183 0.2726 1.5710
##
## Coefficients:
## Estimate Std. Error t value Pr(>|t|)
## (Intercept) -6.367e+00 1.580e+00 -4.031 5.85e-05 ***
## train_house_prices[, 18] 2.578e-01 1.564e-02 16.488 < 2e-16 ***
## train_house_prices[, 47] 6.100e-04 3.617e-05 16.865 < 2e-16 ***
## train_house_prices[, 62] 1.755e-01 3.907e-02 4.493 7.60e-06 ***
## train_house_prices[, 63] 2.757e-04 1.327e-04 2.078 0.0379 *
## train_house_prices[, 39] 2.845e-04 5.504e-05 5.169 2.69e-07 ***
## train_house_prices[, 44] 8.962e-05 6.432e-05 1.393 0.1637
## train_house_prices[, 57] 2.182e-01 2.348e-02 9.296 < 2e-16 ***
## train_house_prices[, 20] 5.222e-03 6.241e-04 8.367 < 2e-16 ***
## train_house_prices[, 21] 7.247e-03 8.260e-04 8.773 < 2e-16 ***
## train_house_prices[, 27] 5.334e-05 8.194e-05 0.651 0.5151
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## Residual standard error: 0.4885 on 1441 degrees of freedom
## (8 observations deleted due to missingness)
## Multiple R-squared: 0.833, Adjusted R-squared: 0.8318
## F-statistic: 718.5 on 10 and 1441 DF, p-value: < 2.2e-16
##
## Call:
## lm(formula = train_house_prices$SalePrice ~ train_house_prices[,
## 18] + train_house_prices[, 47] + train_house_prices[, 62] +
## train_house_prices[, 63] + train_house_prices[, 39] + train_house_prices[,
## 44] + train_house_prices[, 57] + train_house_prices[, 20] +
## train_house_prices[, 21] + train_house_prices[, 30])
##
## Residuals:
## Min 1Q Median 3Q Max
## -6.4313 -0.2366 0.0147 0.2642 1.5723
##
## Coefficients:
## Estimate Std. Error t value Pr(>|t|)
## (Intercept) -4.565e+00 1.874e+00 -2.436 0.01497 *
## train_house_prices[, 18] 2.661e-01 1.600e-02 16.635 < 2e-16 ***
## train_house_prices[, 47] 6.091e-04 3.578e-05 17.022 < 2e-16 ***
## train_house_prices[, 62] 1.825e-01 3.908e-02 4.671 3.28e-06 ***
## train_house_prices[, 63] 2.786e-04 1.320e-04 2.110 0.03499 *
## train_house_prices[, 39] 2.049e-04 6.407e-05 3.198 0.00141 **
## train_house_prices[, 44] 1.541e-04 7.156e-05 2.154 0.03144 *
## train_house_prices[, 57] 2.104e-01 2.351e-02 8.947 < 2e-16 ***
## train_house_prices[, 20] 4.341e-03 7.962e-04 5.451 5.87e-08 ***
## train_house_prices[, 21] 7.126e-03 8.442e-04 8.441 < 2e-16 ***
## train_house_prices[, 30]CBlock 1.504e-01 5.389e-02 2.792 0.00532 **
## train_house_prices[, 30]PConc 1.586e-01 6.505e-02 2.438 0.01489 *
## train_house_prices[, 30]Slab -1.645e-01 1.308e-01 -1.258 0.20875
## train_house_prices[, 30]Stone 1.383e-01 2.032e-01 0.681 0.49619
## train_house_prices[, 30]Wood -6.549e-02 2.883e-01 -0.227 0.82035
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## Residual standard error: 0.4861 on 1445 degrees of freedom
## Multiple R-squared: 0.8351, Adjusted R-squared: 0.8335
## F-statistic: 522.8 on 14 and 1445 DF, p-value: < 2.2e-16
##
## Call:
## lm(formula = train_house_prices$SalePrice ~ train_house_prices[,
## 18] + train_house_prices[, 47] + train_house_prices[, 62] +
## train_house_prices[, 63] + train_house_prices[, 39] + train_house_prices[,
## 35] + train_house_prices[, 57] + train_house_prices[, 20] +
## train_house_prices[, 21] + train_house_prices[, 30])
##
## Residuals:
## Min 1Q Median 3Q Max
## -7.0337 -0.2216 0.0310 0.2542 1.5167
##
## Coefficients:
## Estimate Std. Error t value Pr(>|t|)
## (Intercept) -3.833e+00 1.852e+00 -2.070 0.0386 *
## train_house_prices[, 18] 2.699e-01 1.578e-02 17.100 < 2e-16 ***
## train_house_prices[, 47] 6.387e-04 3.387e-05 18.854 < 2e-16 ***
## train_house_prices[, 62] 2.089e-01 3.874e-02 5.393 8.10e-08 ***
## train_house_prices[, 63] 1.875e-04 1.310e-04 1.431 0.1526
## train_house_prices[, 39] 2.005e-04 4.222e-05 4.749 2.24e-06 ***
## train_house_prices[, 35] 2.223e-04 3.346e-05 6.645 4.29e-11 ***
## train_house_prices[, 57] 1.965e-01 2.321e-02 8.465 < 2e-16 ***
## train_house_prices[, 20] 3.848e-03 7.830e-04 4.914 9.93e-07 ***
## train_house_prices[, 21] 7.263e-03 8.296e-04 8.755 < 2e-16 ***
## train_house_prices[, 30]CBlock 1.296e-01 5.321e-02 2.436 0.0150 *
## train_house_prices[, 30]PConc 1.501e-01 6.419e-02 2.339 0.0195 *
## train_house_prices[, 30]Slab -8.555e-02 1.170e-01 -0.731 0.4649
## train_house_prices[, 30]Stone 1.666e-01 2.005e-01 0.831 0.4063
## train_house_prices[, 30]Wood -1.837e-01 2.851e-01 -0.644 0.5194
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## Residual standard error: 0.4797 on 1445 degrees of freedom
## Multiple R-squared: 0.8395, Adjusted R-squared: 0.838
## F-statistic: 539.9 on 14 and 1445 DF, p-value: < 2.2e-16
##
## Call:
## lm(formula = train_house_prices$SalePrice ~ train_house_prices[,
## 18] + train_house_prices[, 47] + train_house_prices[, 62] +
## train_house_prices[, 63] + train_house_prices[, 39] + train_house_prices[,
## 35] + train_house_prices[, 57] + train_house_prices[, 20] +
## train_house_prices[, 21] + train_house_prices[, 30] + train_house_prices[,
## 42])
##
## Residuals:
## Min 1Q Median 3Q Max
## -7.0104 -0.2234 0.0282 0.2506 1.5400
##
## Coefficients:
## Estimate Std. Error t value Pr(>|t|)
## (Intercept) -1.627e+00 1.868e+00 -0.871 0.3840
## train_house_prices[, 18] 2.652e-01 1.562e-02 16.972 < 2e-16 ***
## train_house_prices[, 47] 6.502e-04 3.354e-05 19.385 < 2e-16 ***
## train_house_prices[, 62] 2.133e-01 3.830e-02 5.569 3.05e-08 ***
## train_house_prices[, 63] 1.516e-04 1.297e-04 1.170 0.2424
## train_house_prices[, 39] 2.090e-04 4.177e-05 5.005 6.29e-07 ***
## train_house_prices[, 35] 2.194e-04 3.308e-05 6.633 4.63e-11 ***
## train_house_prices[, 57] 1.796e-01 2.313e-02 7.768 1.51e-14 ***
## train_house_prices[, 20] 3.238e-03 7.809e-04 4.146 3.57e-05 ***
## train_house_prices[, 21] 6.624e-03 8.273e-04 8.007 2.40e-15 ***
## train_house_prices[, 30]CBlock 8.372e-02 5.318e-02 1.574 0.1156
## train_house_prices[, 30]PConc 1.307e-01 6.354e-02 2.057 0.0399 *
## train_house_prices[, 30]Slab -3.411e-02 1.160e-01 -0.294 0.7688
## train_house_prices[, 30]Stone 2.379e-01 1.986e-01 1.198 0.2312
## train_house_prices[, 30]Wood -2.210e-01 2.819e-01 -0.784 0.4332
## train_house_prices[, 42]Y 3.373e-01 5.728e-02 5.889 4.83e-09 ***
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## Residual standard error: 0.4742 on 1444 degrees of freedom
## Multiple R-squared: 0.8433, Adjusted R-squared: 0.8416
## F-statistic: 518 on 15 and 1444 DF, p-value: < 2.2e-16
##
## Call:
## lm(formula = train_house_prices$SalePrice ~ train_house_prices[,
## 18] + train_house_prices[, 47] + train_house_prices[, 62] +
## train_house_prices[, 63] + train_house_prices[, 39] + train_house_prices[,
## 35] + train_house_prices[, 57] + train_house_prices[, 20] +
## train_house_prices[, 21] + train_house_prices[, 30] + train_house_prices[,
## 42] + train_house_prices[, 67])
##
## Residuals:
## Min 1Q Median 3Q Max
## -6.9003 -0.2212 0.0214 0.2557 1.5518
##
## Coefficients:
## Estimate Std. Error t value Pr(>|t|)
## (Intercept) -8.727e-01 1.866e+00 -0.468 0.6401
## train_house_prices[, 18] 2.671e-01 1.554e-02 17.187 < 2e-16 ***
## train_house_prices[, 47] 6.347e-04 3.356e-05 18.914 < 2e-16 ***
## train_house_prices[, 62] 2.117e-01 3.809e-02 5.557 3.26e-08 ***
## train_house_prices[, 63] 1.468e-04 1.289e-04 1.139 0.2550
## train_house_prices[, 39] 2.072e-04 4.153e-05 4.990 6.76e-07 ***
## train_house_prices[, 35] 2.069e-04 3.302e-05 6.267 4.85e-10 ***
## train_house_prices[, 57] 1.731e-01 2.305e-02 7.513 1.01e-13 ***
## train_house_prices[, 20] 3.049e-03 7.778e-04 3.920 9.28e-05 ***
## train_house_prices[, 21] 6.429e-03 8.238e-04 7.804 1.15e-14 ***
## train_house_prices[, 30]CBlock 8.386e-02 5.287e-02 1.586 0.1130
## train_house_prices[, 30]PConc 1.310e-01 6.318e-02 2.074 0.0383 *
## train_house_prices[, 30]Slab -2.025e-02 1.154e-01 -0.175 0.8607
## train_house_prices[, 30]Stone 2.318e-01 1.975e-01 1.174 0.2407
## train_house_prices[, 30]Wood -2.209e-01 2.803e-01 -0.788 0.4308
## train_house_prices[, 42]Y 3.292e-01 5.699e-02 5.776 9.36e-09 ***
## train_house_prices[, 67] 4.407e-04 1.049e-04 4.200 2.83e-05 ***
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## Residual standard error: 0.4715 on 1443 degrees of freedom
## Multiple R-squared: 0.8452, Adjusted R-squared: 0.8435
## F-statistic: 492.3 on 16 and 1443 DF, p-value: < 2.2e-16
##
## Call:
## lm(formula = train_house_prices$SalePrice ~ train_house_prices[,
## 18] + train_house_prices[, 47] + train_house_prices[, 62] +
## train_house_prices[, 63] + train_house_prices[, 39] + train_house_prices[,
## 35] + train_house_prices[, 57] + train_house_prices[, 20] +
## train_house_prices[, 21] + train_house_prices[, 30] + train_house_prices[,
## 42] + train_house_prices[, 67] + train_house_prices[, 4])
##
## Residuals:
## Min 1Q Median 3Q Max
## -6.9013 -0.2136 0.0278 0.2544 1.5574
##
## Coefficients:
## Estimate Std. Error t value Pr(>|t|)
## (Intercept) -1.988e+00 2.113e+00 -0.941 0.346994
## train_house_prices[, 18] 2.777e-01 1.798e-02 15.441 < 2e-16 ***
## train_house_prices[, 47] 5.878e-04 3.942e-05 14.911 < 2e-16 ***
## train_house_prices[, 62] 2.309e-01 4.270e-02 5.407 7.76e-08 ***
## train_house_prices[, 63] 5.574e-05 1.479e-04 0.377 0.706263
## train_house_prices[, 39] 1.527e-04 4.862e-05 3.142 0.001721 **
## train_house_prices[, 35] 2.153e-04 3.717e-05 5.793 8.88e-09 ***
## train_house_prices[, 57] 1.718e-01 2.720e-02 6.314 3.84e-10 ***
## train_house_prices[, 20] 3.286e-03 8.566e-04 3.836 0.000132 ***
## train_house_prices[, 21] 6.727e-03 9.218e-04 7.297 5.37e-13 ***
## train_house_prices[, 30]CBlock 4.387e-02 5.749e-02 0.763 0.445507
## train_house_prices[, 30]PConc 1.239e-01 6.928e-02 1.788 0.074012 .
## train_house_prices[, 30]Slab -1.669e-01 1.327e-01 -1.258 0.208717
## train_house_prices[, 30]Stone 2.647e-01 2.044e-01 1.295 0.195628
## train_house_prices[, 30]Wood -5.595e-01 3.545e-01 -1.578 0.114848
## train_house_prices[, 42]Y 3.430e-01 6.121e-02 5.604 2.60e-08 ***
## train_house_prices[, 67] 4.053e-04 1.258e-04 3.222 0.001306 **
## train_house_prices[, 4] 1.862e-03 6.837e-04 2.723 0.006562 **
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## Residual standard error: 0.487 on 1183 degrees of freedom
## (259 observations deleted due to missingness)
## Multiple R-squared: 0.8483, Adjusted R-squared: 0.8461
## F-statistic: 389.1 on 17 and 1183 DF, p-value: < 2.2e-16
##
## Call:
## lm(formula = train_house_prices$SalePrice ~ train_house_prices[,
## 18] + train_house_prices[, 47] + train_house_prices[, 62] +
## train_house_prices[, 63] + train_house_prices[, 39] + train_house_prices[,
## 35] + train_house_prices[, 57] + train_house_prices[, 20] +
## train_house_prices[, 21] + train_house_prices[, 30] + train_house_prices[,
## 42] + train_house_prices[, 67] + train_house_prices[, 4] +
## train_house_prices[, 80])
##
## Residuals:
## Min 1Q Median 3Q Max
## -7.0024 -0.2247 0.0191 0.2524 1.7705
##
## Coefficients:
## Estimate Std. Error t value Pr(>|t|)
## (Intercept) -4.354e-01 2.124e+00 -0.205 0.837641
## train_house_prices[, 18] 2.731e-01 1.774e-02 15.393 < 2e-16 ***
## train_house_prices[, 47] 6.006e-04 3.891e-05 15.437 < 2e-16 ***
## train_house_prices[, 62] 2.423e-01 4.231e-02 5.728 1.29e-08 ***
## train_house_prices[, 63] -1.577e-05 1.466e-04 -0.108 0.914333
## train_house_prices[, 39] 1.485e-04 4.831e-05 3.073 0.002167 **
## train_house_prices[, 35] 2.316e-04 3.689e-05 6.278 4.83e-10 ***
## train_house_prices[, 57] 1.586e-01 2.690e-02 5.896 4.85e-09 ***
## train_house_prices[, 20] 2.948e-03 8.506e-04 3.466 0.000548 ***
## train_house_prices[, 21] 6.149e-03 9.154e-04 6.718 2.87e-11 ***
## train_house_prices[, 30]CBlock 5.511e-02 5.677e-02 0.971 0.331903
## train_house_prices[, 30]PConc 1.195e-01 6.832e-02 1.748 0.080650 .
## train_house_prices[, 30]Slab -1.553e-01 1.325e-01 -1.172 0.241458
## train_house_prices[, 30]Stone 2.839e-01 2.014e-01 1.410 0.158884
## train_house_prices[, 30]Wood -5.647e-01 3.492e-01 -1.617 0.106152
## train_house_prices[, 42]Y 3.586e-01 6.053e-02 5.925 4.10e-09 ***
## train_house_prices[, 67] 4.157e-04 1.244e-04 3.341 0.000861 ***
## train_house_prices[, 4] 1.794e-03 6.738e-04 2.663 0.007848 **
## train_house_prices[, 80]AdjLand 3.281e-01 2.481e-01 1.323 0.186230
## train_house_prices[, 80]Alloca 2.980e-02 1.655e-01 0.180 0.857143
## train_house_prices[, 80]Family -3.227e-03 1.249e-01 -0.026 0.979397
## train_house_prices[, 80]Normal 2.845e-01 5.536e-02 5.139 3.23e-07 ***
## train_house_prices[, 80]Partial 4.186e-01 7.420e-02 5.642 2.10e-08 ***
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## Residual standard error: 0.4795 on 1178 degrees of freedom
## (259 observations deleted due to missingness)
## Multiple R-squared: 0.8535, Adjusted R-squared: 0.8508
## F-statistic: 312 on 22 and 1178 DF, p-value: < 2.2e-16
##
## Call:
## lm(formula = train_house_prices$SalePrice ~ train_house_prices[,
## 18] + train_house_prices[, 47] + train_house_prices[, 62] +
## train_house_prices[, 63] + train_house_prices[, 39] + train_house_prices[,
## 35] + train_house_prices[, 57] + train_house_prices[, 20] +
## train_house_prices[, 21] + train_house_prices[, 30] + train_house_prices[,
## 42] + train_house_prices[, 67] + train_house_prices[, 4] +
## train_house_prices[, 80] + train_house_prices[, 68])
##
## Residuals:
## Min 1Q Median 3Q Max
## -7.0026 -0.2231 0.0193 0.2528 1.7717
##
## Coefficients:
## Estimate Std. Error t value Pr(>|t|)
## (Intercept) -4.159e-01 2.130e+00 -0.195 0.845232
## train_house_prices[, 18] 2.730e-01 1.776e-02 15.368 < 2e-16 ***
## train_house_prices[, 47] 5.998e-04 3.939e-05 15.229 < 2e-16 ***
## train_house_prices[, 62] 2.429e-01 4.252e-02 5.712 1.41e-08 ***
## train_house_prices[, 63] -1.772e-05 1.473e-04 -0.120 0.904295
## train_house_prices[, 39] 1.482e-04 4.837e-05 3.063 0.002237 **
## train_house_prices[, 35] 2.316e-04 3.691e-05 6.275 4.90e-10 ***
## train_house_prices[, 57] 1.585e-01 2.693e-02 5.886 5.15e-09 ***
## train_house_prices[, 20] 2.948e-03 8.510e-04 3.464 0.000551 ***
## train_house_prices[, 21] 6.140e-03 9.186e-04 6.684 3.59e-11 ***
## train_house_prices[, 30]CBlock 5.507e-02 5.680e-02 0.970 0.332483
## train_house_prices[, 30]PConc 1.190e-01 6.845e-02 1.738 0.082462 .
## train_house_prices[, 30]Slab -1.549e-01 1.326e-01 -1.168 0.243004
## train_house_prices[, 30]Stone 2.835e-01 2.015e-01 1.407 0.159723
## train_house_prices[, 30]Wood -5.644e-01 3.494e-01 -1.615 0.106489
## train_house_prices[, 42]Y 3.593e-01 6.077e-02 5.913 4.40e-09 ***
## train_house_prices[, 67] 4.168e-04 1.248e-04 3.341 0.000860 ***
## train_house_prices[, 4] 1.795e-03 6.741e-04 2.662 0.007869 **
## train_house_prices[, 80]AdjLand 3.282e-01 2.482e-01 1.322 0.186267
## train_house_prices[, 80]Alloca 3.070e-02 1.657e-01 0.185 0.853044
## train_house_prices[, 80]Family -2.561e-03 1.251e-01 -0.020 0.983664
## train_house_prices[, 80]Normal 2.847e-01 5.541e-02 5.139 3.23e-07 ***
## train_house_prices[, 80]Partial 4.186e-01 7.423e-02 5.639 2.14e-08 ***
## train_house_prices[, 68] 3.171e-05 2.320e-04 0.137 0.891305
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## Residual standard error: 0.4797 on 1177 degrees of freedom
## (259 observations deleted due to missingness)
## Multiple R-squared: 0.8535, Adjusted R-squared: 0.8507
## F-statistic: 298.2 on 23 and 1177 DF, p-value: < 2.2e-16
##
## Call:
## lm(formula = train_house_prices$SalePrice ~ train_house_prices[,
## 18] + train_house_prices[, 47] + train_house_prices[, 62] +
## train_house_prices[, 63] + train_house_prices[, 39] + train_house_prices[,
## 35] + train_house_prices[, 57] + train_house_prices[, 20] +
## train_house_prices[, 21] + train_house_prices[, 30] + train_house_prices[,
## 42] + train_house_prices[, 67] + train_house_prices[, 4] +
## train_house_prices[, 80] + train_house_prices[, 68] + train_house_prices[,
## 45])
##
## Residuals:
## Min 1Q Median 3Q Max
## -6.9034 -0.2233 0.0184 0.2508 1.7887
##
## Coefficients:
## Estimate Std. Error t value Pr(>|t|)
## (Intercept) -4.731e-01 2.129e+00 -0.222 0.824190
## train_house_prices[, 18] 2.738e-01 1.776e-02 15.418 < 2e-16 ***
## train_house_prices[, 47] 7.089e-04 7.772e-05 9.121 < 2e-16 ***
## train_house_prices[, 62] 2.434e-01 4.250e-02 5.728 1.29e-08 ***
## train_house_prices[, 63] -2.863e-05 1.474e-04 -0.194 0.845960
## train_house_prices[, 39] 6.191e-05 7.174e-05 0.863 0.388310
## train_house_prices[, 35] 2.322e-04 3.688e-05 6.295 4.33e-10 ***
## train_house_prices[, 57] 1.540e-01 2.705e-02 5.692 1.58e-08 ***
## train_house_prices[, 20] 3.121e-03 8.570e-04 3.642 0.000282 ***
## train_house_prices[, 21] 5.988e-03 9.226e-04 6.491 1.26e-10 ***
## train_house_prices[, 30]CBlock 4.966e-02 5.685e-02 0.874 0.382564
## train_house_prices[, 30]PConc 1.177e-01 6.840e-02 1.721 0.085488 .
## train_house_prices[, 30]Slab -2.429e-01 1.431e-01 -1.697 0.089931 .
## train_house_prices[, 30]Stone 2.885e-01 2.014e-01 1.433 0.152249
## train_house_prices[, 30]Wood -5.686e-01 3.492e-01 -1.629 0.103665
## train_house_prices[, 42]Y 3.591e-01 6.073e-02 5.913 4.41e-09 ***
## train_house_prices[, 67] 4.090e-04 1.248e-04 3.278 0.001076 **
## train_house_prices[, 4] 1.656e-03 6.789e-04 2.439 0.014857 *
## train_house_prices[, 80]AdjLand 3.094e-01 2.483e-01 1.246 0.212960
## train_house_prices[, 80]Alloca 7.272e-03 1.662e-01 0.044 0.965107
## train_house_prices[, 80]Family -5.326e-03 1.250e-01 -0.043 0.966020
## train_house_prices[, 80]Normal 2.871e-01 5.539e-02 5.183 2.56e-07 ***
## train_house_prices[, 80]Partial 4.193e-01 7.418e-02 5.652 1.99e-08 ***
## train_house_prices[, 68] 6.047e-05 2.325e-04 0.260 0.794846
## train_house_prices[, 45] -1.267e-04 7.783e-05 -1.628 0.103853
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## Residual standard error: 0.4793 on 1176 degrees of freedom
## (259 observations deleted due to missingness)
## Multiple R-squared: 0.8539, Adjusted R-squared: 0.8509
## F-statistic: 286.3 on 24 and 1176 DF, p-value: < 2.2e-16
##
## Call:
## lm(formula = train_house_prices$SalePrice ~ train_house_prices[,
## 18] + train_house_prices[, 47] + train_house_prices[, 62] +
## train_house_prices[, 63] + train_house_prices[, 39] + train_house_prices[,
## 35] + train_house_prices[, 57] + train_house_prices[, 20] +
## train_house_prices[, 21] + train_house_prices[, 30] + train_house_prices[,
## 42] + train_house_prices[, 67] + train_house_prices[, 4] +
## train_house_prices[, 80] + train_house_prices[, 68] + train_house_prices[,
## 45] + train_house_prices[, 51])
##
## Residuals:
## Min 1Q Median 3Q Max
## -6.9255 -0.2270 0.0185 0.2479 1.7490
##
## Coefficients:
## Estimate Std. Error t value Pr(>|t|)
## (Intercept) -1.263e-02 2.147e+00 -0.006 0.99531
## train_house_prices[, 18] 2.743e-01 1.775e-02 15.453 < 2e-16 ***
## train_house_prices[, 47] 7.188e-04 7.792e-05 9.224 < 2e-16 ***
## train_house_prices[, 62] 2.406e-01 4.251e-02 5.660 1.90e-08 ***
## train_house_prices[, 63] -1.976e-05 1.474e-04 -0.134 0.89336
## train_house_prices[, 39] 6.220e-05 7.169e-05 0.868 0.38576
## train_house_prices[, 35] 2.294e-04 3.690e-05 6.217 7.01e-10 ***
## train_house_prices[, 57] 1.499e-01 2.716e-02 5.521 4.14e-08 ***
## train_house_prices[, 20] 2.842e-03 8.744e-04 3.250 0.00119 **
## train_house_prices[, 21] 6.029e-03 9.224e-04 6.536 9.40e-11 ***
## train_house_prices[, 30]CBlock 4.525e-02 5.689e-02 0.796 0.42648
## train_house_prices[, 30]PConc 1.211e-01 6.839e-02 1.770 0.07699 .
## train_house_prices[, 30]Slab -2.270e-01 1.434e-01 -1.583 0.11361
## train_house_prices[, 30]Stone 3.006e-01 2.014e-01 1.493 0.13577
## train_house_prices[, 30]Wood -5.600e-01 3.490e-01 -1.605 0.10886
## train_house_prices[, 42]Y 3.543e-01 6.076e-02 5.831 7.11e-09 ***
## train_house_prices[, 67] 4.073e-04 1.247e-04 3.267 0.00112 **
## train_house_prices[, 4] 1.661e-03 6.785e-04 2.448 0.01453 *
## train_house_prices[, 80]AdjLand 3.000e-01 2.482e-01 1.209 0.22689
## train_house_prices[, 80]Alloca -1.320e-02 1.666e-01 -0.079 0.93687
## train_house_prices[, 80]Family -2.545e-03 1.249e-01 -0.020 0.98375
## train_house_prices[, 80]Normal 2.847e-01 5.538e-02 5.141 3.20e-07 ***
## train_house_prices[, 80]Partial 4.159e-01 7.416e-02 5.608 2.56e-08 ***
## train_house_prices[, 68] 3.538e-05 2.329e-04 0.152 0.87927
## train_house_prices[, 45] -1.770e-04 8.399e-05 -2.107 0.03529 *
## train_house_prices[, 51] 5.980e-02 3.767e-02 1.588 0.11265
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## Residual standard error: 0.479 on 1175 degrees of freedom
## (259 observations deleted due to missingness)
## Multiple R-squared: 0.8542, Adjusted R-squared: 0.8511
## F-statistic: 275.3 on 25 and 1175 DF, p-value: < 2.2e-16
##
## Call:
## lm(formula = train_house_prices$SalePrice ~ train_house_prices[,
## 18] + train_house_prices[, 47] + train_house_prices[, 62] +
## train_house_prices[, 63] + train_house_prices[, 39] + train_house_prices[,
## 35] + train_house_prices[, 57] + train_house_prices[, 20] +
## train_house_prices[, 21] + train_house_prices[, 30] + train_house_prices[,
## 42] + train_house_prices[, 67] + train_house_prices[, 4] +
## train_house_prices[, 80] + train_house_prices[, 68] + train_house_prices[,
## 45] + train_house_prices[, 51] + train_house_prices[, 34])
##
## Residuals:
## Min 1Q Median 3Q Max
## -6.2868 -0.2262 0.0249 0.2458 1.8604
##
## Coefficients:
## Estimate Std. Error t value Pr(>|t|)
## (Intercept) 1.723e+00 2.230e+00 0.773 0.43990
## train_house_prices[, 18] 2.735e-01 1.801e-02 15.181 < 2e-16 ***
## train_house_prices[, 47] 7.755e-04 8.881e-05 8.731 < 2e-16 ***
## train_house_prices[, 62] 2.289e-01 4.278e-02 5.351 1.06e-07 ***
## train_house_prices[, 63] 1.962e-05 1.475e-04 0.133 0.89421
## train_house_prices[, 39] 7.183e-05 8.537e-05 0.841 0.40029
## train_house_prices[, 35] 1.357e-05 5.071e-05 0.268 0.78911
## train_house_prices[, 57] 1.487e-01 2.729e-02 5.450 6.17e-08 ***
## train_house_prices[, 20] 2.060e-03 8.982e-04 2.293 0.02200 *
## train_house_prices[, 21] 5.983e-03 9.426e-04 6.347 3.17e-10 ***
## train_house_prices[, 30]CBlock 2.257e-02 5.720e-02 0.395 0.69321
## train_house_prices[, 30]PConc 1.250e-01 6.847e-02 1.825 0.06821 .
## train_house_prices[, 30]Stone 3.128e-01 1.996e-01 1.567 0.11733
## train_house_prices[, 30]Wood -6.401e-01 3.459e-01 -1.851 0.06448 .
## train_house_prices[, 42]Y 3.759e-01 6.321e-02 5.947 3.62e-09 ***
## train_house_prices[, 67] 4.011e-04 1.245e-04 3.220 0.00132 **
## train_house_prices[, 4] 1.892e-03 6.765e-04 2.797 0.00524 **
## train_house_prices[, 80]AdjLand 2.997e-01 2.804e-01 1.069 0.28523
## train_house_prices[, 80]Alloca -6.773e-02 1.902e-01 -0.356 0.72187
## train_house_prices[, 80]Family 4.149e-02 1.241e-01 0.334 0.73816
## train_house_prices[, 80]Normal 2.752e-01 5.524e-02 4.982 7.28e-07 ***
## train_house_prices[, 80]Partial 4.509e-01 7.403e-02 6.091 1.53e-09 ***
## train_house_prices[, 68] -1.444e-05 2.316e-04 -0.062 0.95028
## train_house_prices[, 45] -2.223e-04 9.539e-05 -2.330 0.01996 *
## train_house_prices[, 51] 7.899e-02 3.788e-02 2.085 0.03727 *
## train_house_prices[, 34]BLQ -7.213e-02 5.867e-02 -1.229 0.21919
## train_house_prices[, 34]GLQ 1.243e-02 5.197e-02 0.239 0.81094
## train_house_prices[, 34]LwQ -1.299e-01 7.311e-02 -1.777 0.07590 .
## train_house_prices[, 34]Rec -6.069e-02 5.952e-02 -1.020 0.30816
## train_house_prices[, 34]Unf -2.950e-01 5.560e-02 -5.306 1.34e-07 ***
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## Residual standard error: 0.4736 on 1140 degrees of freedom
## (290 observations deleted due to missingness)
## Multiple R-squared: 0.855, Adjusted R-squared: 0.8513
## F-statistic: 231.8 on 29 and 1140 DF, p-value: < 2.2e-16
##
## Call:
## lm(formula = train_house_prices$SalePrice ~ train_house_prices[,
## 18] + train_house_prices[, 47] + train_house_prices[, 62] +
## train_house_prices[, 63] + train_house_prices[, 35] + train_house_prices[,
## 57] + train_house_prices[, 20] + train_house_prices[, 21] +
## train_house_prices[, 42] + train_house_prices[, 67] + train_house_prices[,
## 4] + train_house_prices[, 80] + train_house_prices[, 45] +
## train_house_prices[, 51] + train_house_prices[, 34])
##
## Residuals:
## Min 1Q Median 3Q Max
## -6.1868 -0.2250 0.0250 0.2481 1.8471
##
## Coefficients:
## Estimate Std. Error t value Pr(>|t|)
## (Intercept) -6.412e-01 1.932e+00 -0.332 0.73996
## train_house_prices[, 18] 2.774e-01 1.761e-02 15.753 < 2e-16 ***
## train_house_prices[, 47] 8.467e-04 5.828e-05 14.529 < 2e-16 ***
## train_house_prices[, 62] 2.307e-01 4.254e-02 5.424 7.11e-08 ***
## train_house_prices[, 63] 3.816e-05 1.470e-04 0.260 0.79520
## train_house_prices[, 35] 2.404e-05 4.852e-05 0.495 0.62035
## train_house_prices[, 57] 1.452e-01 2.719e-02 5.341 1.11e-07 ***
## train_house_prices[, 20] 2.983e-03 7.139e-04 4.178 3.16e-05 ***
## train_house_prices[, 21] 6.279e-03 9.201e-04 6.824 1.43e-11 ***
## train_house_prices[, 42]Y 3.599e-01 6.253e-02 5.757 1.10e-08 ***
## train_house_prices[, 67] 3.767e-04 1.244e-04 3.029 0.00251 **
## train_house_prices[, 4] 1.611e-03 6.666e-04 2.416 0.01584 *
## train_house_prices[, 80]AdjLand 3.157e-01 2.811e-01 1.123 0.26160
## train_house_prices[, 80]Alloca -9.253e-02 1.901e-01 -0.487 0.62647
## train_house_prices[, 80]Family 2.705e-02 1.243e-01 0.218 0.82783
## train_house_prices[, 80]Normal 2.748e-01 5.537e-02 4.963 7.99e-07 ***
## train_house_prices[, 80]Partial 4.580e-01 7.404e-02 6.186 8.57e-10 ***
## train_house_prices[, 45] -2.773e-04 6.229e-05 -4.452 9.33e-06 ***
## train_house_prices[, 51] 6.926e-02 3.767e-02 1.839 0.06624 .
## train_house_prices[, 34]BLQ -7.178e-02 5.879e-02 -1.221 0.22233
## train_house_prices[, 34]GLQ 3.389e-02 5.037e-02 0.673 0.50116
## train_house_prices[, 34]LwQ -1.215e-01 7.305e-02 -1.663 0.09666 .
## train_house_prices[, 34]Rec -6.067e-02 5.961e-02 -1.018 0.30901
## train_house_prices[, 34]Unf -2.667e-01 5.420e-02 -4.920 9.93e-07 ***
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## Residual standard error: 0.4751 on 1146 degrees of freedom
## (290 observations deleted due to missingness)
## Multiple R-squared: 0.8533, Adjusted R-squared: 0.8503
## F-statistic: 289.8 on 23 and 1146 DF, p-value: < 2.2e-16