library(ResourceSelection)
## Warning: package 'ResourceSelection' was built under R version 4.2.3
## ResourceSelection 0.3-5 2019-07-22
library(tidyverse)
## ── Attaching packages ─────────────────────────────────────── tidyverse 1.3.2
## ──
## ✔ ggplot2 3.4.1 ✔ purrr 1.0.1
## ✔ tibble 3.1.8 ✔ dplyr 1.1.0
## ✔ tidyr 1.3.0 ✔ stringr 1.5.0
## ✔ readr 2.1.4 ✔ forcats 1.0.0
## ── Conflicts ────────────────────────────────────────── tidyverse_conflicts() ──
## ✖ dplyr::filter() masks stats::filter()
## ✖ dplyr::lag() masks stats::lag()
library(matlib)
## Warning: package 'matlib' was built under R version 4.2.3
library(Matrix)
##
## Attaching package: 'Matrix'
##
## The following objects are masked from 'package:tidyr':
##
## expand, pack, unpack
library(matrixcalc)
##
## Attaching package: 'matrixcalc'
##
## The following object is masked from 'package:matlib':
##
## vec
library(MASS)
##
## Attaching package: 'MASS'
##
## The following object is masked from 'package:dplyr':
##
## select
# Load the training data from the project's GitHub repository
train <- read.csv(
  "https://raw.githubusercontent.com/melbow2424/Data_605_Final/main/Data/train.csv"
)
# Preview the first rows, then print per-column summary statistics
head(train)
summary(train)
# Restrict the training data to its numeric (non-categorical) columns
non_categorical_vars <- train[, vapply(train, is.numeric, logical(1))]
# summary() on a data frame yields a character table with one column per
# variable and one row per statistic; rows 1-6 are Min through Max
summary_info <- summary(non_categorical_vars)
# Re-arrange into one row per variable and one column per statistic
summary_train <- do.call(
  data.frame,
  lapply(
    c(Min = 1, Q1 = 2, Median = 3, Mean = 4, Q3 = 5, Max = 6),
    function(stat_row) summary_info[stat_row, ]
  )
)
summary_train
## Min Q1 Median
## Id Min. : 1.0 1st Qu.: 365.8 Median : 730.5
## MSSubClass Min. : 20.0 1st Qu.: 20.0 Median : 50.0
## LotFrontage Min. : 21.00 1st Qu.: 59.00 Median : 69.00
## LotArea Min. : 1300 1st Qu.: 7554 Median : 9478
## OverallQual Min. : 1.000 1st Qu.: 5.000 Median : 6.000
## OverallCond Min. :1.000 1st Qu.:5.000 Median :5.000
## YearBuilt Min. :1872 1st Qu.:1954 Median :1973
## YearRemodAdd Min. :1950 1st Qu.:1967 Median :1994
## MasVnrArea Min. : 0.0 1st Qu.: 0.0 Median : 0.0
## BsmtFinSF1 Min. : 0.0 1st Qu.: 0.0 Median : 383.5
## BsmtFinSF2 Min. : 0.00 1st Qu.: 0.00 Median : 0.00
## BsmtUnfSF Min. : 0.0 1st Qu.: 223.0 Median : 477.5
## TotalBsmtSF Min. : 0.0 1st Qu.: 795.8 Median : 991.5
## X1stFlrSF Min. : 334 1st Qu.: 882 Median :1087
## X2ndFlrSF Min. : 0 1st Qu.: 0 Median : 0
## LowQualFinSF Min. : 0.000 1st Qu.: 0.000 Median : 0.000
## GrLivArea Min. : 334 1st Qu.:1130 Median :1464
## BsmtFullBath Min. :0.0000 1st Qu.:0.0000 Median :0.0000
## BsmtHalfBath Min. :0.00000 1st Qu.:0.00000 Median :0.00000
## FullBath Min. :0.000 1st Qu.:1.000 Median :2.000
## HalfBath Min. :0.0000 1st Qu.:0.0000 Median :0.0000
## BedroomAbvGr Min. :0.000 1st Qu.:2.000 Median :3.000
## KitchenAbvGr Min. :0.000 1st Qu.:1.000 Median :1.000
## TotRmsAbvGrd Min. : 2.000 1st Qu.: 5.000 Median : 6.000
## Fireplaces Min. :0.000 1st Qu.:0.000 Median :1.000
## GarageYrBlt Min. :1900 1st Qu.:1961 Median :1980
## GarageCars Min. :0.000 1st Qu.:1.000 Median :2.000
## GarageArea Min. : 0.0 1st Qu.: 334.5 Median : 480.0
## WoodDeckSF Min. : 0.00 1st Qu.: 0.00 Median : 0.00
## OpenPorchSF Min. : 0.00 1st Qu.: 0.00 Median : 25.00
## EnclosedPorch Min. : 0.00 1st Qu.: 0.00 Median : 0.00
## X3SsnPorch Min. : 0.00 1st Qu.: 0.00 Median : 0.00
## ScreenPorch Min. : 0.00 1st Qu.: 0.00 Median : 0.00
## PoolArea Min. : 0.000 1st Qu.: 0.000 Median : 0.000
## MiscVal Min. : 0.00 1st Qu.: 0.00 Median : 0.00
## MoSold Min. : 1.000 1st Qu.: 5.000 Median : 6.000
## YrSold Min. :2006 1st Qu.:2007 Median :2008
## SalePrice Min. : 34900 1st Qu.:129975 Median :163000
## Mean Q3 Max
## Id Mean : 730.5 3rd Qu.:1095.2 Max. :1460.0
## MSSubClass Mean : 56.9 3rd Qu.: 70.0 Max. :190.0
## LotFrontage Mean : 70.05 3rd Qu.: 80.00 Max. :313.00
## LotArea Mean : 10517 3rd Qu.: 11602 Max. :215245
## OverallQual Mean : 6.099 3rd Qu.: 7.000 Max. :10.000
## OverallCond Mean :5.575 3rd Qu.:6.000 Max. :9.000
## YearBuilt Mean :1971 3rd Qu.:2000 Max. :2010
## YearRemodAdd Mean :1985 3rd Qu.:2004 Max. :2010
## MasVnrArea Mean : 103.7 3rd Qu.: 166.0 Max. :1600.0
## BsmtFinSF1 Mean : 443.6 3rd Qu.: 712.2 Max. :5644.0
## BsmtFinSF2 Mean : 46.55 3rd Qu.: 0.00 Max. :1474.00
## BsmtUnfSF Mean : 567.2 3rd Qu.: 808.0 Max. :2336.0
## TotalBsmtSF Mean :1057.4 3rd Qu.:1298.2 Max. :6110.0
## X1stFlrSF Mean :1163 3rd Qu.:1391 Max. :4692
## X2ndFlrSF Mean : 347 3rd Qu.: 728 Max. :2065
## LowQualFinSF Mean : 5.845 3rd Qu.: 0.000 Max. :572.000
## GrLivArea Mean :1515 3rd Qu.:1777 Max. :5642
## BsmtFullBath Mean :0.4253 3rd Qu.:1.0000 Max. :3.0000
## BsmtHalfBath Mean :0.05753 3rd Qu.:0.00000 Max. :2.00000
## FullBath Mean :1.565 3rd Qu.:2.000 Max. :3.000
## HalfBath Mean :0.3829 3rd Qu.:1.0000 Max. :2.0000
## BedroomAbvGr Mean :2.866 3rd Qu.:3.000 Max. :8.000
## KitchenAbvGr Mean :1.047 3rd Qu.:1.000 Max. :3.000
## TotRmsAbvGrd Mean : 6.518 3rd Qu.: 7.000 Max. :14.000
## Fireplaces Mean :0.613 3rd Qu.:1.000 Max. :3.000
## GarageYrBlt Mean :1979 3rd Qu.:2002 Max. :2010
## GarageCars Mean :1.767 3rd Qu.:2.000 Max. :4.000
## GarageArea Mean : 473.0 3rd Qu.: 576.0 Max. :1418.0
## WoodDeckSF Mean : 94.24 3rd Qu.:168.00 Max. :857.00
## OpenPorchSF Mean : 46.66 3rd Qu.: 68.00 Max. :547.00
## EnclosedPorch Mean : 21.95 3rd Qu.: 0.00 Max. :552.00
## X3SsnPorch Mean : 3.41 3rd Qu.: 0.00 Max. :508.00
## ScreenPorch Mean : 15.06 3rd Qu.: 0.00 Max. :480.00
## PoolArea Mean : 2.759 3rd Qu.: 0.000 Max. :738.000
## MiscVal Mean : 43.49 3rd Qu.: 0.00 Max. :15500.00
## MoSold Mean : 6.322 3rd Qu.: 8.000 Max. :12.000
## YrSold Mean :2008 3rd Qu.:2009 Max. :2010
## SalePrice Mean :180921 3rd Qu.:214000 Max. :755000
# Keep only the columns of interest for the pairwise scatterplot matrix
df_1 <- train[
  ,
  c(
    "SalePrice", "HalfBath", "GarageArea", "OverallQual", "YearBuilt",
    "YearRemodAdd", "TotalBsmtSF", "X1stFlrSF", "FullBath", "X2ndFlrSF",
    "OverallCond", "Fireplaces", "BedroomAbvGr"
  ),
  drop = FALSE
]
# Scatterplot matrix of the selected columns
kdepairs(df_1)
## Warning in par(usr): argument 1 does not name a graphical parameter
## Warning in par(usr): argument 1 does not name a graphical parameter
## Warning in par(usr): argument 1 does not name a graphical parameter
## Warning in par(usr): argument 1 does not name a graphical parameter
## Warning in par(usr): argument 1 does not name a graphical parameter
## Warning in par(usr): argument 1 does not name a graphical parameter
## Warning in par(usr): argument 1 does not name a graphical parameter
## Warning in par(usr): argument 1 does not name a graphical parameter
## Warning in par(usr): argument 1 does not name a graphical parameter
## Warning in par(usr): argument 1 does not name a graphical parameter
## Warning in par(usr): argument 1 does not name a graphical parameter
## Warning in par(usr): argument 1 does not name a graphical parameter
## Warning in par(usr): argument 1 does not name a graphical parameter
# Predictors whose correlation with SalePrice is tested
variables <- c("OverallQual", "GarageArea", "TotalBsmtSF")
# Run one Pearson correlation test per predictor and print each result with a
# heading. conf.level = 0.80 requests an 80% confidence interval for the
# correlation coefficient.
for (variable in variables) {
# train[[variable]] looks the column up by its name held in `variable`
correlation <- cor.test(train$SalePrice, train[[variable]], conf.level = 0.80)
cat("Correlation test for", variable, ":\n")
print(correlation)
# Blank line between consecutive test reports
cat("\n")
}
## Correlation test for OverallQual :
##
## Pearson's product-moment correlation
##
## data: train$SalePrice and train[[variable]]
## t = 49.364, df = 1458, p-value < 2.2e-16
## alternative hypothesis: true correlation is not equal to 0
## 80 percent confidence interval:
## 0.7780752 0.8032204
## sample estimates:
## cor
## 0.7909816
##
##
## Correlation test for GarageArea :
##
## Pearson's product-moment correlation
##
## data: train$SalePrice and train[[variable]]
## t = 30.446, df = 1458, p-value < 2.2e-16
## alternative hypothesis: true correlation is not equal to 0
## 80 percent confidence interval:
## 0.6024756 0.6435283
## sample estimates:
## cor
## 0.6234314
##
##
## Correlation test for TotalBsmtSF :
##
## Pearson's product-moment correlation
##
## data: train$SalePrice and train[[variable]]
## t = 29.671, df = 1458, p-value < 2.2e-16
## alternative hypothesis: true correlation is not equal to 0
## 80 percent confidence interval:
## 0.5922142 0.6340846
## sample estimates:
## cor
## 0.6135806
Family-wise Error Rate
# Family-wise error rate for the three correlation tests run above
# (SalePrice vs. OverallQual, GarageArea and TotalBsmtSF), so k = 3.
k <- 3
# Per-test significance level
a <- .05
# Probability of at least one Type I error across k independent tests
1 - (1-a)^k
## [1] 0.142625
# Bonferroni-corrected per-test significance level
a/k
## [1] 0.01666667
In other words, the probability of making a Type I error on at least one of the three hypothesis tests is over 14%. The Bonferroni-corrected significance level is 0.05 / 3 ≈ 0.017. All p-values are less than the corrected level, so all three correlations remain significant.
# Pairwise correlations among SalePrice and the three tested predictors.
# The object is called `matrix`, the same name as the base function matrix().
matrix <- cor(
  train[, c("SalePrice", "OverallQual", "GarageArea", "TotalBsmtSF")]
)
matrix
## SalePrice OverallQual GarageArea TotalBsmtSF
## SalePrice 1.0000000 0.7909816 0.6234314 0.6135806
## OverallQual 0.7909816 1.0000000 0.5620218 0.5378085
## GarageArea 0.6234314 0.5620218 1.0000000 0.4866655
## TotalBsmtSF 0.6135806 0.5378085 0.4866655 1.0000000
# Invert the correlation matrix to get the precision matrix, and display it
(precision_matrix <- inv(matrix))
##
## [1,] 3.3563856 -1.8870668 -0.6860529 -0.7106541
## [2,] -1.8870668 2.7484378 -0.2782690 -0.1848419
## [3,] -0.6860529 -0.2782690 1.7118461 -0.2624922
## [4,] -0.7106541 -0.1848419 -0.2624922 1.6631990
# Correlation matrix times precision matrix, rounded to one decimal place
cor_pre_matrix <- round(matrix %*% precision_matrix, 1)
cor_pre_matrix
##
## SalePrice 1 0 0 0
## OverallQual 0 1 0 0
## GarageArea 0 0 1 0
## TotalBsmtSF 0 0 0 1
# Precision matrix times correlation matrix, rounded to one decimal place
pre_cor_matrix <- round(precision_matrix %*% matrix, 1)
pre_cor_matrix
## SalePrice OverallQual GarageArea TotalBsmtSF
## [1,] 1 0 0 0
## [2,] 0 1 0 0
## [3,] 0 0 1 0
## [4,] 0 0 0 1
# LU-decompose the correlation matrix, then extract and show the L factor
lu_decomp <- lu.decomposition(matrix)
(L <- lu_decomp[["L"]])
## [,1] [,2] [,3] [,4]
## [1,] 1.0000000 0.0000000 0.0000000 0
## [2,] 0.7909816 1.0000000 0.0000000 0
## [3,] 0.6234314 0.1840505 1.0000000 0
## [4,] 0.6135806 0.1401839 0.1578237 1
# Extract and show the U factor of the decomposition
(U <- lu_decomp[["U"]])
## [,1] [,2] [,3] [,4]
## [1,] 1 0.7909816 0.62343144 0.61358055
## [2,] 0 0.3743481 0.06889896 0.05247757
## [3,] 0 0.0000000 0.59865235 0.09448153
## [4,] 0 0.0000000 0.00000000 0.60125097
# Multiply the factors back together to recover the decomposed matrix
L %*% U
## [,1] [,2] [,3] [,4]
## [1,] 1.0000000 0.7909816 0.6234314 0.6135806
## [2,] 0.7909816 1.0000000 0.5620218 0.5378085
## [3,] 0.6234314 0.5620218 1.0000000 0.4866655
## [4,] 0.6135806 0.5378085 0.4866655 1.0000000
# Show the original correlation matrix for comparison with the product
print(matrix)
## SalePrice OverallQual GarageArea TotalBsmtSF
## SalePrice 1.0000000 0.7909816 0.6234314 0.6135806
## OverallQual 0.7909816 1.0000000 0.5620218 0.5378085
## GarageArea 0.6234314 0.5620218 1.0000000 0.4866655
## TotalBsmtSF 0.6135806 0.5378085 0.4866655 1.0000000
Picking TotalBsmtSF: the total square feet of basement area.
# Pull out the basement-area column and summarise its distribution
Bsmt <- train[["TotalBsmtSF"]]
summary(Bsmt)
## Min. 1st Qu. Median Mean 3rd Qu. Max.
## 0.0 795.8 991.5 1057.4 1298.2 6110.0
Shift it, if necessary, so that the minimum value is strictly above zero.
# Smallest observed basement area; decides whether a shift is required
min_value <- min(train$TotalBsmtSF)
if (min_value > 0) {
  # Already strictly positive: use the column unchanged
  shifted_values <- train$TotalBsmtSF
} else {
  # Move every value up so the minimum lands at exactly 1
  shift_value <- abs(min_value) + 1
  shifted_values <- train$TotalBsmtSF + shift_value
}
summary(shifted_values)
## Min. 1st Qu. Median Mean 3rd Qu. Max.
## 1.0 796.8 992.5 1058.4 1299.2 6111.0
# Histogram of the shifted TotalBsmtSF values
hist(
  shifted_values,
  main = "Histogram of TotalBsmtSF",
  xlab = "TotalBsmtSF",
  ylab = "Frequency",
  col = "skyblue"
)
### Then load the MASS package and run fitdistr to fit an exponential
probability density function. Find the optimal value of lambda for this
distribution, and then take 1000 samples from this exponential
distribution using this value (e.g., rexp(1000, lambda)). Plot a
histogram and compare it with a histogram of your original variable.
# Fit an exponential distribution to the shifted values; the estimate is the
# fitted rate parameter
lambda <- fitdistr(shifted_values, "exponential")
lambda[["estimate"]]
## rate
## 0.0009447961
# Draw 1000 values from the fitted exponential distribution; the fixed seed
# makes the sample reproducible.
set.seed(100)
expon.dist <- rexp(n = 1000, rate = lambda$estimate)
# Label the plot as simulated data so it is not mistaken for the histogram of
# the observed TotalBsmtSF values it is compared against.
hist(
  expon.dist,
  main = "Histogram of Simulated Exponential Sample",
  col = "skyblue",
  xlab = "Simulated value",
  ylab = "Frequency"
)
### Using the exponential pdf, find the 5th and 95th percentiles using
the cumulative distribution function (CDF).
# 5th percentile of the fitted exponential, from its quantile function
# (the inverse of the CDF)
percentile_5 <- qexp(0.05, rate = lambda$estimate)
percentile_5
## [1] 54.29033
# 95th percentile of the fitted exponential, from its quantile function
# (the inverse of the CDF)
percentile_95 <- qexp(0.95, rate = lambda$estimate)
percentile_95
## [1] 3170.771
# One-sample t-test on the shifted values; keep only its confidence interval
# for the mean (t.test's default level is 95%)
confidence_interval <- t.test(shifted_values)[["conf.int"]]
# Report the lower and upper bounds on one line
cat(
  "95% Confidence Interval:",
  confidence_interval[1],
  "-",
  confidence_interval[2],
  "\n"
)
## 95% Confidence Interval: 1035.908 - 1080.951
# Empirical 5th and 95th percentiles of the shifted values
quantile(shifted_values, probs = c(0.05, 0.95))
## 5% 95%
## 520.3 1754.0
### Build some type of multiple regression model and submit your model to the competition board. Provide your complete model summary and results with analysis.
Showing the best model:
# Response: natural log of the sale price
sale_log <- log(train$SalePrice)
# In an R formula `*` already expands to all main effects plus every
# interaction, so the six leading main-effect terms repeat what the product
# term contributes. The formula is kept as written so the fitted terms and
# the printed call stay the same.
model_4 <- lm(
  sale_log ~ OverallQual + GarageArea + X1stFlrSF + FullBath + YearBuilt +
    YearRemodAdd +
    (OverallQual * GarageArea * X1stFlrSF * FullBath * YearBuilt * YearRemodAdd),
  data = train
)
summary(model_4)
##
## Call:
## lm(formula = sale_log ~ OverallQual + GarageArea + X1stFlrSF +
## FullBath + YearBuilt + YearRemodAdd + (OverallQual * GarageArea *
## X1stFlrSF * FullBath * YearBuilt * YearRemodAdd), data = train)
##
## Residuals:
## Min 1Q Median 3Q Max
## -0.94462 -0.09741 0.00385 0.08604 0.59209
##
## Coefficients:
## Estimate
## (Intercept) -9.533e+03
## OverallQual 1.368e+03
## GarageArea 2.270e+01
## X1stFlrSF 6.216e+00
## FullBath 6.669e+03
## YearBuilt 4.941e+00
## YearRemodAdd 4.791e+00
## OverallQual:GarageArea -2.888e+00
## OverallQual:X1stFlrSF -7.651e-01
## GarageArea:X1stFlrSF -1.466e-02
## OverallQual:FullBath -9.796e+02
## GarageArea:FullBath -1.394e+01
## X1stFlrSF:FullBath -4.511e+00
## OverallQual:YearBuilt -7.114e-01
## GarageArea:YearBuilt -1.186e-02
## X1stFlrSF:YearBuilt -3.198e-03
## FullBath:YearBuilt -3.472e+00
## OverallQual:YearRemodAdd -6.870e-01
## GarageArea:YearRemodAdd -1.137e-02
## X1stFlrSF:YearRemodAdd -3.127e-03
## FullBath:YearRemodAdd -3.350e+00
## YearBuilt:YearRemodAdd -2.481e-03
## OverallQual:GarageArea:X1stFlrSF 1.464e-03
## OverallQual:GarageArea:FullBath 1.772e+00
## OverallQual:X1stFlrSF:FullBath 5.978e-01
## GarageArea:X1stFlrSF:FullBath 9.323e-03
## OverallQual:GarageArea:YearBuilt 1.522e-03
## OverallQual:X1stFlrSF:YearBuilt 3.956e-04
## GarageArea:X1stFlrSF:YearBuilt 7.660e-06
## OverallQual:FullBath:YearBuilt 5.120e-01
## GarageArea:FullBath:YearBuilt 7.306e-03
## X1stFlrSF:FullBath:YearBuilt 2.338e-03
## OverallQual:GarageArea:YearRemodAdd 1.448e-03
## OverallQual:X1stFlrSF:YearRemodAdd 3.856e-04
## GarageArea:X1stFlrSF:YearRemodAdd 7.335e-06
## OverallQual:FullBath:YearRemodAdd 4.916e-01
## GarageArea:FullBath:YearRemodAdd 6.989e-03
## X1stFlrSF:FullBath:YearRemodAdd 2.267e-03
## OverallQual:YearBuilt:YearRemodAdd 3.572e-04
## GarageArea:YearBuilt:YearRemodAdd 5.944e-06
## X1stFlrSF:YearBuilt:YearRemodAdd 1.609e-06
## FullBath:YearBuilt:YearRemodAdd 1.744e-03
## OverallQual:GarageArea:X1stFlrSF:FullBath -9.945e-04
## OverallQual:GarageArea:X1stFlrSF:YearBuilt -7.792e-07
## OverallQual:GarageArea:FullBath:YearBuilt -9.370e-04
## OverallQual:X1stFlrSF:FullBath:YearBuilt -3.118e-04
## GarageArea:X1stFlrSF:FullBath:YearBuilt -4.885e-06
## OverallQual:GarageArea:X1stFlrSF:YearRemodAdd -7.332e-07
## OverallQual:GarageArea:FullBath:YearRemodAdd -8.887e-04
## OverallQual:X1stFlrSF:FullBath:YearRemodAdd -3.001e-04
## GarageArea:X1stFlrSF:FullBath:YearRemodAdd -4.667e-06
## OverallQual:GarageArea:YearBuilt:YearRemodAdd -7.634e-07
## OverallQual:X1stFlrSF:YearBuilt:YearRemodAdd -1.994e-07
## GarageArea:X1stFlrSF:YearBuilt:YearRemodAdd -3.834e-09
## OverallQual:FullBath:YearBuilt:YearRemodAdd -2.570e-04
## GarageArea:FullBath:YearBuilt:YearRemodAdd -3.663e-06
## X1stFlrSF:FullBath:YearBuilt:YearRemodAdd -1.175e-06
## OverallQual:GarageArea:X1stFlrSF:FullBath:YearBuilt 5.300e-07
## OverallQual:GarageArea:X1stFlrSF:FullBath:YearRemodAdd 4.980e-07
## OverallQual:GarageArea:X1stFlrSF:YearBuilt:YearRemodAdd 3.904e-10
## OverallQual:GarageArea:FullBath:YearBuilt:YearRemodAdd 4.700e-07
## OverallQual:X1stFlrSF:FullBath:YearBuilt:YearRemodAdd 1.566e-07
## GarageArea:X1stFlrSF:FullBath:YearBuilt:YearRemodAdd 2.446e-09
## OverallQual:GarageArea:X1stFlrSF:FullBath:YearBuilt:YearRemodAdd -2.654e-10
## Std. Error
## (Intercept) 3.922e+03
## OverallQual 7.013e+02
## GarageArea 6.547e+00
## X1stFlrSF 4.045e+00
## FullBath 2.594e+03
## YearBuilt 2.011e+00
## YearRemodAdd 1.978e+00
## OverallQual:GarageArea 1.135e+00
## OverallQual:X1stFlrSF 7.027e-01
## GarageArea:X1stFlrSF 6.820e-03
## OverallQual:FullBath 4.588e+02
## GarageArea:FullBath 4.365e+00
## X1stFlrSF:FullBath 2.505e+00
## OverallQual:YearBuilt 3.589e-01
## GarageArea:YearBuilt 3.346e-03
## X1stFlrSF:YearBuilt 2.070e-03
## FullBath:YearBuilt 1.330e+00
## OverallQual:YearRemodAdd 3.532e-01
## GarageArea:YearRemodAdd 3.293e-03
## X1stFlrSF:YearRemodAdd 2.039e-03
## FullBath:YearRemodAdd 1.304e+00
## YearBuilt:YearRemodAdd 1.014e-03
## OverallQual:GarageArea:X1stFlrSF 1.121e-03
## OverallQual:GarageArea:FullBath 7.341e-01
## OverallQual:X1stFlrSF:FullBath 4.313e-01
## GarageArea:X1stFlrSF:FullBath 4.179e-03
## OverallQual:GarageArea:YearBuilt 5.775e-04
## OverallQual:X1stFlrSF:YearBuilt 3.590e-04
## GarageArea:X1stFlrSF:YearBuilt 3.476e-06
## OverallQual:FullBath:YearBuilt 2.346e-01
## GarageArea:FullBath:YearBuilt 2.231e-03
## X1stFlrSF:FullBath:YearBuilt 1.280e-03
## OverallQual:GarageArea:YearRemodAdd 5.700e-04
## OverallQual:X1stFlrSF:YearRemodAdd 3.537e-04
## GarageArea:X1stFlrSF:YearRemodAdd 3.428e-06
## OverallQual:FullBath:YearRemodAdd 2.305e-01
## GarageArea:FullBath:YearRemodAdd 2.190e-03
## X1stFlrSF:FullBath:YearRemodAdd 1.259e-03
## OverallQual:YearBuilt:YearRemodAdd 1.808e-04
## GarageArea:YearBuilt:YearRemodAdd 1.682e-06
## X1stFlrSF:YearBuilt:YearRemodAdd 1.043e-06
## FullBath:YearBuilt:YearRemodAdd 6.683e-04
## OverallQual:GarageArea:X1stFlrSF:FullBath 6.775e-04
## OverallQual:GarageArea:X1stFlrSF:YearBuilt 5.685e-07
## OverallQual:GarageArea:FullBath:YearBuilt 3.732e-04
## OverallQual:X1stFlrSF:FullBath:YearBuilt 2.200e-04
## GarageArea:X1stFlrSF:FullBath:YearBuilt 2.129e-06
## OverallQual:GarageArea:X1stFlrSF:YearRemodAdd 5.628e-07
## OverallQual:GarageArea:FullBath:YearRemodAdd 3.682e-04
## OverallQual:X1stFlrSF:FullBath:YearRemodAdd 2.167e-04
## GarageArea:X1stFlrSF:FullBath:YearRemodAdd 2.097e-06
## OverallQual:GarageArea:YearBuilt:YearRemodAdd 2.900e-07
## OverallQual:X1stFlrSF:YearBuilt:YearRemodAdd 1.807e-07
## GarageArea:X1stFlrSF:YearBuilt:YearRemodAdd 1.747e-09
## OverallQual:FullBath:YearBuilt:YearRemodAdd 1.178e-04
## GarageArea:FullBath:YearBuilt:YearRemodAdd 1.119e-06
## X1stFlrSF:FullBath:YearBuilt:YearRemodAdd 6.436e-07
## OverallQual:GarageArea:X1stFlrSF:FullBath:YearBuilt 3.430e-07
## OverallQual:GarageArea:X1stFlrSF:FullBath:YearRemodAdd 3.398e-07
## OverallQual:GarageArea:X1stFlrSF:YearBuilt:YearRemodAdd 2.854e-10
## OverallQual:GarageArea:FullBath:YearBuilt:YearRemodAdd 1.871e-07
## OverallQual:X1stFlrSF:FullBath:YearBuilt:YearRemodAdd 1.105e-07
## GarageArea:X1stFlrSF:FullBath:YearBuilt:YearRemodAdd 1.068e-09
## OverallQual:GarageArea:X1stFlrSF:FullBath:YearBuilt:YearRemodAdd 1.720e-10
## t value
## (Intercept) -2.430
## OverallQual 1.951
## GarageArea 3.467
## X1stFlrSF 1.537
## FullBath 2.571
## YearBuilt 2.457
## YearRemodAdd 2.422
## OverallQual:GarageArea -2.545
## OverallQual:X1stFlrSF -1.089
## GarageArea:X1stFlrSF -2.149
## OverallQual:FullBath -2.135
## GarageArea:FullBath -3.194
## X1stFlrSF:FullBath -1.801
## OverallQual:YearBuilt -1.982
## GarageArea:YearBuilt -3.545
## X1stFlrSF:YearBuilt -1.545
## FullBath:YearBuilt -2.611
## OverallQual:YearRemodAdd -1.945
## GarageArea:YearRemodAdd -3.455
## X1stFlrSF:YearRemodAdd -1.534
## FullBath:YearRemodAdd -2.569
## YearBuilt:YearRemodAdd -2.447
## OverallQual:GarageArea:X1stFlrSF 1.305
## OverallQual:GarageArea:FullBath 2.414
## OverallQual:X1stFlrSF:FullBath 1.386
## GarageArea:X1stFlrSF:FullBath 2.231
## OverallQual:GarageArea:YearBuilt 2.636
## OverallQual:X1stFlrSF:YearBuilt 1.102
## GarageArea:X1stFlrSF:YearBuilt 2.203
## OverallQual:FullBath:YearBuilt 2.182
## GarageArea:FullBath:YearBuilt 3.274
## X1stFlrSF:FullBath:YearBuilt 1.826
## OverallQual:GarageArea:YearRemodAdd 2.540
## OverallQual:X1stFlrSF:YearRemodAdd 1.090
## GarageArea:X1stFlrSF:YearRemodAdd 2.140
## OverallQual:FullBath:YearRemodAdd 2.133
## GarageArea:FullBath:YearRemodAdd 3.192
## X1stFlrSF:FullBath:YearRemodAdd 1.800
## OverallQual:YearBuilt:YearRemodAdd 1.976
## GarageArea:YearBuilt:YearRemodAdd 3.533
## X1stFlrSF:YearBuilt:YearRemodAdd 1.542
## FullBath:YearBuilt:YearRemodAdd 2.610
## OverallQual:GarageArea:X1stFlrSF:FullBath -1.468
## OverallQual:GarageArea:X1stFlrSF:YearBuilt -1.371
## OverallQual:GarageArea:FullBath:YearBuilt -2.511
## OverallQual:X1stFlrSF:FullBath:YearBuilt -1.417
## GarageArea:X1stFlrSF:FullBath:YearBuilt -2.295
## OverallQual:GarageArea:X1stFlrSF:YearRemodAdd -1.303
## OverallQual:GarageArea:FullBath:YearRemodAdd -2.414
## OverallQual:X1stFlrSF:FullBath:YearRemodAdd -1.385
## GarageArea:X1stFlrSF:FullBath:YearRemodAdd -2.225
## OverallQual:GarageArea:YearBuilt:YearRemodAdd -2.632
## OverallQual:X1stFlrSF:YearBuilt:YearRemodAdd -1.104
## GarageArea:X1stFlrSF:YearBuilt:YearRemodAdd -2.194
## OverallQual:FullBath:YearBuilt:YearRemodAdd -2.181
## GarageArea:FullBath:YearBuilt:YearRemodAdd -3.273
## X1stFlrSF:FullBath:YearBuilt:YearRemodAdd -1.826
## OverallQual:GarageArea:X1stFlrSF:FullBath:YearBuilt 1.545
## OverallQual:GarageArea:X1stFlrSF:FullBath:YearRemodAdd 1.466
## OverallQual:GarageArea:X1stFlrSF:YearBuilt:YearRemodAdd 1.368
## OverallQual:GarageArea:FullBath:YearBuilt:YearRemodAdd 2.512
## OverallQual:X1stFlrSF:FullBath:YearBuilt:YearRemodAdd 1.417
## GarageArea:X1stFlrSF:FullBath:YearBuilt:YearRemodAdd 2.290
## OverallQual:GarageArea:X1stFlrSF:FullBath:YearBuilt:YearRemodAdd -1.543
## Pr(>|t|)
## (Intercept) 0.015207 *
## OverallQual 0.051218 .
## GarageArea 0.000542 ***
## X1stFlrSF 0.124571
## FullBath 0.010253 *
## YearBuilt 0.014117 *
## YearRemodAdd 0.015557 *
## OverallQual:GarageArea 0.011038 *
## OverallQual:X1stFlrSF 0.276410
## GarageArea:X1stFlrSF 0.031791 *
## OverallQual:FullBath 0.032919 *
## GarageArea:FullBath 0.001435 **
## X1stFlrSF:FullBath 0.071921 .
## OverallQual:YearBuilt 0.047666 *
## GarageArea:YearBuilt 0.000406 ***
## X1stFlrSF:YearBuilt 0.122649
## FullBath:YearBuilt 0.009115 **
## OverallQual:YearRemodAdd 0.052006 .
## GarageArea:YearRemodAdd 0.000568 ***
## X1stFlrSF:YearRemodAdd 0.125302
## FullBath:YearRemodAdd 0.010305 *
## YearBuilt:YearRemodAdd 0.014537 *
## OverallQual:GarageArea:X1stFlrSF 0.191943
## OverallQual:GarageArea:FullBath 0.015909 *
## OverallQual:X1stFlrSF:FullBath 0.165983
## GarageArea:X1stFlrSF:FullBath 0.025854 *
## OverallQual:GarageArea:YearBuilt 0.008477 **
## OverallQual:X1stFlrSF:YearBuilt 0.270658
## GarageArea:X1stFlrSF:YearBuilt 0.027724 *
## OverallQual:FullBath:YearBuilt 0.029247 *
## GarageArea:FullBath:YearBuilt 0.001085 **
## X1stFlrSF:FullBath:YearBuilt 0.068025 .
## OverallQual:GarageArea:YearRemodAdd 0.011186 *
## OverallQual:X1stFlrSF:YearRemodAdd 0.275804
## GarageArea:X1stFlrSF:YearRemodAdd 0.032547 *
## OverallQual:FullBath:YearRemodAdd 0.033119 *
## GarageArea:FullBath:YearRemodAdd 0.001445 **
## X1stFlrSF:FullBath:YearRemodAdd 0.072094 .
## OverallQual:YearBuilt:YearRemodAdd 0.048347 *
## GarageArea:YearBuilt:YearRemodAdd 0.000425 ***
## X1stFlrSF:YearBuilt:YearRemodAdd 0.123253
## FullBath:YearBuilt:YearRemodAdd 0.009145 **
## OverallQual:GarageArea:X1stFlrSF:FullBath 0.142332
## OverallQual:GarageArea:X1stFlrSF:YearBuilt 0.170742
## OverallQual:GarageArea:FullBath:YearBuilt 0.012156 *
## OverallQual:X1stFlrSF:FullBath:YearBuilt 0.156596
## GarageArea:X1stFlrSF:FullBath:YearBuilt 0.021867 *
## OverallQual:GarageArea:X1stFlrSF:YearRemodAdd 0.192916
## OverallQual:GarageArea:FullBath:YearRemodAdd 0.015907 *
## OverallQual:X1stFlrSF:FullBath:YearRemodAdd 0.166177
## GarageArea:X1stFlrSF:FullBath:YearRemodAdd 0.026209 *
## OverallQual:GarageArea:YearBuilt:YearRemodAdd 0.008582 **
## OverallQual:X1stFlrSF:YearBuilt:YearRemodAdd 0.269913
## GarageArea:X1stFlrSF:YearBuilt:YearRemodAdd 0.028378 *
## OverallQual:FullBath:YearBuilt:YearRemodAdd 0.029384 *
## GarageArea:FullBath:YearBuilt:YearRemodAdd 0.001090 **
## X1stFlrSF:FullBath:YearBuilt:YearRemodAdd 0.068120 .
## OverallQual:GarageArea:X1stFlrSF:FullBath:YearBuilt 0.122559
## OverallQual:GarageArea:X1stFlrSF:FullBath:YearRemodAdd 0.142958
## OverallQual:GarageArea:X1stFlrSF:YearBuilt:YearRemodAdd 0.171553
## OverallQual:GarageArea:FullBath:YearBuilt:YearRemodAdd 0.012131 *
## OverallQual:X1stFlrSF:FullBath:YearBuilt:YearRemodAdd 0.156650
## GarageArea:X1stFlrSF:FullBath:YearBuilt:YearRemodAdd 0.022157 *
## OverallQual:GarageArea:X1stFlrSF:FullBath:YearBuilt:YearRemodAdd 0.123042
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## Residual standard error: 0.171 on 1396 degrees of freedom
## Multiple R-squared: 0.8246, Adjusted R-squared: 0.8167
## F-statistic: 104.2 on 63 and 1396 DF, p-value: < 2.2e-16
# Draw the first two of lm's built-in diagnostic plots for the fitted model
plot(model_4, which = c(1, 2))
# Fresh import of the test data from the project's GitHub repository
test <- read.csv(
  "https://raw.githubusercontent.com/melbow2424/Data_605_Final/main/Data/test.csv"
)
# Replace missing GarageArea values with 0 (GarageArea is a model_4 predictor)
test <- mutate(test, GarageArea = ifelse(is.na(GarageArea), 0, GarageArea))
summary(test)
# Building the prediction
# model_4 was fitted to log(SalePrice), so its predictions are on the log scale.
predict_model <- predict(model_4, newdata = test)
# exp() is vectorised: it back-transforms every prediction in one call, keeps
# the element names, and always returns a numeric vector (sapply() would
# return a list for zero-length input).
predict_model_exp <- exp(predict_model)
# Assemble the two-column submission table: Id and predicted SalePrice
Id <- test$Id
SalePrice <- predict_model_exp
submission <- data.frame(Id, SalePrice)
head(submission)
## Id SalePrice
## 1 1461 130181.0
## 2 1462 148874.7
## 3 1463 165046.3
## 4 1464 177756.2
## 5 1465 235311.2
## 6 1466 171317.3
# Distribution of the submitted Ids and predicted sale prices
summary(submission)
## Id SalePrice
## Min. :1461 Min. : 44633
## 1st Qu.:1826 1st Qu.:127761
## Median :2190 Median :163839
## Mean :2190 Mean :177452
## 3rd Qu.:2554 3rd Qu.:207892
## Max. :2919 Max. :571400