library(ResourceSelection)
## Warning: package 'ResourceSelection' was built under R version 4.2.3
## ResourceSelection 0.3-5   2019-07-22
library(tidyverse)
## ── Attaching packages ─────────────────────────────────────── tidyverse 1.3.2
## ──
## ✔ ggplot2 3.4.1     ✔ purrr   1.0.1
## ✔ tibble  3.1.8     ✔ dplyr   1.1.0
## ✔ tidyr   1.3.0     ✔ stringr 1.5.0
## ✔ readr   2.1.4     ✔ forcats 1.0.0
## ── Conflicts ────────────────────────────────────────── tidyverse_conflicts() ──
## ✖ dplyr::filter() masks stats::filter()
## ✖ dplyr::lag()    masks stats::lag()
library(matlib)
## Warning: package 'matlib' was built under R version 4.2.3
library(Matrix)
## 
## Attaching package: 'Matrix'
## 
## The following objects are masked from 'package:tidyr':
## 
##     expand, pack, unpack
library(matrixcalc)
## 
## Attaching package: 'matrixcalc'
## 
## The following object is masked from 'package:matlib':
## 
##     vec
library(MASS)
## 
## Attaching package: 'MASS'
## 
## The following object is masked from 'package:dplyr':
## 
##     select

1. Descriptive and Inferential Statistics.

# Load the training data set from the GitHub-hosted CSV file.
train <- read.csv("https://raw.githubusercontent.com/melbow2424/Data_605_Final/main/Data/train.csv")

# Quick look at the first rows, then a per-column summary.
head(train)
summary(train)

Provide univariate descriptive statistics and appropriate plots for the training data set.

# Keep only the numeric columns of the training data.
is_numeric_col <- vapply(train, is.numeric, logical(1))
non_categorical_vars <- train[, is_numeric_col]

# summary() on a data frame returns a character table: one column per
# variable, one row per statistic, each cell a formatted "label : value" string.
summary_info <- summary(non_categorical_vars)

# Rows 1-6 of that table hold Min, Q1, Median, Mean, Q3 and Max, in that
# order; each row becomes one column of the result.
stat_rows <- c(Min = 1, Q1 = 2, Median = 3, Mean = 4, Q3 = 5, Max = 6)
summary_train <- do.call(
  data.frame,
  lapply(stat_rows, function(row_index) summary_info[row_index, ])
)

summary_train
##                              Min                 Q1             Median
##       Id        Min.   :   1.0     1st Qu.: 365.8     Median : 730.5  
##   MSSubClass     Min.   : 20.0      1st Qu.: 20.0      Median : 50.0  
##  LotFrontage    Min.   : 21.00     1st Qu.: 59.00     Median : 69.00  
##    LotArea      Min.   :  1300     1st Qu.:  7554     Median :  9478  
##  OverallQual    Min.   : 1.000     1st Qu.: 5.000     Median : 6.000  
##  OverallCond     Min.   :1.000      1st Qu.:5.000      Median :5.000  
##   YearBuilt       Min.   :1872       1st Qu.:1954       Median :1973  
##  YearRemodAdd     Min.   :1950       1st Qu.:1967       Median :1994  
##   MasVnrArea    Min.   :   0.0     1st Qu.:   0.0     Median :   0.0  
##   BsmtFinSF1    Min.   :   0.0     1st Qu.:   0.0     Median : 383.5  
##   BsmtFinSF2   Min.   :   0.00    1st Qu.:   0.00    Median :   0.00  
##   BsmtUnfSF     Min.   :   0.0     1st Qu.: 223.0     Median : 477.5  
##  TotalBsmtSF    Min.   :   0.0     1st Qu.: 795.8     Median : 991.5  
##   X1stFlrSF       Min.   : 334       1st Qu.: 882       Median :1087  
##   X2ndFlrSF       Min.   :   0       1st Qu.:   0       Median :   0  
##  LowQualFinSF  Min.   :  0.000    1st Qu.:  0.000    Median :  0.000  
##   GrLivArea       Min.   : 334       1st Qu.:1130       Median :1464  
##  BsmtFullBath   Min.   :0.0000     1st Qu.:0.0000     Median :0.0000  
##  BsmtHalfBath  Min.   :0.00000    1st Qu.:0.00000    Median :0.00000  
##    FullBath      Min.   :0.000      1st Qu.:1.000      Median :2.000  
##    HalfBath     Min.   :0.0000     1st Qu.:0.0000     Median :0.0000  
##  BedroomAbvGr    Min.   :0.000      1st Qu.:2.000      Median :3.000  
##  KitchenAbvGr    Min.   :0.000      1st Qu.:1.000      Median :1.000  
##  TotRmsAbvGrd   Min.   : 2.000     1st Qu.: 5.000     Median : 6.000  
##   Fireplaces     Min.   :0.000      1st Qu.:0.000      Median :1.000  
##  GarageYrBlt      Min.   :1900       1st Qu.:1961       Median :1980  
##   GarageCars     Min.   :0.000      1st Qu.:1.000      Median :2.000  
##   GarageArea    Min.   :   0.0     1st Qu.: 334.5     Median : 480.0  
##   WoodDeckSF    Min.   :  0.00     1st Qu.:  0.00     Median :  0.00  
##  OpenPorchSF    Min.   :  0.00     1st Qu.:  0.00     Median : 25.00  
## EnclosedPorch   Min.   :  0.00     1st Qu.:  0.00     Median :  0.00  
##   X3SsnPorch    Min.   :  0.00     1st Qu.:  0.00     Median :  0.00  
##  ScreenPorch    Min.   :  0.00     1st Qu.:  0.00     Median :  0.00  
##    PoolArea    Min.   :  0.000    1st Qu.:  0.000    Median :  0.000  
##    MiscVal    Min.   :    0.00   1st Qu.:    0.00   Median :    0.00  
##     MoSold      Min.   : 1.000     1st Qu.: 5.000     Median : 6.000  
##     YrSold        Min.   :2006       1st Qu.:2007       Median :2008  
##   SalePrice     Min.   : 34900     1st Qu.:129975     Median :163000  
##                             Mean                 Q3                Max
##       Id        Mean   : 730.5     3rd Qu.:1095.2     Max.   :1460.0  
##   MSSubClass     Mean   : 56.9      3rd Qu.: 70.0      Max.   :190.0  
##  LotFrontage    Mean   : 70.05     3rd Qu.: 80.00     Max.   :313.00  
##    LotArea      Mean   : 10517     3rd Qu.: 11602     Max.   :215245  
##  OverallQual    Mean   : 6.099     3rd Qu.: 7.000     Max.   :10.000  
##  OverallCond     Mean   :5.575      3rd Qu.:6.000      Max.   :9.000  
##   YearBuilt       Mean   :1971       3rd Qu.:2000       Max.   :2010  
##  YearRemodAdd     Mean   :1985       3rd Qu.:2004       Max.   :2010  
##   MasVnrArea    Mean   : 103.7     3rd Qu.: 166.0     Max.   :1600.0  
##   BsmtFinSF1    Mean   : 443.6     3rd Qu.: 712.2     Max.   :5644.0  
##   BsmtFinSF2   Mean   :  46.55    3rd Qu.:   0.00    Max.   :1474.00  
##   BsmtUnfSF     Mean   : 567.2     3rd Qu.: 808.0     Max.   :2336.0  
##  TotalBsmtSF    Mean   :1057.4     3rd Qu.:1298.2     Max.   :6110.0  
##   X1stFlrSF       Mean   :1163       3rd Qu.:1391       Max.   :4692  
##   X2ndFlrSF       Mean   : 347       3rd Qu.: 728       Max.   :2065  
##  LowQualFinSF  Mean   :  5.845    3rd Qu.:  0.000    Max.   :572.000  
##   GrLivArea       Mean   :1515       3rd Qu.:1777       Max.   :5642  
##  BsmtFullBath   Mean   :0.4253     3rd Qu.:1.0000     Max.   :3.0000  
##  BsmtHalfBath  Mean   :0.05753    3rd Qu.:0.00000    Max.   :2.00000  
##    FullBath      Mean   :1.565      3rd Qu.:2.000      Max.   :3.000  
##    HalfBath     Mean   :0.3829     3rd Qu.:1.0000     Max.   :2.0000  
##  BedroomAbvGr    Mean   :2.866      3rd Qu.:3.000      Max.   :8.000  
##  KitchenAbvGr    Mean   :1.047      3rd Qu.:1.000      Max.   :3.000  
##  TotRmsAbvGrd   Mean   : 6.518     3rd Qu.: 7.000     Max.   :14.000  
##   Fireplaces     Mean   :0.613      3rd Qu.:1.000      Max.   :3.000  
##  GarageYrBlt      Mean   :1979       3rd Qu.:2002       Max.   :2010  
##   GarageCars     Mean   :1.767      3rd Qu.:2.000      Max.   :4.000  
##   GarageArea    Mean   : 473.0     3rd Qu.: 576.0     Max.   :1418.0  
##   WoodDeckSF    Mean   : 94.24     3rd Qu.:168.00     Max.   :857.00  
##  OpenPorchSF    Mean   : 46.66     3rd Qu.: 68.00     Max.   :547.00  
## EnclosedPorch   Mean   : 21.95     3rd Qu.:  0.00     Max.   :552.00  
##   X3SsnPorch    Mean   :  3.41     3rd Qu.:  0.00     Max.   :508.00  
##  ScreenPorch    Mean   : 15.06     3rd Qu.:  0.00     Max.   :480.00  
##    PoolArea    Mean   :  2.759    3rd Qu.:  0.000    Max.   :738.000  
##    MiscVal    Mean   :   43.49   3rd Qu.:    0.00   Max.   :15500.00  
##     MoSold      Mean   : 6.322     3rd Qu.: 8.000     Max.   :12.000  
##     YrSold        Mean   :2008       3rd Qu.:2009       Max.   :2010  
##   SalePrice     Mean   :180921     3rd Qu.:214000     Max.   :755000

Provide a scatterplot matrix for at least two of the independent variables and the dependent variable.

# Keep only specific columns using subset()
df_1 <- subset(train, select = c(SalePrice, HalfBath, GarageArea, OverallQual, YearBuilt, YearRemodAdd, TotalBsmtSF, X1stFlrSF, FullBath, X2ndFlrSF, OverallCond, Fireplaces, BedroomAbvGr))

kdepairs(df_1)
## Warning in par(usr): argument 1 does not name a graphical parameter

## Warning in par(usr): argument 1 does not name a graphical parameter

## Warning in par(usr): argument 1 does not name a graphical parameter

## Warning in par(usr): argument 1 does not name a graphical parameter

## Warning in par(usr): argument 1 does not name a graphical parameter

## Warning in par(usr): argument 1 does not name a graphical parameter

## Warning in par(usr): argument 1 does not name a graphical parameter

## Warning in par(usr): argument 1 does not name a graphical parameter

## Warning in par(usr): argument 1 does not name a graphical parameter

## Warning in par(usr): argument 1 does not name a graphical parameter

## Warning in par(usr): argument 1 does not name a graphical parameter

## Warning in par(usr): argument 1 does not name a graphical parameter

## Warning in par(usr): argument 1 does not name a graphical parameter

Test the hypothesis that the correlation between each pair of variables is 0, and provide an 80% confidence interval for each correlation.

# Predictors whose correlation with SalePrice is tested below.
variables <- c("OverallQual", "GarageArea", "TotalBsmtSF")

# For each predictor, test H0: the correlation with SalePrice is 0, and print
# the result together with the 80% confidence interval set via conf.level.
for (variable in variables) {
  correlation <- cor.test(train$SalePrice, train[[variable]], conf.level = 0.80)
  cat("Correlation test for", variable, ":\n")
  # Values are not auto-printed inside a for loop, so print explicitly.
  print(correlation)
  cat("\n")
}
## Correlation test for OverallQual :
## 
##  Pearson's product-moment correlation
## 
## data:  train$SalePrice and train[[variable]]
## t = 49.364, df = 1458, p-value < 2.2e-16
## alternative hypothesis: true correlation is not equal to 0
## 80 percent confidence interval:
##  0.7780752 0.8032204
## sample estimates:
##       cor 
## 0.7909816 
## 
## 
## Correlation test for GarageArea :
## 
##  Pearson's product-moment correlation
## 
## data:  train$SalePrice and train[[variable]]
## t = 30.446, df = 1458, p-value < 2.2e-16
## alternative hypothesis: true correlation is not equal to 0
## 80 percent confidence interval:
##  0.6024756 0.6435283
## sample estimates:
##       cor 
## 0.6234314 
## 
## 
## Correlation test for TotalBsmtSF :
## 
##  Pearson's product-moment correlation
## 
## data:  train$SalePrice and train[[variable]]
## t = 29.671, df = 1458, p-value < 2.2e-16
## alternative hypothesis: true correlation is not equal to 0
## 80 percent confidence interval:
##  0.5922142 0.6340846
## sample estimates:
##       cor 
## 0.6135806

Would you be worried about familywise error? Why or why not?

Family-wise Error Rate

# Family-wise error rate for the correlation tests above.
# k is the number of hypothesis tests in the family: three correlations with
# SalePrice were tested (OverallQual, GarageArea, TotalBsmtSF), so k = 3.
k <- 3
# Per-test significance level.
a <- 0.05
# Probability of at least one type I error across k tests, assuming the
# tests are independent.
1 - (1 - a)^k
## [1] 0.142625
# Bonferroni-corrected per-test significance threshold.
a / k
## [1] 0.01666667

In other words, the probability of making a type I error on at least one of the three hypothesis tests is over 14%. The Bonferroni-corrected significance level is 0.05 / 3 ≈ 0.017. All three p-values are less than this corrected level, so all three correlations remain significant.

2. Linear Algebra and Correlation. (Too many variables)

Derive a correlation matrix for any three quantitative variables in the dataset.

# Correlation matrix of SalePrice and three predictors.
# NOTE: the object shares its name with base::matrix(); the name is kept
# because the following chunks refer to `matrix`.
cor_columns <- c("SalePrice", "OverallQual", "GarageArea", "TotalBsmtSF")
matrix <- cor(train[, cor_columns])
matrix
##             SalePrice OverallQual GarageArea TotalBsmtSF
## SalePrice   1.0000000   0.7909816  0.6234314   0.6135806
## OverallQual 0.7909816   1.0000000  0.5620218   0.5378085
## GarageArea  0.6234314   0.5620218  1.0000000   0.4866655
## TotalBsmtSF 0.6135806   0.5378085  0.4866655   1.0000000

Invert your correlation matrix from above. (This is known as the precision matrix and contains variance inflation factors on the diagonal.)

# Invert the correlation matrix with matlib's inv() to obtain the precision
# matrix.
precision_matrix <- matlib::inv(matrix)
print(precision_matrix)
##                                                 
## [1,]  3.3563856 -1.8870668 -0.6860529 -0.7106541
## [2,] -1.8870668  2.7484378 -0.2782690 -0.1848419
## [3,] -0.6860529 -0.2782690  1.7118461 -0.2624922
## [4,] -0.7106541 -0.1848419 -0.2624922  1.6631990

Multiply the correlation matrix by the precision matrix, and then multiply the precision matrix by the correlation matrix.

# Correlation matrix times its inverse, rounded to one decimal place so that
# floating-point noise does not hide the identity matrix.
cor_pre_matrix <- round(matrix %*% precision_matrix, 1)
cor_pre_matrix
##                    
## SalePrice   1 0 0 0
## OverallQual 0 1 0 0
## GarageArea  0 0 1 0
## TotalBsmtSF 0 0 0 1
# Inverse times the correlation matrix (the product in the other order),
# rounded to one decimal place.
pre_cor_matrix <- round(precision_matrix %*% matrix, 1)
pre_cor_matrix
##      SalePrice OverallQual GarageArea TotalBsmtSF
## [1,]         1           0          0           0
## [2,]         0           1          0           0
## [3,]         0           0          1           0
## [4,]         0           0          0           1

Conduct LU decomposition on the matrix.

# Factor the correlation matrix into lower and upper triangular parts
# (matrixcalc::lu.decomposition returns them as elements L and U).
lu_decomp <- matrixcalc::lu.decomposition(matrix)

# Lower-triangular factor.
L <- lu_decomp[["L"]]
print(L)
##           [,1]      [,2]      [,3] [,4]
## [1,] 1.0000000 0.0000000 0.0000000    0
## [2,] 0.7909816 1.0000000 0.0000000    0
## [3,] 0.6234314 0.1840505 1.0000000    0
## [4,] 0.6135806 0.1401839 0.1578237    1
# Upper-triangular factor of the same decomposition.
U <- lu_decomp[["U"]]
print(U)
##      [,1]      [,2]       [,3]       [,4]
## [1,]    1 0.7909816 0.62343144 0.61358055
## [2,]    0 0.3743481 0.06889896 0.05247757
## [3,]    0 0.0000000 0.59865235 0.09448153
## [4,]    0 0.0000000 0.00000000 0.60125097
# Multiply the factors back together; the product should reproduce the
# correlation matrix, which is printed right after for comparison.
L %*% U
##           [,1]      [,2]      [,3]      [,4]
## [1,] 1.0000000 0.7909816 0.6234314 0.6135806
## [2,] 0.7909816 1.0000000 0.5620218 0.5378085
## [3,] 0.6234314 0.5620218 1.0000000 0.4866655
## [4,] 0.6135806 0.5378085 0.4866655 1.0000000
print(matrix)
##             SalePrice OverallQual GarageArea TotalBsmtSF
## SalePrice   1.0000000   0.7909816  0.6234314   0.6135806
## OverallQual 0.7909816   1.0000000  0.5620218   0.5378085
## GarageArea  0.6234314   0.5620218  1.0000000   0.4866655
## TotalBsmtSF 0.6135806   0.5378085  0.4866655   1.0000000

3. Calculus-Based Probability & Statistics.

Many times, it makes sense to fit a closed form distribution to data. Select a variable in the Kaggle.com training dataset that is skewed to the right, and shift it so that the minimum value is strictly above zero if necessary.

Picking TotalBsmtSF: total square feet of basement area.

# Total basement square footage, the variable chosen for distribution fitting.
Bsmt <- train[["TotalBsmtSF"]]
summary(Bsmt)
##    Min. 1st Qu.  Median    Mean 3rd Qu.    Max. 
##     0.0   795.8   991.5  1057.4  1298.2  6110.0

Shift it so that the minimum value is absolutely above zero if necessary

# Smallest observed basement area; decides whether a shift is needed.
min_value <- min(train$TotalBsmtSF)

# Start from the raw values and shift only when the minimum is not positive.
shifted_values <- train$TotalBsmtSF
if (min_value <= 0) {
  # Offset that moves the minimum to exactly 1; equal to abs(min_value) + 1
  # because min_value <= 0 in this branch.
  shift_value <- 1 - min_value
  shifted_values <- shifted_values + shift_value
}
summary(shifted_values)
##    Min. 1st Qu.  Median    Mean 3rd Qu.    Max. 
##     1.0   796.8   992.5  1058.4  1299.2  6111.0
# Histogram of the (shifted) basement-area values.
hist(
  shifted_values,
  main = "Histogram of TotalBsmtSF",
  xlab = "TotalBsmtSF",
  ylab = "Frequency",
  col = "skyblue"
)

### Then load the MASS package and run fitdistr to fit an exponential probability density function. Find the optimal value of lambda for this distribution, and then take 1000 samples from this exponential distribution using this value (e.g., rexp(1000, lambda)). Plot a histogram and compare it with a histogram of your original variable.

# Fit an exponential distribution to the shifted data with MASS::fitdistr.
# NOTE: `lambda` holds the whole fit object; the rate estimate itself is
# lambda$estimate.
lambda <- fitdistr(shifted_values, densfun = "exponential")
lambda$estimate
##         rate 
## 0.0009447961
# Fix the RNG seed so the simulated sample is reproducible.
set.seed(100)
# Draw 1000 values from the fitted exponential distribution.
expon.dist <- rexp(n = 1000, rate = lambda$estimate)
# Label this plot as the simulated sample so it cannot be mistaken for the
# histogram of the observed TotalBsmtSF values it is compared against.
hist(
  expon.dist,
  main = "Histogram of Simulated Exponential Sample",
  col = "skyblue",
  xlab = "Simulated value (fitted exponential)",
  ylab = "Frequency"
)

### Using the exponential pdf, find the 5th and 95th percentiles using the cumulative distribution function (CDF).

# 5th percentile of the fitted exponential, from its quantile function
# (the inverse of the CDF).
percentile_5 <- qexp(0.05, rate = lambda$estimate)
percentile_5
## [1] 54.29033
# 95th percentile of the same fitted distribution.
percentile_95 <- qexp(0.95, rate = lambda$estimate)
percentile_95
## [1] 3170.771

Also generate a 95% confidence interval from the empirical data, assuming normality.

# One-sample t-test on the shifted data; its conf.int element is the
# confidence interval for the mean (t.test defaults to the 95% level).
t_test_result <- t.test(shifted_values)
confidence_interval <- t_test_result$conf.int
# Report the lower and upper bounds on one line.
cat(
  "95% Confidence Interval:",
  confidence_interval[1], "-", confidence_interval[2],
  "\n"
)
## 95% Confidence Interval: 1035.908 - 1080.951

Finally, provide the empirical 5th percentile and 95th percentile of the data.

# Empirical 5th and 95th percentiles of the shifted data.
quantile(shifted_values, probs = c(0.05, 0.95))
##     5%    95% 
##  520.3 1754.0

4. Modeling.

### Build some type of multiple regression model and submit your model to the competition board. Provide your complete model summary and results with analysis.

Showing best Model:

# Response for the model: natural log of the sale price.
sale_log <- log(train$SalePrice)

# Linear model of log sale price on six predictors and all of their
# interactions, up to the six-way term.
# NOTE(review): in an R formula `a * b * ...` already expands to the main
# effects plus every interaction, so the leading `OverallQual + ... +
# YearRemodAdd` terms are redundant. They are kept so that the `Call:` echoed
# by summary() is unchanged.
model_4 <- lm(
  sale_log ~ OverallQual + GarageArea + X1stFlrSF + FullBath + YearBuilt +
    YearRemodAdd +
    (OverallQual * GarageArea * X1stFlrSF * FullBath * YearBuilt *
      YearRemodAdd),
  data = train
)
summary(model_4)
## 
## Call:
## lm(formula = sale_log ~ OverallQual + GarageArea + X1stFlrSF + 
##     FullBath + YearBuilt + YearRemodAdd + (OverallQual * GarageArea * 
##     X1stFlrSF * FullBath * YearBuilt * YearRemodAdd), data = train)
## 
## Residuals:
##      Min       1Q   Median       3Q      Max 
## -0.94462 -0.09741  0.00385  0.08604  0.59209 
## 
## Coefficients:
##                                                                    Estimate
## (Intercept)                                                      -9.533e+03
## OverallQual                                                       1.368e+03
## GarageArea                                                        2.270e+01
## X1stFlrSF                                                         6.216e+00
## FullBath                                                          6.669e+03
## YearBuilt                                                         4.941e+00
## YearRemodAdd                                                      4.791e+00
## OverallQual:GarageArea                                           -2.888e+00
## OverallQual:X1stFlrSF                                            -7.651e-01
## GarageArea:X1stFlrSF                                             -1.466e-02
## OverallQual:FullBath                                             -9.796e+02
## GarageArea:FullBath                                              -1.394e+01
## X1stFlrSF:FullBath                                               -4.511e+00
## OverallQual:YearBuilt                                            -7.114e-01
## GarageArea:YearBuilt                                             -1.186e-02
## X1stFlrSF:YearBuilt                                              -3.198e-03
## FullBath:YearBuilt                                               -3.472e+00
## OverallQual:YearRemodAdd                                         -6.870e-01
## GarageArea:YearRemodAdd                                          -1.137e-02
## X1stFlrSF:YearRemodAdd                                           -3.127e-03
## FullBath:YearRemodAdd                                            -3.350e+00
## YearBuilt:YearRemodAdd                                           -2.481e-03
## OverallQual:GarageArea:X1stFlrSF                                  1.464e-03
## OverallQual:GarageArea:FullBath                                   1.772e+00
## OverallQual:X1stFlrSF:FullBath                                    5.978e-01
## GarageArea:X1stFlrSF:FullBath                                     9.323e-03
## OverallQual:GarageArea:YearBuilt                                  1.522e-03
## OverallQual:X1stFlrSF:YearBuilt                                   3.956e-04
## GarageArea:X1stFlrSF:YearBuilt                                    7.660e-06
## OverallQual:FullBath:YearBuilt                                    5.120e-01
## GarageArea:FullBath:YearBuilt                                     7.306e-03
## X1stFlrSF:FullBath:YearBuilt                                      2.338e-03
## OverallQual:GarageArea:YearRemodAdd                               1.448e-03
## OverallQual:X1stFlrSF:YearRemodAdd                                3.856e-04
## GarageArea:X1stFlrSF:YearRemodAdd                                 7.335e-06
## OverallQual:FullBath:YearRemodAdd                                 4.916e-01
## GarageArea:FullBath:YearRemodAdd                                  6.989e-03
## X1stFlrSF:FullBath:YearRemodAdd                                   2.267e-03
## OverallQual:YearBuilt:YearRemodAdd                                3.572e-04
## GarageArea:YearBuilt:YearRemodAdd                                 5.944e-06
## X1stFlrSF:YearBuilt:YearRemodAdd                                  1.609e-06
## FullBath:YearBuilt:YearRemodAdd                                   1.744e-03
## OverallQual:GarageArea:X1stFlrSF:FullBath                        -9.945e-04
## OverallQual:GarageArea:X1stFlrSF:YearBuilt                       -7.792e-07
## OverallQual:GarageArea:FullBath:YearBuilt                        -9.370e-04
## OverallQual:X1stFlrSF:FullBath:YearBuilt                         -3.118e-04
## GarageArea:X1stFlrSF:FullBath:YearBuilt                          -4.885e-06
## OverallQual:GarageArea:X1stFlrSF:YearRemodAdd                    -7.332e-07
## OverallQual:GarageArea:FullBath:YearRemodAdd                     -8.887e-04
## OverallQual:X1stFlrSF:FullBath:YearRemodAdd                      -3.001e-04
## GarageArea:X1stFlrSF:FullBath:YearRemodAdd                       -4.667e-06
## OverallQual:GarageArea:YearBuilt:YearRemodAdd                    -7.634e-07
## OverallQual:X1stFlrSF:YearBuilt:YearRemodAdd                     -1.994e-07
## GarageArea:X1stFlrSF:YearBuilt:YearRemodAdd                      -3.834e-09
## OverallQual:FullBath:YearBuilt:YearRemodAdd                      -2.570e-04
## GarageArea:FullBath:YearBuilt:YearRemodAdd                       -3.663e-06
## X1stFlrSF:FullBath:YearBuilt:YearRemodAdd                        -1.175e-06
## OverallQual:GarageArea:X1stFlrSF:FullBath:YearBuilt               5.300e-07
## OverallQual:GarageArea:X1stFlrSF:FullBath:YearRemodAdd            4.980e-07
## OverallQual:GarageArea:X1stFlrSF:YearBuilt:YearRemodAdd           3.904e-10
## OverallQual:GarageArea:FullBath:YearBuilt:YearRemodAdd            4.700e-07
## OverallQual:X1stFlrSF:FullBath:YearBuilt:YearRemodAdd             1.566e-07
## GarageArea:X1stFlrSF:FullBath:YearBuilt:YearRemodAdd              2.446e-09
## OverallQual:GarageArea:X1stFlrSF:FullBath:YearBuilt:YearRemodAdd -2.654e-10
##                                                                  Std. Error
## (Intercept)                                                       3.922e+03
## OverallQual                                                       7.013e+02
## GarageArea                                                        6.547e+00
## X1stFlrSF                                                         4.045e+00
## FullBath                                                          2.594e+03
## YearBuilt                                                         2.011e+00
## YearRemodAdd                                                      1.978e+00
## OverallQual:GarageArea                                            1.135e+00
## OverallQual:X1stFlrSF                                             7.027e-01
## GarageArea:X1stFlrSF                                              6.820e-03
## OverallQual:FullBath                                              4.588e+02
## GarageArea:FullBath                                               4.365e+00
## X1stFlrSF:FullBath                                                2.505e+00
## OverallQual:YearBuilt                                             3.589e-01
## GarageArea:YearBuilt                                              3.346e-03
## X1stFlrSF:YearBuilt                                               2.070e-03
## FullBath:YearBuilt                                                1.330e+00
## OverallQual:YearRemodAdd                                          3.532e-01
## GarageArea:YearRemodAdd                                           3.293e-03
## X1stFlrSF:YearRemodAdd                                            2.039e-03
## FullBath:YearRemodAdd                                             1.304e+00
## YearBuilt:YearRemodAdd                                            1.014e-03
## OverallQual:GarageArea:X1stFlrSF                                  1.121e-03
## OverallQual:GarageArea:FullBath                                   7.341e-01
## OverallQual:X1stFlrSF:FullBath                                    4.313e-01
## GarageArea:X1stFlrSF:FullBath                                     4.179e-03
## OverallQual:GarageArea:YearBuilt                                  5.775e-04
## OverallQual:X1stFlrSF:YearBuilt                                   3.590e-04
## GarageArea:X1stFlrSF:YearBuilt                                    3.476e-06
## OverallQual:FullBath:YearBuilt                                    2.346e-01
## GarageArea:FullBath:YearBuilt                                     2.231e-03
## X1stFlrSF:FullBath:YearBuilt                                      1.280e-03
## OverallQual:GarageArea:YearRemodAdd                               5.700e-04
## OverallQual:X1stFlrSF:YearRemodAdd                                3.537e-04
## GarageArea:X1stFlrSF:YearRemodAdd                                 3.428e-06
## OverallQual:FullBath:YearRemodAdd                                 2.305e-01
## GarageArea:FullBath:YearRemodAdd                                  2.190e-03
## X1stFlrSF:FullBath:YearRemodAdd                                   1.259e-03
## OverallQual:YearBuilt:YearRemodAdd                                1.808e-04
## GarageArea:YearBuilt:YearRemodAdd                                 1.682e-06
## X1stFlrSF:YearBuilt:YearRemodAdd                                  1.043e-06
## FullBath:YearBuilt:YearRemodAdd                                   6.683e-04
## OverallQual:GarageArea:X1stFlrSF:FullBath                         6.775e-04
## OverallQual:GarageArea:X1stFlrSF:YearBuilt                        5.685e-07
## OverallQual:GarageArea:FullBath:YearBuilt                         3.732e-04
## OverallQual:X1stFlrSF:FullBath:YearBuilt                          2.200e-04
## GarageArea:X1stFlrSF:FullBath:YearBuilt                           2.129e-06
## OverallQual:GarageArea:X1stFlrSF:YearRemodAdd                     5.628e-07
## OverallQual:GarageArea:FullBath:YearRemodAdd                      3.682e-04
## OverallQual:X1stFlrSF:FullBath:YearRemodAdd                       2.167e-04
## GarageArea:X1stFlrSF:FullBath:YearRemodAdd                        2.097e-06
## OverallQual:GarageArea:YearBuilt:YearRemodAdd                     2.900e-07
## OverallQual:X1stFlrSF:YearBuilt:YearRemodAdd                      1.807e-07
## GarageArea:X1stFlrSF:YearBuilt:YearRemodAdd                       1.747e-09
## OverallQual:FullBath:YearBuilt:YearRemodAdd                       1.178e-04
## GarageArea:FullBath:YearBuilt:YearRemodAdd                        1.119e-06
## X1stFlrSF:FullBath:YearBuilt:YearRemodAdd                         6.436e-07
## OverallQual:GarageArea:X1stFlrSF:FullBath:YearBuilt               3.430e-07
## OverallQual:GarageArea:X1stFlrSF:FullBath:YearRemodAdd            3.398e-07
## OverallQual:GarageArea:X1stFlrSF:YearBuilt:YearRemodAdd           2.854e-10
## OverallQual:GarageArea:FullBath:YearBuilt:YearRemodAdd            1.871e-07
## OverallQual:X1stFlrSF:FullBath:YearBuilt:YearRemodAdd             1.105e-07
## GarageArea:X1stFlrSF:FullBath:YearBuilt:YearRemodAdd              1.068e-09
## OverallQual:GarageArea:X1stFlrSF:FullBath:YearBuilt:YearRemodAdd  1.720e-10
##                                                                  t value
## (Intercept)                                                       -2.430
## OverallQual                                                        1.951
## GarageArea                                                         3.467
## X1stFlrSF                                                          1.537
## FullBath                                                           2.571
## YearBuilt                                                          2.457
## YearRemodAdd                                                       2.422
## OverallQual:GarageArea                                            -2.545
## OverallQual:X1stFlrSF                                             -1.089
## GarageArea:X1stFlrSF                                              -2.149
## OverallQual:FullBath                                              -2.135
## GarageArea:FullBath                                               -3.194
## X1stFlrSF:FullBath                                                -1.801
## OverallQual:YearBuilt                                             -1.982
## GarageArea:YearBuilt                                              -3.545
## X1stFlrSF:YearBuilt                                               -1.545
## FullBath:YearBuilt                                                -2.611
## OverallQual:YearRemodAdd                                          -1.945
## GarageArea:YearRemodAdd                                           -3.455
## X1stFlrSF:YearRemodAdd                                            -1.534
## FullBath:YearRemodAdd                                             -2.569
## YearBuilt:YearRemodAdd                                            -2.447
## OverallQual:GarageArea:X1stFlrSF                                   1.305
## OverallQual:GarageArea:FullBath                                    2.414
## OverallQual:X1stFlrSF:FullBath                                     1.386
## GarageArea:X1stFlrSF:FullBath                                      2.231
## OverallQual:GarageArea:YearBuilt                                   2.636
## OverallQual:X1stFlrSF:YearBuilt                                    1.102
## GarageArea:X1stFlrSF:YearBuilt                                     2.203
## OverallQual:FullBath:YearBuilt                                     2.182
## GarageArea:FullBath:YearBuilt                                      3.274
## X1stFlrSF:FullBath:YearBuilt                                       1.826
## OverallQual:GarageArea:YearRemodAdd                                2.540
## OverallQual:X1stFlrSF:YearRemodAdd                                 1.090
## GarageArea:X1stFlrSF:YearRemodAdd                                  2.140
## OverallQual:FullBath:YearRemodAdd                                  2.133
## GarageArea:FullBath:YearRemodAdd                                   3.192
## X1stFlrSF:FullBath:YearRemodAdd                                    1.800
## OverallQual:YearBuilt:YearRemodAdd                                 1.976
## GarageArea:YearBuilt:YearRemodAdd                                  3.533
## X1stFlrSF:YearBuilt:YearRemodAdd                                   1.542
## FullBath:YearBuilt:YearRemodAdd                                    2.610
## OverallQual:GarageArea:X1stFlrSF:FullBath                         -1.468
## OverallQual:GarageArea:X1stFlrSF:YearBuilt                        -1.371
## OverallQual:GarageArea:FullBath:YearBuilt                         -2.511
## OverallQual:X1stFlrSF:FullBath:YearBuilt                          -1.417
## GarageArea:X1stFlrSF:FullBath:YearBuilt                           -2.295
## OverallQual:GarageArea:X1stFlrSF:YearRemodAdd                     -1.303
## OverallQual:GarageArea:FullBath:YearRemodAdd                      -2.414
## OverallQual:X1stFlrSF:FullBath:YearRemodAdd                       -1.385
## GarageArea:X1stFlrSF:FullBath:YearRemodAdd                        -2.225
## OverallQual:GarageArea:YearBuilt:YearRemodAdd                     -2.632
## OverallQual:X1stFlrSF:YearBuilt:YearRemodAdd                      -1.104
## GarageArea:X1stFlrSF:YearBuilt:YearRemodAdd                       -2.194
## OverallQual:FullBath:YearBuilt:YearRemodAdd                       -2.181
## GarageArea:FullBath:YearBuilt:YearRemodAdd                        -3.273
## X1stFlrSF:FullBath:YearBuilt:YearRemodAdd                         -1.826
## OverallQual:GarageArea:X1stFlrSF:FullBath:YearBuilt                1.545
## OverallQual:GarageArea:X1stFlrSF:FullBath:YearRemodAdd             1.466
## OverallQual:GarageArea:X1stFlrSF:YearBuilt:YearRemodAdd            1.368
## OverallQual:GarageArea:FullBath:YearBuilt:YearRemodAdd             2.512
## OverallQual:X1stFlrSF:FullBath:YearBuilt:YearRemodAdd              1.417
## GarageArea:X1stFlrSF:FullBath:YearBuilt:YearRemodAdd               2.290
## OverallQual:GarageArea:X1stFlrSF:FullBath:YearBuilt:YearRemodAdd  -1.543
##                                                                  Pr(>|t|)    
## (Intercept)                                                      0.015207 *  
## OverallQual                                                      0.051218 .  
## GarageArea                                                       0.000542 ***
## X1stFlrSF                                                        0.124571    
## FullBath                                                         0.010253 *  
## YearBuilt                                                        0.014117 *  
## YearRemodAdd                                                     0.015557 *  
## OverallQual:GarageArea                                           0.011038 *  
## OverallQual:X1stFlrSF                                            0.276410    
## GarageArea:X1stFlrSF                                             0.031791 *  
## OverallQual:FullBath                                             0.032919 *  
## GarageArea:FullBath                                              0.001435 ** 
## X1stFlrSF:FullBath                                               0.071921 .  
## OverallQual:YearBuilt                                            0.047666 *  
## GarageArea:YearBuilt                                             0.000406 ***
## X1stFlrSF:YearBuilt                                              0.122649    
## FullBath:YearBuilt                                               0.009115 ** 
## OverallQual:YearRemodAdd                                         0.052006 .  
## GarageArea:YearRemodAdd                                          0.000568 ***
## X1stFlrSF:YearRemodAdd                                           0.125302    
## FullBath:YearRemodAdd                                            0.010305 *  
## YearBuilt:YearRemodAdd                                           0.014537 *  
## OverallQual:GarageArea:X1stFlrSF                                 0.191943    
## OverallQual:GarageArea:FullBath                                  0.015909 *  
## OverallQual:X1stFlrSF:FullBath                                   0.165983    
## GarageArea:X1stFlrSF:FullBath                                    0.025854 *  
## OverallQual:GarageArea:YearBuilt                                 0.008477 ** 
## OverallQual:X1stFlrSF:YearBuilt                                  0.270658    
## GarageArea:X1stFlrSF:YearBuilt                                   0.027724 *  
## OverallQual:FullBath:YearBuilt                                   0.029247 *  
## GarageArea:FullBath:YearBuilt                                    0.001085 ** 
## X1stFlrSF:FullBath:YearBuilt                                     0.068025 .  
## OverallQual:GarageArea:YearRemodAdd                              0.011186 *  
## OverallQual:X1stFlrSF:YearRemodAdd                               0.275804    
## GarageArea:X1stFlrSF:YearRemodAdd                                0.032547 *  
## OverallQual:FullBath:YearRemodAdd                                0.033119 *  
## GarageArea:FullBath:YearRemodAdd                                 0.001445 ** 
## X1stFlrSF:FullBath:YearRemodAdd                                  0.072094 .  
## OverallQual:YearBuilt:YearRemodAdd                               0.048347 *  
## GarageArea:YearBuilt:YearRemodAdd                                0.000425 ***
## X1stFlrSF:YearBuilt:YearRemodAdd                                 0.123253    
## FullBath:YearBuilt:YearRemodAdd                                  0.009145 ** 
## OverallQual:GarageArea:X1stFlrSF:FullBath                        0.142332    
## OverallQual:GarageArea:X1stFlrSF:YearBuilt                       0.170742    
## OverallQual:GarageArea:FullBath:YearBuilt                        0.012156 *  
## OverallQual:X1stFlrSF:FullBath:YearBuilt                         0.156596    
## GarageArea:X1stFlrSF:FullBath:YearBuilt                          0.021867 *  
## OverallQual:GarageArea:X1stFlrSF:YearRemodAdd                    0.192916    
## OverallQual:GarageArea:FullBath:YearRemodAdd                     0.015907 *  
## OverallQual:X1stFlrSF:FullBath:YearRemodAdd                      0.166177    
## GarageArea:X1stFlrSF:FullBath:YearRemodAdd                       0.026209 *  
## OverallQual:GarageArea:YearBuilt:YearRemodAdd                    0.008582 ** 
## OverallQual:X1stFlrSF:YearBuilt:YearRemodAdd                     0.269913    
## GarageArea:X1stFlrSF:YearBuilt:YearRemodAdd                      0.028378 *  
## OverallQual:FullBath:YearBuilt:YearRemodAdd                      0.029384 *  
## GarageArea:FullBath:YearBuilt:YearRemodAdd                       0.001090 ** 
## X1stFlrSF:FullBath:YearBuilt:YearRemodAdd                        0.068120 .  
## OverallQual:GarageArea:X1stFlrSF:FullBath:YearBuilt              0.122559    
## OverallQual:GarageArea:X1stFlrSF:FullBath:YearRemodAdd           0.142958    
## OverallQual:GarageArea:X1stFlrSF:YearBuilt:YearRemodAdd          0.171553    
## OverallQual:GarageArea:FullBath:YearBuilt:YearRemodAdd           0.012131 *  
## OverallQual:X1stFlrSF:FullBath:YearBuilt:YearRemodAdd            0.156650    
## GarageArea:X1stFlrSF:FullBath:YearBuilt:YearRemodAdd             0.022157 *  
## OverallQual:GarageArea:X1stFlrSF:FullBath:YearBuilt:YearRemodAdd 0.123042    
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## Residual standard error: 0.171 on 1396 degrees of freedom
## Multiple R-squared:  0.8246, Adjusted R-squared:  0.8167 
## F-statistic: 104.2 on 63 and 1396 DF,  p-value: < 2.2e-16
# Diagnostic plots for model_4: panels 1 and 2 of plot.lm()
# (residuals vs fitted, and the normal Q-Q plot).
plot(model_4, which = 1:2)

# Fresh import of the test data from the project repository.
test <- read.csv(
  "https://raw.githubusercontent.com/melbow2424/Data_605_Final/main/Data/test.csv"
)

# Recode a missing GarageArea as 0; GarageArea is one of model_4's predictors.
test$GarageArea <- ifelse(is.na(test$GarageArea), 0, test$GarageArea)

summary(test)
# Build the prediction for every row of the test set from model_4.
predict_model <- predict(model_4, newdata = test)

# Exponentiate the predictions before reporting them as SalePrice.
# NOTE(review): presumably model_4 was fit on log(SalePrice) -- confirm against
# the model definition earlier in the file.
# exp() is already vectorised, so no sapply() wrapper is needed; element names
# are preserved.
predict_model_exp <- exp(predict_model)

# Assemble the two-column (Id, SalePrice) submission table and preview it.
Id <- test$Id
SalePrice <- predict_model_exp
submission <- data.frame(Id, SalePrice)
head(submission)
##     Id SalePrice
## 1 1461  130181.0
## 2 1462  148874.7
## 3 1463  165046.3
## 4 1464  177756.2
## 5 1465  235311.2
## 6 1466  171317.3
# Five-number summary (plus mean) of the submission columns.
summary(submission)
##        Id         SalePrice     
##  Min.   :1461   Min.   : 44633  
##  1st Qu.:1826   1st Qu.:127761  
##  Median :2190   Median :163839  
##  Mean   :2190   Mean   :177452  
##  3rd Qu.:2554   3rd Qu.:207892  
##  Max.   :2919   Max.   :571400