```{r setup, include=FALSE}
knitr::opts_chunk$set(echo = TRUE, message = FALSE, warning = FALSE)
```

Loading Necessary Libraries

library(tidyverse)
## ── Attaching core tidyverse packages ──────────────────────── tidyverse 2.0.0 ──
## ✔ dplyr     1.1.4     ✔ readr     2.1.5
## ✔ forcats   1.0.0     ✔ stringr   1.5.1
## ✔ ggplot2   3.5.1     ✔ tibble    3.2.1
## ✔ lubridate 1.9.3     ✔ tidyr     1.3.1
## ✔ purrr     1.0.2     
## ── Conflicts ────────────────────────────────────────── tidyverse_conflicts() ──
## ✖ dplyr::filter() masks stats::filter()
## ✖ dplyr::lag()    masks stats::lag()
## ℹ Use the conflicted package (<http://conflicted.r-lib.org/>) to force all conflicts to become errors
library(broom)
library(car)  # For VIF calculations
## Loading required package: carData
## 
## Attaching package: 'car'
## 
## The following object is masked from 'package:dplyr':
## 
##     recode
## 
## The following object is masked from 'package:purrr':
## 
##     some

Loading and Exploring Data

# Read the Ames housing data from its CSV file.
# NOTE(review): this is an absolute path on a local D: drive, so the script
# only runs on a machine that has the file at exactly this location.
ames_csv <- 'D:/Stats for DS/ames.csv'
ames <- read.csv(file = ames_csv, header = TRUE)

# Print every column with its type and first few values
dplyr::glimpse(ames)
## Rows: 2,930
## Columns: 82
## $ Order           <int> 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16,…
## $ PID             <int> 526301100, 526350040, 526351010, 526353030, 527105010,…
## $ MS.SubClass     <int> 20, 20, 20, 20, 60, 60, 120, 120, 120, 60, 60, 20, 60,…
## $ MS.Zoning       <chr> "RL", "RH", "RL", "RL", "RL", "RL", "RL", "RL", "RL", …
## $ Lot.Frontage    <int> 141, 80, 81, 93, 74, 78, 41, 43, 39, 60, 75, NA, 63, 8…
## $ Lot.Area        <int> 31770, 11622, 14267, 11160, 13830, 9978, 4920, 5005, 5…
## $ Street          <chr> "Pave", "Pave", "Pave", "Pave", "Pave", "Pave", "Pave"…
## $ Alley           <chr> NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA…
## $ Lot.Shape       <chr> "IR1", "Reg", "IR1", "Reg", "IR1", "IR1", "Reg", "IR1"…
## $ Land.Contour    <chr> "Lvl", "Lvl", "Lvl", "Lvl", "Lvl", "Lvl", "Lvl", "HLS"…
## $ Utilities       <chr> "AllPub", "AllPub", "AllPub", "AllPub", "AllPub", "All…
## $ Lot.Config      <chr> "Corner", "Inside", "Corner", "Corner", "Inside", "Ins…
## $ Land.Slope      <chr> "Gtl", "Gtl", "Gtl", "Gtl", "Gtl", "Gtl", "Gtl", "Gtl"…
## $ Neighborhood    <chr> "NAmes", "NAmes", "NAmes", "NAmes", "Gilbert", "Gilber…
## $ Condition.1     <chr> "Norm", "Feedr", "Norm", "Norm", "Norm", "Norm", "Norm…
## $ Condition.2     <chr> "Norm", "Norm", "Norm", "Norm", "Norm", "Norm", "Norm"…
## $ Bldg.Type       <chr> "1Fam", "1Fam", "1Fam", "1Fam", "1Fam", "1Fam", "Twnhs…
## $ House.Style     <chr> "1Story", "1Story", "1Story", "1Story", "2Story", "2St…
## $ Overall.Qual    <int> 6, 5, 6, 7, 5, 6, 8, 8, 8, 7, 6, 6, 6, 7, 8, 8, 8, 9, …
## $ Overall.Cond    <int> 5, 6, 6, 5, 5, 6, 5, 5, 5, 5, 5, 7, 5, 5, 5, 5, 7, 2, …
## $ Year.Built      <int> 1960, 1961, 1958, 1968, 1997, 1998, 2001, 1992, 1995, …
## $ Year.Remod.Add  <int> 1960, 1961, 1958, 1968, 1998, 1998, 2001, 1992, 1996, …
## $ Roof.Style      <chr> "Hip", "Gable", "Hip", "Hip", "Gable", "Gable", "Gable…
## $ Roof.Matl       <chr> "CompShg", "CompShg", "CompShg", "CompShg", "CompShg",…
## $ Exterior.1st    <chr> "BrkFace", "VinylSd", "Wd Sdng", "BrkFace", "VinylSd",…
## $ Exterior.2nd    <chr> "Plywood", "VinylSd", "Wd Sdng", "BrkFace", "VinylSd",…
## $ Mas.Vnr.Type    <chr> "Stone", "None", "BrkFace", "None", "None", "BrkFace",…
## $ Mas.Vnr.Area    <int> 112, 0, 108, 0, 0, 20, 0, 0, 0, 0, 0, 0, 0, 0, 0, 603,…
## $ Exter.Qual      <chr> "TA", "TA", "TA", "Gd", "TA", "TA", "Gd", "Gd", "Gd", …
## $ Exter.Cond      <chr> "TA", "TA", "TA", "TA", "TA", "TA", "TA", "TA", "TA", …
## $ Foundation      <chr> "CBlock", "CBlock", "CBlock", "CBlock", "PConc", "PCon…
## $ Bsmt.Qual       <chr> "TA", "TA", "TA", "TA", "Gd", "TA", "Gd", "Gd", "Gd", …
## $ Bsmt.Cond       <chr> "Gd", "TA", "TA", "TA", "TA", "TA", "TA", "TA", "TA", …
## $ Bsmt.Exposure   <chr> "Gd", "No", "No", "No", "No", "No", "Mn", "No", "No", …
## $ BsmtFin.Type.1  <chr> "BLQ", "Rec", "ALQ", "ALQ", "GLQ", "GLQ", "GLQ", "ALQ"…
## $ BsmtFin.SF.1    <int> 639, 468, 923, 1065, 791, 602, 616, 263, 1180, 0, 0, 9…
## $ BsmtFin.Type.2  <chr> "Unf", "LwQ", "Unf", "Unf", "Unf", "Unf", "Unf", "Unf"…
## $ BsmtFin.SF.2    <int> 0, 144, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1120, 0, 0…
## $ Bsmt.Unf.SF     <int> 441, 270, 406, 1045, 137, 324, 722, 1017, 415, 994, 76…
## $ Total.Bsmt.SF   <int> 1080, 882, 1329, 2110, 928, 926, 1338, 1280, 1595, 994…
## $ Heating         <chr> "GasA", "GasA", "GasA", "GasA", "GasA", "GasA", "GasA"…
## $ Heating.QC      <chr> "Fa", "TA", "TA", "Ex", "Gd", "Ex", "Ex", "Ex", "Ex", …
## $ Central.Air     <chr> "Y", "Y", "Y", "Y", "Y", "Y", "Y", "Y", "Y", "Y", "Y",…
## $ Electrical      <chr> "SBrkr", "SBrkr", "SBrkr", "SBrkr", "SBrkr", "SBrkr", …
## $ X1st.Flr.SF     <int> 1656, 896, 1329, 2110, 928, 926, 1338, 1280, 1616, 102…
## $ X2nd.Flr.SF     <int> 0, 0, 0, 0, 701, 678, 0, 0, 0, 776, 892, 0, 676, 0, 0,…
## $ Low.Qual.Fin.SF <int> 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, …
## $ Gr.Liv.Area     <int> 1656, 896, 1329, 2110, 1629, 1604, 1338, 1280, 1616, 1…
## $ Bsmt.Full.Bath  <int> 1, 0, 0, 1, 0, 0, 1, 0, 1, 0, 0, 1, 0, 1, 1, 1, 0, 1, …
## $ Bsmt.Half.Bath  <int> 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, …
## $ Full.Bath       <int> 1, 1, 1, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 1, 1, 3, 2, 1, …
## $ Half.Bath       <int> 0, 0, 1, 1, 1, 1, 0, 0, 0, 1, 1, 0, 1, 1, 1, 1, 0, 1, …
## $ Bedroom.AbvGr   <int> 3, 2, 3, 3, 3, 3, 2, 2, 2, 3, 3, 3, 3, 2, 1, 4, 4, 1, …
## $ Kitchen.AbvGr   <int> 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, …
## $ Kitchen.Qual    <chr> "TA", "TA", "Gd", "Ex", "TA", "Gd", "Gd", "Gd", "Gd", …
## $ TotRms.AbvGrd   <int> 7, 5, 6, 8, 6, 7, 6, 5, 5, 7, 7, 6, 7, 5, 4, 12, 8, 8,…
## $ Functional      <chr> "Typ", "Typ", "Typ", "Typ", "Typ", "Typ", "Typ", "Typ"…
## $ Fireplaces      <int> 2, 0, 0, 2, 1, 1, 0, 0, 1, 1, 1, 0, 1, 1, 0, 1, 0, 1, …
## $ Fireplace.Qu    <chr> "Gd", NA, NA, "TA", "TA", "Gd", NA, NA, "TA", "TA", "T…
## $ Garage.Type     <chr> "Attchd", "Attchd", "Attchd", "Attchd", "Attchd", "Att…
## $ Garage.Yr.Blt   <int> 1960, 1961, 1958, 1968, 1997, 1998, 2001, 1992, 1995, …
## $ Garage.Finish   <chr> "Fin", "Unf", "Unf", "Fin", "Fin", "Fin", "Fin", "RFn"…
## $ Garage.Cars     <int> 2, 1, 1, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 3, 2, 3, …
## $ Garage.Area     <int> 528, 730, 312, 522, 482, 470, 582, 506, 608, 442, 440,…
## $ Garage.Qual     <chr> "TA", "TA", "TA", "TA", "TA", "TA", "TA", "TA", "TA", …
## $ Garage.Cond     <chr> "TA", "TA", "TA", "TA", "TA", "TA", "TA", "TA", "TA", …
## $ Paved.Drive     <chr> "P", "Y", "Y", "Y", "Y", "Y", "Y", "Y", "Y", "Y", "Y",…
## $ Wood.Deck.SF    <int> 210, 140, 393, 0, 212, 360, 0, 0, 237, 140, 157, 483, …
## $ Open.Porch.SF   <int> 62, 0, 36, 0, 34, 36, 0, 82, 152, 60, 84, 21, 75, 0, 5…
## $ Enclosed.Porch  <int> 0, 0, 0, 0, 0, 0, 170, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0…
## $ X3Ssn.Porch     <int> 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, …
## $ Screen.Porch    <int> 0, 120, 0, 0, 0, 0, 0, 144, 0, 0, 0, 0, 0, 0, 140, 210…
## $ Pool.Area       <int> 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, …
## $ Pool.QC         <chr> NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA…
## $ Fence           <chr> NA, "MnPrv", NA, NA, "MnPrv", NA, NA, NA, NA, NA, NA, …
## $ Misc.Feature    <chr> NA, NA, "Gar2", NA, NA, NA, NA, NA, NA, NA, NA, "Shed"…
## $ Misc.Val        <int> 0, 0, 12500, 0, 0, 0, 0, 0, 0, 0, 0, 500, 0, 0, 0, 0, …
## $ Mo.Sold         <int> 5, 6, 6, 4, 3, 6, 4, 1, 3, 6, 4, 3, 5, 2, 6, 6, 6, 6, …
## $ Yr.Sold         <int> 2010, 2010, 2010, 2010, 2010, 2010, 2010, 2010, 2010, …
## $ Sale.Type       <chr> "WD ", "WD ", "WD ", "WD ", "WD ", "WD ", "WD ", "WD "…
## $ Sale.Condition  <chr> "Normal", "Normal", "Normal", "Normal", "Normal", "Nor…
## $ SalePrice       <int> 215000, 105000, 172000, 244000, 189900, 195500, 213500…
# Per-column summary of `ames` (the printed result follows below)
summary(ames)
##      Order             PID             MS.SubClass      MS.Zoning        
##  Min.   :   1.0   Min.   :5.263e+08   Min.   : 20.00   Length:2930       
##  1st Qu.: 733.2   1st Qu.:5.285e+08   1st Qu.: 20.00   Class :character  
##  Median :1465.5   Median :5.355e+08   Median : 50.00   Mode  :character  
##  Mean   :1465.5   Mean   :7.145e+08   Mean   : 57.39                     
##  3rd Qu.:2197.8   3rd Qu.:9.072e+08   3rd Qu.: 70.00                     
##  Max.   :2930.0   Max.   :1.007e+09   Max.   :190.00                     
##                                                                          
##   Lot.Frontage       Lot.Area         Street             Alley          
##  Min.   : 21.00   Min.   :  1300   Length:2930        Length:2930       
##  1st Qu.: 58.00   1st Qu.:  7440   Class :character   Class :character  
##  Median : 68.00   Median :  9436   Mode  :character   Mode  :character  
##  Mean   : 69.22   Mean   : 10148                                        
##  3rd Qu.: 80.00   3rd Qu.: 11555                                        
##  Max.   :313.00   Max.   :215245                                        
##  NA's   :490                                                            
##   Lot.Shape         Land.Contour        Utilities          Lot.Config       
##  Length:2930        Length:2930        Length:2930        Length:2930       
##  Class :character   Class :character   Class :character   Class :character  
##  Mode  :character   Mode  :character   Mode  :character   Mode  :character  
##                                                                             
##                                                                             
##                                                                             
##                                                                             
##   Land.Slope        Neighborhood       Condition.1        Condition.2       
##  Length:2930        Length:2930        Length:2930        Length:2930       
##  Class :character   Class :character   Class :character   Class :character  
##  Mode  :character   Mode  :character   Mode  :character   Mode  :character  
##                                                                             
##                                                                             
##                                                                             
##                                                                             
##   Bldg.Type         House.Style         Overall.Qual     Overall.Cond  
##  Length:2930        Length:2930        Min.   : 1.000   Min.   :1.000  
##  Class :character   Class :character   1st Qu.: 5.000   1st Qu.:5.000  
##  Mode  :character   Mode  :character   Median : 6.000   Median :5.000  
##                                        Mean   : 6.095   Mean   :5.563  
##                                        3rd Qu.: 7.000   3rd Qu.:6.000  
##                                        Max.   :10.000   Max.   :9.000  
##                                                                        
##    Year.Built   Year.Remod.Add  Roof.Style         Roof.Matl        
##  Min.   :1872   Min.   :1950   Length:2930        Length:2930       
##  1st Qu.:1954   1st Qu.:1965   Class :character   Class :character  
##  Median :1973   Median :1993   Mode  :character   Mode  :character  
##  Mean   :1971   Mean   :1984                                        
##  3rd Qu.:2001   3rd Qu.:2004                                        
##  Max.   :2010   Max.   :2010                                        
##                                                                     
##  Exterior.1st       Exterior.2nd       Mas.Vnr.Type        Mas.Vnr.Area   
##  Length:2930        Length:2930        Length:2930        Min.   :   0.0  
##  Class :character   Class :character   Class :character   1st Qu.:   0.0  
##  Mode  :character   Mode  :character   Mode  :character   Median :   0.0  
##                                                           Mean   : 101.9  
##                                                           3rd Qu.: 164.0  
##                                                           Max.   :1600.0  
##                                                           NA's   :23      
##   Exter.Qual         Exter.Cond         Foundation         Bsmt.Qual        
##  Length:2930        Length:2930        Length:2930        Length:2930       
##  Class :character   Class :character   Class :character   Class :character  
##  Mode  :character   Mode  :character   Mode  :character   Mode  :character  
##                                                                             
##                                                                             
##                                                                             
##                                                                             
##   Bsmt.Cond         Bsmt.Exposure      BsmtFin.Type.1      BsmtFin.SF.1   
##  Length:2930        Length:2930        Length:2930        Min.   :   0.0  
##  Class :character   Class :character   Class :character   1st Qu.:   0.0  
##  Mode  :character   Mode  :character   Mode  :character   Median : 370.0  
##                                                           Mean   : 442.6  
##                                                           3rd Qu.: 734.0  
##                                                           Max.   :5644.0  
##                                                           NA's   :1       
##  BsmtFin.Type.2      BsmtFin.SF.2      Bsmt.Unf.SF     Total.Bsmt.SF 
##  Length:2930        Min.   :   0.00   Min.   :   0.0   Min.   :   0  
##  Class :character   1st Qu.:   0.00   1st Qu.: 219.0   1st Qu.: 793  
##  Mode  :character   Median :   0.00   Median : 466.0   Median : 990  
##                     Mean   :  49.72   Mean   : 559.3   Mean   :1052  
##                     3rd Qu.:   0.00   3rd Qu.: 802.0   3rd Qu.:1302  
##                     Max.   :1526.00   Max.   :2336.0   Max.   :6110  
##                     NA's   :1         NA's   :1        NA's   :1     
##    Heating           Heating.QC        Central.Air         Electrical       
##  Length:2930        Length:2930        Length:2930        Length:2930       
##  Class :character   Class :character   Class :character   Class :character  
##  Mode  :character   Mode  :character   Mode  :character   Mode  :character  
##                                                                             
##                                                                             
##                                                                             
##                                                                             
##   X1st.Flr.SF      X2nd.Flr.SF     Low.Qual.Fin.SF     Gr.Liv.Area  
##  Min.   : 334.0   Min.   :   0.0   Min.   :   0.000   Min.   : 334  
##  1st Qu.: 876.2   1st Qu.:   0.0   1st Qu.:   0.000   1st Qu.:1126  
##  Median :1084.0   Median :   0.0   Median :   0.000   Median :1442  
##  Mean   :1159.6   Mean   : 335.5   Mean   :   4.677   Mean   :1500  
##  3rd Qu.:1384.0   3rd Qu.: 703.8   3rd Qu.:   0.000   3rd Qu.:1743  
##  Max.   :5095.0   Max.   :2065.0   Max.   :1064.000   Max.   :5642  
##                                                                     
##  Bsmt.Full.Bath   Bsmt.Half.Bath      Full.Bath       Half.Bath     
##  Min.   :0.0000   Min.   :0.00000   Min.   :0.000   Min.   :0.0000  
##  1st Qu.:0.0000   1st Qu.:0.00000   1st Qu.:1.000   1st Qu.:0.0000  
##  Median :0.0000   Median :0.00000   Median :2.000   Median :0.0000  
##  Mean   :0.4314   Mean   :0.06113   Mean   :1.567   Mean   :0.3795  
##  3rd Qu.:1.0000   3rd Qu.:0.00000   3rd Qu.:2.000   3rd Qu.:1.0000  
##  Max.   :3.0000   Max.   :2.00000   Max.   :4.000   Max.   :2.0000  
##  NA's   :2        NA's   :2                                         
##  Bedroom.AbvGr   Kitchen.AbvGr   Kitchen.Qual       TotRms.AbvGrd   
##  Min.   :0.000   Min.   :0.000   Length:2930        Min.   : 2.000  
##  1st Qu.:2.000   1st Qu.:1.000   Class :character   1st Qu.: 5.000  
##  Median :3.000   Median :1.000   Mode  :character   Median : 6.000  
##  Mean   :2.854   Mean   :1.044                      Mean   : 6.443  
##  3rd Qu.:3.000   3rd Qu.:1.000                      3rd Qu.: 7.000  
##  Max.   :8.000   Max.   :3.000                      Max.   :15.000  
##                                                                     
##   Functional          Fireplaces     Fireplace.Qu       Garage.Type       
##  Length:2930        Min.   :0.0000   Length:2930        Length:2930       
##  Class :character   1st Qu.:0.0000   Class :character   Class :character  
##  Mode  :character   Median :1.0000   Mode  :character   Mode  :character  
##                     Mean   :0.5993                                        
##                     3rd Qu.:1.0000                                        
##                     Max.   :4.0000                                        
##                                                                           
##  Garage.Yr.Blt  Garage.Finish       Garage.Cars     Garage.Area    
##  Min.   :1895   Length:2930        Min.   :0.000   Min.   :   0.0  
##  1st Qu.:1960   Class :character   1st Qu.:1.000   1st Qu.: 320.0  
##  Median :1979   Mode  :character   Median :2.000   Median : 480.0  
##  Mean   :1978                      Mean   :1.767   Mean   : 472.8  
##  3rd Qu.:2002                      3rd Qu.:2.000   3rd Qu.: 576.0  
##  Max.   :2207                      Max.   :5.000   Max.   :1488.0  
##  NA's   :159                       NA's   :1       NA's   :1       
##  Garage.Qual        Garage.Cond        Paved.Drive         Wood.Deck.SF    
##  Length:2930        Length:2930        Length:2930        Min.   :   0.00  
##  Class :character   Class :character   Class :character   1st Qu.:   0.00  
##  Mode  :character   Mode  :character   Mode  :character   Median :   0.00  
##                                                           Mean   :  93.75  
##                                                           3rd Qu.: 168.00  
##                                                           Max.   :1424.00  
##                                                                            
##  Open.Porch.SF    Enclosed.Porch     X3Ssn.Porch       Screen.Porch
##  Min.   :  0.00   Min.   :   0.00   Min.   :  0.000   Min.   :  0  
##  1st Qu.:  0.00   1st Qu.:   0.00   1st Qu.:  0.000   1st Qu.:  0  
##  Median : 27.00   Median :   0.00   Median :  0.000   Median :  0  
##  Mean   : 47.53   Mean   :  23.01   Mean   :  2.592   Mean   : 16  
##  3rd Qu.: 70.00   3rd Qu.:   0.00   3rd Qu.:  0.000   3rd Qu.:  0  
##  Max.   :742.00   Max.   :1012.00   Max.   :508.000   Max.   :576  
##                                                                    
##    Pool.Area         Pool.QC             Fence           Misc.Feature      
##  Min.   :  0.000   Length:2930        Length:2930        Length:2930       
##  1st Qu.:  0.000   Class :character   Class :character   Class :character  
##  Median :  0.000   Mode  :character   Mode  :character   Mode  :character  
##  Mean   :  2.243                                                           
##  3rd Qu.:  0.000                                                           
##  Max.   :800.000                                                           
##                                                                            
##     Misc.Val           Mo.Sold          Yr.Sold      Sale.Type        
##  Min.   :    0.00   Min.   : 1.000   Min.   :2006   Length:2930       
##  1st Qu.:    0.00   1st Qu.: 4.000   1st Qu.:2007   Class :character  
##  Median :    0.00   Median : 6.000   Median :2008   Mode  :character  
##  Mean   :   50.63   Mean   : 6.216   Mean   :2008                     
##  3rd Qu.:    0.00   3rd Qu.: 8.000   3rd Qu.:2009                     
##  Max.   :17000.00   Max.   :12.000   Max.   :2010                     
##                                                                       
##  Sale.Condition       SalePrice     
##  Length:2930        Min.   : 12789  
##  Class :character   1st Qu.:129500  
##  Mode  :character   Median :160000  
##                     Mean   :180796  
##                     3rd Qu.:213500  
##                     Max.   :755000  
## 
# Recode Central.Air to 1 where it equals "Y" and 0 otherwise, so it can be
# used as the binary response of the logistic regression.
# The column is overwritten in place: re-running this step on the already
# recoded data would map every value to 0, so run it once per fresh load.
ames <- ames %>%
  mutate(Central.Air = as.numeric(Central.Air == "Y"))

Model Building

# Ordinary least squares: sale price regressed on living area, lot area and
# overall quality rating
linear_model <- lm(
  formula = SalePrice ~ Gr.Liv.Area + Lot.Area + Overall.Qual,
  data = ames
)

# Coefficient table, residual quantiles, R-squared and overall F-test
summary(linear_model)
## 
## Call:
## lm(formula = SalePrice ~ Gr.Liv.Area + Lot.Area + Overall.Qual, 
##     data = ames)
## 
## Residuals:
##     Min      1Q  Median      3Q     Max 
## -432835  -22651   -1106   19280  283790 
## 
## Coefficients:
##                Estimate Std. Error t value Pr(>|t|)    
## (Intercept)  -1.162e+05  3.390e+03  -34.28   <2e-16 ***
## Gr.Liv.Area   5.265e+01  1.877e+00   28.05   <2e-16 ***
## Lot.Area      1.148e+00  9.934e-02   11.56   <2e-16 ***
## Overall.Qual  3.387e+04  6.474e+02   52.31   <2e-16 ***
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## Residual standard error: 40460 on 2926 degrees of freedom
## Multiple R-squared:  0.7437, Adjusted R-squared:  0.7435 
## F-statistic:  2831 on 3 and 2926 DF,  p-value: < 2.2e-16

Logistic Regression Model

# Keep only the response and the four predictors, then drop incomplete rows
ames_model_data <- ames %>%
  select(Central.Air, Gr.Liv.Area, Overall.Qual, Year.Built, Lot.Area) %>%
  na.omit()

# Binomial GLM with a logit link; Central.Air is the 0/1 response
logit_model <- glm(
  formula = Central.Air ~ Gr.Liv.Area + Overall.Qual + Year.Built + Lot.Area,
  family = binomial(link = "logit"),
  data = ames_model_data
)

# Coefficient table, null/residual deviance and AIC
summary(logit_model)
## 
## Call:
## glm(formula = Central.Air ~ Gr.Liv.Area + Overall.Qual + Year.Built + 
##     Lot.Area, family = binomial(link = "logit"), data = ames_model_data)
## 
## Coefficients:
##                Estimate Std. Error z value Pr(>|z|)    
## (Intercept)  -9.668e+01  7.345e+00 -13.162  < 2e-16 ***
## Gr.Liv.Area   2.246e-04  2.333e-04   0.963   0.3357    
## Overall.Qual  7.117e-01  8.922e-02   7.976 1.51e-15 ***
## Year.Built    4.870e-02  3.779e-03  12.888  < 2e-16 ***
## Lot.Area      3.727e-05  2.195e-05   1.698   0.0894 .  
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## (Dispersion parameter for binomial family taken to be 1)
## 
##     Null deviance: 1438.81  on 2929  degrees of freedom
## Residual deviance:  943.58  on 2925  degrees of freedom
## AIC: 953.58
## 
## Number of Fisher Scoring iterations: 7

Model Diagnostics

# Diagnostic plots for the linear model:
#   which = 1 -> Residuals vs Fitted
#   which = 2 -> Normal Q-Q plot
for (panel in c(1, 2)) {
  plot(linear_model, which = panel)
}

# Variance inflation factor of each predictor (from the car package)
car::vif(linear_model)
##  Gr.Liv.Area     Lot.Area Overall.Qual 
##     1.610324     1.096466     1.493078

Logistic Regression Diagnostics

# Akaike information criterion of the fitted logistic model
stats::AIC(logit_model)
## [1] 953.5784
# Exponentiate the log-odds coefficients to express them as odds ratios
logit_model %>%
  coef() %>%
  exp()
##  (Intercept)  Gr.Liv.Area Overall.Qual   Year.Built     Lot.Area 
## 1.029973e-42 1.000225e+00 2.037369e+00 1.049906e+00 1.000037e+00

Interpretation of Coefficients

Linear Model Coefficient Interpretation

The coefficient for Gr.Liv.Area in the linear model can be interpreted as the expected change in SalePrice (in dollars) for each additional square foot of living area, holding Lot.Area and Overall.Qual constant.

# Tidy coefficient table for the linear model, including the lower and upper
# bounds of the 95% confidence interval for each term
broom::tidy(linear_model, conf.int = TRUE, conf.level = 0.95)
## # A tibble: 4 × 7
##   term           estimate std.error statistic   p.value    conf.low  conf.high
##   <chr>             <dbl>     <dbl>     <dbl>     <dbl>       <dbl>      <dbl>
## 1 (Intercept)  -116223.   3390.         -34.3 7.88e-217 -122870.    -109576.  
## 2 Gr.Liv.Area       52.6     1.88        28.1 1.48e-153      49.0        56.3 
## 3 Lot.Area           1.15    0.0993      11.6 2.99e- 30       0.954       1.34
## 4 Overall.Qual   33867.    647.          52.3 0           32597.      35136.

Logistic Regression Coefficient Interpretation

For the logistic model, we exponentiate the coefficients to obtain odds ratios, which describe how the odds of having central air change for a one-unit increase in each predictor, holding the other predictors constant.

# Wald confidence interval for the Gr.Liv.Area odds ratio in the logistic model.
# The interval is built on the log-odds scale (estimate -/+ z * SE) and then
# exponentiated, so the printed bounds are on the odds-ratio scale.
conf_level <- 0.95
coeff <- coef(logit_model)["Gr.Liv.Area"]
coeff_se <- summary(logit_model)$coefficients["Gr.Liv.Area", "Std. Error"]
# Two-sided standard-normal critical value (about 1.96 at the 95% level),
# derived from conf_level rather than hard-coded so the level can be changed
# in one place.
z_value <- qnorm(1 - (1 - conf_level) / 2)
ci_lower <- coeff - z_value * coeff_se
ci_upper <- coeff + z_value * coeff_se
ci <- c(ci_lower, ci_upper)
exp(ci)
## Gr.Liv.Area Gr.Liv.Area 
##   0.9997674   1.0006820

Insights and Further Investigations

1. Linear Model Analysis (SalePrice)

Model Performance

  • R-squared: 0.7437 (74.37% of variance explained)

  • F-statistic: 2831 (p < 2.2e-16)

  • Highly significant overall model fit

Coefficient Interpretation

  • Gr.Liv.Area: β = 52.65 (SE = 1.88)

    • For each square foot increase in living area

    • Price increases by approximately $52.65

    • Highly significant (p < 2e-16)

  • Lot.Area: β = 1.15 (SE = 0.099)

    • Each square foot of lot area

    • Adds approximately $1.15 to price

    • Highly significant (p < 2e-16)

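  • Overall.Qual: β = 33,867 (SE = 647.41)

    • Each point increase in overall quality

    • Adds approximately $33,867 to price

    • Largest t-statistic of the three predictors (t = 52.31); the raw coefficients are on different scales, so their magnitudes are not directly comparable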

Diagnostic Issues

Residuals Plot Issues:

  • Shows fan pattern (heteroscedasticity)

  • Wider spread of residuals at higher fitted values

  • Several notable outliers (especially 1499, 2181, 2182)

  • Suggests potential need for transformation

Q-Q Plot Concerns:

  • Heavy tails at both ends

  • Deviation from normality especially pronounced in lower tail

  • Suggests non-normal residuals

Multicollinearity Assessment:

  • VIF values all below 2 (Gr.Liv.Area: 1.61, Lot.Area: 1.10, Overall.Qual: 1.49)

  • No concerning multicollinearity detected

2. Logistic Model Analysis (Central Air)

Model Performance

  • AIC: 953.58

  • Null deviance: 1438.81 → Residual deviance: 943.58

  • Deviance falls by 495.23 on 4 degrees of freedom, so the predictors jointly improve substantially on the intercept-only model

Odds Ratio Interpretation

  1. Overall.Qual: OR = 2.037

    • Each unit increase in quality

    • Roughly doubles the odds of having central air, holding the other predictors constant

    • Highly significant (p = 1.51e-15)

  2. Year.Built: OR = 1.050

    • Each year newer

    • Increases odds by 5%

    • Highly significant (p < 2e-16)

  3. Gr.Liv.Area: OR = 1.0002

    • Minimal effect

    • Not statistically significant (p = 0.336)

    • CI: [0.9998, 1.0007] includes 1

  4. Lot.Area: OR = 1.000037

    • Negligible effect per square foot of lot area

    • Not significant at the 5% level (p = 0.0894); significant only at the 10% level

Key Insights

  1. Model Selection and Specification:

    • Linear model shows good predictive power but violates assumptions

    • Logistic model effectively predicts central air presence

    • Both models identify Overall.Qual as crucial predictor

  2. Variable Importance:

    • Overall.Qual significant in both models

    • Year.Built crucial for central air

    • Living area more important for price than central air

  3. Data Structure:

    • Price relationships are non-linear

    • Central air relationships are more straightforward

    • Some influential outliers present

Further Investigations

  1. Model Improvements:

    • Consider log transformation of SalePrice

    • Investigate influential outliers (esp. 1499, 2181, 2182)

    • Test for interaction effects

  2. Additional Analysis:

    • Examine residual patterns by year built

    • Study relationship between price and central air

    • Investigate neighborhood effects

  3. Variable Transformations:

    • Test polynomial terms for Overall.Qual

    • Consider standardizing lot and living areas

    • Explore binning of continuous variables