```{r setup, include=FALSE}
# Global chunk defaults: echo code, suppress package messages and warnings
knitr::opts_chunk$set(echo = TRUE, message = FALSE, warning = FALSE)
library(tidyverse)
## ── Attaching core tidyverse packages ──────────────────────── tidyverse 2.0.0 ──
## ✔ dplyr 1.1.4 ✔ readr 2.1.5
## ✔ forcats 1.0.0 ✔ stringr 1.5.1
## ✔ ggplot2 3.5.1 ✔ tibble 3.2.1
## ✔ lubridate 1.9.3 ✔ tidyr 1.3.1
## ✔ purrr 1.0.2
## ── Conflicts ────────────────────────────────────────── tidyverse_conflicts() ──
## ✖ dplyr::filter() masks stats::filter()
## ✖ dplyr::lag() masks stats::lag()
## ℹ Use the conflicted package (<http://conflicted.r-lib.org/>) to force all conflicts to become errors
library(broom)
library(car) # For VIF calculations
## Loading required package: carData
##
## Attaching package: 'car'
##
## The following object is masked from 'package:dplyr':
##
## recode
##
## The following object is masked from 'package:purrr':
##
## some
# Read the Ames housing data from CSV; the first row supplies the column names.
# NOTE: the path is absolute, so the file must exist at exactly this location.
ames <- read.csv(file = "D:/Stats for DS/ames.csv", header = TRUE)
# Inspect the structure: one line per column with its type and leading values
glimpse(ames)
## Rows: 2,930
## Columns: 82
## $ Order <int> 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16,…
## $ PID <int> 526301100, 526350040, 526351010, 526353030, 527105010,…
## $ MS.SubClass <int> 20, 20, 20, 20, 60, 60, 120, 120, 120, 60, 60, 20, 60,…
## $ MS.Zoning <chr> "RL", "RH", "RL", "RL", "RL", "RL", "RL", "RL", "RL", …
## $ Lot.Frontage <int> 141, 80, 81, 93, 74, 78, 41, 43, 39, 60, 75, NA, 63, 8…
## $ Lot.Area <int> 31770, 11622, 14267, 11160, 13830, 9978, 4920, 5005, 5…
## $ Street <chr> "Pave", "Pave", "Pave", "Pave", "Pave", "Pave", "Pave"…
## $ Alley <chr> NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA…
## $ Lot.Shape <chr> "IR1", "Reg", "IR1", "Reg", "IR1", "IR1", "Reg", "IR1"…
## $ Land.Contour <chr> "Lvl", "Lvl", "Lvl", "Lvl", "Lvl", "Lvl", "Lvl", "HLS"…
## $ Utilities <chr> "AllPub", "AllPub", "AllPub", "AllPub", "AllPub", "All…
## $ Lot.Config <chr> "Corner", "Inside", "Corner", "Corner", "Inside", "Ins…
## $ Land.Slope <chr> "Gtl", "Gtl", "Gtl", "Gtl", "Gtl", "Gtl", "Gtl", "Gtl"…
## $ Neighborhood <chr> "NAmes", "NAmes", "NAmes", "NAmes", "Gilbert", "Gilber…
## $ Condition.1 <chr> "Norm", "Feedr", "Norm", "Norm", "Norm", "Norm", "Norm…
## $ Condition.2 <chr> "Norm", "Norm", "Norm", "Norm", "Norm", "Norm", "Norm"…
## $ Bldg.Type <chr> "1Fam", "1Fam", "1Fam", "1Fam", "1Fam", "1Fam", "Twnhs…
## $ House.Style <chr> "1Story", "1Story", "1Story", "1Story", "2Story", "2St…
## $ Overall.Qual <int> 6, 5, 6, 7, 5, 6, 8, 8, 8, 7, 6, 6, 6, 7, 8, 8, 8, 9, …
## $ Overall.Cond <int> 5, 6, 6, 5, 5, 6, 5, 5, 5, 5, 5, 7, 5, 5, 5, 5, 7, 2, …
## $ Year.Built <int> 1960, 1961, 1958, 1968, 1997, 1998, 2001, 1992, 1995, …
## $ Year.Remod.Add <int> 1960, 1961, 1958, 1968, 1998, 1998, 2001, 1992, 1996, …
## $ Roof.Style <chr> "Hip", "Gable", "Hip", "Hip", "Gable", "Gable", "Gable…
## $ Roof.Matl <chr> "CompShg", "CompShg", "CompShg", "CompShg", "CompShg",…
## $ Exterior.1st <chr> "BrkFace", "VinylSd", "Wd Sdng", "BrkFace", "VinylSd",…
## $ Exterior.2nd <chr> "Plywood", "VinylSd", "Wd Sdng", "BrkFace", "VinylSd",…
## $ Mas.Vnr.Type <chr> "Stone", "None", "BrkFace", "None", "None", "BrkFace",…
## $ Mas.Vnr.Area <int> 112, 0, 108, 0, 0, 20, 0, 0, 0, 0, 0, 0, 0, 0, 0, 603,…
## $ Exter.Qual <chr> "TA", "TA", "TA", "Gd", "TA", "TA", "Gd", "Gd", "Gd", …
## $ Exter.Cond <chr> "TA", "TA", "TA", "TA", "TA", "TA", "TA", "TA", "TA", …
## $ Foundation <chr> "CBlock", "CBlock", "CBlock", "CBlock", "PConc", "PCon…
## $ Bsmt.Qual <chr> "TA", "TA", "TA", "TA", "Gd", "TA", "Gd", "Gd", "Gd", …
## $ Bsmt.Cond <chr> "Gd", "TA", "TA", "TA", "TA", "TA", "TA", "TA", "TA", …
## $ Bsmt.Exposure <chr> "Gd", "No", "No", "No", "No", "No", "Mn", "No", "No", …
## $ BsmtFin.Type.1 <chr> "BLQ", "Rec", "ALQ", "ALQ", "GLQ", "GLQ", "GLQ", "ALQ"…
## $ BsmtFin.SF.1 <int> 639, 468, 923, 1065, 791, 602, 616, 263, 1180, 0, 0, 9…
## $ BsmtFin.Type.2 <chr> "Unf", "LwQ", "Unf", "Unf", "Unf", "Unf", "Unf", "Unf"…
## $ BsmtFin.SF.2 <int> 0, 144, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1120, 0, 0…
## $ Bsmt.Unf.SF <int> 441, 270, 406, 1045, 137, 324, 722, 1017, 415, 994, 76…
## $ Total.Bsmt.SF <int> 1080, 882, 1329, 2110, 928, 926, 1338, 1280, 1595, 994…
## $ Heating <chr> "GasA", "GasA", "GasA", "GasA", "GasA", "GasA", "GasA"…
## $ Heating.QC <chr> "Fa", "TA", "TA", "Ex", "Gd", "Ex", "Ex", "Ex", "Ex", …
## $ Central.Air <chr> "Y", "Y", "Y", "Y", "Y", "Y", "Y", "Y", "Y", "Y", "Y",…
## $ Electrical <chr> "SBrkr", "SBrkr", "SBrkr", "SBrkr", "SBrkr", "SBrkr", …
## $ X1st.Flr.SF <int> 1656, 896, 1329, 2110, 928, 926, 1338, 1280, 1616, 102…
## $ X2nd.Flr.SF <int> 0, 0, 0, 0, 701, 678, 0, 0, 0, 776, 892, 0, 676, 0, 0,…
## $ Low.Qual.Fin.SF <int> 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, …
## $ Gr.Liv.Area <int> 1656, 896, 1329, 2110, 1629, 1604, 1338, 1280, 1616, 1…
## $ Bsmt.Full.Bath <int> 1, 0, 0, 1, 0, 0, 1, 0, 1, 0, 0, 1, 0, 1, 1, 1, 0, 1, …
## $ Bsmt.Half.Bath <int> 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, …
## $ Full.Bath <int> 1, 1, 1, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 1, 1, 3, 2, 1, …
## $ Half.Bath <int> 0, 0, 1, 1, 1, 1, 0, 0, 0, 1, 1, 0, 1, 1, 1, 1, 0, 1, …
## $ Bedroom.AbvGr <int> 3, 2, 3, 3, 3, 3, 2, 2, 2, 3, 3, 3, 3, 2, 1, 4, 4, 1, …
## $ Kitchen.AbvGr <int> 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, …
## $ Kitchen.Qual <chr> "TA", "TA", "Gd", "Ex", "TA", "Gd", "Gd", "Gd", "Gd", …
## $ TotRms.AbvGrd <int> 7, 5, 6, 8, 6, 7, 6, 5, 5, 7, 7, 6, 7, 5, 4, 12, 8, 8,…
## $ Functional <chr> "Typ", "Typ", "Typ", "Typ", "Typ", "Typ", "Typ", "Typ"…
## $ Fireplaces <int> 2, 0, 0, 2, 1, 1, 0, 0, 1, 1, 1, 0, 1, 1, 0, 1, 0, 1, …
## $ Fireplace.Qu <chr> "Gd", NA, NA, "TA", "TA", "Gd", NA, NA, "TA", "TA", "T…
## $ Garage.Type <chr> "Attchd", "Attchd", "Attchd", "Attchd", "Attchd", "Att…
## $ Garage.Yr.Blt <int> 1960, 1961, 1958, 1968, 1997, 1998, 2001, 1992, 1995, …
## $ Garage.Finish <chr> "Fin", "Unf", "Unf", "Fin", "Fin", "Fin", "Fin", "RFn"…
## $ Garage.Cars <int> 2, 1, 1, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 3, 2, 3, …
## $ Garage.Area <int> 528, 730, 312, 522, 482, 470, 582, 506, 608, 442, 440,…
## $ Garage.Qual <chr> "TA", "TA", "TA", "TA", "TA", "TA", "TA", "TA", "TA", …
## $ Garage.Cond <chr> "TA", "TA", "TA", "TA", "TA", "TA", "TA", "TA", "TA", …
## $ Paved.Drive <chr> "P", "Y", "Y", "Y", "Y", "Y", "Y", "Y", "Y", "Y", "Y",…
## $ Wood.Deck.SF <int> 210, 140, 393, 0, 212, 360, 0, 0, 237, 140, 157, 483, …
## $ Open.Porch.SF <int> 62, 0, 36, 0, 34, 36, 0, 82, 152, 60, 84, 21, 75, 0, 5…
## $ Enclosed.Porch <int> 0, 0, 0, 0, 0, 0, 170, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0…
## $ X3Ssn.Porch <int> 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, …
## $ Screen.Porch <int> 0, 120, 0, 0, 0, 0, 0, 144, 0, 0, 0, 0, 0, 0, 140, 210…
## $ Pool.Area <int> 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, …
## $ Pool.QC <chr> NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA…
## $ Fence <chr> NA, "MnPrv", NA, NA, "MnPrv", NA, NA, NA, NA, NA, NA, …
## $ Misc.Feature <chr> NA, NA, "Gar2", NA, NA, NA, NA, NA, NA, NA, NA, "Shed"…
## $ Misc.Val <int> 0, 0, 12500, 0, 0, 0, 0, 0, 0, 0, 0, 500, 0, 0, 0, 0, …
## $ Mo.Sold <int> 5, 6, 6, 4, 3, 6, 4, 1, 3, 6, 4, 3, 5, 2, 6, 6, 6, 6, …
## $ Yr.Sold <int> 2010, 2010, 2010, 2010, 2010, 2010, 2010, 2010, 2010, …
## $ Sale.Type <chr> "WD ", "WD ", "WD ", "WD ", "WD ", "WD ", "WD ", "WD "…
## $ Sale.Condition <chr> "Normal", "Normal", "Normal", "Normal", "Normal", "Nor…
## $ SalePrice <int> 215000, 105000, 172000, 244000, 189900, 195500, 213500…
# Column-by-column summary of the raw data
summary(ames)
## Order PID MS.SubClass MS.Zoning
## Min. : 1.0 Min. :5.263e+08 Min. : 20.00 Length:2930
## 1st Qu.: 733.2 1st Qu.:5.285e+08 1st Qu.: 20.00 Class :character
## Median :1465.5 Median :5.355e+08 Median : 50.00 Mode :character
## Mean :1465.5 Mean :7.145e+08 Mean : 57.39
## 3rd Qu.:2197.8 3rd Qu.:9.072e+08 3rd Qu.: 70.00
## Max. :2930.0 Max. :1.007e+09 Max. :190.00
##
## Lot.Frontage Lot.Area Street Alley
## Min. : 21.00 Min. : 1300 Length:2930 Length:2930
## 1st Qu.: 58.00 1st Qu.: 7440 Class :character Class :character
## Median : 68.00 Median : 9436 Mode :character Mode :character
## Mean : 69.22 Mean : 10148
## 3rd Qu.: 80.00 3rd Qu.: 11555
## Max. :313.00 Max. :215245
## NA's :490
## Lot.Shape Land.Contour Utilities Lot.Config
## Length:2930 Length:2930 Length:2930 Length:2930
## Class :character Class :character Class :character Class :character
## Mode :character Mode :character Mode :character Mode :character
##
##
##
##
## Land.Slope Neighborhood Condition.1 Condition.2
## Length:2930 Length:2930 Length:2930 Length:2930
## Class :character Class :character Class :character Class :character
## Mode :character Mode :character Mode :character Mode :character
##
##
##
##
## Bldg.Type House.Style Overall.Qual Overall.Cond
## Length:2930 Length:2930 Min. : 1.000 Min. :1.000
## Class :character Class :character 1st Qu.: 5.000 1st Qu.:5.000
## Mode :character Mode :character Median : 6.000 Median :5.000
## Mean : 6.095 Mean :5.563
## 3rd Qu.: 7.000 3rd Qu.:6.000
## Max. :10.000 Max. :9.000
##
## Year.Built Year.Remod.Add Roof.Style Roof.Matl
## Min. :1872 Min. :1950 Length:2930 Length:2930
## 1st Qu.:1954 1st Qu.:1965 Class :character Class :character
## Median :1973 Median :1993 Mode :character Mode :character
## Mean :1971 Mean :1984
## 3rd Qu.:2001 3rd Qu.:2004
## Max. :2010 Max. :2010
##
## Exterior.1st Exterior.2nd Mas.Vnr.Type Mas.Vnr.Area
## Length:2930 Length:2930 Length:2930 Min. : 0.0
## Class :character Class :character Class :character 1st Qu.: 0.0
## Mode :character Mode :character Mode :character Median : 0.0
## Mean : 101.9
## 3rd Qu.: 164.0
## Max. :1600.0
## NA's :23
## Exter.Qual Exter.Cond Foundation Bsmt.Qual
## Length:2930 Length:2930 Length:2930 Length:2930
## Class :character Class :character Class :character Class :character
## Mode :character Mode :character Mode :character Mode :character
##
##
##
##
## Bsmt.Cond Bsmt.Exposure BsmtFin.Type.1 BsmtFin.SF.1
## Length:2930 Length:2930 Length:2930 Min. : 0.0
## Class :character Class :character Class :character 1st Qu.: 0.0
## Mode :character Mode :character Mode :character Median : 370.0
## Mean : 442.6
## 3rd Qu.: 734.0
## Max. :5644.0
## NA's :1
## BsmtFin.Type.2 BsmtFin.SF.2 Bsmt.Unf.SF Total.Bsmt.SF
## Length:2930 Min. : 0.00 Min. : 0.0 Min. : 0
## Class :character 1st Qu.: 0.00 1st Qu.: 219.0 1st Qu.: 793
## Mode :character Median : 0.00 Median : 466.0 Median : 990
## Mean : 49.72 Mean : 559.3 Mean :1052
## 3rd Qu.: 0.00 3rd Qu.: 802.0 3rd Qu.:1302
## Max. :1526.00 Max. :2336.0 Max. :6110
## NA's :1 NA's :1 NA's :1
## Heating Heating.QC Central.Air Electrical
## Length:2930 Length:2930 Length:2930 Length:2930
## Class :character Class :character Class :character Class :character
## Mode :character Mode :character Mode :character Mode :character
##
##
##
##
## X1st.Flr.SF X2nd.Flr.SF Low.Qual.Fin.SF Gr.Liv.Area
## Min. : 334.0 Min. : 0.0 Min. : 0.000 Min. : 334
## 1st Qu.: 876.2 1st Qu.: 0.0 1st Qu.: 0.000 1st Qu.:1126
## Median :1084.0 Median : 0.0 Median : 0.000 Median :1442
## Mean :1159.6 Mean : 335.5 Mean : 4.677 Mean :1500
## 3rd Qu.:1384.0 3rd Qu.: 703.8 3rd Qu.: 0.000 3rd Qu.:1743
## Max. :5095.0 Max. :2065.0 Max. :1064.000 Max. :5642
##
## Bsmt.Full.Bath Bsmt.Half.Bath Full.Bath Half.Bath
## Min. :0.0000 Min. :0.00000 Min. :0.000 Min. :0.0000
## 1st Qu.:0.0000 1st Qu.:0.00000 1st Qu.:1.000 1st Qu.:0.0000
## Median :0.0000 Median :0.00000 Median :2.000 Median :0.0000
## Mean :0.4314 Mean :0.06113 Mean :1.567 Mean :0.3795
## 3rd Qu.:1.0000 3rd Qu.:0.00000 3rd Qu.:2.000 3rd Qu.:1.0000
## Max. :3.0000 Max. :2.00000 Max. :4.000 Max. :2.0000
## NA's :2 NA's :2
## Bedroom.AbvGr Kitchen.AbvGr Kitchen.Qual TotRms.AbvGrd
## Min. :0.000 Min. :0.000 Length:2930 Min. : 2.000
## 1st Qu.:2.000 1st Qu.:1.000 Class :character 1st Qu.: 5.000
## Median :3.000 Median :1.000 Mode :character Median : 6.000
## Mean :2.854 Mean :1.044 Mean : 6.443
## 3rd Qu.:3.000 3rd Qu.:1.000 3rd Qu.: 7.000
## Max. :8.000 Max. :3.000 Max. :15.000
##
## Functional Fireplaces Fireplace.Qu Garage.Type
## Length:2930 Min. :0.0000 Length:2930 Length:2930
## Class :character 1st Qu.:0.0000 Class :character Class :character
## Mode :character Median :1.0000 Mode :character Mode :character
## Mean :0.5993
## 3rd Qu.:1.0000
## Max. :4.0000
##
## Garage.Yr.Blt Garage.Finish Garage.Cars Garage.Area
## Min. :1895 Length:2930 Min. :0.000 Min. : 0.0
## 1st Qu.:1960 Class :character 1st Qu.:1.000 1st Qu.: 320.0
## Median :1979 Mode :character Median :2.000 Median : 480.0
## Mean :1978 Mean :1.767 Mean : 472.8
## 3rd Qu.:2002 3rd Qu.:2.000 3rd Qu.: 576.0
## Max. :2207 Max. :5.000 Max. :1488.0
## NA's :159 NA's :1 NA's :1
## Garage.Qual Garage.Cond Paved.Drive Wood.Deck.SF
## Length:2930 Length:2930 Length:2930 Min. : 0.00
## Class :character Class :character Class :character 1st Qu.: 0.00
## Mode :character Mode :character Mode :character Median : 0.00
## Mean : 93.75
## 3rd Qu.: 168.00
## Max. :1424.00
##
## Open.Porch.SF Enclosed.Porch X3Ssn.Porch Screen.Porch
## Min. : 0.00 Min. : 0.00 Min. : 0.000 Min. : 0
## 1st Qu.: 0.00 1st Qu.: 0.00 1st Qu.: 0.000 1st Qu.: 0
## Median : 27.00 Median : 0.00 Median : 0.000 Median : 0
## Mean : 47.53 Mean : 23.01 Mean : 2.592 Mean : 16
## 3rd Qu.: 70.00 3rd Qu.: 0.00 3rd Qu.: 0.000 3rd Qu.: 0
## Max. :742.00 Max. :1012.00 Max. :508.000 Max. :576
##
## Pool.Area Pool.QC Fence Misc.Feature
## Min. : 0.000 Length:2930 Length:2930 Length:2930
## 1st Qu.: 0.000 Class :character Class :character Class :character
## Median : 0.000 Mode :character Mode :character Mode :character
## Mean : 2.243
## 3rd Qu.: 0.000
## Max. :800.000
##
## Misc.Val Mo.Sold Yr.Sold Sale.Type
## Min. : 0.00 Min. : 1.000 Min. :2006 Length:2930
## 1st Qu.: 0.00 1st Qu.: 4.000 1st Qu.:2007 Class :character
## Median : 0.00 Median : 6.000 Median :2008 Mode :character
## Mean : 50.63 Mean : 6.216 Mean :2008
## 3rd Qu.: 0.00 3rd Qu.: 8.000 3rd Qu.:2009
## Max. :17000.00 Max. :12.000 Max. :2010
##
## Sale.Condition SalePrice
## Length:2930 Min. : 12789
## Class :character 1st Qu.:129500
## Mode :character Median :160000
## Mean :180796
## 3rd Qu.:213500
## Max. :755000
##
# Recode Central.Air as a numeric indicator (1 when the value is "Y", 0 otherwise)
# so it can be used as the response of the logistic regression
ames <- ames %>%
  mutate(Central.Air = as.numeric(Central.Air == "Y"))
# Fit the linear model: regress SalePrice on living area, lot area and
# overall quality using the full ames data frame
linear_model <- lm(SalePrice ~ Gr.Liv.Area + Lot.Area + Overall.Qual, data = ames)
# Summary of the linear model (coefficient table, residual standard error,
# R-squared and overall F-test)
summary(linear_model)
##
## Call:
## lm(formula = SalePrice ~ Gr.Liv.Area + Lot.Area + Overall.Qual,
## data = ames)
##
## Residuals:
## Min 1Q Median 3Q Max
## -432835 -22651 -1106 19280 283790
##
## Coefficients:
## Estimate Std. Error t value Pr(>|t|)
## (Intercept) -1.162e+05 3.390e+03 -34.28 <2e-16 ***
## Gr.Liv.Area 5.265e+01 1.877e+00 28.05 <2e-16 ***
## Lot.Area 1.148e+00 9.934e-02 11.56 <2e-16 ***
## Overall.Qual 3.387e+04 6.474e+02 52.31 <2e-16 ***
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## Residual standard error: 40460 on 2926 degrees of freedom
## Multiple R-squared: 0.7437, Adjusted R-squared: 0.7435
## F-statistic: 2831 on 3 and 2926 DF, p-value: < 2.2e-16
# Build the modelling frame for the logistic regression: keep the response and
# its four predictors, then drop any rows that contain missing values
ames_model_data <- ames %>%
  select(Central.Air, Gr.Liv.Area, Overall.Qual, Year.Built, Lot.Area) %>%
  na.omit()
# Logistic regression (binomial family, logit link) with Central.Air as response
logit_model <- glm(
  Central.Air ~ Gr.Liv.Area + Overall.Qual + Year.Built + Lot.Area,
  data = ames_model_data,
  family = binomial(link = "logit")
)
# Coefficient table, null/residual deviance and AIC of the fitted model
summary(logit_model)
##
## Call:
## glm(formula = Central.Air ~ Gr.Liv.Area + Overall.Qual + Year.Built +
## Lot.Area, family = binomial(link = "logit"), data = ames_model_data)
##
## Coefficients:
## Estimate Std. Error z value Pr(>|z|)
## (Intercept) -9.668e+01 7.345e+00 -13.162 < 2e-16 ***
## Gr.Liv.Area 2.246e-04 2.333e-04 0.963 0.3357
## Overall.Qual 7.117e-01 8.922e-02 7.976 1.51e-15 ***
## Year.Built 4.870e-02 3.779e-03 12.888 < 2e-16 ***
## Lot.Area 3.727e-05 2.195e-05 1.698 0.0894 .
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## (Dispersion parameter for binomial family taken to be 1)
##
## Null deviance: 1438.81 on 2929 degrees of freedom
## Residual deviance: 943.58 on 2925 degrees of freedom
## AIC: 953.58
##
## Number of Fisher Scoring iterations: 7
Residual Plot: Checking for patterns to detect heteroscedasticity or model misfit.
Q-Q Plot: Assessing the normality of residuals.
# Residuals vs Fitted (first diagnostic plot for the fitted lm object)
plot(linear_model, which = 1)
# Normal Q-Q plot (second diagnostic plot for the fitted lm object)
plot(linear_model, which = 2)
# Variance inflation factors for the linear model's predictors
# (vif() comes from the car package loaded at the top of the document)
vif(linear_model)
## Gr.Liv.Area Lot.Area Overall.Qual
## 1.610324 1.096466 1.493078
AIC: Using AIC to assess the model fit.
Odds Ratios for Interpretation.
# AIC of logistic model
AIC(logit_model)
# Convert coefficients to odds ratios by exponentiating the log-odds estimates
exp(coef(logit_model))
## (Intercept) Gr.Liv.Area Overall.Qual Year.Built Lot.Area
## 1.029973e-42 1.000225e+00 2.037369e+00 1.049906e+00 1.000037e+00
The coefficient for Gr.Liv.Area in the linear model can be interpreted as the expected increase in SalePrice for each additional square foot of living area, holding Lot.Area and Overall.Qual constant.
# Extracting coefficients with confidence intervals
# (tidy() returns one row per model term; conf.int = TRUE adds the
# conf.low and conf.high columns)
tidy(linear_model, conf.int = TRUE)
## # A tibble: 4 × 7
## term estimate std.error statistic p.value conf.low conf.high
## <chr> <dbl> <dbl> <dbl> <dbl> <dbl> <dbl>
## 1 (Intercept) -116223. 3390. -34.3 7.88e-217 -122870. -109576.
## 2 Gr.Liv.Area 52.6 1.88 28.1 1.48e-153 49.0 56.3
## 3 Lot.Area 1.15 0.0993 11.6 2.99e- 30 0.954 1.34
## 4 Overall.Qual 33867. 647. 52.3 0 32597. 35136.
For the logistic model, we interpret the odds ratios to understand how the odds of having central air change with a one-unit increase in each predictor.
# Wald confidence interval for the Gr.Liv.Area odds ratio in the logistic model:
# estimate +/- z * standard error on the log-odds scale, then exponentiate
z_value <- 1.96
logit_summary <- summary(logit_model)
coeff <- coef(logit_model)["Gr.Liv.Area"]
coeff_se <- logit_summary$coefficients["Gr.Liv.Area", "Std. Error"]
margin <- z_value * coeff_se
ci_lower <- coeff - margin
ci_upper <- coeff + margin
ci <- c(ci_lower, ci_upper)
# Back-transform both bounds to the odds-ratio scale
exp(ci)
## Gr.Liv.Area Gr.Liv.Area
## 0.9997674 1.0006820
R-squared: 0.7437 (74.37% of variance explained)
F-statistic: 2831 (p < 2.2e-16)
Highly significant overall model fit
Gr.Liv.Area: β = 52.65 (SE = 1.88)
For each square foot increase in living area
Price increases by approximately $52.65
Highly significant (p < 2e-16)
Lot.Area: β = 1.15 (SE = 0.099)
Each square foot of lot area
Adds approximately $1.15 to price
Highly significant (p < 2e-16)
Overall.Qual: β = 33,867 (SE = 647.41)
Each point increase in overall quality
Adds approximately $33,867 to price
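Strongest predictor by t-statistic (t = 52.31); note that raw coefficient magnitudes are not directly comparable across predictors measured in different units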
Residuals Plot Issues:
Shows fan pattern (heteroscedasticity)
Wider spread of residuals at higher fitted values
Several notable outliers (especially 1499, 2181, 2182)
Suggests potential need for transformation
Q-Q Plot Concerns:
Heavy tails at both ends
Deviation from normality especially pronounced in lower tail
Suggests non-normal residuals
Multicollinearity Assessment:
VIF values all below 2 (Gr.Liv.Area: 1.61, Lot.Area: 1.10, Overall.Qual: 1.49)
No concerning multicollinearity detected
AIC: 953.58
Null deviance: 1438.81 → Residual deviance: 943.58
Reduction in deviance of 495.23 on 4 degrees of freedom indicates the predictors substantially improve on the null model
Overall.Qual: OR = 2.037
Each unit increase in quality
Roughly doubles the odds of having central air
Highly significant (p = 1.51e-15)
Year.Built: OR = 1.050
Each year newer
Increases odds by 5%
Highly significant (p < 2e-16)
Gr.Liv.Area: OR = 1.0002
Minimal effect
Not statistically significant (p = 0.336)
CI: [0.9998, 1.0007] includes 1
Lot.Area: OR = 1.000037
Negligible effect
Not significant at the 5% level (p = 0.0894); significant only at the 10% level
Model Selection and Specification:
Linear model shows good predictive power but violates assumptions
Logistic model effectively predicts central air presence
Both models identify Overall.Qual as crucial predictor
Variable Importance:
Overall.Qual significant in both models
Year.Built crucial for central air
Living area matters more for predicting price than for predicting central air
Data Structure:
Price relationships appear non-linear (the residual plot shows a fan pattern and notable outliers)
Central air relationships are more straightforward
Some influential outliers present
Model Improvements:
Consider log transformation of SalePrice
Investigate influential outliers (esp. 1499, 2181, 2182)
Test for interaction effects
Additional Analysis:
Examine residual patterns by year built
Study relationship between price and central air
Investigate neighborhood effects
Variable Transformations:
Test polynomial terms for Overall.Qual
Consider standardizing lot and living areas
Explore binning of continuous variables