```{r setup, include=FALSE}
# Global chunk defaults for the report: show the code of every chunk, but keep
# package start-up messages and warnings out of the rendered output.
knitr::opts_chunk$set(echo = TRUE, message = FALSE, warning = FALSE)
```
### Load the dataset
``` r
# Read the Ames housing data into a data frame; the first line of the CSV
# supplies the column names.
# NOTE(review): absolute, user-specific Windows path - the script only runs
# unchanged on the machine where this file exists.
ames <- read.csv(
  file = 'C:/Users/Usama Shaikh/Downloads/Stats for DS/ames.csv',
  header = TRUE
)
### Display first few rows
# Preview the first six rows (the default for head()) to eyeball the columns.
head(ames, n = 6L)
## Order PID MS.SubClass MS.Zoning Lot.Frontage Lot.Area Street Alley
## 1 1 526301100 20 RL 141 31770 Pave <NA>
## 2 2 526350040 20 RH 80 11622 Pave <NA>
## 3 3 526351010 20 RL 81 14267 Pave <NA>
## 4 4 526353030 20 RL 93 11160 Pave <NA>
## 5 5 527105010 60 RL 74 13830 Pave <NA>
## 6 6 527105030 60 RL 78 9978 Pave <NA>
## Lot.Shape Land.Contour Utilities Lot.Config Land.Slope Neighborhood
## 1 IR1 Lvl AllPub Corner Gtl NAmes
## 2 Reg Lvl AllPub Inside Gtl NAmes
## 3 IR1 Lvl AllPub Corner Gtl NAmes
## 4 Reg Lvl AllPub Corner Gtl NAmes
## 5 IR1 Lvl AllPub Inside Gtl Gilbert
## 6 IR1 Lvl AllPub Inside Gtl Gilbert
## Condition.1 Condition.2 Bldg.Type House.Style Overall.Qual Overall.Cond
## 1 Norm Norm 1Fam 1Story 6 5
## 2 Feedr Norm 1Fam 1Story 5 6
## 3 Norm Norm 1Fam 1Story 6 6
## 4 Norm Norm 1Fam 1Story 7 5
## 5 Norm Norm 1Fam 2Story 5 5
## 6 Norm Norm 1Fam 2Story 6 6
## Year.Built Year.Remod.Add Roof.Style Roof.Matl Exterior.1st Exterior.2nd
## 1 1960 1960 Hip CompShg BrkFace Plywood
## 2 1961 1961 Gable CompShg VinylSd VinylSd
## 3 1958 1958 Hip CompShg Wd Sdng Wd Sdng
## 4 1968 1968 Hip CompShg BrkFace BrkFace
## 5 1997 1998 Gable CompShg VinylSd VinylSd
## 6 1998 1998 Gable CompShg VinylSd VinylSd
## Mas.Vnr.Type Mas.Vnr.Area Exter.Qual Exter.Cond Foundation Bsmt.Qual
## 1 Stone 112 TA TA CBlock TA
## 2 None 0 TA TA CBlock TA
## 3 BrkFace 108 TA TA CBlock TA
## 4 None 0 Gd TA CBlock TA
## 5 None 0 TA TA PConc Gd
## 6 BrkFace 20 TA TA PConc TA
## Bsmt.Cond Bsmt.Exposure BsmtFin.Type.1 BsmtFin.SF.1 BsmtFin.Type.2
## 1 Gd Gd BLQ 639 Unf
## 2 TA No Rec 468 LwQ
## 3 TA No ALQ 923 Unf
## 4 TA No ALQ 1065 Unf
## 5 TA No GLQ 791 Unf
## 6 TA No GLQ 602 Unf
## BsmtFin.SF.2 Bsmt.Unf.SF Total.Bsmt.SF Heating Heating.QC Central.Air
## 1 0 441 1080 GasA Fa Y
## 2 144 270 882 GasA TA Y
## 3 0 406 1329 GasA TA Y
## 4 0 1045 2110 GasA Ex Y
## 5 0 137 928 GasA Gd Y
## 6 0 324 926 GasA Ex Y
## Electrical X1st.Flr.SF X2nd.Flr.SF Low.Qual.Fin.SF Gr.Liv.Area Bsmt.Full.Bath
## 1 SBrkr 1656 0 0 1656 1
## 2 SBrkr 896 0 0 896 0
## 3 SBrkr 1329 0 0 1329 0
## 4 SBrkr 2110 0 0 2110 1
## 5 SBrkr 928 701 0 1629 0
## 6 SBrkr 926 678 0 1604 0
## Bsmt.Half.Bath Full.Bath Half.Bath Bedroom.AbvGr Kitchen.AbvGr Kitchen.Qual
## 1 0 1 0 3 1 TA
## 2 0 1 0 2 1 TA
## 3 0 1 1 3 1 Gd
## 4 0 2 1 3 1 Ex
## 5 0 2 1 3 1 TA
## 6 0 2 1 3 1 Gd
## TotRms.AbvGrd Functional Fireplaces Fireplace.Qu Garage.Type Garage.Yr.Blt
## 1 7 Typ 2 Gd Attchd 1960
## 2 5 Typ 0 <NA> Attchd 1961
## 3 6 Typ 0 <NA> Attchd 1958
## 4 8 Typ 2 TA Attchd 1968
## 5 6 Typ 1 TA Attchd 1997
## 6 7 Typ 1 Gd Attchd 1998
## Garage.Finish Garage.Cars Garage.Area Garage.Qual Garage.Cond Paved.Drive
## 1 Fin 2 528 TA TA P
## 2 Unf 1 730 TA TA Y
## 3 Unf 1 312 TA TA Y
## 4 Fin 2 522 TA TA Y
## 5 Fin 2 482 TA TA Y
## 6 Fin 2 470 TA TA Y
## Wood.Deck.SF Open.Porch.SF Enclosed.Porch X3Ssn.Porch Screen.Porch Pool.Area
## 1 210 62 0 0 0 0
## 2 140 0 0 0 120 0
## 3 393 36 0 0 0 0
## 4 0 0 0 0 0 0
## 5 212 34 0 0 0 0
## 6 360 36 0 0 0 0
## Pool.QC Fence Misc.Feature Misc.Val Mo.Sold Yr.Sold Sale.Type Sale.Condition
## 1 <NA> <NA> <NA> 0 5 2010 WD Normal
## 2 <NA> MnPrv <NA> 0 6 2010 WD Normal
## 3 <NA> <NA> Gar2 12500 6 2010 WD Normal
## 4 <NA> <NA> <NA> 0 4 2010 WD Normal
## 5 <NA> MnPrv <NA> 0 3 2010 WD Normal
## 6 <NA> <NA> <NA> 0 6 2010 WD Normal
## SalePrice
## 1 215000
## 2 105000
## 3 172000
## 4 244000
## 5 189900
## 6 195500
### Display column names
# List every column name; for a data frame names() and colnames() agree.
names(ames)
## [1] "Order" "PID" "MS.SubClass" "MS.Zoning"
## [5] "Lot.Frontage" "Lot.Area" "Street" "Alley"
## [9] "Lot.Shape" "Land.Contour" "Utilities" "Lot.Config"
## [13] "Land.Slope" "Neighborhood" "Condition.1" "Condition.2"
## [17] "Bldg.Type" "House.Style" "Overall.Qual" "Overall.Cond"
## [21] "Year.Built" "Year.Remod.Add" "Roof.Style" "Roof.Matl"
## [25] "Exterior.1st" "Exterior.2nd" "Mas.Vnr.Type" "Mas.Vnr.Area"
## [29] "Exter.Qual" "Exter.Cond" "Foundation" "Bsmt.Qual"
## [33] "Bsmt.Cond" "Bsmt.Exposure" "BsmtFin.Type.1" "BsmtFin.SF.1"
## [37] "BsmtFin.Type.2" "BsmtFin.SF.2" "Bsmt.Unf.SF" "Total.Bsmt.SF"
## [41] "Heating" "Heating.QC" "Central.Air" "Electrical"
## [45] "X1st.Flr.SF" "X2nd.Flr.SF" "Low.Qual.Fin.SF" "Gr.Liv.Area"
## [49] "Bsmt.Full.Bath" "Bsmt.Half.Bath" "Full.Bath" "Half.Bath"
## [53] "Bedroom.AbvGr" "Kitchen.AbvGr" "Kitchen.Qual" "TotRms.AbvGrd"
## [57] "Functional" "Fireplaces" "Fireplace.Qu" "Garage.Type"
## [61] "Garage.Yr.Blt" "Garage.Finish" "Garage.Cars" "Garage.Area"
## [65] "Garage.Qual" "Garage.Cond" "Paved.Drive" "Wood.Deck.SF"
## [69] "Open.Porch.SF" "Enclosed.Porch" "X3Ssn.Porch" "Screen.Porch"
## [73] "Pool.Area" "Pool.QC" "Fence" "Misc.Feature"
## [77] "Misc.Val" "Mo.Sold" "Yr.Sold" "Sale.Type"
## [81] "Sale.Condition" "SalePrice"
### Summary Statistics for Data
# Column-by-column overview of the whole data frame. Numeric columns are
# reported as min / quartiles / mean / max plus an NA count where values are
# missing; character columns only show length, class and mode.
summary(ames)
## Order PID MS.SubClass MS.Zoning
## Min. : 1.0 Min. :5.263e+08 Min. : 20.00 Length:2930
## 1st Qu.: 733.2 1st Qu.:5.285e+08 1st Qu.: 20.00 Class :character
## Median :1465.5 Median :5.355e+08 Median : 50.00 Mode :character
## Mean :1465.5 Mean :7.145e+08 Mean : 57.39
## 3rd Qu.:2197.8 3rd Qu.:9.072e+08 3rd Qu.: 70.00
## Max. :2930.0 Max. :1.007e+09 Max. :190.00
##
## Lot.Frontage Lot.Area Street Alley
## Min. : 21.00 Min. : 1300 Length:2930 Length:2930
## 1st Qu.: 58.00 1st Qu.: 7440 Class :character Class :character
## Median : 68.00 Median : 9436 Mode :character Mode :character
## Mean : 69.22 Mean : 10148
## 3rd Qu.: 80.00 3rd Qu.: 11555
## Max. :313.00 Max. :215245
## NA's :490
## Lot.Shape Land.Contour Utilities Lot.Config
## Length:2930 Length:2930 Length:2930 Length:2930
## Class :character Class :character Class :character Class :character
## Mode :character Mode :character Mode :character Mode :character
##
##
##
##
## Land.Slope Neighborhood Condition.1 Condition.2
## Length:2930 Length:2930 Length:2930 Length:2930
## Class :character Class :character Class :character Class :character
## Mode :character Mode :character Mode :character Mode :character
##
##
##
##
## Bldg.Type House.Style Overall.Qual Overall.Cond
## Length:2930 Length:2930 Min. : 1.000 Min. :1.000
## Class :character Class :character 1st Qu.: 5.000 1st Qu.:5.000
## Mode :character Mode :character Median : 6.000 Median :5.000
## Mean : 6.095 Mean :5.563
## 3rd Qu.: 7.000 3rd Qu.:6.000
## Max. :10.000 Max. :9.000
##
## Year.Built Year.Remod.Add Roof.Style Roof.Matl
## Min. :1872 Min. :1950 Length:2930 Length:2930
## 1st Qu.:1954 1st Qu.:1965 Class :character Class :character
## Median :1973 Median :1993 Mode :character Mode :character
## Mean :1971 Mean :1984
## 3rd Qu.:2001 3rd Qu.:2004
## Max. :2010 Max. :2010
##
## Exterior.1st Exterior.2nd Mas.Vnr.Type Mas.Vnr.Area
## Length:2930 Length:2930 Length:2930 Min. : 0.0
## Class :character Class :character Class :character 1st Qu.: 0.0
## Mode :character Mode :character Mode :character Median : 0.0
## Mean : 101.9
## 3rd Qu.: 164.0
## Max. :1600.0
## NA's :23
## Exter.Qual Exter.Cond Foundation Bsmt.Qual
## Length:2930 Length:2930 Length:2930 Length:2930
## Class :character Class :character Class :character Class :character
## Mode :character Mode :character Mode :character Mode :character
##
##
##
##
## Bsmt.Cond Bsmt.Exposure BsmtFin.Type.1 BsmtFin.SF.1
## Length:2930 Length:2930 Length:2930 Min. : 0.0
## Class :character Class :character Class :character 1st Qu.: 0.0
## Mode :character Mode :character Mode :character Median : 370.0
## Mean : 442.6
## 3rd Qu.: 734.0
## Max. :5644.0
## NA's :1
## BsmtFin.Type.2 BsmtFin.SF.2 Bsmt.Unf.SF Total.Bsmt.SF
## Length:2930 Min. : 0.00 Min. : 0.0 Min. : 0
## Class :character 1st Qu.: 0.00 1st Qu.: 219.0 1st Qu.: 793
## Mode :character Median : 0.00 Median : 466.0 Median : 990
## Mean : 49.72 Mean : 559.3 Mean :1052
## 3rd Qu.: 0.00 3rd Qu.: 802.0 3rd Qu.:1302
## Max. :1526.00 Max. :2336.0 Max. :6110
## NA's :1 NA's :1 NA's :1
## Heating Heating.QC Central.Air Electrical
## Length:2930 Length:2930 Length:2930 Length:2930
## Class :character Class :character Class :character Class :character
## Mode :character Mode :character Mode :character Mode :character
##
##
##
##
## X1st.Flr.SF X2nd.Flr.SF Low.Qual.Fin.SF Gr.Liv.Area
## Min. : 334.0 Min. : 0.0 Min. : 0.000 Min. : 334
## 1st Qu.: 876.2 1st Qu.: 0.0 1st Qu.: 0.000 1st Qu.:1126
## Median :1084.0 Median : 0.0 Median : 0.000 Median :1442
## Mean :1159.6 Mean : 335.5 Mean : 4.677 Mean :1500
## 3rd Qu.:1384.0 3rd Qu.: 703.8 3rd Qu.: 0.000 3rd Qu.:1743
## Max. :5095.0 Max. :2065.0 Max. :1064.000 Max. :5642
##
## Bsmt.Full.Bath Bsmt.Half.Bath Full.Bath Half.Bath
## Min. :0.0000 Min. :0.00000 Min. :0.000 Min. :0.0000
## 1st Qu.:0.0000 1st Qu.:0.00000 1st Qu.:1.000 1st Qu.:0.0000
## Median :0.0000 Median :0.00000 Median :2.000 Median :0.0000
## Mean :0.4314 Mean :0.06113 Mean :1.567 Mean :0.3795
## 3rd Qu.:1.0000 3rd Qu.:0.00000 3rd Qu.:2.000 3rd Qu.:1.0000
## Max. :3.0000 Max. :2.00000 Max. :4.000 Max. :2.0000
## NA's :2 NA's :2
## Bedroom.AbvGr Kitchen.AbvGr Kitchen.Qual TotRms.AbvGrd
## Min. :0.000 Min. :0.000 Length:2930 Min. : 2.000
## 1st Qu.:2.000 1st Qu.:1.000 Class :character 1st Qu.: 5.000
## Median :3.000 Median :1.000 Mode :character Median : 6.000
## Mean :2.854 Mean :1.044 Mean : 6.443
## 3rd Qu.:3.000 3rd Qu.:1.000 3rd Qu.: 7.000
## Max. :8.000 Max. :3.000 Max. :15.000
##
## Functional Fireplaces Fireplace.Qu Garage.Type
## Length:2930 Min. :0.0000 Length:2930 Length:2930
## Class :character 1st Qu.:0.0000 Class :character Class :character
## Mode :character Median :1.0000 Mode :character Mode :character
## Mean :0.5993
## 3rd Qu.:1.0000
## Max. :4.0000
##
## Garage.Yr.Blt Garage.Finish Garage.Cars Garage.Area
## Min. :1895 Length:2930 Min. :0.000 Min. : 0.0
## 1st Qu.:1960 Class :character 1st Qu.:1.000 1st Qu.: 320.0
## Median :1979 Mode :character Median :2.000 Median : 480.0
## Mean :1978 Mean :1.767 Mean : 472.8
## 3rd Qu.:2002 3rd Qu.:2.000 3rd Qu.: 576.0
## Max. :2207 Max. :5.000 Max. :1488.0
## NA's :159 NA's :1 NA's :1
## Garage.Qual Garage.Cond Paved.Drive Wood.Deck.SF
## Length:2930 Length:2930 Length:2930 Min. : 0.00
## Class :character Class :character Class :character 1st Qu.: 0.00
## Mode :character Mode :character Mode :character Median : 0.00
## Mean : 93.75
## 3rd Qu.: 168.00
## Max. :1424.00
##
## Open.Porch.SF Enclosed.Porch X3Ssn.Porch Screen.Porch
## Min. : 0.00 Min. : 0.00 Min. : 0.000 Min. : 0
## 1st Qu.: 0.00 1st Qu.: 0.00 1st Qu.: 0.000 1st Qu.: 0
## Median : 27.00 Median : 0.00 Median : 0.000 Median : 0
## Mean : 47.53 Mean : 23.01 Mean : 2.592 Mean : 16
## 3rd Qu.: 70.00 3rd Qu.: 0.00 3rd Qu.: 0.000 3rd Qu.: 0
## Max. :742.00 Max. :1012.00 Max. :508.000 Max. :576
##
## Pool.Area Pool.QC Fence Misc.Feature
## Min. : 0.000 Length:2930 Length:2930 Length:2930
## 1st Qu.: 0.000 Class :character Class :character Class :character
## Median : 0.000 Mode :character Mode :character Mode :character
## Mean : 2.243
## 3rd Qu.: 0.000
## Max. :800.000
##
## Misc.Val Mo.Sold Yr.Sold Sale.Type
## Min. : 0.00 Min. : 1.000 Min. :2006 Length:2930
## 1st Qu.: 0.00 1st Qu.: 4.000 1st Qu.:2007 Class :character
## Median : 0.00 Median : 6.000 Median :2008 Mode :character
## Mean : 50.63 Mean : 6.216 Mean :2008
## 3rd Qu.: 0.00 3rd Qu.: 8.000 3rd Qu.:2009
## Max. :17000.00 Max. :12.000 Max. :2010
##
## Sale.Condition SalePrice
## Length:2930 Min. : 12789
## Class :character 1st Qu.:129500
## Mode :character Median :160000
## Mean :180796
## 3rd Qu.:213500
## Max. :755000
##
# Size of the data set as (number of rows, number of columns).
dim(ames)
## [1] 2930 82
## Summary statistics for SalePrice and LotArea
# Five-number summary plus mean of the sale price column.
summary(ames[["SalePrice"]])
## Min. 1st Qu. Median Mean 3rd Qu. Max.
## 12789 129500 160000 180796 213500 755000
# Five-number summary plus mean of the lot area column.
summary(ames[["Lot.Area"]])
## Min. 1st Qu. Median Mean 3rd Qu. Max.
## 1300 7440 9436 10148 11555 215245
### Number of houses in each Neighborhood
# Frequency table: how many rows fall into each neighbourhood.
table(ames[["Neighborhood"]])
##
## Blmngtn Blueste BrDale BrkSide ClearCr CollgCr Crawfor Edwards Gilbert Greens
## 28 10 30 108 44 267 103 194 165 8
## GrnHill IDOTRR Landmrk MeadowV Mitchel NAmes NoRidge NPkVill NridgHt NWAmes
## 2 93 1 37 114 443 71 23 166 131
## OldTown Sawyer SawyerW Somerst StoneBr SWISU Timber Veenker
## 239 151 125 182 51 48 72 24
## Average SalePrice by Neighborhood
library(dplyr)
##
## Attaching package: 'dplyr'
## The following objects are masked from 'package:stats':
##
## filter, lag
## The following objects are masked from 'package:base':
##
## intersect, setdiff, setequal, union
# Mean sale price per neighbourhood: one output row per group, with missing
# prices ignored when averaging.
ames %>%
  group_by(Neighborhood) %>%
  summarise(
    mean_price = mean(SalePrice, na.rm = TRUE)
  )
## # A tibble: 28 × 2
## Neighborhood mean_price
## <chr> <dbl>
## 1 Blmngtn 196662.
## 2 Blueste 143590
## 3 BrDale 105608.
## 4 BrkSide 124756.
## 5 ClearCr 208662.
## 6 CollgCr 201803.
## 7 Crawfor 207551.
## 8 Edwards 130843.
## 9 Gilbert 190647.
## 10 Greens 193531.
## # ℹ 18 more rows
## Distribution of SalePrice
library(ggplot2)
### Histogram of SalePrice
# Histogram of sale prices using fixed 10,000-wide bins, blue bars with a
# black outline.
ggplot(data = ames, mapping = aes(x = SalePrice)) +
  geom_histogram(
    binwidth = 10000,
    fill = "blue",
    color = "black"
  ) +
  labs(
    title = "Distribution of SalePrice",
    x = "Sale Price",
    y = "Frequency"
  )
## Relationship between LotArea and SalePrice
### Scatter plot of LotArea vs SalePrice
# One point per house: lot area on the x-axis against sale price on the y-axis.
ggplot(data = ames, mapping = aes(x = Lot.Area, y = SalePrice)) +
  geom_point() +
  labs(
    title = "Lot Area vs Sale Price",
    x = "Lot Area",
    y = "Sale Price"
  )
## Interaction between Categorical and Continuous Variables
### Scatter plot of LotArea vs SalePrice colored by Neighborhood
# Lot area against sale price, with each point coloured by its neighbourhood.
# The axis labels are set explicitly so this plot reads the same way as the
# plain Lot Area vs Sale Price scatter plot instead of showing the raw column
# names (Lot.Area / SalePrice).
ggplot(ames, aes(x = Lot.Area, y = SalePrice, color = Neighborhood)) +
  geom_point() +
  labs(
    title = "Sale Price by Lot Area and Neighborhood",
    x = "Lot Area",
    y = "Sale Price"
  )
## Insights
### 1. The SalePrice distribution is right-skewed, indicating a small number of very expensive properties.
### 2. There is a positive correlation between LotArea and SalePrice, but the neighborhood also affects prices.
### 3. Neighborhood has a clear impact on house prices, with some neighborhoods consistently having higher or lower average prices.