# Read the house price data and convert the three categorical columns
# (KitchenQual, GarageFinish, Neighborhood) to factors in a single step.
houseprices <- transform(
  read.csv("~/Downloads/houseprices.csv"),
  KitchenQual = as.factor(KitchenQual),
  GarageFinish = as.factor(GarageFinish),
  Neighborhood = as.factor(Neighborhood)
)
library(ggplot2)
# Univariate distributions (Figures 1.1-1.10).

# Figures 1.1-1.3: frequency polygons (30 bins) for LotArea, TotalBsmtSF
# and X1stFlrSF.
ggplot(data = houseprices, mapping = aes(x = LotArea)) +
  geom_freqpoly(bins = 30, color = "blue") +
  labs(title = "Figure 1.1", x = "Lot Area (sq ft)", y = "Count")

ggplot(data = houseprices, mapping = aes(x = TotalBsmtSF)) +
  geom_freqpoly(bins = 30, color = "blue") +
  labs(
    title = "Figure 1.2",
    x = "Total Square Ft of Basement Space",
    y = "Count"
  )

ggplot(data = houseprices, mapping = aes(x = X1stFlrSF)) +
  geom_freqpoly(bins = 30, color = "blue") +
  labs(
    title = "Figure 1.3",
    x = "Total Square Ft of First Floor",
    y = "Count"
  )

# Figures 1.4-1.6: bar charts of level counts for the factor columns.
ggplot(data = houseprices, mapping = aes(x = KitchenQual)) +
  geom_bar(fill = "blue", color = "blue") +
  labs(title = "Figure 1.4", x = "Kitchen Quality", y = "Count")

ggplot(data = houseprices, mapping = aes(x = GarageFinish)) +
  geom_bar(fill = "blue", color = "blue") +
  labs(title = "Figure 1.5", x = "Garage Finish", y = "Count")

# Axis labels are dodged over two rows so the neighborhood names do not
# overlap.
ggplot(data = houseprices, mapping = aes(x = Neighborhood)) +
  geom_bar(fill = "blue", color = "blue") +
  labs(title = "Figure 1.6", x = "Neighborhood", y = "Count") +
  scale_x_discrete(guide = guide_axis(n.dodge = 2))

# Figures 1.7-1.8: 10-bin histograms of OverallCond and OverallQual.
ggplot(data = houseprices, mapping = aes(x = OverallCond)) +
  geom_histogram(fill = "blue", bins = 10) +
  labs(title = "Figure 1.7", x = "Overall Condition", y = "Count")

ggplot(data = houseprices, mapping = aes(x = OverallQual)) +
  geom_histogram(fill = "blue", bins = 10) +
  labs(title = "Figure 1.8", x = "Overall Quality", y = "Count")

# Figures 1.9-1.10: SalePrice on its raw scale and on the log scale.
ggplot(data = houseprices, mapping = aes(x = SalePrice)) +
  geom_freqpoly(bins = 30, color = "blue") +
  labs(title = "Figure 1.9", x = "Sale Price (in thousands)", y = "Count")

ggplot(data = houseprices, mapping = aes(x = log(SalePrice))) +
  geom_freqpoly(bins = 30, color = "blue") +
  labs(
    title = "Figure 1.10",
    x = "Log Sale Price (in thousands)",
    y = "Count"
  )

library(ggplot2)
# Each predictor against log(SalePrice) (Figures 1.11-1.18).

# Figures 1.11-1.13: scatterplots for LotArea, TotalBsmtSF and X1stFlrSF.
ggplot(data = houseprices, mapping = aes(x = LotArea, y = log(SalePrice))) +
  geom_point() +
  labs(
    title = "Figure 1.11",
    x = "Lot Area (sq ft)",
    y = "Log Sales Price (in thousands)"
  )

ggplot(
  data = houseprices,
  mapping = aes(x = TotalBsmtSF, y = log(SalePrice))
) +
  geom_point() +
  labs(
    title = "Figure 1.12",
    x = "Total Basement Square Footage",
    y = "Log Sales Price (in thousands)"
  )

ggplot(data = houseprices, mapping = aes(x = X1stFlrSF, y = log(SalePrice))) +
  geom_point() +
  labs(
    title = "Figure 1.13",
    x = "First Floor Square Footage",
    y = "Log Sales Price (in thousands)"
  )

# Figures 1.14-1.16: boxplots per factor level, filled by that same factor.
# Neighborhood labels are dodged over four rows to keep them legible.
ggplot(
  data = houseprices,
  mapping = aes(x = Neighborhood, y = log(SalePrice), fill = Neighborhood)
) +
  geom_boxplot() +
  labs(
    title = "Figure 1.14",
    x = "Neighborhood",
    y = "Log Sales Price (in thousands)"
  ) +
  scale_x_discrete(guide = guide_axis(n.dodge = 4))

ggplot(
  data = houseprices,
  mapping = aes(x = KitchenQual, y = log(SalePrice), fill = KitchenQual)
) +
  geom_boxplot() +
  labs(
    title = "Figure 1.15",
    x = "Kitchen Quality",
    y = "Log Sales Price (in thousands)"
  )

ggplot(
  data = houseprices,
  mapping = aes(x = GarageFinish, y = log(SalePrice), fill = GarageFinish)
) +
  geom_boxplot() +
  labs(
    title = "Figure 1.16",
    x = "Garage Finish",
    y = "Log Sales Price (in thousands)"
  )

# Figures 1.17-1.18: scatterplots for OverallCond and OverallQual.
ggplot(
  data = houseprices,
  mapping = aes(x = OverallCond, y = log(SalePrice))
) +
  geom_point() +
  labs(
    title = "Figure 1.17",
    x = "Overall Condition",
    y = "Log Sales Price (in thousands)"
  )

ggplot(
  data = houseprices,
  mapping = aes(x = OverallQual, y = log(SalePrice))
) +
  geom_point() +
  labs(
    title = "Figure 1.18",
    x = "Overall Quality",
    y = "Log Sales Price (in thousands)"
  )

# Install corrplot only when it is not already available, so re-running the
# script does not download the package again. The mirror is passed directly to
# install.packages() (over HTTPS) rather than by rewriting the session-wide
# "repos" option.
if (!requireNamespace("corrplot", quietly = TRUE)) {
  install.packages("corrplot", repos = "https://cran.us.r-project.org")
}
library(corrplot)

# Upper-triangle correlation matrix, drawn as numbers, for the five numeric
# predictor columns. cor() is called with its defaults, so a missing value in
# any of these columns would turn the affected correlations into NA.
numeric_predictors <- c(
  "X1stFlrSF", "TotalBsmtSF", "LotArea", "OverallQual", "OverallCond"
)
corrplot(
  cor(houseprices[, numeric_predictors]),
  type = "upper",
  method = "number"
)

# Model 1: log(SalePrice) regressed on every candidate predictor plus the
# X1stFlrSF-by-TotalBsmtSF interaction; the coefficient table is rendered
# with knitr::kable().
pricemodel1 <- lm(
  log(SalePrice) ~ X1stFlrSF + TotalBsmtSF + LotArea + OverallQual +
    OverallCond + KitchenQual + GarageFinish + Neighborhood +
    X1stFlrSF:TotalBsmtSF,
  data = houseprices
)
knitr::kable(coef(summary(pricemodel1)))
Estimate Std. Error t value Pr(>|t|)
(Intercept) 4.0065974 0.1182244 33.8897729 0.0000000
X1stFlrSF 0.0001817 0.0000570 3.1876219 0.0015151
TotalBsmtSF 0.0000849 0.0000767 1.1075593 0.2685311
LotArea 0.0000113 0.0000012 9.3291208 0.0000000
OverallQual 0.1230046 0.0089007 13.8196704 0.0000000
OverallCond 0.0402567 0.0083426 4.8254373 0.0000018
KitchenQualFa -0.2208623 0.0668563 -3.3035378 0.0010161
KitchenQualGd -0.1092967 0.0254416 -4.2959889 0.0000205
KitchenQualTA -0.1829523 0.0319092 -5.7335351 0.0000000
GarageFinishRFn -0.0222192 0.0181420 -1.2247381 0.2211926
GarageFinishUnf -0.0936952 0.0219749 -4.2637289 0.0000236
NeighborhoodCrawfor 0.0472387 0.0415631 1.1365526 0.2562150
NeighborhoodGilbert 0.0219421 0.0376339 0.5830421 0.5601014
NeighborhoodNAmes -0.1034226 0.0352766 -2.9317636 0.0035091
NeighborhoodNoRidge 0.1780434 0.0385158 4.6226122 0.0000047
NeighborhoodNridgHt 0.0467461 0.0343166 1.3622035 0.1736853
NeighborhoodNWAmes -0.0707180 0.0394627 -1.7920225 0.0736730
NeighborhoodOldTown -0.1371880 0.0455736 -3.0102528 0.0027288
NeighborhoodSawyer -0.0809795 0.0490343 -1.6514858 0.0992040
NeighborhoodSawyerW 0.0117050 0.0404899 0.2890838 0.7726250
NeighborhoodSomerst 0.0387328 0.0370063 1.0466553 0.2957134
NeighborhoodStoneBr -0.0605052 0.0312187 -1.9381067 0.0531161
NeighborhoodTimber -0.0183894 0.0410968 -0.4474644 0.6547140
X1stFlrSF:TotalBsmtSF 0.0000000 0.0000000 -0.3062620 0.7595198
# Model 2: drops TotalBsmtSF and the interaction, and replaces the full
# GarageFinish and Neighborhood factors with indicator terms for single
# levels (Unf; NAmes, NoRidge, OldTown).
pricemodel2 <- lm(
  log(SalePrice) ~ X1stFlrSF + LotArea + OverallQual + OverallCond +
    KitchenQual +
    (GarageFinish == "Unf") +
    (Neighborhood == "NAmes") +
    (Neighborhood == "NoRidge") +
    (Neighborhood == "OldTown"),
  data = houseprices
)
knitr::kable(coef(summary(pricemodel2)))
Estimate Std. Error t value Pr(>|t|)
(Intercept) 3.9818988 0.0884339 45.026820 0.0000000
X1stFlrSF 0.0002071 0.0000219 9.456880 0.0000000
LotArea 0.0000111 0.0000012 9.153315 0.0000000
OverallQual 0.1377530 0.0084208 16.358719 0.0000000
OverallCond 0.0353805 0.0077231 4.581111 0.0000057
KitchenQualFa -0.2190373 0.0667551 -3.281208 0.0010970
KitchenQualGd -0.1220144 0.0239379 -5.097127 0.0000005
KitchenQualTA -0.2196710 0.0302542 -7.260834 0.0000000
GarageFinish == "Unf"TRUE -0.0841010 0.0186975 -4.497980 0.0000083
Neighborhood == "NAmes"TRUE -0.0741188 0.0221363 -3.348288 0.0008670
Neighborhood == "NoRidge"TRUE 0.1744893 0.0312155 5.589822 0.0000000
Neighborhood == "OldTown"TRUE -0.1197370 0.0355614 -3.367051 0.0008112
# Model 3: the model-2 terms plus the X1stFlrSF-by-TotalBsmtSF interaction.
# NOTE(review): the interaction is included without a TotalBsmtSF main
# effect -- confirm this is intentional.
pricemodel3 <- lm(
  log(SalePrice) ~ X1stFlrSF + LotArea + OverallQual + OverallCond +
    KitchenQual +
    (GarageFinish == "Unf") +
    (Neighborhood == "NAmes") +
    (Neighborhood == "NoRidge") +
    (Neighborhood == "OldTown") +
    X1stFlrSF:TotalBsmtSF,
  data = houseprices
)
knitr::kable(coef(summary(pricemodel3)))
Estimate Std. Error t value Pr(>|t|)
(Intercept) 4.0361997 0.0935430 43.148062 0.0000000
X1stFlrSF 0.0001235 0.0000524 2.355599 0.0188325
LotArea 0.0000111 0.0000012 9.201194 0.0000000
OverallQual 0.1350594 0.0085445 15.806538 0.0000000
OverallCond 0.0369250 0.0077592 4.758899 0.0000025
KitchenQualFa -0.2159523 0.0666566 -3.239774 0.0012663
KitchenQualGd -0.1158923 0.0241478 -4.799299 0.0000020
KitchenQualTA -0.2140710 0.0303673 -7.049383 0.0000000
GarageFinish == "Unf"TRUE -0.0848069 0.0186678 -4.542965 0.0000068
Neighborhood == "NAmes"TRUE -0.0718774 0.0221329 -3.247537 0.0012329
Neighborhood == "NoRidge"TRUE 0.1722144 0.0311856 5.522242 0.0000001
Neighborhood == "OldTown"TRUE -0.1160579 0.0355585 -3.263861 0.0011652
X1stFlrSF:TotalBsmtSF 0.0000000 0.0000000 1.754171 0.0799413
# Diagnostics for pricemodel1.
#
# The fitted values and residuals are taken from the model object and plotted
# from their own data frame. Pairing them with `houseprices` inside aes() only
# works when lm() kept every row; if any row were dropped for missing values
# the vector lengths would no longer match the plot data.
model1_diagnostics <- data.frame(
  fitted = fitted(pricemodel1),
  residual = resid(pricemodel1)
)

# Figure 2.1: residuals against fitted values, with a dashed zero reference
# line drawn on top of the points.
ggplot(model1_diagnostics, aes(x = fitted, y = residual)) +
  geom_point() +
  labs(x = "Predicted Values", y = "Residuals", title = "Figure 2.1") +
  geom_hline(yintercept = 0, linetype = 2, color = "red", lwd = 1.5)

# Figure 2.2: normal Q-Q plot of the residuals.
ggplot(model1_diagnostics, aes(sample = residual)) +
  geom_qq() +
  geom_qq_line(color = "blue") +
  labs(
    x = "Theoretical Normal Quantiles",
    y = "Observed Residual Quantiles",
    title = "Figure 2.2"
  ) +
  theme_bw()

# 95% confidence intervals for the model-1 coefficients.
# NOTE(review): confint.default() builds the intervals from normal quantiles;
# calling confint(pricemodel1) would dispatch to the lm method, which uses t
# quantiles. Confirm the normal approximation is what is wanted here.
knitr::kable(confint.default(pricemodel1, level = 0.95))
2.5 % 97.5 %
(Intercept) 3.7748819 4.2383130
X1stFlrSF 0.0000700 0.0002933
TotalBsmtSF -0.0000654 0.0002352
LotArea 0.0000089 0.0000136
OverallQual 0.1055596 0.1404496
OverallCond 0.0239055 0.0566080
KitchenQualFa -0.3518982 -0.0898264
KitchenQualGd -0.1591613 -0.0594321
KitchenQualTA -0.2454931 -0.1204115
GarageFinishRFn -0.0577769 0.0133385
GarageFinishUnf -0.1367652 -0.0506251
NeighborhoodCrawfor -0.0342236 0.1287009
NeighborhoodGilbert -0.0518189 0.0957031
NeighborhoodNAmes -0.1725634 -0.0342818
NeighborhoodNoRidge 0.1025539 0.2535329
NeighborhoodNridgHt -0.0205131 0.1140054
NeighborhoodNWAmes -0.1480634 0.0066274
NeighborhoodOldTown -0.2265106 -0.0478654
NeighborhoodSawyer -0.1770851 0.0151260
NeighborhoodSawyerW -0.0676538 0.0910637
NeighborhoodSomerst -0.0337982 0.1112638
NeighborhoodStoneBr -0.1216927 0.0006824
NeighborhoodTimber -0.0989377 0.0621589
X1stFlrSF:TotalBsmtSF -0.0000001 0.0000001
# Fit pricemodel1 again (same formula and data as the earlier pricemodel1 fit
# in this file) and render its coefficient table.
pricemodel1 <- lm(
  formula = log(SalePrice) ~
    X1stFlrSF + TotalBsmtSF + LotArea +
    OverallQual + OverallCond +
    KitchenQual + GarageFinish + Neighborhood +
    X1stFlrSF:TotalBsmtSF,
  data = houseprices
)
knitr::kable(summary(pricemodel1)[["coefficients"]])
Estimate Std. Error t value Pr(>|t|)
(Intercept) 4.0065974 0.1182244 33.8897729 0.0000000
X1stFlrSF 0.0001817 0.0000570 3.1876219 0.0015151
TotalBsmtSF 0.0000849 0.0000767 1.1075593 0.2685311
LotArea 0.0000113 0.0000012 9.3291208 0.0000000
OverallQual 0.1230046 0.0089007 13.8196704 0.0000000
OverallCond 0.0402567 0.0083426 4.8254373 0.0000018
KitchenQualFa -0.2208623 0.0668563 -3.3035378 0.0010161
KitchenQualGd -0.1092967 0.0254416 -4.2959889 0.0000205
KitchenQualTA -0.1829523 0.0319092 -5.7335351 0.0000000
GarageFinishRFn -0.0222192 0.0181420 -1.2247381 0.2211926
GarageFinishUnf -0.0936952 0.0219749 -4.2637289 0.0000236
NeighborhoodCrawfor 0.0472387 0.0415631 1.1365526 0.2562150
NeighborhoodGilbert 0.0219421 0.0376339 0.5830421 0.5601014
NeighborhoodNAmes -0.1034226 0.0352766 -2.9317636 0.0035091
NeighborhoodNoRidge 0.1780434 0.0385158 4.6226122 0.0000047
NeighborhoodNridgHt 0.0467461 0.0343166 1.3622035 0.1736853
NeighborhoodNWAmes -0.0707180 0.0394627 -1.7920225 0.0736730
NeighborhoodOldTown -0.1371880 0.0455736 -3.0102528 0.0027288
NeighborhoodSawyer -0.0809795 0.0490343 -1.6514858 0.0992040
NeighborhoodSawyerW 0.0117050 0.0404899 0.2890838 0.7726250
NeighborhoodSomerst 0.0387328 0.0370063 1.0466553 0.2957134
NeighborhoodStoneBr -0.0605052 0.0312187 -1.9381067 0.0531161
NeighborhoodTimber -0.0183894 0.0410968 -0.4474644 0.6547140
X1stFlrSF:TotalBsmtSF 0.0000000 0.0000000 -0.3062620 0.7595198