# Load the house-price data set and store the three categorical predictors
# used below (kitchen quality, garage finish, neighborhood) as factors.
houseprices <- read.csv("~/Downloads/houseprices.csv")
factor_columns <- c("KitchenQual", "GarageFinish", "Neighborhood")
houseprices[factor_columns] <- lapply(houseprices[factor_columns], as.factor)
library(ggplot2)
# ---- Univariate distributions (Figures 1.1 - 1.10) ----
# Numeric size variables and the response are drawn as 30-bin frequency
# polygons, factors as bar charts, and the two rating variables as 10-bin
# histograms.

# Figure 1.1: lot area.
ggplot(houseprices, aes(x = LotArea)) +
  geom_freqpoly(color = "blue", bins = 30) +
  labs(title = "Figure 1.1", x = "Lot Area (sq ft)", y = "Count")

# Figure 1.2: total basement area.
ggplot(houseprices, aes(x = TotalBsmtSF)) +
  geom_freqpoly(color = "blue", bins = 30) +
  labs(
    title = "Figure 1.2",
    x = "Total Square Ft of Basement Space",
    y = "Count"
  )

# Figure 1.3: first-floor area.
ggplot(houseprices, aes(x = X1stFlrSF)) +
  geom_freqpoly(color = "blue", bins = 30) +
  labs(
    title = "Figure 1.3",
    x = "Total Square Ft of First Floor",
    y = "Count"
  )

# Figure 1.4: kitchen quality levels.
ggplot(houseprices, aes(x = KitchenQual)) +
  geom_bar(color = "blue", fill = "blue") +
  labs(title = "Figure 1.4", x = "Kitchen Quality", y = "Count")

# Figure 1.5: garage finish levels.
ggplot(houseprices, aes(x = GarageFinish)) +
  geom_bar(color = "blue", fill = "blue") +
  labs(title = "Figure 1.5", x = "Garage Finish", y = "Count")

# Figure 1.6: neighborhoods; axis labels are staggered over two rows so the
# names do not overlap.
ggplot(houseprices, aes(x = Neighborhood)) +
  geom_bar(color = "blue", fill = "blue") +
  labs(title = "Figure 1.6", x = "Neighborhood", y = "Count") +
  scale_x_discrete(guide = guide_axis(n.dodge = 2))

# Figure 1.7: overall condition rating.
ggplot(houseprices, aes(x = OverallCond)) +
  geom_histogram(bins = 10, fill = "blue") +
  labs(title = "Figure 1.7", x = "Overall Condition", y = "Count")

# Figure 1.8: overall quality rating.
ggplot(houseprices, aes(x = OverallQual)) +
  geom_histogram(bins = 10, fill = "blue") +
  labs(title = "Figure 1.8", x = "Overall Quality", y = "Count")

# Figure 1.9: sale price on its original scale.
ggplot(houseprices, aes(x = SalePrice)) +
  geom_freqpoly(color = "blue", bins = 30) +
  labs(title = "Figure 1.9", x = "Sale Price (in thousands)", y = "Count")

# Figure 1.10: natural log of sale price, the response used in the models.
ggplot(houseprices, aes(x = log(SalePrice))) +
  geom_freqpoly(color = "blue", bins = 30) +
  labs(
    title = "Figure 1.10",
    x = "Log Sale Price (in thousands)",
    y = "Count"
  )

library(ggplot2)
# ---- Each predictor against log(SalePrice) (Figures 1.11 - 1.18) ----
# Numeric predictors and ratings are drawn as scatter plots; factors as
# box plots filled by level.

# Figure 1.11: lot area.
ggplot(houseprices, aes(x = LotArea, y = log(SalePrice))) +
  geom_point() +
  labs(
    title = "Figure 1.11",
    x = "Lot Area (sq ft)",
    y = "Log Sales Price (in thousands)"
  )

# Figure 1.12: total basement area.
ggplot(houseprices, aes(x = TotalBsmtSF, y = log(SalePrice))) +
  geom_point() +
  labs(
    title = "Figure 1.12",
    x = "Total Basement Square Footage",
    y = "Log Sales Price (in thousands)"
  )

# Figure 1.13: first-floor area.
ggplot(houseprices, aes(x = X1stFlrSF, y = log(SalePrice))) +
  geom_point() +
  labs(
    title = "Figure 1.13",
    x = "First Floor Square Footage",
    y = "Log Sales Price (in thousands)"
  )

# Figure 1.14: neighborhood; axis labels are staggered over four rows.
ggplot(
  houseprices,
  aes(x = Neighborhood, y = log(SalePrice), fill = Neighborhood)
) +
  geom_boxplot() +
  labs(
    title = "Figure 1.14",
    x = "Neighborhood",
    y = "Log Sales Price (in thousands)"
  ) +
  scale_x_discrete(guide = guide_axis(n.dodge = 4))

# Figure 1.15: kitchen quality.
ggplot(
  houseprices,
  aes(x = KitchenQual, y = log(SalePrice), fill = KitchenQual)
) +
  geom_boxplot() +
  labs(
    title = "Figure 1.15",
    x = "Kitchen Quality",
    y = "Log Sales Price (in thousands)"
  )

# Figure 1.16: garage finish.
ggplot(
  houseprices,
  aes(x = GarageFinish, y = log(SalePrice), fill = GarageFinish)
) +
  geom_boxplot() +
  labs(
    title = "Figure 1.16",
    x = "Garage Finish",
    y = "Log Sales Price (in thousands)"
  )

# Figure 1.17: overall condition rating.
ggplot(houseprices, aes(x = OverallCond, y = log(SalePrice))) +
  geom_point() +
  labs(
    title = "Figure 1.17",
    x = "Overall Condition",
    y = "Log Sales Price (in thousands)"
  )

# Figure 1.18: overall quality rating.
ggplot(houseprices, aes(x = OverallQual, y = log(SalePrice))) +
  geom_point() +
  labs(
    title = "Figure 1.18",
    x = "Overall Quality",
    y = "Log Sales Price (in thousands)"
  )

# Set the CRAN entry of the "repos" option (other configured repositories are
# kept as they are) and make sure the corrplot package is available.
r <- getOption("repos")
# Use HTTPS rather than plain HTTP so the package download cannot be altered
# in transit; cloud.r-project.org is CRAN's automatically redirecting mirror.
r["CRAN"] <- "https://cloud.r-project.org"
options(repos = r)
# Install only when the package is missing, so re-running the script does not
# download and reinstall corrplot every time (and works offline once
# installed).
if (!requireNamespace("corrplot", quietly = TRUE)) {
  install.packages("corrplot")
}
##
## The downloaded binary packages are in
## /var/folders/46/lyr0qk9s4334r99v9cz27lr80000gn/T//RtmpLLFSjK/downloaded_packages
library(corrplot)
## corrplot 0.92 loaded
# Correlation matrix of the five numeric predictors, displayed as numbers in
# the upper triangle only.
predictor_cor <- cor(
  houseprices[, c(
    "X1stFlrSF", "TotalBsmtSF", "LotArea", "OverallQual", "OverallCond"
  )]
)
corrplot(predictor_cor, type = "upper", method = "num")

# Model 1: log sale price regressed on every candidate predictor, plus the
# interaction between first-floor and basement area.
pricemodel1 <- lm(
  log(SalePrice) ~
    X1stFlrSF + TotalBsmtSF + LotArea +
    OverallQual + OverallCond +
    KitchenQual + GarageFinish + Neighborhood +
    X1stFlrSF:TotalBsmtSF,
  data = houseprices
)
# Coefficient table (estimate, standard error, t value, p-value).
knitr::kable(coef(summary(pricemodel1)))
## |                       |   Estimate | Std. Error |    t value | Pr(>\|t\|) |
## |:----------------------|-----------:|-----------:|-----------:|-----------:|
## | (Intercept)           |  4.0065974 |  0.1182244 | 33.8897729 |  0.0000000 |
## | X1stFlrSF             |  0.0001817 |  0.0000570 |  3.1876219 |  0.0015151 |
## | TotalBsmtSF           |  0.0000849 |  0.0000767 |  1.1075593 |  0.2685311 |
## | LotArea               |  0.0000113 |  0.0000012 |  9.3291208 |  0.0000000 |
## | OverallQual           |  0.1230046 |  0.0089007 | 13.8196704 |  0.0000000 |
## | OverallCond           |  0.0402567 |  0.0083426 |  4.8254373 |  0.0000018 |
## | KitchenQualFa         | -0.2208623 |  0.0668563 | -3.3035378 |  0.0010161 |
## | KitchenQualGd         | -0.1092967 |  0.0254416 | -4.2959889 |  0.0000205 |
## | KitchenQualTA         | -0.1829523 |  0.0319092 | -5.7335351 |  0.0000000 |
## | GarageFinishRFn       | -0.0222192 |  0.0181420 | -1.2247381 |  0.2211926 |
## | GarageFinishUnf       | -0.0936952 |  0.0219749 | -4.2637289 |  0.0000236 |
## | NeighborhoodCrawfor   |  0.0472387 |  0.0415631 |  1.1365526 |  0.2562150 |
## | NeighborhoodGilbert   |  0.0219421 |  0.0376339 |  0.5830421 |  0.5601014 |
## | NeighborhoodNAmes     | -0.1034226 |  0.0352766 | -2.9317636 |  0.0035091 |
## | NeighborhoodNoRidge   |  0.1780434 |  0.0385158 |  4.6226122 |  0.0000047 |
## | NeighborhoodNridgHt   |  0.0467461 |  0.0343166 |  1.3622035 |  0.1736853 |
## | NeighborhoodNWAmes    | -0.0707180 |  0.0394627 | -1.7920225 |  0.0736730 |
## | NeighborhoodOldTown   | -0.1371880 |  0.0455736 | -3.0102528 |  0.0027288 |
## | NeighborhoodSawyer    | -0.0809795 |  0.0490343 | -1.6514858 |  0.0992040 |
## | NeighborhoodSawyerW   |  0.0117050 |  0.0404899 |  0.2890838 |  0.7726250 |
## | NeighborhoodSomerst   |  0.0387328 |  0.0370063 |  1.0466553 |  0.2957134 |
## | NeighborhoodStoneBr   | -0.0605052 |  0.0312187 | -1.9381067 |  0.0531161 |
## | NeighborhoodTimber    | -0.0183894 |  0.0410968 | -0.4474644 |  0.6547140 |
## | X1stFlrSF:TotalBsmtSF |  0.0000000 |  0.0000000 | -0.3062620 |  0.7595198 |
# Model 2: reduced model. Drops TotalBsmtSF and the interaction term, and
# replaces the full GarageFinish and Neighborhood factors with indicator
# terms for the single levels "Unf", "NAmes", "NoRidge" and "OldTown".
pricemodel2 <- lm(
  log(SalePrice) ~
    X1stFlrSF + LotArea +
    OverallQual + OverallCond +
    KitchenQual +
    (GarageFinish == "Unf") +
    (Neighborhood == "NAmes") +
    (Neighborhood == "NoRidge") +
    (Neighborhood == "OldTown"),
  data = houseprices
)
# Coefficient table (estimate, standard error, t value, p-value).
knitr::kable(coef(summary(pricemodel2)))
## |                               |   Estimate | Std. Error |   t value | Pr(>\|t\|) |
## |:------------------------------|-----------:|-----------:|----------:|-----------:|
## | (Intercept)                   |  3.9818988 |  0.0884339 | 45.026820 |  0.0000000 |
## | X1stFlrSF                     |  0.0002071 |  0.0000219 |  9.456880 |  0.0000000 |
## | LotArea                       |  0.0000111 |  0.0000012 |  9.153315 |  0.0000000 |
## | OverallQual                   |  0.1377530 |  0.0084208 | 16.358719 |  0.0000000 |
## | OverallCond                   |  0.0353805 |  0.0077231 |  4.581111 |  0.0000057 |
## | KitchenQualFa                 | -0.2190373 |  0.0667551 | -3.281208 |  0.0010970 |
## | KitchenQualGd                 | -0.1220144 |  0.0239379 | -5.097127 |  0.0000005 |
## | KitchenQualTA                 | -0.2196710 |  0.0302542 | -7.260834 |  0.0000000 |
## | GarageFinish == "Unf"TRUE     | -0.0841010 |  0.0186975 | -4.497980 |  0.0000083 |
## | Neighborhood == "NAmes"TRUE   | -0.0741188 |  0.0221363 | -3.348288 |  0.0008670 |
## | Neighborhood == "NoRidge"TRUE |  0.1744893 |  0.0312155 |  5.589822 |  0.0000000 |
## | Neighborhood == "OldTown"TRUE | -0.1197370 |  0.0355614 | -3.367051 |  0.0008112 |
# Model 3: the reduced model with the first-floor-by-basement area
# interaction added back (TotalBsmtSF has no main effect here).
pricemodel3 <- lm(
  log(SalePrice) ~
    X1stFlrSF + LotArea +
    OverallQual + OverallCond +
    KitchenQual +
    (GarageFinish == "Unf") +
    (Neighborhood == "NAmes") +
    (Neighborhood == "NoRidge") +
    (Neighborhood == "OldTown") +
    X1stFlrSF:TotalBsmtSF,
  data = houseprices
)
# Coefficient table (estimate, standard error, t value, p-value).
knitr::kable(coef(summary(pricemodel3)))
## |                               |   Estimate | Std. Error |   t value | Pr(>\|t\|) |
## |:------------------------------|-----------:|-----------:|----------:|-----------:|
## | (Intercept)                   |  4.0361997 |  0.0935430 | 43.148062 |  0.0000000 |
## | X1stFlrSF                     |  0.0001235 |  0.0000524 |  2.355599 |  0.0188325 |
## | LotArea                       |  0.0000111 |  0.0000012 |  9.201194 |  0.0000000 |
## | OverallQual                   |  0.1350594 |  0.0085445 | 15.806538 |  0.0000000 |
## | OverallCond                   |  0.0369250 |  0.0077592 |  4.758899 |  0.0000025 |
## | KitchenQualFa                 | -0.2159523 |  0.0666566 | -3.239774 |  0.0012663 |
## | KitchenQualGd                 | -0.1158923 |  0.0241478 | -4.799299 |  0.0000020 |
## | KitchenQualTA                 | -0.2140710 |  0.0303673 | -7.049383 |  0.0000000 |
## | GarageFinish == "Unf"TRUE     | -0.0848069 |  0.0186678 | -4.542965 |  0.0000068 |
## | Neighborhood == "NAmes"TRUE   | -0.0718774 |  0.0221329 | -3.247537 |  0.0012329 |
## | Neighborhood == "NoRidge"TRUE |  0.1722144 |  0.0311856 |  5.522242 |  0.0000001 |
## | Neighborhood == "OldTown"TRUE | -0.1160579 |  0.0355585 | -3.263861 |  0.0011652 |
## | X1stFlrSF:TotalBsmtSF         |  0.0000000 |  0.0000000 |  1.754171 |  0.0799413 |
# ---- Residual diagnostics for pricemodel1 (Figures 2.1 and 2.2) ----
# Plot from a data frame built from the fitted model rather than mapping
# model vectors against `houseprices`. lm() drops rows with missing values by
# default, so the fitted/residual vectors are not guaranteed to have
# nrow(houseprices) elements, and ggplot2 rejects aesthetics whose length
# does not match the plot data.
pricemodel1_diag <- data.frame(
  fitted = fitted(pricemodel1),
  resid = resid(pricemodel1)
)

# Figure 2.1: residuals against fitted values, with a dashed red reference
# line at zero.
ggplot(pricemodel1_diag, aes(x = fitted, y = resid)) +
  geom_point() +
  labs(x = "Predicted Values", y = "Residuals", title = "Figure 2.1") +
  geom_hline(yintercept = 0, lty = 2, color = "red", lwd = 1.5)

# Figure 2.2: normal Q-Q plot of the residuals with a reference line.
ggplot(pricemodel1_diag, aes(sample = resid)) +
  geom_qq() +
  geom_qq_line(color = "blue") +
  labs(
    x = "Theoretical Normal Quantiles",
    y = "Observed Residual Quantiles",
    title = "Figure 2.2"
  ) +
  theme_bw()

# 95% confidence intervals for the pricemodel1 coefficients, shown as a table.
# NOTE(review): confint.default() is called directly, so these are
# normal-quantile (Wald) intervals; the confint() generic would give the
# slightly wider t-based intervals for an lm fit. Confirm which is intended.
pricemodel1_ci <- confint.default(pricemodel1, level = 0.95)
knitr::kable(pricemodel1_ci)
## |                       |      2.5 % |     97.5 % |
## |:----------------------|-----------:|-----------:|
## | (Intercept)           |  3.7748819 |  4.2383130 |
## | X1stFlrSF             |  0.0000700 |  0.0002933 |
## | TotalBsmtSF           | -0.0000654 |  0.0002352 |
## | LotArea               |  0.0000089 |  0.0000136 |
## | OverallQual           |  0.1055596 |  0.1404496 |
## | OverallCond           |  0.0239055 |  0.0566080 |
## | KitchenQualFa         | -0.3518982 | -0.0898264 |
## | KitchenQualGd         | -0.1591613 | -0.0594321 |
## | KitchenQualTA         | -0.2454931 | -0.1204115 |
## | GarageFinishRFn       | -0.0577769 |  0.0133385 |
## | GarageFinishUnf       | -0.1367652 | -0.0506251 |
## | NeighborhoodCrawfor   | -0.0342236 |  0.1287009 |
## | NeighborhoodGilbert   | -0.0518189 |  0.0957031 |
## | NeighborhoodNAmes     | -0.1725634 | -0.0342818 |
## | NeighborhoodNoRidge   |  0.1025539 |  0.2535329 |
## | NeighborhoodNridgHt   | -0.0205131 |  0.1140054 |
## | NeighborhoodNWAmes    | -0.1480634 |  0.0066274 |
## | NeighborhoodOldTown   | -0.2265106 | -0.0478654 |
## | NeighborhoodSawyer    | -0.1770851 |  0.0151260 |
## | NeighborhoodSawyerW   | -0.0676538 |  0.0910637 |
## | NeighborhoodSomerst   | -0.0337982 |  0.1112638 |
## | NeighborhoodStoneBr   | -0.1216927 |  0.0006824 |
## | NeighborhoodTimber    | -0.0989377 |  0.0621589 |
## | X1stFlrSF:TotalBsmtSF | -0.0000001 |  0.0000001 |
# Final model: refits pricemodel1 with the same full specification used for
# the first model earlier in this script, then prints its coefficient table.
pricemodel1 <- lm(
  log(SalePrice) ~
    X1stFlrSF + TotalBsmtSF + LotArea +
    OverallQual + OverallCond +
    KitchenQual + GarageFinish + Neighborhood +
    X1stFlrSF:TotalBsmtSF,
  data = houseprices
)
knitr::kable(coef(summary(pricemodel1)))
## |                       |   Estimate | Std. Error |    t value | Pr(>\|t\|) |
## |:----------------------|-----------:|-----------:|-----------:|-----------:|
## | (Intercept)           |  4.0065974 |  0.1182244 | 33.8897729 |  0.0000000 |
## | X1stFlrSF             |  0.0001817 |  0.0000570 |  3.1876219 |  0.0015151 |
## | TotalBsmtSF           |  0.0000849 |  0.0000767 |  1.1075593 |  0.2685311 |
## | LotArea               |  0.0000113 |  0.0000012 |  9.3291208 |  0.0000000 |
## | OverallQual           |  0.1230046 |  0.0089007 | 13.8196704 |  0.0000000 |
## | OverallCond           |  0.0402567 |  0.0083426 |  4.8254373 |  0.0000018 |
## | KitchenQualFa         | -0.2208623 |  0.0668563 | -3.3035378 |  0.0010161 |
## | KitchenQualGd         | -0.1092967 |  0.0254416 | -4.2959889 |  0.0000205 |
## | KitchenQualTA         | -0.1829523 |  0.0319092 | -5.7335351 |  0.0000000 |
## | GarageFinishRFn       | -0.0222192 |  0.0181420 | -1.2247381 |  0.2211926 |
## | GarageFinishUnf       | -0.0936952 |  0.0219749 | -4.2637289 |  0.0000236 |
## | NeighborhoodCrawfor   |  0.0472387 |  0.0415631 |  1.1365526 |  0.2562150 |
## | NeighborhoodGilbert   |  0.0219421 |  0.0376339 |  0.5830421 |  0.5601014 |
## | NeighborhoodNAmes     | -0.1034226 |  0.0352766 | -2.9317636 |  0.0035091 |
## | NeighborhoodNoRidge   |  0.1780434 |  0.0385158 |  4.6226122 |  0.0000047 |
## | NeighborhoodNridgHt   |  0.0467461 |  0.0343166 |  1.3622035 |  0.1736853 |
## | NeighborhoodNWAmes    | -0.0707180 |  0.0394627 | -1.7920225 |  0.0736730 |
## | NeighborhoodOldTown   | -0.1371880 |  0.0455736 | -3.0102528 |  0.0027288 |
## | NeighborhoodSawyer    | -0.0809795 |  0.0490343 | -1.6514858 |  0.0992040 |
## | NeighborhoodSawyerW   |  0.0117050 |  0.0404899 |  0.2890838 |  0.7726250 |
## | NeighborhoodSomerst   |  0.0387328 |  0.0370063 |  1.0466553 |  0.2957134 |
## | NeighborhoodStoneBr   | -0.0605052 |  0.0312187 | -1.9381067 |  0.0531161 |
## | NeighborhoodTimber    | -0.0183894 |  0.0410968 | -0.4474644 |  0.6547140 |
## | X1stFlrSF:TotalBsmtSF |  0.0000000 |  0.0000000 | -0.3062620 |  0.7595198 |