library(readxl)
library(dplyr)
##
## Attaching package: 'dplyr'
## The following objects are masked from 'package:stats':
##
## filter, lag
## The following objects are masked from 'package:base':
##
## intersect, setdiff, setequal, union
library(stats)
# Load the Boston housing workbook; the path is relative to the working directory.
bostonData <- read_excel(path = "Documents/638/Problem Sets/BostonHousing.xl.xlsx")
###Regress median housing prices on nox using a semi-log specification.
# Semi-log specification with nox as the only regressor: log(medv) on nox.
fit1 <- lm(formula = log(bostonData$medv) ~ bostonData$nox)
summary(fit1)
##
## Call:
## lm(formula = log(bostonData$medv) ~ bostonData$nox)
##
## Residuals:
## Min 1Q Median 3Q Max
## -1.17597 -0.19503 -0.03334 0.18223 1.08159
##
## Coefficients:
## Estimate Std. Error t value Pr(>|t|)
## (Intercept) 4.03359 0.07655 52.69 <2e-16 ***
## bostonData$nox -1.80114 0.13510 -13.33 <2e-16 ***
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## Residual standard error: 0.3518 on 504 degrees of freedom
## Multiple R-squared: 0.2607, Adjusted R-squared: 0.2592
## F-statistic: 177.7 on 1 and 504 DF, p-value: < 2.2e-16
###Regress median housing prices on nox as well as all other control variables using a semi-log specification.
###Comment on the result for each variable. Does it make sense to you?
# Semi-log specification with the full set of controls: log(medv) on nox plus
# the twelve remaining covariates.
fit2 <- lm(
  formula = log(bostonData$medv) ~ bostonData$nox + bostonData$crim +
    bostonData$zn + bostonData$indus + bostonData$chas + bostonData$rm +
    bostonData$age + bostonData$dis + bostonData$rad + bostonData$tax +
    bostonData$ptratio + bostonData$b + bostonData$lstat
)
summary(fit2)
##
## Call:
## lm(formula = log(bostonData$medv) ~ bostonData$nox + bostonData$crim +
## bostonData$zn + bostonData$indus + bostonData$chas + bostonData$rm +
## bostonData$age + bostonData$dis + bostonData$rad + bostonData$tax +
## bostonData$ptratio + bostonData$b + bostonData$lstat)
##
## Residuals:
## Min 1Q Median 3Q Max
## -0.73361 -0.09747 -0.01657 0.09629 0.86435
##
## Coefficients:
## Estimate Std. Error t value Pr(>|t|)
## (Intercept) 4.1020423 0.2042726 20.081 < 2e-16 ***
## bostonData$nox -0.7783993 0.1528902 -5.091 5.07e-07 ***
## bostonData$crim -0.0102715 0.0013155 -7.808 3.52e-14 ***
## bostonData$zn 0.0011725 0.0005495 2.134 0.033349 *
## bostonData$indus 0.0024668 0.0024614 1.002 0.316755
## bostonData$chas 0.1008876 0.0344859 2.925 0.003598 **
## bostonData$rm 0.0908331 0.0167280 5.430 8.87e-08 ***
## bostonData$age 0.0002106 0.0005287 0.398 0.690567
## bostonData$dis -0.0490873 0.0079834 -6.149 1.62e-09 ***
## bostonData$rad 0.0142673 0.0026556 5.373 1.20e-07 ***
## bostonData$tax -0.0006258 0.0001505 -4.157 3.80e-05 ***
## bostonData$ptratio -0.0382715 0.0052365 -7.309 1.10e-12 ***
## bostonData$b 0.0004136 0.0001075 3.847 0.000135 ***
## bostonData$lstat -0.0290355 0.0020299 -14.304 < 2e-16 ***
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## Residual standard error: 0.1899 on 492 degrees of freedom
## Multiple R-squared: 0.7896, Adjusted R-squared: 0.7841
## F-statistic: 142.1 on 13 and 492 DF, p-value: < 2.2e-16
#####We are trying to predict the median value of owner-occupied homes. Nitrogen oxide concentration is
#####negatively associated with median housing prices, and the coefficient is quite large. This makes sense: higher
#####levels of pollution make homes less desirable, thus driving down the median value of homes in that area.
#######Crime rates, distance from employment centers, and percentage of population of lower status are all negatively
#####associated with home value, and are statistically significant. These are intuitive, traditionally driving down
#####property values. Property tax over $10K also has a negative coefficient: This may make sense mechanically for a
#####certain range of home values, however, I would investigate this variable further. What does the distribution look
#####like here? Is the $10K mark a sufficiently low value to drive a decrease in median housing prices in the area? Is
#####there any bunching just under this mark?
#####Proportion of residential land zoned for >25K sqft lots, proximity to the Charles River, average number of rooms
#####per dwelling, and highway accessibility are all positively associated with home value, and are statistically
#####significant. These are intuitive: one should expect to pay more for more land and more rooms in a house. Vicinity
#####to a river is also desirable, as is highway accessibility.
#####Neither non-retail business acreage nor proportion of 'old' houses were significant in the regression. Again,
#####this makes sense: they don't necessarily impact housing value.
###If a house currently has average nox concentrations, what is the benefit of cutting them by 50%? Answer using a
###linear, semi-log as well as double-log specification using all control variables (same as part b).
#####Semi-log specification:
# Print the full-controls semi-log fit again so the nox coefficient interpreted below is in view.
summary(fit2)
##
## Call:
## lm(formula = log(bostonData$medv) ~ bostonData$nox + bostonData$crim +
## bostonData$zn + bostonData$indus + bostonData$chas + bostonData$rm +
## bostonData$age + bostonData$dis + bostonData$rad + bostonData$tax +
## bostonData$ptratio + bostonData$b + bostonData$lstat)
##
## Residuals:
## Min 1Q Median 3Q Max
## -0.73361 -0.09747 -0.01657 0.09629 0.86435
##
## Coefficients:
## Estimate Std. Error t value Pr(>|t|)
## (Intercept) 4.1020423 0.2042726 20.081 < 2e-16 ***
## bostonData$nox -0.7783993 0.1528902 -5.091 5.07e-07 ***
## bostonData$crim -0.0102715 0.0013155 -7.808 3.52e-14 ***
## bostonData$zn 0.0011725 0.0005495 2.134 0.033349 *
## bostonData$indus 0.0024668 0.0024614 1.002 0.316755
## bostonData$chas 0.1008876 0.0344859 2.925 0.003598 **
## bostonData$rm 0.0908331 0.0167280 5.430 8.87e-08 ***
## bostonData$age 0.0002106 0.0005287 0.398 0.690567
## bostonData$dis -0.0490873 0.0079834 -6.149 1.62e-09 ***
## bostonData$rad 0.0142673 0.0026556 5.373 1.20e-07 ***
## bostonData$tax -0.0006258 0.0001505 -4.157 3.80e-05 ***
## bostonData$ptratio -0.0382715 0.0052365 -7.309 1.10e-12 ***
## bostonData$b 0.0004136 0.0001075 3.847 0.000135 ***
## bostonData$lstat -0.0290355 0.0020299 -14.304 < 2e-16 ***
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## Residual standard error: 0.1899 on 492 degrees of freedom
## Multiple R-squared: 0.7896, Adjusted R-squared: 0.7841
## F-statistic: 142.1 on 13 and 492 DF, p-value: < 2.2e-16
# Sample-average nox concentration; halving it is the policy change being valued.
mean_nox <- mean(bostonData$nox)
mean_nox
## [1] 0.5546951
# In a semi-log model log(medv) moves by beta * (change in nox), so the percent
# change in medv is 100 * (exp(beta * delta) - 1). Cutting the average level by
# 50% means delta = -0.5 * mean_nox. The effect must be exponentiated at the
# actual delta; a per-unit percentage cannot be scaled down linearly.
beta_nox <- coef(fit2)[["bostonData$nox"]]
round(100 * (exp(beta_nox * -0.5 * mean_nox) - 1), 1)
## [1] 24.1
#####In our results, observe the estimate on nox: coefficient of -.78. e^-.78=.46 => a one unit increase in parts per
#####10 million nitrogen oxides is associated with a 54 percent decrease in median housing values (all else constant).
#####Average nox concentrations are at .55. A 50% decrease would lower this by .277 (to .277), and e^(.78*.277)=1.24,
#####leading to a 24.1 percent increase in medv.
#####Linear specification:
# Linear specification: medv in levels on nox and the same twelve controls.
fit3 <- lm(
  formula = bostonData$medv ~ bostonData$nox + bostonData$crim +
    bostonData$zn + bostonData$indus + bostonData$chas + bostonData$rm +
    bostonData$age + bostonData$dis + bostonData$rad + bostonData$tax +
    bostonData$ptratio + bostonData$b + bostonData$lstat
)
summary(fit3)
##
## Call:
## lm(formula = bostonData$medv ~ bostonData$nox + bostonData$crim +
## bostonData$zn + bostonData$indus + bostonData$chas + bostonData$rm +
## bostonData$age + bostonData$dis + bostonData$rad + bostonData$tax +
## bostonData$ptratio + bostonData$b + bostonData$lstat)
##
## Residuals:
## Min 1Q Median 3Q Max
## -15.595 -2.730 -0.518 1.777 26.199
##
## Coefficients:
## Estimate Std. Error t value Pr(>|t|)
## (Intercept) 3.646e+01 5.103e+00 7.144 3.28e-12 ***
## bostonData$nox -1.777e+01 3.820e+00 -4.651 4.25e-06 ***
## bostonData$crim -1.080e-01 3.286e-02 -3.287 0.001087 **
## bostonData$zn 4.642e-02 1.373e-02 3.382 0.000778 ***
## bostonData$indus 2.056e-02 6.150e-02 0.334 0.738288
## bostonData$chas 2.687e+00 8.616e-01 3.118 0.001925 **
## bostonData$rm 3.810e+00 4.179e-01 9.116 < 2e-16 ***
## bostonData$age 6.922e-04 1.321e-02 0.052 0.958229
## bostonData$dis -1.476e+00 1.995e-01 -7.398 6.01e-13 ***
## bostonData$rad 3.060e-01 6.635e-02 4.613 5.07e-06 ***
## bostonData$tax -1.233e-02 3.760e-03 -3.280 0.001112 **
## bostonData$ptratio -9.527e-01 1.308e-01 -7.283 1.31e-12 ***
## bostonData$b 9.312e-03 2.686e-03 3.467 0.000573 ***
## bostonData$lstat -5.248e-01 5.072e-02 -10.347 < 2e-16 ***
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## Residual standard error: 4.745 on 492 degrees of freedom
## Multiple R-squared: 0.7406, Adjusted R-squared: 0.7338
## F-statistic: 108.1 on 13 and 492 DF, p-value: < 2.2e-16
#####A one unit increase in nox corresponds to a 17.77 unit decrease in medv (medv is recorded in $1000s). Cutting nox
#####in half from its mean of .555 lowers it by .277, and (.277)*17.77 = 4.93, so this leads to roughly a $4,930
#####increase in median home values.
#####Double-log specification:
# Double-log specification in nox: log(medv) on log(nox) and the same twelve
# controls (the controls enter in levels).
fit4 <- lm(
  formula = log(bostonData$medv) ~ log(bostonData$nox) + bostonData$crim +
    bostonData$zn + bostonData$indus + bostonData$chas + bostonData$rm +
    bostonData$age + bostonData$dis + bostonData$rad + bostonData$tax +
    bostonData$ptratio + bostonData$b + bostonData$lstat
)
summary(fit4)
##
## Call:
## lm(formula = log(bostonData$medv) ~ log(bostonData$nox) + bostonData$crim +
## bostonData$zn + bostonData$indus + bostonData$chas + bostonData$rm +
## bostonData$age + bostonData$dis + bostonData$rad + bostonData$tax +
## bostonData$ptratio + bostonData$b + bostonData$lstat)
##
## Residuals:
## Min 1Q Median 3Q Max
## -0.73631 -0.10009 -0.01841 0.09443 0.87119
##
## Coefficients:
## Estimate Std. Error t value Pr(>|t|)
## (Intercept) 3.3546173 0.1754246 19.123 < 2e-16 ***
## log(bostonData$nox) -0.4752055 0.0975427 -4.872 1.49e-06 ***
## bostonData$crim -0.0102792 0.0013186 -7.796 3.84e-14 ***
## bostonData$zn 0.0010253 0.0005526 1.855 0.064126 .
## bostonData$indus 0.0023177 0.0024659 0.940 0.347729
## bostonData$chas 0.0974220 0.0345430 2.820 0.004991 **
## bostonData$rm 0.0919043 0.0167498 5.487 6.56e-08 ***
## bostonData$age 0.0003169 0.0005379 0.589 0.556047
## bostonData$dis -0.0511299 0.0081608 -6.265 8.13e-10 ***
## bostonData$rad 0.0143661 0.0026667 5.387 1.11e-07 ***
## bostonData$tax -0.0006289 0.0001508 -4.170 3.61e-05 ***
## bostonData$ptratio -0.0369988 0.0051889 -7.130 3.59e-12 ***
## bostonData$b 0.0004185 0.0001077 3.886 0.000116 ***
## bostonData$lstat -0.0291803 0.0020327 -14.355 < 2e-16 ***
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## Residual standard error: 0.1903 on 492 degrees of freedom
## Multiple R-squared: 0.7887, Adjusted R-squared: 0.7832
## F-statistic: 141.3 on 13 and 492 DF, p-value: < 2.2e-16
#####For every 1% increase in nox, there is a .48% decrease in medv. So decreasing nox by 50% leads to roughly a 39
#####percent increase in median home values (.5^(-.475) = 1.39; the linear approximation .475*50 gives 24 percent).
###Should the hedonic approach to non-market valuation be classified as a “design-based” approach or a “model-based”
###approach? Can you explain modern concerns with such an approach?
#####I would classify this as a model-based approach. We are trying to find an associated, causal relationship between
#####pollution levels and housing prices. Magnitudes of coefficients would certainly present issues, particularly in
#####a pollution context where individuals or firms may expect a hard promise on financial returns, if they invest in
#####decreasing pollution levels.