library(readxl)
library(dplyr)
## 
## Attaching package: 'dplyr'
## The following objects are masked from 'package:stats':
## 
##     filter, lag
## The following objects are masked from 'package:base':
## 
##     intersect, setdiff, setequal, union
library(stats)
# Load the Boston housing spreadsheet. The path is relative, so it is resolved
# against the current working directory.
bostonData <- read_excel(
  path = "Documents/638/Problem Sets/BostonHousing.xl.xlsx"
)
### Regress median housing prices on nox using a semi-log specification.
# Semi-log: the outcome is log(medv), the single regressor is nox in levels.
fit1 <- lm(formula = log(bostonData$medv) ~ bostonData$nox)
summary(fit1)
## 
## Call:
## lm(formula = log(bostonData$medv) ~ bostonData$nox)
## 
## Residuals:
##      Min       1Q   Median       3Q      Max 
## -1.17597 -0.19503 -0.03334  0.18223  1.08159 
## 
## Coefficients:
##                Estimate Std. Error t value Pr(>|t|)    
## (Intercept)     4.03359    0.07655   52.69   <2e-16 ***
## bostonData$nox -1.80114    0.13510  -13.33   <2e-16 ***
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## Residual standard error: 0.3518 on 504 degrees of freedom
## Multiple R-squared:  0.2607, Adjusted R-squared:  0.2592 
## F-statistic: 177.7 on 1 and 504 DF,  p-value: < 2.2e-16
###Regress median housing prices on nox as well as all other control variables using a semi-log specification. 
###Comment on the result for each variable. Does it make sense to you?
# Semi-log model with the full control set: log(medv) on nox plus the twelve
# other columns. Terms are kept in their original order so the coefficient
# table prints in the same sequence.
fit2 <- lm(
  formula = log(bostonData$medv) ~
    bostonData$nox +
    bostonData$crim +
    bostonData$zn +
    bostonData$indus +
    bostonData$chas +
    bostonData$rm +
    bostonData$age +
    bostonData$dis +
    bostonData$rad +
    bostonData$tax +
    bostonData$ptratio +
    bostonData$b +
    bostonData$lstat
)
summary(fit2)
## 
## Call:
## lm(formula = log(bostonData$medv) ~ bostonData$nox + bostonData$crim + 
##     bostonData$zn + bostonData$indus + bostonData$chas + bostonData$rm + 
##     bostonData$age + bostonData$dis + bostonData$rad + bostonData$tax + 
##     bostonData$ptratio + bostonData$b + bostonData$lstat)
## 
## Residuals:
##      Min       1Q   Median       3Q      Max 
## -0.73361 -0.09747 -0.01657  0.09629  0.86435 
## 
## Coefficients:
##                      Estimate Std. Error t value Pr(>|t|)    
## (Intercept)         4.1020423  0.2042726  20.081  < 2e-16 ***
## bostonData$nox     -0.7783993  0.1528902  -5.091 5.07e-07 ***
## bostonData$crim    -0.0102715  0.0013155  -7.808 3.52e-14 ***
## bostonData$zn       0.0011725  0.0005495   2.134 0.033349 *  
## bostonData$indus    0.0024668  0.0024614   1.002 0.316755    
## bostonData$chas     0.1008876  0.0344859   2.925 0.003598 ** 
## bostonData$rm       0.0908331  0.0167280   5.430 8.87e-08 ***
## bostonData$age      0.0002106  0.0005287   0.398 0.690567    
## bostonData$dis     -0.0490873  0.0079834  -6.149 1.62e-09 ***
## bostonData$rad      0.0142673  0.0026556   5.373 1.20e-07 ***
## bostonData$tax     -0.0006258  0.0001505  -4.157 3.80e-05 ***
## bostonData$ptratio -0.0382715  0.0052365  -7.309 1.10e-12 ***
## bostonData$b        0.0004136  0.0001075   3.847 0.000135 ***
## bostonData$lstat   -0.0290355  0.0020299 -14.304  < 2e-16 ***
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## Residual standard error: 0.1899 on 492 degrees of freedom
## Multiple R-squared:  0.7896, Adjusted R-squared:  0.7841 
## F-statistic: 142.1 on 13 and 492 DF,  p-value: < 2.2e-16
#####We are trying to predict the median value of owner-occupied homes. Nitrogen oxide concentration is
#####negatively associated with median housing prices, and the coefficient is quite large. This makes sense: higher 
#####levels of pollution make homes less desirable, thus driving down the median value of homes in that area. 
#####Crime rates, distance from employment centers, and percentage of population of lower status are all negatively 
#####associated with home value, and are statistically significant. These are intuitive, traditionally driving down
#####property values. The property tax variable also has a negative coefficient. (In the standard Boston housing data,
#####tax is the full-value property-tax rate per $10,000 of value, not an indicator for tax bills over $10K.) A
#####negative sign may make sense mechanically, since a higher tax rate raises the cost of owning a home; however, I
#####would investigate this variable further. What does the distribution look like here? Is there enough variation in
#####tax rates to drive a decrease in median housing prices in the area? Is there any bunching at particular rates? 

#####Prop residential land zoned for >25K sqft lots, Charles River, average num rooms per dwelling, and accessibility
#####to radial highways are all positively associated with home value, and are statistically significant. These are
#####intuitive: one should expect to pay more for more land and more rooms in a house. Vicinity to a river is also
#####desirable, as is highway accessibility. 

#####Neither non-retail business acreage nor proportion of 'old' houses were significant in the regression. Again, 
#####this makes sense: they don't necessarily impact housing value. 
###If a house currently has average nox concentrations, what is the benefit of cutting them by 50%? Answer using a 
###linear, semi-log as well as double-log specification using all control variables (same as part b).

#####Semi-log specification: 
# Show the full-control semi-log fit again so its nox coefficient is on hand
# for the calculation that follows.
print(summary(fit2))
## 
## Call:
## lm(formula = log(bostonData$medv) ~ bostonData$nox + bostonData$crim + 
##     bostonData$zn + bostonData$indus + bostonData$chas + bostonData$rm + 
##     bostonData$age + bostonData$dis + bostonData$rad + bostonData$tax + 
##     bostonData$ptratio + bostonData$b + bostonData$lstat)
## 
## Residuals:
##      Min       1Q   Median       3Q      Max 
## -0.73361 -0.09747 -0.01657  0.09629  0.86435 
## 
## Coefficients:
##                      Estimate Std. Error t value Pr(>|t|)    
## (Intercept)         4.1020423  0.2042726  20.081  < 2e-16 ***
## bostonData$nox     -0.7783993  0.1528902  -5.091 5.07e-07 ***
## bostonData$crim    -0.0102715  0.0013155  -7.808 3.52e-14 ***
## bostonData$zn       0.0011725  0.0005495   2.134 0.033349 *  
## bostonData$indus    0.0024668  0.0024614   1.002 0.316755    
## bostonData$chas     0.1008876  0.0344859   2.925 0.003598 ** 
## bostonData$rm       0.0908331  0.0167280   5.430 8.87e-08 ***
## bostonData$age      0.0002106  0.0005287   0.398 0.690567    
## bostonData$dis     -0.0490873  0.0079834  -6.149 1.62e-09 ***
## bostonData$rad      0.0142673  0.0026556   5.373 1.20e-07 ***
## bostonData$tax     -0.0006258  0.0001505  -4.157 3.80e-05 ***
## bostonData$ptratio -0.0382715  0.0052365  -7.309 1.10e-12 ***
## bostonData$b        0.0004136  0.0001075   3.847 0.000135 ***
## bostonData$lstat   -0.0290355  0.0020299 -14.304  < 2e-16 ***
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## Residual standard error: 0.1899 on 492 degrees of freedom
## Multiple R-squared:  0.7896, Adjusted R-squared:  0.7841 
## F-statistic: 142.1 on 13 and 492 DF,  p-value: < 2.2e-16
# Semi-log benefit of halving nox, starting from its sample mean.
nox_mean <- mean(bostonData$nox)
nox_mean
## [1] 0.5546951
# With log(medv) as the outcome, changing nox by `d` multiplies medv by
# exp(b * d), where b is the nox coefficient. The percent change in medv is
# therefore 100 * (exp(b * d) - 1), not b * d scaled by exp(|b|).
nox_coef_semilog <- coef(fit2)[["bostonData$nox"]]
nox_change <- -0.5 * nox_mean # a 50% cut from the mean concentration
100 * (exp(nox_coef_semilog * nox_change) - 1) # roughly 24 (percent)
#####In our results, observe the estimate on nox: coefficient of -.78. exp(-.78) = .46 => a one unit increase in parts
#####per 10 million nitrogen oxides is associated with a 54 percent decrease in median housing values (all else
#####constant). Note that one full unit is almost twice the average concentration, so this is a very large change.
#####Average nox concentrations are at .55. A 50% decrease lowers nox by .277 (to .277), which raises log(medv) by
#####.78 * .277 = .216, i.e. an exp(.216) - 1 = 24 percent increase in medv. 

#####Linear specification: 
# Linear (level-level) model: medv itself is the outcome, with nox and the
# same twelve controls entering in levels, in the same order as before.
fit3 <- lm(
  formula = bostonData$medv ~
    bostonData$nox +
    bostonData$crim +
    bostonData$zn +
    bostonData$indus +
    bostonData$chas +
    bostonData$rm +
    bostonData$age +
    bostonData$dis +
    bostonData$rad +
    bostonData$tax +
    bostonData$ptratio +
    bostonData$b +
    bostonData$lstat
)
summary(fit3)
## 
## Call:
## lm(formula = bostonData$medv ~ bostonData$nox + bostonData$crim + 
##     bostonData$zn + bostonData$indus + bostonData$chas + bostonData$rm + 
##     bostonData$age + bostonData$dis + bostonData$rad + bostonData$tax + 
##     bostonData$ptratio + bostonData$b + bostonData$lstat)
## 
## Residuals:
##     Min      1Q  Median      3Q     Max 
## -15.595  -2.730  -0.518   1.777  26.199 
## 
## Coefficients:
##                      Estimate Std. Error t value Pr(>|t|)    
## (Intercept)         3.646e+01  5.103e+00   7.144 3.28e-12 ***
## bostonData$nox     -1.777e+01  3.820e+00  -4.651 4.25e-06 ***
## bostonData$crim    -1.080e-01  3.286e-02  -3.287 0.001087 ** 
## bostonData$zn       4.642e-02  1.373e-02   3.382 0.000778 ***
## bostonData$indus    2.056e-02  6.150e-02   0.334 0.738288    
## bostonData$chas     2.687e+00  8.616e-01   3.118 0.001925 ** 
## bostonData$rm       3.810e+00  4.179e-01   9.116  < 2e-16 ***
## bostonData$age      6.922e-04  1.321e-02   0.052 0.958229    
## bostonData$dis     -1.476e+00  1.995e-01  -7.398 6.01e-13 ***
## bostonData$rad      3.060e-01  6.635e-02   4.613 5.07e-06 ***
## bostonData$tax     -1.233e-02  3.760e-03  -3.280 0.001112 ** 
## bostonData$ptratio -9.527e-01  1.308e-01  -7.283 1.31e-12 ***
## bostonData$b        9.312e-03  2.686e-03   3.467 0.000573 ***
## bostonData$lstat   -5.248e-01  5.072e-02 -10.347  < 2e-16 ***
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## Residual standard error: 4.745 on 492 degrees of freedom
## Multiple R-squared:  0.7406, Adjusted R-squared:  0.7338 
## F-statistic: 108.1 on 13 and 492 DF,  p-value: < 2.2e-16
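#####A one unit increase in nox corresponds to a 17.77 unit decrease in medv. Cutting nox in half from its mean of .555
#####lowers it by .277, and .277*17.77 = 4.93. With medv measured in $1,000s, cutting nox in half leads to roughly a
#####$4,930 increase in median home values. 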

#####Double-log specification: 
# Double-log in the variable of interest: both medv and nox enter in logs, so
# the nox coefficient reads as an elasticity. The controls stay in levels and
# in their original order.
fit4 <- lm(
  formula = log(bostonData$medv) ~
    log(bostonData$nox) +
    bostonData$crim +
    bostonData$zn +
    bostonData$indus +
    bostonData$chas +
    bostonData$rm +
    bostonData$age +
    bostonData$dis +
    bostonData$rad +
    bostonData$tax +
    bostonData$ptratio +
    bostonData$b +
    bostonData$lstat
)
summary(fit4)
## 
## Call:
## lm(formula = log(bostonData$medv) ~ log(bostonData$nox) + bostonData$crim + 
##     bostonData$zn + bostonData$indus + bostonData$chas + bostonData$rm + 
##     bostonData$age + bostonData$dis + bostonData$rad + bostonData$tax + 
##     bostonData$ptratio + bostonData$b + bostonData$lstat)
## 
## Residuals:
##      Min       1Q   Median       3Q      Max 
## -0.73631 -0.10009 -0.01841  0.09443  0.87119 
## 
## Coefficients:
##                       Estimate Std. Error t value Pr(>|t|)    
## (Intercept)          3.3546173  0.1754246  19.123  < 2e-16 ***
## log(bostonData$nox) -0.4752055  0.0975427  -4.872 1.49e-06 ***
## bostonData$crim     -0.0102792  0.0013186  -7.796 3.84e-14 ***
## bostonData$zn        0.0010253  0.0005526   1.855 0.064126 .  
## bostonData$indus     0.0023177  0.0024659   0.940 0.347729    
## bostonData$chas      0.0974220  0.0345430   2.820 0.004991 ** 
## bostonData$rm        0.0919043  0.0167498   5.487 6.56e-08 ***
## bostonData$age       0.0003169  0.0005379   0.589 0.556047    
## bostonData$dis      -0.0511299  0.0081608  -6.265 8.13e-10 ***
## bostonData$rad       0.0143661  0.0026667   5.387 1.11e-07 ***
## bostonData$tax      -0.0006289  0.0001508  -4.170 3.61e-05 ***
## bostonData$ptratio  -0.0369988  0.0051889  -7.130 3.59e-12 ***
## bostonData$b         0.0004185  0.0001077   3.886 0.000116 ***
## bostonData$lstat    -0.0291803  0.0020327 -14.355  < 2e-16 ***
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## Residual standard error: 0.1903 on 492 degrees of freedom
## Multiple R-squared:  0.7887, Adjusted R-squared:  0.7832 
## F-statistic: 141.3 on 13 and 492 DF,  p-value: < 2.2e-16
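#####For every 1% increase in nox, there is a .48% decrease in medv. That elasticity is only accurate for small
#####changes. For a 50% cut, log(nox) falls by log(.5) = -.693, so log(medv) rises by .475*.693 = .329, i.e. decreasing
#####nox by 50% leads to an exp(.329) - 1 = 39 percent increase in median home values. (The linear approximation,
#####.475*50, would give about 24 percent.) 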
###Should the hedonic approach to non-market valuation be classified as a “design- based” approach or a “model-based” 
###approach? Can you explain modern concerns with such an approach?
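#####I would classify this as a model-based approach. We are trying to recover a causal relationship between 
#####pollution levels and housing prices from a regression, so the answer rests on the chosen specification and
#####controls rather than on a research design that isolates variation in pollution. The magnitudes of the
#####coefficients illustrate the concern: the nox coefficient moves from -1.80 without controls to -.78 with them, and
#####any omitted neighborhood characteristic correlated with pollution would shift it again. This is a particular
#####issue in a pollution context, where individuals or firms may expect a hard promise on financial returns if they
#####invest in decreasing pollution levels.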