library(wooldridge)
# Pull the Kiel & McClain housing data set out of the wooldridge package
# and preview its first ten rows.
data <- wooldridge::kielmc
head(data, n = 10)
## year age agesq nbh cbd intst lintst price rooms area land baths dist
## 1 1978 48 2304 4 3000 1000 6.9078 60000 7 1660 4578 1 10700
## 2 1978 83 6889 4 4000 1000 6.9078 40000 6 2612 8370 2 11000
## 3 1978 58 3364 4 4000 1000 6.9078 34000 6 1144 5000 1 11500
## 4 1978 11 121 4 4000 1000 6.9078 63900 5 1136 10000 1 11900
## 5 1978 48 2304 4 4000 2000 7.6009 44000 5 1868 10000 1 12100
## 6 1978 78 6084 4 3000 2000 7.6009 46000 6 1780 9500 3 10000
## 7 1978 22 484 4 4000 2000 7.6009 56000 6 1700 10878 2 11700
## 8 1978 78 6084 4 3000 2000 7.6009 38500 6 1556 3870 2 10200
## 9 1978 42 1764 4 3000 2000 7.6009 60500 8 1642 7000 2 10500
## 10 1978 41 1681 4 3000 2000 7.6009 55000 5 1443 7950 2 11000
## ldist wind lprice y81 larea lland y81ldist lintstsq nearinc
## 1 9.277999 3 11.00210 0 7.414573 8.429017 0 47.71770 1
## 2 9.305651 3 10.59663 0 7.867871 9.032409 0 47.71770 1
## 3 9.350102 3 10.43412 0 7.042286 8.517193 0 47.71770 1
## 4 9.384294 3 11.06507 0 7.035269 9.210340 0 47.71770 1
## 5 9.400961 3 10.69195 0 7.532624 9.210340 0 57.77368 1
## 6 9.210340 3 10.73640 0 7.484369 9.159047 0 57.77368 1
## 7 9.367344 3 10.93311 0 7.438384 9.294497 0 57.77368 1
## 8 9.230143 3 10.55841 0 7.349874 8.261010 0 57.77368 1
## 9 9.259131 3 11.01040 0 7.403670 8.853665 0 57.77368 1
## 10 9.305651 3 10.91509 0 7.274479 8.980927 0 57.77368 1
## y81nrinc rprice lrprice
## 1 0 60000 11.00210
## 2 0 40000 10.59663
## 3 0 34000 10.43412
## 4 0 63900 11.06507
## 5 0 44000 10.69195
## 6 0 46000 10.73640
## 7 0 56000 10.93311
## 8 0 38500 10.55841
## 9 0 60500 11.01040
## 10 0 55000 10.91509
library(dplyr)
##
## Attaching package: 'dplyr'
## The following objects are masked from 'package:stats':
##
## filter, lag
## The following objects are masked from 'package:base':
##
## intersect, setdiff, setequal, union
# Keep only the 1981 observations, then preview the first ten rows.
y1981 <- filter(data, year == 1981)
head(y1981, n = 10)
## year age agesq nbh cbd intst lintst price rooms area land baths dist
## 1 1981 81 6561 4 4000 1000 6.9078 49000 6 1554 6790 1 11800
## 2 1981 71 5041 4 3000 2000 7.6009 52000 5 1575 3485 1 10100
## 3 1981 31 961 4 3000 2000 7.6009 68000 6 3304 18731 2 10200
## 4 1981 41 1681 4 3000 2000 7.6009 54000 6 1700 7500 1 11200
## 5 1981 31 961 4 4000 2000 7.6009 70000 6 1454 5500 2 11800
## 6 1981 81 6561 4 3000 2000 7.6009 47000 6 1410 4500 1 10200
## 7 1981 19 361 0 5000 4000 8.2940 73900 6 1592 13068 1 13200
## 8 1981 1 1 0 9000 9000 9.1050 117500 9 1910 15246 3 18200
## 9 1981 41 1681 0 10000 10000 9.2103 60000 5 735 10454 1 19000
## 10 1981 1 1 0 16000 15000 9.6158 85000 6 1154 43560 2 23000
## ldist wind lprice y81 larea lland y81ldist lintstsq nearinc
## 1 9.375855 4 10.79958 1 7.348588 8.823206 9.375855 47.71770 1
## 2 9.220291 3 10.85900 1 7.362010 8.156223 9.220291 57.77368 1
## 3 9.230143 3 11.12726 1 8.102889 9.837935 9.230143 57.77368 1
## 4 9.323669 3 10.89674 1 7.438384 8.922658 9.323669 57.77368 1
## 5 9.375855 3 11.15625 1 7.282073 8.612503 9.375855 57.77368 1
## 6 9.230143 3 10.75790 1 7.251345 8.411833 9.230143 57.77368 1
## 7 9.487972 3 11.21047 1 7.372746 9.477921 9.487972 68.79043 1
## 8 9.809177 5 11.67419 1 7.554859 9.632072 9.809177 82.90102 0
## 9 9.852194 5 11.00210 1 6.599871 9.254740 9.852194 84.82964 0
## 10 10.043250 5 11.35041 1 7.050990 10.681894 10.043250 92.46361 0
## y81nrinc rprice lrprice
## 1 1 37634.41 10.53567
## 2 1 39938.55 10.59510
## 3 1 52227.34 10.86336
## 4 1 41474.66 10.63284
## 5 1 53763.44 10.89235
## 6 1 36098.31 10.49400
## 7 1 56758.83 10.94657
## 8 0 90245.77 11.41029
## 9 0 46082.95 10.73820
## 10 0 65284.18 11.08650
i. To study the effects of the incinerator location on housing
price, consider the simple regression model
$\log(price) = \beta_0 + \beta_1 \log(dist) + u$, where price is the housing
price in dollars and dist is the distance from the house to the incinerator,
measured in feet.
Interpreting this equation causally, what sign do you expect for $\beta_1$ if
the presence of the incinerator depresses housing prices? ANSWER: I
expect $\beta_1$ to be positive ("+"). If the incinerator depresses prices,
then the farther a house is from the incinerator, the more it should sell
for, so log(price) should rise with log(dist). Estimate this
equation and interpret the results.
# NOTE(review): attach() is kept only in case later chunks rely on bare column
# names; the calls below reference the data frame explicitly instead.
attach(y1981)
# Descriptive statistics for the 1981 sample. The columns are passed as numeric
# vectors: quoting the name, as in mean("price"), hands mean() a character
# string, which returns NA with a warning instead of the column mean.
mean(y1981$price)
mean(y1981$dist)
sd(y1981$dist)
# NOTE: re-knit to print the values; the output previously recorded here
# consisted only of the NA results and their warnings.
# Part (i): simple log-log regression of house price on distance to the
# incinerator, estimated on the 1981 sample only.
model <- lm(
  formula = log(price) ~ log(dist),
  data = y1981
)
summary(model)
##
## Call:
## lm(formula = log(price) ~ log(dist), data = y1981)
##
## Residuals:
## Min 1Q Median 3Q Max
## -0.87318 -0.22657 -0.01985 0.25687 0.95045
##
## Coefficients:
## Estimate Std. Error t value Pr(>|t|)
## (Intercept) 8.04716 0.64624 12.452 < 2e-16 ***
## log(dist) 0.36488 0.06576 5.548 1.39e-07 ***
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## Residual standard error: 0.3543 on 140 degrees of freedom
## Multiple R-squared: 0.1803, Adjusted R-squared: 0.1744
## F-statistic: 30.79 on 1 and 140 DF, p-value: 1.395e-07
iv. Is the square of log(dist) significant when you add it to the
model from part (iii)?
# Part (iv): add the square of log(dist) to the part (iii) specification.
# Inside a formula `^` is the term-crossing operator, not arithmetic, so
# `log(dist)^2` collapses to plain `log(dist)` and no quadratic term is
# estimated. Wrapping the power in I() makes it an arithmetic square; the
# linear terms are kept alongside their squares.
# NOTE(review): assumes part (iii), which is not shown here, is the model with
# log(intst), its square, and the housing controls -- confirm.
# NOTE: the summary printed after this chunk was produced by the old formula
# (it lists no squared terms) and must be regenerated by re-knitting.
model4 <- lm(
  log(price) ~ log(dist) + I(log(dist)^2) + log(intst) + I(log(intst)^2) +
    log(area) + log(land) + rooms + baths + age,
  data = y1981
)
summary(model4)
##
## Call:
## lm(formula = log(price) ~ log(dist)^2 + log(intst)^2 + log(area) +
## log(land) + rooms + baths + age, data = y1981)
##
## Residuals:
## Min 1Q Median 3Q Max
## -0.74072 -0.10669 0.00932 0.11817 0.61387
##
## Coefficients:
## Estimate Std. Error t value Pr(>|t|)
## (Intercept) 7.592332 0.641711 11.831 < 2e-16 ***
## log(dist) 0.055389 0.057621 0.961 0.338153
## log(intst) -0.039032 0.051662 -0.756 0.451261
## log(area) 0.319294 0.076418 4.178 5.27e-05 ***
## log(land) 0.076824 0.039505 1.945 0.053908 .
## rooms 0.042528 0.028251 1.505 0.134588
## baths 0.166923 0.041944 3.980 0.000113 ***
## age -0.003567 0.001059 -3.369 0.000985 ***
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## Residual standard error: 0.201 on 134 degrees of freedom
## Multiple R-squared: 0.7475, Adjusted R-squared: 0.7344
## F-statistic: 56.68 on 7 and 134 DF, p-value: < 2.2e-16