QUESTION 3
a.
# Load the truncated sample from the Stata file.
Truncated <- read.dta("/Users/YaseenAbdulridha/Downloads/paeco526W16_ps2trun.dta")

# Outcome variable: the dbirwt column.
Y <- Truncated$dbirwt

# Drop column 5 before building the regressor matrix
# (presumably column 5 is dbirwt itself -- confirm against the file).
Truncated <- Truncated[, -5]

# Every remaining column enters the regression; passing them as one matrix
# makes lm() label each coefficient "DV<column name>".
DV <- as.matrix(Truncated)
OLS_Regression <- lm(formula = Y ~ DV)
summary(OLS_Regression)
##
## Call:
## lm(formula = Y ~ DV)
##
## Residuals:
## Min 1Q Median 3Q Max
## -1540.1 -310.8 -21.9 281.8 3340.9
##
## Coefficients:
## Estimate Std. Error t value Pr(>|t|)
## (Intercept) 3160.96236 21.71672 145.554 < 2e-16 ***
## DValcohol -18.15697 9.77415 -1.858 0.063220 .
## DVanemia -1.30641 9.67547 -0.135 0.892594
## DVcardiac -39.48234 13.14233 -3.004 0.002663 **
## DVchyper -59.68434 10.99433 -5.429 5.68e-08 ***
## DVdfage 0.18246 0.22397 0.815 0.415275
## DVdfeduc 2.63149 0.53490 4.920 8.68e-07 ***
## DVdiabete 76.49383 6.79488 11.258 < 2e-16 ***
## DVdisllb -0.14759 0.03932 -3.753 0.000175 ***
## DVdlivord 25.62565 1.20784 21.216 < 2e-16 ***
## DVdmage 4.56644 1.50048 3.043 0.002340 **
## DVdmar -27.54364 2.86680 -9.608 < 2e-16 ***
## DVdmeduc 4.76656 0.59416 8.022 1.04e-15 ***
## DVdrink -4.80741 1.81229 -2.653 0.007986 **
## DVforeignb -16.20490 5.36205 -3.022 0.002510 **
## DVnprevist 16.39770 0.37783 43.400 < 2e-16 ***
## DVpre4000 454.96102 8.68525 52.383 < 2e-16 ***
## DVpreterm -259.72091 8.84082 -29.377 < 2e-16 ***
## DVtobacco -189.75501 2.53032 -74.993 < 2e-16 ***
## DVmblack -127.66525 7.47011 -17.090 < 2e-16 ***
## DVmotherr -94.71820 11.92694 -7.942 2.01e-15 ***
## DVmhispan -72.30875 8.53335 -8.474 < 2e-16 ***
## DVfblack -25.63470 7.27416 -3.524 0.000425 ***
## DVfotherr -96.74879 11.78250 -8.211 < 2e-16 ***
## DVfhispan -46.35315 7.86917 -5.890 3.86e-09 ***
## DVadequac2 16.77668 4.00553 4.188 2.81e-05 ***
## DVadequac3 33.58539 8.48291 3.959 7.52e-05 ***
## DVtripre2 15.38323 4.23131 3.636 0.000277 ***
## DVtripre3 40.17054 9.22842 4.353 1.34e-05 ***
## DVtripre0 18.80077 12.77574 1.472 0.141130
## DVfirst -52.77062 3.23215 -16.327 < 2e-16 ***
## DVdeadkids 1.24275 1.25578 0.990 0.322356
## DVplural -581.96172 9.73741 -59.766 < 2e-16 ***
## DVdmage2 -0.09458 0.02612 -3.621 0.000293 ***
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## Residual standard error: 439.6 on 235229 degrees of freedom
## Multiple R-squared: 0.1009, Adjusted R-squared: 0.1008
## F-statistic: 799.9 on 33 and 235229 DF, p-value: < 2.2e-16
#I would expect this estimate to be biased because we are applying OLS to a truncated sample: OLS fails to capture the true slope and the coefficients are biased toward zero (for a negative coefficient such as tobacco, that is an upward bias).
c.
# Keep only the rows whose outcome lies above the truncation point of 2500.
TruncatedData <- subset(Truncated, Y > 2500)

# Truncated regression with left truncation at 2500.
# NOTE(review): Y and DV do not appear to be columns of TruncatedData, so
# truncreg() presumably resolves them from the calling environment and the row
# filter above would then have no effect on the fit -- confirm.
TruncRegression <- truncreg(
  Y ~ DV,
  point = 2500,
  direction = "left",
  data = TruncatedData
)
summary(TruncRegression)
##
## Call:
## truncreg(formula = Y ~ DV, data = TruncatedData, point = 2500,
## direction = "left")
##
##
## Coefficients :
## Estimate Std. Error t-value Pr(>|t|)
## alcohol -51.347177 13.181746 -3.8953 9.807e-05 ***
## anemia 20.968966 12.388275 1.6926 0.0905228 .
## cardiac -54.873327 16.487205 -3.3282 0.0008740 ***
## chyper -73.871838 13.848603 -5.3342 9.594e-08 ***
## dfage 0.691271 0.280438 2.4650 0.0137026 *
## dfeduc 12.602550 0.660253 19.0875 < 2.2e-16 ***
## diabete 75.326009 8.255081 9.1248 < 2.2e-16 ***
## disllb 0.511291 0.048764 10.4851 < 2.2e-16 ***
## dlivord 61.574521 1.455880 42.2937 < 2.2e-16 ***
## dmage 195.600894 0.856412 228.3957 < 2.2e-16 ***
## dmar 84.666536 3.473893 24.3722 < 2.2e-16 ***
## dmeduc 8.018105 0.737951 10.8654 < 2.2e-16 ***
## drink -10.792518 2.861359 -3.7718 0.0001621 ***
## foreignb -36.461031 6.744992 -5.4056 6.458e-08 ***
## nprevist 29.353905 0.454694 64.5575 < 2.2e-16 ***
## pre4000 487.994162 10.032749 48.6401 < 2.2e-16 ***
## preterm -343.786883 12.240328 -28.0864 < 2.2e-16 ***
## tobacco -227.738782 3.313991 -68.7204 < 2.2e-16 ***
## mblack -163.243233 9.662064 -16.8953 < 2.2e-16 ***
## motherr -109.888730 15.342862 -7.1622 7.938e-13 ***
## mhispan -62.560327 10.964237 -5.7059 1.158e-08 ***
## fblack -23.240965 9.359938 -2.4830 0.0130272 *
## fotherr -128.982333 15.230815 -8.4685 < 2.2e-16 ***
## fhispan -26.490461 10.100478 -2.6227 0.0087238 **
## adequac2 87.950612 5.019822 17.5207 < 2.2e-16 ***
## adequac3 153.415302 10.973284 13.9808 < 2.2e-16 ***
## tripre2 29.434068 5.396086 5.4547 4.905e-08 ***
## tripre3 57.610810 12.082631 4.7681 1.860e-06 ***
## tripre0 25.017916 17.438364 1.4346 0.1513874
## first 57.546980 3.887257 14.8040 < 2.2e-16 ***
## deadkids -6.037892 1.565974 -3.8557 0.0001154 ***
## plural -922.779724 17.303031 -53.3305 < 2.2e-16 ***
## dmage2 -3.453109 0.014322 -241.0977 < 2.2e-16 ***
## sigma 500.928943 0.926363 540.7482 < 2.2e-16 ***
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## Log-Likelihood: -1766200 on 34 Df
# Store the log-likelihood component of the truncated-regression fit.
LL_Trunc <- TruncRegression[["logLik"]]
# Estimated effect of maternal smoking during pregnancy on birth weight
# (tobacco coefficient): -189.75501, with standard error 2.53032.
# NOTE(review): these are the tobacco values from the OLS listing for part a;
# the truncated-regression listing above reports -227.738782 (SE 3.313991) --
# confirm which estimate this interval is meant to describe.
trunc_tobacco_est <- -189.75501
trunc_tobacco_se <- 2.53032

# Approximate 95% confidence interval: estimate +/- 2 * standard error.
# The lower bound must subtract the margin from the estimate; the previous
# "2 * SE - estimate" flipped the sign and produced a positive bound.
CI_Truncated <- trunc_tobacco_est + 2 * trunc_tobacco_se   # upper bound
CI_Truncated2 <- trunc_tobacco_est - 2 * trunc_tobacco_se  # lower bound
CI_Truncated
## [1] -184.6944
CI_Truncated2
## [1] -194.8157
Question 4
a.
# Load the censored sample from the Stata file and keep an untouched copy.
Censored <- read.dta("/Users/YaseenAbdulridha/Downloads/paeco526W16_ps2.dta")
Censored2 <- Censored

# Drop column 5 first, then pull the outcome cdbirwt2500 out as a
# one-column matrix.
Censored <- Censored[, -5]
Y_CS <- cbind(Censored$cdbirwt2500)

# Drop column 34 of the reduced frame before building the regressor matrix
# (presumably that column is cdbirwt2500 -- confirm against the file).
Censored <- Censored[, -34]
DV_CS <- as.matrix(Censored)

# OLS of the outcome on all remaining columns.
OLS_Regression_CS <- lm(formula = Y_CS ~ DV_CS)
summary(OLS_Regression_CS)
##
## Call:
## lm(formula = Y_CS ~ DV_CS)
##
## Residuals:
## Min 1Q Median 3Q Max
## -1756.7 -320.5 -10.2 303.6 3382.5
##
## Coefficients:
## Estimate Std. Error t value Pr(>|t|)
## (Intercept) 2995.07801 22.07949 135.650 < 2e-16 ***
## DV_CSalcohol -19.20176 9.71599 -1.976 0.048121 *
## DV_CSanemia -21.94937 9.67045 -2.270 0.023224 *
## DV_CScardiac -45.91896 13.41446 -3.423 0.000619 ***
## DV_CSchyper -132.87782 10.78548 -12.320 < 2e-16 ***
## DV_CSdfage 0.03888 0.22870 0.170 0.864995
## DV_CSdfeduc 3.38021 0.55072 6.138 8.38e-10 ***
## DV_CSdiabete 57.58451 6.96615 8.266 < 2e-16 ***
## DV_CSdisllb -0.22058 0.04009 -5.503 3.75e-08 ***
## DV_CSdlivord 30.03480 1.24155 24.191 < 2e-16 ***
## DV_CSdmage 6.91322 1.52958 4.520 6.20e-06 ***
## DV_CSdmar -33.91189 2.92121 -11.609 < 2e-16 ***
## DV_CSdmeduc 5.80086 0.61160 9.485 < 2e-16 ***
## DV_CSdrink -6.72022 1.65973 -4.049 5.15e-05 ***
## DV_CSforeignb -15.20975 5.51234 -2.759 0.005794 **
## DV_CSnprevist 24.89369 0.37632 66.150 < 2e-16 ***
## DV_CSpre4000 469.92124 9.13684 51.431 < 2e-16 ***
## DV_CSpreterm -365.67221 8.06538 -45.338 < 2e-16 ***
## DV_CStobacco -210.46837 2.56455 -82.068 < 2e-16 ***
## DV_CSmblack -138.48024 7.57071 -18.292 < 2e-16 ***
## DV_CSmotherr -92.56903 12.24403 -7.560 4.03e-14 ***
## DV_CSmhispan -75.18797 8.70090 -8.641 < 2e-16 ***
## DV_CSfblack -37.96559 7.38419 -5.141 2.73e-07 ***
## DV_CSfotherr -104.26981 12.07977 -8.632 < 2e-16 ***
## DV_CSfhispan -53.10200 8.00669 -6.632 3.31e-11 ***
## DV_CSadequac2 30.28170 4.04042 7.495 6.67e-14 ***
## DV_CSadequac3 67.84088 8.45059 8.028 9.95e-16 ***
## DV_CStripre2 26.61349 4.28387 6.212 5.22e-10 ***
## DV_CStripre3 66.30461 9.26586 7.156 8.34e-13 ***
## DV_CStripre0 -14.16548 12.01717 -1.179 0.238491
## DV_CSfirst -73.47962 3.32479 -22.101 < 2e-16 ***
## DV_CSdeadkids -3.80959 1.27799 -2.981 0.002874 **
## DV_CSplural -716.70279 7.44063 -96.323 < 2e-16 ***
## DV_CSdmage2 -0.15470 0.02664 -5.808 6.35e-09 ***
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## Residual standard error: 466.1 on 249966 degrees of freedom
## Multiple R-squared: 0.1445, Adjusted R-squared: 0.1444
## F-statistic: 1280 on 33 and 249966 DF, p-value: < 2.2e-16
#I would expect this estimate to be biased because we are applying OLS to a censored sample: OLS fails to capture the true slope and the coefficients are biased toward zero (for a negative coefficient such as tobacco, that is an upward bias).
c.
# Tobit model with a lower limit of 2500, fitted through vglm().
# NOTE(review): Y_CS and DV_CS look like objects built outside Censored2, so
# the data argument presumably does not supply them -- confirm.
CensoredRegression <- vglm(
  formula = Y_CS ~ DV_CS,
  family = tobit(Lower = 2500),
  data = Censored2
)
summary(CensoredRegression)
##
## Call:
## vglm(formula = Y_CS ~ DV_CS, family = tobit(Lower = 2500), data = Censored2)
##
## Pearson residuals:
## Min 1Q Median 3Q Max
## mu -3.932 -0.6534 0.002043 0.6488 6.937
## loge(sd) -1.002 -0.6618 -0.420591 0.1998 33.953
##
## Coefficients:
## Estimate Std. Error z value Pr(>|z|)
## (Intercept):1 2.943e+03 2.335e+01 126.037 < 2e-16 ***
## (Intercept):2 6.194e+00 1.466e-03 4226.082 < 2e-16 ***
## DV_CSalcohol -2.165e+01 1.044e+01 -2.074 0.038103 *
## DV_CSanemia -2.813e+01 1.026e+01 -2.741 0.006132 **
## DV_CScardiac -4.976e+01 1.419e+01 -3.508 0.000452 ***
## DV_CSchyper -1.557e+02 1.144e+01 -13.607 < 2e-16 ***
## DV_CSdfage 1.932e-03 2.419e-01 0.008 0.993627
## DV_CSdfeduc 3.586e+00 5.816e-01 6.166 7.01e-10 ***
## DV_CSdiabete 5.366e+01 7.346e+00 7.305 2.77e-13 ***
## DV_CSdisllb -2.376e-01 4.236e-02 -5.608 2.05e-08 ***
## DV_CSdlivord 3.213e+01 1.311e+00 24.504 < 2e-16 ***
## DV_CSdmage 7.320e+00 1.617e+00 4.527 5.98e-06 ***
## DV_CSdmar -3.689e+01 3.089e+00 -11.942 < 2e-16 ***
## DV_CSdmeduc 6.187e+00 6.457e-01 9.581 < 2e-16 ***
## DV_CSdrink -9.304e+00 1.869e+00 -4.977 6.46e-07 ***
## DV_CSforeignb -1.470e+01 5.822e+00 -2.525 0.011558 *
## DV_CSnprevist 2.823e+01 3.977e-01 70.991 < 2e-16 ***
## DV_CSpre4000 4.747e+02 9.608e+00 49.409 < 2e-16 ***
## DV_CSpreterm -4.322e+02 8.726e+00 -49.528 < 2e-16 ***
## DV_CStobacco -2.226e+02 2.717e+00 -81.911 < 2e-16 ***
## DV_CSmblack -1.475e+02 8.020e+00 -18.390 < 2e-16 ***
## DV_CSmotherr -9.431e+01 1.294e+01 -7.285 3.21e-13 ***
## DV_CSmhispan -7.702e+01 9.209e+00 -8.364 < 2e-16 ***
## DV_CSfblack -4.239e+01 7.818e+00 -5.422 5.89e-08 ***
## DV_CSfotherr -1.085e+02 1.278e+01 -8.492 < 2e-16 ***
## DV_CSfhispan -5.682e+01 8.476e+00 -6.703 2.04e-11 ***
## DV_CSadequac2 3.755e+01 4.273e+00 8.788 < 2e-16 ***
## DV_CSadequac3 8.087e+01 8.966e+00 9.020 < 2e-16 ***
## DV_CStripre2 2.853e+01 4.533e+00 6.295 3.08e-10 ***
## DV_CStripre3 7.614e+01 9.831e+00 7.745 9.56e-15 ***
## DV_CStripre0 -4.319e+01 1.290e+01 -3.348 0.000813 ***
## DV_CSfirst -7.984e+01 3.511e+00 -22.744 < 2e-16 ***
## DV_CSdeadkids -5.174e+00 1.351e+00 -3.829 0.000129 ***
## DV_CSplural -9.079e+02 8.661e+00 -104.826 < 2e-16 ***
## DV_CSdmage2 -1.681e-01 2.815e-02 -5.972 2.35e-09 ***
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## Number of linear predictors: 2
##
## Names of linear predictors: mu, loge(sd)
##
## Dispersion Parameter for tobit family: 1
##
## Log-likelihood: -1808327 on 499965 degrees of freedom
##
## Number of iterations: 5
# Estimated effect of maternal smoking during pregnancy on birth weight
# (tobacco coefficient of the tobit fit): -222.58, with standard error 2.717.
cens_tobacco_est <- -222.58
cens_tobacco_se <- 2.717

# Approximate 95% confidence interval: estimate +/- 2 * standard error.
# The lower bound must subtract the margin from the estimate; the previous
# "2 * SE - estimate" flipped the sign and produced a positive bound.
CI_Censored <- cens_tobacco_est + 2 * cens_tobacco_se   # upper bound
CI_Censored2 <- cens_tobacco_est - 2 * cens_tobacco_se  # lower bound
CI_Censored
## [1] -217.146
CI_Censored2
## [1] -228.014
Question 5
a.
# Load the full sample from the Stata file for the benchmark regression.
Benchmark <- read.dta("/Users/YaseenAbdulridha/Downloads/paeco526W16_ps2.dta")

# Outcome: column 5 of the raw data.
Y_Benchmark <- Benchmark[, 5]

# Regressors: drop column 5, then column 34 of what remains.
Benchmark2 <- Benchmark[, -5][, -34]
DV_BenchMark <- as.matrix(Benchmark2)

# OLS of the outcome on all remaining columns.
BenchMark_Regression <- lm(formula = Y_Benchmark ~ DV_BenchMark)
summary(BenchMark_Regression)
##
## Call:
## lm(formula = Y_Benchmark ~ DV_BenchMark)
##
## Residuals:
## Min 1Q Median 3Q Max
## -3563.4 -303.2 15.9 332.7 3417.8
##
## Coefficients:
## Estimate Std. Error t value Pr(>|t|)
## (Intercept) 2802.07204 25.23610 111.034 < 2e-16 ***
## DV_BenchMarkalcohol -20.04012 11.10504 -1.805 0.071139 .
## DV_BenchMarkanemia -40.47064 11.05299 -3.662 0.000251 ***
## DV_BenchMarkcardiac -53.80108 15.33227 -3.509 0.000450 ***
## DV_BenchMarkchyper -203.36054 12.32744 -16.497 < 2e-16 ***
## DV_BenchMarkdfage -0.07196 0.26140 -0.275 0.783111
## DV_BenchMarkdfeduc 4.12279 0.62946 6.550 5.78e-11 ***
## DV_BenchMarkdiabete 42.22941 7.96207 5.304 1.13e-07 ***
## DV_BenchMarkdisllb -0.26952 0.04582 -5.882 4.05e-09 ***
## DV_BenchMarkdlivord 34.72033 1.41905 24.467 < 2e-16 ***
## DV_BenchMarkdmage 9.31822 1.74826 5.330 9.83e-08 ***
## DV_BenchMarkdmar -40.38955 3.33884 -12.097 < 2e-16 ***
## DV_BenchMarkdmeduc 5.92945 0.69903 8.482 < 2e-16 ***
## DV_BenchMarkdrink -9.57060 1.89701 -5.045 4.54e-07 ***
## DV_BenchMarkforeignb -14.97303 6.30042 -2.377 0.017478 *
## DV_BenchMarknprevist 37.17285 0.43012 86.423 < 2e-16 ***
## DV_BenchMarkpre4000 476.32267 10.44309 45.611 < 2e-16 ***
## DV_BenchMarkpreterm -495.84888 9.21846 -53.789 < 2e-16 ***
## DV_BenchMarktobacco -224.36762 2.93119 -76.545 < 2e-16 ***
## DV_BenchMarkmblack -152.94977 8.65306 -17.676 < 2e-16 ***
## DV_BenchMarkmotherr -85.95448 13.99450 -6.142 8.16e-10 ***
## DV_BenchMarkmhispan -77.52961 9.94483 -7.796 6.42e-15 ***
## DV_BenchMarkfblack -49.02162 8.43988 -5.808 6.32e-09 ***
## DV_BenchMarkfotherr -106.60619 13.80676 -7.721 1.16e-14 ***
## DV_BenchMarkfhispan -58.59584 9.15137 -6.403 1.53e-10 ***
## DV_BenchMarkadequac2 68.44478 4.61806 14.821 < 2e-16 ***
## DV_BenchMarkadequac3 156.65110 9.65873 16.219 < 2e-16 ***
## DV_BenchMarktripre2 27.12976 4.89632 5.541 3.01e-08 ***
## DV_BenchMarktripre3 77.61655 10.59056 7.329 2.33e-13 ***
## DV_BenchMarktripre0 -108.66197 13.73521 -7.911 2.56e-15 ***
## DV_BenchMarkfirst -88.91614 3.80012 -23.398 < 2e-16 ***
## DV_BenchMarkdeadkids -11.09828 1.46070 -7.598 3.02e-14 ***
## DV_BenchMarkplural -977.79905 8.50439 -114.976 < 2e-16 ***
## DV_BenchMarkdmage2 -0.20978 0.03045 -6.890 5.58e-12 ***
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## Residual standard error: 532.7 on 249966 degrees of freedom
## Multiple R-squared: 0.1648, Adjusted R-squared: 0.1647
## F-statistic: 1495 on 33 and 249966 DF, p-value: < 2.2e-16
# Estimated effect of maternal smoking during pregnancy on birth weight
# (tobacco coefficient of the benchmark OLS fit): -224.36762, with standard
# error 2.93119.
bench_tobacco_est <- -224.36762
bench_tobacco_se <- 2.93119

# Approximate 95% confidence interval: estimate +/- 2 * standard error.
# The lower bound must subtract the margin from the estimate; the previous
# "2 * SE - estimate" flipped the sign and produced a positive bound.
CI_BenchMark <- bench_tobacco_est + 2 * bench_tobacco_se   # upper bound
CI_BenchMark2 <- bench_tobacco_est - 2 * bench_tobacco_se  # lower bound
CI_BenchMark
## [1] -218.5052
CI_BenchMark2
## [1] -230.23
# Collect the tobacco coefficient from each fitted model into one table.
#
# The coefficient is looked up by name rather than by position: position 19 is
# tobacco in the lm() fits (which start with an intercept), but in the
# truncated-regression coefficient vector position 19 is mblack, so a fixed
# index reported the wrong coefficient for that model.
tobacco_coef <- function(model) {
  est <- model$coefficients
  hit <- grep("tobacco$", names(est))
  # Exactly one coefficient name should end in "tobacco"; stop otherwise
  # instead of silently reporting the wrong row.
  stopifnot(length(hit) == 1L)
  est[hit]
}

Table <- data.frame(
  tobacco_coef(OLS_Regression),
  tobacco_coef(OLS_Regression_CS),
  tobacco_coef(TruncRegression),
  tobacco_coef(BenchMark_Regression)
)
colnames(Table) <- c(
  "OLS_Regression", "OLS_Regression_CS", "TruncRegression",
  "BenchMark_Regression"
)
# Transpose so that each model is a row.
table <- t(Table)
table
## DVtobacco
## OLS_Regression -189.7550
## OLS_Regression_CS -210.4684
## TruncRegression -227.7388
## BenchMark_Regression -224.3676
#We can see the different specifications of our models and how they can bias our coefficient estimates. The censored data will tend to have coefficients that are biased towards zero. As we can see from our results, our estimates in 3c and 4c were very close to the benchmark method, suggesting the validity of the semiparametric methods used to account for censored and truncated data applied in questions 3 and 4. The tobit model, however, attempts to correct for model bias.
Question 6
# The authors discuss the use of semiparametric models to estimate the effects of civil rights on the wages of blacks vs. whites. When only using OLS they were not able to show the convergence of wages after the movements in 1964. However, once they use the semiparametric methods they are able to distinguish between the two and show this convergence. Interesting methods such as CLAD, SCLS and ICLAD are proposed by Powell. I found the methods very interesting in that they are able to uncover the true effect of the convergence of wages, which could not be observed using the older methods. It only leads me to question what we will continue to correct in the future in terms of what we think we know.