setwd("C:/Users/marcogeovanni/Desktop/Modern Guide To Econometrics/Chapter 7")
library(readstata13)
Benefits <- read.dta13("benefits.dta")
attach(Benefits)
summary(Benefits)
## stateur statemb state age
## Min. : 2.200 Min. : 84.0 Min. :11.00 Min. :20.00
## 1st Qu.: 5.700 1st Qu.:150.0 1st Qu.:31.00 1st Qu.:28.00
## Median : 7.200 Median :177.0 Median :55.00 Median :34.00
## Mean : 7.511 Mean :180.7 Mean :52.81 Mean :36.13
## 3rd Qu.: 9.000 3rd Qu.:205.0 3rd Qu.:74.00 3rd Qu.:43.00
## Max. :18.000 Max. :293.0 Max. :95.00 Max. :61.00
## age2 tenure slack abol
## Min. : 40.0 Min. : 1.000 Min. :0.0000 Min. :0.00000
## 1st Qu.: 78.4 1st Qu.: 2.000 1st Qu.:0.0000 1st Qu.:0.00000
## Median :115.6 Median : 3.000 Median :0.0000 Median :0.00000
## Mean :141.8 Mean : 5.664 Mean :0.4761 Mean :0.08243
## 3rd Qu.:184.9 3rd Qu.: 7.000 3rd Qu.:1.0000 3rd Qu.:0.00000
## Max. :372.1 Max. :41.000 Max. :1.0000 Max. :1.00000
## seasonal nwhite school12 male
## Min. :0.00000 Min. :0.0000 Min. :0.0000 Min. :0.0000
## 1st Qu.:0.00000 1st Qu.:0.0000 1st Qu.:0.0000 1st Qu.:1.0000
## Median :0.00000 Median :0.0000 Median :0.0000 Median :1.0000
## Mean :0.03629 Mean :0.1472 Mean :0.1901 Mean :0.7642
## 3rd Qu.:0.00000 3rd Qu.:0.0000 3rd Qu.:0.0000 3rd Qu.:1.0000
## Max. :1.00000 Max. :1.0000 Max. :1.0000 Max. :1.0000
## bluecol smsa married dkids
## Min. :1 Min. :0.0000 Min. :0.0000 Min. :0.0000
## 1st Qu.:1 1st Qu.:0.0000 1st Qu.:0.0000 1st Qu.:0.0000
## Median :1 Median :1.0000 Median :1.0000 Median :0.0000
## Mean :1 Mean :0.6527 Mean :0.6328 Mean :0.4857
## 3rd Qu.:1 3rd Qu.:1.0000 3rd Qu.:1.0000 3rd Qu.:1.0000
## Max. :1 Max. :1.0000 Max. :1.0000 Max. :1.0000
## dykids yrdispl rr rr2
## Min. :0.0000 Min. : 1.000 Min. :0.03861 Min. :0.001491
## 1st Qu.:0.0000 1st Qu.: 2.000 1st Qu.:0.37521 1st Qu.:0.140784
## Median :0.0000 Median : 5.000 Median :0.49045 Median :0.240537
## Mean :0.2217 Mean : 5.204 Mean :0.43837 Mean :0.203440
## 3rd Qu.:0.0000 3rd Qu.: 8.000 3rd Qu.:0.51157 3rd Qu.:0.261708
## Max. :1.0000 Max. :10.000 Max. :0.69118 Max. :0.477725
## head y
## Min. :0.0000 Min. :0.0000
## 1st Qu.:0.0000 1st Qu.:0.0000
## Median :1.0000 Median :1.0000
## Mean :0.6805 Mean :0.6838
## 3rd Qu.:1.0000 3rd Qu.:1.0000
## Max. :1.0000 Max. :1.0000
age2_10 <- age2
LPM <- lm(y~rr + rr2 + age + age2_10 + tenure + slack + abol + seasonal + head + married + dkids + dykids + smsa + nwhite + yrdispl + school12 + male + statemb + stateur)
summary(LPM)
##
## Call:
## lm(formula = y ~ rr + rr2 + age + age2_10 + tenure + slack +
## abol + seasonal + head + married + dkids + dykids + smsa +
## nwhite + yrdispl + school12 + male + statemb + stateur)
##
## Residuals:
## Min 1Q Median 3Q Max
## -0.9706 -0.5374 0.2231 0.3347 0.6770
##
## Coefficients:
## Estimate Std. Error t value Pr(>|t|)
## (Intercept) -0.0768690 0.1220560 -0.630 0.52887
## rr 0.6288587 0.3842068 1.637 0.10174
## rr2 -1.0190591 0.4809550 -2.119 0.03416 *
## age 0.0157489 0.0047841 3.292 0.00100 **
## age2_10 -0.0014595 0.0006016 -2.426 0.01530 *
## tenure 0.0056531 0.0012152 4.652 3.37e-06 ***
## slack 0.1281283 0.0142249 9.007 < 2e-16 ***
## abol -0.0065206 0.0248281 -0.263 0.79285
## seasonal 0.0578745 0.0357985 1.617 0.10601
## head -0.0437490 0.0166430 -2.629 0.00860 **
## married 0.0485952 0.0161348 3.012 0.00261 **
## dkids -0.0305088 0.0174321 -1.750 0.08016 .
## dykids 0.0429115 0.0197563 2.172 0.02990 *
## smsa -0.0351950 0.0140138 -2.511 0.01206 *
## nwhite 0.0165889 0.0187109 0.887 0.37534
## yrdispl -0.0133149 0.0030686 -4.339 1.46e-05 ***
## school12 -0.0140365 0.0168433 -0.833 0.40468
## male -0.0363176 0.0178142 -2.039 0.04154 *
## statemb 0.0012394 0.0002039 6.078 1.31e-09 ***
## stateur 0.0181479 0.0030843 5.884 4.28e-09 ***
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## Residual standard error: 0.4501 on 4857 degrees of freedom
## Multiple R-squared: 0.06691, Adjusted R-squared: 0.06326
## F-statistic: 18.33 on 19 and 4857 DF, p-value: < 2.2e-16
plot(fitted(LPM), residuals(LPM), col="blue")

LOGIT <- glm(y~rr + rr2 + age + age2_10 + tenure + slack + abol + seasonal + head + married + dkids + dykids + smsa + nwhite + yrdispl + school12 + male + statemb + stateur, family = binomial(link = "logit"))
summary(LOGIT)
##
## Call:
## glm(formula = y ~ rr + rr2 + age + age2_10 + tenure + slack +
## abol + seasonal + head + married + dkids + dykids + smsa +
## nwhite + yrdispl + school12 + male + statemb + stateur, family = binomial(link = "logit"))
##
## Deviance Residuals:
## Min 1Q Median 3Q Max
## -2.2024 -1.2216 0.6959 0.8844 1.6015
##
## Coefficients:
## Estimate Std. Error z value Pr(>|z|)
## (Intercept) -2.800499 0.604168 -4.635 3.56e-06 ***
## rr 3.068080 1.868226 1.642 0.10054
## rr2 -4.890618 2.333522 -2.096 0.03610 *
## age 0.067697 0.023910 2.831 0.00463 **
## age2_10 -0.005968 0.003038 -1.964 0.04950 *
## tenure 0.031249 0.006644 4.703 2.56e-06 ***
## slack 0.624822 0.070639 8.845 < 2e-16 ***
## abol -0.036175 0.117808 -0.307 0.75879
## seasonal 0.270874 0.171171 1.582 0.11354
## head -0.210682 0.081226 -2.594 0.00949 **
## married 0.242266 0.079410 3.051 0.00228 **
## dkids -0.157927 0.086218 -1.832 0.06699 .
## dykids 0.205894 0.097492 2.112 0.03470 *
## smsa -0.170354 0.069781 -2.441 0.01464 *
## nwhite 0.074070 0.092956 0.797 0.42555
## yrdispl -0.063700 0.014997 -4.247 2.16e-05 ***
## school12 -0.065258 0.082413 -0.792 0.42845
## male -0.179829 0.087535 -2.054 0.03994 *
## statemb 0.006027 0.001009 5.973 2.33e-09 ***
## stateur 0.095620 0.015912 6.009 1.86e-09 ***
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## (Dispersion parameter for binomial family taken to be 1)
##
## Null deviance: 6086.1 on 4876 degrees of freedom
## Residual deviance: 5746.4 on 4857 degrees of freedom
## AIC: 5786.4
##
## Number of Fisher Scoring iterations: 4
logLik(LOGIT)
## 'log Lik.' -2873.197 (df=20)
LogisticDensityFunction <- mean(dlogis(predict(LOGIT, type= "link")))
LogisticDensityFunction
## [1] 0.2014453
LogisticDensityFunction*coef(LOGIT)
## (Intercept) rr rr2 age age2_10
## -0.564147154 0.618050089 -0.985191829 0.013637199 -0.001202238
## tenure slack abol seasonal head
## 0.006295006 0.125867423 -0.007287342 0.054566288 -0.042440920
## married dkids dykids smsa nwhite
## 0.048803262 -0.031813631 0.041476383 -0.034316941 0.014921064
## yrdispl school12 male statemb stateur
## -0.012832091 -0.013145841 -0.036225696 0.001214109 0.019262149
DensidadDistribucionLogistica <-dlogis(predict(LOGIT, type= "link"))
hist(DensidadDistribucionLogistica, col="brown")

LOGIT0 <- update(LOGIT, formula = y~1 )
table(true = y, pred = round(fitted(LOGIT)))
## pred
## true 0 1
## 0 242 1300
## 1 171 3164
table(true = y, pred = round(fitted(LOGIT0)))
## pred
## true 1
## 0 1542
## 1 3335
mcfaddenR2 <- 1- (logLik(LOGIT)/logLik(LOGIT0))
mcfaddenR2
## 'log Lik.' 0.05581002 (df=20)
PROBIT <- glm(y~rr + rr2 + age + age2_10 + tenure + slack + abol + seasonal + head + married + dkids + dykids + smsa + nwhite + yrdispl + school12 + male + statemb + stateur, family = binomial(link = "probit"))
summary(PROBIT)
##
## Call:
## glm(formula = y ~ rr + rr2 + age + age2_10 + tenure + slack +
## abol + seasonal + head + married + dkids + dykids + smsa +
## nwhite + yrdispl + school12 + male + statemb + stateur, family = binomial(link = "probit"))
##
## Deviance Residuals:
## Min 1Q Median 3Q Max
## -2.2247 -1.2269 0.6988 0.8884 1.5834
##
## Coefficients:
## Estimate Std. Error z value Pr(>|z|)
## (Intercept) -1.6999891 0.3629508 -4.684 2.82e-06 ***
## rr 1.8634739 1.1293243 1.650 0.09893 .
## rr2 -2.9804364 1.4119428 -2.111 0.03478 *
## age 0.0422140 0.0143142 2.949 0.00319 **
## age2_10 -0.0037741 0.0018118 -2.083 0.03724 *
## tenure 0.0176943 0.0038476 4.599 4.25e-06 ***
## slack 0.3754931 0.0423881 8.858 < 2e-16 ***
## abol -0.0223136 0.0718629 -0.311 0.75618
## seasonal 0.1612070 0.1040951 1.549 0.12147
## head -0.1247463 0.0490620 -2.543 0.01100 *
## married 0.1454763 0.0478152 3.042 0.00235 **
## dkids -0.0965778 0.0518420 -1.863 0.06247 .
## dykids 0.1236097 0.0586377 2.108 0.03503 *
## smsa -0.1001520 0.0418419 -2.394 0.01668 *
## nwhite 0.0517937 0.0558335 0.928 0.35359
## yrdispl -0.0384797 0.0090509 -4.251 2.12e-05 ***
## school12 -0.0415517 0.0497219 -0.836 0.40333
## male -0.1067168 0.0527404 -2.023 0.04303 *
## statemb 0.0036399 0.0006065 6.002 1.95e-09 ***
## stateur 0.0568271 0.0094328 6.024 1.70e-09 ***
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## (Dispersion parameter for binomial family taken to be 1)
##
## Null deviance: 6086.1 on 4876 degrees of freedom
## Residual deviance: 5748.1 on 4857 degrees of freedom
## AIC: 5788.1
##
## Number of Fisher Scoring iterations: 4
logLik(PROBIT)
## 'log Lik.' -2874.071 (df=20)
NormalDensityFunction <- mean(dnorm(predict(PROBIT, type= "link")))
DensidadDistribucionNormal <-dnorm(predict(PROBIT, type= "link"))
NormalDensityFunction*coef(PROBIT)
## (Intercept) rr rr2 age age2_10
## -0.569675535 0.624460178 -0.998760317 0.014146138 -0.001264732
## tenure slack abol seasonal head
## 0.005929442 0.125829762 -0.007477423 0.054021331 -0.041803156
## married dkids dykids smsa nwhite
## 0.048749892 -0.032363743 0.041422292 -0.033561482 0.017356362
## yrdispl school12 male statemb stateur
## -0.012894756 -0.013924201 -0.035761374 0.001219749 0.019043055
hist(DensidadDistribucionNormal, col="red")

PROBIT0 <- update(PROBIT, formula = y~1 )
table(true = y, pred = round(fitted(PROBIT)))
## pred
## true 0 1
## 0 231 1311
## 1 162 3173
table(true = y, pred = round(fitted(PROBIT0)))
## pred
## true 1
## 0 1542
## 1 3335
mcfaddenR2P <- 1-(logLik(PROBIT)/logLik(PROBIT0))
mcfaddenR2P
## 'log Lik.' 0.05552271 (df=20)