setwd("C:/Users/marcogeovanni/Desktop/Modern Guide To Econometrics/Chapter 7")
library(readstata13)
Benefits <- read.dta13("benefits.dta")
attach(Benefits)
summary(Benefits)
##     stateur          statemb          state            age       
##  Min.   : 2.200   Min.   : 84.0   Min.   :11.00   Min.   :20.00  
##  1st Qu.: 5.700   1st Qu.:150.0   1st Qu.:31.00   1st Qu.:28.00  
##  Median : 7.200   Median :177.0   Median :55.00   Median :34.00  
##  Mean   : 7.511   Mean   :180.7   Mean   :52.81   Mean   :36.13  
##  3rd Qu.: 9.000   3rd Qu.:205.0   3rd Qu.:74.00   3rd Qu.:43.00  
##  Max.   :18.000   Max.   :293.0   Max.   :95.00   Max.   :61.00  
##       age2           tenure           slack             abol        
##  Min.   : 40.0   Min.   : 1.000   Min.   :0.0000   Min.   :0.00000  
##  1st Qu.: 78.4   1st Qu.: 2.000   1st Qu.:0.0000   1st Qu.:0.00000  
##  Median :115.6   Median : 3.000   Median :0.0000   Median :0.00000  
##  Mean   :141.8   Mean   : 5.664   Mean   :0.4761   Mean   :0.08243  
##  3rd Qu.:184.9   3rd Qu.: 7.000   3rd Qu.:1.0000   3rd Qu.:0.00000  
##  Max.   :372.1   Max.   :41.000   Max.   :1.0000   Max.   :1.00000  
##     seasonal           nwhite          school12           male       
##  Min.   :0.00000   Min.   :0.0000   Min.   :0.0000   Min.   :0.0000  
##  1st Qu.:0.00000   1st Qu.:0.0000   1st Qu.:0.0000   1st Qu.:1.0000  
##  Median :0.00000   Median :0.0000   Median :0.0000   Median :1.0000  
##  Mean   :0.03629   Mean   :0.1472   Mean   :0.1901   Mean   :0.7642  
##  3rd Qu.:0.00000   3rd Qu.:0.0000   3rd Qu.:0.0000   3rd Qu.:1.0000  
##  Max.   :1.00000   Max.   :1.0000   Max.   :1.0000   Max.   :1.0000  
##     bluecol       smsa           married           dkids       
##  Min.   :1   Min.   :0.0000   Min.   :0.0000   Min.   :0.0000  
##  1st Qu.:1   1st Qu.:0.0000   1st Qu.:0.0000   1st Qu.:0.0000  
##  Median :1   Median :1.0000   Median :1.0000   Median :0.0000  
##  Mean   :1   Mean   :0.6527   Mean   :0.6328   Mean   :0.4857  
##  3rd Qu.:1   3rd Qu.:1.0000   3rd Qu.:1.0000   3rd Qu.:1.0000  
##  Max.   :1   Max.   :1.0000   Max.   :1.0000   Max.   :1.0000  
##      dykids          yrdispl             rr               rr2          
##  Min.   :0.0000   Min.   : 1.000   Min.   :0.03861   Min.   :0.001491  
##  1st Qu.:0.0000   1st Qu.: 2.000   1st Qu.:0.37521   1st Qu.:0.140784  
##  Median :0.0000   Median : 5.000   Median :0.49045   Median :0.240537  
##  Mean   :0.2217   Mean   : 5.204   Mean   :0.43837   Mean   :0.203440  
##  3rd Qu.:0.0000   3rd Qu.: 8.000   3rd Qu.:0.51157   3rd Qu.:0.261708  
##  Max.   :1.0000   Max.   :10.000   Max.   :0.69118   Max.   :0.477725  
##       head              y         
##  Min.   :0.0000   Min.   :0.0000  
##  1st Qu.:0.0000   1st Qu.:0.0000  
##  Median :1.0000   Median :1.0000  
##  Mean   :0.6805   Mean   :0.6838  
##  3rd Qu.:1.0000   3rd Qu.:1.0000  
##  Max.   :1.0000   Max.   :1.0000
age2_10 <- age2
LPM <- lm(y~rr + rr2 + age + age2_10 + tenure + slack + abol + seasonal + head + married + dkids + dykids + smsa + nwhite + yrdispl + school12 + male + statemb + stateur)  
summary(LPM)
## 
## Call:
## lm(formula = y ~ rr + rr2 + age + age2_10 + tenure + slack + 
##     abol + seasonal + head + married + dkids + dykids + smsa + 
##     nwhite + yrdispl + school12 + male + statemb + stateur)
## 
## Residuals:
##     Min      1Q  Median      3Q     Max 
## -0.9706 -0.5374  0.2231  0.3347  0.6770 
## 
## Coefficients:
##               Estimate Std. Error t value Pr(>|t|)    
## (Intercept) -0.0768690  0.1220560  -0.630  0.52887    
## rr           0.6288587  0.3842068   1.637  0.10174    
## rr2         -1.0190591  0.4809550  -2.119  0.03416 *  
## age          0.0157489  0.0047841   3.292  0.00100 ** 
## age2_10     -0.0014595  0.0006016  -2.426  0.01530 *  
## tenure       0.0056531  0.0012152   4.652 3.37e-06 ***
## slack        0.1281283  0.0142249   9.007  < 2e-16 ***
## abol        -0.0065206  0.0248281  -0.263  0.79285    
## seasonal     0.0578745  0.0357985   1.617  0.10601    
## head        -0.0437490  0.0166430  -2.629  0.00860 ** 
## married      0.0485952  0.0161348   3.012  0.00261 ** 
## dkids       -0.0305088  0.0174321  -1.750  0.08016 .  
## dykids       0.0429115  0.0197563   2.172  0.02990 *  
## smsa        -0.0351950  0.0140138  -2.511  0.01206 *  
## nwhite       0.0165889  0.0187109   0.887  0.37534    
## yrdispl     -0.0133149  0.0030686  -4.339 1.46e-05 ***
## school12    -0.0140365  0.0168433  -0.833  0.40468    
## male        -0.0363176  0.0178142  -2.039  0.04154 *  
## statemb      0.0012394  0.0002039   6.078 1.31e-09 ***
## stateur      0.0181479  0.0030843   5.884 4.28e-09 ***
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## Residual standard error: 0.4501 on 4857 degrees of freedom
## Multiple R-squared:  0.06691,    Adjusted R-squared:  0.06326 
## F-statistic: 18.33 on 19 and 4857 DF,  p-value: < 2.2e-16
plot(fitted(LPM), residuals(LPM), col="blue")

LOGIT <- glm(y~rr + rr2 + age + age2_10 + tenure + slack + abol + seasonal + head + married + dkids + dykids + smsa + nwhite + yrdispl + school12 + male + statemb + stateur, family = binomial(link = "logit"))
summary(LOGIT)
## 
## Call:
## glm(formula = y ~ rr + rr2 + age + age2_10 + tenure + slack + 
##     abol + seasonal + head + married + dkids + dykids + smsa + 
##     nwhite + yrdispl + school12 + male + statemb + stateur, family = binomial(link = "logit"))
## 
## Deviance Residuals: 
##     Min       1Q   Median       3Q      Max  
## -2.2024  -1.2216   0.6959   0.8844   1.6015  
## 
## Coefficients:
##              Estimate Std. Error z value Pr(>|z|)    
## (Intercept) -2.800499   0.604168  -4.635 3.56e-06 ***
## rr           3.068080   1.868226   1.642  0.10054    
## rr2         -4.890618   2.333522  -2.096  0.03610 *  
## age          0.067697   0.023910   2.831  0.00463 ** 
## age2_10     -0.005968   0.003038  -1.964  0.04950 *  
## tenure       0.031249   0.006644   4.703 2.56e-06 ***
## slack        0.624822   0.070639   8.845  < 2e-16 ***
## abol        -0.036175   0.117808  -0.307  0.75879    
## seasonal     0.270874   0.171171   1.582  0.11354    
## head        -0.210682   0.081226  -2.594  0.00949 ** 
## married      0.242266   0.079410   3.051  0.00228 ** 
## dkids       -0.157927   0.086218  -1.832  0.06699 .  
## dykids       0.205894   0.097492   2.112  0.03470 *  
## smsa        -0.170354   0.069781  -2.441  0.01464 *  
## nwhite       0.074070   0.092956   0.797  0.42555    
## yrdispl     -0.063700   0.014997  -4.247 2.16e-05 ***
## school12    -0.065258   0.082413  -0.792  0.42845    
## male        -0.179829   0.087535  -2.054  0.03994 *  
## statemb      0.006027   0.001009   5.973 2.33e-09 ***
## stateur      0.095620   0.015912   6.009 1.86e-09 ***
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## (Dispersion parameter for binomial family taken to be 1)
## 
##     Null deviance: 6086.1  on 4876  degrees of freedom
## Residual deviance: 5746.4  on 4857  degrees of freedom
## AIC: 5786.4
## 
## Number of Fisher Scoring iterations: 4
logLik(LOGIT)
## 'log Lik.' -2873.197 (df=20)
LogisticDensityFunction <- mean(dlogis(predict(LOGIT, type= "link")))
LogisticDensityFunction
## [1] 0.2014453
LogisticDensityFunction*coef(LOGIT)
##  (Intercept)           rr          rr2          age      age2_10 
## -0.564147154  0.618050089 -0.985191829  0.013637199 -0.001202238 
##       tenure        slack         abol     seasonal         head 
##  0.006295006  0.125867423 -0.007287342  0.054566288 -0.042440920 
##      married        dkids       dykids         smsa       nwhite 
##  0.048803262 -0.031813631  0.041476383 -0.034316941  0.014921064 
##      yrdispl     school12         male      statemb      stateur 
## -0.012832091 -0.013145841 -0.036225696  0.001214109  0.019262149
DensidadDistribucionLogistica <-dlogis(predict(LOGIT, type= "link"))
hist(DensidadDistribucionLogistica, col="brown")

LOGIT0 <- update(LOGIT, formula = y~1 )
table(true = y, pred = round(fitted(LOGIT)))
##     pred
## true    0    1
##    0  242 1300
##    1  171 3164
table(true = y, pred = round(fitted(LOGIT0)))
##     pred
## true    1
##    0 1542
##    1 3335
mcfaddenR2 <- 1- (logLik(LOGIT)/logLik(LOGIT0))
mcfaddenR2
## 'log Lik.' 0.05581002 (df=20)
PROBIT <- glm(y~rr + rr2 + age + age2_10 + tenure + slack + abol + seasonal + head + married + dkids + dykids + smsa + nwhite + yrdispl + school12 + male + statemb + stateur, family = binomial(link = "probit"))
summary(PROBIT)
## 
## Call:
## glm(formula = y ~ rr + rr2 + age + age2_10 + tenure + slack + 
##     abol + seasonal + head + married + dkids + dykids + smsa + 
##     nwhite + yrdispl + school12 + male + statemb + stateur, family = binomial(link = "probit"))
## 
## Deviance Residuals: 
##     Min       1Q   Median       3Q      Max  
## -2.2247  -1.2269   0.6988   0.8884   1.5834  
## 
## Coefficients:
##               Estimate Std. Error z value Pr(>|z|)    
## (Intercept) -1.6999891  0.3629508  -4.684 2.82e-06 ***
## rr           1.8634739  1.1293243   1.650  0.09893 .  
## rr2         -2.9804364  1.4119428  -2.111  0.03478 *  
## age          0.0422140  0.0143142   2.949  0.00319 ** 
## age2_10     -0.0037741  0.0018118  -2.083  0.03724 *  
## tenure       0.0176943  0.0038476   4.599 4.25e-06 ***
## slack        0.3754931  0.0423881   8.858  < 2e-16 ***
## abol        -0.0223136  0.0718629  -0.311  0.75618    
## seasonal     0.1612070  0.1040951   1.549  0.12147    
## head        -0.1247463  0.0490620  -2.543  0.01100 *  
## married      0.1454763  0.0478152   3.042  0.00235 ** 
## dkids       -0.0965778  0.0518420  -1.863  0.06247 .  
## dykids       0.1236097  0.0586377   2.108  0.03503 *  
## smsa        -0.1001520  0.0418419  -2.394  0.01668 *  
## nwhite       0.0517937  0.0558335   0.928  0.35359    
## yrdispl     -0.0384797  0.0090509  -4.251 2.12e-05 ***
## school12    -0.0415517  0.0497219  -0.836  0.40333    
## male        -0.1067168  0.0527404  -2.023  0.04303 *  
## statemb      0.0036399  0.0006065   6.002 1.95e-09 ***
## stateur      0.0568271  0.0094328   6.024 1.70e-09 ***
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## (Dispersion parameter for binomial family taken to be 1)
## 
##     Null deviance: 6086.1  on 4876  degrees of freedom
## Residual deviance: 5748.1  on 4857  degrees of freedom
## AIC: 5788.1
## 
## Number of Fisher Scoring iterations: 4
logLik(PROBIT)
## 'log Lik.' -2874.071 (df=20)
NormalDensityFunction <- mean(dnorm(predict(PROBIT, type= "link")))
DensidadDistribucionNormal <-dnorm(predict(PROBIT, type= "link")) 
NormalDensityFunction*coef(PROBIT)
##  (Intercept)           rr          rr2          age      age2_10 
## -0.569675535  0.624460178 -0.998760317  0.014146138 -0.001264732 
##       tenure        slack         abol     seasonal         head 
##  0.005929442  0.125829762 -0.007477423  0.054021331 -0.041803156 
##      married        dkids       dykids         smsa       nwhite 
##  0.048749892 -0.032363743  0.041422292 -0.033561482  0.017356362 
##      yrdispl     school12         male      statemb      stateur 
## -0.012894756 -0.013924201 -0.035761374  0.001219749  0.019043055
hist(DensidadDistribucionNormal, col="red")

PROBIT0 <- update(PROBIT, formula = y~1 )
table(true = y, pred = round(fitted(PROBIT)))
##     pred
## true    0    1
##    0  231 1311
##    1  162 3173
table(true = y, pred = round(fitted(PROBIT0)))
##     pred
## true    1
##    0 1542
##    1 3335
mcfaddenR2P <- 1-(logLik(PROBIT)/logLik(PROBIT0))
mcfaddenR2P
## 'log Lik.' 0.05552271 (df=20)