Ejercicio 3

Bajo las condiciones del problema anterior

3.1

Utilice la librería Sandwich para estimar la matriz de varianzas y covarianzas de \(\hat{\beta}\) suponiendo heterocedasticidad, utilizando la especificación \(HC2\)

library(sandwich)
## Warning: package 'sandwich' was built under R version 3.6.3
vcovHC(m2_5, type="HC2")
##               X1           X2
## X1  0.0008602218 -0.001046587
## X2 -0.0010465868  0.001333104

3.2

Halle la matriz \(H = X(X^TX)^{-1}X^T\) y los residuos de la estimación de \(\beta\) por OLS con base en los datos originales simulados con \(T = 100\) y verifique los resultados del punto anterior

# Simulated data set for this exercise; the final argument (100) is presumably
# the sample size T asked for by the exercise -- confirm against simulacion_e2
dat_e3 <- simulacion_e2(s_e2, d0, d1, b_xx, rho, 100)
# Hat (projection) matrix H = X (X'X)^{-1} X'
H <- dat_e3$X %*% solve(t(dat_e3$X) %*% dat_e3$X) %*% t(dat_e3$X)
# OLS fit without intercept on the simulated sample
dat_e4 <- data.frame(dat_e3$Y, dat_e3$X)
m4_1 <- lm(dat_e3.Y ~ X1 + X2 + 0, dat_e4)
# Homoskedastic error covariance sigma^2 * I_n. The identity matrix is sized
# from the design matrix instead of a hard-coded 100, so the products below
# stay conformable if the simulated sample size is changed.
I <- diag(1, nrow(dat_e3$X), nrow(dat_e3$X))
Omg_ols <- (sigma(m4_1)^2) * I
# Sandwich form (X'X)^{-1} X' Omega X (X'X)^{-1} of the OLS covariance matrix
const <- solve(t(dat_e3$X) %*% dat_e3$X) %*% t(dat_e3$X) %*% Omg_ols %*%
  dat_e3$X %*% solve(t(dat_e3$X) %*% dat_e3$X)
const
##              [,1]        [,2]
## [1,]  0.009961178 -0.01249936
## [2,] -0.012499359  0.01632534
#verificando los calculos del punto anterior para HC2
# HC2 heteroskedasticity-consistent covariance matrix of OLS coefficients.
#
# Arguments:
#   H          n x n hat matrix X (X'X)^{-1} X' matching the fitted model.
#   modelo_lm  fitted `lm` object; its design matrix must have n rows.
# Returns:
#   The matrix (X'X)^{-1} X' Omega X (X'X)^{-1}, where Omega is diagonal with
#   entries e_i^2 / (1 - h_ii), e_i being the residuals and h_ii = H[i, i].
HC2 <- function(H, modelo_lm) {
  X <- model.matrix(modelo_lm)
  # as.vector() drops the observation names so the weights recycle cleanly
  e <- as.vector(residuals(modelo_lm))
  # Fail early with a clear message instead of a non-conformable-matrix error
  stopifnot(
    is.matrix(H),
    nrow(H) == ncol(H),
    nrow(H) == nrow(X),
    length(e) == nrow(X)
  )
  h <- as.vector(diag(H))
  w <- e^2 / (1 - h)
  bread <- solve(crossprod(X))
  # w * X scales row i of X by w[i], so crossprod(X, w * X) equals
  # t(X) %*% diag(w) %*% X without materialising the n x n diagonal matrix
  meat <- crossprod(X, w * X)
  bread %*% meat %*% bread
}
HC2(H,m4_1)
##             X1          X2
## X1  0.01277930 -0.01598153
## X2 -0.01598153  0.02060172

El resultado de HC2 es diferente al del punto 3.1 porque allí el tamaño de muestra era de mil observaciones, mientras que en el punto 3.2 son solo 100.

3.3

Repita el punto anterior para la opción HC4

# con paquete sandwich
vcovHC(m2_5, type="HC4")
##               X1           X2
## X1  0.0008655969 -0.001053365
## X2 -0.0010533649  0.001341717
#programando a mano
# HC4 heteroskedasticity-consistent covariance matrix of OLS coefficients.
#
# Arguments:
#   H          n x n hat matrix X (X'X)^{-1} X' matching the fitted model.
#   modelo_lm  fitted `lm` object; its design matrix must have n rows.
# Returns:
#   The matrix (X'X)^{-1} X' Omega X (X'X)^{-1}, where Omega is diagonal with
#   entries e_i^2 / (1 - h_ii)^delta_i and delta_i = min(4, h_ii / mean(h)).
HC4 <- function(H, modelo_lm) {
  X <- model.matrix(modelo_lm)
  # as.vector() drops the observation names so the weights recycle cleanly
  e <- as.vector(residuals(modelo_lm))
  # Fail early with a clear message instead of a non-conformable-matrix error
  stopifnot(
    is.matrix(H),
    nrow(H) == ncol(H),
    nrow(H) == nrow(X),
    length(e) == nrow(X)
  )
  h <- as.vector(diag(H))
  # Leverage relative to the average leverage, capped at 4
  delta <- pmin(4, h / mean(h))
  w <- e^2 / (1 - h)^delta
  bread <- solve(crossprod(X))
  # w * X scales row i of X by w[i], so crossprod(X, w * X) equals
  # t(X) %*% diag(w) %*% X without materialising the n x n diagonal matrix
  meat <- crossprod(X, w * X)
  bread %*% meat %*% bread
}
HC4(H,m4_1) # para T=100
##             X1          X2
## X1  0.01412561 -0.01763404
## X2 -0.01763404  0.02263413

Nuevamente, existen amplias diferencias por el tamaño de muestra, pero, como se verá en el siguiente ejercicio, los resultados para T = 100 son consistentes con la librería sandwich.

Ejercicio 4

Bajo las condiciones del problema anterior replique CON LOS DATOS SIMULADOS PARA T = 100 los ejemplos ilustrativos de la viñeta Sandwich

library(lmtest)
## Warning: package 'lmtest' was built under R version 3.6.3
## Loading required package: zoo
## 
## Attaching package: 'zoo'
## The following objects are masked from 'package:base':
## 
##     as.Date, as.Date.numeric
coeftest(m4_1, df=Inf, vcov=vcovHC(m4_1, type="HC0"))
## 
## z test of coefficients:
## 
##    Estimate Std. Error z value  Pr(>|z|)    
## X1  0.39926    0.11027  3.6207 0.0002938 ***
## X2  0.91261    0.14009  6.5143 7.304e-11 ***
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
coeftest(m4_1, df=Inf, vcov=vcovHC(m4_1, type="HC4"))
## 
## z test of coefficients:
## 
##    Estimate Std. Error z value  Pr(>|z|)    
## X1  0.39926    0.11885  3.3593 0.0007813 ***
## X2  0.91261    0.15045  6.0660 1.311e-09 ***
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
coeftest(m4_1, df=Inf, vcov=NeweyWest(m4_1, lag = 4, prewhite = FALSE))
## 
## z test of coefficients:
## 
##    Estimate Std. Error z value  Pr(>|z|)    
## X1  0.39926    0.11198  3.5654 0.0003633 ***
## X2  0.91261    0.14847  6.1467  7.91e-10 ***
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
coeftest(m4_1, df = Inf, vcov = NeweyWest)
## 
## z test of coefficients:
## 
##    Estimate Std. Error z value  Pr(>|z|)    
## X1  0.39926    0.12259  3.2568  0.001127 ** 
## X2  0.91261    0.16007  5.7013 1.189e-08 ***
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
# HAC covariance estimator built on kernHAC() with a Parzen kernel,
# prewhite = 2, no finite-sample adjustment and bandwidth chosen by
# bwNeweyWest; any further arguments are forwarded to kernHAC().
parzenHAC <- function(x, ...) {
  kernHAC(
    x,
    kernel = "Parzen",
    prewhite = 2,
    adjust = FALSE,
    bw = bwNeweyWest,
    ...
  )
}
coeftest(m4_1, df = Inf, vcov = parzenHAC)
## 
## z test of coefficients:
## 
##    Estimate Std. Error z value  Pr(>|z|)    
## X1  0.39926    0.12523  3.1882  0.001431 ** 
## X2  0.91261    0.16530  5.5211 3.369e-08 ***
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1

Ejercicio 9

C1

Use the data in WAGE2 for this exercise.

library(gmm)
## Warning: package 'gmm' was built under R version 3.6.3
library(wooldridge)
## Warning: package 'wooldridge' was built under R version 3.6.3
data('wage2')
# GMM does not work with missing values, so build a copy without them
# NOTE(review): na.omit() on a whole data frame drops every row that has an NA
# in any column, so models fitted on `wage` use fewer rows than models fitted
# on `wage2` -- confirm this sample difference between GMM and OLS is intended
wage<-na.omit(wage2)
  1. In Example 15.2, if \(sibs\) is used as an instrument for \(educ\), the IV estimate of the return to education is .122. To convince yourself that using \(sibs\) as an IV for \(educ\) is not the same as just plugging \(sibs\) in for \(educ\) and running an \(OLS\) regression, run the regression of \(log(wage)\) on \(sibs\) and explain your findings.

    #OLS
    m9_1<-lm(lwage~sibs, data=wage2)
    summary(m9_1)
    ## 
    ## Call:
    ## lm(formula = lwage ~ sibs, data = wage2)
    ## 
    ## Residuals:
    ##      Min       1Q   Median       3Q      Max 
    ## -1.97662 -0.25857  0.02503  0.28572  1.22677 
    ## 
    ## Coefficients:
    ##              Estimate Std. Error t value Pr(>|t|)    
    ## (Intercept)  6.861076   0.022078 310.771  < 2e-16 ***
    ## sibs        -0.027904   0.005908  -4.723 2.68e-06 ***
    ## ---
    ## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
    ## 
    ## Residual standard error: 0.4164 on 933 degrees of freedom
    ## Multiple R-squared:  0.02335,    Adjusted R-squared:  0.0223 
    ## F-statistic: 22.31 on 1 and 933 DF,  p-value: 2.68e-06
    #GMM
    m9_1_2<-gmm(wage$lwage~wage$sibs,x=wage$sibs,type="iterative")
    summary(m9_1_2)
    ## 
    ## Call:
    ## gmm(g = wage$lwage ~ wage$sibs, x = wage$sibs, type = "iterative")
    ## 
    ## 
    ## Method:  iterative 
    ## 
    ## Kernel:  Quadratic Spectral
    ## 
    ## Coefficients:
    ##              Estimate     Std. Error   t value      Pr(>|t|)   
    ## (Intercept)    6.8844699    0.0274480  250.8187638    0.0000000
    ## wage$sibs     -0.0246553    0.0072532   -3.3992542    0.0006757
    ## 
    ## J-Test: degrees of freedom is 0 
    ##                 J-test                P-value             
    ## Test E(g)=0:    4.61477680377028e-29  *******

    Como la variable dependiente se encuentra en escala logarítmica, se puede decir que cada hermano adicional se asocia con un salario aproximadamente 2.8% menor (el coeficiente estimado es negativo, -0.028). Cuando se usaba sibs como instrumento para educ, el beta de sibs en la primera etapa era -0.22 con una constante de 14.14, y al usar el educ estimado para estimar log(wage) el beta estimado era de 0.122, es decir, un retorno a la educación de 12.2%. Por tanto, usar sibs como instrumento para educ no es lo mismo que usar directamente sibs, y podría explicar mejor a log(wage).

  2. The variable \(brthord\) is birth order (\(brthord\) is one for a first-born child, two for a second-born child, and so on). Explain why \(educ\) and \(brthord\) might be negatively correlated. Regress \(educ\) on \(brthord\) to determine whether there is a statistically significant negative correlation.

    cor.test(wage2$brthord,wage2$educ, method="pearson" )
    ## 
    ##  Pearson's product-moment correlation
    ## 
    ## data:  wage2$brthord and wage2$educ
    ## t = -6.1062, df = 850, p-value = 1.551e-09
    ## alternative hypothesis: true correlation is not equal to 0
    ## 95 percent confidence interval:
    ##  -0.2684608 -0.1397521
    ## sample estimates:
    ##        cor 
    ## -0.2049925
    #OLS
    m9_2<-lm(educ~brthord,data=wage2)
    summary(m9_2)
    ## 
    ## Call:
    ## lm(formula = educ ~ brthord, data = wage2)
    ## 
    ## Residuals:
    ##     Min      1Q  Median      3Q     Max 
    ## -4.8668 -1.5842 -0.7362  2.1332  6.1117 
    ## 
    ## Coefficients:
    ##             Estimate Std. Error t value Pr(>|t|)    
    ## (Intercept) 14.14945    0.12868 109.962  < 2e-16 ***
    ## brthord     -0.28264    0.04629  -6.106 1.55e-09 ***
    ## ---
    ## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
    ## 
    ## Residual standard error: 2.155 on 850 degrees of freedom
    ##   (83 observations deleted due to missingness)
    ## Multiple R-squared:  0.04202,    Adjusted R-squared:  0.04089 
    ## F-statistic: 37.29 on 1 and 850 DF,  p-value: 1.551e-09
    #GMM
    m9_2_2<-gmm(wage$educ~wage$brthord,x=wage$brthord, type="iterative")
    summary(m9_2_2)
    ## 
    ## Call:
    ## gmm(g = wage$educ ~ wage$brthord, x = wage$brthord, type = "iterative")
    ## 
    ## 
    ## Method:  iterative 
    ## 
    ## Kernel:  Quadratic Spectral
    ## 
    ## Coefficients:
    ##               Estimate     Std. Error   t value      Pr(>|t|)   
    ## (Intercept)    1.4256e+01   1.5362e-01   9.2802e+01   0.0000e+00
    ## wage$brthord  -2.6435e-01   5.3592e-02  -4.9326e+00   8.1134e-07
    ## 
    ## J-Test: degrees of freedom is 0 
    ##                 J-test                P-value             
    ## Test E(g)=0:    1.99567293028392e-27  *******

    La correlación es negativa según el test de Pearson, así como también se obtiene un estimador con signo negativo en la regresión. Hacer supuestos sobre la relación entre ambas variables puede ser muy subjetivo, comenzando porque el coeficiente de correlación es bajo, y el \(R^2\) muestra que usar el valor esperado condicional como estimación del nivel educativo es algo muy débil. Si esta relación fuera cierta, es como si al hijo mayor le dieran más educación que al menor; eso sería entendible en sociedades antiguas, pero hoy en día incluso podría haber casos donde el hijo menor sea el consentido y se le brinden mejores oportunidades.

  3. Use \(brthord\) as an IV for \(educ\) in equation (15.1). Report and interpret the results.

    # IV Estimators
    m9_3<-AER::ivreg(lwage~educ|brthord, data=wage2) # Instead of running two regressions by hand, use the AER package, which provides a function for IV estimators
    summary(m9_3)
    ## 
    ## Call:
    ## AER::ivreg(formula = lwage ~ educ | brthord, data = wage2)
    ## 
    ## Residuals:
    ##     Min      1Q  Median      3Q     Max 
    ## -1.8532 -0.2557  0.0435  0.2970  1.3033 
    ## 
    ## Coefficients:
    ##             Estimate Std. Error t value Pr(>|t|)    
    ## (Intercept)  5.03040    0.43295  11.619  < 2e-16 ***
    ## educ         0.13064    0.03204   4.078 4.97e-05 ***
    ## ---
    ## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
    ## 
    ## Residual standard error: 0.4215 on 850 degrees of freedom
    ## Multiple R-Squared: -0.02862,    Adjusted R-squared: -0.02983 
    ## Wald test: 16.63 on 1 and 850 DF,  p-value: 4.975e-05
    # GMM Two steps
    m9_3_2<-gmm(wage$lwage~wage$educ, x=wage$brthord, type="twoStep") # the type is switched to two-step, and x now holds the instruments
    summary(m9_3_2)
    ## 
    ## Call:
    ## gmm(g = wage$lwage ~ wage$educ, x = wage$brthord, type = "twoStep")
    ## 
    ## 
    ## Method:  twoStep 
    ## 
    ## Kernel:  Quadratic Spectral
    ## 
    ## Coefficients:
    ##              Estimate    Std. Error  t value     Pr(>|t|)  
    ## (Intercept)  4.6389e+00  6.3194e-01  7.3407e+00  2.1254e-13
    ## wage$educ    1.5902e-01  4.5997e-02  3.4571e+00  5.4593e-04
    ## 
    ## J-Test: degrees of freedom is 0 
    ##                 J-test               P-value            
    ## Test E(g)=0:    1.6640610302406e-27  *******

    Parece que el orden de los hermanos es un mejor instrumento para educ cuando se estima el salario

  4. Now, suppose that we include number of siblings as an explanatory variable in the wage equation; this controls for family background, to some extent: \(log(wage)=\beta_0+\beta_1educ+\beta_2sibs+u.\) Suppose that we want to use \(brthord\) as an IV for \(educ\), assuming that \(sibs\) is exogenous. The reduced form for \(educ\) is \(educ=\pi_0+\pi_1sibs+\pi_2brthord+v.\) State and test the identification assumption.

    #OLS
    m9_4<-lm(educ~sibs+brthord,data=wage2)
    summary(m9_4)
    ## 
    ## Call:
    ## lm(formula = educ ~ sibs + brthord, data = wage2)
    ## 
    ## Residuals:
    ##     Min      1Q  Median      3Q     Max 
    ## -5.1438 -1.6854 -0.6852  2.0090  5.9950 
    ## 
    ## Coefficients:
    ##             Estimate Std. Error t value Pr(>|t|)    
    ## (Intercept) 14.29650    0.13329 107.260  < 2e-16 ***
    ## sibs        -0.15287    0.03987  -3.834 0.000135 ***
    ## brthord     -0.15267    0.05708  -2.675 0.007619 ** 
    ## ---
    ## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
    ## 
    ## Residual standard error: 2.137 on 849 degrees of freedom
    ##   (83 observations deleted due to missingness)
    ## Multiple R-squared:  0.05833,    Adjusted R-squared:  0.05611 
    ## F-statistic: 26.29 on 2 and 849 DF,  p-value: 8.33e-12
    #GMM
    m9_4_2<-gmm(wage$educ~wage$sibs+wage$brthord,x=cbind(wage$sibs,wage$brthord), type="iterative")
    summary(m9_4_2)
    ## 
    ## Call:
    ## gmm(g = wage$educ ~ wage$sibs + wage$brthord, x = cbind(wage$sibs, 
    ##     wage$brthord), type = "iterative")
    ## 
    ## 
    ## Method:  iterative 
    ## 
    ## Kernel:  Quadratic Spectral
    ## 
    ## Coefficients:
    ##               Estimate    Std. Error  t value     Pr(>|t|)  
    ## (Intercept)   14.3911908   0.1589633  90.5315110   0.0000000
    ## wage$sibs     -0.1424838   0.0438234  -3.2513182   0.0011487
    ## wage$brthord  -0.1402304   0.0655217  -2.1402125   0.0323376
    ## 
    ## J-Test: degrees of freedom is 0 
    ##                 J-test                P-value             
    ## Test E(g)=0:    6.81915163182099e-27  *******

    En este caso, acorde a lo que se revisó en el ejercicio 8, se requeriría que los estimadores de los instrumentos sean significativos, o al menos el del orden de nacimiento (\(\pi_2 \neq 0\)), pues siblings se considera exógeno y además se incluye en la ecuación del logaritmo del salario. La variable del número de hermanos es significativa al 5%, y el coeficiente de brthord también lo es (p = 0.0076 en OLS), por lo que se rechaza \(H_0: \pi_2 = 0\) y se cumple el supuesto de identificación.

  5. Estimate the equation from part (iv) using \(brthord\) as an IV for \(educ\) (and \(sibs\) as its own IV). Comment on the standard errors for \(\hat{\beta}_{educ}\) and \(\hat{\beta}_{sibs}\).

    #IV Estimators
    m9_5<-AER::ivreg(lwage~ educ+sibs|.-educ+brthord, data=wage2)
    summary(m9_5)
    ## 
    ## Call:
    ## AER::ivreg(formula = lwage ~ educ + sibs | . - educ + brthord, 
    ##     data = wage2)
    ## 
    ## Residuals:
    ##      Min       1Q   Median       3Q      Max 
    ## -1.84808 -0.26227  0.03841  0.29901  1.30836 
    ## 
    ## Coefficients:
    ##             Estimate Std. Error t value Pr(>|t|)    
    ## (Intercept) 4.938527   1.055690   4.678 3.37e-06 ***
    ## educ        0.136994   0.074681   1.834   0.0669 .  
    ## sibs        0.002111   0.017372   0.122   0.9033    
    ## ---
    ## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
    ## 
    ## Residual standard error: 0.427 on 849 degrees of freedom
    ## Multiple R-Squared: -0.05428,    Adjusted R-squared: -0.05676 
    ## Wald test:  10.9 on 2 and 849 DF,  p-value: 2.124e-05
    #GMM Two Steps
    m9_5_2<-gmm(wage$lwage~ wage$educ+wage$sibs,x=cbind(wage$brthord,wage$sibs), type="twoStep")
    summary(m9_5_2)
    ## 
    ## Call:
    ## gmm(g = wage$lwage ~ wage$educ + wage$sibs, x = cbind(wage$brthord, 
    ##     wage$sibs), type = "twoStep")
    ## 
    ## 
    ## Method:  twoStep 
    ## 
    ## Kernel:  Quadratic Spectral
    ## 
    ## Coefficients:
    ##              Estimate  Std. Error  t value   Pr(>|t|)
    ## (Intercept)  3.748128  1.804650    2.076928  0.037808
    ## wage$educ    0.220264  0.126439    1.742064  0.081497
    ## wage$sibs    0.018586  0.027029    0.687636  0.491682
    ## 
    ## J-Test: degrees of freedom is 0 
    ##                 J-test                P-value             
    ## Test E(g)=0:    3.30940832847922e-29  *******

    La variable de hermanos no es significativa, tal vez porque ya se incluía en la estimación de educ. El estimador de educ ahora es significativo solo al 10%, porque su error estándar es más grande (0.075 frente a 0.032 en el punto 3).

  6. Using the fitted values from part (iv), \(\hat{educ}\), compute the correlation between \(\hat{educ}\) and \(sibs\). Use this result to explain your findings from part (v).

    cor(m9_4$fitted.values,m9_4$model, method = "pearson")
    ##           educ       sibs   brthord
    ## [1,] 0.2415094 -0.9294818 -0.848797
    El valor estimado de educ se correlaciona solamente un 24% con el valor real, mientras que su correlación con el número de hermanos es del 93%, aunque es una relación inversa. Esto confirma lo que habíamos dicho: en el modelo 5 sibs no era significativo porque estaba incluido en el educ estimado; esta alta colinealidad es también la que infla los errores estándar.

C7

Use the data in PHILLIPS for this exercise.

data('phillips')
phill<-na.omit(phillips)
  1. In Example 11.5, we estimated an expectations augmented Phillips curve of the form \(\Delta inf_t=\beta_0+\beta_1unem_t+e_t,\) where \(\Delta inf_t=inf_t-inf_{t-1}\). In estimating this equation by \(OLS\), we assumed that the supply shock, \(e_t\), was uncorrelated with \(unem_t\). If this is false, what can be said about the \(OLS\) estimator of \(\beta_1\)?

    #OLS
    m9_6<-lm(cinf~unem,data=phillips)
    summary(m9_6)
    ## 
    ## Call:
    ## lm(formula = cinf ~ unem, data = phillips)
    ## 
    ## Residuals:
    ##     Min      1Q  Median      3Q     Max 
    ## -9.0741 -0.9241  0.0189  0.8606  5.4800 
    ## 
    ## Coefficients:
    ##             Estimate Std. Error t value Pr(>|t|)  
    ## (Intercept)   2.8282     1.2249   2.309   0.0249 *
    ## unem         -0.5176     0.2090  -2.476   0.0165 *
    ## ---
    ## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
    ## 
    ## Residual standard error: 2.307 on 53 degrees of freedom
    ##   (1 observation deleted due to missingness)
    ## Multiple R-squared:  0.1037, Adjusted R-squared:  0.08679 
    ## F-statistic: 6.132 on 1 and 53 DF,  p-value: 0.0165
    #GMM
    m9_6_2<-gmm(phill$cinf~phill$unem,x=phill$unem, type="iterative")
    summary(m9_6_2)
    ## 
    ## Call:
    ## gmm(g = phill$cinf ~ phill$unem, x = phill$unem, type = "iterative")
    ## 
    ## 
    ## Method:  iterative 
    ## 
    ## Kernel:  Quadratic Spectral
    ## 
    ## Coefficients:
    ##              Estimate    Std. Error  t value     Pr(>|t|)  
    ## (Intercept)   2.8282017   1.1832122   2.3902743   0.0168358
    ## phill$unem   -0.5176487   0.1911874  -2.7075458   0.0067783
    ## 
    ## J-Test: degrees of freedom is 0 
    ##                 J-test                P-value             
    ## Test E(g)=0:    9.86503197737205e-29  *******

    Si existiese correlación entre el error y la variable explicativa, entonces el estimador OLS de \(\beta_1\) (el coeficiente de unem) sería sesgado e inconsistente

  2. Suppose that \(e_t\) is unpredictable given all past information: \(E(e_t|inf_{t-1}, unem_{t-1},...)=0\). Explain why this makes \(unem_{t-1}\) a good IV candidate for \(unem_t\).

    #IV Estimators
    m9_7<-AER::ivreg(cinf~unem|unem_1,data=phillips)
    summary(m9_7)
    ## 
    ## Call:
    ## AER::ivreg(formula = cinf ~ unem | unem_1, data = phillips)
    ## 
    ## Residuals:
    ##     Min      1Q  Median      3Q     Max 
    ## -9.1642 -1.0120  0.2054  1.0858  6.3967 
    ## 
    ## Coefficients:
    ##             Estimate Std. Error t value Pr(>|t|)
    ## (Intercept)   0.6338     1.6563   0.383    0.703
    ## unem         -0.1304     0.2867  -0.455    0.651
    ## 
    ## Residual standard error: 2.38 on 53 degrees of freedom
    ## Multiple R-Squared: 0.04568, Adjusted R-squared: 0.02767 
    ## Wald test: 0.207 on 1 and 53 DF,  p-value: 0.651
    #GMM Two Steps
    m9_7_2<-gmm(phill$cinf~phill$unem, x=phill$unem_1,type = "twoStep")
    summary(m9_7_2)
    ## 
    ## Call:
    ## gmm(g = phill$cinf ~ phill$unem, x = phill$unem_1, type = "twoStep")
    ## 
    ## 
    ## Method:  twoStep 
    ## 
    ## Kernel:  Quadratic Spectral
    ## 
    ## Coefficients:
    ##              Estimate  Std. Error  t value   Pr(>|t|)
    ## (Intercept)   0.63382   1.62750     0.38944   0.69695
    ## phill$unem   -0.13045   0.29777    -0.43808   0.66133
    ## 
    ## J-Test: degrees of freedom is 0 
    ##                 J-test                P-value             
    ## Test E(g)=0:    2.79099639581684e-31  *******

    Porque, bajo ese supuesto, \(unem_{t-1}\) forma parte de la información pasada y por tanto no está correlacionado con \(e_t\). Sin embargo, al usarlo como instrumento en la práctica esto no parece muy verídico

    # Correlate the OLS residuals with every column of `phillips`. Rows are
    # matched through the residuals' row names, so only the observations that
    # lm() actually used are kept (the row with a missing cinf is excluded)
    # instead of assuming the dropped observation is always the first one.
    cor(m9_6$residuals,phillips[names(m9_6$residuals),])
    ##           year         unem       inf      inf_1    unem_1      cinf      cunem
    ## [1,] 0.1397998 3.465309e-16 0.5007163 -0.2537467 0.1914138 0.9467324 -0.2737429

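    Es unem en el periodo t el que tiene poca correlación con el residuo en el primer modelo (lo cual ocurre por construcción, pues los residuos de OLS son ortogonales al regresor y por eso no prueba exogeneidad), mientras que unem en el periodo t-1 tiene una correlación más alta.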

  3. Regress \(unem_t\) on \(unem_{t-1}\). Are \(unem_t\) and \(unem_{t-1}\) significantly correlated?

    #OLS
    m9_8<-lm(unem~unem_1,data=phillips)
    summary(m9_8)
    ## 
    ## Call:
    ## lm(formula = unem ~ unem_1, data = phillips)
    ## 
    ## Residuals:
    ##     Min      1Q  Median      3Q     Max 
    ## -2.1243 -0.6433 -0.2470  0.6098  2.8530 
    ## 
    ## Coefficients:
    ##             Estimate Std. Error t value Pr(>|t|)    
    ## (Intercept)  1.48968    0.52020   2.864  0.00599 ** 
    ## unem_1       0.74238    0.08929   8.314 3.54e-11 ***
    ## ---
    ## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
    ## 
    ## Residual standard error: 0.9986 on 53 degrees of freedom
    ##   (1 observation deleted due to missingness)
    ## Multiple R-squared:  0.566,  Adjusted R-squared:  0.5578 
    ## F-statistic: 69.12 on 1 and 53 DF,  p-value: 3.537e-11
    #GMM
    m9_8_2<-gmm(phill$unem~phill$unem_1,x=phill$unem_1)
    summary(m9_8_2)
    ## 
    ## Call:
    ## gmm(g = phill$unem ~ phill$unem_1, x = phill$unem_1)
    ## 
    ## 
    ## Method:  twoStep 
    ## 
    ## Kernel:  Quadratic Spectral
    ## 
    ## Coefficients:
    ##               Estimate    Std. Error  t value     Pr(>|t|)  
    ## (Intercept)   1.4897e+00  5.4535e-01  2.7316e+00  6.3023e-03
    ## phill$unem_1  7.4238e-01  9.7920e-02  7.5815e+00  3.4160e-14
    ## 
    ## J-Test: degrees of freedom is 0 
    ##                 J-test                P-value             
    ## Test E(g)=0:    7.10370141584368e-28  *******

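    La relación es bastante fuerte: el coeficiente estimado es 0.74 y es altamente significativo (t = 8.31), lo que equivale a una correlación de aproximadamente 0.75 (\(\sqrt{R^2} = \sqrt{0.566}\))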

  4. Estimate the expectations augmented Phillips curve by IV. Report the results in the usual form and compare them with the \(OLS\) estimates from Example 11.5.

    la estimación IV es la m9_7 mientras que la de OLS es la m9_6. Los resultados muestran que, a pesar de que \(unem_{t-1}\) es un buen predictor de \(unem\), no sirve como instrumento cuando se pretende explicar \(\Delta inf_t\)

Ejercicio 10

C9

The purpose of this exercise is to compare the estimates and standard errors obtained by correctly using \(2SLS\) with those obtained using inappropriate procedures. Use the data file \(WAGE2\).
  1. Use a \(2SLS\) routine to estimate the equation \(log(wage)=\beta_0+\beta_1educ+\beta_2exper+\beta_3tenure+\beta_4black+u,\) where \(sibs\) is the IV for \(educ\). Report the results in the usual form.

    # IV Estimators
    m10_1<-AER::ivreg(lwage~educ+exper+tenure+black|.-educ+sibs, data=wage2) # after the vertical bar, state which regressor to remove and which instrument to use in its place for the first-stage estimation
    summary(m10_1)
    ## 
    ## Call:
    ## AER::ivreg(formula = lwage ~ educ + exper + tenure + black | 
    ##     . - educ + sibs, data = wage2)
    ## 
    ## Residuals:
    ##     Min      1Q  Median      3Q     Max 
    ## -1.8176 -0.2403  0.0139  0.2567  1.3225 
    ## 
    ## Coefficients:
    ##              Estimate Std. Error t value Pr(>|t|)    
    ## (Intercept)  5.215976   0.543451   9.598  < 2e-16 ***
    ## educ         0.093632   0.033719   2.777  0.00560 ** 
    ## exper        0.020922   0.008388   2.494  0.01279 *  
    ## tenure       0.011548   0.002740   4.215 2.74e-05 ***
    ## black       -0.183329   0.050136  -3.657  0.00027 ***
    ## ---
    ## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
    ## 
    ## Residual standard error: 0.3848 on 930 degrees of freedom
    ## Multiple R-Squared: 0.1685,  Adjusted R-squared: 0.165 
    ## Wald test: 24.92 on 4 and 930 DF,  p-value: < 2.2e-16
    #GMM Two Steps
    m10_1_2<-gmm(wage$lwage~wage$educ+wage$exper+wage$tenure+wage$black,x=cbind(wage$sibs,wage$exper,wage$tenure,wage$black), type="twoStep") # every regressor must also appear in x, replacing only the instrumented one; otherwise there are fewer instruments than variables
    summary(m10_1_2)
    ## 
    ## Call:
    ## gmm(g = wage$lwage ~ wage$educ + wage$exper + wage$tenure + wage$black, 
    ##     x = cbind(wage$sibs, wage$exper, wage$tenure, wage$black), 
    ##     type = "twoStep")
    ## 
    ## 
    ## Method:  twoStep 
    ## 
    ## Kernel:  Quadratic Spectral
    ## 
    ## Coefficients:
    ##              Estimate     Std. Error   t value      Pr(>|t|)   
    ## (Intercept)   5.1531e+00   6.9275e-01   7.4385e+00   1.0183e-13
    ## wage$educ     9.7994e-02   4.2009e-02   2.3327e+00   1.9665e-02
    ## wage$exper    2.4033e-02   1.1628e-02   2.0668e+00   3.8751e-02
    ## wage$tenure   8.2282e-03   3.3129e-03   2.4837e+00   1.3003e-02
    ## wage$black   -1.5505e-01   6.7448e-02  -2.2988e+00   2.1518e-02
    ## 
    ## J-Test: degrees of freedom is 0 
    ##                 J-test                P-value             
    ## Test E(g)=0:    2.26999056478394e-26  *******
  2. Now, manually carry out \(2SLS\). That is, first regress \(educ_i\) on \(sibs_i\), \(exper_i\), \(tenure_i\), and \(black_i\) and obtain the fitted values, \(\hat{educ}_i, i=1,..., n\). Then, run the second stage regression \(log(wage_i )\) on \(\hat{educ}_i\), \(exper_i\), \(tenure_i\), and \(black_i, i=1,..., n\). Verify that the \(\hat{\beta}_j\) are identical to those obtained from part (i), but that the standard errors are somewhat different. The standard errors obtained from the second stage regression when manually carrying out \(2SLS\) are generally inappropriate.

    #OLS
    # First stage: regress educ on the instrument (sibs) and the exogenous regressors
    m10_2<-lm(educ~sibs+exper+tenure+black,data=wage2)
    # Store the first-stage fitted values as a new column of wage2
    # NOTE(review): this needs one fitted value per row of wage2, i.e. no rows dropped by lm() -- confirm
    wage2$educ2<-m10_2$fitted.values
    # Second stage: educ is replaced by its fitted values; the exercise notes these standard errors are generally inappropriate
    m10_3<-lm(lwage~educ2+exper+tenure+black, data=wage2)
    summary(m10_3)
    ## 
    ## Call:
    ## lm(formula = lwage ~ educ2 + exper + tenure + black, data = wage2)
    ## 
    ## Residuals:
    ##      Min       1Q   Median       3Q      Max 
    ## -1.97409 -0.25720  0.00997  0.26147  1.25198 
    ## 
    ## Coefficients:
    ##              Estimate Std. Error t value Pr(>|t|)    
    ## (Intercept)  5.215976   0.568815   9.170  < 2e-16 ***
    ## educ2        0.093632   0.035293   2.653 0.008114 ** 
    ## exper        0.020922   0.008779   2.383 0.017368 *  
    ## tenure       0.011548   0.002868   4.027  6.1e-05 ***
    ## black       -0.183329   0.052476  -3.494 0.000499 ***
    ## ---
    ## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
    ## 
    ## Residual standard error: 0.4028 on 930 degrees of freedom
    ## Multiple R-squared:  0.08912,    Adjusted R-squared:  0.0852 
    ## F-statistic: 22.75 on 4 and 930 DF,  p-value: < 2.2e-16
    #GMM
    m10_2_2<-gmm(wage$educ~wage$sibs+wage$exper+wage$tenure+wage$black,x=cbind(wage$sibs,wage$exper,wage$tenure,wage$black), type="iterative")
    m10_3_2<-gmm(wage$lwage~m10_2_2$fitted.values+wage$exper+wage$tenure+wage$black, x=cbind(m10_2_2$fitted.values,wage$exper,wage$tenure,wage$black), type="iterative")
    summary(m10_3_2)
    ## 
    ## Call:
    ## gmm(g = wage$lwage ~ m10_2_2$fitted.values + wage$exper + wage$tenure + 
    ##     wage$black, x = cbind(m10_2_2$fitted.values, wage$exper, 
    ##     wage$tenure, wage$black), type = "iterative")
    ## 
    ## 
    ## Method:  iterative 
    ## 
    ## Kernel:  Quadratic Spectral
    ## 
    ## Coefficients:
    ##                        Estimate     Std. Error   t value      Pr(>|t|)   
    ## (Intercept)             5.1531e+00   6.9342e-01   7.4314e+00   1.0748e-13
    ## m10_2_2$fitted.values   9.7994e-02   4.2106e-02   2.3273e+00   1.9948e-02
    ## wage$exper              2.4033e-02   1.1567e-02   2.0778e+00   3.7728e-02
    ## wage$tenure             8.2282e-03   3.3990e-03   2.4208e+00   1.5488e-02
    ## wage$black             -1.5505e-01   6.4033e-02  -2.4214e+00   1.5462e-02
    ## 
    ## J-Test: degrees of freedom is 0 
    ##                 J-test                P-value             
    ## Test E(g)=0:    2.17828765976177e-24  *******

    El error estándar de estimarlo manual es más alto

  3. Now, use the following two-step procedure, which generally yields inconsistent parameter estimates of the \(\beta_j\), and not just inconsistent standard errors. In step one, regress \(educ_i\) on \(sibs_i\) only and obtain the fitted values, say \(\tilde{educ}_i\). (Note that this is an incorrect first stage regression.) Then, in the second step, run the regression of \(log(wage_i)\) on \(\tilde{educ}_i\), \(exper_i\), \(tenure_i\), and \(black_i, i=1,..., n\). How does the estimate from this incorrect, two-step procedure compare with the correct \(2SLS\) estimate of the return to education?

    #OLS
    # Deliberately incorrect first stage requested by the exercise: educ on sibs only
    m10_4<-lm(educ~sibs,data=wage2)
    # Store the fitted values as a new column of wage2
    # NOTE(review): this needs one fitted value per row of wage2, i.e. no rows dropped by lm() -- confirm
    wage2$educ3<-m10_4$fitted.values
    # Second step: regress lwage on those fitted values plus the exogenous regressors
    m10_5<-lm(lwage~educ3+exper+tenure+black, data=wage2)
    summary(m10_5)
    ## 
    ## Call:
    ## lm(formula = lwage ~ educ3 + exper + tenure + black, data = wage2)
    ## 
    ## Residuals:
    ##      Min       1Q   Median       3Q      Max 
    ## -1.97409 -0.25720  0.00997  0.26147  1.25198 
    ## 
    ## Coefficients:
    ##              Estimate Std. Error t value Pr(>|t|)    
    ## (Intercept)  5.771022   0.360376  16.014  < 2e-16 ***
    ## educ3        0.069975   0.026376   2.653  0.00811 ** 
    ## exper       -0.000394   0.003121  -0.126  0.89957    
    ## tenure       0.013975   0.002691   5.193 2.54e-07 ***
    ## black       -0.241631   0.041528  -5.819 8.16e-09 ***
    ## ---
    ## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
    ## 
    ## Residual standard error: 0.4028 on 930 degrees of freedom
    ## Multiple R-squared:  0.08912,    Adjusted R-squared:  0.0852 
    ## F-statistic: 22.75 on 4 and 930 DF,  p-value: < 2.2e-16
    #GMM
    m10_4_2<-gmm(wage$educ~wage$sibs,x=wage$sibs, type="iterative")
    m10_5_2<-gmm(wage$lwage~m10_4_2$fitted.values+wage$exper+wage$tenure+wage$black, x=cbind(m10_4_2$fitted.values,wage$exper,wage$tenure,wage$black), type="iterative")
    summary(m10_5_2)
    ## 
    ## Call:
    ## gmm(g = wage$lwage ~ m10_4_2$fitted.values + wage$exper + wage$tenure + 
    ##     wage$black, x = cbind(m10_4_2$fitted.values, wage$exper, 
    ##     wage$tenure, wage$black), type = "iterative")
    ## 
    ## 
    ## Method:  iterative 
    ## 
    ## Kernel:  Quadratic Spectral
    ## 
    ## Coefficients:
    ##                        Estimate     Std. Error   t value      Pr(>|t|)   
    ## (Intercept)             5.5779e+00   5.1129e-01   1.0909e+01   1.0396e-27
    ## m10_4_2$fitted.values   8.5334e-02   3.6658e-02   2.3279e+00   1.9920e-02
    ## wage$exper             -3.7720e-04   3.5106e-03  -1.0745e-01   9.1444e-01
    ## wage$tenure             1.2396e-02   2.9860e-03   4.1515e+00   3.3025e-05
    ## wage$black             -1.9812e-01   5.7235e-02  -3.4615e+00   5.3709e-04
    ## 
    ## J-Test: degrees of freedom is 0 
    ##                 J-test                P-value             
    ## Test E(g)=0:    1.13108248863633e-24  *******
    El estimador del beta que acompaña a educ ahora es más bajo. También se afecta la significancia estadística de la variable experiencia.

C12

Use the data in CATHOLIC to answer this question. The model of interest is \(math_{12}=\beta_0+\beta_1cathhs+\beta_2lfaminc+\beta_3motheduc+\beta_4fatheduc+u,\) where \(cathhs\) is a binary indicator for whether a student attends a Catholic high school.

data('catholic')
  1. How many students are in the sample? What percentage of these students attend a Catholic high school?

    # Every row of `catholic` is one student in the sample, so the sample size
    # is simply the row count. Filtering on hsgrad == 0 would keep only the
    # students who had not graduated, which is not what the question asks.
    students <- nrow(catholic)
    students
    ## [1] 7430
    # Percentage of all students in the sample who attend a Catholic high school
    catolicos <- sum(catholic$cathhs == 1, na.rm = TRUE)
    catolicos / students * 100
    ## (re-knit the document to refresh this printed percentage)
  2. Estimate the above equation by \(OLS\). What is the estimate of \(\beta_1\)? What is its 95% confidence interval?

    # OLS fit of the model of interest
    m10_6 <- lm(
      math12 ~ cathhs + lfaminc + motheduc + fatheduc,
      data = catholic
    )
    summary(m10_6)
    ## 
    ## Call:
    ## lm(formula = math12 ~ cathhs + lfaminc + motheduc + fatheduc, 
    ##     data = catholic)
    ## 
    ## Residuals:
    ##      Min       1Q   Median       3Q      Max 
    ## -27.0824  -6.1371   0.4904   6.4524  26.7728 
    ## 
    ## Coefficients:
    ##             Estimate Std. Error t value Pr(>|t|)    
    ## (Intercept) 11.14862    1.31098   8.504  < 2e-16 ***
    ## cathhs       1.47723    0.41794   3.535 0.000411 ***
    ## lfaminc      1.84867    0.14263  12.961  < 2e-16 ***
    ## motheduc     0.71635    0.06208  11.539  < 2e-16 ***
    ## fatheduc     0.89125    0.05615  15.872  < 2e-16 ***
    ## ---
    ## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
    ## 
    ## Residual standard error: 8.544 on 7425 degrees of freedom
    ## Multiple R-squared:  0.1846, Adjusted R-squared:  0.1841 
    ## F-statistic: 420.2 on 4 and 7425 DF,  p-value: < 2.2e-16
    # GMM fit of the same equation, with every regressor used as its own
    # instrument (exactly identified)
    m10_6_2 <- gmm(
      catholic$math12 ~ catholic$cathhs + catholic$lfaminc +
        catholic$motheduc + catholic$fatheduc,
      x = cbind(
        catholic$cathhs, catholic$lfaminc,
        catholic$motheduc, catholic$fatheduc
      ),
      type = "iterative"
    )
    summary(m10_6_2)
    ## 
    ## Call:
    ## gmm(g = catholic$math12 ~ catholic$cathhs + catholic$lfaminc + 
    ##     catholic$motheduc + catholic$fatheduc, x = cbind(catholic$cathhs, 
    ##     catholic$lfaminc, catholic$motheduc, catholic$fatheduc), 
    ##     type = "iterative")
    ## 
    ## 
    ## Method:  iterative 
    ## 
    ## Kernel:  Quadratic Spectral
    ## 
    ## Coefficients:
    ##                    Estimate    Std. Error  t value     Pr(>|t|)  
    ## (Intercept)        1.1149e+01  1.2634e+00  8.8243e+00  1.1017e-18
    ## catholic$cathhs    1.4772e+00  4.2225e-01  3.4985e+00  4.6790e-04
    ## catholic$lfaminc   1.8487e+00  1.4036e-01  1.3171e+01  1.2888e-39
    ## catholic$motheduc  7.1635e-01  6.2587e-02  1.1446e+01  2.4723e-30
    ## catholic$fatheduc  8.9125e-01  5.6567e-02  1.5756e+01  6.2654e-56
    ## 
    ## J-Test: degrees of freedom is 0 
    ##                 J-test                P-value             
    ## Test E(g)=0:    1.10517958274478e-18  *******

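    El estimador \(\beta_1\) es 1.477 tanto por OLS como por GMM y sí es significativo al 5% en ambos. Con OLS, el intervalo de confianza al 95% es aproximadamente \(1.477 \pm 1.96 \times 0.418 = [0.66,\ 2.30]\).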

  3. Using \(parcath\) as an instrument for \(cathhs\), estimate the reduced form for \(cathhs\). What is the t statistic for \(parcath\)? Is there evidence of a weak instrument problem?

    # Reduced form for cathhs by OLS: parcath plus the exogenous controls
    m10_7 <- lm(
      cathhs ~ parcath + lfaminc + motheduc + fatheduc,
      data = catholic
    )
    summary(m10_7)
    ## 
    ## Call:
    ## lm(formula = cathhs ~ parcath + lfaminc + motheduc + fatheduc, 
    ##     data = catholic)
    ## 
    ## Residuals:
    ##      Min       1Q   Median       3Q      Max 
    ## -0.23375 -0.12118 -0.02169  0.00332  1.07310 
    ## 
    ## Coefficients:
    ##              Estimate Std. Error t value Pr(>|t|)    
    ## (Intercept) -0.321081   0.034755  -9.238  < 2e-16 ***
    ## parcath      0.143058   0.005566  25.701  < 2e-16 ***
    ## lfaminc      0.018351   0.003793   4.839 1.33e-06 ***
    ## motheduc     0.003923   0.001655   2.370   0.0178 *  
    ## fatheduc     0.006585   0.001493   4.411 1.04e-05 ***
    ## ---
    ## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
    ## 
    ## Residual standard error: 0.2273 on 7425 degrees of freedom
    ## Multiple R-squared:  0.09594,    Adjusted R-squared:  0.09545 
    ## F-statistic:   197 on 4 and 7425 DF,  p-value: < 2.2e-16
    # Same reduced form estimated by GMM, with the regressors as instruments
    m10_7_2 <- gmm(
      catholic$cathhs ~ catholic$parcath + catholic$lfaminc +
        catholic$motheduc + catholic$fatheduc,
      x = cbind(
        catholic$parcath, catholic$lfaminc,
        catholic$motheduc, catholic$fatheduc
      ),
      type = "iterative"
    )
    summary(m10_7_2)
    ## 
    ## Call:
    ## gmm(g = catholic$cathhs ~ catholic$parcath + catholic$lfaminc + 
    ##     catholic$motheduc + catholic$fatheduc, x = cbind(catholic$parcath, 
    ##     catholic$lfaminc, catholic$motheduc, catholic$fatheduc), 
    ##     type = "iterative")
    ## 
    ## 
    ## Method:  iterative 
    ## 
    ## Kernel:  Quadratic Spectral
    ## 
    ## Coefficients:
    ##                    Estimate     Std. Error   t value      Pr(>|t|)   
    ## (Intercept)        -3.2108e-01   5.2292e-02  -6.1402e+00   8.2410e-10
    ## catholic$parcath    1.4306e-01   1.4336e-02   9.9792e+00   1.8800e-23
    ## catholic$lfaminc    1.8351e-02   4.6713e-03   3.9286e+00   8.5458e-05
    ## catholic$motheduc   3.9228e-03   1.7177e-03   2.2838e+00   2.2383e-02
    ## catholic$fatheduc   6.5846e-03   1.7201e-03   3.8281e+00   1.2915e-04
    ## 
    ## J-Test: degrees of freedom is 0 
    ##                 J-test               P-value            
    ## Test E(g)=0:    3.4993953409197e-22  *******

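    El estadístico t de parcath es 25.70 por OLS (9.98 con los errores estándar de GMM). Por el momento, la variable parcath es significativa para estimar cathhs, por lo que no hay evidencia de un problema de instrumento débil.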

  4. Estimate the above equation by IV, using \(parcath\) as an IV for \(cathhs\). How does the estimate and 95% CI compare with the OLS quantities?

    # IV estimate: parcath takes the place of cathhs in the instrument list
    m10_8 <- AER::ivreg(
      math12 ~ cathhs + lfaminc + motheduc + fatheduc |
        . - cathhs + parcath,
      data = catholic
    )
    summary(m10_8)
    ## 
    ## Call:
    ## AER::ivreg(formula = math12 ~ cathhs + lfaminc + motheduc + fatheduc | 
    ##     . - cathhs + parcath, data = catholic)
    ## 
    ## Residuals:
    ##      Min       1Q   Median       3Q      Max 
    ## -28.3521  -6.1166   0.4572   6.4546  26.8221 
    ## 
    ## Coefficients:
    ##             Estimate Std. Error t value Pr(>|t|)    
    ## (Intercept) 11.91472    1.37627   8.657  < 2e-16 ***
    ## cathhs       4.11742    1.46615   2.808  0.00499 ** 
    ## lfaminc      1.78453    0.14703  12.137  < 2e-16 ***
    ## motheduc     0.71331    0.06227  11.455  < 2e-16 ***
    ## fatheduc     0.87501    0.05696  15.361  < 2e-16 ***
    ## ---
    ## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
    ## 
    ## Residual standard error: 8.567 on 7425 degrees of freedom
    ## Multiple R-Squared: 0.1802,  Adjusted R-squared: 0.1797 
    ## Wald test: 416.8 on 4 and 7425 DF,  p-value: < 2.2e-16
    # Two-step GMM: the instrument matrix holds parcath in place of cathhs
    m10_8_2 <- gmm(
      catholic$math12 ~ catholic$cathhs + catholic$lfaminc +
        catholic$motheduc + catholic$fatheduc,
      x = cbind(
        catholic$parcath, catholic$lfaminc,
        catholic$motheduc, catholic$fatheduc
      ),
      type = "twoStep"
    )
    summary(m10_8_2)
    ## 
    ## Call:
    ## gmm(g = catholic$math12 ~ catholic$cathhs + catholic$lfaminc + 
    ##     catholic$motheduc + catholic$fatheduc, x = cbind(catholic$parcath, 
    ##     catholic$lfaminc, catholic$motheduc, catholic$fatheduc), 
    ##     type = "twoStep")
    ## 
    ## 
    ## Method:  twoStep 
    ## 
    ## Kernel:  Quadratic Spectral
    ## 
    ## Coefficients:
    ##                    Estimate    Std. Error  t value     Pr(>|t|)  
    ## (Intercept)        1.1915e+01  1.3121e+00  9.0809e+00  1.0768e-19
    ## catholic$cathhs    4.1174e+00  1.4891e+00  2.7651e+00  5.6908e-03
    ## catholic$lfaminc   1.7845e+00  1.4296e-01  1.2483e+01  9.2950e-36
    ## catholic$motheduc  7.1331e-01  6.2804e-02  1.1358e+01  6.7956e-30
    ## catholic$fatheduc  8.7501e-01  5.7618e-02  1.5186e+01  4.3473e-52
    ## 
    ## J-Test: degrees of freedom is 0 
    ##                 J-test                P-value             
    ## Test E(g)=0:    9.91406636788409e-19  *******

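    Sigue siendo estadísticamente significativo, parece que parcath es un buen instrumento, aunque sin instrumentos cathhs era significativa al 0.1% y ahora solo lo es al 1%. El estimador pasa de 1.48 (OLS) a 4.12 (IV) y el intervalo de confianza al 95% es mucho más ancho: \(4.117 \pm 1.96 \times 1.466 = [1.24,\ 6.99]\) frente a \([0.66,\ 2.30]\) con OLS.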

  5. Test the null hypothesis that \(cathhs\) is exogenous. What is the p-value of the test?

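    Tanto gmm como iv estimators nos dan el p value. En este caso es de 0.00569 con GMM o de 0.0049 con IV. Nota: estos p-values corresponden a la prueba t del coeficiente de cathhs, no a una prueba de exogeneidad; para contrastar la hipótesis nula de que cathhs es exógena se debe usar la prueba de Wu-Hausman, por ejemplo con summary(m10_8, diagnostics = TRUE).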

  6. Suppose you add the interaction between \(cathhs * motheduc\) to the above model. Why is it generally endogenous? Why is \(parcath*motheduc\) a good IV candidate for \(cathhs*motheduc\)?

    # OLS with the cathhs x motheduc interaction added
    m10_9 <- lm(
      math12 ~ cathhs + lfaminc + motheduc + fatheduc + cathhs * motheduc,
      data = catholic
    )
    summary(m10_9)
    ## 
    ## Call:
    ## lm(formula = math12 ~ cathhs + lfaminc + motheduc + fatheduc + 
    ##     cathhs * motheduc, data = catholic)
    ## 
    ## Residuals:
    ##      Min       1Q   Median       3Q      Max 
    ## -27.1084  -6.1397   0.4865   6.4471  26.8147 
    ## 
    ## Coefficients:
    ##                 Estimate Std. Error t value Pr(>|t|)    
    ## (Intercept)     11.05641    1.31736   8.393   <2e-16 ***
    ## cathhs           3.76228    3.22718   1.166    0.244    
    ## lfaminc          1.84739    0.14265  12.951   <2e-16 ***
    ## motheduc         0.72516    0.06330  11.456   <2e-16 ***
    ## fatheduc         0.89038    0.05617  15.852   <2e-16 ***
    ## cathhs:motheduc -0.16401    0.22968  -0.714    0.475    
    ## ---
    ## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
    ## 
    ## Residual standard error: 8.544 on 7424 degrees of freedom
    ## Multiple R-squared:  0.1846, Adjusted R-squared:  0.1841 
    ## F-statistic: 336.2 on 5 and 7424 DF,  p-value: < 2.2e-16
    # GMM without external instruments: regressors instrument themselves
    m10_9_2 <- gmm(
      catholic$math12 ~ catholic$cathhs + catholic$lfaminc +
        catholic$motheduc + catholic$fatheduc +
        catholic$cathhs * catholic$motheduc,
      x = cbind(
        catholic$cathhs, catholic$lfaminc,
        catholic$motheduc, catholic$fatheduc,
        catholic$cathhs * catholic$motheduc
      ),
      type = "iterative"
    )
    summary(m10_9_2)
    ## 
    ## Call:
    ## gmm(g = catholic$math12 ~ catholic$cathhs + catholic$lfaminc + 
    ##     catholic$motheduc + catholic$fatheduc + catholic$cathhs * 
    ##     catholic$motheduc, x = cbind(catholic$cathhs, catholic$lfaminc, 
    ##     catholic$motheduc, catholic$fatheduc, catholic$cathhs * catholic$motheduc), 
    ##     type = "iterative")
    ## 
    ## 
    ## Method:  iterative 
    ## 
    ## Kernel:  Quadratic Spectral
    ## 
    ## Coefficients:
    ##                                    Estimate     Std. Error   t value    
    ## (Intercept)                         1.1056e+01   1.2700e+00   8.7057e+00
    ## catholic$cathhs                     3.7623e+00   2.8550e+00   1.3178e+00
    ## catholic$lfaminc                    1.8474e+00   1.4036e-01   1.3162e+01
    ## catholic$motheduc                   7.2516e-01   6.4195e-02   1.1296e+01
    ## catholic$fatheduc                   8.9038e-01   5.6582e-02   1.5736e+01
    ## catholic$cathhs:catholic$motheduc  -1.6401e-01   2.0278e-01  -8.0879e-01
    ##                                    Pr(>|t|)   
    ## (Intercept)                         3.1569e-18
    ## catholic$cathhs                     1.8757e-01
    ## catholic$lfaminc                    1.4585e-39
    ## catholic$motheduc                   1.3714e-29
    ## catholic$fatheduc                   8.5631e-56
    ## catholic$cathhs:catholic$motheduc   4.1863e-01
    ## 
    ## J-Test: degrees of freedom is 0 
    ##                 J-test                P-value             
    ## Test E(g)=0:    1.09707624445294e-18  *******

    Es generalmente endógeno porque ya se incluyeron las dos variables previamente; de hecho, incluirla afecta la significancia de cathhs.

    # IV estimate with the interaction: cathhs and cathhs x motheduc are
    # swapped for parcath and parcath x motheduc in the instrument list
    m10_10 <- AER::ivreg(
      math12 ~ cathhs + lfaminc + motheduc + fatheduc + cathhs * motheduc |
        . - cathhs + parcath - cathhs * motheduc + parcath * motheduc,
      data = catholic
    )
    summary(m10_10)
    ## 
    ## Call:
    ## AER::ivreg(formula = math12 ~ cathhs + lfaminc + motheduc + fatheduc + 
    ##     cathhs * motheduc | . - cathhs + parcath - cathhs * motheduc + 
    ##     parcath * motheduc, data = catholic)
    ## 
    ## Residuals:
    ##      Min       1Q   Median       3Q      Max 
    ## -30.4258  -6.2590   0.5254   6.5728  28.0794 
    ## 
    ## Coefficients:
    ##                 Estimate Std. Error t value Pr(>|t|)    
    ## (Intercept)      9.32144    1.47286   6.329 2.61e-10 ***
    ## cathhs          72.64904   15.78263   4.603 4.23e-06 ***
    ## lfaminc          1.73374    0.15262  11.360  < 2e-16 ***
    ## motheduc         0.97506    0.08612  11.323  < 2e-16 ***
    ## fatheduc         0.84569    0.05932  14.257  < 2e-16 ***
    ## cathhs:motheduc -4.88146    1.08382  -4.504 6.77e-06 ***
    ## ---
    ## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
    ## 
    ## Residual standard error: 8.816 on 7424 degrees of freedom
    ## Multiple R-Squared: 0.132,   Adjusted R-squared: 0.1314 
    ## Wald test: 317.8 on 5 and 7424 DF,  p-value: < 2.2e-16
    # Two-step GMM with parcath and parcath x motheduc as instruments
    m10_10_2 <- gmm(
      catholic$math12 ~ catholic$cathhs + catholic$lfaminc +
        catholic$motheduc + catholic$fatheduc +
        catholic$cathhs * catholic$motheduc,
      x = cbind(
        catholic$parcath, catholic$lfaminc,
        catholic$motheduc, catholic$fatheduc,
        catholic$parcath * catholic$motheduc
      ),
      type = "twoStep"
    )
    summary(m10_10_2)
    ## 
    ## Call:
    ## gmm(g = catholic$math12 ~ catholic$cathhs + catholic$lfaminc + 
    ##     catholic$motheduc + catholic$fatheduc + catholic$cathhs * 
    ##     catholic$motheduc, x = cbind(catholic$parcath, catholic$lfaminc, 
    ##     catholic$motheduc, catholic$fatheduc, catholic$parcath * 
    ##         catholic$motheduc), type = "twoStep")
    ## 
    ## 
    ## Method:  twoStep 
    ## 
    ## Kernel:  Quadratic Spectral
    ## 
    ## Coefficients:
    ##                                    Estimate     Std. Error   t value    
    ## (Intercept)                         9.3214e+00   1.4134e+00   6.5948e+00
    ## catholic$cathhs                     7.2649e+01   1.7570e+01   4.1348e+00
    ## catholic$lfaminc                    1.7337e+00   1.5162e-01   1.1435e+01
    ## catholic$motheduc                   9.7506e-01   8.6316e-02   1.1296e+01
    ## catholic$fatheduc                   8.4569e-01   6.0129e-02   1.4065e+01
    ## catholic$cathhs:catholic$motheduc  -4.8815e+00   1.2035e+00  -4.0559e+00
    ##                                    Pr(>|t|)   
    ## (Intercept)                         4.2575e-11
    ## catholic$cathhs                     3.5525e-05
    ## catholic$lfaminc                    2.7976e-30
    ## catholic$motheduc                   1.3659e-29
    ## catholic$fatheduc                   6.2559e-45
    ## catholic$cathhs:catholic$motheduc   4.9934e-05
    ## 
    ## J-Test: degrees of freedom is 0 
    ##                 J-test                P-value             
    ## Test E(g)=0:    1.12121069393814e-18  *******

    Al incluir el instrumento se elimina la endogeneidad

  7. Before you create the interactions in part (vi), first find the sample average of \(motheduc\) and create \(cathhs *(motheduc - \bar{motheduc})\) and \(parcath*(motheduc-\bar{motheduc})\). Add the first interaction to the model and use the second as an IV. Of course, \(cathhs\) is also instrumented. Is the interaction term statistically significant?

    # Interactions built on motheduc centred at its sample mean
    motheduc_c <- catholic$motheduc - mean(catholic$motheduc)
    catholic$interaccion1 <- catholic$cathhs * motheduc_c
    catholic$interaccion2 <- catholic$parcath * motheduc_c
    # IV model: cathhs and interaccion1 are instrumented by parcath and
    # interaccion2
    m10_11 <- AER::ivreg(
      math12 ~ cathhs + lfaminc + motheduc + fatheduc + interaccion1 |
        . - cathhs + parcath - interaccion1 + interaccion2,
      data = catholic
    )
    summary(m10_11)
    ## 
    ## Call:
    ## AER::ivreg(formula = math12 ~ cathhs + lfaminc + motheduc + fatheduc + 
    ##     interaccion1 | . - cathhs + parcath - interaccion1 + interaccion2, 
    ##     data = catholic)
    ## 
    ## Residuals:
    ##      Min       1Q   Median       3Q      Max 
    ## -30.4258  -6.2590   0.5254   6.5728  28.0794 
    ## 
    ## Coefficients:
    ##              Estimate Std. Error t value Pr(>|t|)    
    ## (Intercept)   9.32144    1.47286   6.329 2.61e-10 ***
    ## cathhs        7.44802    1.88842   3.944 8.09e-05 ***
    ## lfaminc       1.73374    0.15262  11.360  < 2e-16 ***
    ## motheduc      0.97506    0.08612  11.323  < 2e-16 ***
    ## fatheduc      0.84569    0.05932  14.257  < 2e-16 ***
    ## interaccion1 -4.88146    1.08382  -4.504 6.77e-06 ***
    ## ---
    ## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
    ## 
    ## Residual standard error: 8.816 on 7424 degrees of freedom
    ## Multiple R-Squared: 0.132,   Adjusted R-squared: 0.1314 
    ## Wald test: 317.8 on 5 and 7424 DF,  p-value: < 2.2e-16
    # Two-step GMM with the centred interaction; instruments are parcath and
    # interaccion2
    m10_11_2 <- gmm(
      catholic$math12 ~ catholic$cathhs + catholic$lfaminc +
        catholic$motheduc + catholic$fatheduc + catholic$interaccion1,
      x = cbind(
        catholic$parcath, catholic$lfaminc,
        catholic$motheduc, catholic$fatheduc,
        catholic$interaccion2
      ),
      type = "twoStep"
    )
    summary(m10_11_2)
    ## 
    ## Call:
    ## gmm(g = catholic$math12 ~ catholic$cathhs + catholic$lfaminc + 
    ##     catholic$motheduc + catholic$fatheduc + catholic$interaccion1, 
    ##     x = cbind(catholic$parcath, catholic$lfaminc, catholic$motheduc, 
    ##         catholic$fatheduc, catholic$interaccion2), type = "twoStep")
    ## 
    ## 
    ## Method:  twoStep 
    ## 
    ## Kernel:  Quadratic Spectral
    ## 
    ## Coefficients:
    ##                        Estimate     Std. Error   t value      Pr(>|t|)   
    ## (Intercept)             9.3214e+00   1.4134e+00   6.5948e+00   4.2576e-11
    ## catholic$cathhs         7.4480e+00   2.0454e+00   3.6413e+00   2.7127e-04
    ## catholic$lfaminc        1.7337e+00   1.5162e-01   1.1435e+01   2.7943e-30
    ## catholic$motheduc       9.7506e-01   8.6316e-02   1.1296e+01   1.3674e-29
    ## catholic$fatheduc       8.4569e-01   6.0130e-02   1.4065e+01   6.2728e-45
    ## catholic$interaccion1  -4.8815e+00   1.2035e+00  -4.0560e+00   4.9911e-05
    ## 
    ## J-Test: degrees of freedom is 0 
    ##                 J-test                P-value             
    ## Test E(g)=0:    1.12742185615146e-18  *******

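    El resultado es el mismo que antes para los betas, salvo el de cathhs, que pasa de 72.65 a 7.45 con un error estándar más pequeño (1.89 frente a 15.78), porque ahora mide el efecto de cathhs en el promedio de motheduc. El término de interacción (-4.88, t = -4.50) es estadísticamente significativo.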

  8. Compare the coefficient on \(cathhs\) in (vii) to that in part (iv). Is including the interaction important for estimating the average partial effect?

    No es importante porque al incluir la interacción se incrementan los errores estándar y se reduce el \(R^2\)