Cargar librerías:
library(readr)
library(AER)
## Loading required package: car
## Loading required package: carData
## Loading required package: lmtest
## Loading required package: zoo
##
## Attaching package: 'zoo'
## The following objects are masked from 'package:base':
##
## as.Date, as.Date.numeric
## Loading required package: sandwich
## Loading required package: survival
library(systemfit)
## Loading required package: Matrix
##
## Please cite the 'systemfit' package as:
## Arne Henningsen and Jeff D. Hamann (2007). systemfit: A Package for Estimating Systems of Simultaneous Equations in R. Journal of Statistical Software 23(4), 1-40. http://www.jstatsoft.org/v23/i04/.
##
## If you have questions, suggestions, or comments regarding the 'systemfit' package, please use a forum or 'tracker' at systemfit's R-Forge site:
## https://r-forge.r-project.org/projects/systemfit/
Cargar datos (CSV):
df <- read_csv("data.csv")
## New names:
## Rows: 2000 Columns: 7
## ── Column specification
## ──────────────────────────────────────────────────────── Delimiter: "," dbl
## (7): ...1, y1, y2, y3, x1, x2, x3
## ℹ Use `spec()` to retrieve the full column specification for this data. ℹ
## Specify the column types or set `show_col_types = FALSE` to quiet this message.
## • `` -> `...1`
# Ecuación (1): y1 = a0 + a1*y2 + a2*y3 + a3*x1 + u1
m1 <- lm(y1 ~ y2 + y3 + x1, data = df)
# Resultados
summary(m1)
##
## Call:
## lm(formula = y1 ~ y2 + y3 + x1, data = df)
##
## Residuals:
## Min 1Q Median 3Q Max
## -10.2873 -4.9640 0.0814 4.9744 10.0940
##
## Coefficients:
## Estimate Std. Error t value Pr(>|t|)
## (Intercept) 1.020e+01 6.228e-01 16.38 <2e-16 ***
## y2 1.575e-03 8.660e-05 18.18 <2e-16 ***
## y3 1.529e+00 8.445e-02 18.11 <2e-16 ***
## x1 4.939e-04 4.935e-06 100.08 <2e-16 ***
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## Residual standard error: 5.766 on 1996 degrees of freedom
## Multiple R-squared: 0.8818, Adjusted R-squared: 0.8816
## F-statistic: 4961 on 3 and 1996 DF, p-value: < 2.2e-16
# Extraer coeficientes (estimadores)
alpha_hat <- coef(m1) # (a0, a1, a2, a3)
alpha_hat
## (Intercept) y2 y3 x1
## 1.020354e+01 1.574896e-03 1.529471e+00 4.939153e-04
\[ \hat{y}_1 = 10.2035 + 0.001575\, y_2 + 1.5295\, y_3 + 0.000494\, x_1 \]
# Ecuación (2): y2 = b0 + b1*y3 + b2*x3 + u2
m2 <- lm(y2 ~ y3 + x3, data = df)
# Resultados
summary(m2)
##
## Call:
## lm(formula = y2 ~ y3 + x3, data = df)
##
## Residuals:
## Min 1Q Median 3Q Max
## -870.34 -412.68 9.62 414.35 836.14
##
## Coefficients:
## Estimate Std. Error t value Pr(>|t|)
## (Intercept) 1034.261 48.722 21.23 <2e-16 ***
## y3 86.540 6.322 13.69 <2e-16 ***
## x3 142.448 1.078 132.20 <2e-16 ***
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## Residual standard error: 477.1 on 1997 degrees of freedom
## Multiple R-squared: 0.8985, Adjusted R-squared: 0.8984
## F-statistic: 8843 on 2 and 1997 DF, p-value: < 2.2e-16
# Extraer coeficientes (estimadores)
beta_hat <- coef(m2) # (b0, b1, b2)
beta_hat
## (Intercept) y3 x3
## 1034.26126 86.54032 142.44826
\[ \hat{y}_2 = 1034.261 + 86.5403\, y_3 + 142.4483\, x_3 \]
# Ecuación (3): y3 = g0 + g1*x1 + g2*x2 + u3
m3 <- lm(y3 ~ x1 + x2, data = df)
# Resultados
summary(m3)
##
## Call:
## lm(formula = y3 ~ x1 + x2, data = df)
##
## Residuals:
## Min 1Q Median 3Q Max
## -2.91407 -1.24537 -0.04891 1.24418 3.03166
##
## Coefficients:
## Estimate Std. Error t value Pr(>|t|)
## (Intercept) 3.693e+00 1.475e-01 25.05 <2e-16 ***
## x1 2.503e-05 1.132e-06 22.10 <2e-16 ***
## x2 5.651e-04 3.743e-05 15.10 <2e-16 ***
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## Residual standard error: 1.455 on 1997 degrees of freedom
## Multiple R-squared: 0.2582, Adjusted R-squared: 0.2575
## F-statistic: 347.6 on 2 and 1997 DF, p-value: < 2.2e-16
# Extraer coeficientes (estimadores)
gamma_hat <- coef(m3) # (g0, g1, g2)
gamma_hat
## (Intercept) x1 x2
## 3.693373e+00 2.502919e-05 5.650574e-04
\[ \hat{y}_3 = 3.6934 + 2.503\times 10^{-5}\, x_1 + 5.651\times 10^{-4}\, x_2 \]
# Guardar en una tablita bonita
est <- list(
alpha_hat = alpha_hat,
beta_hat = beta_hat,
gamma_hat = gamma_hat
)
est
## $alpha_hat
## (Intercept) y2 y3 x1
## 1.020354e+01 1.574896e-03 1.529471e+00 4.939153e-04
##
## $beta_hat
## (Intercept) y3 x3
## 1034.26126 86.54032 142.44826
##
## $gamma_hat
## (Intercept) x1 x2
## 3.693373e+00 2.502919e-05 5.650574e-04
La estimación por MCO presenta sesgo de simultaneidad, el cual constituye una forma de endogeneidad, ya que algunas variables explicativas están determinadas conjuntamente dentro del sistema y, por tanto, correlacionadas con el término de error.
Y si bien las tres relaciones lineales pueden estimarse formalmente mediante MCO, únicamente la tercera ecuación cumple plenamente con los supuestos necesarios para una interpretación causal directa. Las dos primeras ecuaciones forman parte de un sistema simultáneo y, por tanto, su estimación adecuada requeriría métodos alternativos como mínimos cuadrados en dos etapas.
Endógenas RHS: y2, y3 Instrumentos: x1, x2, x3 (x1 también es exógena incluida)
# 1ra etapa: proyectar endógenas sobre instrumentos (x1, x2, x3)
fs_y2 <- lm(y2 ~ x1 + x2 + x3, data = df)
fs_y3 <- lm(y3 ~ x1 + x2 + x3, data = df)
df$y2_hat <- fitted(fs_y2)
df$y3_hat <- fitted(fs_y3)
# 2da etapa: sustituir endógenas por sus predichas
ss_y1 <- lm(y1 ~ y2_hat + y3_hat + x1, data = df)
summary(fs_y2)
##
## Call:
## lm(formula = y2 ~ x1 + x2 + x3, data = df)
##
## Residuals:
## Min 1Q Median 3Q Max
## -1053.91 -405.60 9.09 396.90 1066.94
##
## Coefficients:
## Estimate Std. Error t value Pr(>|t|)
## (Intercept) 1.358e+03 5.355e+01 25.357 < 2e-16 ***
## x1 2.942e-03 3.825e-04 7.692 2.26e-14 ***
## x2 3.501e-02 1.264e-02 2.770 0.00566 **
## x3 1.428e+02 1.110e+00 128.647 < 2e-16 ***
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## Residual standard error: 491.2 on 1996 degrees of freedom
## Multiple R-squared: 0.8925, Adjusted R-squared: 0.8924
## F-statistic: 5526 on 3 and 1996 DF, p-value: < 2.2e-16
summary(fs_y3)
##
## Call:
## lm(formula = y3 ~ x1 + x2 + x3, data = df)
##
## Residuals:
## Min 1Q Median 3Q Max
## -2.92161 -1.25110 -0.04314 1.24309 3.05933
##
## Coefficients:
## Estimate Std. Error t value Pr(>|t|)
## (Intercept) 3.646e+00 1.586e-01 22.984 <2e-16 ***
## x1 2.506e-05 1.133e-06 22.114 <2e-16 ***
## x2 5.648e-04 3.744e-05 15.087 <2e-16 ***
## x3 2.678e-03 3.287e-03 0.815 0.415
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## Residual standard error: 1.455 on 1996 degrees of freedom
## Multiple R-squared: 0.2585, Adjusted R-squared: 0.2574
## F-statistic: 231.9 on 3 and 1996 DF, p-value: < 2.2e-16
summary(ss_y1)
##
## Call:
## lm(formula = y1 ~ y2_hat + y3_hat + x1, data = df)
##
## Residuals:
## Min 1Q Median 3Q Max
## -15.3272 -4.9775 0.0517 5.0000 14.2594
##
## Coefficients:
## Estimate Std. Error t value Pr(>|t|)
## (Intercept) 9.048e+00 1.672e+00 5.413 6.96e-08 ***
## y2_hat 1.566e-03 9.975e-05 15.695 < 2e-16 ***
## y3_hat 1.739e+00 2.867e-01 6.066 1.57e-09 ***
## x1 4.888e-04 8.543e-06 57.219 < 2e-16 ***
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## Residual standard error: 6.283 on 1996 degrees of freedom
## Multiple R-squared: 0.8596, Adjusted R-squared: 0.8594
## F-statistic: 4072 on 3 and 1996 DF, p-value: < 2.2e-16
coef(ss_y1)
## (Intercept) y2_hat y3_hat x1
## 9.0479804608 0.0015655512 1.7392778865 0.0004887953
\[ \hat{y}_1 = 9.048 + 0.001566\, y_2 + 1.7393\, y_3 + 0.0004888\, x_1 \]
Endógena RHS: y3 Instrumentos: x1, x2, x3 (x3 incluida como exógena)
# 1ra etapa: y3 sobre instrumentos
fs2_y3 <- lm(y3 ~ x1 + x2 + x3, data = df)
df$y3_hat2 <- fitted(fs2_y3)
# 2da etapa
ss_y2 <- lm(y2 ~ y3_hat2 + x3, data = df)
summary(fs2_y3)
##
## Call:
## lm(formula = y3 ~ x1 + x2 + x3, data = df)
##
## Residuals:
## Min 1Q Median 3Q Max
## -2.92161 -1.25110 -0.04314 1.24309 3.05933
##
## Coefficients:
## Estimate Std. Error t value Pr(>|t|)
## (Intercept) 3.646e+00 1.586e-01 22.984 <2e-16 ***
## x1 2.506e-05 1.133e-06 22.114 <2e-16 ***
## x2 5.648e-04 3.744e-05 15.087 <2e-16 ***
## x3 2.678e-03 3.287e-03 0.815 0.415
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## Residual standard error: 1.455 on 1996 degrees of freedom
## Multiple R-squared: 0.2585, Adjusted R-squared: 0.2574
## F-statistic: 231.9 on 3 and 1996 DF, p-value: < 2.2e-16
summary(ss_y2)
##
## Call:
## lm(formula = y2 ~ y3_hat2 + x3, data = df)
##
## Residuals:
## Min 1Q Median 3Q Max
## -1060.68 -407.40 5.27 397.19 1071.80
##
## Coefficients:
## Estimate Std. Error t value Pr(>|t|)
## (Intercept) 940.48 91.14 10.318 < 2e-16 ***
## y3_hat2 100.14 12.81 7.817 8.69e-15 ***
## x3 142.44 1.11 128.300 < 2e-16 ***
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## Residual standard error: 491.6 on 1997 degrees of freedom
## Multiple R-squared: 0.8923, Adjusted R-squared: 0.8922
## F-statistic: 8274 on 2 and 1997 DF, p-value: < 2.2e-16
coef(ss_y2)
## (Intercept) y3_hat2 x3
## 940.4750 100.1396 142.4347
\[ \hat{y}_2 = 940.475 + 100.140\, y_3 + 142.435\, x_3 \]
No tiene RHS endógenas (con MCO basta) No se aplica MC2E a esta ecuación porque no hay endogeneidad que corregir en esa ecuación. Y MC2E solo se justifica cuando existe endogeneidad.
m3 <- lm(y3 ~ x1 + x2, data = df)
summary(m3)
##
## Call:
## lm(formula = y3 ~ x1 + x2, data = df)
##
## Residuals:
## Min 1Q Median 3Q Max
## -2.91407 -1.24537 -0.04891 1.24418 3.03166
##
## Coefficients:
## Estimate Std. Error t value Pr(>|t|)
## (Intercept) 3.693e+00 1.475e-01 25.05 <2e-16 ***
## x1 2.503e-05 1.132e-06 22.10 <2e-16 ***
## x2 5.651e-04 3.743e-05 15.10 <2e-16 ***
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## Residual standard error: 1.455 on 1997 degrees of freedom
## Multiple R-squared: 0.2582, Adjusted R-squared: 0.2575
## F-statistic: 347.6 on 2 and 1997 DF, p-value: < 2.2e-16
coef(m3)
## (Intercept) x1 x2
## 3.693373e+00 2.502919e-05 5.650574e-04
\[ \hat{y}_3 = 3.6934 + 2.503\times 10^{-5}\, x_1 + 5.651\times 10^{-4}\, x_2 \]
# Definir ecuaciones (estructurales)
eq1 <- y1 ~ y2 + y3 + x1
eq2 <- y2 ~ y3 + x3
eq3 <- y3 ~ x1 + x2
# Sistema
system <- list(eq1 = eq1, eq2 = eq2, eq3 = eq3)
# Instrumentos: todas las exógenas del sistema
inst <- ~ x1 + x2 + x3
# 3SLS
fit_3sls <- systemfit(system, method = "3SLS", inst = inst, data = df)
summary(fit_3sls)
##
## systemfit results
## method: 3SLS
##
## N DF SSR detRCov OLS-R2 McElroy-R2
## system 6000 5990 455786538 15996964 0.8983 0.849382
##
## N DF SSR MSE RMSE R2 Adj R2
## eq1 2000 1996 6.65647e+04 3.33490e+01 5.77486 0.881377 0.881199
## eq2 2000 1997 4.55716e+08 2.28200e+05 477.70302 0.898303 0.898201
## eq3 2000 1997 4.22602e+03 2.11619e+00 1.45471 0.258229 0.257486
##
## The covariance matrix of the residuals used for estimation
## eq1 eq2 eq3
## eq1 33.34550 21.8018 -0.49261
## eq2 21.80183 228199.0833 -38.78188
## eq3 -0.49261 -38.7819 2.11617
##
## The covariance matrix of the residuals
## eq1 eq2 eq3
## eq1 33.349042 21.0253 -0.500466
## eq2 21.025313 228200.1738 -38.906794
## eq3 -0.500466 -38.9068 2.116186
##
## The correlations of the residuals
## eq1 eq2 eq3
## eq1 1.00000000 0.00762154 -0.0595739
## eq2 0.00762154 1.00000000 -0.0559874
## eq3 -0.05957389 -0.05598743 1.0000000
##
##
## 3SLS estimates for 'eq1' (equation 1)
## Model Formula: y1 ~ y2 + y3 + x1
## Instruments: ~x1 + x2 + x3
##
## Estimate Std. Error t value Pr(>|t|)
## (Intercept) 9.01408e+00 1.53613e+00 5.86803 5.1509e-09 ***
## y2 1.56984e-03 9.15106e-05 17.15467 < 2.22e-16 ***
## y3 1.74256e+00 2.63502e-01 6.61309 4.8177e-11 ***
## x1 4.88665e-04 7.85041e-06 62.24713 < 2.22e-16 ***
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## Residual standard error: 5.774863 on 1996 degrees of freedom
## Number of observations: 2000 Degrees of Freedom: 1996
## SSR: 66564.688563 MSE: 33.349042 Root MSE: 5.774863
## Multiple R-Squared: 0.881377 Adjusted R-Squared: 0.881199
##
##
## 3SLS estimates for 'eq2' (equation 2)
## Model Formula: y2 ~ y3 + x3
## Instruments: ~x1 + x2 + x3
##
## Estimate Std. Error t value Pr(>|t|)
## (Intercept) 939.53376 88.56595 10.60830 < 2.22e-16 ***
## y3 100.15062 12.44917 8.04476 1.3323e-15 ***
## x3 142.48384 1.07716 132.27681 < 2.22e-16 ***
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## Residual standard error: 477.703018 on 1997 degrees of freedom
## Number of observations: 2000 Degrees of Freedom: 1997
## SSR: 455715747.132404 MSE: 228200.173827 Root MSE: 477.703018
## Multiple R-Squared: 0.898303 Adjusted R-Squared: 0.898201
##
##
## 3SLS estimates for 'eq3' (equation 3)
## Model Formula: y3 ~ x1 + x2
## Instruments: ~x1 + x2 + x3
##
## Estimate Std. Error t value Pr(>|t|)
## (Intercept) 3.70244e+00 1.47398e-01 25.1187 < 2.22e-16 ***
## x1 2.51028e-05 1.13193e-06 22.1771 < 2.22e-16 ***
## x2 5.61394e-04 3.73944e-05 15.0128 < 2.22e-16 ***
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## Residual standard error: 1.454712 on 1997 degrees of freedom
## Number of observations: 2000 Degrees of Freedom: 1997
## SSR: 4226.023099 MSE: 2.116186 Root MSE: 1.454712
## Multiple R-Squared: 0.258229 Adjusted R-Squared: 0.257486
# Matriz de covarianzas contemporáneas estimada (Sigma)
fit_3sls$Sigma
## NULL