1. Eliminar cualquier elemento

# Remove every object listed by ls() from the current environment so the
# analysis starts from an empty workspace.
rm(list=ls())

2. Importar datos

library(readr)
# Read the tab-delimited panel data set, parsing each of the eight model
# variables as numbers. The path is an absolute, machine-specific location
# and has to be adjusted when the analysis runs on another computer.
panel <- read_delim(
  "C:/Users/Jimmy Cueva/Desktop/R para finanzas/Econometría/Multicolinealidad/regresion data panel b (1).txt",
  delim = "\t",
  escape_double = FALSE,
  col_types = cols(
    g_k = col_number(), g_w = col_number(), k_l = col_number(),
    l_h = col_number(), w_l = col_number(), y_h = col_number(),
    y_k = col_number(), y_l = col_number()
  ),
  trim_ws = TRUE
)

# View() opens a data viewer window, so it is only called in an interactive
# session; non-interactive runs skip it.
if (interactive()) {
  View(panel)
}

3.Comando de regresión

# Baseline OLS model: y_h regressed on every other column of `panel`.
original <- lm(formula = y_h ~ ., data = panel)
summary(original)

## 
## Call:
## lm(formula = y_h ~ ., data = panel)
## 
## Residuals:
##      Min       1Q   Median       3Q      Max 
## -1222.78  -255.85   -21.16   265.72  1158.21 
## 
## Coefficients:
##               Estimate Std. Error t value Pr(>|t|)    
## (Intercept) -1.083e+04  5.620e+02 -19.262  < 2e-16 ***
## g_k         -1.446e+05  1.103e+04 -13.108  < 2e-16 ***
## g_w          3.963e+03  3.421e+02  11.583  < 2e-16 ***
## k_l         -3.343e-02  7.297e-03  -4.582 8.83e-06 ***
## l_h          2.433e+04  7.812e+02  31.147  < 2e-16 ***
## w_l         -6.072e-01  6.461e-02  -9.397  < 2e-16 ***
## y_k          7.167e+04  5.830e+03  12.294  < 2e-16 ***
## y_l          7.227e-01  3.964e-02  18.231  < 2e-16 ***
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## Residual standard error: 429 on 172 degrees of freedom
## Multiple R-squared:  0.9951, Adjusted R-squared:  0.9949 
## F-statistic:  5019 on 7 and 172 DF,  p-value: < 2.2e-16

#Se aprecia un R cuadrado y R cuadrado ajustado muy altos, cercanos a 1, posible problema de multicolinealidad.

4.Ejecutar los paquetes

library("lmtest")
library("tseries")
library("car")

5.Detección por el VIF

Es necesaria la librería “car”

# Variance inflation factors of the baseline model (stored, not printed).
vif1 <- vif(original)
# 95% joint confidence ellipse for the g_k and g_w coefficients.
confidenceEllipse(original, which.coef = c("g_k", "g_w"), levels = 0.95)

#Como el VIF es mayor a 10, se puede decir que sí existe el problema de multicolinealidad

Construyendo un bucle

# Draw 95% confidence ellipses for pairs of slope coefficients.
vars <- names(coef(original))[-1] # drop the intercept
comb <- combn(vars, 2, simplify = FALSE) # every pair of coefficient names

# Use a 3 x 3 plotting grid for the first nine pairs and restore the previous
# graphical parameters afterwards, so later plots are not drawn on this grid.
# head() is used instead of comb[1:9] so that no NULL entries are produced
# when fewer than nine pairs exist.
old_par <- par(mfrow = c(3, 3))
for (par_var in head(comb, 9)) {
  confidenceEllipse(original, par_var, levels = 0.95)
}
par(old_par)

6.Detección por matriz de correlaciones_paquetes

library("corrplot")

6.1 Detección por matriz de correlaciones

# Correlation matrix of all columns in `panel`, printed to the console.
correlaciones <- cor(panel)
print(correlaciones)

##           g_k         g_w       k_l        l_h        w_l       y_h        y_k
## g_k 1.0000000  0.23413272 0.5043280 0.32493633  0.7202579 0.7101075  0.8668723
## g_w 0.2341327  1.00000000 0.3578855 0.02449209 -0.1799300 0.2054934 -0.2669429
## k_l 0.5043280  0.35788550 1.0000000 0.31694096  0.7332265 0.9215735  0.3256350
## l_h 0.3249363  0.02449209 0.3169410 1.00000000  0.3428589 0.5144877  0.3219709
## w_l 0.7202579 -0.17993004 0.7332265 0.34285895  1.0000000 0.8794087  0.8169678
## y_h 0.7101075  0.20549338 0.9215735 0.51448771  0.8794087 1.0000000  0.6100648
## y_k 0.8668723 -0.26694288 0.3256350 0.32197091  0.8169678 0.6100648  1.0000000
## y_l 0.7571530  0.19275277 0.9188456 0.35813496  0.9129599 0.9771368  0.6548165
##           y_l
## g_k 0.7571530
## g_w 0.1927528
## k_l 0.9188456
## l_h 0.3581350
## w_l 0.9129599
## y_h 0.9771368
## y_k 0.6548165
## y_l 1.0000000

# Upper-triangle correlation plots: numeric coefficients first, then shading.
corrplot(
  correlaciones,
  method = "number", type = "upper", number.digits = 2
)

corrplot(
  correlaciones,
  method = "shade", type = "upper", number.digits = 3
)

6.2 Detección. Matriz de correlaciones gráficas

library(PerformanceAnalytics)

# Correlation chart for every column of `panel`.
chart.Correlation(panel, histogram = TRUE, pch = 19)

# histogram = TRUE asks for the histograms to be included; pch selects the
# plotting symbol used for the points (it is not a font size).
# The most strongly correlated explanatory variables are (w_l, y_l) at 91%,
# (g_k, y_k) at 87% and (k_l, y_l) at 92%.
# If the variables look bell-shaped, no transformation is needed.

Pruebas de normalidad

Histograma de los residuos

# Residuals of the baseline model and their histogram.
residuos <- residuals(original)
hist(
  residuos,
  breaks = 15, # suggested number of bars (cells)
  col = "skyblue",
  main = "Histograma de los residuos",
  xlab = "Residuos"
)

Interpretación: El histograma muestra una forma aproximadamente simétrica y acampanada, parecida a una distribución normal. No obstante, se nota una leve asimetría hacia la izquierda y colas algo más largas de lo esperado para una distribución normal.

# Density-scaled histogram of the residuals with a normal curve that has the
# residuals' mean and standard deviation drawn on top in red.
hist(
  residuos,
  breaks = 15, col = "skyblue", probability = TRUE,
  main = "Histograma de los residuos vs curva teórica", xlab = "Residuos"
)
curve(
  dnorm(x, mean = mean(residuos), sd = sd(residuos)),
  add = TRUE, col = "red", lwd = 2
)

Prueba de Jarque-Bera

\[ JB = \frac{n}{6} \left( S^2 + \frac{(K - 3)^2}{4} \right) \] Donde:

$n$: tamaño de la muestra
$S$: coeficiente de asimetría (skewness)
$K$: coeficiente de curtosis (kurtosis)
Bajo la hipótesis nula, los datos siguen una distribución normal, por lo que $S = 0$ y $K = 3$.

El estadístico JB sigue una distribución $\chi^2$ con 2 grados de libertad bajo $H_0$. Mientras más se acerque el JB a 0, mayor es la evidencia a favor de la normalidad.

H₀: Los residuos son normalmente distribuidos.

H₁: Los residuos no son normalmente distribuidos.

Interpretación:

p > 0.05: no se rechaza la normalidad (los residuos pueden considerarse normalmente distribuidos).

p ≤ 0.05: Los residuos NO se distribuyen normalmente.

Es necesaria la librería “tseries”

# Jarque-Bera normality test on the residuals of the baseline model.
jarque.bera.test(residuos)

## 
##  Jarque Bera Test
## 
## data:  residuos
## X-squared = 1.7196, df = 2, p-value = 0.4232

Gráfica Q-Q (cuantil-cuantil)

# Normal Q-Q plot of the residuals with its reference line in red.
qqnorm(y = residuos)
qqline(y = residuos, col = "red")

Interpretación:

Si los puntos siguen aproximadamente la línea roja, los residuos son normales.

Desviaciones fuertes indican no normalidad.

Entonces: Los puntos se alinean razonablemente bien con la línea recta roja, especialmente en el centro de la distribución. Sin embargo, hay desviaciones notables en las colas (tanto en los extremos inferiores como superiores), lo que indica la presencia de colas pesadas o outliers. Esto sugiere que la distribución no es perfectamente normal.

7. Regresiones auxiliares (Regla de Klein)

# Klein's rule of thumb: multicollinearity may be a serious problem only when
# the R-squared of an auxiliary regression exceeds the overall R-squared.
# Auxiliary regression of y_l on k_l.
summary(lm(formula = y_l ~ k_l, data = panel))

## 
## Call:
## lm(formula = y_l ~ k_l, data = panel)
## 
## Residuals:
##    Min     1Q Median     3Q    Max 
## -22971  -3327  -1326   4698  10435 
## 
## Coefficients:
##               Estimate Std. Error t value Pr(>|t|)    
## (Intercept) -2.202e+03  9.307e+02  -2.366   0.0191 *  
## k_l          3.380e-01  1.088e-02  31.065   <2e-16 ***
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## Residual standard error: 5588 on 178 degrees of freedom
## Multiple R-squared:  0.8443, Adjusted R-squared:  0.8434 
## F-statistic: 965.1 on 1 and 178 DF,  p-value: < 2.2e-16

# Auxiliary regression of w_l on y_l.
summary(lm(formula = w_l ~ y_l, data = panel))

## 
## Call:
## lm(formula = w_l ~ y_l, data = panel)
## 
## Residuals:
##     Min      1Q  Median      3Q     Max 
## -4751.5 -1239.7  -194.9   457.1  4986.3 
## 
## Coefficients:
##              Estimate Std. Error t value Pr(>|t|)    
## (Intercept) 1.200e+03  3.108e+02   3.861 0.000158 ***
## y_l         3.370e-01  1.129e-02  29.850  < 2e-16 ***
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## Residual standard error: 2133 on 178 degrees of freedom
## Multiple R-squared:  0.8335, Adjusted R-squared:  0.8326 
## F-statistic:   891 on 1 and 178 DF,  p-value: < 2.2e-16

# Auxiliary regression of y_k on g_k.
summary(lm(formula = y_k ~ g_k, data = panel))

## 
## Call:
## lm(formula = y_k ~ g_k, data = panel)
## 
## Residuals:
##       Min        1Q    Median        3Q       Max 
## -0.068236 -0.027684  0.002246  0.015740  0.075923 
## 
## Coefficients:
##             Estimate Std. Error t value Pr(>|t|)    
## (Intercept)  0.01862    0.01232   1.512    0.132    
## g_k          1.55577    0.06706  23.199   <2e-16 ***
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## Residual standard error: 0.0356 on 178 degrees of freedom
## Multiple R-squared:  0.7515, Adjusted R-squared:  0.7501 
## F-statistic: 538.2 on 1 and 178 DF,  p-value: < 2.2e-16

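#Los R2 auxiliares (0.84, 0.83 y 0.75) son altos, pero menores que el R2 global (0.9951), por lo que la regla de Klein no se cumple con estas regresiones simples; sí existe multicolinealidad según los demás indicadores (VIF, número de condición).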

8. Pruebas formales

La fórmula del número de condición es:

\[ \kappa = \frac{\sigma_{\text{máx}}}{\sigma_{\text{mín}}} \]

O, usando los autovalores:

\[ \kappa = \sqrt{\frac{\lambda_{\text{máx}}}{\lambda_{\text{mín}}}} \] La fórmula del índice de condición es:

Para cada autovalor $\lambda_j$, el índice de condición es:

\[ \text{Índice}_j = \sqrt{\frac{\lambda_{\text{máx}}}{\lambda_j}} \] Entonces tenemos esta regla práctica: Si k está entre 100 y 1 000, existe una multicolinealidad que va de moderada a fuerte, mientras que si excede de 1 000, existe multicolinealidad grave. De otro modo, si el IC (√k) está entre 10 y 30, hay multicolinealidad entre moderada y fuerte, y si excede de 30, una multicolinealidad grave.

Tolerancia y factor de inflación de la varianza

El FIV indica cuánto se incrementa la varianza del estimador de un coeficiente debido a la colinealidad con otras variables independientes:

\[ \text{FIV}_j = \frac{1}{1 - R_j^2} \]

Donde $R_j^2$ es el coeficiente de determinación obtenido al hacer la regresión de la variable $X_j$ sobre las demás variables independientes.

La tolerancia es simplemente el complemento de $R_j^2$, e inversa del FIV:

\[ \text{TOL}_j = 1 - R_j^2 = \frac{1}{\text{FIV}_j} \] Como regla práctica, si el FIV de una variable es superior a 10 (esto sucede si $R_j^2$ excede de 0.90), se dice que esa variable es muy colineal. Cuando $R_j^2 \to 1$ (es decir, existe colinealidad perfecta), entonces:

\[ \text{TOL}_j \to 0 \quad \text{y} \quad \text{FIV}_j \to \infty \]

Cuando $R_j^2 = 0$ (es decir, no existe colinealidad con las otras variables), entonces:

\[ \text{TOL}_j = 1 \quad \text{y} \quad \text{FIV}_j = 1 \]

Debido a la estrecha conexión entre el FIV y la TOL, ambos indicadores pueden utilizarse de manera equivalente para diagnosticar la multicolinealidad.

library("mctest")

# The condition number measures how much overall collinearity there is in the
# set of explanatory variables.



# Overall multicollinearity diagnostics for the baseline model.
mctest(original)

## 
## Call:
## omcdiag(mod = mod, Inter = TRUE, detr = detr, red = red, conf = conf, 
##     theil = theil, cn = cn)
## 
## 
## Overall Multicollinearity Diagnostics
## 
##                        MC Results detection
## Determinant |X'X|:         0.0000         1
## Farrar Chi-Square:      2464.4826         1
## Red Indicator:             0.5519         1
## Sum of Lambda Inverse:   871.4400         1
## Theil's Method:            0.1466         0
## Condition Number:        249.3864         1
## 
## 1 --> COLLINEARITY is detected by the test 
## 0 --> COLLINEARITY is not detected by the test

# Individual (per-regressor) diagnostics for the baseline model: VIF,
# tolerance, Klein's rule and related indicators.
mctest(original, type="i")

## 
## Call:
## imcdiag(mod = mod, method = method, corr = FALSE, vif = vif, 
##     tol = tol, conf = conf, cvif = cvif, ind1 = ind1, ind2 = ind2, 
##     leamer = leamer, all = all)
## 
## 
## All Individual Multicollinearity Diagnostics Result
## 
##          VIF    TOL        Wi         Fi Leamer    CVIF Klein   IND1   IND2
## g_k 186.5063 0.0054 5348.7664  6455.6209 0.0732 -0.3291     0 0.0002 1.1382
## g_w  24.5442 0.0407  678.8564   819.3365 0.2018 -0.0433     0 0.0014 1.0977
## k_l  76.3132 0.0131 2171.5308  2620.8996 0.1145 -0.1347     0 0.0005 1.1293
## l_h   1.2420 0.8051    6.9788     8.4229 0.8973 -0.0022     0 0.0279 0.2230
## w_l 110.3176 0.0091 3151.9902  3804.2518 0.0952 -0.1947     0 0.0003 1.1339
## y_k 167.6964 0.0060 4806.4126  5801.0344 0.0772 -0.2959     0 0.0002 1.1375
## y_l 304.8203 0.0033 8760.1514 10572.9457 0.0573 -0.5378     1 0.0001 1.1405
## 
## 1 --> COLLINEARITY is detected by the test 
## 0 --> COLLINEARITY is not detected by the test
## 
## * all coefficients have significant t-ratios
## 
## R-square of y on all x: 0.9951 
## 
## * use method argument to check which regressors may be the reason of collinearity
## ===================================

# Same individual diagnostics, additionally printing the correlation matrix
# of the regressors (corr = TRUE).
mctest(original, type="i", corr=TRUE)

## 
## Call:
## imcdiag(mod = mod, method = method, corr = TRUE, vif = vif, tol = tol, 
##     conf = conf, cvif = cvif, ind1 = ind1, ind2 = ind2, leamer = leamer, 
##     all = all)
## 
## 
## All Individual Multicollinearity Diagnostics Result
## 
##          VIF    TOL        Wi         Fi Leamer    CVIF Klein   IND1   IND2
## g_k 186.5063 0.0054 5348.7664  6455.6209 0.0732 -0.3291     0 0.0002 1.1382
## g_w  24.5442 0.0407  678.8564   819.3365 0.2018 -0.0433     0 0.0014 1.0977
## k_l  76.3132 0.0131 2171.5308  2620.8996 0.1145 -0.1347     0 0.0005 1.1293
## l_h   1.2420 0.8051    6.9788     8.4229 0.8973 -0.0022     0 0.0279 0.2230
## w_l 110.3176 0.0091 3151.9902  3804.2518 0.0952 -0.1947     0 0.0003 1.1339
## y_k 167.6964 0.0060 4806.4126  5801.0344 0.0772 -0.2959     0 0.0002 1.1375
## y_l 304.8203 0.0033 8760.1514 10572.9457 0.0573 -0.5378     1 0.0001 1.1405
## 
## 1 --> COLLINEARITY is detected by the test 
## 0 --> COLLINEARITY is not detected by the test
## 
## * all coefficients have significant t-ratios
## 
## R-square of y on all x: 0.9951 
## 
## * use method argument to check which regressors may be the reason of collinearity
## ===================================
## 
## Correlation Matrix
##           g_k         g_w       k_l        l_h        w_l        y_k       y_l
## g_k 1.0000000  0.23413272 0.5043280 0.32493633  0.7202579  0.8668723 0.7571530
## g_w 0.2341327  1.00000000 0.3578855 0.02449209 -0.1799300 -0.2669429 0.1927528
## k_l 0.5043280  0.35788550 1.0000000 0.31694096  0.7332265  0.3256350 0.9188456
## l_h 0.3249363  0.02449209 0.3169410 1.00000000  0.3428589  0.3219709 0.3581350
## w_l 0.7202579 -0.17993004 0.7332265 0.34285895  1.0000000  0.8169678 0.9129599
## y_k 0.8668723 -0.26694288 0.3256350 0.32197091  0.8169678  1.0000000 0.6548165
## y_l 0.7571530  0.19275277 0.9188456 0.35813496  0.9129599  0.6548165 1.0000000
## 
## ====================NOTE===================
## 
## g_k and w_l may be collinear as |0.720258|>=0.7 
## k_l and w_l may be collinear as |0.733227|>=0.7 
## g_k and y_k may be collinear as |0.866872|>=0.7 
## w_l and y_k may be collinear as |0.816968|>=0.7 
## g_k and y_l may be collinear as |0.757153|>=0.7 
## k_l and y_l may be collinear as |0.918846|>=0.7 
## w_l and y_l may be collinear as |0.912960|>=0.7

9. Soluciones

9.1 Modelo log-log

# Log-log specification: the response and every regressor enter in logs.
mlog <- lm(
  log(y_h) ~ log(y_l) + log(y_k) + log(w_l) + log(g_k) + log(g_w) +
    log(k_l) + log(l_h),
  data = panel
)
summary(mlog)

## 
## Call:
## lm(formula = log(y_h) ~ log(y_l) + log(y_k) + log(w_l) + log(g_k) + 
##     log(g_w) + log(k_l) + log(l_h), data = panel)
## 
## Residuals:
##        Min         1Q     Median         3Q        Max 
## -1.773e-09 -6.585e-10  2.974e-11  6.244e-10  1.960e-09 
## 
## Coefficients: (2 not defined because of singularities)
##               Estimate Std. Error    t value Pr(>|t|)    
## (Intercept) -5.920e-09  6.511e-09 -9.090e-01    0.365    
## log(y_l)     1.000e+00  3.677e-09  2.719e+08   <2e-16 ***
## log(y_k)     2.650e-09  5.958e-09  4.450e-01    0.657    
## log(w_l)    -2.351e-09  3.748e-09 -6.270e-01    0.531    
## log(g_k)    -3.323e-09  5.948e-09 -5.590e-01    0.577    
## log(g_w)            NA         NA         NA       NA    
## log(k_l)            NA         NA         NA       NA    
## log(l_h)     1.000e+00  5.359e-10  1.866e+09   <2e-16 ***
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## Residual standard error: 8.42e-10 on 174 degrees of freedom
## Multiple R-squared:      1,  Adjusted R-squared:      1 
## F-statistic: 1.67e+19 on 5 and 174 DF,  p-value: < 2.2e-16

# Because some regressors are collinear, their coefficients cannot be
# estimated and are reported as NA.
# Overall multicollinearity diagnostics for the log-log model.
mctest(mlog)

## Warning in log(Det): Se han producido NaNs

## Warning in sqrt(max(ev)/ev): Se han producido NaNs

## Warning in sqrt(ordev): Se han producido NaNs

## 
## Call:
## omcdiag(mod = mod, Inter = TRUE, detr = detr, red = red, conf = conf, 
##     theil = theil, cn = cn)
## 
## 
## Overall Multicollinearity Diagnostics
## 
##                           MC Results detection
## Determinant |X'X|:      0.000000e+00         1
## Farrar Chi-Square:               NaN        NA
## Red Indicator:          5.785000e-01         1
## Sum of Lambda Inverse: -7.745199e+15         0
## Theil's Method:         1.601000e-01         0
## Condition Number:                NaN        NA
## 
## 1 --> COLLINEARITY is detected by the test 
## 0 --> COLLINEARITY is not detected by the test

# Individual (per-regressor) diagnostics for the log-log model.
mctest(mlog, type="i")

## 
## Call:
## imcdiag(mod = mod, method = method, corr = FALSE, vif = vif, 
##     tol = tol, conf = conf, cvif = cvif, ind1 = ind1, ind2 = ind2, 
##     leamer = leamer, all = all)
## 
## 
## All Individual Multicollinearity Diagnostics Result
## 
##             VIF    TOL     Wi     Fi Leamer CVIF Klein   IND1   IND2
## log(y_l)    Inf 0.0000    Inf    Inf 0.0000  NaN     0 0.0000 1.1363
## log(y_k)    Inf 0.0000    Inf    Inf 0.0000  NaN     0 0.0000 1.1363
## log(w_l)    Inf 0.0000    Inf    Inf 0.0000  NaN     0 0.0000 1.1363
## log(g_k)    Inf 0.0000    Inf    Inf 0.0000  NaN     0 0.0000 1.1363
## log(g_w)    Inf 0.0000    Inf    Inf 0.0000  NaN     0 0.0000 1.1363
## log(k_l)    Inf 0.0000    Inf    Inf 0.0000  NaN     0 0.0000 1.1363
## log(l_h) 1.1906 0.8399 5.4965 6.6339 0.9165    0     0 0.0192 0.1819
## 
## 1 --> COLLINEARITY is detected by the test 
## 0 --> COLLINEARITY is not detected by the test
## 
## log(y_k) , log(w_l) , log(g_k) , coefficient(s) are non-significant may be due to multicollinearity
## 
## R-square of y on all x: 1 
## 
## * use method argument to check which regressors may be the reason of collinearity
## ===================================

9.2 Modelo lineal dinámico

library("dynlm")

# Regress log(y_h) on the first differences of k_l and y_l plus log(l_h) and
# log(g_k).
# NOTE(review): `panel` is passed as a plain tibble rather than a ts/zoo
# object, and diff() returns one observation fewer than its input. The summary
# reports the sample as 1-179, so the differenced series may be paired with
# the wrong period of y_h -- confirm the alignment (e.g. by building a ts
# object and using dynlm's d() operator).
mdiferencias <- dynlm(log(y_h)~ I(diff(k_l))+I(diff(y_l))+log(l_h)+log(g_k), data = panel)
summary(mdiferencias)

## 
## Time series regression with "numeric" data:
## Start = 1, End = 179
## 
## Call:
## dynlm(formula = log(y_h) ~ I(diff(k_l)) + I(diff(y_l)) + log(l_h) + 
##     log(g_k), data = panel)
## 
## Residuals:
##      Min       1Q   Median       3Q      Max 
## -0.60690 -0.18412 -0.01052  0.19155  0.72156 
## 
## Coefficients:
##                Estimate Std. Error t value Pr(>|t|)    
## (Intercept)   1.270e+01  1.996e-01  63.627  < 2e-16 ***
## I(diff(k_l))  4.915e-05  6.726e-06   7.308 9.42e-12 ***
## I(diff(y_l)) -1.128e-04  1.601e-05  -7.047 4.11e-11 ***
## log(l_h)      7.317e-01  1.722e-01   4.249 3.49e-05 ***
## log(g_k)      1.771e+00  9.050e-02  19.564  < 2e-16 ***
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## Residual standard error: 0.255 on 174 degrees of freedom
## Multiple R-squared:  0.8021, Adjusted R-squared:  0.7976 
## F-statistic: 176.3 on 4 and 174 DF,  p-value: < 2.2e-16

# VIFs of the dynamic model (stored in vif3, not printed) followed by its
# overall multicollinearity diagnostics.
vif3 <- vif(mdiferencias)
mctest(mdiferencias)

## 
## Call:
## omcdiag(mod = mod, Inter = TRUE, detr = detr, red = red, conf = conf, 
##     theil = theil, cn = cn)
## 
## 
## Overall Multicollinearity Diagnostics
## 
##                        MC Results detection
## Determinant |X'X|:         0.1398         0
## Farrar Chi-Square:       345.9356         1
## Red Indicator:             0.4046         0
## Sum of Lambda Inverse:    14.7333         0
## Theil's Method:           -0.3570         0
## Condition Number:         22.6816         0
## 
## 1 --> COLLINEARITY is detected by the test 
## 0 --> COLLINEARITY is not detected by the test

# Individual (per-regressor) diagnostics for the dynamic model.
mctest(mdiferencias, type="i")

## 
## Call:
## imcdiag(mod = mod, method = method, corr = FALSE, vif = vif, 
##     tol = tol, conf = conf, cvif = cvif, ind1 = ind1, ind2 = ind2, 
##     leamer = leamer, all = all)
## 
## 
## All Individual Multicollinearity Diagnostics Result
## 
##                 VIF    TOL       Wi       Fi Leamer      CVIF Klein   IND1
## I(diff(k_l)) 6.2266 0.1606 304.8871 459.9439 0.4007 -757.9535     1 0.0028
## I(diff(y_l)) 6.0333 0.1657 293.6099 442.9315 0.4071 -734.4207     1 0.0028
## log(l_h)     1.3178 0.7589  18.5362  27.9632 0.8711 -160.4082     0 0.0130
## log(g_k)     1.1556 0.8654   9.0762  13.6920 0.9302 -140.6674     0 0.0148
##                IND2
## I(diff(k_l)) 1.6383
## I(diff(y_l)) 1.6283
## log(l_h)     0.4706
## log(g_k)     0.2628
## 
## 1 --> COLLINEARITY is detected by the test 
## 0 --> COLLINEARITY is not detected by the test
## 
## * all coefficients have significant t-ratios
## 
## R-square of y on all x: 0.8021 
## 
## * use method argument to check which regressors may be the reason of collinearity
## ===================================

9.3 División con la variable más colineal

# Ratio specification: the response and the selected regressors are divided
# by y_l, and 1 / y_l enters as an additional regressor.
md <- lm(
  I(y_h / y_l) ~ I(1 / y_l) + I(k_l / y_l) + I(l_h / y_l) + I(g_k / y_l),
  data = panel
)
summary(md)

## 
## Call:
## lm(formula = I(y_h/y_l) ~ I(1/y_l) + I(k_l/y_l) + I(l_h/y_l) + 
##     I(g_k/y_l), data = panel)
## 
## Residuals:
##       Min        1Q    Median        3Q       Max 
## -0.059074 -0.008649 -0.000518  0.009039  0.040414 
## 
## Coefficients:
##               Estimate Std. Error t value Pr(>|t|)    
## (Intercept)  3.638e-01  1.302e-02  27.933   <2e-16 ***
## I(1/y_l)    -6.010e+03  3.284e+02 -18.300   <2e-16 ***
## I(k_l/y_l)   1.039e-02  4.521e-03   2.297   0.0228 *  
## I(l_h/y_l)   1.600e+04  5.948e+02  26.897   <2e-16 ***
## I(g_k/y_l)  -2.186e+03  1.486e+03  -1.470   0.1432    
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## Residual standard error: 0.01871 on 175 degrees of freedom
## Multiple R-squared:  0.8364, Adjusted R-squared:  0.8327 
## F-statistic: 223.7 on 4 and 175 DF,  p-value: < 2.2e-16

# VIFs of the ratio model (stored in vif4, not printed) followed by its
# overall multicollinearity diagnostics.
vif4 <- vif(md)
mctest(md)

## 
## Call:
## omcdiag(mod = mod, Inter = TRUE, detr = detr, red = red, conf = conf, 
##     theil = theil, cn = cn)
## 
## 
## Overall Multicollinearity Diagnostics
## 
##                        MC Results detection
## Determinant |X'X|:         0.0028         1
## Farrar Chi-Square:      1037.8789         1
## Red Indicator:             0.8258         1
## Sum of Lambda Inverse:    57.2317         1
## Theil's Method:            1.1113         1
## Condition Number:         46.3335         1
## 
## 1 --> COLLINEARITY is detected by the test 
## 0 --> COLLINEARITY is not detected by the test

# Individual (per-regressor) diagnostics for the ratio model.
mctest(md, type="i")

## 
## Call:
## imcdiag(mod = mod, method = method, corr = FALSE, vif = vif, 
##     tol = tol, conf = conf, cvif = cvif, ind1 = ind1, ind2 = ind2, 
##     leamer = leamer, all = all)
## 
## 
## All Individual Multicollinearity Diagnostics Result
## 
##                VIF    TOL        Wi        Fi Leamer   CVIF Klein   IND1   IND2
## I(1/y_l)   30.0278 0.0333 1702.9631 2568.9586 0.1825 7.7247     1 0.0006 1.0680
## I(k_l/y_l)  6.8549 0.1459  343.4900  518.1624 0.3819 1.7635     1 0.0025 0.9436
## I(l_h/y_l) 11.5338 0.0867  617.9853  932.2449 0.2945 2.9671     1 0.0015 1.0090
## I(g_k/y_l)  8.8152 0.1134  458.4903  691.6431 0.3368 2.2677     1 0.0019 0.9794
## 
## 1 --> COLLINEARITY is detected by the test 
## 0 --> COLLINEARITY is not detected by the test
## 
## I(g_k/y_l) , coefficient(s) are non-significant may be due to multicollinearity
## 
## R-square of y on all x: 0.8364 
## 
## * use method argument to check which regressors may be the reason of collinearity
## ===================================

9.4 Componentes principales

Solo me interesa concentrarme en las variables explicativas, por lo que tengo que eliminar y_h

# Keep only the explanatory variables for the PCA. The response y_h is dropped
# by name rather than by column position, so the selection still holds if the
# column order of `panel` changes.
dd <- panel[, setdiff(names(panel), "y_h")]
dd

## # A tibble: 180 × 7
##      g_k   g_w    k_l   l_h    w_l   y_k    y_l
##    <dbl> <dbl>  <dbl> <dbl>  <dbl> <dbl>  <dbl>
##  1 0.203 0.978 63556. 0.348 13163. 0.410 26035.
##  2 0.214 1.11  63990. 0.345 12390. 0.408 26113.
##  3 0.223 1.10  64006. 0.352 12953. 0.426 27252.
##  4 0.218 1.01  65293. 0.355 14045. 0.433 28251.
##  5 0.229 1.13  66320. 0.357 13384. 0.431 28562.
##  6 0.230 1.17  67901. 0.357 13385. 0.427 29022.
##  7 0.225 1.17  68714. 0.357 13200. 0.417 28669.
##  8 0.229 1.19  69221. 0.363 13256. 0.420 29085.
##  9 0.229 1.19  71294. 0.367 13709. 0.422 30058.
## 10 0.237 1.33  72989. 0.370 13052. 0.416 30385.
## # ℹ 170 more rows

library("ggplot2")
library("FactoMineR")
library("factoextra")

# Principal component analysis on the scaled regressors, keeping all seven
# components. The model is fitted once with graph = TRUE so the PCA plots are
# drawn; the previous extra fit with graph = F was overwritten immediately
# and only repeated the computation.
res.pca <- PCA(dd, scale.unit = TRUE, ncp = 7, graph = TRUE)

res.pca

## **Results for the Principal Component Analysis (PCA)**
## The analysis was performed on 180 individuals, described by 7 variables
## *The results are available in the following objects:
## 
##    name               description                          
## 1  "$eig"             "eigenvalues"                        
## 2  "$var"             "results for the variables"          
## 3  "$var$coord"       "coord. for the variables"           
## 4  "$var$cor"         "correlations variables - dimensions"
## 5  "$var$cos2"        "cos2 for the variables"             
## 6  "$var$contrib"     "contributions of the variables"     
## 7  "$ind"             "results for the individuals"        
## 8  "$ind$coord"       "coord. for the individuals"         
## 9  "$ind$cos2"        "cos2 for the individuals"           
## 10 "$ind$contrib"     "contributions of the individuals"   
## 11 "$call"            "summary statistics"                 
## 12 "$call$centre"     "mean of the variables"              
## 13 "$call$ecart.type" "standard error of the variables"    
## 14 "$call$row.w"      "weights for the individuals"        
## 15 "$call$col.w"      "weights for the variables"

# Eigenvalues with the (cumulative) percentage of variance per component,
# used to decide how many components are needed.
res.pca$eig

##         eigenvalue percentage of variance cumulative percentage of variance
## comp 1 4.093336261            58.47623229                          58.47623
## comp 2 1.381608269            19.73726099                          78.21349
## comp 3 0.821502337            11.73574767                          89.94924
## comp 4 0.671536342             9.59337631                          99.54262
## comp 5 0.022363787             0.31948267                          99.86210
## comp 6 0.008227502             0.11753574                          99.97964
## comp 7 0.001425503             0.02036433                         100.00000

# Variable-level results: coordinates, correlations, cos2 and contributions
# of each variable on every dimension.
res.pca$var

## $coord
##          Dim.1        Dim.2       Dim.3       Dim.4        Dim.5         Dim.6
## g_k 0.86949278 -0.006212743 -0.11778731  0.47595591 -0.046251851 -0.0325459518
## g_w 0.09613619  0.936945616 -0.02212343  0.32995723  0.056394038  0.0179839662
## k_l 0.79502930  0.444539836 -0.04453580 -0.40243689 -0.069182765  0.0387035498
## l_h 0.47386817 -0.014149771  0.87978471  0.03479792  0.003687970 -0.0017572665
## w_l 0.93749974 -0.220153586 -0.11057197 -0.21937158  0.110006106  0.0008170764
## y_k 0.81918887 -0.477058663 -0.09461254  0.29871508 -0.007387455  0.0534199503
## y_l 0.95993624  0.172725598 -0.09978360 -0.18870507 -0.009406999 -0.0498941689
##             Dim.7
## g_k -0.0183670440
## g_w  0.0050810478
## k_l -0.0094676192
## l_h -0.0001114102
## w_l -0.0132125811
## y_k  0.0159414156
## y_l  0.0233235586
## 
## $cor
##          Dim.1        Dim.2       Dim.3       Dim.4        Dim.5         Dim.6
## g_k 0.86949278 -0.006212743 -0.11778731  0.47595591 -0.046251851 -0.0325459518
## g_w 0.09613619  0.936945616 -0.02212343  0.32995723  0.056394038  0.0179839662
## k_l 0.79502930  0.444539836 -0.04453580 -0.40243689 -0.069182765  0.0387035498
## l_h 0.47386817 -0.014149771  0.87978471  0.03479792  0.003687970 -0.0017572665
## w_l 0.93749974 -0.220153586 -0.11057197 -0.21937158  0.110006106  0.0008170764
## y_k 0.81918887 -0.477058663 -0.09461254  0.29871508 -0.007387455  0.0534199503
## y_l 0.95993624  0.172725598 -0.09978360 -0.18870507 -0.009406999 -0.0498941689
##             Dim.7
## g_k -0.0183670440
## g_w  0.0050810478
## k_l -0.0094676192
## l_h -0.0001114102
## w_l -0.0132125811
## y_k  0.0159414156
## y_l  0.0233235586
## 
## $cos2
##           Dim.1        Dim.2        Dim.3       Dim.4        Dim.5        Dim.6
## g_k 0.756017701 3.859817e-05 0.0138738501 0.226534030 2.139234e-03 1.059239e-03
## g_w 0.009242167 8.778671e-01 0.0004894464 0.108871772 3.180288e-03 3.234230e-04
## k_l 0.632071589 1.976157e-01 0.0019834379 0.161955452 4.786255e-03 1.497965e-03
## l_h 0.224551045 2.002160e-04 0.7740211428 0.001210895 1.360113e-05 3.087985e-06
## w_l 0.878905766 4.846760e-02 0.0122261597 0.048123890 1.210134e-02 6.676139e-07
## y_k 0.671070405 2.275850e-01 0.0089515329 0.089230699 5.457449e-05 2.853691e-03
## y_l 0.921477589 2.983413e-02 0.0099567674 0.035609604 8.849164e-05 2.489428e-03
##            Dim.7
## g_k 3.373483e-04
## g_w 2.581705e-05
## k_l 8.963581e-05
## l_h 1.241224e-08
## w_l 1.745723e-04
## y_k 2.541287e-04
## y_l 5.439884e-04
## 
## $contrib
##          Dim.1        Dim.2       Dim.3      Dim.4       Dim.5        Dim.6
## g_k 18.4694746  0.002793713  1.68883879 33.7336963  9.56561486 12.874369832
## g_w  0.2257857 63.539507318  0.05957942 16.2123425 14.22070228  3.930999457
## k_l 15.4414773 14.303306532  0.24144032 24.1171538 21.40180889 18.206800160
## l_h  5.4857708  0.014491520 94.22019972  0.1803171  0.06081763  0.037532481
## w_l 21.4716239  3.508056687  1.48826841  7.1662376 54.11133407  0.008114418
## y_k 16.3942164 16.472467157  1.08965398 13.2875459  0.24403063 34.684783337
## y_l 22.5116514  2.159377073  1.21201936  5.3027068  0.39569164 30.257400314
##            Dim.7
## g_k 2.366521e+01
## g_w 1.811083e+00
## k_l 6.288013e+00
## l_h 8.707271e-04
## w_l 1.224636e+01
## y_k 1.782730e+01
## y_l 3.816115e+01

# Variable plots for different pairs of dimensions.
fviz_pca_var(res.pca, axes = c(1, 2))

fviz_pca_var(res.pca, axes = c(2, 3))

fviz_pca_var(res.pca, axes = c(1, 3))

# arrowsize sets the size of the arrows.
fviz_pca_var(
  res.pca,
  axes = c(1, 2), arrowsize = 1, labelsize = 3, repel = TRUE
)

Gráfico final

# Final biplot of individuals and variables on the PCA axes.
fviz_pca_biplot(
  res.pca,
  # Individuals
  geom.ind = "point", col.ind = "black",
  pointshape = 21, pointsize = 3, arrowsize = 2,
  palette = "jco", addEllipses = TRUE,
  # Variables
  alpha.var = "contrib", col.var = "contrib",
  gradient.cols = "RdYlBu",
  legend.title = list(fill = "v1", color = "Contrib", alpha = "Contrib")
)

Regresión con componentes principales

# Coordinates of every observation on the principal components.
cp <- as.data.frame(res.pca$ind$coord)
# Attach the component scores to a copy of `panel` called xy. The whole `cp`
# data frame is stored as one nested column, so all of its dimensions are
# carried along and are referenced as cp$Dim.1, cp$Dim.2, ... in formulas.

xy <- panel
xy$cp <- cp
head(xy)

## # A tibble: 6 × 9
##     g_k   g_w    k_l   l_h    w_l    y_h   y_k    y_l cp$Dim.1 $Dim.2 $Dim.3
##   <dbl> <dbl>  <dbl> <dbl>  <dbl>  <dbl> <dbl>  <dbl>    <dbl>  <dbl>  <dbl>
## 1 0.203 0.978 63556. 0.348 13163.  9054. 0.410 26035.     1.04  -2.00 -0.681
## 2 0.214 1.11  63990. 0.345 12390.  9020. 0.408 26113.     1.10  -1.74 -0.758
## 3 0.223 1.10  64006. 0.352 12953.  9597. 0.426 27252.     1.42  -1.86 -0.692
## 4 0.218 1.01  65293. 0.355 14045. 10016. 0.433 28251.     1.54  -2.07 -0.663
## 5 0.229 1.13  66320. 0.357 13384. 10203. 0.431 28562.     1.64  -1.81 -0.634
## 6 0.230 1.17  67901. 0.357 13385. 10364. 0.427 29022.     1.67  -1.72 -0.644
## # ℹ 4 more variables: cp$Dim.4 <dbl>, $Dim.5 <dbl>, $Dim.6 <dbl>, $Dim.7 <dbl>

# Principal-component regression of y_h on the first two components.
mpca <- lm(formula = y_h ~ cp$Dim.1 + cp$Dim.2, data = xy)
summary(mpca)

## 
## Call:
## lm(formula = y_h ~ cp$Dim.1 + cp$Dim.2, data = xy)
## 
## Residuals:
##     Min      1Q  Median      3Q     Max 
## -2539.3 -1207.1  -232.1  1244.2  4830.8 
## 
## Coefficients:
##             Estimate Std. Error t value Pr(>|t|)    
## (Intercept)  8877.84     114.17   77.76   <2e-16 ***
## cp$Dim.1     2811.66      56.43   49.83   <2e-16 ***
## cp$Dim.2     1016.07      97.13   10.46   <2e-16 ***
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## Residual standard error: 1532 on 177 degrees of freedom
## Multiple R-squared:  0.9361, Adjusted R-squared:  0.9354 
## F-statistic:  1296 on 2 and 177 DF,  p-value: < 2.2e-16

# VIFs of the principal-component model (stored in vif5, not printed)
# followed by its overall multicollinearity diagnostics.
vif5 <- vif(mpca)
mctest(mpca)

## 
## Call:
## omcdiag(mod = mod, Inter = TRUE, detr = detr, red = red, conf = conf, 
##     theil = theil, cn = cn)
## 
## 
## Overall Multicollinearity Diagnostics
## 
##                        MC Results detection
## Determinant |X'X|:         1.0000         0
## Farrar Chi-Square:         0.0000         0
## Red Indicator:             0.0000         0
## Sum of Lambda Inverse:     2.0000         0
## Theil's Method:           -0.9361         0
## Condition Number:          1.0000         0
## 
## 1 --> COLLINEARITY is detected by the test 
## 0 --> COLLINEARITY is not detected by the test

# Individual (per-regressor) diagnostics for the principal-component model.
mctest(mpca, type="i")

## 
## Call:
## imcdiag(mod = mod, method = method, corr = FALSE, vif = vif, 
##     tol = tol, conf = conf, cvif = cvif, ind1 = ind1, ind2 = ind2, 
##     leamer = leamer, all = all)
## 
## 
## All Individual Multicollinearity Diagnostics Result
## 
##          VIF TOL Wi  Fi Leamer CVIF Klein   IND1   IND2
## cp$Dim.1   1   1  0 Inf      1    1     0 0.0056 0.4388
## cp$Dim.2   1   1  0 Inf      1    1     0 0.0056 1.5612
## 
## 1 --> COLLINEARITY is detected by the test 
## 0 --> COLLINEARITY is not detected by the test
## 
## * all coefficients have significant t-ratios
## 
## R-square of y on all x: 0.9361 
## 
## * use method argument to check which regressors may be the reason of collinearity
## ===================================

library("modelsummary")

## Warning: package 'modelsummary' was built under R version 4.3.3

# Side-by-side comparison table of the five fitted models.
modelsummary(
  list(
    Modelo_Original = original,
    Modelo_Logaritmico = mlog,
    Modelo_Dinámico = mdiferencias,
    Modelo_División = md,
    Modelo_CPrincipales = mpca
  )
)

	Modelo_Original	Modelo_Logaritmico	Modelo_Dinámico	Modelo_División	Modelo_CPrincipales
(Intercept)	-10825.036	-0.000	12.702	0.364	8877.838
	(561.995)	(0.000)	(0.200)	(0.013)	(114.165)
g_k	-144636.971
	(11034.327)
g_w	3962.856
	(342.114)
k_l	-0.033
	(0.007)
l_h	24330.595
	(781.157)
w_l	-0.607
	(0.065)
y_k	71673.068
	(5830.011)
y_l	0.723
	(0.040)
log(y_l)		1.000
		(0.000)
log(y_k)		0.000
		(0.000)
log(w_l)		-0.000
		(0.000)
log(g_k)		-0.000	1.771
		(0.000)	(0.091)
log(l_h)		1.000	0.732
		(0.000)	(0.172)
I(diff(k_l))			0.000
			(0.000)
I(diff(y_l))			-0.000
			(0.000)
I(1/y_l)				-6009.906
				(328.409)
I(k_l/y_l)				0.010
				(0.005)
I(l_h/y_l)				15997.204
				(594.751)
I(g_k/y_l)				-2185.608
				(1486.374)
cp$Dim.1					2811.660
					(56.428)
cp$Dim.2					1016.066
					(97.127)
Num.Obs.	180	180	179	180	180
R2	0.995	1.000	0.802	0.836	0.936
R2 Adj.	0.995	1.000	0.798	0.833	0.935
AIC	2702.7	-3795.5	3233.8	-914.6	3156.1
BIC	2731.5	-3773.2	3252.9	-895.4	3168.8
Log.Lik.	-1342.360	3508.790	-6.878	463.302	-1574.039
F	5019.461			223.745	1296.098
RMSE	419.31	0.00	0.25	0.02	1518.87

```

Multicolinealidad

Jimmy Cueva Ruesta

2025-04-30