Cálculo de multicolinealidad

Estimación del modelo

library(wooldridge)
library(stargazer)
## 
## Please cite as:
##  Hlavac, Marek (2022). stargazer: Well-Formatted Regression and Summary Statistics Tables.
##  R package version 5.2.3. https://CRAN.R-project.org/package=stargazer
# Load the hprice1 data set into the workspace.
data(hprice1)
# Preview the first five observations.
head(hprice1, n = 5)
##   price assess bdrms lotsize sqrft colonial   lprice  lassess llotsize   lsqrft
## 1   300  349.1     4    6126  2438        1 5.703783 5.855359 8.720297 7.798934
## 2   370  351.5     3    9903  2076        1 5.913503 5.862210 9.200593 7.638198
## 3   191  217.7     3    5200  1374        0 5.252274 5.383118 8.556414 7.225482
## 4   195  231.8     3    4600  1448        1 5.273000 5.445875 8.433811 7.277938
## 5   373  319.1     4    6095  2514        1 5.921578 5.765504 8.715224 7.829630
# Fit a linear model of price on lot size, square footage and bedrooms.
modelo_price <- lm(formula = price ~ lotsize + sqrft + bdrms, data = hprice1)

# Pass the model positionally: stargazer() has no `data` parameter, so the
# original `data =` name only reached it through `...`.
stargazer(modelo_price, type = "text")
## 
## ===============================================
##                         Dependent variable:    
##                     ---------------------------
##                                price           
## -----------------------------------------------
## lotsize                      0.002***          
##                               (0.001)          
##                                                
## sqrft                        0.123***          
##                               (0.013)          
##                                                
## bdrms                         13.853           
##                               (9.010)          
##                                                
## Constant                      -21.770          
##                              (29.475)          
##                                                
## -----------------------------------------------
## Observations                    88             
## R2                             0.672           
## Adjusted R2                    0.661           
## Residual Std. Error      59.833 (df = 84)      
## F Statistic           57.460*** (df = 3; 84)   
## ===============================================
## Note:               *p<0.1; **p<0.05; ***p<0.01

Prueba del índice de condición

Cálculo manual

# Step 1: extract the design matrix X (intercept column included) from the model.
matriz_x1 <- model.matrix(object = modelo_price)
head(matriz_x1, n = 6)
##   (Intercept) lotsize sqrft bdrms
## 1           1    6126  2438     4
## 2           1    9903  2076     3
## 3           1    5200  1374     3
## 4           1    4600  1448     3
## 5           1    6095  2514     4
## 6           1    8566  2754     5
# Step 2: compute the cross-product matrix X'X.
# crossprod(X) is the base-R idiom for t(X) %*% X; it avoids building the
# transpose explicitly and keeps the column names of X on both dimensions.
matriz_xx1 <- crossprod(matriz_x1)
head(matriz_xx1)
##             (Intercept)     lotsize      sqrft   bdrms
## (Intercept)          88      793748     177205     314
## lotsize          793748 16165159010 1692290257 2933767
## sqrft            177205  1692290257  385820561  654755
## bdrms               314     2933767     654755    1182
# Step 3: build the scaling matrix S = diag(1 / sqrt(diag(X'X))).
# Print small values in fixed notation instead of scientific notation.
options(scipen = 999)
# Take the reciprocals of the diagonal directly; calling solve() on a diagonal
# matrix runs a general matrix inversion to obtain the same entries.
# `nrow` is given explicitly so a length-one diagonal is not read as a dimension.
Sn <- diag(1 / sqrt(diag(matriz_xx1)), nrow = ncol(matriz_xx1))
head(Sn)
##           [,1]           [,2]          [,3]       [,4]
## [1,] 0.1066004 0.000000000000 0.00000000000 0.00000000
## [2,] 0.0000000 0.000007865204 0.00000000000 0.00000000
## [3,] 0.0000000 0.000000000000 0.00005091049 0.00000000
## [4,] 0.0000000 0.000000000000 0.00000000000 0.02908649
# Step 4: normalise X'X as S (X'X) S, which puts ones on its diagonal.

matriz_xx1_N <- Sn %*% matriz_xx1 %*% Sn
head(matriz_xx1_N)
##           [,1]      [,2]      [,3]      [,4]
## [1,] 1.0000000 0.6655050 0.9617052 0.9735978
## [2,] 0.6655050 1.0000000 0.6776293 0.6711613
## [3,] 0.9617052 0.6776293 1.0000000 0.9695661
## [4,] 0.9735978 0.6711613 0.9695661 1.0000000
# Step 5: eigen-decomposition of the normalised matrix (treated as symmetric).

lambda <- eigen(x = matriz_xx1_N, symmetric = TRUE)
head(lambda[["values"]])
## [1] 3.48158596 0.45518380 0.03851083 0.02471941
# Step 6: condition number = sqrt(largest eigenvalue / smallest eigenvalue).
K_Estadistico <- sqrt(max(lambda[["values"]]) / min(lambda[["values"]]))
head(K_Estadistico)
## [1] 11.86778
# Classify the condition number: below 20 no multicollinearity, from 20 up to
# (but not including) 30 moderate, 30 or more severe. K_Estadistico is a single
# number, so a plain if/else chain is used instead of the vectorised ifelse().
resultado <- if (K_Estadistico < 20) {
  "No hay multicolinealidad"
} else if (K_Estadistico < 30) {
  "Multicolinealidad moderada"
} else {
  # Spelling of the message corrected (it read "Multicolinealida severa").
  "Multicolinealidad severa"
}
print(resultado)
## [1] "No hay multicolinealidad"

Librería mctest

library(mctest)
## Warning: package 'mctest' was built under R version 4.5.2
# Overall multicollinearity diagnostics for the fitted model (mctest package).
mctest(modelo_price)
## 
## Call:
## omcdiag(mod = mod, Inter = TRUE, detr = detr, red = red, conf = conf, 
##     theil = theil, cn = cn)
## 
## 
## Overall Multicollinearity Diagnostics
## 
##                        MC Results detection
## Determinant |X'X|:         0.6918         0
## Farrar Chi-Square:        31.3812         1
## Red Indicator:             0.3341         0
## Sum of Lambda Inverse:     3.8525         0
## Theil's Method:           -0.7297         0
## Condition Number:         11.8678         0
## 
## 1 --> COLLINEARITY is detected by the test 
## 0 --> COLLINEARITY is not detected by the test

Prueba de Farrar-Glauber (FG)

Librería psych

library(psych)
## Warning: package 'psych' was built under R version 4.5.3
# Bartlett test on the regressors (intercept column dropped). Raw data is
# supplied, so the function derives the correlation matrix itself.
FG_test <- cortest.bartlett(R = matriz_x1[, -1])
## R was not square, finding R from data
print(x = FG_test)
## $chisq
## [1] 31.38122
## 
## $p.value
## [1] 0.0000007065806
## 
## $df
## [1] 3

Librería mctest

library(mctest)
# Overall diagnostics again; the "Farrar Chi-Square" row is the one relevant here.
mctest(mod = modelo_price)
## 
## Call:
## omcdiag(mod = mod, Inter = TRUE, detr = detr, red = red, conf = conf, 
##     theil = theil, cn = cn)
## 
## 
## Overall Multicollinearity Diagnostics
## 
##                        MC Results detection
## Determinant |X'X|:         0.6918         0
## Farrar Chi-Square:        31.3812         1
## Red Indicator:             0.3341         0
## Sum of Lambda Inverse:     3.8525         0
## Theil's Method:           -0.7297         0
## Condition Number:         11.8678         0
## 
## 1 --> COLLINEARITY is detected by the test 
## 0 --> COLLINEARITY is not detected by the test

Cálculo manual

library(stargazer)
# Step 1: standardise the regressors (intercept column dropped) with scale().
Zn <- scale(x = matriz_x1[, -1])
head(Zn, n = 6)
##       lotsize      sqrft      bdrms
## 1 -0.28443295  0.7351230  0.5132184
## 2  0.08680198  0.1079482 -0.6752874
## 3 -0.37544792 -1.1082857 -0.6752874
## 4 -0.43442091 -0.9800787 -0.6752874
## 5 -0.28747989  0.8667951  0.5132184
## 6 -0.04460949  1.2826015  1.7017243
# Step 2: correlation matrix R = Z'Z / (n - 1) of the standardised regressors.
n <- nrow(Zn)
R <- (t(Zn) %*% Zn) * (1 / (n - 1))
# Equivalent shortcut: cor(matriz_x1[, -1])
# head() has no `digits` argument (it was silently ignored), so it is dropped;
# the printed matrix is unchanged.
head(R)
##           lotsize     sqrft     bdrms
## lotsize 1.0000000 0.1838422 0.1363256
## sqrft   0.1838422 1.0000000 0.5314736
## bdrms   0.1363256 0.5314736 1.0000000
# Step 3: determinant of the correlation matrix.
Determinante_R <- det(x = R)
print(Determinante_R)
## [1] 0.6917931

Aplicando la prueba

# m = number of regressors (intercept excluded), n = number of observations.
m <- dim(matriz_x1[, -1])[2L]
n <- dim(matriz_x1[, -1])[1L]
# Farrar-Glauber statistic: -(n - 1 - (2m + 5) / 6) * log(det(R)).
chi_FG <- -(n - 1 - (2 * m + 5) / 6) * log(Determinante_R)
print(chi_FG)
## [1] 31.38122

Valor critico

# Degrees of freedom m(m - 1) / 2 and the 0.95 quantile of that chi-square.
gl <- m * (m - 1) / 2
VC <- qchisq(0.95, df = gl)
print(VC)
## [1] 7.814728

Prueba de hipotesis

# Decision rule: reject the null hypothesis when the statistic exceeds the
# critical value. Both are single numbers, so if/else replaces the vectorised
# ifelse(); a missing statistic now fails loudly instead of yielding NA.
resultado2 <- if (chi_FG > VC) {
  "Rechazar hipotesis nula"
} else {
  "No rechazar hipotesis nula"
}
print(resultado2)
## [1] "Rechazar hipotesis nula"

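Como el estadístico de Farrar-Glauber es mayor que el valor crítico (31.38122 > 7.814728), se rechaza la hipótesis nula de que los regresores son ortogonales. Esto indica que existe evidencia estadística de multicolinealidad en los datos, aunque el índice de condición (11.87) sugiere que no es severa.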

Visualización

library(fastGraph)
## Warning: package 'fastGraph' was built under R version 4.5.3
# Significance level and the matching chi-square quantile.
alpha_sig <- 0.05
chi <- qchisq(p = 1 - alpha_sig, df = gl, lower.tail = TRUE)
# Plot the chi-square density with `gl` degrees of freedom, shading from the
# observed statistic with lower.tail = FALSE; the subtitle reports the critical
# value and the statistic, both rounded to two decimals.
shadeDist(
  chi_FG,
  ddist = "dchisq",
  parm1 = gl,
  lower.tail = FALSE,
  xmin = 0,
  sub = paste("VC:", round(VC, 2), " ", "chi_FG:", round(chi_FG, 2))
)

Factores Inflacionarios de la Varianza (FIV)

Librería performance

library(performance)
## Warning: package 'performance' was built under R version 4.5.3
# Multicollinearity check from the performance package, with verbose = FALSE.
VIFs <- multicollinearity(modelo_price, verbose = FALSE)
print(VIFs)
## # Check for Multicollinearity
## 
## Low Correlation
## 
##     Term  VIF    VIF 95% CI adj. VIF Tolerance Tolerance 95% CI
##  lotsize 1.04 [1.00, 11.02]     1.02      0.96     [0.09, 1.00]
##    sqrft 1.42 [1.18,  1.98]     1.19      0.70     [0.51, 0.85]
##    bdrms 1.40 [1.17,  1.95]     1.18      0.72     [0.51, 0.86]

Librería mctest

library(mctest)
# Diagnostic plot from mctest with `vif` set to 2 (presumably the VIF threshold
# drawn on the plot; confirm against the mc.plot documentation).
mc.plot(modelo_price, vif = 2)

Librería car

library(car)
## Cargando paquete requerido: carData
## 
## Adjuntando el paquete: 'car'
## The following object is masked from 'package:psych':
## 
##     logit
# Variance inflation factors computed with car::vif().
VIFs_car <- car::vif(modelo_price)
print(VIFs_car)
##  lotsize    sqrft    bdrms 
## 1.037211 1.418654 1.396663

Cálculo manual

# Correlation matrix of the regressors.
print(x = R)
##           lotsize     sqrft     bdrms
## lotsize 1.0000000 0.1838422 0.1363256
## sqrft   0.1838422 1.0000000 0.5314736
## bdrms   0.1363256 0.5314736 1.0000000
# Invert the correlation matrix.
inversa_R <- solve(a = R)
print(inversa_R)
##             lotsize      sqrft       bdrms
## lotsize  1.03721145 -0.1610145 -0.05582352
## sqrft   -0.16101454  1.4186543 -0.73202696
## bdrms   -0.05582352 -0.7320270  1.39666321
# Take the diagonal of the inverted correlation matrix; these entries are
# reported as the VIFs.
Vifss <- diag(x = inversa_R)
print(Vifss)
##  lotsize    sqrft    bdrms 
## 1.037211 1.418654 1.396663

lotsize (VIF 1.04): Esta variable prácticamente no está correlacionada con el resto de los predictores (R² auxiliar ≈ 0.04), por lo que la varianza de su coeficiente casi no se infla.

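sqrft (VIF 1.42): Esta variable presenta una correlación baja a moderada con el resto de los predictores (R² auxiliar ≈ 0.30, debida sobre todo a su correlación de 0.53 con bdrms). La varianza de su coeficiente se infla alrededor de un 42 %, muy por debajo de los umbrales habituales (VIF de 5 o 10), por lo que no representa un problema de multicolinealidad.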

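bdrms (VIF 1.40): Esta variable presenta una correlación baja a moderada con el resto de los predictores (R² auxiliar ≈ 0.28, debida sobre todo a su correlación de 0.53 con sqrft). La varianza de su coeficiente se infla alrededor de un 40 %, muy por debajo de los umbrales habituales (VIF de 5 o 10), por lo que no representa un problema de multicolinealidad.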