library(wooldridge)
data("hprice1")
head(force(hprice1),n=5)#mostrar solo las primeras 5 observaciones
## price assess bdrms lotsize sqrft colonial lprice lassess llotsize lsqrft
## 1 300 349.1 4 6126 2438 1 5.703783 5.855359 8.720297 7.798934
## 2 370 351.5 3 9903 2076 1 5.913503 5.862210 9.200593 7.638198
## 3 191 217.7 3 5200 1374 0 5.252274 5.383118 8.556414 7.225482
## 4 195 231.8 3 4600 1448 1 5.273000 5.445875 8.433811 7.277938
## 5 373 319.1 4 6095 2514 1 5.921578 5.765504 8.715224 7.829630
library(stargazer)
modelo_estimado<-lm(price~assess+bdrms+lotsize+colonial+llotsize,data = hprice1)
stargazer(modelo_estimado,title = "Modelo Estimado",type = "text")
##
## Modelo Estimado
## ===============================================
## Dependent variable:
## ---------------------------
## price
## -----------------------------------------------
## assess 0.940***
## (0.072)
##
## bdrms 8.620
## (6.791)
##
## lotsize 0.001
## (0.001)
##
## colonial 10.031
## (10.580)
##
## llotsize -13.357
## (17.813)
##
## Constant 68.090
## (146.133)
##
## -----------------------------------------------
## Observations 88
## R2 0.832
## Adjusted R2 0.822
## Residual Std. Error 43.364 (df = 82)
## F Statistic 81.224*** (df = 5; 82)
## ===============================================
## Note: *p<0.1; **p<0.05; ***p<0.01
El número de condición mide la sensibilidad de las estimaciones mínimo cuadráticas ante pequeños cambios en los datos.El número de condición, κ(x), es igual a la raíz cuadrada de la razón entre la raíz característica más grande (λmax) y la raíz característica más pequeña (λmin) de la matriz XtX.
Interpretación:
1.Si κ(x) es inferioro igual a 20, la multicolinealidad es leve, no se considera un problema.
2.Para 20<κ(x)<30, la multicolinealidad se considera moderada.
3.En el caso de que κ(x)≥ 30 la multicolinealidad es severa
Se basa la matriz XtX:
library(stargazer)
X_mat<-model.matrix(modelo_estimado)
stargazer(head(X_mat,n=6),type = "text")
##
## =====================================================
## (Intercept) assess bdrms lotsize colonial llotsize
## -----------------------------------------------------
## 1 1 349.100 4 6,126 1 8.720
## 2 1 351.500 3 9,903 1 9.201
## 3 1 217.700 3 5,200 0 8.556
## 4 1 231.800 3 4,600 1 8.434
## 5 1 319.100 4 6,095 1 8.715
## 6 1 414.500 5 8,566 1 9.056
## -----------------------------------------------------
XX_matrix<-t(X_mat)%*%X_mat
stargazer(XX_matrix,type = "text")
##
## ============================================================================================
## (Intercept) assess bdrms lotsize colonial llotsize
## --------------------------------------------------------------------------------------------
## (Intercept) 88 27,784.800 314 793,748 61 783.649
## assess 27,784.800 9,563,053.000 102,507.500 278,300,049.000 19,578.900 250,005.600
## bdrms 314 102,507.500 1,182 2,933,767 228 2,802.953
## lotsize 793,748 278,300,049.000 2,933,767 16,165,159,010 555,967 7,457,452.000
## colonial 61 19,578.900 228 555,967 61 544.060
## llotsize 783.649 250,005.600 2,802.953 7,457,452.000 544.060 7,004.230
## --------------------------------------------------------------------------------------------
La matriz XtX debe ser normalizada, para evitar sesgo de la escala de las variables.
library(stargazer)
options(scipen = 9999)
Sn<-solve(diag(sqrt(diag(XX_matrix))))
stargazer(Sn,type = "text")
##
## ======================================
## 0.107 0 0 0 0 0
## 0 0.0003 0 0 0 0
## 0 0 0.029 0 0 0
## 0 0 0 0.00001 0 0
## 0 0 0 0 0.128 0
## 0 0 0 0 0 0.012
## --------------------------------------
XtX normalzada:
library(stargazer)
XX_norm<-(Sn%*%XX_matrix)%*%Sn
stargazer(XX_norm,type = "text",digits = 4)
##
## =========================================
## 1 0.9578 0.9736 0.6655 0.8326 0.9982
## 0.9578 1 0.9642 0.7078 0.8106 0.9660
## 0.9736 0.9642 1 0.6712 0.8491 0.9742
## 0.6655 0.7078 0.6712 1 0.5599 0.7008
## 0.8326 0.8106 0.8491 0.5599 1 0.8323
## 0.9982 0.9660 0.9742 0.7008 0.8323 1
## -----------------------------------------
Autovalores de XtX normalizada:
library(stargazer)
#autovalores
lambdas<-eigen(XX_norm,symmetric = TRUE)
stargazer(lambdas$values,type = "text")
##
## ====================================
## 5.193 0.492 0.237 0.049 0.028 0.0005
## ------------------------------------
k<-sqrt(max(lambdas$values)/min(lambdas$values))
print(k)
## [1] 106.4903
Como κ(x)>30 se considera que la multicolinealidad es severa.
library(mctest)
X_mat<-model.matrix(modelo_estimado)
mctest(mod = modelo_estimado)
##
## Call:
## omcdiag(mod = mod, Inter = TRUE, detr = detr, red = red, conf = conf,
## theil = theil, cn = cn)
##
##
## Overall Multicollinearity Diagnostics
##
## MC Results detection
## Determinant |X'X|: 0.1420 0
## Farrar Chi-Square: 164.9525 1
## Red Indicator: 0.3832 0
## Sum of Lambda Inverse: 12.3289 0
## Theil's Method: -0.8940 0
## Condition Number: 106.4903 1
##
## 1 --> COLLINEARITY is detected by the test
## 0 --> COLLINEARITY is not detected by the test
library(olsrr)
ols_eigen_cindex(model = modelo_estimado)
## Eigenvalue Condition Index intercept assess bdrms
## 1 5.1933823834 1.000000 0.00003563203 0.0013514449 0.001229095
## 2 0.4923113019 3.247919 0.00004447199 0.0001049883 0.001469387
## 3 0.2365049515 4.686030 0.00028607748 0.0112620908 0.004101462
## 4 0.0490596083 10.288762 0.00483247587 0.4576465408 0.014764598
## 5 0.0282837924 13.550531 0.00119864705 0.2138228412 0.907363692
## 6 0.0004579625 106.490334 0.99360269558 0.3158120940 0.071071766
## lotsize colonial llotsize
## 1 0.003714445 0.007882944 0.00003054565
## 2 0.296269130 0.061464755 0.00001170881
## 3 0.032237901 0.854490636 0.00020426106
## 4 0.010083947 0.002127499 0.00265275719
## 5 0.009254094 0.069640172 0.00167335230
## 6 0.648440482 0.004393993 0.99542737500
Implementacón de la prueba de Bartlett:
Esta prueba identifica si a nivel poblacional, los regresores del
modelo presentan indepedencia estadística(son ortogonales), a través de
la matriz de correlación muestral R, y se verifica si a nivel
poblacional dicha matriz de correlación corresponde a una matrix
identidad,las hipótesis de la prueba son las siguintes:
H0:R~I
H1:R≁I
Si no se rechaza H0 no hay evidencia de multicolinealidad,caso
contrario si se rechaza H0 hay evidencia de
multicolinealidad.
Rechazar H0 si : χ^2FG≥V.C o si p≤α
*Cálculo de |R|
library(stargazer)
Zn<-scale(X_mat[,-1])
stargazer(head(Zn,n=6),type = "text")
##
## =========================================
## assess bdrms lotsize colonial llotsize
## -----------------------------------------
## 1 0.350 0.513 -0.284 0.662 -0.340
## 2 0.375 -0.675 0.087 0.662 0.543
## 3 -1.029 -0.675 -0.375 -1.495 -0.641
## 4 -0.881 -0.675 -0.434 0.662 -0.866
## 5 0.035 0.513 -0.287 0.662 -0.349
## 6 1.036 1.702 -0.045 0.662 0.277
## -----------------------------------------
library(stargazer)
n<-nrow(Zn)
R<-(t(Zn)%*%Zn)*(1/(n-1))#también puede calcularse R a través de cor(X_mat[,-1])
stargazer(R,type = "text",digits = 4)
##
## ================================================
## assess bdrms lotsize colonial llotsize
## ------------------------------------------------
## assess 1 0.4825 0.3281 0.0829 0.5717
## bdrms 0.4825 1 0.1363 0.3046 0.1695
## lotsize 0.3281 0.1363 1 0.0140 0.8079
## colonial 0.0829 0.3046 0.0140 1 0.0386
## llotsize 0.5717 0.1695 0.8079 0.0386 1
## ------------------------------------------------
determinante_R<-det(R)
print(determinante_R)
## [1] 0.1419755
m<-ncol(X_mat[,-1])
n<-nrow(X_mat[,-1])
chi_FG<--(n-1-(2*m+5)/6)*log(determinante_R)
print(chi_FG)
## [1] 164.9525
gl<-m*(m-1)/2
VC<-qchisq(p = 0.95,df = gl)
print(VC)
## [1] 18.30704
Como χ^2FG≥V.C se rechaza H0, por lo tanto hay evidencia de colinealidad en los regresores.
library(mctest)
mctest::omcdiag(mod = modelo_estimado)
##
## Call:
## mctest::omcdiag(mod = modelo_estimado)
##
##
## Overall Multicollinearity Diagnostics
##
## MC Results detection
## Determinant |X'X|: 0.1420 0
## Farrar Chi-Square: 164.9525 1
## Red Indicator: 0.3832 0
## Sum of Lambda Inverse: 12.3289 0
## Theil's Method: -0.8940 0
## Condition Number: 106.4903 1
##
## 1 --> COLLINEARITY is detected by the test
## 0 --> COLLINEARITY is not detected by the test
library(psych)
FG_test<-cortest.bartlett(X_mat[,-1])
print(FG_test)
## $chisq
## [1] 164.9525
##
## $p.value
## [1] 0.000000000000000000000000000003072151
##
## $df
## [1] 10
library(fastGraph)
shadeDist(10,ddist = "dchisq",parm1 = 3,lower.tail = FALSE)
library(dplyr)
R.cuadrado.regresores<-c(0,0.5,.8,.9)
as.data.frame(R.cuadrado.regresores)%>% mutate(VIF=1/(1-R.cuadrado.regresores))
## R.cuadrado.regresores VIF
## 1 0.0 1
## 2 0.5 2
## 3 0.8 5
## 4 0.9 10
print(R)#Cálculo manual
## assess bdrms lotsize colonial llotsize
## assess 1.00000000 0.4824739 0.32814633 0.08293582 0.5716654
## bdrms 0.48247394 1.0000000 0.13632563 0.30457549 0.1694902
## lotsize 0.32814633 0.1363256 1.00000000 0.01401865 0.8078552
## colonial 0.08293582 0.3045755 0.01401865 1.00000000 0.0386421
## llotsize 0.57166539 0.1694902 0.80785523 0.03864210 1.0000000
#Inversa de la matriz de correlacion R-1
inversa_R<-solve(R)
print(inversa_R)
## assess bdrms lotsize colonial llotsize
## assess 2.1535576 -0.9010888 0.8347216 0.1520968 -1.7586001
## bdrms -0.9010888 1.5104833 -0.3640744 -0.4021983 0.5687703
## lotsize 0.8347216 -0.3640744 3.2049651 0.1130042 -3.0089889
## colonial 0.1520968 -0.4021983 0.1130042 1.1142184 -0.1531266
## llotsize -1.7586001 0.5687703 -3.0089889 -0.1531266 4.3456744
#VIF's para el modelo estimado
VIFs<-diag(inversa_R)
print(VIFs)
## assess bdrms lotsize colonial llotsize
## 2.153558 1.510483 3.204965 1.114218 4.345674
library(performance)
VIFs<-multicollinearity(modelo_estimado,verbose = FALSE)
VIFs
## # Check for Multicollinearity
##
## Low Correlation
##
## Term VIF VIF 95% CI adj. VIF Tolerance Tolerance 95% CI
## assess 2.15 [1.67, 2.98] 1.47 0.46 [0.34, 0.60]
## bdrms 1.51 [1.24, 2.08] 1.23 0.66 [0.48, 0.81]
## lotsize 3.20 [2.39, 4.51] 1.79 0.31 [0.22, 0.42]
## colonial 1.11 [1.01, 1.87] 1.06 0.90 [0.53, 0.99]
## llotsize 4.35 [3.17, 6.17] 2.08 0.23 [0.16, 0.32]
plot(VIFs)
library(car)
VIFs_car<-vif(modelo_estimado)
print(VIFs_car)
## assess bdrms lotsize colonial llotsize
## 2.153558 1.510483 3.204965 1.114218 4.345674
library(mctest)
mc.plot(mod = modelo_estimado,vif = 2)