Multicolinealidad

Bárbara Elizabeth Mejía González

Mayo de 2019

Datos

library(haven)
hprice1 <- read_dta("F:/DESCARGAS/hprice1.dta")
head(hprice1, n=6)
## # A tibble: 6 x 10
##   price assess bdrms lotsize sqrft colonial lprice lassess llotsize lsqrft
##   <dbl>  <dbl> <dbl>   <dbl> <dbl>    <dbl>  <dbl>   <dbl>    <dbl>  <dbl>
## 1  300    349.     4    6126  2438        1   5.70    5.86     8.72   7.80
## 2  370    352.     3    9903  2076        1   5.91    5.86     9.20   7.64
## 3  191    218.     3    5200  1374        0   5.25    5.38     8.56   7.23
## 4  195    232.     3    4600  1448        1   5.27    5.45     8.43   7.28
## 5  373    319.     4    6095  2514        1   5.92    5.77     8.72   7.83
## 6  466.   414.     5    8566  2754        1   6.14    6.03     9.06   7.92

Estimación del modelo.

library(stargazer)
mod_est<- lm(price~assess+bdrms+lotsize+colonial+llotsize,data = hprice1)
stargazer(mod_est,type = "text",tixtle = "modelo estimado")
## 
## ===============================================
##                         Dependent variable:    
##                     ---------------------------
##                                price           
## -----------------------------------------------
## assess                       0.940***          
##                               (0.072)          
##                                                
## bdrms                          8.620           
##                               (6.791)          
##                                                
## lotsize                        0.001           
##                               (0.001)          
##                                                
## colonial                      10.031           
##                              (10.580)          
##                                                
## llotsize                      -13.357          
##                              (17.813)          
##                                                
## Constant                      68.090           
##                              (146.133)         
##                                                
## -----------------------------------------------
## Observations                    88             
## R2                             0.832           
## Adjusted R2                    0.822           
## Residual Std. Error      43.364 (df = 82)      
## F Statistic           81.224*** (df = 5; 82)   
## ===============================================
## Note:               *p<0.1; **p<0.05; ***p<0.01
## 
## ===============
## modelo estimado
## ---------------

Calcular la Matriz \([X^{t}X]\).

X_mat<-model.matrix(mod_est)
stargazer(head(X_mat, n=6), type="text")
## 
## =====================================================
##   (Intercept) assess  bdrms lotsize colonial llotsize
## -----------------------------------------------------
## 1      1      349.100   4    6,126     1      8.720  
## 2      1      351.500   3    9,903     1      9.201  
## 3      1      217.700   3    5,200     0      8.556  
## 4      1      231.800   3    4,600     1      8.434  
## 5      1      319.100   4    6,095     1      8.715  
## 6      1      414.500   5    8,566     1      9.056  
## -----------------------------------------------------
XX_mat<-t(X_mat)%*%X_mat
stargazer(XX_mat, type="html")
## 
## <table style="text-align:center"><tr><td colspan="7" style="border-bottom: 1px solid black"></td></tr><tr><td style="text-align:left"></td><td>(Intercept)</td><td>assess</td><td>bdrms</td><td>lotsize</td><td>colonial</td><td>llotsize</td></tr>
## <tr><td colspan="7" style="border-bottom: 1px solid black"></td></tr><tr><td style="text-align:left">(Intercept)</td><td>88</td><td>27,784.800</td><td>314</td><td>793,748</td><td>61</td><td>783.649</td></tr>
## <tr><td style="text-align:left">assess</td><td>27,784.800</td><td>9,563,053.000</td><td>102,507.500</td><td>278,300,049.000</td><td>19,578.900</td><td>250,005.600</td></tr>
## <tr><td style="text-align:left">bdrms</td><td>314</td><td>102,507.500</td><td>1,182</td><td>2,933,767</td><td>228</td><td>2,802.953</td></tr>
## <tr><td style="text-align:left">lotsize</td><td>793,748</td><td>278,300,049.000</td><td>2,933,767</td><td>16,165,159,010</td><td>555,967</td><td>7,457,452.000</td></tr>
## <tr><td style="text-align:left">colonial</td><td>61</td><td>19,578.900</td><td>228</td><td>555,967</td><td>61</td><td>544.060</td></tr>
## <tr><td style="text-align:left">llotsize</td><td>783.649</td><td>250,005.600</td><td>2,802.953</td><td>7,457,452.000</td><td>544.060</td><td>7,004.230</td></tr>
## <tr><td colspan="7" style="border-bottom: 1px solid black"></td></tr></table>

Normalización de \([X^{t}X]\).

options(scipen = 999)
Sn<-solve(diag(sqrt(diag(XX_mat))))
stargazer(Sn,type="text")
## 
## ======================================
## 0.107   0      0      0      0     0  
## 0     0.0003   0      0      0     0  
## 0       0    0.029    0      0     0  
## 0       0      0   0.00001   0     0  
## 0       0      0      0    0.128   0  
## 0       0      0      0      0   0.012
## --------------------------------------

\([X^{t}X]\) Normalizada:

XX_norm<-(Sn%*%XX_mat)%*%Sn
stargazer(XX_norm,type="text",digits = 4)
## 
## =========================================
## 1      0.9578 0.9736 0.6655 0.8326 0.9982
## 0.9578   1    0.9642 0.7078 0.8106 0.9660
## 0.9736 0.9642   1    0.6712 0.8491 0.9742
## 0.6655 0.7078 0.6712   1    0.5599 0.7008
## 0.8326 0.8106 0.8491 0.5599   1    0.8323
## 0.9982 0.9660 0.9742 0.7008 0.8323   1   
## -----------------------------------------

Cálculo del indice de condición

Autovalores de \([X^{t}X]\) Normalizada:

lambdas<-eigen(XX_norm,symmetric = TRUE)
stargazer(lambdas$values, type="text")
## 
## ====================================
## 5.193 0.492 0.237 0.049 0.028 0.0005
## ------------------------------------

Cálculo de \(K(x)=\sqrt\frac{\lambda_{max}}{\lambda_{min}}\)

K<-sqrt(max(lambdas$values)/min(lambdas$value))
print(K)
## [1] 106.4903

Uso de la libreria “mctest”, para obtener el indice de condición.

library(mctest)
eigprop(x=X_mat[,-1])
## 
## Call:
## eigprop(x = X_mat[, -1])
## 
##   Eigenvalues       CI Intercept assess  bdrms lotsize colonial llotsize
## 1      5.1934   1.0000    0.0000 0.0014 0.0012  0.0037   0.0079   0.0000
## 2      0.4923   3.2479    0.0000 0.0001 0.0015  0.2963   0.0615   0.0000
## 3      0.2365   4.6860    0.0003 0.0113 0.0041  0.0322   0.8545   0.0002
## 4      0.0491  10.2888    0.0048 0.4576 0.0148  0.0101   0.0021   0.0027
## 5      0.0283  13.5505    0.0012 0.2138 0.9074  0.0093   0.0696   0.0017
## 6      0.0005 106.4903    0.9936 0.3158 0.0711  0.6484   0.0044   0.9954
## 
## ===============================
## Row 5==> bdrms, proportion 0.907364 >= 0.50 
## Row 6==> lotsize, proportion 0.648440 >= 0.50 
## Row 3==> colonial, proportion 0.854491 >= 0.50 
## Row 6==> llotsize, proportion 0.995427 >= 0.50

Prueba de Farrar-Glaubar.

Calculo de \(|R|\).

Zn<-scale(X_mat[,-1])
stargazer(head(Zn,n=6),type = "text")
## 
## =========================================
##   assess bdrms  lotsize colonial llotsize
## -----------------------------------------
## 1 0.350  0.513  -0.284   0.662    -0.340 
## 2 0.375  -0.675  0.087   0.662    0.543  
## 3 -1.029 -0.675 -0.375   -1.495   -0.641 
## 4 -0.881 -0.675 -0.434   0.662    -0.866 
## 5 0.035  0.513  -0.287   0.662    -0.349 
## 6 1.036  1.702  -0.045   0.662    0.277  
## -----------------------------------------

Calcular la matriz R.

n<-nrow(Zn)
R<-(t(Zn)%*%Zn)*(1/(n-1))
stargazer(R,type = "text",digits = 4)
## 
## ================================================
##          assess bdrms  lotsize colonial llotsize
## ------------------------------------------------
## assess     1    0.4825 0.3281   0.0829   0.5717 
## bdrms    0.4825   1    0.1363   0.3046   0.1695 
## lotsize  0.3281 0.1363    1     0.0140   0.8079 
## colonial 0.0829 0.3046 0.0140     1      0.0386 
## llotsize 0.5717 0.1695 0.8079   0.0386     1    
## ------------------------------------------------

Calcular \(|R|\).

determinante_R<-det(R)
print(determinante_R)
## [1] 0.1419755

Aplicando la prueba de Farrer Glaubar (Bartlett).

Estadístico \(X^{2}_{FG}\)

m<-ncol(X_mat[,-1])
n<-nrow(X_mat[,-1])
chi_FG<--(n-1-(2*m+5)/6)*log(determinante_R)
print(chi_FG)
## [1] 164.9525

Valor crítico.

gl<-m*(m-1)/2
VC<-qchisq(p = 0.95,df = gl)
print(VC)
## [1] 18.30704

Como \(X^{2}_{FG}\geq\)V.C. se rechaza \(H_{0}\), por lo tanto hay evidencia de colinealidad en los regresores.

Uso de la libreria psych.

library(psych)
FG_test<-cortest.bartlett(X_mat[,-1])
## R was not square, finding R from data
print(FG_test)
## $chisq
## [1] 164.9525
## 
## $p.value
## [1] 0.000000000000000000000000000003072151
## 
## $df
## [1] 10

Calculando los VIF para el modelo estimado.

Matriz de Correlación de los regresores del modelo (Como se obtuvo con anterioridad):

print(R)
##              assess     bdrms    lotsize   colonial  llotsize
## assess   1.00000000 0.4824739 0.32814633 0.08293582 0.5716654
## bdrms    0.48247394 1.0000000 0.13632563 0.30457549 0.1694902
## lotsize  0.32814633 0.1363256 1.00000000 0.01401865 0.8078552
## colonial 0.08293582 0.3045755 0.01401865 1.00000000 0.0386421
## llotsize 0.57166539 0.1694902 0.80785523 0.03864210 1.0000000

Inversa de la matriz de correlación \(R^{-1}\):

inversa_R<-solve(R)
print(inversa_R)
##              assess      bdrms    lotsize   colonial   llotsize
## assess    2.1535576 -0.9010888  0.8347216  0.1520968 -1.7586001
## bdrms    -0.9010888  1.5104833 -0.3640744 -0.4021983  0.5687703
## lotsize   0.8347216 -0.3640744  3.2049651  0.1130042 -3.0089889
## colonial  0.1520968 -0.4021983  0.1130042  1.1142184 -0.1531266
## llotsize -1.7586001  0.5687703 -3.0089889 -0.1531266  4.3456744

VIF’s para el modelo estimado:

VIFs<-diag(inversa_R)
print(VIFs)
##   assess    bdrms  lotsize colonial llotsize 
## 2.153558 1.510483 3.204965 1.114218 4.345674

Uso de la librería “car” y “mctest”

Obtención de los VIF’s, a través de la librería “car”.

library(car)
## Loading required package: carData
## 
## Attaching package: 'car'
## The following object is masked from 'package:psych':
## 
##     logit
VIFs_car<-vif(mod_est)
print(VIFs_car)
##   assess    bdrms  lotsize colonial llotsize 
## 2.153558 1.510483 3.204965 1.114218 4.345674

Obtención de los VIF’s, a través de la librería “mctest”.

mc.plot(x = X_mat[,-1],y = hprice1$price,vif = 2,)