Utilizando los datos del dataframe hprice1: disponible en el paquete wooldridge use el siguiente código para generar el dataframe:

library(wooldridge)
## Warning: package 'wooldridge' was built under R version 4.5.3
data(hprice1)
head(force(hprice1),n=5) #mostrar las primeras 5 observaciones
##   price assess bdrms lotsize sqrft colonial   lprice  lassess llotsize   lsqrft
## 1   300  349.1     4    6126  2438        1 5.703783 5.855359 8.720297 7.798934
## 2   370  351.5     3    9903  2076        1 5.913503 5.862210 9.200593 7.638198
## 3   191  217.7     3    5200  1374        0 5.252274 5.383118 8.556414 7.225482
## 4   195  231.8     3    4600  1448        1 5.273000 5.445875 8.433811 7.277938
## 5   373  319.1     4    6095  2514        1 5.921578 5.765504 8.715224 7.829630

1. estime el siguiente modelo,

library(stargazer)
## Warning: package 'stargazer' was built under R version 4.5.2
## 
## Please cite as:
##  Hlavac, Marek (2022). stargazer: Well-Formatted Regression and Summary Statistics Tables.
##  R package version 5.2.3. https://CRAN.R-project.org/package=stargazer
modelo_estimado <- lm(price ~ lotsize + sqrft + bdrms, data = hprice1)

stargazer(modelo_estimado, type = "text", title = "modelo estimado")
## 
## modelo estimado
## ===============================================
##                         Dependent variable:    
##                     ---------------------------
##                                price           
## -----------------------------------------------
## lotsize                      0.002***          
##                               (0.001)          
##                                                
## sqrft                        0.123***          
##                               (0.013)          
##                                                
## bdrms                         13.853           
##                               (9.010)          
##                                                
## Constant                      -21.770          
##                              (29.475)          
##                                                
## -----------------------------------------------
## Observations                    88             
## R2                             0.672           
## Adjusted R2                    0.661           
## Residual Std. Error      59.833 (df = 84)      
## F Statistic           57.460*** (df = 3; 84)   
## ===============================================
## Note:               *p<0.1; **p<0.05; ***p<0.01

Cálculo “manual”:

library(stargazer)
X_mat<-model.matrix(modelo_estimado)
stargazer(head(X_mat,n=6),type="text")
## 
## =================================
##   (Intercept) lotsize sqrft bdrms
## ---------------------------------
## 1      1       6,126  2,438   4  
## 2      1       9,903  2,076   3  
## 3      1       5,200  1,374   3  
## 4      1       4,600  1,448   3  
## 5      1       6,095  2,514   4  
## 6      1       8,566  2,754   5  
## ---------------------------------
XX_matrix<-t(X_mat)%*%X_mat
stargazer(XX_matrix,type = "text")
## 
## ==============================================================
##             (Intercept)    lotsize         sqrft       bdrms  
## --------------------------------------------------------------
## (Intercept)     88         793,748        177,205       314   
## lotsize       793,748   16,165,159,010 1,692,290,257 2,933,767
## sqrft         177,205   1,692,290,257   385,820,561   654,755 
## bdrms           314       2,933,767       654,755      1,182  
## --------------------------------------------------------------

La matriz XtX debe ser normalizada, para evitar sesgo de la escala de las variables. Cálculo de la matriz de normalización:

library(stargazer)
options(scipen = 999)
Sn<-solve(diag(sqrt(diag(XX_matrix))))
stargazer(Sn,type = "text")
## 
## ==========================
## 0.107    0      0      0  
## 0     0.00001   0      0  
## 0        0    0.0001   0  
## 0        0      0    0.029
## --------------------------

XtX normalizada

library(knitr)
## Warning: package 'knitr' was built under R version 4.5.2
library(kableExtra)
## Warning: package 'kableExtra' was built under R version 4.5.3
options(scipen = 99)

XX_norm <- (Sn %*% XX_matrix) %*% Sn

kable(XX_norm,
      digits = 4,
      format = "html",
      align = "c",
      caption = "Matriz X'X normalizada") %>%
  kable_styling(bootstrap_options = c("bordered", "striped"),
                full_width = FALSE,
                position = "center")
Matriz X’X normalizada
1.0000 0.6655 0.9617 0.9736
0.6655 1.0000 0.6776 0.6712
0.9617 0.6776 1.0000 0.9696
0.9736 0.6712 0.9696 1.0000

Autovalores de XtX Normalizada:

library(knitr)
library(kableExtra)

lambdas <- eigen(XX_norm, symmetric = TRUE)

valores <- data.frame(
  i = 1:length(lambdas$values),
  Autovalores = lambdas$values
)

kable(valores,
      digits = 4,
      format = "html",
      align = "c") %>%   #  CENTRA TODO
  kable_styling(bootstrap_options = c("bordered", "striped"))
i Autovalores
1 3.4816
2 0.4552
3 0.0385
4 0.0247

Cálculo de κ(x)= √λmax/λmin

K<-sqrt(max(lambdas$values)/min(lambdas$values))
print(K)
## [1] 11.86778

l número de condición obtenido es K = 11.87, lo cual indica la presencia de multicolinealidad moderada entre las variables explicativas. Sin embargo, no se considera un problema grave para la estimación del modelo.

2. Verifique si hay evidencia de la independencia de los regresores (no colinealidad), a través de:

a) Indice de condición y prueba de FG, presente sus resultados de manera tabular en ambos casos y para la prueba de FG presente también sus resultados de forma gráfica usando la librería fastGraph

adicional ..Cálculo del Indice de Condición usando librería “mctest”
library(mctest)
## Warning: package 'mctest' was built under R version 4.5.2
X_mat<-model.matrix(modelo_estimado)
mctest(mod = modelo_estimado)
## 
## Call:
## omcdiag(mod = mod, Inter = TRUE, detr = detr, red = red, conf = conf, 
##     theil = theil, cn = cn)
## 
## 
## Overall Multicollinearity Diagnostics
## 
##                        MC Results detection
## Determinant |X'X|:         0.6918         0
## Farrar Chi-Square:        31.3812         1
## Red Indicator:             0.3341         0
## Sum of Lambda Inverse:     3.8525         0
## Theil's Method:           -0.7297         0
## Condition Number:         11.8678         0
## 
## 1 --> COLLINEARITY is detected by the test 
## 0 --> COLLINEARITY is not detected by the test

#####Cálculo del Indice de Condición usando librería “olsrr”

library(olsrr)
## Warning: package 'olsrr' was built under R version 4.5.3
## 
## Adjuntando el paquete: 'olsrr'
## The following object is masked from 'package:wooldridge':
## 
##     cement
## The following object is masked from 'package:datasets':
## 
##     rivers
ols_eigen_cindex(model = modelo_estimado)
##   Eigenvalue Condition Index   intercept      lotsize       sqrft       bdrms
## 1 3.48158596        1.000000 0.003663034 0.0277802824 0.004156293 0.002939554
## 2 0.45518380        2.765637 0.006800735 0.9670803174 0.006067321 0.005096396
## 3 0.03851083        9.508174 0.472581427 0.0051085488 0.816079307 0.016938178
## 4 0.02471941       11.867781 0.516954804 0.0000308514 0.173697079 0.975025872

Prueba de Farrar-Glaubar

calculo manual Calculo de |R| c

library(knitr)
library(kableExtra)

Zn <- scale(X_mat[,-1])

kable(head(Zn, 6), digits = 3, format = "html") %>%
  kable_styling(bootstrap_options = c("bordered", "striped"))
lotsize sqrft bdrms
-0.284 0.735 0.513
0.087 0.108 -0.675
-0.375 -1.108 -0.675
-0.434 -0.980 -0.675
-0.287 0.867 0.513
-0.045 1.283 1.702

Calcular la matriz R

library(knitr)
library(kableExtra)

n <- nrow(Zn)
R <- (t(Zn) %*% Zn) * (1/(n-1))

kable(R,
      digits = 4,
      format = "html",
      align = "c",
      caption = "Matriz  R") %>%
  kable_styling(bootstrap_options = c("bordered", "striped"),
                full_width = FALSE,
                position = "center")
Matriz R
lotsize sqrft bdrms
lotsize 1.0000 0.1838 0.1363
sqrft 0.1838 1.0000 0.5315
bdrms 0.1363 0.5315 1.0000

Calcular |R|

determinante_R<-det(R)
print(determinante_R)
## [1] 0.6917931

Aplicando la prueba de Farrer Glaubar (Bartlett)

m<-ncol(X_mat[,-1])
n<-nrow(X_mat[,-1])
chi_FG<--(n-1-(2*m+5)/6)*log(determinante_R)
print(chi_FG)
## [1] 31.38122

Regla de desición: Dado que el estadístico de prueba χ² = 31.38 es mayor que el valor crítico (7.815), se rechaza la hipótesis nula, concluyendo que existe multicolinealidad entre las variables explicativas del modelo.

Cálculo de FG usando “mctest”

library(mctest)
mctest::omcdiag(mod = modelo_estimado)
## 
## Call:
## mctest::omcdiag(mod = modelo_estimado)
## 
## 
## Overall Multicollinearity Diagnostics
## 
##                        MC Results detection
## Determinant |X'X|:         0.6918         0
## Farrar Chi-Square:        31.3812         1
## Red Indicator:             0.3341         0
## Sum of Lambda Inverse:     3.8525         0
## Theil's Method:           -0.7297         0
## Condition Number:         11.8678         0
## 
## 1 --> COLLINEARITY is detected by the test 
## 0 --> COLLINEARITY is not detected by the test

- Cálculo de FG usando la “psych”

library(psych)
## Warning: package 'psych' was built under R version 4.5.3
FG_test<-cortest.bartlett(X_mat[,-1])
## R was not square, finding R from data
print(FG_test)
## $chisq
## [1] 31.38122
## 
## $p.value
## [1] 0.0000007065806
## 
## $df
## [1] 3

b) Factores inflacionarios de la varianza, presente sus resultados de forma tabular y de forma gráfica.

Referencia entre R2j

library(dplyr)
## Warning: package 'dplyr' was built under R version 4.5.3
## 
## Adjuntando el paquete: 'dplyr'
## The following object is masked from 'package:kableExtra':
## 
##     group_rows
## The following objects are masked from 'package:stats':
## 
##     filter, lag
## The following objects are masked from 'package:base':
## 
##     intersect, setdiff, setequal, union
R.cuadrado.regresores<-c(0,0.5,.8,.9)
as.data.frame(R.cuadrado.regresores) %>% mutate(VIF=1/(1-R.cuadrado.regresores))
##   R.cuadrado.regresores VIF
## 1                   0.0   1
## 2                   0.5   2
## 3                   0.8   5
## 4                   0.9  10

##Cálculo manual: Matriz de Correlación de los regresores del modelo (Como se obtuvo con anterioridad):

print(R)
##           lotsize     sqrft     bdrms
## lotsize 1.0000000 0.1838422 0.1363256
## sqrft   0.1838422 1.0000000 0.5314736
## bdrms   0.1363256 0.5314736 1.0000000

Inversa de la matriz de correlación R−1

inversa_R<-solve(R)
print(inversa_R)
##             lotsize      sqrft       bdrms
## lotsize  1.03721145 -0.1610145 -0.05582352
## sqrft   -0.16101454  1.4186543 -0.73202696
## bdrms   -0.05582352 -0.7320270  1.39666321

VIF’s para el modelo estimado:

 VIF_s <-diag(inversa_R)
print (VIF_s)
##  lotsize    sqrft    bdrms 
## 1.037211 1.418654 1.396663

Cálculo de los VIF’s usando “performance”

library(performance)
## Warning: package 'performance' was built under R version 4.5.3
VIFs<-multicollinearity(x = modelo_estimado,verbose = FALSE)
VIFs
## # Check for Multicollinearity
## 
## Low Correlation
## 
##     Term  VIF    VIF 95% CI adj. VIF Tolerance Tolerance 95% CI
##  lotsize 1.04 [1.00, 11.02]     1.02      0.96     [0.09, 1.00]
##    sqrft 1.42 [1.18,  1.98]     1.19      0.70     [0.51, 0.85]
##    bdrms 1.40 [1.17,  1.95]     1.18      0.72     [0.51, 0.86]
library(performance)
library(ggplot2)
## Warning: package 'ggplot2' was built under R version 4.5.3
## 
## Adjuntando el paquete: 'ggplot2'
## The following objects are masked from 'package:psych':
## 
##     %+%, alpha
VIFs <- multicollinearity(modelo_estimado)
df_vif <- VIFs[, c("Term", "VIF")]

ggplot(df_vif, aes(x = Term, y = VIF)) +
  geom_col() +
  labs(title = "VIF por variable",
       x = "Variables",
       y = "VIF")

Cálculo de los VIF’s usando “car”

library(car)
## Warning: package 'car' was built under R version 4.5.3
## Cargando paquete requerido: carData
## Warning: package 'carData' was built under R version 4.5.2
## 
## Adjuntando el paquete: 'car'
## The following object is masked from 'package:dplyr':
## 
##     recode
## The following object is masked from 'package:psych':
## 
##     logit
VIFs_car<-vif(modelo_estimado)
print(VIFs_car)
##  lotsize    sqrft    bdrms 
## 1.037211 1.418654 1.396663

Cálculo de los VIF’s usando “mctest”

library(mctest)
mc.plot(mod = modelo_estimado,vif = 2)