Importacion de datos

library(wooldridge)
library(fastGraph)
library(haven)
hprice1 <- read_dta("C:/Users/USUARIO/Downloads/hprice1.dta")
head(hprice1, n=5)
## # A tibble: 5 x 10
##   price assess bdrms lotsize sqrft colonial lprice lassess llotsize lsqrft
##   <dbl>  <dbl> <dbl>   <dbl> <dbl>    <dbl>  <dbl>   <dbl>    <dbl>  <dbl>
## 1   300   349.     4    6126  2438        1   5.70    5.86     8.72   7.80
## 2   370   352.     3    9903  2076        1   5.91    5.86     9.20   7.64
## 3   191   218.     3    5200  1374        0   5.25    5.38     8.56   7.23
## 4   195   232.     3    4600  1448        1   5.27    5.45     8.43   7.28
## 5   373   319.     4    6095  2514        1   5.92    5.77     8.72   7.83

1. Estimacion del modelo

library(stargazer)
library(wooldridge)
modelo_estimado<-lm(formula= price~lotsize+sqrft+bdrms, data = hprice1)
stargazer(modelo_estimado, type="html", title="Modelo estimado")
Modelo estimado
Dependent variable:
price
lotsize 0.002***
(0.001)
sqrft 0.123***
(0.013)
bdrms 13.853
(9.010)
Constant -21.770
(29.475)
Observations 88
R2 0.672
Adjusted R2 0.661
Residual Std. Error 59.833 (df = 84)
F Statistic 57.460*** (df = 3; 84)
Note: p<0.1; p<0.05; p<0.01

2.Verifique si hay evidencia: a)indice de condicion y prueba de FG

#calcular xmat
library(stargazer)
xmat<-model.matrix(modelo_estimado)
stargazer(head(xmat, n=5), type = "html")
(Intercept) lotsize sqrft bdrms
1 1 6,126 2,438 4
2 1 9,903 2,076 3
3 1 5,200 1,374 3
4 1 4,600 1,448 3
5 1 6,095 2,514 4

continuando con la sigma matriz

xxmat<-t(xmat)%*%xmat
stargazer(xxmat, type = "html")
(Intercept) lotsize sqrft bdrms
(Intercept) 88 793,748 177,205 314
lotsize 793,748 16,165,159,010 1,692,290,257 2,933,767
sqrft 177,205 1,692,290,257 385,820,561 654,755
bdrms 314 2,933,767 654,755 1,182

sn matriz de normalizacion

library(stargazer)
options(scipen=999)
sn<-solve(diag(sqrt(diag(xxmat))))
stargazer(sn, type="html")
0.107 0 0 0
0 0.00001 0 0
0 0 0.0001 0
0 0 0 0.029

xxmat normalizada

xx_norm<-(sn%*%xxmat)%*%sn
stargazer(xx_norm, type = "html", digits = 4)
1 0.6655 0.9617 0.9736
0.6655 1 0.6776 0.6712
0.9617 0.6776 1 0.9696
0.9736 0.6712 0.9696 1

Autovalores

lambdas<-eigen(xx_norm, symmetric = TRUE)
stargazer(lambdas$values, type="html")
3.482 0.455 0.039 0.025

indice de condicion

k<-sqrt(max(lambdas$values)/min(lambdas$values))
print(k)
## [1] 11.86778

Indice de condicion usando mctest

library(mctest)
source(file ="C:/Users/USUARIO/Downloads/correccion_eigprop.R" )
eigprop(mod=modelo_estimado)
## 
## Call:
## eigprop(mod = modelo_estimado)
## 
##   Eigenvalues      CI (Intercept) lotsize  sqrft  bdrms
## 1      3.4816  1.0000      0.0037  0.0278 0.0042 0.0029
## 2      0.4552  2.7656      0.0068  0.9671 0.0061 0.0051
## 3      0.0385  9.5082      0.4726  0.0051 0.8161 0.0169
## 4      0.0247 11.8678      0.5170  0.0000 0.1737 0.9750
## 
## ===============================
## Row 2==> lotsize, proportion 0.967080 >= 0.50 
## Row 3==> sqrft, proportion 0.816079 >= 0.50 
## Row 4==> bdrms, proportion 0.975026 >= 0.50

Normalizar la matriz x calculo de R

library(stargazer)
zn<-scale(xmat[,-1])
stargazer(head(zn, n=6), type = "html")
lotsize sqrft bdrms
1 -0.284 0.735 0.513
2 0.087 0.108 -0.675
3 -0.375 -1.108 -0.675
4 -0.434 -0.980 -0.675
5 -0.287 0.867 0.513
6 -0.045 1.283 1.702

Calcular la matriz R

library(stargazer)
n<-nrow(zn)
R<-(t(zn)%*%zn)*(1/(n-1))
#tambien se puede calcular R atraves de cor(xmat[,-1])
stargazer(R, type= "html", digits=4)
lotsize sqrft bdrms
lotsize 1 0.1838 0.1363
sqrft 0.1838 1 0.5315
bdrms 0.1363 0.5315 1

usando cor

cor(xmat[,-1])
##           lotsize     sqrft     bdrms
## lotsize 1.0000000 0.1838422 0.1363256
## sqrft   0.1838422 1.0000000 0.5314736
## bdrms   0.1363256 0.5314736 1.0000000

Determinante R

determinante_R<-det(R)
print(determinante_R)
## [1] 0.6917931

Aplicando la prueba FG

library(fastGraph)
m<-ncol(xmat[,-1])
# cantidad de variables explicativas k-1
n<-nrow(xmat[,-1])#numero de observaciones
det_R<-det(cor(xmat[,-1]))
chi_FG<--(n-1-(2*m+5)/6)*log(det_R)
print(chi_FG)
## [1] 31.38122

Valor critico

gl<-m*(m-1)/2
vc<-qchisq(0.05,gl,lower.tail= FALSE)
print(vc)
## [1] 7.814728
shadeDist(xshade= chi_FG, ddist="dchisq", parm1= gl, lower.tail= FALSE, sub=paste("vc:", vc,"FG:", chi_FG))

FG con mctest

library(mctest)
mctest(modelo_estimado)
## 
## Call:
## omcdiag(mod = mod, Inter = TRUE, detr = detr, red = red, conf = conf, 
##     theil = theil, cn = cn)
## 
## 
## Overall Multicollinearity Diagnostics
## 
##                        MC Results detection
## Determinant |X'X|:         0.6918         0
## Farrar Chi-Square:        31.3812         1
## Red Indicator:             0.3341         0
## Sum of Lambda Inverse:     3.8525         0
## Theil's Method:           -0.7297         0
## Condition Number:         11.8678         0
## 
## 1 --> COLLINEARITY is detected by the test 
## 0 --> COLLINEARITY is not detected by the test

FG en psych

library(psych)
FG_test<-cortest.bartlett(xmat[,-1])
vc_1<-qchisq(0.05, FG_test$df, lower.tail = FALSE)
print(FG_test)

$chisq [1] 31.38122

$p.value [1] 0.0000007065806

$df [1] 3

shadeDist(xshade=FG_test$chisq, ddist="dchisq", parm1=FG_test$df, lower.tail=FALSE, sub=paste("vc:", vc_1, "FG:", FG_test$chisq))

b) Factores inflacionarios de la Varianza

VIF<-diag(solve(cor(xmat[,-1])))
print(VIF)
##  lotsize    sqrft    bdrms 
## 1.037211 1.418654 1.396663

Matriz de correlacion de los regresores

print(R)
##           lotsize     sqrft     bdrms
## lotsize 1.0000000 0.1838422 0.1363256
## sqrft   0.1838422 1.0000000 0.5314736
## bdrms   0.1363256 0.5314736 1.0000000

Inversa de la matriz de correlacion

inversa_R<-solve(R)
print(inversa_R)
##             lotsize      sqrft       bdrms
## lotsize  1.03721145 -0.1610145 -0.05582352
## sqrft   -0.16101454  1.4186543 -0.73202696
## bdrms   -0.05582352 -0.7320270  1.39666321

VIF para el modelo estimado

VIFs<-diag(inversa_R)
print(VIFs)
##  lotsize    sqrft    bdrms 
## 1.037211 1.418654 1.396663

VIF con libreria car

library(car)
VIF_car<-vif(modelo_estimado)
print(VIF_car)
##  lotsize    sqrft    bdrms 
## 1.037211 1.418654 1.396663

VIF con mctest

library(mctest)
mc.plot(modelo_estimado, vif=2)

la varianza de las variables esta cercano a 2 lo que seria el 50% que representa un nivel de tolerancia aceptable y cuando se calcula el indice de condicion se obtiene que es de 11.86, el valor es inferior a 20 indicando que es leve la multicolinealidad