Importacion de datos
library(wooldridge)
library(fastGraph)
library(haven)
hprice1 <- read_dta("C:/Users/USUARIO/Downloads/hprice1.dta")
head(hprice1, n=5)
## # A tibble: 5 x 10
## price assess bdrms lotsize sqrft colonial lprice lassess llotsize lsqrft
## <dbl> <dbl> <dbl> <dbl> <dbl> <dbl> <dbl> <dbl> <dbl> <dbl>
## 1 300 349. 4 6126 2438 1 5.70 5.86 8.72 7.80
## 2 370 352. 3 9903 2076 1 5.91 5.86 9.20 7.64
## 3 191 218. 3 5200 1374 0 5.25 5.38 8.56 7.23
## 4 195 232. 3 4600 1448 1 5.27 5.45 8.43 7.28
## 5 373 319. 4 6095 2514 1 5.92 5.77 8.72 7.83
1. Estimacion del modelo
library(stargazer)
library(wooldridge)
modelo_estimado<-lm(formula= price~lotsize+sqrft+bdrms, data = hprice1)
stargazer(modelo_estimado, type="html", title="Modelo estimado")
Modelo estimado
|
|
|
|
Dependent variable:
|
|
|
|
|
|
price
|
|
|
|
lotsize
|
0.002***
|
|
|
(0.001)
|
|
|
|
|
sqrft
|
0.123***
|
|
|
(0.013)
|
|
|
|
|
bdrms
|
13.853
|
|
|
(9.010)
|
|
|
|
|
Constant
|
-21.770
|
|
|
(29.475)
|
|
|
|
|
|
|
Observations
|
88
|
|
R2
|
0.672
|
|
Adjusted R2
|
0.661
|
|
Residual Std. Error
|
59.833 (df = 84)
|
|
F Statistic
|
57.460*** (df = 3; 84)
|
|
|
|
Note:
|
p<0.1; p<0.05; p<0.01
|
2.Verifique si hay evidencia: a)indice de condicion y prueba de FG
#calcular xmat
library(stargazer)
xmat<-model.matrix(modelo_estimado)
stargazer(head(xmat, n=5), type = "html")
|
|
|
|
(Intercept)
|
lotsize
|
sqrft
|
bdrms
|
|
|
|
1
|
1
|
6,126
|
2,438
|
4
|
|
2
|
1
|
9,903
|
2,076
|
3
|
|
3
|
1
|
5,200
|
1,374
|
3
|
|
4
|
1
|
4,600
|
1,448
|
3
|
|
5
|
1
|
6,095
|
2,514
|
4
|
|
|
continuando con la sigma matriz
xxmat<-t(xmat)%*%xmat
stargazer(xxmat, type = "html")
|
|
|
|
(Intercept)
|
lotsize
|
sqrft
|
bdrms
|
|
|
|
(Intercept)
|
88
|
793,748
|
177,205
|
314
|
|
lotsize
|
793,748
|
16,165,159,010
|
1,692,290,257
|
2,933,767
|
|
sqrft
|
177,205
|
1,692,290,257
|
385,820,561
|
654,755
|
|
bdrms
|
314
|
2,933,767
|
654,755
|
1,182
|
|
|
sn matriz de normalizacion
library(stargazer)
options(scipen=999)
sn<-solve(diag(sqrt(diag(xxmat))))
stargazer(sn, type="html")
|
|
|
0.107
|
0
|
0
|
0
|
|
0
|
0.00001
|
0
|
0
|
|
0
|
0
|
0.0001
|
0
|
|
0
|
0
|
0
|
0.029
|
|
|
xxmat normalizada
xx_norm<-(sn%*%xxmat)%*%sn
stargazer(xx_norm, type = "html", digits = 4)
|
|
|
1
|
0.6655
|
0.9617
|
0.9736
|
|
0.6655
|
1
|
0.6776
|
0.6712
|
|
0.9617
|
0.6776
|
1
|
0.9696
|
|
0.9736
|
0.6712
|
0.9696
|
1
|
|
|
Autovalores
lambdas<-eigen(xx_norm, symmetric = TRUE)
stargazer(lambdas$values, type="html")
indice de condicion
k<-sqrt(max(lambdas$values)/min(lambdas$values))
print(k)
## [1] 11.86778
Indice de condicion usando mctest
library(mctest)
source(file ="C:/Users/USUARIO/Downloads/correccion_eigprop.R" )
eigprop(mod=modelo_estimado)
##
## Call:
## eigprop(mod = modelo_estimado)
##
## Eigenvalues CI (Intercept) lotsize sqrft bdrms
## 1 3.4816 1.0000 0.0037 0.0278 0.0042 0.0029
## 2 0.4552 2.7656 0.0068 0.9671 0.0061 0.0051
## 3 0.0385 9.5082 0.4726 0.0051 0.8161 0.0169
## 4 0.0247 11.8678 0.5170 0.0000 0.1737 0.9750
##
## ===============================
## Row 2==> lotsize, proportion 0.967080 >= 0.50
## Row 3==> sqrft, proportion 0.816079 >= 0.50
## Row 4==> bdrms, proportion 0.975026 >= 0.50
Normalizar la matriz x calculo de R
library(stargazer)
zn<-scale(xmat[,-1])
stargazer(head(zn, n=6), type = "html")
|
|
|
|
lotsize
|
sqrft
|
bdrms
|
|
|
|
1
|
-0.284
|
0.735
|
0.513
|
|
2
|
0.087
|
0.108
|
-0.675
|
|
3
|
-0.375
|
-1.108
|
-0.675
|
|
4
|
-0.434
|
-0.980
|
-0.675
|
|
5
|
-0.287
|
0.867
|
0.513
|
|
6
|
-0.045
|
1.283
|
1.702
|
|
|
Calcular la matriz R
library(stargazer)
n<-nrow(zn)
R<-(t(zn)%*%zn)*(1/(n-1))
#tambien se puede calcular R atraves de cor(xmat[,-1])
stargazer(R, type= "html", digits=4)
|
|
|
|
lotsize
|
sqrft
|
bdrms
|
|
|
|
lotsize
|
1
|
0.1838
|
0.1363
|
|
sqrft
|
0.1838
|
1
|
0.5315
|
|
bdrms
|
0.1363
|
0.5315
|
1
|
|
|
usando cor
cor(xmat[,-1])
## lotsize sqrft bdrms
## lotsize 1.0000000 0.1838422 0.1363256
## sqrft 0.1838422 1.0000000 0.5314736
## bdrms 0.1363256 0.5314736 1.0000000
Determinante R
determinante_R<-det(R)
print(determinante_R)
## [1] 0.6917931
Aplicando la prueba FG
library(fastGraph)
m<-ncol(xmat[,-1])
# cantidad de variables explicativas k-1
n<-nrow(xmat[,-1])#numero de observaciones
det_R<-det(cor(xmat[,-1]))
chi_FG<--(n-1-(2*m+5)/6)*log(det_R)
print(chi_FG)
## [1] 31.38122
Valor critico
gl<-m*(m-1)/2
vc<-qchisq(0.05,gl,lower.tail= FALSE)
print(vc)
## [1] 7.814728
shadeDist(xshade= chi_FG, ddist="dchisq", parm1= gl, lower.tail= FALSE, sub=paste("vc:", vc,"FG:", chi_FG))

FG con mctest
library(mctest)
mctest(modelo_estimado)
##
## Call:
## omcdiag(mod = mod, Inter = TRUE, detr = detr, red = red, conf = conf,
## theil = theil, cn = cn)
##
##
## Overall Multicollinearity Diagnostics
##
## MC Results detection
## Determinant |X'X|: 0.6918 0
## Farrar Chi-Square: 31.3812 1
## Red Indicator: 0.3341 0
## Sum of Lambda Inverse: 3.8525 0
## Theil's Method: -0.7297 0
## Condition Number: 11.8678 0
##
## 1 --> COLLINEARITY is detected by the test
## 0 --> COLLINEARITY is not detected by the test
FG en psych
library(psych)
FG_test<-cortest.bartlett(xmat[,-1])
vc_1<-qchisq(0.05, FG_test$df, lower.tail = FALSE)
print(FG_test)
$chisq [1] 31.38122
$p.value [1] 0.0000007065806
$df [1] 3
shadeDist(xshade=FG_test$chisq, ddist="dchisq", parm1=FG_test$df, lower.tail=FALSE, sub=paste("vc:", vc_1, "FG:", FG_test$chisq))

b) Factores inflacionarios de la Varianza
VIF<-diag(solve(cor(xmat[,-1])))
print(VIF)
## lotsize sqrft bdrms
## 1.037211 1.418654 1.396663
Matriz de correlacion de los regresores
print(R)
## lotsize sqrft bdrms
## lotsize 1.0000000 0.1838422 0.1363256
## sqrft 0.1838422 1.0000000 0.5314736
## bdrms 0.1363256 0.5314736 1.0000000
Inversa de la matriz de correlacion
inversa_R<-solve(R)
print(inversa_R)
## lotsize sqrft bdrms
## lotsize 1.03721145 -0.1610145 -0.05582352
## sqrft -0.16101454 1.4186543 -0.73202696
## bdrms -0.05582352 -0.7320270 1.39666321
VIF para el modelo estimado
VIFs<-diag(inversa_R)
print(VIFs)
## lotsize sqrft bdrms
## 1.037211 1.418654 1.396663
VIF con libreria car
library(car)
VIF_car<-vif(modelo_estimado)
print(VIF_car)
## lotsize sqrft bdrms
## 1.037211 1.418654 1.396663
VIF con mctest
library(mctest)
mc.plot(modelo_estimado, vif=2)

la varianza de las variables esta cercano a 2 lo que seria el 50% que representa un nivel de tolerancia aceptable y cuando se calcula el indice de condicion se obtiene que es de 11.86, el valor es inferior a 20 indicando que es leve la multicolinealidad