getwd()
## [1] "C:/Users/Fercs/OneDrive/Tec de Monterrey/Uni/S4/Econometria"
setwd("C:/Users/Fercs/OneDrive/Tec de Monterrey/Uni/S4/Econometria") #definir directorio
df_ct<-read.csv("EV_CT.csv",encoding="latin1") #importar la base de datos
#Modelo general que incorpora todas las variables
lm_ct<-lm(desemp_tasa~.-país,df_ct)
library(stargazer)
##
## Please cite as:
## Hlavac, Marek (2022). stargazer: Well-Formatted Regression and Summary Statistics Tables.
## R package version 5.2.3. https://CRAN.R-project.org/package=stargazer
stargazer(lm_ct,type="text")
##
## ===============================================
## Dependent variable:
## ---------------------------
## desemp_tasa
## -----------------------------------------------
## pob_total 0.186
## (0.208)
##
## pob_crec 0.961
## (1.845)
##
## IPC 0.000
## (0.000)
##
## empleo_total -0.0005
## (0.001)
##
## PIB_pc -0.0001
## (0.0004)
##
## inv_total_fija -0.00001
## (0.0001)
##
## inf -0.034
## (0.067)
##
## ing_disp_ph 0.113
## (0.220)
##
## gasto_cons 0.00000
## (0.00003)
##
## Constant 8.056**
## (3.012)
##
## -----------------------------------------------
## Observations 30
## R2 0.130
## Adjusted R2 -0.262
## Residual Std. Error 5.082 (df = 20)
## F Statistic 0.331 (df = 9; 20)
## ===============================================
## Note: *p<0.1; **p<0.05; ***p<0.01
#Pruebas de multicolinealidad
#VIF
library(car)
## Loading required package: carData
vif<-vif(lm_ct)
barplot(vif, main = "Valores de VIF", horiz = TRUE, col = "#CBC3E3",cex.names=0.8,las=1)
abline(v = 10, lwd = 3, lty = 2)

#Matriz de correlación
library(corrplot)
## corrplot 0.92 loaded
library(dplyr)
##
## Attaching package: 'dplyr'
## The following object is masked from 'package:car':
##
## recode
## The following objects are masked from 'package:stats':
##
## filter, lag
## The following objects are masked from 'package:base':
##
## intersect, setdiff, setequal, union
df_cor<-df_ct%>%
select(pob_total,pob_crec,IPC,empleo_total,PIB_pc,desemp_tasa,inv_total_fija,inf,ing_disp_ph,gasto_cons)
corrM<-round(cor(df_cor),2)
corrplot(corrM,method="number",type="upper",tl.col="black",tl.cex=0.5,number.digits=1,number.cex=0.5)

#Nuevo modelo eliminando variables problemáticas
best.model<-lm(desemp_tasa~pob_crec+inf+empleo_total+inv_total_fija+ing_disp_ph,df_ct)
# VIF del nuevo modelo
vif2<-vif(best.model)
barplot(vif2, main = "Valores de VIF", horiz = TRUE, col = "#CBC3E3",cex.names=0.8,las=1)
abline(v = 10, lwd = 3, lty = 2)

#Correlaciones nuevo modelo
df_cor2<-df_ct%>%
select(desemp_tasa,pob_crec,inf,empleo_total,inv_total_fija,ing_disp_ph)
corrM2<-round(cor(df_cor2),2)
corrplot(corrM2,method="number",type="upper",tl.col="black",tl.cex=0.5,number.digits=1,number.cex=0.5)

#Prueba de heterocedasticidad
#Prueba gráfica
plot(best.model)



## Warning in sqrt(crit * p * (1 - hh)/hh): Se han producido NaNs
## Warning in sqrt(crit * p * (1 - hh)/hh): Se han producido NaNs
#Breusch-Pagan
library(lmtest)
## Loading required package: zoo
##
## Attaching package: 'zoo'
## The following objects are masked from 'package:base':
##
## as.Date, as.Date.numeric

bptest(best.model)
##
## studentized Breusch-Pagan test
##
## data: best.model
## BP = 1.8845, df = 5, p-value = 0.8649
#White
library(sandwich)
best.modelW<-coeftest(best.model,vcov=vcovHC(best.model,"HC1"))
summary(best.model)
##
## Call:
## lm(formula = desemp_tasa ~ pob_crec + inf + empleo_total + inv_total_fija +
## ing_disp_ph, data = df_ct)
##
## Residuals:
## Min 1Q Median 3Q Max
## -7.8346 -2.9186 0.1672 2.8190 9.3617
##
## Coefficients:
## Estimate Std. Error t value Pr(>|t|)
## (Intercept) 8.162e+00 2.606e+00 3.132 0.00452 **
## pob_crec 8.966e-01 1.420e+00 0.631 0.53370
## inf 3.121e-03 2.662e-03 1.172 0.25256
## empleo_total 1.726e-05 5.853e-05 0.295 0.77066
## inv_total_fija -2.695e-06 2.972e-06 -0.907 0.37344
## ing_disp_ph 6.610e-02 6.716e-02 0.984 0.33486
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## Residual standard error: 4.771 on 24 degrees of freedom
## Multiple R-squared: 0.07951, Adjusted R-squared: -0.1123
## F-statistic: 0.4146 on 5 and 24 DF, p-value: 0.8339
best.modelW
##
## t test of coefficients:
##
## Estimate Std. Error t value Pr(>|t|)
## (Intercept) 8.1615e+00 2.6393e+00 3.0924 0.004978 **
## pob_crec 8.9663e-01 1.4035e+00 0.6388 0.528977
## inf 3.1212e-03 1.8433e-03 1.6933 0.103347
## empleo_total 1.7258e-05 5.5609e-05 0.3103 0.758983
## inv_total_fija -2.6951e-06 2.4243e-06 -1.1117 0.277278
## ing_disp_ph 6.6097e-02 5.6285e-02 1.1743 0.251788
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
#Corrigiendo la heterocedasticidad
#Variables con presencia de outliers
boxplot(df_ct$inf,df_ct$empleo_total,df_ct$inv_total_fija,df_ct$ing_disp_ph)

library(stats)
#eliminando outliers de la inflación
cuar_inf <- quantile(df_ct$inf, probs=c(.25, .75), na.rm = FALSE)
IQR_inf <- IQR(df_ct$inf)
Lower_inf <- cuar_inf[1] - 1.5*IQR_inf
Upper_inf <- cuar_inf[2] + 1.5*IQR_inf
#eliminando outliers de empleo total
cuar_empleo_total <- quantile(df_ct$empleo_total, probs=c(.25, .75), na.rm = FALSE)
IQR_empleo_total <- IQR(df_ct$empleo_total)
Lower_empleo_total <- cuar_empleo_total[1] - 1.5*IQR_empleo_total
Upper_empleo_total <- cuar_empleo_total[2] + 1.5*IQR_empleo_total
#eliminando outliers de inversión
cuar_inv_total_fija <- quantile(df_ct$inv_total_fija, probs=c(.25, .75), na.rm = FALSE)
IQR_inv_total_fija <- IQR(df_ct$inv_total_fija)
Lower_inv_total_fija <- cuar_inv_total_fija[1] - 1.5*IQR_inv_total_fija
Upper_inv_total_fija <- cuar_inv_total_fija[2] + 1.5*IQR_inv_total_fija
#eliminando outliers de ingreso disponible
cuar_ing_disp_ph <- quantile(df_ct$ing_disp_ph, probs=c(.25, .75), na.rm = FALSE)
IQR_ing_disp_ph<- IQR(df_ct$ing_disp_ph)
Lower_ing_disp_ph<- cuar_ing_disp_ph[1] - 1.5*IQR_ing_disp_ph
Upper_ing_disp_ph <- cuar_ing_disp_ph + 1.5*IQR_ing_disp_ph
#Nuevo data frame con los datos de los cuartiles 1 a 3
df_no_out<-df_ct%>%
filter(ifelse(inf>Lower_inf,inf<Upper_inf,"NA"))%>%
filter(ifelse(empleo_total>Lower_empleo_total,empleo_total<Upper_empleo_total,"NA"))%>%
filter(ifelse(inv_total_fija>Lower_inv_total_fija,inv_total_fija<Upper_inv_total_fija,"NA"))%>%
filter(ifelse(ing_disp_ph>Lower_ing_disp_ph,ing_disp_ph<Upper_ing_disp_ph,"NA"))%>%
select(país,desemp_tasa,pob_crec,inf,empleo_total,inv_total_fija,ing_disp_ph)
boxplot(df_no_out$inf,df_no_out$empleo_total,df_no_out$inv_total_fija,df_no_out$ing_disp_ph)

#Nuevas pruebas de heterocedasticidad
modelHC<-lm(poly(desemp_tasa,2)~poly(pob_crec,2)+poly(inf,2)+log(empleo_total)+log(inv_total_fija)+ing_disp_ph,df_no_out)
desemp2<-(df_no_out$desemp_tasa)^2
pob2<-(df_no_out$pob_crec)^2
inf2<-(df_no_out$inf)^2
log_empleo<-log(df_no_out$empleo_total)
log_inv<-log(df_no_out$inv_total_fija)
log_ing<-log(df_no_out$ing_disp_ph)
modelp<-lm(poly(desemp_tasa,2)~pob2+inf2+log_empleo+log_inv+log_ing,df_no_out)
bptest(modelp)
##
## studentized Breusch-Pagan test
##
## data: modelp
## BP = 11.384, df = 5, p-value = 0.04428
summary(modelp)
## Response 1 :
##
## Call:
## lm(formula = `1` ~ pob2 + inf2 + log_empleo + log_inv + log_ing,
## data = df_no_out)
##
## Residuals:
## Min 1Q Median 3Q Max
## -0.3472 -0.1397 -0.0473 0.1516 0.3649
##
## Coefficients:
## Estimate Std. Error t value Pr(>|t|)
## (Intercept) 0.557738 0.785345 0.710 0.4924
## pob2 -0.029008 0.082617 -0.351 0.7321
## inf2 -0.003338 0.002868 -1.164 0.2691
## log_empleo -0.348994 0.173261 -2.014 0.0691 .
## log_inv 0.305386 0.153461 1.990 0.0720 .
## log_ing -0.158095 0.209593 -0.754 0.4665
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## Residual standard error: 0.2331 on 11 degrees of freedom
## Multiple R-squared: 0.4022, Adjusted R-squared: 0.1305
## F-statistic: 1.48 on 5 and 11 DF, p-value: 0.2726
##
##
## Response 2 :
##
## Call:
## lm(formula = `2` ~ pob2 + inf2 + log_empleo + log_inv + log_ing,
## data = df_no_out)
##
## Residuals:
## Min 1Q Median 3Q Max
## -0.28243 -0.17770 -0.03127 0.12367 0.35420
##
## Coefficients:
## Estimate Std. Error t value Pr(>|t|)
## (Intercept) -1.041898 0.820911 -1.269 0.2306
## pob2 -0.020931 0.086358 -0.242 0.8130
## inf2 -0.004548 0.002998 -1.517 0.1574
## log_empleo 0.234471 0.181107 1.295 0.2220
## log_inv -0.206537 0.160410 -1.288 0.2243
## log_ing 0.411427 0.219085 1.878 0.0871 .
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## Residual standard error: 0.2437 on 11 degrees of freedom
## Multiple R-squared: 0.3468, Adjusted R-squared: 0.04991
## F-statistic: 1.168 on 5 and 11 DF, p-value: 0.3837