# Regresión lineal múltiple de corte transversal

# Show the current working directory (diagnostic only).
getwd()

## [1] "C:/Users/Fercs/OneDrive/Tec de Monterrey/Uni/S4/Econometria"

# Folder that holds the input data. The file is read through an explicit
# path instead of calling setwd(), so the script no longer changes the
# session's working directory as a side effect.
data_dir <- "C:/Users/Fercs/OneDrive/Tec de Monterrey/Uni/S4/Econometria"

# Import the cross-sectional data set; input strings are declared as latin1.
df_ct <- read.csv(file.path(data_dir, "EV_CT.csv"), encoding = "latin1")

# Package used to print the regression table.
library(stargazer)

## 
## Please cite as:

##  Hlavac, Marek (2022). stargazer: Well-Formatted Regression and Summary Statistics Tables.

##  R package version 5.2.3. https://CRAN.R-project.org/package=stargazer

# General model: desemp_tasa regressed on every other column of df_ct
# except the país column.
lm_ct <- lm(desemp_tasa ~ . - país, data = df_ct)

# Plain-text regression table for the general model.
stargazer(lm_ct, type = "text")

## 
## ===============================================
##                         Dependent variable:    
##                     ---------------------------
##                             desemp_tasa        
## -----------------------------------------------
## pob_total                      0.186           
##                               (0.208)          
##                                                
## pob_crec                       0.961           
##                               (1.845)          
##                                                
## IPC                            0.000           
##                               (0.000)          
##                                                
## empleo_total                  -0.0005          
##                               (0.001)          
##                                                
## PIB_pc                        -0.0001          
##                              (0.0004)          
##                                                
## inv_total_fija               -0.00001          
##                              (0.0001)          
##                                                
## inf                           -0.034           
##                               (0.067)          
##                                                
## ing_disp_ph                    0.113           
##                               (0.220)          
##                                                
## gasto_cons                    0.00000          
##                              (0.00003)         
##                                                
## Constant                      8.056**          
##                               (3.012)          
##                                                
## -----------------------------------------------
## Observations                    30             
## R2                             0.130           
## Adjusted R2                   -0.262           
## Residual Std. Error       5.082 (df = 20)      
## F Statistic             0.331 (df = 9; 20)     
## ===============================================
## Note:               *p<0.1; **p<0.05; ***p<0.01

# Multicollinearity checks

# Variance inflation factors (VIF) of the general model
library(car)

## Loading required package: carData

# Stored as vif_ct rather than vif so the result does not mask car::vif().
vif_ct <- vif(lm_ct)

# Horizontal bar chart of the VIF values, one bar per regressor.
barplot(
  vif_ct,
  main = "Valores de VIF",
  horiz = TRUE,
  col = "#CBC3E3",
  cex.names = 0.8,
  las = 1
)

# Dashed vertical reference line at VIF = 10.
abline(v = 10, lwd = 3, lty = 2)

# Correlation matrix
library(corrplot)

## corrplot 0.92 loaded

library(dplyr)

## 
## Attaching package: 'dplyr'

## The following object is masked from 'package:car':
## 
##     recode

## The following objects are masked from 'package:stats':
## 
##     filter, lag

## The following objects are masked from 'package:base':
## 
##     intersect, setdiff, setequal, union

# Keep only the variables that enter the correlation matrix.
df_cor <- select(
  df_ct,
  pob_total, pob_crec, IPC, empleo_total, PIB_pc,
  desemp_tasa, inv_total_fija, inf, ing_disp_ph, gasto_cons
)

# Pairwise correlations, rounded to two decimals.
corrM <- df_cor %>%
  cor() %>%
  round(2)

# Upper-triangle plot showing the coefficients as numbers.
corrplot(
  corrM,
  method = "number",
  type = "upper",
  tl.col = "black",
  tl.cex = 0.5,
  number.digits = 1,
  number.cex = 0.5
)

# Reduced model after dropping the problematic variables
best.model <- lm(
  desemp_tasa ~ pob_crec + inf + empleo_total + inv_total_fija + ing_disp_ph,
  data = df_ct
)

# VIF of the reduced model, with a dashed reference line at VIF = 10
vif2 <- vif(best.model)
barplot(
  vif2,
  main = "Valores de VIF",
  horiz = TRUE,
  col = "#CBC3E3",
  cex.names = 0.8,
  las = 1
)
abline(v = 10, lwd = 3, lty = 2)

# Correlations among the variables of the reduced model
df_cor2 <- select(
  df_ct,
  desemp_tasa, pob_crec, inf, empleo_total, inv_total_fija, ing_disp_ph
)
corrM2 <- df_cor2 %>%
  cor() %>%
  round(2)
corrplot(
  corrM2,
  method = "number",
  type = "upper",
  tl.col = "black",
  tl.cex = 0.5,
  number.digits = 1,
  number.cex = 0.5
)

# Heteroskedasticity checks

# lmtest provides bptest(); loading it also attaches zoo.
library(lmtest)

## Loading required package: zoo

## 
## Attaching package: 'zoo'

## The following objects are masked from 'package:base':
## 
##     as.Date, as.Date.numeric

# Graphical check: default diagnostic plots of the fitted lm object.
plot(best.model)

## Warning in sqrt(crit * p * (1 - hh)/hh): Se han producido NaNs

## Warning in sqrt(crit * p * (1 - hh)/hh): Se han producido NaNs

# Breusch-Pagan test on the reduced model.
bptest(best.model)

## 
##  studentized Breusch-Pagan test
## 
## data:  best.model
## BP = 1.8845, df = 5, p-value = 0.8649

# Heteroskedasticity-robust standard errors (HC1 covariance estimator).
# This re-tests the coefficients of best.model with a robust covariance
# matrix; it is not a White test for heteroskedasticity.
library(sandwich)

# Arguments are spelled out in full: coeftest() names its covariance
# argument `vcov.`, and vcovHC() takes the estimator through `type`.
best.modelW <- coeftest(
  best.model,
  vcov. = vcovHC(best.model, type = "HC1")
)

# Conventional OLS summary, for comparison with the robust table.
summary(best.model)

## 
## Call:
## lm(formula = desemp_tasa ~ pob_crec + inf + empleo_total + inv_total_fija + 
##     ing_disp_ph, data = df_ct)
## 
## Residuals:
##     Min      1Q  Median      3Q     Max 
## -7.8346 -2.9186  0.1672  2.8190  9.3617 
## 
## Coefficients:
##                  Estimate Std. Error t value Pr(>|t|)   
## (Intercept)     8.162e+00  2.606e+00   3.132  0.00452 **
## pob_crec        8.966e-01  1.420e+00   0.631  0.53370   
## inf             3.121e-03  2.662e-03   1.172  0.25256   
## empleo_total    1.726e-05  5.853e-05   0.295  0.77066   
## inv_total_fija -2.695e-06  2.972e-06  -0.907  0.37344   
## ing_disp_ph     6.610e-02  6.716e-02   0.984  0.33486   
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## Residual standard error: 4.771 on 24 degrees of freedom
## Multiple R-squared:  0.07951,    Adjusted R-squared:  -0.1123 
## F-statistic: 0.4146 on 5 and 24 DF,  p-value: 0.8339

# Show the coefficient tests stored in best.modelW.
print(best.modelW)

## 
## t test of coefficients:
## 
##                   Estimate  Std. Error t value Pr(>|t|)   
## (Intercept)     8.1615e+00  2.6393e+00  3.0924 0.004978 **
## pob_crec        8.9663e-01  1.4035e+00  0.6388 0.528977   
## inf             3.1212e-03  1.8433e-03  1.6933 0.103347   
## empleo_total    1.7258e-05  5.5609e-05  0.3103 0.758983   
## inv_total_fija -2.6951e-06  2.4243e-06 -1.1117 0.277278   
## ing_disp_ph     6.6097e-02  5.6285e-02  1.1743 0.251788   
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1

# Correcting the heteroskedasticity

# Box plots of the variables that show outliers
boxplot(df_ct$inf, df_ct$empleo_total, df_ct$inv_total_fija, df_ct$ing_disp_ph)

# stats is attached by default; the call is kept from the original script.
library(stats)

# Outlier fences of a numeric vector.
#
# x: numeric vector; must not contain NA (quantile() is called with
#    na.rm = FALSE and stops on missing values).
# k: multiplier applied to the interquartile range (default 1.5).
#
# Returns a named numeric vector c(lower = Q1 - k * IQR, upper = Q3 + k * IQR).
iqr_fences <- function(x, k = 1.5) {
  stopifnot(is.numeric(x))
  quartiles <- quantile(x, probs = c(0.25, 0.75), na.rm = FALSE, names = FALSE)
  spread <- IQR(x)
  c(lower = quartiles[1] - k * spread, upper = quartiles[2] + k * spread)
}

# Fences for inflation
fences_inf <- iqr_fences(df_ct$inf)
Lower_inf <- fences_inf[["lower"]]
Upper_inf <- fences_inf[["upper"]]

# Fences for total employment
fences_empleo_total <- iqr_fences(df_ct$empleo_total)
Lower_empleo_total <- fences_empleo_total[["lower"]]
Upper_empleo_total <- fences_empleo_total[["upper"]]

# Fences for fixed investment
fences_inv_total_fija <- iqr_fences(df_ct$inv_total_fija)
Lower_inv_total_fija <- fences_inv_total_fija[["lower"]]
Upper_inv_total_fija <- fences_inv_total_fija[["upper"]]

# Fences for disposable income. The upper fence is now a single value built
# from the third quartile; previously the whole two-element quartile vector
# was used, which was then recycled across rows inside the filter.
fences_ing_disp_ph <- iqr_fences(df_ct$ing_disp_ph)
Lower_ing_disp_ph <- fences_ing_disp_ph[["lower"]]
Upper_ing_disp_ph <- fences_ing_disp_ph[["upper"]]

# New data frame keeping only the rows that lie strictly inside the fences
# of all four variables. Plain logical conditions replace the former
# ifelse(..., "NA") calls, which produced a character vector (rejected by
# filter()) whenever a value fell at or below a lower fence.
# NOTE(review): the model output printed further down in this document was
# generated with the earlier filter; re-run the script to refresh it.
df_no_out <- df_ct %>%
  filter(
    inf > Lower_inf,
    inf < Upper_inf,
    empleo_total > Lower_empleo_total,
    empleo_total < Upper_empleo_total,
    inv_total_fija > Lower_inv_total_fija,
    inv_total_fija < Upper_inv_total_fija,
    ing_disp_ph > Lower_ing_disp_ph,
    ing_disp_ph < Upper_ing_disp_ph
  ) %>%
  select(
    país, desemp_tasa, pob_crec, inf,
    empleo_total, inv_total_fija, ing_disp_ph
  )

# Box plots of the same variables after removing the outliers
boxplot(
  df_no_out$inf,
  df_no_out$empleo_total,
  df_no_out$inv_total_fija,
  df_no_out$ing_disp_ph
)

# New heteroskedasticity tests on the outlier-free data

# Model with polynomial and log terms written directly in the formula.
# NOTE(review): modelHC is not referenced again in this file.
modelHC <- lm(
  poly(desemp_tasa, 2) ~ poly(pob_crec, 2) + poly(inf, 2) +
    log(empleo_total) + log(inv_total_fija) + ing_disp_ph,
  data = df_no_out
)

# Transformed variables built as standalone vectors. They are not columns of
# df_no_out, so lm() below finds them outside the `data` argument.
# NOTE(review): desemp2 is not referenced again in this file.
desemp2 <- df_no_out[["desemp_tasa"]]^2
pob2 <- df_no_out[["pob_crec"]]^2
inf2 <- df_no_out[["inf"]]^2
log_empleo <- log(df_no_out[["empleo_total"]])
log_inv <- log(df_no_out[["inv_total_fija"]])
log_ing <- log(df_no_out[["ing_disp_ph"]])

# NOTE(review): the response poly(desemp_tasa, 2) has two columns, so this
# fits one regression per column (reported as "Response 1" and "Response 2"
# in the summary) - confirm this is the intended specification.
modelp <- lm(
  poly(desemp_tasa, 2) ~ pob2 + inf2 + log_empleo + log_inv + log_ing,
  data = df_no_out
)

# Breusch-Pagan test on the transformed model
bptest(modelp)

## 
##  studentized Breusch-Pagan test
## 
## data:  modelp
## BP = 11.384, df = 5, p-value = 0.04428

# Coefficient tables of the transformed model
summary(modelp)

## Response 1 :
## 
## Call:
## lm(formula = `1` ~ pob2 + inf2 + log_empleo + log_inv + log_ing, 
##     data = df_no_out)
## 
## Residuals:
##     Min      1Q  Median      3Q     Max 
## -0.3472 -0.1397 -0.0473  0.1516  0.3649 
## 
## Coefficients:
##              Estimate Std. Error t value Pr(>|t|)  
## (Intercept)  0.557738   0.785345   0.710   0.4924  
## pob2        -0.029008   0.082617  -0.351   0.7321  
## inf2        -0.003338   0.002868  -1.164   0.2691  
## log_empleo  -0.348994   0.173261  -2.014   0.0691 .
## log_inv      0.305386   0.153461   1.990   0.0720 .
## log_ing     -0.158095   0.209593  -0.754   0.4665  
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## Residual standard error: 0.2331 on 11 degrees of freedom
## Multiple R-squared:  0.4022, Adjusted R-squared:  0.1305 
## F-statistic:  1.48 on 5 and 11 DF,  p-value: 0.2726
## 
## 
## Response 2 :
## 
## Call:
## lm(formula = `2` ~ pob2 + inf2 + log_empleo + log_inv + log_ing, 
##     data = df_no_out)
## 
## Residuals:
##      Min       1Q   Median       3Q      Max 
## -0.28243 -0.17770 -0.03127  0.12367  0.35420 
## 
## Coefficients:
##              Estimate Std. Error t value Pr(>|t|)  
## (Intercept) -1.041898   0.820911  -1.269   0.2306  
## pob2        -0.020931   0.086358  -0.242   0.8130  
## inf2        -0.004548   0.002998  -1.517   0.1574  
## log_empleo   0.234471   0.181107   1.295   0.2220  
## log_inv     -0.206537   0.160410  -1.288   0.2243  
## log_ing      0.411427   0.219085   1.878   0.0871 .
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## Residual standard error: 0.2437 on 11 degrees of freedom
## Multiple R-squared:  0.3468, Adjusted R-squared:  0.04991 
## F-statistic: 1.168 on 5 and 11 DF,  p-value: 0.3837

# Regresión lineal múltiple de corte transversal

# Mafer Valaguez

# 2024-06-08