## Lendo dados do Grupo
# Load the group's dataset from the "dados" sheet of the project workbook.
# file.path() assembles the relative path with a portable separator instead of
# hard-coded Windows backslashes, so the script also runs on Linux/macOS.
dados <- read_excel(
  file.path("..", "Dados", "Dados_trabalho.xlsx"),
  sheet = "dados",
  col_names = TRUE
)
# Preview the first rows to check the import.
head(dados)
## # A tibble: 6 x 9
## Ano PIB Des Ipca ExPob LnPIB LnDes LnIpca LnExPob
## <dbl> <dbl> <dbl> <dbl> <dbl> <dbl> <dbl> <dbl> <dbl>
## 1 1992 328187944301. 7.2 1119. 27954489 26.5 1.97 7.02 17.1
## 2 1993 368295777770. 6.8 2477. 28739397 26.6 1.92 7.81 17.2
## 3 1994 525369851354. 6.8 916. 25585004. 27.0 1.92 6.82 17.1
## 4 1995 769333330369. 6.7 22.4 22430610 27.4 1.90 3.11 16.9
## 5 1996 850426433004. 7.6 9.56 23320367 27.5 2.03 2.26 17.0
## 6 1997 883206452795. 8.5 5.22 23676733 27.5 2.14 1.65 17.0
Para este presente estudo, partimos de uma série temporal dos anos de 1992 a 2019 e buscamos explicar o desemprego do país com base nas variáveis PIB, IPCA e população vivendo na extrema pobreza.
Para isso, adotaremos primeiramente a investigação de três formas funcionais para a regressão que explica a variável dependente desemprego (Des):
Modelo 1: Linear Linear, representado pela equação EqLinDes.
Modelo 2: Log Linear, representado pela equação EqLoglinDes.
Modelo 3: Log Log, representado pela equação EqLogLogDes.
O modelo 2 considera o logaritmo natural da variável dependente, e as demais variáveis são apresentadas na sua forma linear.
O modelo 3 considera todas as variáveis na sua forma logarítmica.
As variáveis serão definidas como:
PIB, representa o PIB do país para um determinado ano.
Des, representa o número de desempregados no país para um determinado ano.
Ipca, representa o índice de inflação para o país num determinado ano.
ExPob, representa a quantidade de pessoas na extrema pobreza no país num determinado ano.
LnPIB, é a variável logarítmica para o PIB.
LnDes, é a variável logarítmica para o desemprego.
LnIpca, é a variável logarítmica para a inflação.
LnExPob, é a variável logarítmica para o número de pessoas na extrema pobreza.
# Penalize scientific notation so very small coefficients print in fixed form.
options(scipen = 999)
# Model 1 (linear-linear): Des in levels regressed on PIB and Ipca.
# NOTE(review): unlike the log-linear and log-log fits below, this
# specification leaves out ExPob -- confirm that is intentional.
EqLinDes <- lm(formula = Des ~ PIB + Ipca, data = dados)
summary(object = EqLinDes)
##
## Call:
## lm(formula = Des ~ PIB + Ipca, data = dados)
##
## Residuals:
## Min 1Q Median 3Q Max
## -2.8210 -1.2003 0.0302 0.7678 3.8855
##
## Coefficients:
## Estimate Std. Error t value Pr(>|t|)
## (Intercept) 9.9947941018678748 0.7292716448105863 13.705 0.000000000000396
## PIB -0.0000000000005697 0.0000000000004642 -1.227 0.231
## Ipca -0.0015826648638489 0.0006635721489653 -2.385 0.025
##
## (Intercept) ***
## PIB
## Ipca *
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## Residual standard error: 1.668 on 25 degrees of freedom
## Multiple R-squared: 0.1882, Adjusted R-squared: 0.1232
## F-statistic: 2.897 on 2 and 25 DF, p-value: 0.07385
# Model 2 (log-linear): log of Des regressed on PIB, Ipca and ExPob in levels.
EqLoglinDes <- lm(formula = LnDes ~ PIB + Ipca + ExPob, data = dados)
summary(object = EqLoglinDes)
##
## Call:
## lm(formula = LnDes ~ PIB + Ipca + ExPob, data = dados)
##
## Residuals:
## Min 1Q Median 3Q Max
## -0.34763 -0.12805 -0.02225 0.08784 0.41370
##
## Coefficients:
## Estimate Std. Error t value Pr(>|t|)
## (Intercept) 3.0236858198313583 0.6054768628915962 4.994 0.0000422 ***
## PIB -0.0000000000002730 0.0000000000001731 -1.577 0.1278
## Ipca -0.0001592992268101 0.0000765416093185 -2.081 0.0483 *
## ExPob -0.0000000249820959 0.0000000209191138 -1.194 0.2441
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## Residual standard error: 0.1807 on 24 degrees of freedom
## Multiple R-squared: 0.2666, Adjusted R-squared: 0.1749
## F-statistic: 2.908 on 3 and 24 DF, p-value: 0.05532
# Model 3 (log-log): response and all three regressors enter in logs.
EqLogLogDes <- lm(formula = LnDes ~ LnPIB + LnIpca + LnExPob, data = dados)
summary(object = EqLogLogDes)
##
## Call:
## lm(formula = LnDes ~ LnPIB + LnIpca + LnExPob, data = dados)
##
## Residuals:
## Min 1Q Median 3Q Max
## -0.24567 -0.09591 -0.00757 0.06291 0.36212
##
## Coefficients:
## Estimate Std. Error t value Pr(>|t|)
## (Intercept) 14.34721 10.76508 1.333 0.19513
## LnPIB -0.28873 0.20522 -1.407 0.17226
## LnIpca -0.09507 0.02597 -3.660 0.00124 **
## LnExPob -0.23641 0.31038 -0.762 0.45366
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## Residual standard error: 0.166 on 24 degrees of freedom
## Multiple R-squared: 0.3809, Adjusted R-squared: 0.3035
## F-statistic: 4.922 on 3 and 24 DF, p-value: 0.008353
# Reduced ("Star") log-log model: same as EqLogLogDes but without LnExPob.
EqLogLogDesStar <- lm(formula = LnDes ~ LnPIB + LnIpca, data = dados)
summary(object = EqLogLogDesStar)
##
## Call:
## lm(formula = LnDes ~ LnPIB + LnIpca, data = dados)
##
## Residuals:
## Min 1Q Median 3Q Max
## -0.26102 -0.11026 0.00248 0.06340 0.34330
##
## Coefficients:
## Estimate Std. Error t value Pr(>|t|)
## (Intercept) 6.26121 1.77198 3.533 0.001623 **
## LnPIB -0.14003 0.06272 -2.233 0.034772 *
## LnIpca -0.08545 0.02251 -3.797 0.000834 ***
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## Residual standard error: 0.1646 on 25 degrees of freedom
## Multiple R-squared: 0.3659, Adjusted R-squared: 0.3152
## F-statistic: 7.214 on 2 and 25 DF, p-value: 0.003363
# Reduced ("Star") log-linear model: same as EqLoglinDes but without ExPob.
EqLoglinDesStar <- lm(formula = LnDes ~ PIB + Ipca, data = dados)
summary(object = EqLoglinDesStar)
##
## Call:
## lm(formula = LnDes ~ PIB + Ipca, data = dados)
##
## Residuals:
## Min 1Q Median 3Q Max
## -0.34255 -0.12168 0.01688 0.08618 0.39057
##
## Coefficients:
## Estimate Std. Error t value
## (Intercept) 2.30679022899040653 0.07965683542884427 28.959
## PIB -0.00000000000007521 0.00000000000005071 -1.483
## Ipca -0.00019074228392427 0.00007248061520206 -2.632
## Pr(>|t|)
## (Intercept) <0.0000000000000002 ***
## PIB 0.1505
## Ipca 0.0143 *
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## Residual standard error: 0.1822 on 25 degrees of freedom
## Multiple R-squared: 0.223, Adjusted R-squared: 0.1608
## F-statistic: 3.587 on 2 and 25 DF, p-value: 0.04269
# Side-by-side text table of every fitted specification, for comparison.
# ci.force = TRUE makes the table report confidence intervals rather than
# standard errors.
screenreg(
  list(
    EqLinDes,
    EqLoglinDes,
    EqLogLogDes,
    EqLogLogDesStar,
    EqLoglinDesStar
  ),
  digits = 4,
  stars = c(0.01, 0.05, 0.1),
  ci.force = TRUE,
  custom.model.names = c(
    "Modelo Linear",
    "Modelo Log-Linear",
    "Modelo Log-Log",
    "Log log Star",
    "Log lin Star"
  ),
  caption = "Multiple model types and single row",
  label = "tab:3",
  include.adjrs = TRUE,
  include.bic = TRUE
)
##
## ===============================================================================================================
## Modelo Linear Modelo Log-Linear Modelo Log-Log Log log Star Log lin Star
## ---------------------------------------------------------------------------------------------------------------
## (Intercept) 9.9948 * 3.0237 * 14.3472 6.2612 * 2.3068 *
## [ 8.5654; 11.4241] [ 1.8370; 4.2104] [-6.7520; 35.4464] [ 2.7882; 9.7342] [ 2.1507; 2.4629]
## PIB -0.0000 -0.0000 -0.0000
## [-0.0000; 0.0000] [-0.0000; 0.0000] [-0.0000; 0.0000]
## Ipca -0.0016 * -0.0002 * -0.0002 *
## [-0.0029; -0.0003] [-0.0003; -0.0000] [-0.0003; -0.0000]
## ExPob -0.0000
## [-0.0000; 0.0000]
## LnPIB -0.2887 -0.1400 *
## [-0.6910; 0.1135] [-0.2630; -0.0171]
## LnIpca -0.0951 * -0.0855 *
## [-0.1460; -0.0442] [-0.1296; -0.0413]
## LnExPob -0.2364
## [-0.8447; 0.3719]
## ---------------------------------------------------------------------------------------------------------------
## R^2 0.1882 0.2666 0.3809 0.3659 0.2230
## Adj. R^2 0.1232 0.1749 0.3035 0.3152 0.1608
## Num. obs. 28 28 28 28 28
## ===============================================================================================================
## * Null hypothesis value outside the confidence interval.
# RESET specification test on the reduced log-log model (EqLogLogDesStar),
# using squared regressors (power = 2, type = "regressor").
resettest(EqLogLogDesStar, power = 2, type = "regressor", data = dados)
##
## RESET test
##
## data: EqLogLogDesStar
## RESET = 0.67744, df1 = 2, df2 = 23, p-value = 0.5178
# Breusch-Godfrey test for serial correlation up to order 1 in the reduced
# log-log model (chi-squared form of the statistic).
bgtest(EqLogLogDesStar, order = 1, type = "Chisq")
##
## Breusch-Godfrey test for serial correlation of order up to 1
##
## data: EqLogLogDesStar
## LM test = 15.267, df = 1, p-value = 0.00009333
# Breusch-Godfrey test for serial correlation up to order 3, same model.
bgtest(EqLogLogDesStar, order = 3, type = "Chisq")
##
## Breusch-Godfrey test for serial correlation of order up to 3
##
## data: EqLogLogDesStar
## LM test = 17.781, df = 3, p-value = 0.000488
# Breusch-Godfrey test for serial correlation up to order 6, same model.
bgtest(EqLogLogDesStar, order = 6, type = "Chisq")
##
## Breusch-Godfrey test for serial correlation of order up to 6
##
## data: EqLogLogDesStar
## LM test = 18.674, df = 6, p-value = 0.004752
# Re-estimate the reduced log-log model with the one-period lag of LnDes as an
# extra regressor, to absorb the serial correlation flagged by the BG tests.
# dplyr::lag() is namespaced on purpose: an unqualified lag() resolves to
# stats::lag() whenever dplyr is not attached (or is masked), and stats::lag()
# does not shift a plain numeric vector, which would make the regressor an
# exact copy of the response.
EqLogLogDesStarCor <- lm(
  LnDes ~ dplyr::lag(LnDes, 1) + LnPIB + LnIpca,
  data = dados
)
summary(EqLogLogDesStarCor)
##
## Call:
## lm(formula = LnDes ~ lag(LnDes, 1) + LnPIB + LnIpca, data = dados)
##
## Residuals:
## Min 1Q Median 3Q Max
## -0.292584 -0.049322 0.005729 0.045586 0.213852
##
## Coefficients:
## Estimate Std. Error t value Pr(>|t|)
## (Intercept) 1.69835 1.42927 1.188 0.247
## lag(LnDes, 1) 0.75498 0.13195 5.722 0.00000793 ***
## LnPIB -0.03909 0.04560 -0.857 0.400
## LnIpca -0.02889 0.01926 -1.500 0.147
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## Residual standard error: 0.1102 on 23 degrees of freedom
## (1 observation deleted due to missingness)
## Multiple R-squared: 0.7279, Adjusted R-squared: 0.6924
## F-statistic: 20.51 on 3 and 23 DF, p-value: 0.00000108
# Breusch-Godfrey test (order 1) on the lag-augmented model.
bgtest(EqLogLogDesStarCor, order = 1, type = "Chisq")
##
## Breusch-Godfrey test for serial correlation of order up to 1
##
## data: EqLogLogDesStarCor
## LM test = 0.073114, df = 1, p-value = 0.7869
# Breusch-Godfrey test (up to order 3) on the lag-augmented model.
bgtest(EqLogLogDesStarCor, order = 3, type = "Chisq")
##
## Breusch-Godfrey test for serial correlation of order up to 3
##
## data: EqLogLogDesStarCor
## LM test = 10.08, df = 3, p-value = 0.0179
# Breusch-Godfrey test (up to order 6) on the lag-augmented model.
bgtest(EqLogLogDesStarCor, order = 6, type = "Chisq")
##
## Breusch-Godfrey test for serial correlation of order up to 6
##
## data: EqLogLogDesStarCor
## LM test = 12.521, df = 6, p-value = 0.0513
# Numeric copies of the level variables used in the collinearity check.
PIB <- as.numeric(dados[["PIB"]])
Des <- as.numeric(dados[["Des"]])
Ipca <- as.numeric(dados[["Ipca"]])
# Variance inflation factor (FIV) and tolerance (TOL = 1 / FIV) per variable.
# NOTE(review): Des is the response in the models above, yet it is part of the
# set passed to vif(); VIFs are normally computed over regressors only, and the
# retained model uses the log variables -- confirm this is intended.
vi <- data.frame(PIB = PIB, Des = Des, Ipca = Ipca)
pairs(vi)
fiv <- vif(vi)
FIV <- as.vector(fiv)
TOL <- as.vector(1 / fiv)
fiv_tol <- round(data.frame(FIV = FIV, TOL = TOL), 4)
fiv_tol
## FIV TOL
## 1 1.2492 0.8005
## 2 1.2318 0.8118
## 3 1.4463 0.6914
# Pairwise correlations among columns 3 to 8 of the dataset.
cor(x = as.matrix(dados[, 3:8]))
## Des Ipca ExPob LnPIB LnDes LnIpca
## Des 1.00000000 -0.3731659 -0.03231636 0.04064066 0.99623294 -0.4711026
## Ipca -0.37316592 1.0000000 0.46097694 -0.51828612 -0.39320756 0.8976872
## ExPob -0.03231636 0.4609769 1.00000000 -0.97269398 -0.01304496 0.5309030
## LnPIB 0.04064066 -0.5182861 -0.97269398 1.00000000 0.01861163 -0.6123698
## LnDes 0.99623294 -0.3932076 -0.01304496 0.01861163 1.00000000 -0.4894073
## LnIpca -0.47110257 0.8976872 0.53090305 -0.61236982 -0.48940731 1.0000000
# Pairwise correlations among the variables used in the FIV/TOL check.
cor(x = as.matrix(vi))
## PIB Des Ipca
## PIB 1.00000000 -0.05861838 -0.3889101
## Des -0.05861838 1.00000000 -0.3731659
## Ipca -0.38891008 -0.37316592 1.0000000
Pela visualização gráfica e pelos valores de TOL obtidos, não há indícios de multicolinearidade entre as variáveis. Dessa forma, não precisamos adotar procedimentos corretivos.
# Studentized Breusch-Pagan heteroskedasticity test on the lag-augmented model.
bptest(formula = EqLogLogDesStarCor)
##
## studentized Breusch-Pagan test
##
## data: EqLogLogDesStarCor
## BP = 5.4456, df = 3, p-value = 0.1419
# Same Breusch-Pagan result for the lag-augmented model, tidied into a one-row
# table for the report.
kable(
  tidy(bptest(EqLogLogDesStarCor)),
  caption = "Breusch-Pagan heteroskedasticity test"
)
| statistic | p.value | parameter | method |
|---|---|---|---|
| 5.445569 | 0.1419307 | 3 | studentized Breusch-Pagan test |
# Studentized Breusch-Pagan heteroskedasticity test on the reduced log-log model.
bptest(formula = EqLogLogDesStar)
##
## studentized Breusch-Pagan test
##
## data: EqLogLogDesStar
## BP = 7.0362, df = 2, p-value = 0.02965
# Same Breusch-Pagan result for the reduced log-log model, tidied into a
# one-row table for the report.
kable(
  tidy(bptest(EqLogLogDesStar)),
  caption = "Breusch-Pagan heteroskedasticity test"
)
| statistic | p.value | parameter | method |
|---|---|---|---|
| 7.03625 | 0.029655 | 2 | studentized Breusch-Pagan test |
Por meio da correção da autocorrelação, também já corrigimos a heterocedasticidade. Entretanto, o modelo EqLogLogDesStarCor não é significativo para as variáveis de interesse, o que nos fez retomar o modelo EqLogLogDesStar como aquele que devemos seguir. Dessa forma, não conseguimos resolver ou mitigar o problema da autocorrelação e gerar um modelo bom o suficiente.
Portanto, corrigiremos a heterocedasticidade do modelo EqLogLogDesStar, que ainda apresenta autocorrelação, pois este possui parâmetros significativos.
# Re-fit the reduced log-log model (same specification as estimated earlier)
# before computing robust standard errors for it.
EqLogLogDesStar <- lm(formula = LnDes ~ LnPIB + LnIpca, data = dados)
summary(object = EqLogLogDesStar)
##
## Call:
## lm(formula = LnDes ~ LnPIB + LnIpca, data = dados)
##
## Residuals:
## Min 1Q Median 3Q Max
## -0.26102 -0.11026 0.00248 0.06340 0.34330
##
## Coefficients:
## Estimate Std. Error t value Pr(>|t|)
## (Intercept) 6.26121 1.77198 3.533 0.001623 **
## LnPIB -0.14003 0.06272 -2.233 0.034772 *
## LnIpca -0.08545 0.02251 -3.797 0.000834 ***
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## Residual standard error: 0.1646 on 25 degrees of freedom
## Multiple R-squared: 0.3659, Adjusted R-squared: 0.3152
## F-statistic: 7.214 on 2 and 25 DF, p-value: 0.003363
# Coefficient tests for the reduced log-log model using an HC3
# heteroskedasticity-consistent covariance matrix.
# NOTE(review): the Breusch-Godfrey tests above reject no serial correlation
# for this model; HC3 does not address that, so a HAC estimator may be more
# appropriate -- confirm.
EqLogLogDesStar_hc3 <- coeftest(
  EqLogLogDesStar,
  vcov = vcovHC(EqLogLogDesStar, type = "HC3")
)
# Check the class of the result, which is passed to screenreg() below.
class(EqLogLogDesStar_hc3)
## [1] "coeftest"
library(texreg)
# Print three fits side by side: the full log-log model, the reduced ("Star")
# log-log model, and the reduced model with HC3 robust standard errors.
# The first column is labelled "Modelo Log-Log" because EqLogLogDes is the
# log-log specification (it was previously mislabelled as log-linear).
screenreg(
  list(EqLogLogDes, EqLogLogDesStar, EqLogLogDesStar_hc3),
  digits = 4,
  stars = c(0.01, 0.05, 0.1),
  custom.model.names = c("Modelo Log-Log", "Modelo Star", "Star_hc3"),
  caption = "Multiple model types and single row",
  label = "tab:2",
  include.adjrs = TRUE,
  include.bic = TRUE
)
##
## ========================================================
## Modelo Log Linear Modelo Star Star_hc3
## --------------------------------------------------------
## (Intercept) 14.3472 6.2612 *** 6.2612 ***
## (10.7651) (1.7720) (1.5734)
## LnPIB -0.2887 -0.1400 ** -0.1400 **
## (0.2052) (0.0627) (0.0569)
## LnIpca -0.0951 *** -0.0855 *** -0.0855 ***
## (0.0260) (0.0225) (0.0115)
## LnExPob -0.2364
## (0.3104)
## --------------------------------------------------------
## R^2 0.3809 0.3659
## Adj. R^2 0.3035 0.3152
## Num. obs. 28 28
## ========================================================
## *** p < 0.01; ** p < 0.05; * p < 0.1