Objetivo:

require(DT) #para a fun??o datatable
require(tidyverse)
require(mice)       #para imputação
require(broom.mixed)
require(broom)
require(lme4)
require(readxl)
require(tidyr)
require(redres)     #para os residuos
require(corrplot)

Planilha de dados

dados=read_excel("dados_renato.xlsx",sheet=1)
names(dados)=c("Ano","UF","Cod.UF","tx.latrc","tx.pres","gini.ibge","perc.jov.1524","perc.hom","pbf","densidade.urbana1","densidade.urbana2","taxa.casamentos","Taxa.desligamentos","raz.2020","raz.1040")
datatable(dados,options = list(pageLength = 5))

Variaveis Escolhidas para entrar no modelo:

Verificando os missing das variáveis:

summary(dados)
##       Ano            UF                Cod.UF      tx.latrc      
##  Min.   :2005   Length:297         Min.   : 1   Min.   :0.03206  
##  1st Qu.:2007   Class :character   1st Qu.: 7   1st Qu.:0.64466  
##  Median :2010   Mode  :character   Median :14   Median :0.94540  
##  Mean   :2010                      Mean   :14   Mean   :1.09195  
##  3rd Qu.:2013                      3rd Qu.:21   3rd Qu.:1.35786  
##  Max.   :2015                      Max.   :27   Max.   :5.42341  
##                                                 NA's   :16       
##     tx.pres         gini.ibge      perc.jov.1524       perc.hom    
##  Min.   : 51.71   Min.   :0.4190   Min.   : 9.711   Min.   :46.80  
##  1st Qu.:156.22   1st Qu.:0.4783   1st Qu.:16.774   1st Qu.:48.40  
##  Median :228.64   Median :0.5025   Median :18.242   Median :49.00  
##  Mean   :252.51   Mean   :0.5055   Mean   :18.076   Mean   :49.06  
##  3rd Qu.:337.17   3rd Qu.:0.5272   3rd Qu.:19.500   3rd Qu.:49.80  
##  Max.   :634.53   Max.   :0.6150   Max.   :23.500   Max.   :52.40  
##  NA's   :2        NA's   :27       NA's   :27       NA's   :27     
##       pbf         densidade.urbana1 densidade.urbana2 taxa.casamentos   
##  Min.   : 22.04   Min.   : 34.29    Min.   : 26.95    Min.   :0.001847  
##  1st Qu.: 79.91   1st Qu.: 45.06    1st Qu.: 38.80    1st Qu.:0.003916  
##  Median :137.69   Median : 63.79    Median : 53.12    Median :0.004859  
##  Mean   :159.97   Mean   : 73.50    Mean   : 57.94    Mean   :0.004876  
##  3rd Qu.:231.84   3rd Qu.: 97.48    3rd Qu.: 75.40    3rd Qu.:0.005605  
##  Max.   :423.79   Max.   :160.29    Max.   :131.26    Max.   :0.008422  
##                                     NA's   :27                          
##  Taxa.desligamentos    raz.2020         raz.1040     
##  Min.   :0.01196    Min.   : 9.034   Min.   : 8.349  
##  1st Qu.:0.03625    1st Qu.:14.177   1st Qu.:13.053  
##  Median :0.05304    Median :16.047   Median :14.912  
##  Mean   :0.06777    Mean   :16.825   Mean   :15.515  
##  3rd Qu.:0.09978    3rd Qu.:18.717   3rd Qu.:17.426  
##  Max.   :0.17911    Max.   :32.329   Max.   :27.742  
##                     NA's   :54       NA's   :54

Calculando o Número necessário de imputações:

[1] Rubin, D.B. (1987). Multiple Imputation for Nonresponse in Surveys. New York: John Wiley and Sons. Zaninotto, P.; Sacker, A. (2017) Missing Data in Longitudinal Surveys: A Comparison of Performance of Modern Techniques. https://digitalcommons.wayne.edu/cgi/viewcontent.cgi?article=2384&context=jmasm

lambda=1-nrow(dados[complete.cases(dados), ])/nrow(dados)
a=0.99
N=round(lambda/(1/a-1),0)
N
## [1] 24

Transformando em wide table para imputar:

dados_wide = dados %>%
  pivot_wider(id_cols = Cod.UF,names_from = Ano, names_glue = "{.value}_{Ano}",values_from = c(tx.latrc,perc.jov.1524,pbf,gini.ibge,Taxa.desligamentos,perc.hom,densidade.urbana1,tx.pres,taxa.casamentos))

Iniciando a imputação:

imp1 <- mice(dados_wide, m = N,print=FALSE)
## Warning: Number of logged events: 1331

Imputando os dados

imp_comp1 <- mice::complete(imp1, "all")
a0=imp_comp1[[1]]

Função para pivotar os dados imputados:

pivotar=function(dataset){
dataset %>%
  pivot_longer(
    !Cod.UF,
    names_to = c("Variavel", "Ano"),
    names_sep = "_")%>%
pivot_wider(
    names_from = Variavel,
    values_from = value )%>%
mutate(
  Ano=as.numeric(Ano),
  taxa.casamentos=taxa.casamentos*100000,
  Taxa.desligamentos=Taxa.desligamentos*100000,
  densidade.urbana1=densidade.urbana1*100
)
}
 a1= pivotar(imp_comp1[[1]])

Pivotando os dados imputados para formato long table:

imp_comp2 = imp_comp1 %>%
lapply(pivotar) 
a1=imp_comp2[[1]]

Modelo 1: Modelo gamma com efeito aleatório no Ano (cada estado tem seu coeficiente de Ano)

https://pj.freefaculty.org/guides/stat/Regression-GLM/Gamma/GammaGLM-01.pdf

formula1 ="tx.latrc ~ Ano  + perc.jov.1524 + pbf + gini.ibge + Taxa.desligamentos + perc.hom + densidade.urbana1 + (Ano| Cod.UF)"

modelo_1 <- lapply(imp_comp2, glmer, formula=formula1,family=Gamma(log))
## Warning: Some predictor variables are on very different scales: consider
## rescaling
## Warning in (function (fn, par, lower = rep.int(-Inf, n), upper = rep.int(Inf, :
## failure to converge in 10000 evaluations
## boundary (singular) fit: see ?isSingular
## Warning: Some predictor variables are on very different scales: consider
## rescaling
## Warning in checkConv(attr(opt, "derivs"), opt$par, ctrl = control$checkConv, :
## unable to evaluate scaled gradient
## Warning in checkConv(attr(opt, "derivs"), opt$par, ctrl = control$checkConv, :
## Model failed to converge: degenerate Hessian with 1 negative eigenvalues
## Warning: Some predictor variables are on very different scales: consider
## rescaling
## Warning in (function (fn, par, lower = rep.int(-Inf, n), upper = rep.int(Inf, :
## failure to converge in 10000 evaluations
## Warning in checkConv(attr(opt, "derivs"), opt$par, ctrl = control$checkConv, :
## unable to evaluate scaled gradient
## Warning in checkConv(attr(opt, "derivs"), opt$par, ctrl = control$checkConv, :
## Model failed to converge: degenerate Hessian with 1 negative eigenvalues
## Warning: Some predictor variables are on very different scales: consider
## rescaling
## Warning in (function (fn, par, lower = rep.int(-Inf, n), upper = rep.int(Inf, :
## failure to converge in 10000 evaluations
## boundary (singular) fit: see ?isSingular
## Warning: Some predictor variables are on very different scales: consider
## rescaling
## boundary (singular) fit: see ?isSingular
## Warning: Some predictor variables are on very different scales: consider
## rescaling
## boundary (singular) fit: see ?isSingular
## Warning: Some predictor variables are on very different scales: consider
## rescaling
## boundary (singular) fit: see ?isSingular
## Warning: Some predictor variables are on very different scales: consider
## rescaling
## Warning in checkConv(attr(opt, "derivs"), opt$par, ctrl = control$checkConv, :
## Model failed to converge with max|grad| = 4.07809 (tol = 0.002, component 1)
## Warning in checkConv(attr(opt, "derivs"), opt$par, ctrl = control$checkConv, : Model is nearly unidentifiable: very large eigenvalue
##  - Rescale variables?;Model is nearly unidentifiable: large eigenvalue ratio
##  - Rescale variables?
## Warning: Some predictor variables are on very different scales: consider
## rescaling
## Warning in (function (fn, par, lower = rep.int(-Inf, n), upper = rep.int(Inf, :
## failure to converge in 10000 evaluations
## Warning in checkConv(attr(opt, "derivs"), opt$par, ctrl = control$checkConv, :
## unable to evaluate scaled gradient
## Warning in checkConv(attr(opt, "derivs"), opt$par, ctrl = control$checkConv, :
## Model failed to converge: degenerate Hessian with 1 negative eigenvalues
## Warning: Some predictor variables are on very different scales: consider
## rescaling
## Warning in checkConv(attr(opt, "derivs"), opt$par, ctrl = control$checkConv, :
## unable to evaluate scaled gradient
## Warning in checkConv(attr(opt, "derivs"), opt$par, ctrl = control$checkConv, :
## Model failed to converge: degenerate Hessian with 1 negative eigenvalues
## Warning: Some predictor variables are on very different scales: consider
## rescaling
## boundary (singular) fit: see ?isSingular
## Warning: Some predictor variables are on very different scales: consider
## rescaling
## Warning in checkConv(attr(opt, "derivs"), opt$par, ctrl = control$checkConv, :
## unable to evaluate scaled gradient
## Warning in checkConv(attr(opt, "derivs"), opt$par, ctrl = control$checkConv, :
## Model failed to converge: degenerate Hessian with 1 negative eigenvalues
## Warning: Some predictor variables are on very different scales: consider
## rescaling
## Warning in (function (fn, par, lower = rep.int(-Inf, n), upper = rep.int(Inf, :
## failure to converge in 10000 evaluations
## Warning in checkConv(attr(opt, "derivs"), opt$par, ctrl = control$checkConv, :
## Model failed to converge with max|grad| = 94.7548 (tol = 0.002, component 1)
## Warning in checkConv(attr(opt, "derivs"), opt$par, ctrl = control$checkConv, : Model is nearly unidentifiable: very large eigenvalue
##  - Rescale variables?;Model is nearly unidentifiable: large eigenvalue ratio
##  - Rescale variables?
## Warning: Some predictor variables are on very different scales: consider
## rescaling
## Warning in checkConv(attr(opt, "derivs"), opt$par, ctrl = control$checkConv, :
## unable to evaluate scaled gradient
## Warning in checkConv(attr(opt, "derivs"), opt$par, ctrl = control$checkConv, :
## Model failed to converge: degenerate Hessian with 1 negative eigenvalues
## Warning: Some predictor variables are on very different scales: consider
## rescaling
## Warning in checkConv(attr(opt, "derivs"), opt$par, ctrl = control$checkConv, :
## Model failed to converge with max|grad| = 21.6443 (tol = 0.002, component 1)
## Warning in checkConv(attr(opt, "derivs"), opt$par, ctrl = control$checkConv, : Model is nearly unidentifiable: very large eigenvalue
##  - Rescale variables?;Model is nearly unidentifiable: large eigenvalue ratio
##  - Rescale variables?
## Warning: Some predictor variables are on very different scales: consider
## rescaling
## boundary (singular) fit: see ?isSingular
## Warning: Some predictor variables are on very different scales: consider
## rescaling
## Warning in checkConv(attr(opt, "derivs"), opt$par, ctrl = control$checkConv, :
## unable to evaluate scaled gradient
## Warning in checkConv(attr(opt, "derivs"), opt$par, ctrl = control$checkConv, :
## Model failed to converge: degenerate Hessian with 1 negative eigenvalues
## Warning: Some predictor variables are on very different scales: consider
## rescaling
## boundary (singular) fit: see ?isSingular
## Warning: Some predictor variables are on very different scales: consider
## rescaling
## Warning in checkConv(attr(opt, "derivs"), opt$par, ctrl = control$checkConv, :
## unable to evaluate scaled gradient
## Warning in checkConv(attr(opt, "derivs"), opt$par, ctrl = control$checkConv, :
## Model failed to converge: degenerate Hessian with 1 negative eigenvalues
## Warning: Some predictor variables are on very different scales: consider
## rescaling
## boundary (singular) fit: see ?isSingular
## Warning: Some predictor variables are on very different scales: consider
## rescaling
## Warning in checkConv(attr(opt, "derivs"), opt$par, ctrl = control$checkConv, :
## unable to evaluate scaled gradient
## Warning in checkConv(attr(opt, "derivs"), opt$par, ctrl = control$checkConv, :
## Model failed to converge: degenerate Hessian with 1 negative eigenvalues
## Warning: Some predictor variables are on very different scales: consider
## rescaling
## boundary (singular) fit: see ?isSingular
## Warning: Some predictor variables are on very different scales: consider
## rescaling
## boundary (singular) fit: see ?isSingular
## Warning: Some predictor variables are on very different scales: consider
## rescaling
## Warning in checkConv(attr(opt, "derivs"), opt$par, ctrl = control$checkConv, :
## unable to evaluate scaled gradient
## Warning in checkConv(attr(opt, "derivs"), opt$par, ctrl = control$checkConv, :
## Model failed to converge: degenerate Hessian with 1 negative eigenvalues
summary(pool(modelo_1), conf.int = TRUE)
## Warning in vcov.merMod(object, use.hessian = use.hessian): variance-covariance matrix computed from finite-difference Hessian is
## not positive definite or contains NA values: falling back to var-cov estimated from RX
## Warning in vcov.merMod(object, correlation = correlation, sigm = sig): variance-covariance matrix computed from finite-difference Hessian is
## not positive definite or contains NA values: falling back to var-cov estimated from RX
##                 term      estimate    std.error  statistic         df
## 1        (Intercept) -1.528371e+02 9.229350e+01 -1.6559898   2.999993
## 2                Ano  7.522289e-02 4.543698e-02  1.6555434   2.833841
## 3      perc.jov.1524 -7.057399e-03 2.715397e-02 -0.2599031 145.070083
## 4                pbf -1.325778e-03 1.286864e-03 -1.0302394  22.393823
## 5          gini.ibge -2.845974e-01 1.728375e+00 -0.1646619  66.192476
## 6 Taxa.desligamentos -6.760052e-05 2.487674e-05 -2.7174186  49.855405
## 7           perc.hom  5.128848e-02 6.318741e-02  0.8116882 119.695375
## 8  densidade.urbana1  8.448705e-06 2.547289e-05  0.3316743 127.992589
##       p.value         2.5 %        97.5 %
## 1 0.196300641 -4.465566e+02  1.408824e+02
## 2 0.201684902 -7.429469e-02  2.247405e-01
## 3 0.795307043 -6.072590e-02  4.661110e-02
## 4 0.313897067 -3.991851e-03  1.340296e-03
## 5 0.869711958 -3.735222e+00  3.166027e+00
## 6 0.009023289 -1.175705e-04 -1.763052e-05
## 7 0.418581785 -7.382144e-02  1.763984e-01
## 8 0.740677672 -4.195380e-05  5.885121e-05

Pendencias:

https://www.rdocumentation.org/packages/ggResidpanel/versions/0.3.0/topics/resid_panel

#install.packages("ggResidpanel")
require( ggResidpanel)
## Loading required package: ggResidpanel
resid_panel(modelo_1[[1]],qqbands = TRUE,smoother = TRUE,plots = "all")
## `geom_smooth()` using formula 'y ~ x'
## `geom_smooth()` using formula 'y ~ x'

resid_panel(modelo_1[[2]],qqbands = TRUE,smoother = TRUE,plots = "all")
## `geom_smooth()` using formula 'y ~ x'
## `geom_smooth()` using formula 'y ~ x'