Objetivo:
require(DT) #para a fun??o datatable
require(tidyverse)
require(mice) #para imputação
require(broom.mixed)
require(broom)
require(lme4)
require(readxl)
require(tidyr)
require(redres) #para os residuos
require(corrplot)
Planilha de dados
dados=read_excel("dados_renato.xlsx",sheet=1)
names(dados)=c("Ano","UF","Cod.UF","tx.latrc","tx.pres","gini.ibge","perc.jov.1524","perc.hom","pbf","densidade.urbana1","densidade.urbana2","taxa.casamentos","Taxa.desligamentos","raz.2020","raz.1040")
datatable(dados,options = list(pageLength = 5))
Variaveis Escolhidas para entrar no modelo:
Verificando os missing das variáveis:
summary(dados)
## Ano UF Cod.UF tx.latrc
## Min. :2005 Length:297 Min. : 1 Min. :0.03206
## 1st Qu.:2007 Class :character 1st Qu.: 7 1st Qu.:0.64466
## Median :2010 Mode :character Median :14 Median :0.94540
## Mean :2010 Mean :14 Mean :1.09195
## 3rd Qu.:2013 3rd Qu.:21 3rd Qu.:1.35786
## Max. :2015 Max. :27 Max. :5.42341
## NA's :16
## tx.pres gini.ibge perc.jov.1524 perc.hom
## Min. : 51.71 Min. :0.4190 Min. : 9.711 Min. :46.80
## 1st Qu.:156.22 1st Qu.:0.4783 1st Qu.:16.774 1st Qu.:48.40
## Median :228.64 Median :0.5025 Median :18.242 Median :49.00
## Mean :252.51 Mean :0.5055 Mean :18.076 Mean :49.06
## 3rd Qu.:337.17 3rd Qu.:0.5272 3rd Qu.:19.500 3rd Qu.:49.80
## Max. :634.53 Max. :0.6150 Max. :23.500 Max. :52.40
## NA's :2 NA's :27 NA's :27 NA's :27
## pbf densidade.urbana1 densidade.urbana2 taxa.casamentos
## Min. : 22.04 Min. : 34.29 Min. : 26.95 Min. :0.001847
## 1st Qu.: 79.91 1st Qu.: 45.06 1st Qu.: 38.80 1st Qu.:0.003916
## Median :137.69 Median : 63.79 Median : 53.12 Median :0.004859
## Mean :159.97 Mean : 73.50 Mean : 57.94 Mean :0.004876
## 3rd Qu.:231.84 3rd Qu.: 97.48 3rd Qu.: 75.40 3rd Qu.:0.005605
## Max. :423.79 Max. :160.29 Max. :131.26 Max. :0.008422
## NA's :27
## Taxa.desligamentos raz.2020 raz.1040
## Min. :0.01196 Min. : 9.034 Min. : 8.349
## 1st Qu.:0.03625 1st Qu.:14.177 1st Qu.:13.053
## Median :0.05304 Median :16.047 Median :14.912
## Mean :0.06777 Mean :16.825 Mean :15.515
## 3rd Qu.:0.09978 3rd Qu.:18.717 3rd Qu.:17.426
## Max. :0.17911 Max. :32.329 Max. :27.742
## NA's :54 NA's :54
Calculando o Número necessário de imputações:
[1] Rubin, D.B. (1987). Multiple Imputation for Nonresponse in Surveys. New York: John Wiley and Sons. Zaninotto, P.; Sacker, A. (2017) Missing Data in Longitudinal Surveys: A Comparison of Performance of Modern Techniques. https://digitalcommons.wayne.edu/cgi/viewcontent.cgi?article=2384&context=jmasm
lambda=1-nrow(dados[complete.cases(dados), ])/nrow(dados)
a=0.99
N=round(lambda/(1/a-1),0)
N
## [1] 24
Transformando em wide table para imputar:
dados_wide = dados %>%
pivot_wider(id_cols = Cod.UF,names_from = Ano, names_glue = "{.value}_{Ano}",values_from = c(tx.latrc,perc.jov.1524,pbf,gini.ibge,Taxa.desligamentos,perc.hom,densidade.urbana1,tx.pres,taxa.casamentos))
Iniciando a imputação:
imp1 <- mice(dados_wide, m = N,print=FALSE)
## Warning: Number of logged events: 1331
Imputando os dados
imp_comp1 <- mice::complete(imp1, "all")
a0=imp_comp1[[1]]
Função para pivotar os dados imputados:
pivotar=function(dataset){
dataset %>%
pivot_longer(
!Cod.UF,
names_to = c("Variavel", "Ano"),
names_sep = "_")%>%
pivot_wider(
names_from = Variavel,
values_from = value )%>%
mutate(
Ano=as.numeric(Ano),
taxa.casamentos=taxa.casamentos*100000,
Taxa.desligamentos=Taxa.desligamentos*100000,
densidade.urbana1=densidade.urbana1*100
)
}
a1= pivotar(imp_comp1[[1]])
Pivotando os dados imputados para formato long table:
imp_comp2 = imp_comp1 %>%
lapply(pivotar)
a1=imp_comp2[[1]]
Modelo 1: Modelo gamma com efeito aleatório no Ano (cada estado tem seu coeficiente de Ano)
https://pj.freefaculty.org/guides/stat/Regression-GLM/Gamma/GammaGLM-01.pdf
Há problemas de convergência
Some predictor variables are on very different scales:
formula1 ="tx.latrc ~ Ano + perc.jov.1524 + pbf + gini.ibge + Taxa.desligamentos + perc.hom + densidade.urbana1 + (Ano| Cod.UF)"
modelo_1 <- lapply(imp_comp2, glmer, formula=formula1,family=Gamma(log))
## Warning: Some predictor variables are on very different scales: consider
## rescaling
## Warning in (function (fn, par, lower = rep.int(-Inf, n), upper = rep.int(Inf, :
## failure to converge in 10000 evaluations
## boundary (singular) fit: see ?isSingular
## Warning: Some predictor variables are on very different scales: consider
## rescaling
## Warning in checkConv(attr(opt, "derivs"), opt$par, ctrl = control$checkConv, :
## unable to evaluate scaled gradient
## Warning in checkConv(attr(opt, "derivs"), opt$par, ctrl = control$checkConv, :
## Model failed to converge: degenerate Hessian with 1 negative eigenvalues
## Warning: Some predictor variables are on very different scales: consider
## rescaling
## Warning in (function (fn, par, lower = rep.int(-Inf, n), upper = rep.int(Inf, :
## failure to converge in 10000 evaluations
## Warning in checkConv(attr(opt, "derivs"), opt$par, ctrl = control$checkConv, :
## unable to evaluate scaled gradient
## Warning in checkConv(attr(opt, "derivs"), opt$par, ctrl = control$checkConv, :
## Model failed to converge: degenerate Hessian with 1 negative eigenvalues
## Warning: Some predictor variables are on very different scales: consider
## rescaling
## Warning in (function (fn, par, lower = rep.int(-Inf, n), upper = rep.int(Inf, :
## failure to converge in 10000 evaluations
## boundary (singular) fit: see ?isSingular
## Warning: Some predictor variables are on very different scales: consider
## rescaling
## boundary (singular) fit: see ?isSingular
## Warning: Some predictor variables are on very different scales: consider
## rescaling
## boundary (singular) fit: see ?isSingular
## Warning: Some predictor variables are on very different scales: consider
## rescaling
## boundary (singular) fit: see ?isSingular
## Warning: Some predictor variables are on very different scales: consider
## rescaling
## Warning in checkConv(attr(opt, "derivs"), opt$par, ctrl = control$checkConv, :
## Model failed to converge with max|grad| = 4.07809 (tol = 0.002, component 1)
## Warning in checkConv(attr(opt, "derivs"), opt$par, ctrl = control$checkConv, : Model is nearly unidentifiable: very large eigenvalue
## - Rescale variables?;Model is nearly unidentifiable: large eigenvalue ratio
## - Rescale variables?
## Warning: Some predictor variables are on very different scales: consider
## rescaling
## Warning in (function (fn, par, lower = rep.int(-Inf, n), upper = rep.int(Inf, :
## failure to converge in 10000 evaluations
## Warning in checkConv(attr(opt, "derivs"), opt$par, ctrl = control$checkConv, :
## unable to evaluate scaled gradient
## Warning in checkConv(attr(opt, "derivs"), opt$par, ctrl = control$checkConv, :
## Model failed to converge: degenerate Hessian with 1 negative eigenvalues
## Warning: Some predictor variables are on very different scales: consider
## rescaling
## Warning in checkConv(attr(opt, "derivs"), opt$par, ctrl = control$checkConv, :
## unable to evaluate scaled gradient
## Warning in checkConv(attr(opt, "derivs"), opt$par, ctrl = control$checkConv, :
## Model failed to converge: degenerate Hessian with 1 negative eigenvalues
## Warning: Some predictor variables are on very different scales: consider
## rescaling
## boundary (singular) fit: see ?isSingular
## Warning: Some predictor variables are on very different scales: consider
## rescaling
## Warning in checkConv(attr(opt, "derivs"), opt$par, ctrl = control$checkConv, :
## unable to evaluate scaled gradient
## Warning in checkConv(attr(opt, "derivs"), opt$par, ctrl = control$checkConv, :
## Model failed to converge: degenerate Hessian with 1 negative eigenvalues
## Warning: Some predictor variables are on very different scales: consider
## rescaling
## Warning in (function (fn, par, lower = rep.int(-Inf, n), upper = rep.int(Inf, :
## failure to converge in 10000 evaluations
## Warning in checkConv(attr(opt, "derivs"), opt$par, ctrl = control$checkConv, :
## Model failed to converge with max|grad| = 94.7548 (tol = 0.002, component 1)
## Warning in checkConv(attr(opt, "derivs"), opt$par, ctrl = control$checkConv, : Model is nearly unidentifiable: very large eigenvalue
## - Rescale variables?;Model is nearly unidentifiable: large eigenvalue ratio
## - Rescale variables?
## Warning: Some predictor variables are on very different scales: consider
## rescaling
## Warning in checkConv(attr(opt, "derivs"), opt$par, ctrl = control$checkConv, :
## unable to evaluate scaled gradient
## Warning in checkConv(attr(opt, "derivs"), opt$par, ctrl = control$checkConv, :
## Model failed to converge: degenerate Hessian with 1 negative eigenvalues
## Warning: Some predictor variables are on very different scales: consider
## rescaling
## Warning in checkConv(attr(opt, "derivs"), opt$par, ctrl = control$checkConv, :
## Model failed to converge with max|grad| = 21.6443 (tol = 0.002, component 1)
## Warning in checkConv(attr(opt, "derivs"), opt$par, ctrl = control$checkConv, : Model is nearly unidentifiable: very large eigenvalue
## - Rescale variables?;Model is nearly unidentifiable: large eigenvalue ratio
## - Rescale variables?
## Warning: Some predictor variables are on very different scales: consider
## rescaling
## boundary (singular) fit: see ?isSingular
## Warning: Some predictor variables are on very different scales: consider
## rescaling
## Warning in checkConv(attr(opt, "derivs"), opt$par, ctrl = control$checkConv, :
## unable to evaluate scaled gradient
## Warning in checkConv(attr(opt, "derivs"), opt$par, ctrl = control$checkConv, :
## Model failed to converge: degenerate Hessian with 1 negative eigenvalues
## Warning: Some predictor variables are on very different scales: consider
## rescaling
## boundary (singular) fit: see ?isSingular
## Warning: Some predictor variables are on very different scales: consider
## rescaling
## Warning in checkConv(attr(opt, "derivs"), opt$par, ctrl = control$checkConv, :
## unable to evaluate scaled gradient
## Warning in checkConv(attr(opt, "derivs"), opt$par, ctrl = control$checkConv, :
## Model failed to converge: degenerate Hessian with 1 negative eigenvalues
## Warning: Some predictor variables are on very different scales: consider
## rescaling
## boundary (singular) fit: see ?isSingular
## Warning: Some predictor variables are on very different scales: consider
## rescaling
## Warning in checkConv(attr(opt, "derivs"), opt$par, ctrl = control$checkConv, :
## unable to evaluate scaled gradient
## Warning in checkConv(attr(opt, "derivs"), opt$par, ctrl = control$checkConv, :
## Model failed to converge: degenerate Hessian with 1 negative eigenvalues
## Warning: Some predictor variables are on very different scales: consider
## rescaling
## boundary (singular) fit: see ?isSingular
## Warning: Some predictor variables are on very different scales: consider
## rescaling
## boundary (singular) fit: see ?isSingular
## Warning: Some predictor variables are on very different scales: consider
## rescaling
## Warning in checkConv(attr(opt, "derivs"), opt$par, ctrl = control$checkConv, :
## unable to evaluate scaled gradient
## Warning in checkConv(attr(opt, "derivs"), opt$par, ctrl = control$checkConv, :
## Model failed to converge: degenerate Hessian with 1 negative eigenvalues
summary(pool(modelo_1), conf.int = TRUE)
## Warning in vcov.merMod(object, use.hessian = use.hessian): variance-covariance matrix computed from finite-difference Hessian is
## not positive definite or contains NA values: falling back to var-cov estimated from RX
## Warning in vcov.merMod(object, correlation = correlation, sigm = sig): variance-covariance matrix computed from finite-difference Hessian is
## not positive definite or contains NA values: falling back to var-cov estimated from RX
## term estimate std.error statistic df
## 1 (Intercept) -1.528371e+02 9.229350e+01 -1.6559898 2.999993
## 2 Ano 7.522289e-02 4.543698e-02 1.6555434 2.833841
## 3 perc.jov.1524 -7.057399e-03 2.715397e-02 -0.2599031 145.070083
## 4 pbf -1.325778e-03 1.286864e-03 -1.0302394 22.393823
## 5 gini.ibge -2.845974e-01 1.728375e+00 -0.1646619 66.192476
## 6 Taxa.desligamentos -6.760052e-05 2.487674e-05 -2.7174186 49.855405
## 7 perc.hom 5.128848e-02 6.318741e-02 0.8116882 119.695375
## 8 densidade.urbana1 8.448705e-06 2.547289e-05 0.3316743 127.992589
## p.value 2.5 % 97.5 %
## 1 0.196300641 -4.465566e+02 1.408824e+02
## 2 0.201684902 -7.429469e-02 2.247405e-01
## 3 0.795307043 -6.072590e-02 4.661110e-02
## 4 0.313897067 -3.991851e-03 1.340296e-03
## 5 0.869711958 -3.735222e+00 3.166027e+00
## 6 0.009023289 -1.175705e-04 -1.763052e-05
## 7 0.418581785 -7.382144e-02 1.763984e-01
## 8 0.740677672 -4.195380e-05 5.885121e-05
Pendencias:
https://www.rdocumentation.org/packages/ggResidpanel/versions/0.3.0/topics/resid_panel
#install.packages("ggResidpanel")
require( ggResidpanel)
## Loading required package: ggResidpanel
resid_panel(modelo_1[[1]],qqbands = TRUE,smoother = TRUE,plots = "all")
## `geom_smooth()` using formula 'y ~ x'
## `geom_smooth()` using formula 'y ~ x'
resid_panel(modelo_1[[2]],qqbands = TRUE,smoother = TRUE,plots = "all")
## `geom_smooth()` using formula 'y ~ x'
## `geom_smooth()` using formula 'y ~ x'