Rotina no Markdown: MODELO_LINEAR_MISTO_TRANS_NOVO

require(tidyverse)
require(lme4)
require(redres) #residuos condicionais
require(geoR)
require(lmerTest) #exibe os valor-p no modelo lmer
require(gtools)  #função combinations
require(performance) #função R2_nakagawa
require(buildmer) #função converged
require(DT) #função datatable

Utilizando scale() para padronizar as variáveis (média igual a 0), colocando-as na mesma escala para auxiliar na convergência.

# Load the data set (semicolon-separated file with decimal commas).
dados <- read.table("banco.csv", sep = ";", dec = ",", header = TRUE)

# Covariate matrix passed to boxcoxfit() as its second argument.
# NOTE(review): the name `matrix` shadows base::matrix as a variable; it is
# kept unchanged in case other code refers to it.
matrix <- dados %>%
  dplyr::select(ano, raz_2020, pbf, jovens_14_24, tx_desocup, tx_pres, perc_hom, dens1) %>%
  as.matrix()

# Estimate the Box-Cox parameters for tx_latrc; lambda2 = TRUE asks geoR to
# estimate the second (shift) parameter as well.
# The original script hard-coded l1 = 0.2953672 and l2 = 0 before this call
# (values obtained from this same fit). Those assignments were overwritten
# immediately below, so they are kept only as this reference note.
bc <- boxcoxfit(dados$tx_latrc, matrix, lambda2 = TRUE)
l1 <- bc$lambda[1]
l2 <- bc$lambda[2]
l1

##    lambda 
## 0.2953672

l2

## lambda2 
##       0

# NOTE(review): the script has not assigned an object called `vars` at this
# point (it is first assigned further down), so this line prints the `vars`
# function from the dplyr namespace, as the output below shows. Confirm
# whether this line was meant to display something else.
vars

## function (...) 
## {
##     quos(...)
## }
## <bytecode: 0x0000000018fe0448>
## <environment: namespace:dplyr>

#Box cox
#http://www.biostat.jhsph.edu/~iruczins/teaching/jf/ch8.pdf

# Build the modelling data set: transformed responses plus standardized
# covariates. `l1` and `l2` are the Box-Cox parameters estimated earlier in
# the script with boxcoxfit().
dados1 <- dados %>%
  mutate(
    # Two-parameter Box-Cox transform: ((y + l2)^l1 - 1) / l1, with the log
    # limit when l1 is zero. The original divided by 1 instead of l1, which
    # is not the Box-Cox definition. (Marked "not used" by the original
    # author.)
    ytrans = if (isTRUE(all.equal(unname(l1), 0))) {
      log(tx_latrc + l2)
    } else {
      ((tx_latrc + l2)^l1 - 1) / l1
    },
    ylog = log(tx_latrc),
    l_latrocinio = log(latrocinio),
    # Standardized covariates (scale() centres and scales each column).
    s_gini = scale(gini_ibge),
    s_pbf = scale(pbf),
    s_tx_desligamentos = scale(tx_desligamentos),
    s_dens1 = scale(dens1),
    s_jov_14_24 = scale(jovens_14_24),
    s_raz_2020 = scale(raz_2020),
    s_raz_1040 = scale(raz_1040),
    s_tx_pres = scale(tx_pres),
    s_perc_hom = scale(perc_hom),
    s_tx_desocup = scale(tx_desocup),
    ano_2 = ano^2,
    # Product later used (in log form) as the model offset.
    pop_ativa_cont = pop * pop_ativa,
    # Year expressed as a degree-1 orthogonal polynomial.
    ano1 = poly(ano, 1)[, 1]
  )


# Distribution of the Box-Cox transformed response.
hist(dados1$ytrans)

# Compare the Box-Cox (black) and log (red) transforms as curves over the raw
# rate. type = "l" joins points in the order they are supplied, so the points
# are sorted by tx_latrc first; otherwise the lines jump back and forth
# between non-adjacent points. Axis labels reproduce the original defaults.
ordem <- order(dados1$tx_latrc)
plot(dados1$tx_latrc[ordem], dados1$ytrans[ordem], type = "l",
     xlab = "dados1$tx_latrc", ylab = "dados1$ytrans")
lines(dados1$tx_latrc[ordem], log(dados1$tx_latrc[ordem]), type = "l", col = 2)

# Normality test on the transformed response.
shapiro.test(dados1$ytrans)

## 
##  Shapiro-Wilk normality test
## 
## data:  dados1$ytrans
## W = 0.9935, p-value = 0.4672

# Shapiro-Wilk normality test on the log of the raw rate, for comparison with
# the test on ytrans above.
shapiro.test(log(dados1$tx_latrc))

## 
##  Shapiro-Wilk normality test
## 
## data:  log(dados1$tx_latrc)
## W = 0.98967, p-value = 0.1244

# Histogram of the log-transformed rate.
hist(log(dados1$tx_latrc))

# Normal Q-Q plot of the log-transformed rate, with its reference line.
qqnorm(log(dados1$tx_latrc))
qqline(log(dados1$tx_latrc))

Modelo linear com efeitos mistos

Sem aplicar scale(), isto é, sem padronizar as variáveis, o algoritmo não converge.
Para a variável ano, foi utilizada a transformação poly(ano,1), que cria um polinômio ortogonal, também para facilitar a convergência do algoritmo.
A rotina abaixo consiste em testar todas as combinações de variáveis,
Também seria possível utilizar o método stepwise com critério AIC (disponível nos pacotes buildmer e stepcAIC); no entanto, tivemos problemas, pois algumas combinações não apresentam convergência e fazem o algoritmo parar.
Pendente: pesquisar melhor os pacotes que executam stepwise para modelos mistos.
Optou-se por não exibir os modelos com taxa de presos junto à proporção de homens, por serem muito correlacionadas;
- Veja por exemplo para o ano de 2017: https://www.gov.br/depen/pt-br/sisdepen/mais-informacoes/relatorios-infopen/relatorios-sinteticos/infopen-jun-2017.pdf

# Accumulators used by the model-search code that follows.
formula <- c()
modelos <- list()
AIC <- c()
R2 <- c()
matriz_singular <- c()
convergencia <- c()
resultados <- c()
shapiro_res <- c()
# shapiro_ef <- c()

# Base formula (response, time trend and random effects) shared by every
# candidate model. The original script assigned `base` four times in a row and
# only the last assignment was effective; the earlier alternatives are kept
# here as comments for reference:
#   "ytrans ~ ano + (1 | UF)"                      states differ by a constant
#   "ytrans ~ ano + (ano | UF)"                    ... plus a linear time coefficient
#   "ytrans ~ poly(ano,2) + (poly(ano,2) | UF)"    ... plus a quadratic time coefficient
#   "ytrans ~ poly(ano,3) + (poly(ano,3) | UF)"
#   "ytrans ~ poly(ano,1) + (poly(ano,1) | UF)"
#   "ylog ~ poly(ano,1) + (poly(ano,1) | UF)"
#   "l_latrocinio ~ ano + (ano | UF)"
base <- "l_latrocinio ~ poly(ano,1) + (poly(ano,1) | UF)"

# Candidate fixed-effect terms, written as formula fragments to be appended to
# `base`.
vars <- c(
  "+ s_raz_2020", "+ s_pbf", "+ s_jov_14_24", "+ s_tx_desocup", "+ s_tx_pres",
  "+ s_perc_hom", "+ s_dens1", "+ s_tx_desligamentos", "+ s_gini", "+ s_raz_1040"
)
# Alternative candidate sets tried previously:
# vars <- c("+ s_raz_2020","+ s_pbf","+ s_jov_14_24","+ s_tx_desocup","+ s_tx_pres","+ s_perc_hom")
# vars <- c("+ raz_2020","+ pbf","+ jovens_14_24","+ tx_desocup","+ tx_pres","+ perc_hom", "+ dens1")

# Enumerate every non-empty subset of `vars` and append each one to `base`,
# giving one formula string per candidate model. The order produced by
# gtools::combinations() is kept, because later code refers to models by their
# position in `formula`.
f <- unlist(lapply(seq_along(vars), function(nvar) {
  combos <- combinations(n = length(vars), r = nvar, v = vars, repeats.allowed = FALSE)
  paste0(base, apply(combos, 1, paste, collapse = ""))
}))

# Exclusion flags (1 = both terms of the pair appear in the formula). The two
# variables of each pair must not enter the same model.
aux <- as.numeric(grepl("tx_desligamentos", f) & grepl("tx_desocup", f))
aux1 <- as.numeric(grepl("raz_2020", f) & grepl("raz_1040", f))
aux2 <- as.numeric(grepl("raz_2020", f) & grepl("gini", f))
aux3 <- as.numeric(grepl("raz_1040", f) & grepl("gini", f))
aux4 <- as.numeric(grepl("perc_hom", f) & grepl("tx_pres", f))

# Keep only formulas with no flagged pair. `aux4` is now a column of the data
# frame; the original left it out and the filter only worked because dplyr
# fell back to the global vector of the same length.
# The result is a one-column character matrix (column "f"), as before.
formula <- data.frame(f, aux, aux1, aux2, aux3, aux4) %>%
  dplyr::filter(aux + aux1 + aux2 + aux3 + aux4 == 0) %>%
  dplyr::select(f) %>%
  as.matrix()
head(formula)

##      f                                                             
## [1,] "l_latrocinio ~ poly(ano,1) + (poly(ano,1) | UF)+ s_dens1"    
## [2,] "l_latrocinio ~ poly(ano,1) + (poly(ano,1) | UF)+ s_gini"     
## [3,] "l_latrocinio ~ poly(ano,1) + (poly(ano,1) | UF)+ s_jov_14_24"
## [4,] "l_latrocinio ~ poly(ano,1) + (poly(ano,1) | UF)+ s_pbf"      
## [5,] "l_latrocinio ~ poly(ano,1) + (poly(ano,1) | UF)+ s_perc_hom" 
## [6,] "l_latrocinio ~ poly(ano,1) + (poly(ano,1) | UF)+ s_raz_1040"

# Fit one mixed model per candidate formula and collect, for each fit: AIC,
# marginal R2, singular-fit flag, convergence flag and the Shapiro-Wilk
# p-value of the residuals returned by compute_redres().
#
# Results are stored in preallocated, typed vectors and assembled into a data
# frame once. The original grew `resultados` with c() + rbind() inside the
# loop and stored `shapiro.test(...)[2]` (a list), which turned every column
# into a list and appended duplicate rows whenever the chunk was re-run.
#
# NOTE(review): lmer() is called with its default estimation method (the
# summaries report REML). AIC comparisons between models with different fixed
# effects are usually made on ML fits (REML = FALSE) - confirm whether the AIC
# ranking should be based on ML refits.
n_modelos <- length(formula)
modelos <- vector("list", n_modelos)
AIC <- numeric(n_modelos)
R2 <- numeric(n_modelos)
matriz_singular <- logical(n_modelos)
convergencia <- logical(n_modelos)
shapiro_res <- numeric(n_modelos)

# For a quick test run, loop over seq_len(2) instead.
for (i in seq_len(n_modelos)) {
  # The call is kept as `formula[i]` so printed summaries are unchanged.
  modelos[[i]] <- lmer(formula[i], data = dados1, offset = log(pop_ativa_cont))
  AIC[i] <- stats::AIC(modelos[[i]])
  R2[i] <- r2_nakagawa(modelos[[i]])$R2_marginal
  matriz_singular[i] <- isSingular(modelos[[i]])
  convergencia[i] <- converged(modelos[[i]])[1]
  shapiro_res[i] <- shapiro.test(compute_redres(modelos[[i]]))$p.value
}

# One row per candidate model, in the same column order as before.
resultados <- data.frame(
  i = seq_len(n_modelos),
  modelo = as.vector(formula),
  AIC = AIC,
  R2 = R2,
  matriz_singular = matriz_singular,
  convergencia = convergencia,
  shapiro_res = shapiro_res,
  stringsAsFactors = FALSE
)

## Loading required namespace: testthat

# Name the result columns and coerce each one to an explicit type. unlist()
# makes this work whether `resultados` arrives with list, character or
# already-typed columns, so the filters below compare real logicals and
# numbers rather than relying on implicit coercion.
resultados <- resultados %>%
  data.frame() %>%
  `colnames<-`(c("i", "modelo", "AIC", "R2", "matriz_singular", "convergencia", "shapiro_res")) %>%
  mutate(
    i = as.integer(unlist(i)),
    modelo = as.character(unlist(modelo)),
    AIC = as.numeric(unlist(AIC)),
    R2 = as.numeric(unlist(R2)),
    matriz_singular = as.logical(unlist(matriz_singular)),
    convergencia = as.logical(unlist(convergencia)),
    shapiro_res = as.numeric(unlist(shapiro_res))
  )

# Keep only non-singular, converged fits whose residual Shapiro-Wilk p-value
# exceeds 0.1, then drop the columns that are no longer informative.
resultados1 <- resultados %>%
  dplyr::filter(!matriz_singular, convergencia, shapiro_res > 0.1) %>%
  dplyr::select(-convergencia, -matriz_singular, -modelo)
datatable(resultados1)

Top 5 modelos pelo criterio R2 marginal (os maiores R2)

Nos modelos em que entram PBF e pobreza, temos interpretações opostas: quando cresce o Bolsa Família, crescem os latrocínios; quando cresce a pobreza, decrescem os latrocínios - verificar o porquê.
O PBF está muito correlacionado com a densidade urbana! Justificar no relatório.
O R2 marginal é quanto os efeitos fixos explicam.
O R2 condicional é quanto os efeitos fixos + aleatórios explicam.

# Select candidate model 175 (position in `formula` / `modelos`), then print
# its formula string and the fitted-model summary.
i=175
#https://easystats.github.io/performance/reference/r2_nakagawa.html
formula[[i]]

## [1] "l_latrocinio ~ poly(ano,1) + (poly(ano,1) | UF)+ s_dens1+ s_pbf+ s_tx_desocup+ s_tx_pres"

summary(modelos[[i]])

## Linear mixed model fit by REML. t-tests use Satterthwaite's method [
## lmerModLmerTest]
## Formula: formula[i]
##    Data: dados1
##  Offset: log(pop_ativa_cont)
## 
## REML criterion at convergence: 184
## 
## Scaled residuals: 
##      Min       1Q   Median       3Q      Max 
## -2.96545 -0.55343  0.00567  0.55299  2.65445 
## 
## Random effects:
##  Groups   Name         Variance Std.Dev. Corr 
##  UF       (Intercept)  0.09776  0.3127        
##           poly(ano, 1) 8.05749  2.8386   -0.17
##  Residual              0.07932  0.2816        
## Number of obs: 216, groups:  UF, 27
## 
## Fixed effects:
##               Estimate Std. Error        df  t value Pr(>|t|)    
## (Intercept)  -11.01015    0.06315  23.68440 -174.350  < 2e-16 ***
## poly(ano, 1)  -1.83419    0.73459  49.97823   -2.497   0.0159 *  
## s_dens1       -0.15536    0.08372  32.74901   -1.856   0.0725 .  
## s_pbf          0.19344    0.07465  35.58896    2.591   0.0138 *  
## s_tx_desocup   0.23228    0.04242 190.72832    5.476 1.36e-07 ***
## s_tx_pres      0.06982    0.05042  92.87362    1.385   0.1694    
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## Correlation of Fixed Effects:
##             (Intr) pl(,1) s_dns1 s_pbf  s_tx_d
## poly(ano,1) -0.123                            
## s_dens1      0.000  0.161                     
## s_pbf        0.000 -0.004 -0.558              
## s_tx_desocp  0.000 -0.502 -0.291  0.092       
## s_tx_pres    0.000 -0.179  0.182  0.117  0.000

r2_nakagawa(modelos[[i]])

## # R2 for Mixed Models
## 
##   Conditional R2: 0.708
##      Marginal R2: 0.212

converged(modelos[[i]])[1]

## [1] TRUE

Análise de resíduos condicionais para o modelo i:

# Shapiro-Wilk test on the residuals returned by compute_redres() for the
# currently selected model; `[2]` keeps only the p-value element.
shapiro.test(compute_redres(modelos[[i]]))[2]

## $p.value
## [1] 0.8962107

# Residuals plotted against observation index.
plot(compute_redres(modelos[[i]]),main="resíduos condicionais versus índices")

# Residual Q-Q plot provided by the redres package.
plot_resqq(modelos[[i]])

Análise dos efeitos aleatórios para o modelo i:

# Extract the UF-level random effects of the currently selected model:
# first the random intercepts, then the second column, which is the
# poly(ano,1) term.
random <- ranef(modelos[[i]])
efeitos_uf <- random[["UF"]]
aleatorio1 <- efeitos_uf[["(Intercept)"]]
aleatorio2 <- efeitos_uf[[2]]

# Each set of random effects plotted against its index.
plot(aleatorio1, main = "efeitos aleatórios versus índices")

plot(aleatorio2, main = "efeitos aleatórios versus índices")

# Random-effects plot provided by the redres package.
plot_ranef(modelos[[i]])

# Normality test on the random intercepts.
shapiro.test(aleatorio1)

## 
##  Shapiro-Wilk normality test
## 
## data:  aleatorio1
## W = 0.98337, p-value = 0.9292

shapiro.test(aleatorio2)

## 
##  Shapiro-Wilk normality test
## 
## data:  aleatorio2
## W = 0.94053, p-value = 0.1256

# Select candidate model 167, then print its formula string and the
# fitted-model summary.
i=167
#https://easystats.github.io/performance/reference/r2_nakagawa.html
formula[[i]]

## [1] "l_latrocinio ~ poly(ano,1) + (poly(ano,1) | UF)+ s_dens1+ s_pbf+ s_perc_hom+ s_tx_desocup"

summary(modelos[[i]])

## Linear mixed model fit by REML. t-tests use Satterthwaite's method [
## lmerModLmerTest]
## Formula: formula[i]
##    Data: dados1
##  Offset: log(pop_ativa_cont)
## 
## REML criterion at convergence: 184
## 
## Scaled residuals: 
##      Min       1Q   Median       3Q      Max 
## -3.01229 -0.55097  0.00157  0.57054  2.78089 
## 
## Random effects:
##  Groups   Name         Variance Std.Dev. Corr 
##  UF       (Intercept)  0.09188  0.3031        
##           poly(ano, 1) 8.11907  2.8494   -0.19
##  Residual              0.07983  0.2825        
## Number of obs: 216, groups:  UF, 27
## 
## Fixed effects:
##               Estimate Std. Error        df  t value Pr(>|t|)    
## (Intercept)  -11.01015    0.06142  24.18334 -179.256  < 2e-16 ***
## poly(ano, 1)  -1.52564    0.72960  47.86471   -2.091   0.0419 *  
## s_dens1       -0.11614    0.09086  46.30211   -1.278   0.2076    
## s_pbf          0.13982    0.07801  44.91533    1.792   0.0798 .  
## s_perc_hom     0.06839    0.04720 189.56117    1.449   0.1490    
## s_tx_desocup   0.23053    0.04236 189.83421    5.442 1.61e-07 ***
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## Correlation of Fixed Effects:
##             (Intr) pl(,1) s_dns1 s_pbf  s_prc_
## poly(ano,1) -0.133                            
## s_dens1      0.000  0.231                     
## s_pbf        0.000 -0.027 -0.661              
## s_perc_hom   0.000  0.120  0.466 -0.372       
## s_tx_desocp  0.000 -0.508 -0.277  0.089 -0.029

r2_nakagawa(modelos[[i]])

## # R2 for Mixed Models
## 
##   Conditional R2: 0.697
##      Marginal R2: 0.205

# Select candidate model 247, then print its formula string and the
# fitted-model summary.
i=247
#https://easystats.github.io/performance/reference/r2_nakagawa.html
formula[[i]]

## [1] "l_latrocinio ~ poly(ano,1) + (poly(ano,1) | UF)+ s_dens1+ s_jov_14_24+ s_pbf+ s_tx_desocup+ s_tx_pres"

summary(modelos[[i]])

## Linear mixed model fit by REML. t-tests use Satterthwaite's method [
## lmerModLmerTest]
## Formula: formula[i]
##    Data: dados1
##  Offset: log(pop_ativa_cont)
## 
## REML criterion at convergence: 187.3
## 
## Scaled residuals: 
##      Min       1Q   Median       3Q      Max 
## -2.95093 -0.53293  0.01909  0.56185  2.67375 
## 
## Random effects:
##  Groups   Name         Variance Std.Dev. Corr 
##  UF       (Intercept)  0.10433  0.3230        
##           poly(ano, 1) 8.09341  2.8449   -0.15
##  Residual              0.07885  0.2808        
## Number of obs: 216, groups:  UF, 27
## 
## Fixed effects:
##               Estimate Std. Error        df  t value Pr(>|t|)    
## (Intercept)  -11.01015    0.06503  21.94915 -169.301  < 2e-16 ***
## poly(ano, 1)  -2.10982    0.84880  78.76767   -2.486   0.0150 *  
## s_dens1       -0.17031    0.08990  39.02452   -1.895   0.0656 .  
## s_jov_14_24   -0.04241    0.06595 138.18011   -0.643   0.5213    
## s_pbf          0.22482    0.09199  64.39154    2.444   0.0173 *  
## s_tx_desocup   0.24227    0.04528 192.28706    5.351 2.48e-07 ***
## s_tx_pres      0.07822    0.05226 100.23945    1.497   0.1376    
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## Correlation of Fixed Effects:
##             (Intr) pl(,1) s_dns1 s__14_ s_pbf  s_tx_d
## poly(ano,1) -0.092                                   
## s_dens1      0.000  0.284                            
## s_jov_14_24  0.000  0.498  0.301                     
## s_pbf        0.000 -0.281 -0.611 -0.553              
## s_tx_desocp  0.000 -0.579 -0.361 -0.342  0.269       
## s_tx_pres    0.000 -0.257  0.103 -0.205  0.209  0.072

r2_nakagawa(modelos[[i]])

## # R2 for Mixed Models
## 
##   Conditional R2: 0.714
##      Marginal R2: 0.201

# Select candidate model 68, then print its formula string and the
# fitted-model summary.
i=68
#https://easystats.github.io/performance/reference/r2_nakagawa.html
formula[[i]]

## [1] "l_latrocinio ~ poly(ano,1) + (poly(ano,1) | UF)+ s_dens1+ s_pbf+ s_tx_desocup"

summary(modelos[[i]])

## Linear mixed model fit by REML. t-tests use Satterthwaite's method [
## lmerModLmerTest]
## Formula: formula[i]
##    Data: dados1
##  Offset: log(pop_ativa_cont)
## 
## REML criterion at convergence: 181.8
## 
## Scaled residuals: 
##     Min      1Q  Median      3Q     Max 
## -2.9148 -0.5497  0.0174  0.5786  2.6618 
## 
## Random effects:
##  Groups   Name         Variance Std.Dev. Corr 
##  UF       (Intercept)  0.09355  0.3059        
##           poly(ano, 1) 8.10743  2.8474   -0.16
##  Residual              0.08011  0.2830        
## Number of obs: 216, groups:  UF, 27
## 
## Fixed effects:
##               Estimate Std. Error        df  t value Pr(>|t|)    
## (Intercept)  -11.01015    0.06193  24.28161 -177.779  < 2e-16 ***
## poly(ano, 1)  -1.65094    0.72486  47.13957   -2.278   0.0273 *  
## s_dens1       -0.17528    0.08098  31.81505   -2.164   0.0380 *  
## s_pbf          0.18001    0.07312  35.63349    2.462   0.0188 *  
## s_tx_desocup   0.23210    0.04250 191.43291    5.461 1.46e-07 ***
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## Correlation of Fixed Effects:
##             (Intr) pl(,1) s_dns1 s_pbf 
## poly(ano,1) -0.118                     
## s_dens1      0.000  0.200              
## s_pbf        0.000  0.018 -0.594       
## s_tx_desocp  0.000 -0.510 -0.298  0.088

r2_nakagawa(modelos[[i]])

## # R2 for Mixed Models
## 
##   Conditional R2: 0.696
##      Marginal R2: 0.199

# Select candidate model 261, then print its formula string and the
# fitted-model summary.
i=261
#https://easystats.github.io/performance/reference/r2_nakagawa.html
formula[[i]]

## [1] "l_latrocinio ~ poly(ano,1) + (poly(ano,1) | UF)+ s_dens1+ s_pbf+ s_raz_1040+ s_tx_desocup+ s_tx_pres"

summary(modelos[[i]])

## Linear mixed model fit by REML. t-tests use Satterthwaite's method [
## lmerModLmerTest]
## Formula: formula[i]
##    Data: dados1
##  Offset: log(pop_ativa_cont)
## 
## REML criterion at convergence: 186.4
## 
## Scaled residuals: 
##      Min       1Q   Median       3Q      Max 
## -2.95024 -0.56898 -0.00864  0.53863  2.63052 
## 
## Random effects:
##  Groups   Name         Variance Std.Dev. Corr 
##  UF       (Intercept)  0.10106  0.3179        
##           poly(ano, 1) 8.46792  2.9100   -0.18
##  Residual              0.07817  0.2796        
## Number of obs: 216, groups:  UF, 27
## 
## Fixed effects:
##               Estimate Std. Error        df  t value Pr(>|t|)    
## (Intercept)  -11.01015    0.06407  23.66744 -171.844  < 2e-16 ***
## poly(ano, 1)  -1.85591    0.74369  48.52292   -2.496   0.0160 *  
## s_dens1       -0.13387    0.08633  33.45528   -1.551   0.1304    
## s_pbf          0.20545    0.07586  37.79235    2.708   0.0101 *  
## s_raz_1040    -0.06757    0.05028 175.62238   -1.344   0.1807    
## s_tx_desocup   0.25270    0.04483 193.37214    5.637 6.06e-08 ***
## s_tx_pres      0.07601    0.05072  99.50728    1.499   0.1371    
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## Correlation of Fixed Effects:
##             (Intr) pl(,1) s_dns1 s_pbf  s__104 s_tx_d
## poly(ano,1) -0.129                                   
## s_dens1      0.000  0.153                            
## s_pbf        0.000 -0.008 -0.526                     
## s_raz_1040   0.000  0.017 -0.190 -0.106              
## s_tx_desocp  0.000 -0.471 -0.205  0.127 -0.333       
## s_tx_pres    0.000 -0.178  0.192  0.123 -0.078  0.027

r2_nakagawa(modelos[[i]])

## # R2 for Mixed Models
## 
##   Conditional R2: 0.713
##      Marginal R2: 0.197

Top 5 modelos pelo AIC (os menores AIC)

datatable(resultados1)

# Select candidate model 9, then print its formula string and the
# fitted-model summary.
i=9
formula[[i]]

## [1] "l_latrocinio ~ poly(ano,1) + (poly(ano,1) | UF)+ s_tx_desocup"

summary(modelos[[i]])

## Linear mixed model fit by REML. t-tests use Satterthwaite's method [
## lmerModLmerTest]
## Formula: formula[i]
##    Data: dados1
##  Offset: log(pop_ativa_cont)
## 
## REML criterion at convergence: 180.7
## 
## Scaled residuals: 
##      Min       1Q   Median       3Q      Max 
## -3.05777 -0.59973  0.01066  0.60272  2.61967 
## 
## Random effects:
##  Groups   Name         Variance Std.Dev. Corr
##  UF       (Intercept)  0.10468  0.3235       
##           poly(ano, 1) 7.94060  2.8179   0.03
##  Residual              0.08069  0.2841       
## Number of obs: 216, groups:  UF, 27
## 
## Fixed effects:
##               Estimate Std. Error        df  t value Pr(>|t|)    
## (Intercept)  -11.01015    0.06520  25.95350 -168.878  < 2e-16 ***
## poly(ano, 1)  -1.51229    0.69922  42.71817   -2.163   0.0362 *  
## s_tx_desocup   0.21085    0.04126 187.32007    5.110 7.89e-07 ***
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## Correlation of Fixed Effects:
##             (Intr) pl(,1)
## poly(ano,1)  0.021       
## s_tx_desocp  0.000 -0.483

r2_nakagawa(modelos[[i]])

## # R2 for Mixed Models
## 
##   Conditional R2: 0.681
##      Marginal R2: 0.122

# Select candidate model 42, then print its formula string and the
# fitted-model summary.
i=42
formula[[i]]

## [1] "l_latrocinio ~ poly(ano,1) + (poly(ano,1) | UF)+ s_perc_hom+ s_tx_desocup"

summary(modelos[[i]])

## Linear mixed model fit by REML. t-tests use Satterthwaite's method [
## lmerModLmerTest]
## Formula: formula[i]
##    Data: dados1
##  Offset: log(pop_ativa_cont)
## 
## REML criterion at convergence: 180.1
## 
## Scaled residuals: 
##      Min       1Q   Median       3Q      Max 
## -3.13399 -0.56102  0.00438  0.56616  2.82249 
## 
## Random effects:
##  Groups   Name         Variance Std.Dev. Corr 
##  UF       (Intercept)  0.09108  0.3018        
##           poly(ano, 1) 8.04234  2.8359   -0.07
##  Residual              0.08021  0.2832        
## Number of obs: 216, groups:  UF, 27
## 
## Fixed effects:
##               Estimate Std. Error        df  t value Pr(>|t|)    
## (Intercept)  -11.01015    0.06119  25.45365 -179.926  < 2e-16 ***
## poly(ano, 1)  -1.45835    0.69890  41.96989   -2.087   0.0430 *  
## s_perc_hom     0.09750    0.04170 130.68318    2.338   0.0209 *  
## s_tx_desocup   0.22075    0.04069 182.58896    5.425 1.82e-07 ***
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## Correlation of Fixed Effects:
##             (Intr) pl(,1) s_prc_
## poly(ano,1) -0.049              
## s_perc_hom   0.000  0.034       
## s_tx_desocp  0.000 -0.468  0.102

r2_nakagawa(modelos[[i]])

## # R2 for Mixed Models
## 
##   Conditional R2: 0.668
##      Marginal R2: 0.137

# Select candidate model 24, then print its formula string and the
# fitted-model summary.
i=24
formula[[i]]

## [1] "l_latrocinio ~ poly(ano,1) + (poly(ano,1) | UF)+ s_gini+ s_tx_desocup"

summary(modelos[[i]])

## Linear mixed model fit by REML. t-tests use Satterthwaite's method [
## lmerModLmerTest]
## Formula: formula[i]
##    Data: dados1
##  Offset: log(pop_ativa_cont)
## 
## REML criterion at convergence: 182.3
## 
## Scaled residuals: 
##      Min       1Q   Median       3Q      Max 
## -3.05877 -0.56044 -0.00191  0.54674  2.63550 
## 
## Random effects:
##  Groups   Name         Variance Std.Dev. Corr
##  UF       (Intercept)  0.11216  0.3349       
##           poly(ano, 1) 8.31569  2.8837   0.03
##  Residual              0.07876  0.2806       
## Number of obs: 216, groups:  UF, 27
## 
## Fixed effects:
##               Estimate Std. Error        df  t value Pr(>|t|)    
## (Intercept)  -11.01015    0.06722  24.85818 -163.791  < 2e-16 ***
## poly(ano, 1)  -1.62189    0.71029  42.43961   -2.283   0.0275 *  
## s_gini        -0.08232    0.05074 143.02885   -1.622   0.1069    
## s_tx_desocup   0.23705    0.04412 194.27190    5.373  2.2e-07 ***
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## Correlation of Fixed Effects:
##             (Intr) pl(,1) s_gini
## poly(ano,1)  0.025              
## s_gini       0.000  0.093       
## s_tx_desocp  0.000 -0.476 -0.362

r2_nakagawa(modelos[[i]])

## # R2 for Mixed Models
## 
##   Conditional R2: 0.689
##      Marginal R2: 0.094

# Select candidate model 37, then print its formula string and the
# fitted-model summary.
i=37
formula[[i]]

## [1] "l_latrocinio ~ poly(ano,1) + (poly(ano,1) | UF)+ s_pbf+ s_tx_desocup"

summary(modelos[[i]])

## Linear mixed model fit by REML. t-tests use Satterthwaite's method [
## lmerModLmerTest]
## Formula: formula[i]
##    Data: dados1
##  Offset: log(pop_ativa_cont)
## 
## REML criterion at convergence: 182.8
## 
## Scaled residuals: 
##      Min       1Q   Median       3Q      Max 
## -3.00924 -0.59927  0.01059  0.59135  2.58433 
## 
## Random effects:
##  Groups   Name         Variance Std.Dev. Corr 
##  UF       (Intercept)  0.10422  0.3228        
##           poly(ano, 1) 8.01556  2.8312   -0.04
##  Residual              0.08037  0.2835        
## Number of obs: 216, groups:  UF, 27
## 
## Fixed effects:
##               Estimate Std. Error        df  t value Pr(>|t|)    
## (Intercept)  -11.01015    0.06505  24.86986 -169.248  < 2e-16 ***
## poly(ano, 1)  -1.36181    0.71018  44.86485   -1.918   0.0615 .  
## s_pbf          0.08007    0.06183  35.54221    1.295   0.2037    
## s_tx_desocup   0.20659    0.04122 190.23771    5.011 1.23e-06 ***
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## Correlation of Fixed Effects:
##             (Intr) pl(,1) s_pbf 
## poly(ano,1) -0.026              
## s_pbf        0.000  0.167       
## s_tx_desocp  0.000 -0.486 -0.086

r2_nakagawa(modelos[[i]])

## # R2 for Mixed Models
## 
##   Conditional R2: 0.702
##      Marginal R2: 0.179

# Select candidate model 50, then print its formula string and the
# fitted-model summary.
i=50
formula[[i]]

## [1] "l_latrocinio ~ poly(ano,1) + (poly(ano,1) | UF)+ s_tx_desocup+ s_tx_pres"

summary(modelos[[i]])

## Linear mixed model fit by REML. t-tests use Satterthwaite's method [
## lmerModLmerTest]
## Formula: formula[i]
##    Data: dados1
##  Offset: log(pop_ativa_cont)
## 
## REML criterion at convergence: 183.3
## 
## Scaled residuals: 
##      Min       1Q   Median       3Q      Max 
## -3.09540 -0.57813  0.02067  0.58518  2.63823 
## 
## Random effects:
##  Groups   Name         Variance Std.Dev. Corr
##  UF       (Intercept)  0.10876  0.3298       
##           poly(ano, 1) 7.89518  2.8098   0.01
##  Residual              0.08005  0.2829       
## Number of obs: 216, groups:  UF, 27
## 
## Fixed effects:
##               Estimate Std. Error        df  t value Pr(>|t|)    
## (Intercept)  -11.01015    0.06632  25.32308 -166.005  < 2e-16 ***
## poly(ano, 1)  -1.74870    0.72204  48.13406   -2.422   0.0193 *  
## s_tx_desocup   0.21484    0.04134 188.96262    5.196 5.24e-07 ***
## s_tx_pres      0.06276    0.04995  94.10367    1.256   0.2121    
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## Correlation of Fixed Effects:
##             (Intr) pl(,1) s_tx_d
## poly(ano,1)  0.009              
## s_tx_desocp  0.000 -0.485       
## s_tx_pres    0.000 -0.259  0.073

r2_nakagawa(modelos[[i]])

## # R2 for Mixed Models
## 
##   Conditional R2: 0.688
##      Marginal R2: 0.121

Sugestão 1: entre os Top 10 dos melhores R2, escolher o modelo de menor AIC (verificar no datatable de resultados1); é o modelo com i=68:

É equivalente a escolher o melhor R2 entre os 10 menores AIC.

datatable(resultados1)

# Select the suggested model (position 68 in `formula` / `modelos`), then
# print its formula string and the fitted-model summary. The diagnostics
# further down reuse this value of `i`.
i=68
formula[[i]]

## [1] "l_latrocinio ~ poly(ano,1) + (poly(ano,1) | UF)+ s_dens1+ s_pbf+ s_tx_desocup"

summary(modelos[[i]])

## Linear mixed model fit by REML. t-tests use Satterthwaite's method [
## lmerModLmerTest]
## Formula: formula[i]
##    Data: dados1
##  Offset: log(pop_ativa_cont)
## 
## REML criterion at convergence: 181.8
## 
## Scaled residuals: 
##     Min      1Q  Median      3Q     Max 
## -2.9148 -0.5497  0.0174  0.5786  2.6618 
## 
## Random effects:
##  Groups   Name         Variance Std.Dev. Corr 
##  UF       (Intercept)  0.09355  0.3059        
##           poly(ano, 1) 8.10743  2.8474   -0.16
##  Residual              0.08011  0.2830        
## Number of obs: 216, groups:  UF, 27
## 
## Fixed effects:
##               Estimate Std. Error        df  t value Pr(>|t|)    
## (Intercept)  -11.01015    0.06193  24.28161 -177.779  < 2e-16 ***
## poly(ano, 1)  -1.65094    0.72486  47.13957   -2.278   0.0273 *  
## s_dens1       -0.17528    0.08098  31.81505   -2.164   0.0380 *  
## s_pbf          0.18001    0.07312  35.63349    2.462   0.0188 *  
## s_tx_desocup   0.23210    0.04250 191.43291    5.461 1.46e-07 ***
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## Correlation of Fixed Effects:
##             (Intr) pl(,1) s_dns1 s_pbf 
## poly(ano,1) -0.118                     
## s_dens1      0.000  0.200              
## s_pbf        0.000  0.018 -0.594       
## s_tx_desocp  0.000 -0.510 -0.298  0.088

r2_nakagawa(modelos[[i]])

## # R2 for Mixed Models
## 
##   Conditional R2: 0.696
##      Marginal R2: 0.199

Análise de resíduos condicionais para o modelo i=68:

# Shapiro-Wilk test on the residuals returned by compute_redres() for the
# currently selected model; `[2]` keeps only the p-value element.
shapiro.test(compute_redres(modelos[[i]]))[2]

## $p.value
## [1] 0.9450177

# Residuals plotted against observation index.
plot(compute_redres(modelos[[i]]),main="resíduos condicionais versus índices")

# Residual Q-Q plot provided by the redres package.
plot_resqq(modelos[[i]])

Análise dos efeitos aleatórios para o modelo i=68:

# Extract the UF-level random effects of the currently selected model:
# first the random intercepts, then the second column, which is the
# poly(ano,1) term.
random <- ranef(modelos[[i]])
efeitos_uf <- random[["UF"]]
aleatorio1 <- efeitos_uf[["(Intercept)"]]
aleatorio2 <- efeitos_uf[[2]]

# Each set of random effects plotted against its index.
plot(aleatorio1, main = "efeitos aleatórios versus índices")

plot(aleatorio2, main = "efeitos aleatórios versus índices")

# Random-effects plot provided by the redres package.
plot_ranef(modelos[[i]])

# Normality test on the random intercepts.
shapiro.test(aleatorio1)

## 
##  Shapiro-Wilk normality test
## 
## data:  aleatorio1
## W = 0.98386, p-value = 0.9371

shapiro.test(aleatorio2)

## 
##  Shapiro-Wilk normality test
## 
## data:  aleatorio2
## W = 0.93713, p-value = 0.1035

Modelo Linear Misto

Modelo linear com efeitos mistos

Top 5 modelos pelo criterio R2 marginal (os maiores R2)

Top 5 modelos pelo AIC (os menores AIC)

Sugestão 1: entre os Top 10 dos melhores R2, escolher o modelo de menor AIC (verificar no datatable de resultados1); é o modelo com i=68: