TCC

R Markdown

` This is an R Markdown document. Markdown is a simple formatting syntax for authoring HTML, PDF, and MS Word documents. For more details on using R Markdown see http://rmarkdown.rstudio.com.

When you click the Knit button a document will be generated that includes both content as well as the output of any embedded R code chunks within the document. You can embed an R code chunk like this:

pacotes <- c("plotly", #plataforma gráfica
             "tidyverse", #carregar outros pacotes do R
             "ggrepel", #geoms de texto e rótulo para 'ggplot2' que ajudam a
             #evitar sobreposição de textos
             "knitr", "kableExtra", #formatação de tabelas
             "reshape2", #função 'melt'
             "PerformanceAnalytics", #função 'chart.Correlation' para plotagem
             "psych", #elaboração da fatorial e estatísticas
             "ltm", #determinação do alpha de Cronbach pela função 'cronbach.alpha'
             "Hmisc", # matriz de correlações com p-valor
             "readxl","tinytex", "rgl") # importar arquivo Excel

if(sum(as.numeric(!pacotes %in% installed.packages())) != 0){
  instalador <- pacotes[!pacotes %in% installed.packages()]
  for(i in 1:length(instalador)) {
    install.packages(instalador, dependencies = T)
    break()}
  sapply(pacotes, require, character = T) 
} else {
  sapply(pacotes, require, character = T) 
}

## Loading required package: plotly

## Loading required package: ggplot2

## Warning: package 'ggplot2' was built under R version 4.1.2

## 
## Attaching package: 'plotly'

## The following object is masked from 'package:ggplot2':
## 
##     last_plot

## The following object is masked from 'package:stats':
## 
##     filter

## The following object is masked from 'package:graphics':
## 
##     layout

## Loading required package: tidyverse

## Warning: package 'tidyverse' was built under R version 4.1.2

## Warning: package 'tibble' was built under R version 4.1.2

## Warning: package 'tidyr' was built under R version 4.1.2

## Warning: package 'readr' was built under R version 4.1.2

## Warning: package 'purrr' was built under R version 4.1.2

## Warning: package 'dplyr' was built under R version 4.1.2

## Warning: package 'stringr' was built under R version 4.1.2

## Warning: package 'forcats' was built under R version 4.1.2

## Warning: package 'lubridate' was built under R version 4.1.2

## ── Attaching core tidyverse packages ──────────────────────── tidyverse 2.0.0 ──
## ✔ dplyr     1.1.0     ✔ readr     2.1.4
## ✔ forcats   1.0.0     ✔ stringr   1.5.0
## ✔ lubridate 1.9.2     ✔ tibble    3.1.8
## ✔ purrr     1.0.1     ✔ tidyr     1.3.0

## ── Conflicts ────────────────────────────────────────── tidyverse_conflicts() ──
## ✖ dplyr::filter() masks plotly::filter(), stats::filter()
## ✖ dplyr::lag()    masks stats::lag()
## ℹ Use the conflicted package (<http://conflicted.r-lib.org/>) to force all conflicts to become errors
## Loading required package: ggrepel
## 
## Loading required package: knitr
## 
## Loading required package: kableExtra
## 
## 
## Attaching package: 'kableExtra'
## 
## 
## The following object is masked from 'package:dplyr':
## 
##     group_rows
## 
## 
## Loading required package: reshape2
## 
## 
## Attaching package: 'reshape2'
## 
## 
## The following object is masked from 'package:tidyr':
## 
##     smiths
## 
## 
## Loading required package: PerformanceAnalytics
## 
## Loading required package: xts

## Warning: package 'xts' was built under R version 4.1.2

## Loading required package: zoo

## Warning: package 'zoo' was built under R version 4.1.2

## 
## Attaching package: 'zoo'
## 
## The following objects are masked from 'package:base':
## 
##     as.Date, as.Date.numeric
## 
## 
## Attaching package: 'xts'
## 
## The following objects are masked from 'package:dplyr':
## 
##     first, last
## 
## 
## Attaching package: 'PerformanceAnalytics'
## 
## The following object is masked from 'package:graphics':
## 
##     legend
## 
## Loading required package: psych

## Warning: package 'psych' was built under R version 4.1.2

## 
## Attaching package: 'psych'
## 
## The following objects are masked from 'package:ggplot2':
## 
##     %+%, alpha
## 
## Loading required package: ltm

## Warning: package 'ltm' was built under R version 4.1.2

## Loading required package: MASS
## 
## Attaching package: 'MASS'
## 
## The following object is masked from 'package:dplyr':
## 
##     select
## 
## The following object is masked from 'package:plotly':
## 
##     select
## 
## Loading required package: msm
## Loading required package: polycor

## Warning: package 'polycor' was built under R version 4.1.2

## 
## Attaching package: 'polycor'
## 
## The following object is masked from 'package:psych':
## 
##     polyserial
## 
## 
## Attaching package: 'ltm'
## 
## The following object is masked from 'package:psych':
## 
##     factor.scores
## 
## Loading required package: Hmisc

## Warning: package 'Hmisc' was built under R version 4.1.2

## Loading required package: lattice
## Loading required package: survival
## Loading required package: Formula
## 
## Attaching package: 'Hmisc'
## 
## The following object is masked from 'package:psych':
## 
##     describe
## 
## The following objects are masked from 'package:dplyr':
## 
##     src, summarize
## 
## The following object is masked from 'package:plotly':
## 
##     subplot
## 
## The following objects are masked from 'package:base':
## 
##     format.pval, units
## 
## Loading required package: readxl

## Warning: package 'readxl' was built under R version 4.1.2

## Loading required package: tinytex
## Loading required package: rgl

## Warning: package 'rgl' was built under R version 4.1.2

##               plotly            tidyverse              ggrepel 
##                 TRUE                 TRUE                 TRUE 
##                knitr           kableExtra             reshape2 
##                 TRUE                 TRUE                 TRUE 
## PerformanceAnalytics                psych                  ltm 
##                 TRUE                 TRUE                 TRUE 
##                Hmisc               readxl              tinytex 
##                 TRUE                 TRUE                 TRUE 
##                  rgl 
##                 TRUE

Get data:

BF_historica <- read_csv("Dadod_Bolsa_Familia_Brasil.csv", locale = readr::locale(encoding = "latin1"))

## Rows: 5940 Columns: 9
## ── Column specification ────────────────────────────────────────────────────────
## Delimiter: ","
## chr (2): Unidade Territorial, Referência
## dbl (3): Código, Famílias PBF (até Out/2021), Famílias PBF (a partir de Mar/...
## num (4): Valor repassado às famílias PBF (até Out/2021), Valor repassado às ...
## 
## ℹ Use `spec()` to retrieve the full column specification for this data.
## ℹ Specify the column types or set `show_col_types = FALSE` to quiet this message.

BF_historica <- BF_historica[,c(2,3,4,6,8)]

PIB_per_capita_ <- readxl::read_excel("PIB per capita .xlsx")

## New names:
## • `` -> `...2`
## • `` -> `...3`
## • `` -> `...4`
## • `` -> `...5`
## • `` -> `...6`
## • `` -> `...7`
## • `` -> `...8`
## • `` -> `...9`
## • `` -> `...10`

PIB <- PIB_per_capita_[c(9,11:17,19:27,29:36,38:41),c(1,4:11)]

obitos_inf <- read_delim("obitos_infantis.csv", 
    delim = ";", escape_double = FALSE, trim_ws = TRUE, locale = readr::locale(encoding = "latin1"))

## Rows: 34 Columns: 10
## ── Column specification ────────────────────────────────────────────────────────
## Delimiter: ";"
## chr (1): Região/Unidade da Federação
## dbl (9): 2012, 2013, 2014, 2015, 2016, 2017, 2018, 2019, Total
## 
## ℹ Use `spec()` to retrieve the full column specification for this data.
## ℹ Specify the column types or set `show_col_types = FALSE` to quiet this message.

obitos_inf <- obitos_inf[c(2:8, 10:18, 20:23, 25:27, 29:32),-10]

ocupacao_2012 <- read_excel("ocupacao_2012.xlsx")

## New names:
## • `` -> `...2`
## • `` -> `...3`
## • `` -> `...4`
## • `` -> `...5`
## • `` -> `...6`

ocupacao_2012<- ocupacao_2012[,1:2]

ocupacao_2013 <- read_excel("ocupacao_2013.xlsx")

## New names:
## • `` -> `...2`
## • `` -> `...3`
## • `` -> `...4`
## • `` -> `...5`
## • `` -> `...6`

ocupacao_2013 <- ocupacao_2013[,1:2]

ocupacao_2014 <- read_excel("ocupacao_2014.xlsx")

## New names:
## • `` -> `...2`
## • `` -> `...3`
## • `` -> `...4`
## • `` -> `...5`
## • `` -> `...6`

ocupacao_2014 <- ocupacao_2014[,1:2]

ocupacao_2015 <- read_excel("ocupacao_2015.xlsx")

## New names:
## • `` -> `...2`
## • `` -> `...3`
## • `` -> `...4`
## • `` -> `...5`
## • `` -> `...6`

ocupacao_2015<- ocupacao_2015[,1:2]

ocupacao_2016 <- read_excel("ocupacao_2016.xlsx")

## New names:
## • `` -> `...2`
## • `` -> `...3`
## • `` -> `...4`
## • `` -> `...5`
## • `` -> `...6`

ocupacao_2016<- ocupacao_2016[,1:2]

ocupacao_2017 <- read_excel("ocupacao_2017.xlsx")

## New names:
## • `` -> `...2`
## • `` -> `...3`
## • `` -> `...4`
## • `` -> `...5`
## • `` -> `...6`

ocupacao_2017<- ocupacao_2017[,1:2]

ocupacao_2018 <- read_excel("ocupaçao_2018.xlsx")

## New names:
## • `` -> `...2`
## • `` -> `...3`
## • `` -> `...4`
## • `` -> `...5`
## • `` -> `...6`

ocupacao_2018<- ocupacao_2018[,1:2]

ocupacao_2019 <- read_excel("ocupação_2019.xlsx")

## New names:
## • `` -> `...2`
## • `` -> `...3`
## • `` -> `...4`
## • `` -> `...5`
## • `` -> `...6`

ocupacao_2019<- ocupacao_2019[,1:2]

matriculas <- read_delim("matriculas.csv", 
    delim = ";", escape_double = FALSE, trim_ws = TRUE)

## Rows: 38 Columns: 82
## ── Column specification ────────────────────────────────────────────────────────
## Delimiter: ";"
## dbl (82): Colonne1, Acre_matriculas_inf, Acre_matriculas_fund, Acre_matricul...
## 
## ℹ Use `spec()` to retrieve the full column specification for this data.
## ℹ Specify the column types or set `show_col_types = FALSE` to quiet this message.

Aggregate values of Bolsa Fmilia by year:

library(dplyr)
BF_historica1 <- separate_wider_delim(BF_historica, cols = Referência, delim = "/", names = c("mes", "ano"))
BF_historica1 <- BF_historica1[,-2]

BF_historica2 <- aggregate(cbind(BF_historica1$`Famílias PBF (até Out/2021)`,BF_historica1$`Valor repassado às famílias PBF (até Out/2021)`,BF_historica1$`Valor do Benefício médio (até Out/2021)`) ~ BF_historica1$`Unidade Territorial` + BF_historica1$ano, data = BF_historica1, FUN = sum,na.rm = TRUE)
 colnames(BF_historica2) <- c("Estado","Ano","Familias","Valor","Beneficio")
 BF_historica2 <- BF_historica2[,-5]
 BF_historica2 <- BF_historica2[217:432,]

Matrix Enrollment

Estados_indexes<-seq(2,82,3)
matriculas_inf_1 <- matriculas[1,Estados_indexes]
matriculas_inf_1 <- t(matriculas_inf_1)
rownames(matriculas_inf_1) <- c("AC","AL","AP","AM","BA","CE","DF","ES","GO","MA","MT","MS","MG","PA","PB","PR","PE","PI","RJ","RN","RS","RO","RR","SC","SP","SE","TO")
colnames(matriculas_inf_1) <- c("matriculas_inf")

Estados_indexes_1<-seq(3,82,3)
matriculas_inf_2 <- matriculas[1,Estados_indexes_1]
matriculas_inf_2 <- t(matriculas_inf_2)
rownames(matriculas_inf_2) <- c("AC","AL","AP","AM","BA","CE","DF","ES","GO","MA","MT","MS","MG","PA","PB","PR","PE","PI","RJ","RN","RS","RO","RR","SC","SP","SE","TO")
colnames(matriculas_inf_2) <- c("matriculas_fund")

Estados_indexes_2<-seq(4,82,3)
matriculas_inf_3 <- matriculas[1,Estados_indexes_2]
matriculas_inf_3 <- t(matriculas_inf_3)
rownames(matriculas_inf_3) <- c("AC","AL","AP","AM","BA","CE","DF","ES","GO","MA","MT","MS","MG","PA","PB","PR","PE","PI","RJ","RN","RS","RO","RR","SC","SP","SE","TO")
colnames(matriculas_inf_3) <- c("matriculas_med")

mat_2012 <- cbind(matriculas_inf_1,matriculas_inf_2,matriculas_inf_3)
rownames(mat_2012) <- c("AC","AL","AP","AM","BA","CE","DF","ES","GO","MA","MT","MS","MG","PA","PB","PR","PE","PI","RJ","RN","RS","RO","RR","SC","SP","SE","TO")

Estados_indexes<-seq(2,82,3)
matriculas_inf_4 <- matriculas[2,Estados_indexes]
matriculas_inf_4 <- t(matriculas_inf_4)
rownames(matriculas_inf_4) <- c("AC","AL","AP","AM","BA","CE","DF","ES","GO","MA","MT","MS","MG","PA","PB","PR","PE","PI","RJ","RN","RS","RO","RR","SC","SP","SE","TO")
colnames(matriculas_inf_4) <- c("matriculas_inf")

Estados_indexes_1<-seq(4,82,3)
matriculas_inf_5 <- matriculas[2,Estados_indexes_1]
matriculas_inf_5 <- t(matriculas_inf_5)
rownames(matriculas_inf_5) <- c("AC","AL","AP","AM","BA","CE","DF","ES","GO","MA","MT","MS","MG","PA","PB","PR","PE","PI","RJ","RN","RS","RO","RR","SC","SP","SE","TO")
colnames(matriculas_inf_5) <- c("matriculas_med")

Estados_indexes_2<-seq(4,82,3)
matriculas_inf_6 <- matriculas[1,Estados_indexes_2]
matriculas_inf_6 <- t(matriculas_inf_6)
rownames(matriculas_inf_6) <- c("AC","AL","AP","AM","BA","CE","DF","ES","GO","MA","MT","MS","MG","PA","PB","PR","PE","PI","RJ","RN","RS","RO","RR","SC","SP","SE","TO")
colnames(matriculas_inf_6) <- c("matriculas_med")

mat_2013 <- cbind(matriculas_inf_4,matriculas_inf_5,matriculas_inf_6)
mat <- rbind(mat_2012,mat_2013)

Estados_indexes<-seq(2,82,3)
matriculas_inf_7 <- matriculas[3,Estados_indexes]
matriculas_inf_7 <- t(matriculas_inf_7)
rownames(matriculas_inf_7) <- c("AC","AL","AP","AM","BA","CE","DF","ES","GO","MA","MT","MS","MG","PA","PB","PR","PE","PI","RJ","RN","RS","RO","RR","SC","SP","SE","TO")
colnames(matriculas_inf_7) <- c("matriculas_inf")

Estados_indexes_1<-seq(4,82,3)
matriculas_inf_8 <- matriculas[3,Estados_indexes_1]
matriculas_inf_8 <- t(matriculas_inf_8)
rownames(matriculas_inf_8) <- c("AC","AL","AP","AM","BA","CE","DF","ES","GO","MA","MT","MS","MG","PA","PB","PR","PE","PI","RJ","RN","RS","RO","RR","SC","SP","SE","TO")
colnames(matriculas_inf_8) <- c("matriculas_med")

Estados_indexes_2<-seq(4,82,3)
matriculas_inf_9 <- matriculas[3,Estados_indexes_2]
matriculas_inf_9 <- t(matriculas_inf_9)
rownames(matriculas_inf_9) <- c("AC","AL","AP","AM","BA","CE","DF","ES","GO","MA","MT","MS","MG","PA","PB","PR","PE","PI","RJ","RN","RS","RO","RR","SC","SP","SE","TO")
colnames(matriculas_inf_9) <- c("matriculas_med")

mat_2014 <- cbind(matriculas_inf_7,matriculas_inf_8,matriculas_inf_9)
mat <- rbind(mat_2012,mat_2013,mat_2014)

Estados_indexes<-seq(2,82,3)
matriculas_inf_10 <- matriculas[4,Estados_indexes]
matriculas_inf_10 <- t(matriculas_inf_10)
rownames(matriculas_inf_10) <- c("AC","AL","AP","AM","BA","CE","DF","ES","GO","MA","MT","MS","MG","PA","PB","PR","PE","PI","RJ","RN","RS","RO","RR","SC","SP","SE","TO")
colnames(matriculas_inf_10) <- c("matriculas_inf")

Estados_indexes_1<-seq(4,82,3)
matriculas_inf_11 <- matriculas[4,Estados_indexes_1]
matriculas_inf_11 <- t(matriculas_inf_11)
rownames(matriculas_inf_11) <- c("AC","AL","AP","AM","BA","CE","DF","ES","GO","MA","MT","MS","MG","PA","PB","PR","PE","PI","RJ","RN","RS","RO","RR","SC","SP","SE","TO")
colnames(matriculas_inf_11) <- c("matriculas_med")

Estados_indexes_2<-seq(4,82,3)
matriculas_inf_12 <- matriculas[4,Estados_indexes_2]
matriculas_inf_12 <- t(matriculas_inf_12)
rownames(matriculas_inf_12) <- c("AC","AL","AP","AM","BA","CE","DF","ES","GO","MA","MT","MS","MG","PA","PB","PR","PE","PI","RJ","RN","RS","RO","RR","SC","SP","SE","TO")
colnames(matriculas_inf_12) <- c("matriculas_med")

mat_2015 <- cbind(matriculas_inf_10,matriculas_inf_11,matriculas_inf_12)
mat <- rbind(mat_2012,mat_2013,mat_2014,mat_2015)

Estados_indexes<-seq(2,82,3)
matriculas_inf_13 <- matriculas[5,Estados_indexes]
matriculas_inf_13 <- t(matriculas_inf_13)
rownames(matriculas_inf_13) <- c("AC","AL","AP","AM","BA","CE","DF","ES","GO","MA","MT","MS","MG","PA","PB","PR","PE","PI","RJ","RN","RS","RO","RR","SC","SP","SE","TO")
colnames(matriculas_inf_13) <- c("matriculas_inf")

Estados_indexes_1<-seq(4,82,3)
matriculas_inf_14 <- matriculas[5,Estados_indexes_1]
matriculas_inf_14 <- t(matriculas_inf_14)
rownames(matriculas_inf_14) <- c("AC","AL","AP","AM","BA","CE","DF","ES","GO","MA","MT","MS","MG","PA","PB","PR","PE","PI","RJ","RN","RS","RO","RR","SC","SP","SE","TO")
colnames(matriculas_inf_14) <- c("matriculas_med")

Estados_indexes_2<-seq(4,82,3)
matriculas_inf_15 <- matriculas[5,Estados_indexes_2]
matriculas_inf_15 <- t(matriculas_inf_15)
rownames(matriculas_inf_15) <- c("AC","AL","AP","AM","BA","CE","DF","ES","GO","MA","MT","MS","MG","PA","PB","PR","PE","PI","RJ","RN","RS","RO","RR","SC","SP","SE","TO")
colnames(matriculas_inf_15) <- c("matriculas_med")

mat_2016 <- cbind(matriculas_inf_13,matriculas_inf_14,matriculas_inf_15)
mat <- rbind(mat_2012,mat_2013,mat_2014,mat_2015, mat_2016)

Estados_indexes<-seq(2,82,3)
matriculas_inf_16 <- matriculas[6,Estados_indexes]
matriculas_inf_16 <- t(matriculas_inf_16)
rownames(matriculas_inf_16) <- c("AC","AL","AP","AM","BA","CE","DF","ES","GO","MA","MT","MS","MG","PA","PB","PR","PE","PI","RJ","RN","RS","RO","RR","SC","SP","SE","TO")
colnames(matriculas_inf_16) <- c("matriculas_inf")

Estados_indexes_1<-seq(4,82,3)
matriculas_inf_17 <- matriculas[6,Estados_indexes_1]
matriculas_inf_17 <- t(matriculas_inf_17)
rownames(matriculas_inf_17) <- c("AC","AL","AP","AM","BA","CE","DF","ES","GO","MA","MT","MS","MG","PA","PB","PR","PE","PI","RJ","RN","RS","RO","RR","SC","SP","SE","TO")
colnames(matriculas_inf_17) <- c("matriculas_med")

Estados_indexes_2<-seq(4,82,3)
matriculas_inf_18 <- matriculas[6,Estados_indexes_2]
matriculas_inf_18 <- t(matriculas_inf_18)
rownames(matriculas_inf_18) <- c("AC","AL","AP","AM","BA","CE","DF","ES","GO","MA","MT","MS","MG","PA","PB","PR","PE","PI","RJ","RN","RS","RO","RR","SC","SP","SE","TO")
colnames(matriculas_inf_18) <- c("matriculas_med")

mat_2017 <- cbind(matriculas_inf_16,matriculas_inf_17,matriculas_inf_18)
mat <- rbind(mat_2012,mat_2013,mat_2014,mat_2015,mat_2016,mat_2017)

Estados_indexes<-seq(2,82,3)
matriculas_inf_19 <- matriculas[7,Estados_indexes]
matriculas_inf_19 <- t(matriculas_inf_19)
rownames(matriculas_inf_19) <- c("AC","AL","AP","AM","BA","CE","DF","ES","GO","MA","MT","MS","MG","PA","PB","PR","PE","PI","RJ","RN","RS","RO","RR","SC","SP","SE","TO")
colnames(matriculas_inf_19) <- c("matriculas_inf")

Estados_indexes_1<-seq(4,82,3)
matriculas_inf_20 <- matriculas[7,Estados_indexes_1]
matriculas_inf_20 <- t(matriculas_inf_20)
rownames(matriculas_inf_20) <- c("AC","AL","AP","AM","BA","CE","DF","ES","GO","MA","MT","MS","MG","PA","PB","PR","PE","PI","RJ","RN","RS","RO","RR","SC","SP","SE","TO")
colnames(matriculas_inf_20) <- c("matriculas_med")

Estados_indexes_2<-seq(4,82,3)
matriculas_inf_21 <- matriculas[7,Estados_indexes_2]
matriculas_inf_21 <- t(matriculas_inf_21)
rownames(matriculas_inf_21) <- c("AC","AL","AP","AM","BA","CE","DF","ES","GO","MA","MT","MS","MG","PA","PB","PR","PE","PI","RJ","RN","RS","RO","RR","SC","SP","SE","TO")
colnames(matriculas_inf_21) <- c("matriculas_med")

mat_2018 <- cbind(matriculas_inf_19,matriculas_inf_20,matriculas_inf_21)
mat <- rbind(mat_2012,mat_2013,mat_2014,mat_2015, mat_2016, mat_2017, mat_2018)

Estados_indexes<-seq(2,82,3)
matriculas_inf_22 <- matriculas[8,Estados_indexes]
matriculas_inf_22 <- t(matriculas_inf_22)
rownames(matriculas_inf_22) <- c("AC","AL","AP","AM","BA","CE","DF","ES","GO","MA","MT","MS","MG","PA","PB","PR","PE","PI","RJ","RN","RS","RO","RR","SC","SP","SE","TO")
colnames(matriculas_inf_22) <- c("matriculas_inf")

Estados_indexes_1<-seq(4,82,3)
matriculas_inf_23 <- matriculas[8,Estados_indexes_1]
matriculas_inf_23 <- t(matriculas_inf_23)
rownames(matriculas_inf_23) <- c("AC","AL","AP","AM","BA","CE","DF","ES","GO","MA","MT","MS","MG","PA","PB","PR","PE","PI","RJ","RN","RS","RO","RR","SC","SP","SE","TO")
colnames(matriculas_inf_23) <- c("matriculas_med")

Estados_indexes_2<-seq(4,82,3)
matriculas_inf_24 <- matriculas[8,Estados_indexes_2]
matriculas_inf_24 <- t(matriculas_inf_24)
rownames(matriculas_inf_24) <- c("AC","AL","AP","AM","BA","CE","DF","ES","GO","MA","MT","MS","MG","PA","PB","PR","PE","PI","RJ","RN","RS","RO","RR","SC","SP","SE","TO")
colnames(matriculas_inf_24) <- c("matriculas_med")

mat_2019 <- cbind(matriculas_inf_22,matriculas_inf_23,matriculas_inf_24)
mat <- rbind(mat_2012,mat_2013,mat_2014,mat_2015, mat_2016, mat_2017, mat_2018, mat_2019)

Ocupation:

Taxa_de_ocupacao<-rbind(ocupacao_2012,ocupacao_2013,ocupacao_2014,ocupacao_2015,ocupacao_2016,ocupacao_2017,ocupacao_2018,ocupacao_2019)
colnames(Taxa_de_ocupacao) <- c("Estados","Taxa_de_Ocupacao")
Taxa_de_ocupacao <- Taxa_de_ocupacao[c(-1,-29,-57,-85,-113,-141,-169,-197),]
Taxa_de_ocupacao <- as.numeric(unlist(Taxa_de_ocupacao$Taxa_de_Ocupacao))
Taxa_de_ocupacao <- as.data.frame(Taxa_de_ocupacao)

Child Death:

colnames(obitos_inf)<- c("ano","ano","ano","ano","ano","ano","ano","ano","ano")
obitos_inf1 <- obitos_inf[,c(1,2)]
obitos_inf2 <- obitos_inf[,c(1,3)]
obitos_inf3 <- obitos_inf[,c(1,4)]
obitos_inf4 <- obitos_inf[,c(1,5)]
obitos_inf5 <- obitos_inf[,c(1,6)]
obitos_inf6 <- obitos_inf[,c(1,7)]
obitos_inf7 <- obitos_inf[,c(1,8)]
obitos_inf8 <- obitos_inf[,c(1,9)]

obitos_inf <- rbind(obitos_inf1,obitos_inf2,obitos_inf3,obitos_inf4,obitos_inf5,obitos_inf6,obitos_inf7,obitos_inf8)

GDP:

colnames(PIB)<- c("Estado","ano","ano","ano","ano","ano","ano","ano","ano")
PIB1 <- PIB[,c(1,2)]
PIB2 <- PIB[,c(1,3)]
PIB3 <- PIB[,c(1,4)]
PIB4 <- PIB[,c(1,5)]
PIB5 <- PIB[,c(1,6)]
PIB6 <- PIB[,c(1,7)]
PIB7 <- PIB[,c(1,8)]
PIB8 <- PIB[,c(1,9)]

PIB <- rbind(PIB1,PIB2,PIB3,PIB4,PIB5,PIB6,PIB7,PIB8)
PIB <- PIB[-c(1,22,30,51,59,80,88,109,117,138,146,167,175,196,204,225),]
PIB

## # A tibble: 216 × 2
##    Estado       ano
##    <chr>      <dbl>
##  1 Rondônia  18939.
##  2 Acre      13361.
##  3 Amazonas  20118.
##  4 Roraima   16424.
##  5 Pará      13741.
##  6 Amapá     15933.
##  7 Tocantins 14590.
##  8 Maranhão   9009.
##  9 Piauí      9060.
## 10 Ceará     11268.
## # … with 206 more rows

#Deflated GDP- IPCA (Índice Nacional de Preços ao Consumidor Amplo
#)- ACUMULADO ANUAL
PIB_per_capita_D <- readxl::read_excel("PIB_IPCA.xlsx")

## New names:
## • `` -> `...2`
## • `` -> `...3`
## • `` -> `...4`
## • `` -> `...5`
## • `` -> `...6`
## • `` -> `...7`
## • `` -> `...8`
## • `` -> `...9`
## • `` -> `...10`
## • `` -> `...11`

PIB_IPCA <- PIB_per_capita_D[c(9:15,17:25,27:30,32:34, 36:39),c(1,4:11)]
colnames(PIB_IPCA)<- c("Estado","ano","ano","ano","ano","ano","ano","ano","ano")
PIB_1 <- PIB_IPCA[,c(1,2)]
PIB_2 <- PIB_IPCA[,c(1,3)]
PIB_3 <- PIB_IPCA[,c(1,4)]
PIB_4 <- PIB_IPCA[,c(1,5)]
PIB_5 <- PIB_IPCA[,c(1,6)]
PIB_6 <- PIB_IPCA[,c(1,7)]
PIB_7 <- PIB_IPCA[,c(1,8)]
PIB_8 <- PIB_IPCA[,c(1,9)]

PIB_D <- rbind(PIB_1,PIB_2,PIB_3,PIB_4,PIB_5,PIB_6,PIB_7,PIB_8)
PIB_D

## # A tibble: 216 × 2
##    Estado       ano
##    <chr>      <dbl>
##  1 Rondônia  12161.
##  2 Acre       8579.
##  3 Amazonas  12918.
##  4 Roraima   10546.
##  5 Pará       8824.
##  6 Amapá     10231.
##  7 Tocantins  9369.
##  8 Maranhão   5785.
##  9 Piauí      5818.
## 10 Ceará      7236.
## # … with 206 more rows

Final Dataset:

Dataset<-cbind(Taxa_de_ocupacao,BF_historica2,mat, obitos_inf,PIB)
Dataset<- Dataset[,-c(9,11)]
colnames(Dataset) <- c("Pessoas_Ocupadas","Estado","Ano","Familias_PBF","Valor_PBF","Matriculas_inf","Matriculas_fund","Matriculas_med","obitos_inf","PIB")

#Data with deflated GDP 
Dataset.1<-cbind(Taxa_de_ocupacao,BF_historica2,mat,obitos_inf,PIB_D)
Dataset.1<- Dataset.1[,-c(9,11)]
colnames(Dataset.1) <- c("Pessoas_Ocupadas","Estado","Ano","Familias_PBF","Valor_PBF","Matriculas_inf","Matriculas_fund","Matriculas_med","obitos_inf","PIB_D")

Packages for the model:

pacotes <- c("plotly","tidyverse","ggrepel","fastDummies","knitr","kableExtra",
             "splines","reshape2","PerformanceAnalytics","correlation","see",
             "ggraph","psych","car","ggside","tidyquant","olsrr",
             "jtools","ggstance","magick","cowplot","emojifont","beepr","Rcpp",
             "equatiomatic","nortest","rgl")
#"nortest","rgl"
options(rgl.debug = TRUE)

if(sum(as.numeric(!pacotes %in% installed.packages())) != 0){
  instalador <- pacotes[!pacotes %in% installed.packages()]
  for(i in 1:length(instalador)) {
    install.packages(instalador, dependencies = T)
    break()}
  sapply(pacotes, require, character = T) 
} else {
  sapply(pacotes, require, character = T) 
}

## Loading required package: fastDummies

## Loading required package: splines

## Loading required package: correlation

## Warning: package 'correlation' was built under R version 4.1.2

## Loading required package: see

## Warning: package 'see' was built under R version 4.1.2

## Loading required package: ggraph

## Warning: package 'ggraph' was built under R version 4.1.2

## Loading required package: car

## Warning: package 'car' was built under R version 4.1.2

## Loading required package: carData

## 
## Attaching package: 'car'

## The following object is masked from 'package:psych':
## 
##     logit

## The following object is masked from 'package:dplyr':
## 
##     recode

## The following object is masked from 'package:purrr':
## 
##     some

## Loading required package: ggside

## Warning: package 'ggside' was built under R version 4.1.2

## Registered S3 method overwritten by 'ggside':
##   method from   
##   +.gg   ggplot2

## Loading required package: tidyquant

## Warning: package 'tidyquant' was built under R version 4.1.2

## Loading required package: quantmod

## Warning: package 'quantmod' was built under R version 4.1.2

## Loading required package: TTR

## Registered S3 method overwritten by 'quantmod':
##   method            from
##   as.zoo.data.frame zoo

## 
## Attaching package: 'quantmod'

## The following object is masked from 'package:Hmisc':
## 
##     Lag

## Loading required package: olsrr

## 
## Attaching package: 'olsrr'

## The following object is masked from 'package:MASS':
## 
##     cement

## The following object is masked from 'package:datasets':
## 
##     rivers

## Loading required package: jtools

## 
## Attaching package: 'jtools'

## The following object is masked from 'package:Hmisc':
## 
##     %nin%

## Loading required package: ggstance

## Warning: package 'ggstance' was built under R version 4.1.2

## 
## Attaching package: 'ggstance'

## The following objects are masked from 'package:ggplot2':
## 
##     geom_errorbarh, GeomErrorbarh

## Loading required package: magick

## Linking to ImageMagick 6.9.12.3
## Enabled features: cairo, fontconfig, freetype, heic, lcms, pango, raw, rsvg, webp
## Disabled features: fftw, ghostscript, x11

## Loading required package: cowplot

## 
## Attaching package: 'cowplot'

## The following object is masked from 'package:lubridate':
## 
##     stamp

## Loading required package: emojifont

## Loading required package: beepr

## Loading required package: Rcpp

## Warning: package 'Rcpp' was built under R version 4.1.2

## Loading required package: equatiomatic

## Warning: package 'equatiomatic' was built under R version 4.1.2

## 
## Attaching package: 'equatiomatic'

## The following object is masked from 'package:olsrr':
## 
##     hsb

## Loading required package: nortest

##               plotly            tidyverse              ggrepel 
##                 TRUE                 TRUE                 TRUE 
##          fastDummies                knitr           kableExtra 
##                 TRUE                 TRUE                 TRUE 
##              splines             reshape2 PerformanceAnalytics 
##                 TRUE                 TRUE                 TRUE 
##          correlation                  see               ggraph 
##                 TRUE                 TRUE                 TRUE 
##                psych                  car               ggside 
##                 TRUE                 TRUE                 TRUE 
##            tidyquant                olsrr               jtools 
##                 TRUE                 TRUE                 TRUE 
##             ggstance               magick              cowplot 
##                 TRUE                 TRUE                 TRUE 
##            emojifont                beepr                 Rcpp 
##                 TRUE                 TRUE                 TRUE 
##         equatiomatic              nortest                  rgl 
##                 TRUE                 TRUE                 TRUE

summary(Dataset.1)

##  Pessoas_Ocupadas     Estado              Ano             Familias_PBF     
##  Min.   :  2.322   Length:216         Length:216         Min.   :  546364  
##  1st Qu.: 11.341   Class :character   Class :character   1st Qu.: 1639942  
##  Median : 33.398   Mode  :character   Mode  :character   Median : 4305635  
##  Mean   : 87.020                                         Mean   : 6129021  
##  3rd Qu.:136.449                                         3rd Qu.:10382639  
##  Max.   :556.239                                         Max.   :22062829  
##    Valor_PBF         Matriculas_inf    Matriculas_fund   Matriculas_med   
##  Min.   :8.201e+07   Min.   :  17465   Min.   :  21916   Min.   :  21055  
##  1st Qu.:3.217e+08   1st Qu.:  99475   1st Qu.: 110147   1st Qu.: 101714  
##  Median :1.067e+09   Median : 156990   Median : 213721   Median : 156967  
##  Mean   :6.631e+09   Mean   : 302638   Mean   : 397025   Mean   : 298022  
##  3rd Qu.:3.324e+09   3rd Qu.: 366233   3rd Qu.: 412980   3rd Qu.: 365410  
##  Max.   :9.579e+10   Max.   :2298675   Max.   :5805590   Max.   :1928274  
##    obitos_inf         PIB_D       
##  Min.   : 175.0   Min.   :  3995  
##  1st Qu.: 529.2   1st Qu.:  9934  
##  Median : 810.5   Median : 16222  
##  Mean   :1378.5   Mean   : 19461  
##  3rd Qu.:1708.8   3rd Qu.: 23219  
##  Max.   :7173.0   Max.   :102350

BOX-COX

##function 'powerTransform' from the 'car' package (which lambda maximizes the transformed variable's adherence to normality)
library(car)
lambda_BC <- powerTransform(Dataset$Pessoas_Ocupadas)
lambda_BC

## Estimated transformation parameter 
## Dataset$Pessoas_Ocupadas 
##               0.01067109

#Inserting the Box-Cox lambda into the database to investigate a new model
Dataset$bc_Pessoas_Ocupadas <- (((Dataset$Pessoas_Ocupadas ^ lambda_BC$lambda) - 1) / 
                            lambda_BC$lambda)

#Estimating a new OLS model with dependent variable transformed by Box-Cox
modelo_bc <- lm(formula = bc_Pessoas_Ocupadas ~ 1,
                    data = Dataset)

#Parameters
summary(modelo_bc)

## 
## Call:
## lm(formula = bc_Pessoas_Ocupadas ~ 1, data = Dataset)
## 
## Residuals:
##     Min      1Q  Median      3Q     Max 
## -2.8619 -1.2479 -0.1331  1.3391  2.8312 
## 
## Coefficients:
##             Estimate Std. Error t value Pr(>|t|)    
## (Intercept)  3.70809    0.09942    37.3   <2e-16 ***
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## Residual standard error: 1.461 on 215 degrees of freedom

##the decision was made to evaluate the normality of the residuals via Box-Cox transformation, understanding that in obtaining a lambda equal to or very close to 1, possibly the residuals of the future model would adhere to normality.
#lambda: 0.01 
#Therefore, the modeling began with the RDFPC variable transformed by Box-Cox and presenting another distribution, including its residuals.

#With deflated GDP
library(car)
lambda_BC <- powerTransform(Dataset.1$Pessoas_Ocupadas)
lambda_BC

## Estimated transformation parameter 
## Dataset.1$Pessoas_Ocupadas 
##                 0.01067109

#Inserting the Box-Cox lambda into the database to investigate a new model
Dataset.1$bc_Pessoas_Ocupadas <- (((Dataset.1$Pessoas_Ocupadas ^ lambda_BC$lambda) - 1) / 
                            lambda_BC$lambda)

s_Dataset<- scale(Dataset.1[,c(4,5,6,7,8,9,10)], center = TRUE, scale = TRUE)
Dataset2 <- cbind(Dataset.1[,c(1,2,3,11)],s_Dataset)

Packages used for HLM2

#Pacotes utilizados
pacotes <- c("plotly","tidyverse","reshape2","knitr","kableExtra",
             "gghalves","ggdist","tidyquant","car","nlme","lmtest",
             "fastDummies","msm","lmeInfo","jtools","gganimate","ggridges",
             "viridis","hrbrthemes")
#"rgl"
options(rgl.debug = TRUE)

if(sum(as.numeric(!pacotes %in% installed.packages())) != 0){
  instalador <- pacotes[!pacotes %in% installed.packages()]
  for(i in 1:length(instalador)) {
    install.packages(instalador, dependencies = T)
    break()}
  sapply(pacotes, require, character = T) 
} else {
  sapply(pacotes, require, character = T) 
}

## Loading required package: gghalves

## Warning: package 'gghalves' was built under R version 4.1.2

## Loading required package: ggdist

## Warning: package 'ggdist' was built under R version 4.1.2

## Loading required package: nlme

## 
## Attaching package: 'nlme'

## The following object is masked from 'package:dplyr':
## 
##     collapse

## Loading required package: lmtest

## Loading required package: lmeInfo

## Warning: package 'lmeInfo' was built under R version 4.1.2

## Loading required package: gganimate

## Warning: package 'gganimate' was built under R version 4.1.2

## Loading required package: ggridges

## Warning: package 'ggridges' was built under R version 4.1.2

## 
## Attaching package: 'ggridges'

## The following objects are masked from 'package:ggdist':
## 
##     scale_point_color_continuous, scale_point_color_discrete,
##     scale_point_colour_continuous, scale_point_colour_discrete,
##     scale_point_fill_continuous, scale_point_fill_discrete,
##     scale_point_size_continuous

## Loading required package: viridis

## Loading required package: viridisLite

## Loading required package: hrbrthemes

## NOTE: Either Arial Narrow or Roboto Condensed fonts are required to use these themes.

##       Please use hrbrthemes::import_roboto_condensed() to install Roboto Condensed and

##       if Arial Narrow is not on your system, please see https://bit.ly/arialnarrow

##      plotly   tidyverse    reshape2       knitr  kableExtra    gghalves 
##        TRUE        TRUE        TRUE        TRUE        TRUE        TRUE 
##      ggdist   tidyquant         car        nlme      lmtest fastDummies 
##        TRUE        TRUE        TRUE        TRUE        TRUE        TRUE 
##         msm     lmeInfo      jtools   gganimate    ggridges     viridis 
##        TRUE        TRUE        TRUE        TRUE        TRUE        TRUE 
##  hrbrthemes 
##        TRUE

Estimation of NULL MODEL

library(lme4) # for multilevel models

## Loading required package: Matrix

## 
## Attaching package: 'Matrix'

## The following objects are masked from 'package:tidyr':
## 
##     expand, pack, unpack

## 
## Attaching package: 'lme4'

## The following object is masked from 'package:nlme':
## 
##     lmList

library(lmerTest) # for p-values

## 
## Attaching package: 'lmerTest'

## The following object is masked from 'package:lme4':
## 
##     lmer

## The following object is masked from 'package:stats':
## 
##     step

library(performance) # for ICC

s_Dataset<- scale(Dataset[,c(4,5,6,7,8,9,10)], center = TRUE, scale = TRUE)
Dataset1 <- cbind(Dataset[,c(1,2,3,11)],s_Dataset)

modelo_nulo_hlm2<-lmer(bc_Pessoas_Ocupadas~1+(1|Estado), REML=FALSE, data=Dataset1)
summary(modelo_nulo_hlm2)

## Linear mixed model fit by maximum likelihood . t-tests use Satterthwaite's
##   method [lmerModLmerTest]
## Formula: bc_Pessoas_Ocupadas ~ 1 + (1 | Estado)
##    Data: Dataset1
## 
##      AIC      BIC   logLik deviance df.resid 
##    212.8    222.9   -103.4    206.8      213 
## 
## Scaled residuals: 
##     Min      1Q  Median      3Q     Max 
## -2.8528 -0.5401 -0.0468  0.5100  3.1890 
## 
## Random effects:
##  Groups   Name        Variance Std.Dev.
##  Estado   (Intercept) 2.04692  1.4307  
##  Residual             0.07814  0.2795  
## Number of obs: 216, groups:  Estado, 27
## 
## Fixed effects:
##             Estimate Std. Error     df t value Pr(>|t|)    
## (Intercept)    3.708      0.276 27.000   13.44 1.79e-13 ***
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1

performance::icc(modelo_nulo_hlm2)

## # Intraclass Correlation Coefficient
## 
##     Adjusted ICC: 0.963
##   Unadjusted ICC: 0.963

#the ICC is interpreted as the proportion of variance between people: How much of the variance stems from people being different from one another versus fluctuating within themselves? A large ICC means that most of the variability is between people, not from people varying in their answers to a set of questions 
#96% of ocupation variance is attributed to the state



pacotes <- c("plotly","tidyverse","reshape2","knitr","kableExtra",
             "nlme","lmtest","fastDummies","msm","lmeInfo","jtools","gganimate",
             "ggridges","viridis","hrbrthemes")

if(sum(as.numeric(!pacotes %in% installed.packages())) != 0){
  instalador <- pacotes[!pacotes %in% installed.packages()]
  for(i in 1:length(instalador)) {
    install.packages(instalador, dependencies = T)
    break()}
  sapply(pacotes, require, character = T) 
} else {
  sapply(pacotes, require, character = T) 
}

##      plotly   tidyverse    reshape2       knitr  kableExtra        nlme 
##        TRUE        TRUE        TRUE        TRUE        TRUE        TRUE 
##      lmtest fastDummies         msm     lmeInfo      jtools   gganimate 
##        TRUE        TRUE        TRUE        TRUE        TRUE        TRUE 
##    ggridges     viridis  hrbrthemes 
##        TRUE        TRUE        TRUE

#---------------------------------------------------------
#Estimação do modelo nulo (função lme do pacote nlme)
modelo_nulo_hlm3 <- lme(fixed = bc_Pessoas_Ocupadas ~ 1,
                        random = list(Estado = ~1),
                        data = Dataset2,
                        method = "REML")
summary(modelo_nulo_hlm3)

## Linear mixed-effects model fit by REML
##   Data: Dataset2 
##        AIC      BIC    logLik
##   213.4934 223.6053 -103.7467
## 
## Random effects:
##  Formula: ~1 | Estado
##         (Intercept)  Residual
## StdDev:    1.458088 0.2795266
## 
## Fixed effects:  bc_Pessoas_Ocupadas ~ 1 
##                Value Std.Error  DF  t-value p-value
## (Intercept) 3.708092  0.281253 189 13.18419       0
## 
## Standardized Within-Group Residuals:
##         Min          Q1         Med          Q3         Max 
## -2.85149177 -0.54083464 -0.04794658  0.51028348  3.18887016 
## 
## Number of Observations: 216
## Number of Groups: 27

################################################################################
#                    COMPARISON OF HLM2 NULO WITH NULL OLS                 #
################################################################################

#To estimate the null OLS model, we can command the following
modelo_ols_nulo <- lm(formula = bc_Pessoas_Ocupadas ~ 1, 
                      data = Dataset2)

#Parameters
summary(modelo_ols_nulo)

## 
## Call:
## lm(formula = bc_Pessoas_Ocupadas ~ 1, data = Dataset2)
## 
## Residuals:
##     Min      1Q  Median      3Q     Max 
## -2.8619 -1.2479 -0.1331  1.3391  2.8312 
## 
## Coefficients:
##             Estimate Std. Error t value Pr(>|t|)    
## (Intercept)  3.70809    0.09942    37.3   <2e-16 ***
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## Residual standard error: 1.461 on 215 degrees of freedom

################################################################################
#            ESTIMATION OF THE MODEL WITH RANDOM INTERCEPTS HLM2             #
################################################################################

#Random Intercepts
modelo_intercept_hlm2.0<-lmer(bc_Pessoas_Ocupadas~ 1 + Familias_PBF + Valor_PBF + Matriculas_inf + Matriculas_fund + Matriculas_med + obitos_inf + PIB + Ano + (1|Estado), REML=FALSE, data=Dataset1)
summary(modelo_intercept_hlm2.0)

## Linear mixed model fit by maximum likelihood . t-tests use Satterthwaite's
##   method [lmerModLmerTest]
## Formula: bc_Pessoas_Ocupadas ~ 1 + Familias_PBF + Valor_PBF + Matriculas_inf +  
##     Matriculas_fund + Matriculas_med + obitos_inf + PIB + Ano +  
##     (1 | Estado)
##    Data: Dataset1
## 
##      AIC      BIC   logLik deviance df.resid 
##    146.3    203.6    -56.1    112.3      199 
## 
## Scaled residuals: 
##     Min      1Q  Median      3Q     Max 
## -3.3816 -0.5275  0.0249  0.5848  2.7281 
## 
## Random effects:
##  Groups   Name        Variance Std.Dev.
##  Estado   (Intercept) 0.96695  0.9833  
##  Residual             0.05273  0.2296  
## Number of obs: 216, groups:  Estado, 27
## 
## Fixed effects:
##                  Estimate Std. Error        df t value Pr(>|t|)    
## (Intercept)       3.91379    0.20197  30.10982  19.378  < 2e-16 ***
## Familias_PBF      0.75363    0.15675  60.43172   4.808 1.05e-05 ***
## Valor_PBF        -0.05825    0.02600 189.13359  -2.240 0.026228 *  
## Matriculas_inf    0.71449    0.16303 140.52627   4.382 2.28e-05 ***
## Matriculas_fund   0.02045    0.03920 197.61237   0.522 0.602477    
## Matriculas_med   -0.44591    0.16489 196.66211  -2.704 0.007444 ** 
## obitos_inf        0.15636    0.16624  46.08356   0.941 0.351810    
## PIB              -0.12235    0.09714 190.65044  -1.260 0.209371    
## Ano2013          -0.30777    0.08157 190.61868  -3.773 0.000215 ***
## Ano2014          -0.36585    0.08359 195.40844  -4.377 1.96e-05 ***
## Ano2015          -0.39742    0.08468 197.83272  -4.693 5.02e-06 ***
## Ano2016          -0.10497    0.09847 200.59985  -1.066 0.287692    
## Ano2017          -0.24330    0.09109 206.25648  -2.671 0.008167 ** 
## Ano2018          -0.13912    0.09834 211.31596  -1.415 0.158636    
## Ano2019          -0.08720    0.11049 213.49642  -0.789 0.430846    
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1

## 
## Correlation matrix not shown by default, as p = 15 > 12.
## Use print(x, correlation=TRUE)  or
##     vcov(x)        if you need it

# In the “step-up” strategy, variables are added one by one, but it is possible to check the statistical significance of both variables and, if one of them does not pass the significance test, the variable that did not pass is removed from the model.

modelo_intercept_hlm2<-lmer(bc_Pessoas_Ocupadas~ 1 + Familias_PBF + Matriculas_inf + PIB_D + Ano + (1|Estado), REML=FALSE, data=Dataset2)
summary(modelo_intercept_hlm2)

## Linear mixed model fit by maximum likelihood . t-tests use Satterthwaite's
##   method [lmerModLmerTest]
## Formula: bc_Pessoas_Ocupadas ~ 1 + Familias_PBF + Matriculas_inf + PIB_D +  
##     Ano + (1 | Estado)
##    Data: Dataset2
## 
##      AIC      BIC   logLik deviance df.resid 
##    147.7    191.6    -60.8    121.7      203 
## 
## Scaled residuals: 
##      Min       1Q   Median       3Q      Max 
## -3.04220 -0.57389  0.04913  0.55555  2.46761 
## 
## Random effects:
##  Groups   Name        Variance Std.Dev.
##  Estado   (Intercept) 1.14108  1.0682  
##  Residual             0.05414  0.2327  
## Number of obs: 216, groups:  Estado, 27
## 
## Fixed effects:
##                 Estimate Std. Error        df t value Pr(>|t|)    
## (Intercept)      3.94214    0.21180  22.99823  18.613 2.31e-15 ***
## Familias_PBF     0.66211    0.15961  59.84219   4.148 0.000107 ***
## Matriculas_inf   0.60810    0.15171  72.75088   4.008 0.000146 ***
## PIB_D           -0.09967    0.04864 194.40173  -2.049 0.041801 *  
## Ano2013         -0.33904    0.06355 182.59631  -5.335 2.80e-07 ***
## Ano2014         -0.40810    0.06408 187.14712  -6.369 1.44e-09 ***
## Ano2015         -0.47872    0.06604 187.46659  -7.249 1.07e-11 ***
## Ano2016         -0.25432    0.06523 192.38829  -3.899 0.000133 ***
## Ano2017         -0.15794    0.09863 196.96782  -1.601 0.110901    
## Ano2018         -0.09486    0.08571 200.41736  -1.107 0.269733    
## Ano2019         -0.13938    0.08109 204.53189  -1.719 0.087135 .  
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## Correlation of Fixed Effects:
##             (Intr) Fm_PBF Mtrcl_ PIB_D  An2013 An2014 An2015 An2016 An2017
## Familis_PBF  0.007                                                        
## Matricls_nf  0.060 -0.062                                                 
## PIB_D        0.100 -0.016  0.047                                          
## Ano2013     -0.157 -0.034 -0.064 -0.039                                   
## Ano2014     -0.159 -0.082 -0.119 -0.039  0.505                            
## Ano2015     -0.127 -0.048 -0.126  0.242  0.479  0.485                     
## Ano2016     -0.169 -0.033 -0.210 -0.115  0.503  0.512  0.468              
## Ano2017     -0.180  0.038 -0.206 -0.747  0.359  0.365  0.149  0.432       
## Ano2018     -0.187 -0.029 -0.261 -0.631  0.409  0.422  0.236  0.481  0.747
## Ano2019     -0.188 -0.003 -0.312 -0.554  0.430  0.444  0.281  0.503  0.713
##             An2018
## Familis_PBF       
## Matricls_nf       
## PIB_D             
## Ano2013           
## Ano2014           
## Ano2015           
## Ano2016           
## Ano2017           
## Ano2018           
## Ano2019      0.706

#Parâmetros do modelo
summary(modelo_intercept_hlm2)

## Linear mixed model fit by maximum likelihood . t-tests use Satterthwaite's
##   method [lmerModLmerTest]
## Formula: bc_Pessoas_Ocupadas ~ 1 + Familias_PBF + Matriculas_inf + PIB_D +  
##     Ano + (1 | Estado)
##    Data: Dataset2
## 
##      AIC      BIC   logLik deviance df.resid 
##    147.7    191.6    -60.8    121.7      203 
## 
## Scaled residuals: 
##      Min       1Q   Median       3Q      Max 
## -3.04220 -0.57389  0.04913  0.55555  2.46761 
## 
## Random effects:
##  Groups   Name        Variance Std.Dev.
##  Estado   (Intercept) 1.14108  1.0682  
##  Residual             0.05414  0.2327  
## Number of obs: 216, groups:  Estado, 27
## 
## Fixed effects:
##                 Estimate Std. Error        df t value Pr(>|t|)    
## (Intercept)      3.94214    0.21180  22.99823  18.613 2.31e-15 ***
## Familias_PBF     0.66211    0.15961  59.84219   4.148 0.000107 ***
## Matriculas_inf   0.60810    0.15171  72.75088   4.008 0.000146 ***
## PIB_D           -0.09967    0.04864 194.40173  -2.049 0.041801 *  
## Ano2013         -0.33904    0.06355 182.59631  -5.335 2.80e-07 ***
## Ano2014         -0.40810    0.06408 187.14712  -6.369 1.44e-09 ***
## Ano2015         -0.47872    0.06604 187.46659  -7.249 1.07e-11 ***
## Ano2016         -0.25432    0.06523 192.38829  -3.899 0.000133 ***
## Ano2017         -0.15794    0.09863 196.96782  -1.601 0.110901    
## Ano2018         -0.09486    0.08571 200.41736  -1.107 0.269733    
## Ano2019         -0.13938    0.08109 204.53189  -1.719 0.087135 .  
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## Correlation of Fixed Effects:
##             (Intr) Fm_PBF Mtrcl_ PIB_D  An2013 An2014 An2015 An2016 An2017
## Familis_PBF  0.007                                                        
## Matricls_nf  0.060 -0.062                                                 
## PIB_D        0.100 -0.016  0.047                                          
## Ano2013     -0.157 -0.034 -0.064 -0.039                                   
## Ano2014     -0.159 -0.082 -0.119 -0.039  0.505                            
## Ano2015     -0.127 -0.048 -0.126  0.242  0.479  0.485                     
## Ano2016     -0.169 -0.033 -0.210 -0.115  0.503  0.512  0.468              
## Ano2017     -0.180  0.038 -0.206 -0.747  0.359  0.365  0.149  0.432       
## Ano2018     -0.187 -0.029 -0.261 -0.631  0.409  0.422  0.236  0.481  0.747
## Ano2019     -0.188 -0.003 -0.312 -0.554  0.430  0.444  0.281  0.503  0.713
##             An2018
## Familis_PBF       
## Matricls_nf       
## PIB_D             
## Ano2013           
## Ano2014           
## Ano2015           
## Ano2016           
## Ano2017           
## Ano2018           
## Ano2019      0.706

#--------------------------------------------------------------------
ctrl <- lmeControl(opt='optim')
library(nlme)
#Estimação do modelo com Tendência Linear e Interceptos Aleatórios
modelo_intercept_hlm3 <- lme(fixed = bc_Pessoas_Ocupadas ~ Ano + Familias_PBF + Valor_PBF + Matriculas_inf + Matriculas_med + PIB_D,
                                    random = list(Estado = ~1),
                                    data = Dataset2,
                                    method = "REML")
summary(modelo_intercept_hlm3)

## Linear mixed-effects model fit by REML
##   Data: Dataset2 
##        AIC      BIC    logLik
##   185.9016 235.5997 -77.95079
## 
## Random effects:
##  Formula: ~1 | Estado
##         (Intercept)  Residual
## StdDev:     1.04504 0.2347144
## 
## Fixed effects:  bc_Pessoas_Ocupadas ~ Ano + Familias_PBF + Valor_PBF + Matriculas_inf +      Matriculas_med + PIB_D 
##                    Value  Std.Error  DF   t-value p-value
## (Intercept)     3.938876 0.20799643 177 18.937231  0.0000
## Ano2013        -0.341969 0.06410624 177 -5.334416  0.0000
## Ano2014        -0.417955 0.06472115 177 -6.457781  0.0000
## Ano2015        -0.497912 0.06700036 177 -7.431485  0.0000
## Ano2016        -0.170127 0.08069917 177 -2.108164  0.0364
## Ano2017        -0.177817 0.09999225 177 -1.778311  0.0771
## Ano2018        -0.130648 0.08779126 177 -1.488167  0.1385
## Ano2019        -0.109845 0.09127651 177 -1.203431  0.2304
## Familias_PBF    0.775444 0.16122355 177  4.809745  0.0000
## Valor_PBF      -0.057946 0.02640062 177 -2.194888  0.0295
## Matriculas_inf  0.677327 0.15429590 177  4.389794  0.0000
## Matriculas_med -0.396272 0.15569620 177 -2.545163  0.0118
## PIB_D          -0.102393 0.04903191 177 -2.088285  0.0382
##  Correlation: 
##                (Intr) An2013 An2014 An2015 An2016 An2017 An2018 An2019 Fm_PBF
## Ano2013        -0.162                                                        
## Ano2014        -0.165  0.505                                                 
## Ano2015        -0.134  0.478  0.487                                          
## Ano2016        -0.173  0.411  0.418  0.384                                   
## Ano2017        -0.189  0.359  0.368  0.159  0.372                            
## Ano2018        -0.196  0.406  0.424  0.250  0.406  0.750                     
## Ano2019        -0.201  0.388  0.406  0.271  0.608  0.666  0.664              
## Familias_PBF    0.004 -0.035 -0.086 -0.060  0.053  0.027 -0.048  0.031       
## Valor_PBF       0.050  0.001  0.007  0.009 -0.573 -0.022 -0.009 -0.388 -0.155
## Matriculas_inf  0.068 -0.065 -0.126 -0.144 -0.186 -0.221 -0.286 -0.319 -0.037
## Matriculas_med -0.038  0.019  0.057  0.111  0.072  0.107  0.181  0.209 -0.124
## PIB_D           0.103 -0.039 -0.039  0.241 -0.106 -0.743 -0.620 -0.504 -0.018
##                Vl_PBF Mtrcls_n Mtrcls_m
## Ano2013                                
## Ano2014                                
## Ano2015                                
## Ano2016                                
## Ano2017                                
## Ano2018                                
## Ano2019                                
## Familias_PBF                           
## Valor_PBF                              
## Matriculas_inf  0.004                  
## Matriculas_med  0.034 -0.218           
## PIB_D           0.022  0.046    0.001  
## 
## Standardized Within-Group Residuals:
##         Min          Q1         Med          Q3         Max 
## -3.25259841 -0.51753391  0.05106863  0.57309050  2.51640999 
## 
## Number of Observations: 216
## Number of Groups: 27

#Comparsion between Logliks
data.frame(OLS_Nulo = logLik(modelo_ols_nulo),
           HLM3_Nulo = logLik(modelo_nulo_hlm3),
           HLM3_Intercept_Aleat = logLik(modelo_intercept_hlm3)) %>%
  melt() %>%
  ggplot(aes(x = variable, y = (abs(-value)), fill = factor(variable))) +
  geom_bar(stat = "identity") +
  geom_label(aes(label = (round(value,3))), hjust = 1.1, color = "white", size = 6) +
  labs(title = "Comparação do LL", 
       y = "LogLik", 
       x = "Modelo Proposto") +
  coord_flip() +
  scale_fill_manual("Legenda:",
                    values = c("grey25","lightblue","lightpink")) +
  theme(legend.title = element_blank(), 
        panel.background = element_rect("white"),
        legend.position = "none",
        axis.line = element_line())

## No id variables; using all as measure variables

pacotes <- c("plotly","tidyverse","reshape2","knitr","kableExtra",
             "nlme","lmtest","fastDummies","msm","lmeInfo","jtools","gganimate",
             "ggridges","viridis","hrbrthemes")

if(sum(as.numeric(!pacotes %in% installed.packages())) != 0){
  instalador <- pacotes[!pacotes %in% installed.packages()]
  for(i in 1:length(instalador)) {
    install.packages(instalador, dependencies = T)
    break()}
  sapply(pacotes, require, character = T) 
} else {
  sapply(pacotes, require, character = T) 
}

##      plotly   tidyverse    reshape2       knitr  kableExtra        nlme 
##        TRUE        TRUE        TRUE        TRUE        TRUE        TRUE 
##      lmtest fastDummies         msm     lmeInfo      jtools   gganimate 
##        TRUE        TRUE        TRUE        TRUE        TRUE        TRUE 
##    ggridges     viridis  hrbrthemes 
##        TRUE        TRUE        TRUE

################################################################################
#      MODEL ESTIMATION WITH RANDOM INTERCEPTS AND SLOPES HLM2    #
################################################################################

#Estimação do modelo com Interceptos e Inclinações Aleatórios

#Mtariculas_fund insignificant

modelo_intercept_inclin_hlm2 <- lmer(bc_Pessoas_Ocupadas ~ 1 + Familias_PBF + Matriculas_inf + Ano + (Ano|Estado), data = Dataset1, REML = FALSE, control = lmerControl(check.nobs.vs.nRE = "ignore"))

## Warning in checkConv(attr(opt, "derivs"), opt$par, ctrl = control$checkConv, :
## Model failed to converge with max|grad| = 0.0042761 (tol = 0.002, component 1)

coef(modelo_intercept_inclin_hlm2)

## $Estado
##                     (Intercept) Familias_PBF Matriculas_inf      Ano2013
## ACRE                   3.980293    0.5358777      0.5166905 -0.254768174
## ALAGOAS                4.996246    0.5358777      0.5166905 -0.143907857
## AMAPÁ                  3.266220    0.5358777      0.5166905 -0.720024042
## AMAZONAS               5.336584    0.5358777      0.5166905 -0.326227928
## BAHIA                  4.808881    0.5358777      0.5166905 -0.368683643
## CEARÁ                  5.305058    0.5358777      0.5166905 -0.279734262
## DISTRITO FEDERAL       2.236571    0.5358777      0.5166905 -0.262379934
## ESPÍRITO SANTO         3.586391    0.5358777      0.5166905 -0.248336545
## GOIÁS                  3.300757    0.5358777      0.5166905 -0.603698804
## MARANHÃO               5.630868    0.5358777      0.5166905 -0.287677351
## MATO GROSSO            3.004719    0.5358777      0.5166905 -0.520481109
## MATO GROSSO DO SUL     2.646494    0.5358777      0.5166905 -0.423599362
## MINAS GERAIS           4.013181    0.5358777      0.5166905 -0.251573180
## PARÁ                   3.433959    0.5358777      0.5166905 -0.366770787
## PARAÍBA                5.282644    0.5358777      0.5166905 -0.440367851
## PARANÁ                 5.941549    0.5358777      0.5166905 -0.186061622
## PERNAMBUCO             4.887186    0.5358777      0.5166905 -0.306655941
## PIAUÍ                  5.639193    0.5358777      0.5166905 -0.275302111
## RIO DE JANEIRO         4.178727    0.5358777      0.5166905 -0.242857770
## RIO GRANDE DO NORTE    3.942147    0.5358777      0.5166905 -0.274515718
## RIO GRANDE DO SUL      3.295303    0.5358777      0.5166905 -0.483670796
## RONDÔNIA               4.019384    0.5358777      0.5166905 -0.339286197
## RORAIMA                2.266508    0.5358777      0.5166905  0.081602074
## SANTA CATARINA         2.730278    0.5358777      0.5166905 -0.635081759
## SÃO PAULO              3.787413    0.5358777      0.5166905 -0.476901637
## SERGIPE                1.958313    0.5358777      0.5166905 -0.525030999
## TOCANTINS              3.856089    0.5358777      0.5166905 -0.001025597
##                         Ano2014      Ano2015     Ano2016     Ano2017
## ACRE                -0.17249297 -0.255222416 -0.02846258 -0.02931329
## ALAGOAS             -0.23109558 -0.370531055 -0.27349734 -0.46485314
## AMAPÁ               -0.81388180 -0.502429548 -0.10584473 -0.05837191
## AMAZONAS            -0.22108875 -0.214280002  0.12709073  0.09979881
## BAHIA               -0.41762993 -0.480775297 -0.43853668 -0.44852001
## CEARÁ               -0.39907210 -0.500570333 -0.39458632 -0.39843204
## DISTRITO FEDERAL    -0.30920835 -0.094841301 -0.10183590 -0.20241895
## ESPÍRITO SANTO      -0.01690119 -0.406276980 -0.65126075  0.09856109
## GOIÁS               -0.30801992 -0.485909337 -0.30180533 -0.06877417
## MARANHÃO            -0.34241013 -0.374678406 -0.30892527 -0.59270197
## MATO GROSSO         -0.88134098 -0.484210989 -0.45107767 -0.75347211
## MATO GROSSO DO SUL  -0.21498441 -0.373847440  0.33048064  0.58064041
## MINAS GERAIS        -0.20377911 -0.387371002 -0.16010282 -0.15004606
## PARÁ                -0.68702629 -0.716479413 -0.64798591 -0.75440311
## PARAÍBA             -0.55303443 -0.569730722 -0.39342735 -0.58473981
## PARANÁ              -0.13847304 -0.075821491 -0.08113065 -0.13714856
## PERNAMBUCO          -0.41953051 -0.404636587 -0.25735744 -0.45060989
## PIAUÍ               -0.12980391 -0.318943397 -0.32795272 -0.29327942
## RIO DE JANEIRO      -0.26187018 -0.302003850 -0.18670681 -0.25345987
## RIO GRANDE DO NORTE -0.24939647 -0.499591749 -0.28320213 -0.32335309
## RIO GRANDE DO SUL   -0.64568890 -0.449403918 -0.26531548 -0.31272269
## RONDÔNIA            -0.84325663 -0.813175847 -0.51803340 -0.94567992
## RORAIMA             -0.06973257  0.002995581 -0.07059229 -0.19898997
## SANTA CATARINA      -1.32780370 -1.084521153 -0.39742881 -0.80113607
## SÃO PAULO           -0.55167275 -0.633029079 -0.39840698 -0.47656098
## SERGIPE             -0.29385284 -0.695095282 -0.21332407  0.44932682
## TOCANTINS           -0.18789412 -0.302662002 -0.16345288 -0.58498049
##                         Ano2018      Ano2019
## ACRE                 0.03763780  0.003089549
## ALAGOAS             -0.48561626 -0.575627049
## AMAPÁ               -0.21178953  0.112172017
## AMAZONAS             0.11090242  0.072472105
## BAHIA               -0.41053472 -0.463975907
## CEARÁ               -0.32514682 -0.471966570
## DISTRITO FEDERAL     0.17112393  0.409546604
## ESPÍRITO SANTO       0.23871268  0.009403868
## GOIÁS                0.02233501 -0.043379707
## MARANHÃO            -0.63328915 -0.656771126
## MATO GROSSO         -0.31475490 -0.070930903
## MATO GROSSO DO SUL   0.36963259  0.373924083
## MINAS GERAIS        -0.10202837 -0.204679489
## PARÁ                -0.62337630 -0.644711179
## PARAÍBA             -0.53211954 -0.618045151
## PARANÁ              -0.08737384 -0.059914450
## PERNAMBUCO          -0.28990615 -0.351552518
## PIAUÍ               -0.31801794 -0.443806449
## RIO DE JANEIRO      -0.05345104 -0.093516251
## RIO GRANDE DO NORTE -0.15925918 -0.390189826
## RIO GRANDE DO SUL   -0.17510080  0.008028053
## RONDÔNIA            -0.83422553 -0.873581568
## RORAIMA              0.35763366  0.429317544
## SANTA CATARINA      -0.67700794 -0.623317688
## SÃO PAULO           -0.49032500 -0.506729141
## SERGIPE              0.50829337  0.239358520
## TOCANTINS           -0.16963753 -0.322337183
## 
## attr(,"class")
## [1] "coef.mer"

#Parâmetros do modelo
summary(modelo_intercept_inclin_hlm2)

## Linear mixed model fit by maximum likelihood . t-tests use Satterthwaite's
##   method [lmerModLmerTest]
## Formula: bc_Pessoas_Ocupadas ~ 1 + Familias_PBF + Matriculas_inf + Ano +  
##     (Ano | Estado)
##    Data: Dataset1
## Control: lmerControl(check.nobs.vs.nRE = "ignore")
## 
##      AIC      BIC   logLik deviance df.resid 
##    124.1    282.7    -15.1     30.1      169 
## 
## Scaled residuals: 
##      Min       1Q   Median       3Q      Max 
## -1.98186 -0.37705  0.01176  0.37119  1.49766 
## 
## Random effects:
##  Groups   Name        Variance Std.Dev. Corr                               
##  Estado   (Intercept) 1.24799  1.1171                                      
##           Ano2013     0.04197  0.2049    0.20                              
##           Ano2014     0.10178  0.3190    0.20  0.66                        
##           Ano2015     0.06188  0.2488    0.15  0.64  0.76                  
##           Ano2016     0.05885  0.2426   -0.11  0.24  0.40  0.56            
##           Ano2017     0.13753  0.3709   -0.26  0.03  0.58  0.40  0.62      
##           Ano2018     0.12789  0.3576   -0.44  0.25  0.63  0.55  0.58  0.86
##           Ano2019     0.13923  0.3731   -0.53  0.11  0.42  0.60  0.62  0.76
##  Residual             0.01003  0.1002                                      
##       
##       
##       
##       
##       
##       
##       
##       
##   0.90
##       
## Number of obs: 216, groups:  Estado, 27
## 
## Fixed effects:
##                Estimate Std. Error       df t value Pr(>|t|)    
## (Intercept)     3.97522    0.21603 21.06530  18.401 1.85e-14 ***
## Familias_PBF    0.53588    0.11997 33.73470   4.467 8.46e-05 ***
## Matriculas_inf  0.51669    0.11008 27.26613   4.694 6.79e-05 ***
## Ano2013        -0.33937    0.04805 27.21169  -7.063 1.30e-07 ***
## Ano2014        -0.40337    0.06752 27.53972  -5.974 2.09e-06 ***
## Ano2015        -0.43678    0.05553 27.37055  -7.866 1.69e-08 ***
## Ano2016        -0.25788    0.05495 28.41658  -4.693 6.22e-05 ***
## Ano2017        -0.29836    0.07740 27.77432  -3.855 0.000626 ***
## Ano2018        -0.18803    0.07546 27.50331  -2.492 0.019024 *  
## Ano2019        -0.21325    0.07864 28.91460  -2.712 0.011151 *  
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## Correlation of Fixed Effects:
##             (Intr) Fm_PBF Mtrcl_ An2013 An2014 An2015 An2016 An2017 An2018
## Familis_PBF  0.004                                                        
## Matricls_nf  0.039 -0.114                                                 
## Ano2013      0.122 -0.031 -0.058                                          
## Ano2014      0.149 -0.055 -0.077  0.611                                   
## Ano2015      0.094 -0.033 -0.116  0.598  0.706                            
## Ano2016     -0.134 -0.022 -0.174  0.318  0.426  0.551                     
## Ano2017     -0.263  0.033 -0.159  0.129  0.573  0.424  0.601              
## Ano2018     -0.430 -0.024 -0.188  0.298  0.611  0.544  0.570  0.813       
## Ano2019     -0.515  0.001 -0.213  0.197  0.433  0.587  0.608  0.736  0.854
## optimizer (nloptwrap) convergence code: 0 (OK)
## Model failed to converge with max|grad| = 0.0042761 (tol = 0.002, component 1)

#------------------------------------------------------------------------------
modelo_intercept_inclin_hlm3 <- lme(fixed = bc_Pessoas_Ocupadas ~ Ano + Familias_PBF + Matriculas_inf + PIB_D,
                                    random = list(Estado = ~Ano),
                                    control=ctrl,
                                    data = Dataset2,
                                    method = "REML")
summary(modelo_intercept_inclin_hlm3)

## Linear mixed-effects model fit by REML
##   Data: Dataset2 
##        AIC      BIC    logLik
##   166.1326 325.6371 -35.06632
## 
## Random effects:
##  Formula: ~Ano | Estado
##  Structure: General positive-definite, Log-Cholesky parametrization
##             StdDev    Corr                                            
## (Intercept) 1.1393093 (Intr) An2013 An2014 An2015 An2016 An2017 An2018
## Ano2013     0.2090605  0.198                                          
## Ano2014     0.3245790  0.195  0.651                                   
## Ano2015     0.2549485  0.127  0.602  0.743                            
## Ano2016     0.2511242 -0.133  0.247  0.393  0.524                     
## Ano2017     0.3672755 -0.305  0.106  0.610  0.395  0.683              
## Ano2018     0.3740032 -0.446  0.299  0.619  0.508  0.605  0.870       
## Ano2019     0.3852220 -0.555  0.165  0.416  0.582  0.650  0.770  0.901
## Residual    0.1017452                                                 
## 
## Fixed effects:  bc_Pessoas_Ocupadas ~ Ano + Familias_PBF + Matriculas_inf + PIB_D 
##                    Value  Std.Error  DF   t-value p-value
## (Intercept)     3.928607 0.22130686 179 17.751855  0.0000
## Ano2013        -0.333419 0.04903914 179 -6.799031  0.0000
## Ano2014        -0.396613 0.06876770 179 -5.767430  0.0000
## Ano2015        -0.468771 0.05881195 179 -7.970684  0.0000
## Ano2016        -0.239667 0.05718272 179 -4.191253  0.0000
## Ano2017        -0.140392 0.10519059 179 -1.334645  0.1837
## Ano2018        -0.070699 0.09488415 179 -0.745104  0.4572
## Ano2019        -0.114560 0.09232659 179 -1.240813  0.2163
## Familias_PBF    0.538770 0.12781219 179  4.215326  0.0000
## Matriculas_inf  0.475091 0.11663175 179  4.073432  0.0001
## PIB_D          -0.102342 0.04724436 179 -2.166222  0.0316
##  Correlation: 
##                (Intr) An2013 An2014 An2015 An2016 An2017 An2018 An2019 Fm_PBF
## Ano2013         0.118                                                        
## Ano2014         0.143  0.609                                                 
## Ano2015         0.094  0.540  0.659                                          
## Ano2016        -0.161  0.327  0.422  0.471                                   
## Ano2017        -0.289  0.176  0.464  0.122  0.564                            
## Ano2018        -0.413  0.310  0.525  0.266  0.563  0.882                     
## Ano2019        -0.513  0.231  0.401  0.361  0.613  0.803  0.891              
## Familias_PBF    0.012 -0.037 -0.060 -0.014 -0.033 -0.027 -0.064 -0.036       
## Matriculas_inf  0.047 -0.064 -0.083 -0.097 -0.186 -0.173 -0.199 -0.226 -0.105
## PIB_D           0.094 -0.054 -0.043  0.258 -0.134 -0.681 -0.559 -0.477  0.076
##                Mtrcl_
## Ano2013              
## Ano2014              
## Ano2015              
## Ano2016              
## Ano2017              
## Ano2018              
## Ano2019              
## Familias_PBF         
## Matriculas_inf       
## PIB_D           0.073
## 
## Standardized Within-Group Residuals:
##         Min          Q1         Med          Q3         Max 
## -1.93644921 -0.34324933  0.01086983  0.37847537  1.55306851 
## 
## Number of Observations: 216
## Number of Groups: 27

coef(modelo_intercept_inclin_hlm3)

##                     (Intercept)     Ano2013      Ano2014     Ano2015
## ACRE                   3.899136 -0.25788408 -0.176455067 -0.28441692
## ALAGOAS                4.900414 -0.14276826 -0.232035202 -0.38187743
## AMAPÁ                  3.186562 -0.71895143 -0.792535147 -0.52087839
## AMAZONAS               5.253704 -0.30090260 -0.213281845 -0.27443449
## BAHIA                  4.743775 -0.36656267 -0.413391497 -0.50074031
## CEARÁ                  5.240315 -0.25707273 -0.397194944 -0.55543264
## DISTRITO FEDERAL       2.145476 -0.26756150 -0.311590353 -0.09369740
## ESPÍRITO SANTO         3.469372 -0.22794932  0.001567643 -0.43955656
## GOIÁS                  3.195565 -0.60715439 -0.313864466 -0.49492315
## MARANHÃO               5.547411 -0.29426052 -0.343781687 -0.37955564
## MATO GROSSO            2.916498 -0.51681380 -0.893726710 -0.50922657
## MATO GROSSO DO SUL     2.533411 -0.40384682 -0.211391599 -0.39807529
## MINAS GERAIS           3.973751 -0.23873720 -0.205703732 -0.41566092
## PARÁ                   3.350298 -0.36711406 -0.690512613 -0.71953241
## PARAÍBA                5.197989 -0.43837957 -0.562159253 -0.60174345
## PARANÁ                 5.855618 -0.16819295 -0.128292583 -0.11644480
## PERNAMBUCO             4.846977 -0.30040815 -0.418478492 -0.44393006
## PIAUÍ                  5.625261 -0.28131877 -0.131007372 -0.37543317
## RIO DE JANEIRO         4.181555 -0.23128564 -0.249427106 -0.36384823
## RIO GRANDE DO NORTE    3.978476 -0.28577827 -0.231812871 -0.52955554
## RIO GRANDE DO SUL      3.304091 -0.47328945 -0.619186409 -0.48620421
## RONDÔNIA               3.997087 -0.33331767 -0.835429694 -0.85616741
## RORAIMA                2.223089  0.09432401 -0.064475022 -0.02965124
## SANTA CATARINA         2.702584 -0.61522134 -1.311066423 -1.12474574
## SÃO PAULO              3.742568 -0.48704860 -0.534428705 -0.65583635
## SERGIPE                2.079907 -0.49703494 -0.260592585 -0.72602245
## TOCANTINS              3.981509 -0.01777134 -0.168294952 -0.37923884
##                         Ano2016     Ano2017      Ano2018       Ano2019
## ACRE                -0.01882895  0.11161356  0.134810606  0.0844641657
## ALAGOAS             -0.27519167 -0.35490470 -0.426369892 -0.5265926736
## AMAPÁ               -0.08145460  0.04831329 -0.122317933  0.1739514591
## AMAZONAS             0.15311200  0.22316802  0.226154783  0.1650986186
## BAHIA               -0.42757752 -0.33818569 -0.330989586 -0.3953906553
## CEARÁ               -0.38102160 -0.29919494 -0.231279005 -0.3964212035
## DISTRITO FEDERAL    -0.09424634 -0.05122763  0.258125000  0.4938018686
## ESPÍRITO SANTO      -0.62217602  0.16096725  0.314126276  0.0705325835
## GOIÁS               -0.30540043  0.03853155  0.085947979  0.0235686880
## MARANHÃO            -0.31089862 -0.48238585 -0.572264285 -0.6027169278
## MATO GROSSO         -0.45537982 -0.63684470 -0.239983549  0.0001733207
## MATO GROSSO DO SUL   0.35049708  0.68264660  0.448634247  0.4435797339
## MINAS GERAIS        -0.15009687 -0.01761727 -0.010237962 -0.1193155752
## PARÁ                -0.64023796 -0.63794285 -0.553221259 -0.5745943184
## PARAÍBA             -0.39820252 -0.48671771 -0.467079808 -0.5638302192
## PARANÁ              -0.05621764 -0.04253109  0.011148521  0.0207232256
## PERNAMBUCO          -0.24697436 -0.29989015 -0.175655036 -0.2590274289
## PIAUÍ               -0.34663843 -0.17436316 -0.217358310 -0.3777530336
## RIO DE JANEIRO      -0.17612891 -0.05017832  0.107666904  0.0299072253
## RIO GRANDE DO NORTE -0.24826784 -0.05511308  0.022569705 -0.2429205838
## RIO GRANDE DO SUL   -0.21993585 -0.10281895 -0.006900603  0.1485218089
## RONDÔNIA            -0.50157804 -0.71593279 -0.669928738 -0.7321331357
## RORAIMA             -0.04409670  0.03120904  0.523234442  0.5759124873
## SANTA CATARINA      -0.35377602 -0.58069486 -0.498858742 -0.4761025375
## SÃO PAULO           -0.36518952 -0.25168825 -0.338883279 -0.3798566244
## SERGIPE             -0.14993222  0.62884900  0.665853113  0.3737454022
## TOCANTINS           -0.10517589 -0.13765381  0.154195669 -0.0504467559
##                     Familias_PBF Matriculas_inf      PIB_D
## ACRE                     0.53877      0.4750915 -0.1023418
## ALAGOAS                  0.53877      0.4750915 -0.1023418
## AMAPÁ                    0.53877      0.4750915 -0.1023418
## AMAZONAS                 0.53877      0.4750915 -0.1023418
## BAHIA                    0.53877      0.4750915 -0.1023418
## CEARÁ                    0.53877      0.4750915 -0.1023418
## DISTRITO FEDERAL         0.53877      0.4750915 -0.1023418
## ESPÍRITO SANTO           0.53877      0.4750915 -0.1023418
## GOIÁS                    0.53877      0.4750915 -0.1023418
## MARANHÃO                 0.53877      0.4750915 -0.1023418
## MATO GROSSO              0.53877      0.4750915 -0.1023418
## MATO GROSSO DO SUL       0.53877      0.4750915 -0.1023418
## MINAS GERAIS             0.53877      0.4750915 -0.1023418
## PARÁ                     0.53877      0.4750915 -0.1023418
## PARAÍBA                  0.53877      0.4750915 -0.1023418
## PARANÁ                   0.53877      0.4750915 -0.1023418
## PERNAMBUCO               0.53877      0.4750915 -0.1023418
## PIAUÍ                    0.53877      0.4750915 -0.1023418
## RIO DE JANEIRO           0.53877      0.4750915 -0.1023418
## RIO GRANDE DO NORTE      0.53877      0.4750915 -0.1023418
## RIO GRANDE DO SUL        0.53877      0.4750915 -0.1023418
## RONDÔNIA                 0.53877      0.4750915 -0.1023418
## RORAIMA                  0.53877      0.4750915 -0.1023418
## SANTA CATARINA           0.53877      0.4750915 -0.1023418
## SÃO PAULO                0.53877      0.4750915 -0.1023418
## SERGIPE                  0.53877      0.4750915 -0.1023418
## TOCANTINS                0.53877      0.4750915 -0.1023418

#Comparsion between Logliks
data.frame(OLS_Nulo = logLik(modelo_ols_nulo),
           HLM3_Nulo = logLik(modelo_nulo_hlm3),
           HLM2_Intercept_Aleat = logLik(modelo_intercept_hlm3),
           HLM2_Intercept_Inclin_Aleat = logLik(modelo_intercept_inclin_hlm3)) %>%
  melt() %>%
  ggplot(aes(x = variable, y = (abs(-value)), fill = factor(variable))) +
  geom_bar(stat = "identity") +
  geom_label(aes(label = (round(value,3))), hjust = 1.1, color = "white", size = 4) +
  labs(title = "Comparação do LL", 
       y = "LogLik", 
       x = "Modelo Proposto") +
  coord_flip() +
  scale_fill_manual("Legenda:",
                    values = c("lightblue","lightgreen","lightpink","grey25")) +
  theme(legend.title = element_blank(), 
        panel.background = element_rect("white"),
        legend.position = "none",
        axis.line = element_line())

## No id variables; using all as measure variables

Conclusao



```r
#To graphically observe the behavior of the v0jk values, that is,
#of the random intercepts per state, we can command:
library(dplyr)
random.effects(modelo_intercept_inclin_hlm3) %>% 
  dplyr::rename(v0j = 1) %>% 
  rownames_to_column("Estado") %>% 
  mutate(color_v0j = ifelse(v0j < 0, "A", "B"),
         hjust_v0j = ifelse(v0j > 0, 1.15, -0.15)) %>% 
  arrange(Estado) %>% 
  ggplot(aes(label = format(v0j, digits = 2), 
             hjust = hjust_v0j)) +
  geom_bar(aes(x = fct_rev(Estado), y = v0j, fill = color_v0j),
           stat = "identity", color = "black") +
  geom_text(aes(x = Estado, y = 0), size = 4.1, color = "black") +
  coord_flip() +
  labs(x = "Estado",
       y = expression(nu[0][j])) +
  scale_fill_manual("oi", values = c("pink","lightblue")) +
  theme(panel.background = element_rect("white"),
        panel.border = element_rect(NA),
        panel.grid = element_line("grey95"),
        legend.position = "none")




```r
pacotes <- c("plotly", #plataforma gráfica
             "tidyverse", #carregar outros pacotes do R
             "ggrepel", #geoms de texto e rótulo para 'ggplot2' que ajudam a
                        #evitar sobreposição de textos
             "knitr", "kableExtra", #formatação de tabelas
             "reshape2", #função 'melt'
             "misc3d", #gráficos 3D
             "plot3D", #gráficos 3D
             "cluster", #função 'agnes' para elaboração de clusters hierárquicos
             "factoextra", #função 'fviz_dend' para construção de dendrogramas
             "ade4") #função 'ade4' para matriz de distâncias em var. binárias

if(sum(as.numeric(!pacotes %in% installed.packages())) != 0){
  instalador <- pacotes[!pacotes %in% installed.packages()]
  for(i in 1:length(instalador)) {
    install.packages(instalador, dependencies = T)
    break()}
  sapply(pacotes, require, character = T) 
} else {
  sapply(pacotes, require, character = T) 
}

## Loading required package: misc3d

## Error: package or namespace load failed for 'misc3d':
##  .onLoad failed in loadNamespace() for 'tcltk', details:
##   call: fun(libname, pkgname)
##   error: Tcl/Tk libraries are missing: install the Tcl/Tk component from the R installer

## Loading required package: plot3D

## Error: package or namespace load failed for 'plot3D':
##  .onLoad failed in loadNamespace() for 'tcltk', details:
##   call: fun(libname, pkgname)
##   error: Tcl/Tk libraries are missing: install the Tcl/Tk component from the R installer

## Loading required package: cluster

## Loading required package: factoextra

## Welcome! Want to learn more? See two factoextra-related books at https://goo.gl/ve3WBa

## Loading required package: ade4

## Warning: package 'ade4' was built under R version 4.1.2

##     plotly  tidyverse    ggrepel      knitr kableExtra   reshape2     misc3d 
##       TRUE       TRUE       TRUE       TRUE       TRUE       TRUE      FALSE 
##     plot3D    cluster factoextra       ade4 
##      FALSE       TRUE       TRUE       TRUE

#Improved visualization of random intercepts and slopes by school,
#for the final HLM3 model

v_final <- data.frame(modelo_intercept_inclin_hlm3[["coefficients"]][["random"]][["Estado"]]) %>%
  rename(v00 = 1,
         v10 = 2)
v_final$Estado <- c(1:27)
v_final$Estado <- as.factor(v_final$Estado)

# Dissimilarity matrix
matriz_D <- v_final %>% 
 dplyr::select(v00, v10) %>% 
  dist(method = "euclidean")

# Method: parametrização da distância a ser utilizada

## "euclidean": distância euclidiana
## "euclidiana quadrática": elevar ao quadrado matriz_D (matriz_D^2)
## "maximum": distância de Chebychev;
## "manhattan": distância de Manhattan (ou distância absoluta ou bloco);
## "canberra": distância de Canberra;
## "minkowski": distância de Minkowski

# Visualizing Dissimilarity matrix
data.matrix(matriz_D) %>% 
  kable() %>%
  kable_styling(bootstrap_options = "striped", 
                full_width = FALSE, 
                font_size = 20)

	ACRE	ALAGOAS	AMAPÁ	AMAZONAS	BAHIA	CEARÁ	DISTRITO FEDERAL	ESPÍRITO SANTO	GOIÁS	MARANHÃO	MATO GROSSO	MATO GROSSO DO SUL	MINAS GERAIS	PARÁ	PARAÍBA	PARANÁ	PERNAMBUCO	PIAUÍ	RIO DE JANEIRO	RIO GRANDE DO NORTE	RIO GRANDE DO SUL	RONDÔNIA	RORAIMA	SANTA CATARINA	SÃO PAULO	SERGIPE	TOCANTINS
ACRE	0.0000000	1.0078728	0.8487320	1.3552509	0.8516019	1.3411784	1.7536867	0.4308062	0.7854953	1.6486756	1.0161807	1.3735034	0.0770316	0.5596019	1.3113336	1.9585363	0.9487943	1.7262834	0.2836680	0.0841006	0.6328342	0.1236305	1.7126543	1.2487709	0.2775428	1.8348809	0.2538492
ALAGOAS	1.0078728	0.0000000	1.8081139	0.3870670	0.2731657	0.3586060	2.7577622	1.4335750	1.7669646	0.6644961	2.0188690	2.3813576	0.9316193	1.5662656	0.4194485	0.9555427	0.1664505	0.7379700	0.7242882	0.9329630	1.6301815	0.9232054	2.6878018	2.2480367	1.2079466	2.8426679	0.9273669
AMAPÁ	0.8487320	1.8081139	0.0000000	2.1089914	1.5965878	2.1050496	1.1347295	0.5666256	0.1121590	2.3987436	0.3373337	0.7251874	0.9221020	0.3880713	2.0309013	2.7252884	1.7123548	2.4776554	1.1080746	0.9026451	0.2723286	0.8975882	1.2608314	0.4949694	0.6024303	1.1286853	1.0599979
AMAZONAS	1.3552509	0.3870670	2.1089914	0.0000000	0.5141390	0.0458295	3.1084068	1.7858237	2.0808001	0.2937813	2.3471584	2.7222408	1.2814627	1.9045573	0.1483380	0.6163698	0.4067274	0.3720721	1.0744076	1.2753177	1.9572204	1.2570356	3.0562776	2.5704113	1.5225581	3.1798516	1.3033203
BAHIA	0.8516019	0.2731657	1.5965878	0.5141390	0.0000000	0.5084676	2.6001843	1.2819200	1.5667930	0.8068813	1.8334444	2.2106789	0.7805623	1.3934770	0.4598560	1.1294001	0.1225849	0.8855976	0.5782663	0.7695509	1.4436353	0.7474282	2.5624744	2.0562819	1.0084308	2.6670613	0.8382749
CEARÁ	1.3411784	0.3586060	2.1050496	0.0458295	0.5084676	0.0000000	3.0948560	1.7711826	2.0745021	0.3093395	2.3382878	2.7108801	1.2666968	1.8932169	0.1861818	0.6216895	0.3957174	0.3857090	1.0590739	1.2621647	1.9482591	1.2455636	3.0376189	2.5628792	1.5152998	3.1695041	1.2813493
DISTRITO FEDERAL	1.7536867	2.7577622	1.1347295	3.1084068	2.6001843	3.0948560	0.0000000	1.3244875	1.1036344	3.4020390	0.8103090	0.4111773	1.8285013	1.2089280	3.0572881	3.7114720	2.7017005	3.4798115	2.0364014	1.8330905	1.1767373	1.8527776	0.3701147	0.6566854	1.6121031	0.2386574	1.8529468
ESPÍRITO SANTO	0.4308062	1.4335750	0.5666256	1.7858237	1.2819200	1.7711826	1.3244875	0.0000000	0.4677249	2.0790969	0.6237884	0.9523457	0.5044944	0.1831535	1.7413783	2.3869946	1.3795101	2.1565498	0.7121910	0.5123788	0.2958202	0.5381319	1.2872760	0.8590363	0.3765220	1.4152801	0.5535882
GOIÁS	0.7854953	1.7669646	0.1121590	2.0808001	1.5667930	2.0745021	1.1036344	0.4677249	0.0000000	2.3725686	0.2933254	0.6926628	0.8609903	0.2855904	2.0095240	2.6960285	1.6796596	2.4514468	1.0552030	0.8463056	0.1723301	0.8470089	1.1990749	0.4930473	0.5600340	1.1210789	0.9823854
MARANHÃO	1.6486756	0.6644961	2.3987436	0.2937813	0.8068813	0.3093395	3.4020390	2.0790969	2.3725686	0.0000000	2.6403091	3.0159914	1.5746394	2.1983198	0.3779762	0.3329936	0.7004604	0.0789185	1.3673070	1.5689572	2.2504526	1.5508157	3.3469555	2.8628757	1.8151099	3.4734273	1.5901237
MATO GROSSO	1.0161807	2.0188690	0.3373337	2.3471584	1.8334444	2.3382878	0.8103090	0.6237884	0.2933254	2.6403091	0.0000000	0.3993960	1.0932108	0.4589041	2.2828388	2.9597236	1.9425711	2.7189804	1.2968790	1.0868192	0.3900288	1.0960581	0.9242861	0.2354642	0.8266064	0.8368243	1.1761347
MATO GROSSO DO SUL	1.3735034	2.3813576	0.7251874	2.7222408	2.2106789	2.7108801	0.4111773	0.9523457	0.6926628	3.0159914	0.3993960	0.0000000	1.4497722	0.8177131	2.6648017	3.3305544	2.3158776	3.0942768	1.6571528	1.4498809	0.7738019	1.4653743	0.5869188	0.2707371	1.2120164	0.4629789	1.4986806
MINAS GERAIS	0.0770316	0.9316193	0.9221020	1.2814627	0.7805623	1.2666968	1.8285013	0.5044944	0.8609903	1.5746394	1.0932108	1.4497722	0.0000000	0.6365321	1.2404097	1.8831892	0.8754018	1.6520591	0.2079377	0.0472779	0.7095486	0.0974169	1.7820619	1.3257473	0.3392696	1.9113764	0.2211020
PARÁ	0.5596019	1.5662656	0.3880713	1.9045573	1.3934770	1.8932169	1.2089280	0.1831535	0.2855904	2.1983198	0.4589041	0.8177131	0.6365321	0.0000000	1.8490641	2.5132042	1.4981646	2.2765795	0.8422804	0.6334217	0.1157946	0.6476708	1.2180007	0.6936079	0.4101948	1.2770173	0.7214343
PARAÍBA	1.3113336	0.4194485	2.0309013	0.1483380	0.4598560	0.1861818	3.0572881	1.7413783	2.0095240	0.3779762	2.2828388	2.6648017	1.2404097	1.8490641	0.0000000	0.7109690	0.3771540	0.4552247	1.0373168	1.2290230	1.8942200	1.2054889	3.0222178	2.5016635	1.4562341	3.1186331	1.2871416
PARANÁ	1.9585363	0.9555427	2.7252884	0.6163698	1.1294001	0.6216895	3.7114720	2.3869946	2.6960285	0.3329936	2.9597236	3.3305544	1.8831892	2.5132042	0.7109690	0.0000000	1.0172693	0.2566357	1.6752518	1.8808207	2.5697035	1.8658521	3.6420022	3.1845660	2.1369718	3.7900037	1.8801356
PERNAMBUCO	0.9487943	0.1664505	1.7123548	0.4067274	0.1225849	0.3957174	2.7017005	1.3795101	1.6796596	0.7004604	1.9425711	2.3158776	0.8754018	1.4981646	0.3771540	1.0172693	0.0000000	0.7785176	0.6690031	0.8686241	1.5525423	0.8505274	2.6534133	2.1673791	1.1200689	2.7740473	0.9104495
PIAUÍ	1.7262834	0.7379700	2.4776554	0.3720721	0.8855976	0.3857090	3.4798115	2.1565498	2.4514468	0.0789185	2.7189804	3.0942768	1.6520591	2.2765795	0.4552247	0.2566357	0.7785176	0.0000000	1.4445728	1.6467904	2.3290951	1.6290041	3.4228466	2.9416889	1.8938998	3.5519100	1.6647451
RIO DE JANEIRO	0.2836680	0.7242882	1.1080746	1.0744076	0.5782663	1.0590739	2.0364014	0.7121910	1.0552030	1.3673070	1.2968790	1.6571528	0.2079377	0.8422804	1.0373168	1.6752518	0.6690031	1.4445728	0.0000000	0.2102623	0.9102248	0.2108054	1.9853485	1.5279929	0.5080589	2.1183825	0.2925859
RIO GRANDE DO NORTE	0.0841006	0.9329630	0.9026451	1.2753177	0.7695509	1.2621647	1.8330905	0.5123788	0.8463056	1.5689572	1.0868192	1.4498809	0.0472779	0.6334217	1.2290230	1.8808207	0.8686241	1.6467904	0.2102623	0.0000000	0.6999691	0.0510524	1.7960684	1.3177387	0.3101007	1.9102864	0.2680241
RIO GRANDE DO SUL	0.6328342	1.6301815	0.2723286	1.9572204	1.4436353	1.9482591	1.1767373	0.2958202	0.1723301	2.2504526	0.3900288	0.7738019	0.7095486	0.1157946	1.8942200	2.5697035	1.5525423	2.3290951	0.9102248	0.6999691	0.0000000	0.7069908	1.2209622	0.6180253	0.4386934	1.2244135	0.8163290
RONDÔNIA	0.1236305	0.9232054	0.8975882	1.2570356	0.7474282	1.2455636	1.8527776	0.5381319	0.8470089	1.5508157	1.0960581	1.4653743	0.0974169	0.6476708	1.2054889	1.8658521	0.8505274	1.6290041	0.2108054	0.0510524	0.7069908	0.0000000	1.8248137	1.3248428	0.2973432	1.9241572	0.3159306
RORAIMA	1.7126543	2.6878018	1.2608314	3.0562776	2.5624744	3.0376189	0.3701147	1.2872760	1.1990749	3.3469555	0.9242861	0.5869188	1.7820619	1.2180007	3.0222178	3.6420022	2.6534133	3.4228466	1.9853485	1.7960684	1.2209622	1.8248137	0.0000000	0.8563699	1.6269020	0.6084459	1.7619894
SANTA CATARINA	1.2487709	2.2480367	0.4949694	2.5704113	2.0562819	2.5628792	0.6566854	0.8590363	0.4930473	2.8628757	0.2354642	0.2707371	1.3257473	0.6936079	2.5016635	3.1845660	2.1673791	2.9416889	1.5279929	1.3177387	0.6180253	1.3248428	0.8563699	0.0000000	1.0478532	0.6337931	1.4115940
SÃO PAULO	0.2775428	1.2079466	0.6024303	1.5225581	1.0084308	1.5152998	1.6121031	0.3765220	0.5600340	1.8151099	0.8266064	1.2120164	0.3392696	0.4101948	1.4562341	2.1369718	1.1200689	1.8938998	0.5080589	0.3101007	0.4386934	0.2973432	1.6269020	1.0478532	0.0000000	1.6626908	0.5266062
SERGIPE	1.8348809	2.8426679	1.1286853	3.1798516	2.6670613	3.1695041	0.2386574	1.4152801	1.1210789	3.4734273	0.8368243	0.4629789	1.9113764	1.2770173	3.1186331	3.7900037	2.7740473	3.5519100	2.1183825	1.9102864	1.2244135	1.9241572	0.6084459	0.6337931	1.6626908	0.0000000	1.9610670
TOCANTINS	0.2538492	0.9273669	1.0599979	1.3033203	0.8382749	1.2813493	1.8529468	0.5535882	0.9823854	1.5901237	1.1761347	1.4986806	0.2211020	0.7214343	1.2871416	1.8801356	0.9104495	1.6647451	0.2925859	0.2680241	0.8163290	0.3159306	1.7619894	1.4115940	0.5266062	1.9610670	0.0000000

# Development of hierarchical clustering
library(cluster)
cluster_hier <- agnes(x = matriz_D, method = "single")

# Method is the chaining type:

## "complete": encadeamento completo (furthest neighbor ou complete linkage)
## "single": encadeamento único (nearest neighbor ou single linkage)
## "average": encadeamento médio (between groups ou average linkage)

# # Definition of the hierarchical clustering scheme

# Distances for combinations at each stage
coeficientes <- sort(cluster_hier$height, decreasing = FALSE) 
coeficientes

##  [1] 0.04582952 0.04727786 0.05105235 0.07703156 0.07891849 0.11215898
##  [7] 0.11579459 0.12258495 0.14833797 0.16645051 0.17233006 0.18315353
## [13] 0.20793772 0.22110203 0.23546417 0.23865743 0.25663569 0.27073710
## [19] 0.27754282 0.29332542 0.29378132 0.35860596 0.37011467 0.37652202
## [25] 0.41117735 0.57826631

# # Table with the clustering scheme. Output interpretation:

## The lines are the clustering stages
## In the columns Cluster1 and Cluster2, it is observed how the merging occurred
## When it is a negative number, it indicates isolated observation
## When it is a positive number, it indicates a previously formed cluster (stage)
## Coefficients: the distances for the combinations in each stage

esquema <- as.data.frame(cbind(cluster_hier$merge, coeficientes))
names(esquema) <- c("Cluster1", "Cluster2", "Coeficientes")
esquema

##    Cluster1 Cluster2 Coeficientes
## 1        -4       -6   0.04582952
## 2       -13      -20   0.04727786
## 3         2      -22   0.05105235
## 4        -1        3   0.07703156
## 5       -10      -18   0.07891849
## 6        -3       -9   0.11215898
## 7       -14      -21   0.11579459
## 8        -5      -17   0.12258495
## 9         1      -15   0.14833797
## 10       -2        8   0.16645051
## 11        6        7   0.17233006
## 12       11       -8   0.18315353
## 13        4      -19   0.20793772
## 14       13      -27   0.22110203
## 15      -11      -24   0.23546417
## 16       -7      -26   0.23865743
## 17        5      -16   0.25663569
## 18       15      -12   0.27073710
## 19       14      -25   0.27754282
## 20       12       18   0.29332542
## 21        9       17   0.29378132
## 22       10       21   0.35860596
## 23       16      -23   0.37011467
## 24       19       20   0.37652202
## 25       24       23   0.41117735
## 26       25       22   0.57826631

# Visualization of the hierarchical clustering scheme
esquema %>%
  kable(row.names = T) %>%
  kable_styling(bootstrap_options = "striped", 
                full_width = FALSE, 
                font_size = 20)

	Cluster1	Cluster2	Coeficientes
1	-4	-6	0.0458295
2	-13	-20	0.0472779
3	2	-22	0.0510524
4	-1	3	0.0770316
5	-10	-18	0.0789185
6	-3	-9	0.1121590
7	-14	-21	0.1157946
8	-5	-17	0.1225849
9	1	-15	0.1483380
10	-2	8	0.1664505
11	6	7	0.1723301
12	11	-8	0.1831535
13	4	-19	0.2079377
14	13	-27	0.2211020
15	-11	-24	0.2354642
16	-7	-26	0.2386574
17	5	-16	0.2566357
18	15	-12	0.2707371
19	14	-25	0.2775428
20	12	18	0.2933254
21	9	17	0.2937813
22	10	21	0.3586060
23	16	-23	0.3701147
24	19	20	0.3765220
25	24	23	0.4111773
26	25	22	0.5782663

# Construction of the dendrogram
dev.set(dev.next())

## quartz_off_screen 
##                 2

fviz_dend(x = cluster_hier)

## Warning: The `<scale>` argument of `guides()` cannot be `FALSE`. Use "none" instead as
## of ggplot2 3.3.4.
## ℹ The deprecated feature was likely used in the factoextra package.
##   Please report the issue at <https://github.com/kassambara/factoextra/issues>.
## This warning is displayed once every 8 hours.
## Call `lifecycle::last_lifecycle_warnings()` to see where this warning was
## generated.

## Registered S3 methods overwritten by 'broom':
##   method            from  
##   tidy.glht         jtools
##   tidy.summary.glht jtools

# Dendrogram with visualization of clusters
# Defining 3 clusters to compare with regional ones
fviz_dend(x = cluster_hier,
          k = 3,
          k_colors = c("deeppink4", "darkviolet", "deeppink"),
          color_labels_by_k = F,
          rect = T,
          rect_fill = T,
          lwd = 1,
          ggtheme = theme_bw())

# Creating a categorical variable to indicate the cluster in the database
## The 'k' argument indicates the number of clusters
v_final$cluster_H <- factor(cutree(tree = cluster_hier, k = 3))

#---------- --------- K-MEANS non-hierarchical clustering scheme ---------------------

# Elaboration of the non-hierarchical k-means clustering
cluster_kmeans <- kmeans(v_final[,1:2],
                         centers = 3)

# Creating a categorical variable to indicate the cluster in the database
v_final$cluster_K <- factor(cluster_kmeans$cluster)

# Elbow method to identify the optimal number of clusters
fviz_nbclust(v_final[,1:2], kmeans, method = "wss", k.max = 10)

# Visualization
v_final %>%
  kable() %>%
  kable_styling(bootstrap_options = "striped", 
                full_width = FALSE,
                font_size = 20)

	v00	v10	Ano2014	Ano2015	Ano2016	Ano2017	Ano2018	Ano2019	Estado	cluster_H	cluster_K
ACRE	-0.0294709	0.0755345	0.2201578	0.1843545	0.2208383	0.2520057	0.2055092	0.1990242	1	1	2
ALAGOAS	0.9718063	0.1906503	0.1645777	0.0868940	-0.0355244	-0.2145126	-0.3556713	-0.4120326	2	2	1
AMAPÁ	-0.7420458	-0.3855328	-0.3959222	-0.0521069	0.1582126	0.1887054	-0.0516194	0.2885115	3	1	3
AMAZONAS	1.3250971	0.0325160	0.1833311	0.1943370	0.3927792	0.3635601	0.2968533	0.2796587	4	2	1
BAHIA	0.8151680	-0.0331441	-0.0167786	-0.0319688	-0.1879103	-0.1977936	-0.2602910	-0.2808306	5	2	1
CEARÁ	1.3117073	0.0763459	-0.0005820	-0.0866612	-0.1413544	-0.1588028	-0.1605805	-0.2818612	6	2	1
DISTRITO FEDERAL	-1.7831309	0.0658571	0.0850226	0.3750741	0.1454209	0.0891645	0.3288235	0.6083619	7	3	3
ESPÍRITO SANTO	-0.4592358	0.1054693	0.3981806	0.0292149	-0.3825088	0.3013594	0.3848248	0.1850926	8	1	2
GOIÁS	-0.7330425	-0.2737358	0.0827484	-0.0261517	-0.0657332	0.1789237	0.1566465	0.1381287	9	1	3
MARANHÃO	1.6188033	0.0391581	0.0528312	0.0892158	-0.0712314	-0.3419937	-0.5015657	-0.4881569	10	2	1
MATO GROSSO	-1.0121095	-0.1833952	-0.4971138	-0.0404551	-0.2157126	-0.4964526	-0.1692850	0.1147334	11	1	3
MATO GROSSO DO SUL	-1.3951965	-0.0704282	0.1852213	0.0706962	0.5901643	0.8230387	0.5193328	0.5581398	12	1	3
MINAS GERAIS	0.0451432	0.0946814	0.1909092	0.0531106	0.0895704	0.1227749	0.0604606	-0.0047555	13	1	2
PARÁ	-0.5783089	-0.0336955	-0.2938997	-0.2507609	-0.4005707	-0.4975507	-0.4825227	-0.4600343	14	1	2
PARAÍBA	1.2693814	-0.1049610	-0.1655463	-0.1329720	-0.1585353	-0.3463256	-0.3963813	-0.4492702	15	2	1
PARANÁ	1.9270106	0.1652256	0.2683203	0.3523267	0.1834496	0.0978610	0.0818471	0.1352833	16	2	1
PERNAMBUCO	0.9183700	0.0330104	-0.0218656	0.0248414	-0.0073071	-0.1594980	-0.1049565	-0.1444674	17	2	1
PIAUÍ	1.6966535	0.0520998	0.2656055	0.0933383	-0.1069712	-0.0339710	-0.1466598	-0.2631930	18	2	1
RIO DE JANEIRO	0.2529474	0.1021330	0.1471858	0.1049232	0.0635383	0.0902138	0.1783655	0.1444673	19	1	2
RIO GRANDE DO NORTE	0.0498691	0.0476403	0.1648000	-0.0607841	-0.0086006	0.0852790	0.0932683	-0.1283605	20	1	2
RIO GRANDE DO SUL	-0.6245168	-0.1398709	-0.2225735	-0.0174327	0.0197314	0.0375732	0.0637979	0.2630818	21	1	2
RONDÔNIA	0.0684795	0.0001009	-0.4388168	-0.3873959	-0.2619108	-0.5755407	-0.5992302	-0.6175731	22	1	2
RORAIMA	-1.7055181	0.4277426	0.3321379	0.4391202	0.1955705	0.1716012	0.5939330	0.6904725	23	3	3
SANTA CATARINA	-1.2260238	-0.2818027	-0.9144535	-0.6559743	-0.1141088	-0.4403027	-0.4281602	-0.3615425	24	1	3
SÃO PAULO	-0.1860392	-0.1536300	-0.1378158	-0.1870649	-0.1255223	-0.1112961	-0.2681847	-0.2652966	25	1	2
SERGIPE	-1.8487000	-0.1636163	0.1360203	-0.2572510	0.0897350	0.7692411	0.7365517	0.4883054	26	3	3
TOCANTINS	0.0529020	0.3156473	0.2283180	0.0895326	0.1344913	0.0027383	0.2248942	0.0641133	27	1	2

# Descriptive statistics of clusters by variable

# Descriptive statistics of intercepts
group_by(v_final, cluster_K) %>%
  summarise(
    mean = mean(v00, na.rm = TRUE),
    sd = sd(v00, na.rm = TRUE),
    min = min(v00, na.rm = TRUE),
    max = max(v00, na.rm = TRUE))

## # A tibble: 3 × 5
##   cluster_K   mean    sd    min    max
##   <fct>      <dbl> <dbl>  <dbl>  <dbl>
## 1 1          1.32  0.377  0.815  1.93 
## 2 2         -0.141 0.307 -0.625  0.253
## 3 3         -1.31  0.452 -1.85  -0.733

# One-way analysis of variance (ANOVA). Output interpretation:

## Mean Sq of cluster_K: indicates the variability between groups
## Mean Sq of Residuals: indicates the variability within groups
## F value: test statistic (Sum Sq of cluster_K / Sum Sq of Residuals)
## Pr(>F): p-value of the statistic
## p-value < 0.05: at least one cluster has a statistically different mean from the others

## The most discriminant variable of the groups has the highest F statistic (and is significant)
library(knitr)
# ANOVA of the intercept
summary(anova_atendimento <- aov(formula = v00 ~ cluster_K,
                                 data = v_final))

##             Df Sum Sq Mean Sq F value   Pr(>F)    
## cluster_K    2 29.451  14.725   103.4 1.59e-12 ***
## Residuals   24  3.417   0.142                     
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1

# FIM!

v_final %>%
  dplyr::select(Estado, cluster_K) %>%
  arrange(Estado)

##                     Estado cluster_K
## ACRE                     1         2
## ALAGOAS                  2         1
## AMAPÁ                    3         3
## AMAZONAS                 4         1
## BAHIA                    5         1
## CEARÁ                    6         1
## DISTRITO FEDERAL         7         3
## ESPÍRITO SANTO           8         2
## GOIÁS                    9         3
## MARANHÃO                10         1
## MATO GROSSO             11         3
## MATO GROSSO DO SUL      12         3
## MINAS GERAIS            13         2
## PARÁ                    14         2
## PARAÍBA                 15         1
## PARANÁ                  16         1
## PERNAMBUCO              17         1
## PIAUÍ                   18         1
## RIO DE JANEIRO          19         2
## RIO GRANDE DO NORTE     20         2
## RIO GRANDE DO SUL       21         2
## RONDÔNIA                22         2
## RORAIMA                 23         3
## SANTA CATARINA          24         3
## SÃO PAULO               25         2
## SERGIPE                 26         3
## TOCANTINS               27         2

TCC

2023-10-05

R Markdown