` This is an R Markdown document. Markdown is a simple formatting syntax for authoring HTML, PDF, and MS Word documents. For more details on using R Markdown see http://rmarkdown.rstudio.com.
When you click the Knit button a document will be generated that includes both content as well as the output of any embedded R code chunks within the document. You can embed an R code chunk like this:
pacotes <- c("plotly", #plataforma gráfica
"tidyverse", #carregar outros pacotes do R
"ggrepel", #geoms de texto e rótulo para 'ggplot2' que ajudam a
#evitar sobreposição de textos
"knitr", "kableExtra", #formatação de tabelas
"reshape2", #função 'melt'
"PerformanceAnalytics", #função 'chart.Correlation' para plotagem
"psych", #elaboração da fatorial e estatísticas
"ltm", #determinação do alpha de Cronbach pela função 'cronbach.alpha'
"Hmisc", # matriz de correlações com p-valor
"readxl","tinytex", "rgl") # importar arquivo Excel
if(sum(as.numeric(!pacotes %in% installed.packages())) != 0){
instalador <- pacotes[!pacotes %in% installed.packages()]
for(i in 1:length(instalador)) {
install.packages(instalador, dependencies = T)
break()}
sapply(pacotes, require, character = T)
} else {
sapply(pacotes, require, character = T)
}
## Loading required package: plotly
## Loading required package: ggplot2
## Warning: package 'ggplot2' was built under R version 4.1.2
##
## Attaching package: 'plotly'
## The following object is masked from 'package:ggplot2':
##
## last_plot
## The following object is masked from 'package:stats':
##
## filter
## The following object is masked from 'package:graphics':
##
## layout
## Loading required package: tidyverse
## Warning: package 'tidyverse' was built under R version 4.1.2
## Warning: package 'tibble' was built under R version 4.1.2
## Warning: package 'tidyr' was built under R version 4.1.2
## Warning: package 'readr' was built under R version 4.1.2
## Warning: package 'purrr' was built under R version 4.1.2
## Warning: package 'dplyr' was built under R version 4.1.2
## Warning: package 'stringr' was built under R version 4.1.2
## Warning: package 'forcats' was built under R version 4.1.2
## Warning: package 'lubridate' was built under R version 4.1.2
## ── Attaching core tidyverse packages ──────────────────────── tidyverse 2.0.0 ──
## ✔ dplyr 1.1.0 ✔ readr 2.1.4
## ✔ forcats 1.0.0 ✔ stringr 1.5.0
## ✔ lubridate 1.9.2 ✔ tibble 3.1.8
## ✔ purrr 1.0.1 ✔ tidyr 1.3.0
## ── Conflicts ────────────────────────────────────────── tidyverse_conflicts() ──
## ✖ dplyr::filter() masks plotly::filter(), stats::filter()
## ✖ dplyr::lag() masks stats::lag()
## ℹ Use the conflicted package (<http://conflicted.r-lib.org/>) to force all conflicts to become errors
## Loading required package: ggrepel
##
## Loading required package: knitr
##
## Loading required package: kableExtra
##
##
## Attaching package: 'kableExtra'
##
##
## The following object is masked from 'package:dplyr':
##
## group_rows
##
##
## Loading required package: reshape2
##
##
## Attaching package: 'reshape2'
##
##
## The following object is masked from 'package:tidyr':
##
## smiths
##
##
## Loading required package: PerformanceAnalytics
##
## Loading required package: xts
## Warning: package 'xts' was built under R version 4.1.2
## Loading required package: zoo
## Warning: package 'zoo' was built under R version 4.1.2
##
## Attaching package: 'zoo'
##
## The following objects are masked from 'package:base':
##
## as.Date, as.Date.numeric
##
##
## Attaching package: 'xts'
##
## The following objects are masked from 'package:dplyr':
##
## first, last
##
##
## Attaching package: 'PerformanceAnalytics'
##
## The following object is masked from 'package:graphics':
##
## legend
##
## Loading required package: psych
## Warning: package 'psych' was built under R version 4.1.2
##
## Attaching package: 'psych'
##
## The following objects are masked from 'package:ggplot2':
##
## %+%, alpha
##
## Loading required package: ltm
## Warning: package 'ltm' was built under R version 4.1.2
## Loading required package: MASS
##
## Attaching package: 'MASS'
##
## The following object is masked from 'package:dplyr':
##
## select
##
## The following object is masked from 'package:plotly':
##
## select
##
## Loading required package: msm
## Loading required package: polycor
## Warning: package 'polycor' was built under R version 4.1.2
##
## Attaching package: 'polycor'
##
## The following object is masked from 'package:psych':
##
## polyserial
##
##
## Attaching package: 'ltm'
##
## The following object is masked from 'package:psych':
##
## factor.scores
##
## Loading required package: Hmisc
## Warning: package 'Hmisc' was built under R version 4.1.2
## Loading required package: lattice
## Loading required package: survival
## Loading required package: Formula
##
## Attaching package: 'Hmisc'
##
## The following object is masked from 'package:psych':
##
## describe
##
## The following objects are masked from 'package:dplyr':
##
## src, summarize
##
## The following object is masked from 'package:plotly':
##
## subplot
##
## The following objects are masked from 'package:base':
##
## format.pval, units
##
## Loading required package: readxl
## Warning: package 'readxl' was built under R version 4.1.2
## Loading required package: tinytex
## Loading required package: rgl
## Warning: package 'rgl' was built under R version 4.1.2
## plotly tidyverse ggrepel
## TRUE TRUE TRUE
## knitr kableExtra reshape2
## TRUE TRUE TRUE
## PerformanceAnalytics psych ltm
## TRUE TRUE TRUE
## Hmisc readxl tinytex
## TRUE TRUE TRUE
## rgl
## TRUE
Get data:
BF_historica <- read_csv("Dadod_Bolsa_Familia_Brasil.csv", locale = readr::locale(encoding = "latin1"))
## Rows: 5940 Columns: 9
## ── Column specification ────────────────────────────────────────────────────────
## Delimiter: ","
## chr (2): Unidade Territorial, Referência
## dbl (3): Código, Famílias PBF (até Out/2021), Famílias PBF (a partir de Mar/...
## num (4): Valor repassado às famílias PBF (até Out/2021), Valor repassado às ...
##
## ℹ Use `spec()` to retrieve the full column specification for this data.
## ℹ Specify the column types or set `show_col_types = FALSE` to quiet this message.
BF_historica <- BF_historica[,c(2,3,4,6,8)]
PIB_per_capita_ <- readxl::read_excel("PIB per capita .xlsx")
## New names:
## • `` -> `...2`
## • `` -> `...3`
## • `` -> `...4`
## • `` -> `...5`
## • `` -> `...6`
## • `` -> `...7`
## • `` -> `...8`
## • `` -> `...9`
## • `` -> `...10`
PIB <- PIB_per_capita_[c(9,11:17,19:27,29:36,38:41),c(1,4:11)]
obitos_inf <- read_delim("obitos_infantis.csv",
delim = ";", escape_double = FALSE, trim_ws = TRUE, locale = readr::locale(encoding = "latin1"))
## Rows: 34 Columns: 10
## ── Column specification ────────────────────────────────────────────────────────
## Delimiter: ";"
## chr (1): Região/Unidade da Federação
## dbl (9): 2012, 2013, 2014, 2015, 2016, 2017, 2018, 2019, Total
##
## ℹ Use `spec()` to retrieve the full column specification for this data.
## ℹ Specify the column types or set `show_col_types = FALSE` to quiet this message.
obitos_inf <- obitos_inf[c(2:8, 10:18, 20:23, 25:27, 29:32),-10]
ocupacao_2012 <- read_excel("ocupacao_2012.xlsx")
## New names:
## • `` -> `...2`
## • `` -> `...3`
## • `` -> `...4`
## • `` -> `...5`
## • `` -> `...6`
ocupacao_2012<- ocupacao_2012[,1:2]
ocupacao_2013 <- read_excel("ocupacao_2013.xlsx")
## New names:
## • `` -> `...2`
## • `` -> `...3`
## • `` -> `...4`
## • `` -> `...5`
## • `` -> `...6`
ocupacao_2013 <- ocupacao_2013[,1:2]
ocupacao_2014 <- read_excel("ocupacao_2014.xlsx")
## New names:
## • `` -> `...2`
## • `` -> `...3`
## • `` -> `...4`
## • `` -> `...5`
## • `` -> `...6`
ocupacao_2014 <- ocupacao_2014[,1:2]
ocupacao_2015 <- read_excel("ocupacao_2015.xlsx")
## New names:
## • `` -> `...2`
## • `` -> `...3`
## • `` -> `...4`
## • `` -> `...5`
## • `` -> `...6`
ocupacao_2015<- ocupacao_2015[,1:2]
ocupacao_2016 <- read_excel("ocupacao_2016.xlsx")
## New names:
## • `` -> `...2`
## • `` -> `...3`
## • `` -> `...4`
## • `` -> `...5`
## • `` -> `...6`
ocupacao_2016<- ocupacao_2016[,1:2]
ocupacao_2017 <- read_excel("ocupacao_2017.xlsx")
## New names:
## • `` -> `...2`
## • `` -> `...3`
## • `` -> `...4`
## • `` -> `...5`
## • `` -> `...6`
ocupacao_2017<- ocupacao_2017[,1:2]
ocupacao_2018 <- read_excel("ocupaçao_2018.xlsx")
## New names:
## • `` -> `...2`
## • `` -> `...3`
## • `` -> `...4`
## • `` -> `...5`
## • `` -> `...6`
ocupacao_2018<- ocupacao_2018[,1:2]
ocupacao_2019 <- read_excel("ocupação_2019.xlsx")
## New names:
## • `` -> `...2`
## • `` -> `...3`
## • `` -> `...4`
## • `` -> `...5`
## • `` -> `...6`
ocupacao_2019<- ocupacao_2019[,1:2]
matriculas <- read_delim("matriculas.csv",
delim = ";", escape_double = FALSE, trim_ws = TRUE)
## Rows: 38 Columns: 82
## ── Column specification ────────────────────────────────────────────────────────
## Delimiter: ";"
## dbl (82): Colonne1, Acre_matriculas_inf, Acre_matriculas_fund, Acre_matricul...
##
## ℹ Use `spec()` to retrieve the full column specification for this data.
## ℹ Specify the column types or set `show_col_types = FALSE` to quiet this message.
Aggregate values of Bolsa Fmilia by year:
library(dplyr)
BF_historica1 <- separate_wider_delim(BF_historica, cols = Referência, delim = "/", names = c("mes", "ano"))
BF_historica1 <- BF_historica1[,-2]
BF_historica2 <- aggregate(cbind(BF_historica1$`Famílias PBF (até Out/2021)`,BF_historica1$`Valor repassado às famílias PBF (até Out/2021)`,BF_historica1$`Valor do Benefício médio (até Out/2021)`) ~ BF_historica1$`Unidade Territorial` + BF_historica1$ano, data = BF_historica1, FUN = sum,na.rm = TRUE)
colnames(BF_historica2) <- c("Estado","Ano","Familias","Valor","Beneficio")
BF_historica2 <- BF_historica2[,-5]
BF_historica2 <- BF_historica2[217:432,]
Matrix Enrollment
Estados_indexes<-seq(2,82,3)
matriculas_inf_1 <- matriculas[1,Estados_indexes]
matriculas_inf_1 <- t(matriculas_inf_1)
rownames(matriculas_inf_1) <- c("AC","AL","AP","AM","BA","CE","DF","ES","GO","MA","MT","MS","MG","PA","PB","PR","PE","PI","RJ","RN","RS","RO","RR","SC","SP","SE","TO")
colnames(matriculas_inf_1) <- c("matriculas_inf")
Estados_indexes_1<-seq(3,82,3)
matriculas_inf_2 <- matriculas[1,Estados_indexes_1]
matriculas_inf_2 <- t(matriculas_inf_2)
rownames(matriculas_inf_2) <- c("AC","AL","AP","AM","BA","CE","DF","ES","GO","MA","MT","MS","MG","PA","PB","PR","PE","PI","RJ","RN","RS","RO","RR","SC","SP","SE","TO")
colnames(matriculas_inf_2) <- c("matriculas_fund")
Estados_indexes_2<-seq(4,82,3)
matriculas_inf_3 <- matriculas[1,Estados_indexes_2]
matriculas_inf_3 <- t(matriculas_inf_3)
rownames(matriculas_inf_3) <- c("AC","AL","AP","AM","BA","CE","DF","ES","GO","MA","MT","MS","MG","PA","PB","PR","PE","PI","RJ","RN","RS","RO","RR","SC","SP","SE","TO")
colnames(matriculas_inf_3) <- c("matriculas_med")
mat_2012 <- cbind(matriculas_inf_1,matriculas_inf_2,matriculas_inf_3)
rownames(mat_2012) <- c("AC","AL","AP","AM","BA","CE","DF","ES","GO","MA","MT","MS","MG","PA","PB","PR","PE","PI","RJ","RN","RS","RO","RR","SC","SP","SE","TO")
Estados_indexes<-seq(2,82,3)
matriculas_inf_4 <- matriculas[2,Estados_indexes]
matriculas_inf_4 <- t(matriculas_inf_4)
rownames(matriculas_inf_4) <- c("AC","AL","AP","AM","BA","CE","DF","ES","GO","MA","MT","MS","MG","PA","PB","PR","PE","PI","RJ","RN","RS","RO","RR","SC","SP","SE","TO")
colnames(matriculas_inf_4) <- c("matriculas_inf")
Estados_indexes_1<-seq(4,82,3)
matriculas_inf_5 <- matriculas[2,Estados_indexes_1]
matriculas_inf_5 <- t(matriculas_inf_5)
rownames(matriculas_inf_5) <- c("AC","AL","AP","AM","BA","CE","DF","ES","GO","MA","MT","MS","MG","PA","PB","PR","PE","PI","RJ","RN","RS","RO","RR","SC","SP","SE","TO")
colnames(matriculas_inf_5) <- c("matriculas_med")
Estados_indexes_2<-seq(4,82,3)
matriculas_inf_6 <- matriculas[1,Estados_indexes_2]
matriculas_inf_6 <- t(matriculas_inf_6)
rownames(matriculas_inf_6) <- c("AC","AL","AP","AM","BA","CE","DF","ES","GO","MA","MT","MS","MG","PA","PB","PR","PE","PI","RJ","RN","RS","RO","RR","SC","SP","SE","TO")
colnames(matriculas_inf_6) <- c("matriculas_med")
mat_2013 <- cbind(matriculas_inf_4,matriculas_inf_5,matriculas_inf_6)
mat <- rbind(mat_2012,mat_2013)
Estados_indexes<-seq(2,82,3)
matriculas_inf_7 <- matriculas[3,Estados_indexes]
matriculas_inf_7 <- t(matriculas_inf_7)
rownames(matriculas_inf_7) <- c("AC","AL","AP","AM","BA","CE","DF","ES","GO","MA","MT","MS","MG","PA","PB","PR","PE","PI","RJ","RN","RS","RO","RR","SC","SP","SE","TO")
colnames(matriculas_inf_7) <- c("matriculas_inf")
Estados_indexes_1<-seq(4,82,3)
matriculas_inf_8 <- matriculas[3,Estados_indexes_1]
matriculas_inf_8 <- t(matriculas_inf_8)
rownames(matriculas_inf_8) <- c("AC","AL","AP","AM","BA","CE","DF","ES","GO","MA","MT","MS","MG","PA","PB","PR","PE","PI","RJ","RN","RS","RO","RR","SC","SP","SE","TO")
colnames(matriculas_inf_8) <- c("matriculas_med")
Estados_indexes_2<-seq(4,82,3)
matriculas_inf_9 <- matriculas[3,Estados_indexes_2]
matriculas_inf_9 <- t(matriculas_inf_9)
rownames(matriculas_inf_9) <- c("AC","AL","AP","AM","BA","CE","DF","ES","GO","MA","MT","MS","MG","PA","PB","PR","PE","PI","RJ","RN","RS","RO","RR","SC","SP","SE","TO")
colnames(matriculas_inf_9) <- c("matriculas_med")
mat_2014 <- cbind(matriculas_inf_7,matriculas_inf_8,matriculas_inf_9)
mat <- rbind(mat_2012,mat_2013,mat_2014)
Estados_indexes<-seq(2,82,3)
matriculas_inf_10 <- matriculas[4,Estados_indexes]
matriculas_inf_10 <- t(matriculas_inf_10)
rownames(matriculas_inf_10) <- c("AC","AL","AP","AM","BA","CE","DF","ES","GO","MA","MT","MS","MG","PA","PB","PR","PE","PI","RJ","RN","RS","RO","RR","SC","SP","SE","TO")
colnames(matriculas_inf_10) <- c("matriculas_inf")
Estados_indexes_1<-seq(4,82,3)
matriculas_inf_11 <- matriculas[4,Estados_indexes_1]
matriculas_inf_11 <- t(matriculas_inf_11)
rownames(matriculas_inf_11) <- c("AC","AL","AP","AM","BA","CE","DF","ES","GO","MA","MT","MS","MG","PA","PB","PR","PE","PI","RJ","RN","RS","RO","RR","SC","SP","SE","TO")
colnames(matriculas_inf_11) <- c("matriculas_med")
Estados_indexes_2<-seq(4,82,3)
matriculas_inf_12 <- matriculas[4,Estados_indexes_2]
matriculas_inf_12 <- t(matriculas_inf_12)
rownames(matriculas_inf_12) <- c("AC","AL","AP","AM","BA","CE","DF","ES","GO","MA","MT","MS","MG","PA","PB","PR","PE","PI","RJ","RN","RS","RO","RR","SC","SP","SE","TO")
colnames(matriculas_inf_12) <- c("matriculas_med")
mat_2015 <- cbind(matriculas_inf_10,matriculas_inf_11,matriculas_inf_12)
mat <- rbind(mat_2012,mat_2013,mat_2014,mat_2015)
Estados_indexes<-seq(2,82,3)
matriculas_inf_13 <- matriculas[5,Estados_indexes]
matriculas_inf_13 <- t(matriculas_inf_13)
rownames(matriculas_inf_13) <- c("AC","AL","AP","AM","BA","CE","DF","ES","GO","MA","MT","MS","MG","PA","PB","PR","PE","PI","RJ","RN","RS","RO","RR","SC","SP","SE","TO")
colnames(matriculas_inf_13) <- c("matriculas_inf")
Estados_indexes_1<-seq(4,82,3)
matriculas_inf_14 <- matriculas[5,Estados_indexes_1]
matriculas_inf_14 <- t(matriculas_inf_14)
rownames(matriculas_inf_14) <- c("AC","AL","AP","AM","BA","CE","DF","ES","GO","MA","MT","MS","MG","PA","PB","PR","PE","PI","RJ","RN","RS","RO","RR","SC","SP","SE","TO")
colnames(matriculas_inf_14) <- c("matriculas_med")
Estados_indexes_2<-seq(4,82,3)
matriculas_inf_15 <- matriculas[5,Estados_indexes_2]
matriculas_inf_15 <- t(matriculas_inf_15)
rownames(matriculas_inf_15) <- c("AC","AL","AP","AM","BA","CE","DF","ES","GO","MA","MT","MS","MG","PA","PB","PR","PE","PI","RJ","RN","RS","RO","RR","SC","SP","SE","TO")
colnames(matriculas_inf_15) <- c("matriculas_med")
mat_2016 <- cbind(matriculas_inf_13,matriculas_inf_14,matriculas_inf_15)
mat <- rbind(mat_2012,mat_2013,mat_2014,mat_2015, mat_2016)
Estados_indexes<-seq(2,82,3)
matriculas_inf_16 <- matriculas[6,Estados_indexes]
matriculas_inf_16 <- t(matriculas_inf_16)
rownames(matriculas_inf_16) <- c("AC","AL","AP","AM","BA","CE","DF","ES","GO","MA","MT","MS","MG","PA","PB","PR","PE","PI","RJ","RN","RS","RO","RR","SC","SP","SE","TO")
colnames(matriculas_inf_16) <- c("matriculas_inf")
Estados_indexes_1<-seq(4,82,3)
matriculas_inf_17 <- matriculas[6,Estados_indexes_1]
matriculas_inf_17 <- t(matriculas_inf_17)
rownames(matriculas_inf_17) <- c("AC","AL","AP","AM","BA","CE","DF","ES","GO","MA","MT","MS","MG","PA","PB","PR","PE","PI","RJ","RN","RS","RO","RR","SC","SP","SE","TO")
colnames(matriculas_inf_17) <- c("matriculas_med")
Estados_indexes_2<-seq(4,82,3)
matriculas_inf_18 <- matriculas[6,Estados_indexes_2]
matriculas_inf_18 <- t(matriculas_inf_18)
rownames(matriculas_inf_18) <- c("AC","AL","AP","AM","BA","CE","DF","ES","GO","MA","MT","MS","MG","PA","PB","PR","PE","PI","RJ","RN","RS","RO","RR","SC","SP","SE","TO")
colnames(matriculas_inf_18) <- c("matriculas_med")
mat_2017 <- cbind(matriculas_inf_16,matriculas_inf_17,matriculas_inf_18)
mat <- rbind(mat_2012,mat_2013,mat_2014,mat_2015,mat_2016,mat_2017)
Estados_indexes<-seq(2,82,3)
matriculas_inf_19 <- matriculas[7,Estados_indexes]
matriculas_inf_19 <- t(matriculas_inf_19)
rownames(matriculas_inf_19) <- c("AC","AL","AP","AM","BA","CE","DF","ES","GO","MA","MT","MS","MG","PA","PB","PR","PE","PI","RJ","RN","RS","RO","RR","SC","SP","SE","TO")
colnames(matriculas_inf_19) <- c("matriculas_inf")
Estados_indexes_1<-seq(4,82,3)
matriculas_inf_20 <- matriculas[7,Estados_indexes_1]
matriculas_inf_20 <- t(matriculas_inf_20)
rownames(matriculas_inf_20) <- c("AC","AL","AP","AM","BA","CE","DF","ES","GO","MA","MT","MS","MG","PA","PB","PR","PE","PI","RJ","RN","RS","RO","RR","SC","SP","SE","TO")
colnames(matriculas_inf_20) <- c("matriculas_med")
Estados_indexes_2<-seq(4,82,3)
matriculas_inf_21 <- matriculas[7,Estados_indexes_2]
matriculas_inf_21 <- t(matriculas_inf_21)
rownames(matriculas_inf_21) <- c("AC","AL","AP","AM","BA","CE","DF","ES","GO","MA","MT","MS","MG","PA","PB","PR","PE","PI","RJ","RN","RS","RO","RR","SC","SP","SE","TO")
colnames(matriculas_inf_21) <- c("matriculas_med")
mat_2018 <- cbind(matriculas_inf_19,matriculas_inf_20,matriculas_inf_21)
mat <- rbind(mat_2012,mat_2013,mat_2014,mat_2015, mat_2016, mat_2017, mat_2018)
Estados_indexes<-seq(2,82,3)
matriculas_inf_22 <- matriculas[8,Estados_indexes]
matriculas_inf_22 <- t(matriculas_inf_22)
rownames(matriculas_inf_22) <- c("AC","AL","AP","AM","BA","CE","DF","ES","GO","MA","MT","MS","MG","PA","PB","PR","PE","PI","RJ","RN","RS","RO","RR","SC","SP","SE","TO")
colnames(matriculas_inf_22) <- c("matriculas_inf")
Estados_indexes_1<-seq(4,82,3)
matriculas_inf_23 <- matriculas[8,Estados_indexes_1]
matriculas_inf_23 <- t(matriculas_inf_23)
rownames(matriculas_inf_23) <- c("AC","AL","AP","AM","BA","CE","DF","ES","GO","MA","MT","MS","MG","PA","PB","PR","PE","PI","RJ","RN","RS","RO","RR","SC","SP","SE","TO")
colnames(matriculas_inf_23) <- c("matriculas_med")
Estados_indexes_2<-seq(4,82,3)
matriculas_inf_24 <- matriculas[8,Estados_indexes_2]
matriculas_inf_24 <- t(matriculas_inf_24)
rownames(matriculas_inf_24) <- c("AC","AL","AP","AM","BA","CE","DF","ES","GO","MA","MT","MS","MG","PA","PB","PR","PE","PI","RJ","RN","RS","RO","RR","SC","SP","SE","TO")
colnames(matriculas_inf_24) <- c("matriculas_med")
mat_2019 <- cbind(matriculas_inf_22,matriculas_inf_23,matriculas_inf_24)
mat <- rbind(mat_2012,mat_2013,mat_2014,mat_2015, mat_2016, mat_2017, mat_2018, mat_2019)
Ocupation:
Taxa_de_ocupacao<-rbind(ocupacao_2012,ocupacao_2013,ocupacao_2014,ocupacao_2015,ocupacao_2016,ocupacao_2017,ocupacao_2018,ocupacao_2019)
colnames(Taxa_de_ocupacao) <- c("Estados","Taxa_de_Ocupacao")
Taxa_de_ocupacao <- Taxa_de_ocupacao[c(-1,-29,-57,-85,-113,-141,-169,-197),]
Taxa_de_ocupacao <- as.numeric(unlist(Taxa_de_ocupacao$Taxa_de_Ocupacao))
Taxa_de_ocupacao <- as.data.frame(Taxa_de_ocupacao)
Child Death:
colnames(obitos_inf)<- c("ano","ano","ano","ano","ano","ano","ano","ano","ano")
obitos_inf1 <- obitos_inf[,c(1,2)]
obitos_inf2 <- obitos_inf[,c(1,3)]
obitos_inf3 <- obitos_inf[,c(1,4)]
obitos_inf4 <- obitos_inf[,c(1,5)]
obitos_inf5 <- obitos_inf[,c(1,6)]
obitos_inf6 <- obitos_inf[,c(1,7)]
obitos_inf7 <- obitos_inf[,c(1,8)]
obitos_inf8 <- obitos_inf[,c(1,9)]
obitos_inf <- rbind(obitos_inf1,obitos_inf2,obitos_inf3,obitos_inf4,obitos_inf5,obitos_inf6,obitos_inf7,obitos_inf8)
GDP:
colnames(PIB)<- c("Estado","ano","ano","ano","ano","ano","ano","ano","ano")
PIB1 <- PIB[,c(1,2)]
PIB2 <- PIB[,c(1,3)]
PIB3 <- PIB[,c(1,4)]
PIB4 <- PIB[,c(1,5)]
PIB5 <- PIB[,c(1,6)]
PIB6 <- PIB[,c(1,7)]
PIB7 <- PIB[,c(1,8)]
PIB8 <- PIB[,c(1,9)]
PIB <- rbind(PIB1,PIB2,PIB3,PIB4,PIB5,PIB6,PIB7,PIB8)
PIB <- PIB[-c(1,22,30,51,59,80,88,109,117,138,146,167,175,196,204,225),]
PIB
## # A tibble: 216 × 2
## Estado ano
## <chr> <dbl>
## 1 Rondônia 18939.
## 2 Acre 13361.
## 3 Amazonas 20118.
## 4 Roraima 16424.
## 5 Pará 13741.
## 6 Amapá 15933.
## 7 Tocantins 14590.
## 8 Maranhão 9009.
## 9 Piauí 9060.
## 10 Ceará 11268.
## # … with 206 more rows
#Deflated GDP- IPCA (Índice Nacional de Preços ao Consumidor Amplo
#)- ACUMULADO ANUAL
PIB_per_capita_D <- readxl::read_excel("PIB_IPCA.xlsx")
## New names:
## • `` -> `...2`
## • `` -> `...3`
## • `` -> `...4`
## • `` -> `...5`
## • `` -> `...6`
## • `` -> `...7`
## • `` -> `...8`
## • `` -> `...9`
## • `` -> `...10`
## • `` -> `...11`
PIB_IPCA <- PIB_per_capita_D[c(9:15,17:25,27:30,32:34, 36:39),c(1,4:11)]
colnames(PIB_IPCA)<- c("Estado","ano","ano","ano","ano","ano","ano","ano","ano")
PIB_1 <- PIB_IPCA[,c(1,2)]
PIB_2 <- PIB_IPCA[,c(1,3)]
PIB_3 <- PIB_IPCA[,c(1,4)]
PIB_4 <- PIB_IPCA[,c(1,5)]
PIB_5 <- PIB_IPCA[,c(1,6)]
PIB_6 <- PIB_IPCA[,c(1,7)]
PIB_7 <- PIB_IPCA[,c(1,8)]
PIB_8 <- PIB_IPCA[,c(1,9)]
PIB_D <- rbind(PIB_1,PIB_2,PIB_3,PIB_4,PIB_5,PIB_6,PIB_7,PIB_8)
PIB_D
## # A tibble: 216 × 2
## Estado ano
## <chr> <dbl>
## 1 Rondônia 12161.
## 2 Acre 8579.
## 3 Amazonas 12918.
## 4 Roraima 10546.
## 5 Pará 8824.
## 6 Amapá 10231.
## 7 Tocantins 9369.
## 8 Maranhão 5785.
## 9 Piauí 5818.
## 10 Ceará 7236.
## # … with 206 more rows
Final Dataset:
Dataset<-cbind(Taxa_de_ocupacao,BF_historica2,mat, obitos_inf,PIB)
Dataset<- Dataset[,-c(9,11)]
colnames(Dataset) <- c("Pessoas_Ocupadas","Estado","Ano","Familias_PBF","Valor_PBF","Matriculas_inf","Matriculas_fund","Matriculas_med","obitos_inf","PIB")
#Data with deflated GDP
Dataset.1<-cbind(Taxa_de_ocupacao,BF_historica2,mat,obitos_inf,PIB_D)
Dataset.1<- Dataset.1[,-c(9,11)]
colnames(Dataset.1) <- c("Pessoas_Ocupadas","Estado","Ano","Familias_PBF","Valor_PBF","Matriculas_inf","Matriculas_fund","Matriculas_med","obitos_inf","PIB_D")
Packages for the model:
pacotes <- c("plotly","tidyverse","ggrepel","fastDummies","knitr","kableExtra",
"splines","reshape2","PerformanceAnalytics","correlation","see",
"ggraph","psych","car","ggside","tidyquant","olsrr",
"jtools","ggstance","magick","cowplot","emojifont","beepr","Rcpp",
"equatiomatic","nortest","rgl")
#"nortest","rgl"
options(rgl.debug = TRUE)
if(sum(as.numeric(!pacotes %in% installed.packages())) != 0){
instalador <- pacotes[!pacotes %in% installed.packages()]
for(i in 1:length(instalador)) {
install.packages(instalador, dependencies = T)
break()}
sapply(pacotes, require, character = T)
} else {
sapply(pacotes, require, character = T)
}
## Loading required package: fastDummies
## Loading required package: splines
## Loading required package: correlation
## Warning: package 'correlation' was built under R version 4.1.2
## Loading required package: see
## Warning: package 'see' was built under R version 4.1.2
## Loading required package: ggraph
## Warning: package 'ggraph' was built under R version 4.1.2
## Loading required package: car
## Warning: package 'car' was built under R version 4.1.2
## Loading required package: carData
##
## Attaching package: 'car'
## The following object is masked from 'package:psych':
##
## logit
## The following object is masked from 'package:dplyr':
##
## recode
## The following object is masked from 'package:purrr':
##
## some
## Loading required package: ggside
## Warning: package 'ggside' was built under R version 4.1.2
## Registered S3 method overwritten by 'ggside':
## method from
## +.gg ggplot2
## Loading required package: tidyquant
## Warning: package 'tidyquant' was built under R version 4.1.2
## Loading required package: quantmod
## Warning: package 'quantmod' was built under R version 4.1.2
## Loading required package: TTR
## Registered S3 method overwritten by 'quantmod':
## method from
## as.zoo.data.frame zoo
##
## Attaching package: 'quantmod'
## The following object is masked from 'package:Hmisc':
##
## Lag
## Loading required package: olsrr
##
## Attaching package: 'olsrr'
## The following object is masked from 'package:MASS':
##
## cement
## The following object is masked from 'package:datasets':
##
## rivers
## Loading required package: jtools
##
## Attaching package: 'jtools'
## The following object is masked from 'package:Hmisc':
##
## %nin%
## Loading required package: ggstance
## Warning: package 'ggstance' was built under R version 4.1.2
##
## Attaching package: 'ggstance'
## The following objects are masked from 'package:ggplot2':
##
## geom_errorbarh, GeomErrorbarh
## Loading required package: magick
## Linking to ImageMagick 6.9.12.3
## Enabled features: cairo, fontconfig, freetype, heic, lcms, pango, raw, rsvg, webp
## Disabled features: fftw, ghostscript, x11
## Loading required package: cowplot
##
## Attaching package: 'cowplot'
## The following object is masked from 'package:lubridate':
##
## stamp
## Loading required package: emojifont
## Loading required package: beepr
## Loading required package: Rcpp
## Warning: package 'Rcpp' was built under R version 4.1.2
## Loading required package: equatiomatic
## Warning: package 'equatiomatic' was built under R version 4.1.2
##
## Attaching package: 'equatiomatic'
## The following object is masked from 'package:olsrr':
##
## hsb
## Loading required package: nortest
## plotly tidyverse ggrepel
## TRUE TRUE TRUE
## fastDummies knitr kableExtra
## TRUE TRUE TRUE
## splines reshape2 PerformanceAnalytics
## TRUE TRUE TRUE
## correlation see ggraph
## TRUE TRUE TRUE
## psych car ggside
## TRUE TRUE TRUE
## tidyquant olsrr jtools
## TRUE TRUE TRUE
## ggstance magick cowplot
## TRUE TRUE TRUE
## emojifont beepr Rcpp
## TRUE TRUE TRUE
## equatiomatic nortest rgl
## TRUE TRUE TRUE
summary(Dataset.1)
## Pessoas_Ocupadas Estado Ano Familias_PBF
## Min. : 2.322 Length:216 Length:216 Min. : 546364
## 1st Qu.: 11.341 Class :character Class :character 1st Qu.: 1639942
## Median : 33.398 Mode :character Mode :character Median : 4305635
## Mean : 87.020 Mean : 6129021
## 3rd Qu.:136.449 3rd Qu.:10382639
## Max. :556.239 Max. :22062829
## Valor_PBF Matriculas_inf Matriculas_fund Matriculas_med
## Min. :8.201e+07 Min. : 17465 Min. : 21916 Min. : 21055
## 1st Qu.:3.217e+08 1st Qu.: 99475 1st Qu.: 110147 1st Qu.: 101714
## Median :1.067e+09 Median : 156990 Median : 213721 Median : 156967
## Mean :6.631e+09 Mean : 302638 Mean : 397025 Mean : 298022
## 3rd Qu.:3.324e+09 3rd Qu.: 366233 3rd Qu.: 412980 3rd Qu.: 365410
## Max. :9.579e+10 Max. :2298675 Max. :5805590 Max. :1928274
## obitos_inf PIB_D
## Min. : 175.0 Min. : 3995
## 1st Qu.: 529.2 1st Qu.: 9934
## Median : 810.5 Median : 16222
## Mean :1378.5 Mean : 19461
## 3rd Qu.:1708.8 3rd Qu.: 23219
## Max. :7173.0 Max. :102350
BOX-COX
##function 'powerTransform' from the 'car' package (which lambda maximizes the transformed variable's adherence to normality)
library(car)
lambda_BC <- powerTransform(Dataset$Pessoas_Ocupadas)
lambda_BC
## Estimated transformation parameter
## Dataset$Pessoas_Ocupadas
## 0.01067109
#Inserting the Box-Cox lambda into the database to investigate a new model
Dataset$bc_Pessoas_Ocupadas <- (((Dataset$Pessoas_Ocupadas ^ lambda_BC$lambda) - 1) /
lambda_BC$lambda)
#Estimating a new OLS model with dependent variable transformed by Box-Cox
modelo_bc <- lm(formula = bc_Pessoas_Ocupadas ~ 1,
data = Dataset)
#Parameters
summary(modelo_bc)
##
## Call:
## lm(formula = bc_Pessoas_Ocupadas ~ 1, data = Dataset)
##
## Residuals:
## Min 1Q Median 3Q Max
## -2.8619 -1.2479 -0.1331 1.3391 2.8312
##
## Coefficients:
## Estimate Std. Error t value Pr(>|t|)
## (Intercept) 3.70809 0.09942 37.3 <2e-16 ***
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## Residual standard error: 1.461 on 215 degrees of freedom
##the decision was made to evaluate the normality of the residuals via Box-Cox transformation, understanding that in obtaining a lambda equal to or very close to 1, possibly the residuals of the future model would adhere to normality.
#lambda: 0.01
#Therefore, the modeling began with the RDFPC variable transformed by Box-Cox and presenting another distribution, including its residuals.
#With deflated GDP
library(car)
lambda_BC <- powerTransform(Dataset.1$Pessoas_Ocupadas)
lambda_BC
## Estimated transformation parameter
## Dataset.1$Pessoas_Ocupadas
## 0.01067109
#Inserting the Box-Cox lambda into the database to investigate a new model
Dataset.1$bc_Pessoas_Ocupadas <- (((Dataset.1$Pessoas_Ocupadas ^ lambda_BC$lambda) - 1) /
lambda_BC$lambda)
s_Dataset<- scale(Dataset.1[,c(4,5,6,7,8,9,10)], center = TRUE, scale = TRUE)
Dataset2 <- cbind(Dataset.1[,c(1,2,3,11)],s_Dataset)
Packages used for HLM2
#Pacotes utilizados
pacotes <- c("plotly","tidyverse","reshape2","knitr","kableExtra",
"gghalves","ggdist","tidyquant","car","nlme","lmtest",
"fastDummies","msm","lmeInfo","jtools","gganimate","ggridges",
"viridis","hrbrthemes")
#"rgl"
options(rgl.debug = TRUE)
if(sum(as.numeric(!pacotes %in% installed.packages())) != 0){
instalador <- pacotes[!pacotes %in% installed.packages()]
for(i in 1:length(instalador)) {
install.packages(instalador, dependencies = T)
break()}
sapply(pacotes, require, character = T)
} else {
sapply(pacotes, require, character = T)
}
## Loading required package: gghalves
## Warning: package 'gghalves' was built under R version 4.1.2
## Loading required package: ggdist
## Warning: package 'ggdist' was built under R version 4.1.2
## Loading required package: nlme
##
## Attaching package: 'nlme'
## The following object is masked from 'package:dplyr':
##
## collapse
## Loading required package: lmtest
## Loading required package: lmeInfo
## Warning: package 'lmeInfo' was built under R version 4.1.2
## Loading required package: gganimate
## Warning: package 'gganimate' was built under R version 4.1.2
## Loading required package: ggridges
## Warning: package 'ggridges' was built under R version 4.1.2
##
## Attaching package: 'ggridges'
## The following objects are masked from 'package:ggdist':
##
## scale_point_color_continuous, scale_point_color_discrete,
## scale_point_colour_continuous, scale_point_colour_discrete,
## scale_point_fill_continuous, scale_point_fill_discrete,
## scale_point_size_continuous
## Loading required package: viridis
## Loading required package: viridisLite
## Loading required package: hrbrthemes
## NOTE: Either Arial Narrow or Roboto Condensed fonts are required to use these themes.
## Please use hrbrthemes::import_roboto_condensed() to install Roboto Condensed and
## if Arial Narrow is not on your system, please see https://bit.ly/arialnarrow
## plotly tidyverse reshape2 knitr kableExtra gghalves
## TRUE TRUE TRUE TRUE TRUE TRUE
## ggdist tidyquant car nlme lmtest fastDummies
## TRUE TRUE TRUE TRUE TRUE TRUE
## msm lmeInfo jtools gganimate ggridges viridis
## TRUE TRUE TRUE TRUE TRUE TRUE
## hrbrthemes
## TRUE
Estimation of NULL MODEL
library(lme4) # for multilevel models
## Loading required package: Matrix
##
## Attaching package: 'Matrix'
## The following objects are masked from 'package:tidyr':
##
## expand, pack, unpack
##
## Attaching package: 'lme4'
## The following object is masked from 'package:nlme':
##
## lmList
library(lmerTest) # for p-values
##
## Attaching package: 'lmerTest'
## The following object is masked from 'package:lme4':
##
## lmer
## The following object is masked from 'package:stats':
##
## step
library(performance) # for ICC
s_Dataset<- scale(Dataset[,c(4,5,6,7,8,9,10)], center = TRUE, scale = TRUE)
Dataset1 <- cbind(Dataset[,c(1,2,3,11)],s_Dataset)
modelo_nulo_hlm2<-lmer(bc_Pessoas_Ocupadas~1+(1|Estado), REML=FALSE, data=Dataset1)
summary(modelo_nulo_hlm2)
## Linear mixed model fit by maximum likelihood . t-tests use Satterthwaite's
## method [lmerModLmerTest]
## Formula: bc_Pessoas_Ocupadas ~ 1 + (1 | Estado)
## Data: Dataset1
##
## AIC BIC logLik deviance df.resid
## 212.8 222.9 -103.4 206.8 213
##
## Scaled residuals:
## Min 1Q Median 3Q Max
## -2.8528 -0.5401 -0.0468 0.5100 3.1890
##
## Random effects:
## Groups Name Variance Std.Dev.
## Estado (Intercept) 2.04692 1.4307
## Residual 0.07814 0.2795
## Number of obs: 216, groups: Estado, 27
##
## Fixed effects:
## Estimate Std. Error df t value Pr(>|t|)
## (Intercept) 3.708 0.276 27.000 13.44 1.79e-13 ***
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
performance::icc(modelo_nulo_hlm2)
## # Intraclass Correlation Coefficient
##
## Adjusted ICC: 0.963
## Unadjusted ICC: 0.963
#the ICC is interpreted as the proportion of variance between people: How much of the variance stems from people being different from one another versus fluctuating within themselves? A large ICC means that most of the variability is between people, not from people varying in their answers to a set of questions
#96% of ocupation variance is attributed to the state
pacotes <- c("plotly","tidyverse","reshape2","knitr","kableExtra",
"nlme","lmtest","fastDummies","msm","lmeInfo","jtools","gganimate",
"ggridges","viridis","hrbrthemes")
if(sum(as.numeric(!pacotes %in% installed.packages())) != 0){
instalador <- pacotes[!pacotes %in% installed.packages()]
for(i in 1:length(instalador)) {
install.packages(instalador, dependencies = T)
break()}
sapply(pacotes, require, character = T)
} else {
sapply(pacotes, require, character = T)
}
## plotly tidyverse reshape2 knitr kableExtra nlme
## TRUE TRUE TRUE TRUE TRUE TRUE
## lmtest fastDummies msm lmeInfo jtools gganimate
## TRUE TRUE TRUE TRUE TRUE TRUE
## ggridges viridis hrbrthemes
## TRUE TRUE TRUE
#---------------------------------------------------------
#Estimação do modelo nulo (função lme do pacote nlme)
modelo_nulo_hlm3 <- lme(fixed = bc_Pessoas_Ocupadas ~ 1,
random = list(Estado = ~1),
data = Dataset2,
method = "REML")
summary(modelo_nulo_hlm3)
## Linear mixed-effects model fit by REML
## Data: Dataset2
## AIC BIC logLik
## 213.4934 223.6053 -103.7467
##
## Random effects:
## Formula: ~1 | Estado
## (Intercept) Residual
## StdDev: 1.458088 0.2795266
##
## Fixed effects: bc_Pessoas_Ocupadas ~ 1
## Value Std.Error DF t-value p-value
## (Intercept) 3.708092 0.281253 189 13.18419 0
##
## Standardized Within-Group Residuals:
## Min Q1 Med Q3 Max
## -2.85149177 -0.54083464 -0.04794658 0.51028348 3.18887016
##
## Number of Observations: 216
## Number of Groups: 27
################################################################################
# COMPARISON OF HLM2 NULO WITH NULL OLS #
################################################################################
#To estimate the null OLS model, we can command the following
modelo_ols_nulo <- lm(formula = bc_Pessoas_Ocupadas ~ 1,
data = Dataset2)
#Parameters
summary(modelo_ols_nulo)
##
## Call:
## lm(formula = bc_Pessoas_Ocupadas ~ 1, data = Dataset2)
##
## Residuals:
## Min 1Q Median 3Q Max
## -2.8619 -1.2479 -0.1331 1.3391 2.8312
##
## Coefficients:
## Estimate Std. Error t value Pr(>|t|)
## (Intercept) 3.70809 0.09942 37.3 <2e-16 ***
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## Residual standard error: 1.461 on 215 degrees of freedom
################################################################################
# ESTIMATION OF THE MODEL WITH RANDOM INTERCEPTS HLM2 #
################################################################################
#Random Intercepts
modelo_intercept_hlm2.0<-lmer(bc_Pessoas_Ocupadas~ 1 + Familias_PBF + Valor_PBF + Matriculas_inf + Matriculas_fund + Matriculas_med + obitos_inf + PIB + Ano + (1|Estado), REML=FALSE, data=Dataset1)
summary(modelo_intercept_hlm2.0)
## Linear mixed model fit by maximum likelihood . t-tests use Satterthwaite's
## method [lmerModLmerTest]
## Formula: bc_Pessoas_Ocupadas ~ 1 + Familias_PBF + Valor_PBF + Matriculas_inf +
## Matriculas_fund + Matriculas_med + obitos_inf + PIB + Ano +
## (1 | Estado)
## Data: Dataset1
##
## AIC BIC logLik deviance df.resid
## 146.3 203.6 -56.1 112.3 199
##
## Scaled residuals:
## Min 1Q Median 3Q Max
## -3.3816 -0.5275 0.0249 0.5848 2.7281
##
## Random effects:
## Groups Name Variance Std.Dev.
## Estado (Intercept) 0.96695 0.9833
## Residual 0.05273 0.2296
## Number of obs: 216, groups: Estado, 27
##
## Fixed effects:
## Estimate Std. Error df t value Pr(>|t|)
## (Intercept) 3.91379 0.20197 30.10982 19.378 < 2e-16 ***
## Familias_PBF 0.75363 0.15675 60.43172 4.808 1.05e-05 ***
## Valor_PBF -0.05825 0.02600 189.13359 -2.240 0.026228 *
## Matriculas_inf 0.71449 0.16303 140.52627 4.382 2.28e-05 ***
## Matriculas_fund 0.02045 0.03920 197.61237 0.522 0.602477
## Matriculas_med -0.44591 0.16489 196.66211 -2.704 0.007444 **
## obitos_inf 0.15636 0.16624 46.08356 0.941 0.351810
## PIB -0.12235 0.09714 190.65044 -1.260 0.209371
## Ano2013 -0.30777 0.08157 190.61868 -3.773 0.000215 ***
## Ano2014 -0.36585 0.08359 195.40844 -4.377 1.96e-05 ***
## Ano2015 -0.39742 0.08468 197.83272 -4.693 5.02e-06 ***
## Ano2016 -0.10497 0.09847 200.59985 -1.066 0.287692
## Ano2017 -0.24330 0.09109 206.25648 -2.671 0.008167 **
## Ano2018 -0.13912 0.09834 211.31596 -1.415 0.158636
## Ano2019 -0.08720 0.11049 213.49642 -0.789 0.430846
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## Correlation matrix not shown by default, as p = 15 > 12.
## Use print(x, correlation=TRUE) or
## vcov(x) if you need it
# In the “step-up” strategy, variables are added one by one, but it is possible to check the statistical significance of both variables and, if one of them does not pass the significance test, the variable that did not pass is removed from the model.
modelo_intercept_hlm2<-lmer(bc_Pessoas_Ocupadas~ 1 + Familias_PBF + Matriculas_inf + PIB_D + Ano + (1|Estado), REML=FALSE, data=Dataset2)
summary(modelo_intercept_hlm2)
## Linear mixed model fit by maximum likelihood . t-tests use Satterthwaite's
## method [lmerModLmerTest]
## Formula: bc_Pessoas_Ocupadas ~ 1 + Familias_PBF + Matriculas_inf + PIB_D +
## Ano + (1 | Estado)
## Data: Dataset2
##
## AIC BIC logLik deviance df.resid
## 147.7 191.6 -60.8 121.7 203
##
## Scaled residuals:
## Min 1Q Median 3Q Max
## -3.04220 -0.57389 0.04913 0.55555 2.46761
##
## Random effects:
## Groups Name Variance Std.Dev.
## Estado (Intercept) 1.14108 1.0682
## Residual 0.05414 0.2327
## Number of obs: 216, groups: Estado, 27
##
## Fixed effects:
## Estimate Std. Error df t value Pr(>|t|)
## (Intercept) 3.94214 0.21180 22.99823 18.613 2.31e-15 ***
## Familias_PBF 0.66211 0.15961 59.84219 4.148 0.000107 ***
## Matriculas_inf 0.60810 0.15171 72.75088 4.008 0.000146 ***
## PIB_D -0.09967 0.04864 194.40173 -2.049 0.041801 *
## Ano2013 -0.33904 0.06355 182.59631 -5.335 2.80e-07 ***
## Ano2014 -0.40810 0.06408 187.14712 -6.369 1.44e-09 ***
## Ano2015 -0.47872 0.06604 187.46659 -7.249 1.07e-11 ***
## Ano2016 -0.25432 0.06523 192.38829 -3.899 0.000133 ***
## Ano2017 -0.15794 0.09863 196.96782 -1.601 0.110901
## Ano2018 -0.09486 0.08571 200.41736 -1.107 0.269733
## Ano2019 -0.13938 0.08109 204.53189 -1.719 0.087135 .
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## Correlation of Fixed Effects:
## (Intr) Fm_PBF Mtrcl_ PIB_D An2013 An2014 An2015 An2016 An2017
## Familis_PBF 0.007
## Matricls_nf 0.060 -0.062
## PIB_D 0.100 -0.016 0.047
## Ano2013 -0.157 -0.034 -0.064 -0.039
## Ano2014 -0.159 -0.082 -0.119 -0.039 0.505
## Ano2015 -0.127 -0.048 -0.126 0.242 0.479 0.485
## Ano2016 -0.169 -0.033 -0.210 -0.115 0.503 0.512 0.468
## Ano2017 -0.180 0.038 -0.206 -0.747 0.359 0.365 0.149 0.432
## Ano2018 -0.187 -0.029 -0.261 -0.631 0.409 0.422 0.236 0.481 0.747
## Ano2019 -0.188 -0.003 -0.312 -0.554 0.430 0.444 0.281 0.503 0.713
## An2018
## Familis_PBF
## Matricls_nf
## PIB_D
## Ano2013
## Ano2014
## Ano2015
## Ano2016
## Ano2017
## Ano2018
## Ano2019 0.706
#Parâmetros do modelo
summary(modelo_intercept_hlm2)
## Linear mixed model fit by maximum likelihood . t-tests use Satterthwaite's
## method [lmerModLmerTest]
## Formula: bc_Pessoas_Ocupadas ~ 1 + Familias_PBF + Matriculas_inf + PIB_D +
## Ano + (1 | Estado)
## Data: Dataset2
##
## AIC BIC logLik deviance df.resid
## 147.7 191.6 -60.8 121.7 203
##
## Scaled residuals:
## Min 1Q Median 3Q Max
## -3.04220 -0.57389 0.04913 0.55555 2.46761
##
## Random effects:
## Groups Name Variance Std.Dev.
## Estado (Intercept) 1.14108 1.0682
## Residual 0.05414 0.2327
## Number of obs: 216, groups: Estado, 27
##
## Fixed effects:
## Estimate Std. Error df t value Pr(>|t|)
## (Intercept) 3.94214 0.21180 22.99823 18.613 2.31e-15 ***
## Familias_PBF 0.66211 0.15961 59.84219 4.148 0.000107 ***
## Matriculas_inf 0.60810 0.15171 72.75088 4.008 0.000146 ***
## PIB_D -0.09967 0.04864 194.40173 -2.049 0.041801 *
## Ano2013 -0.33904 0.06355 182.59631 -5.335 2.80e-07 ***
## Ano2014 -0.40810 0.06408 187.14712 -6.369 1.44e-09 ***
## Ano2015 -0.47872 0.06604 187.46659 -7.249 1.07e-11 ***
## Ano2016 -0.25432 0.06523 192.38829 -3.899 0.000133 ***
## Ano2017 -0.15794 0.09863 196.96782 -1.601 0.110901
## Ano2018 -0.09486 0.08571 200.41736 -1.107 0.269733
## Ano2019 -0.13938 0.08109 204.53189 -1.719 0.087135 .
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## Correlation of Fixed Effects:
## (Intr) Fm_PBF Mtrcl_ PIB_D An2013 An2014 An2015 An2016 An2017
## Familis_PBF 0.007
## Matricls_nf 0.060 -0.062
## PIB_D 0.100 -0.016 0.047
## Ano2013 -0.157 -0.034 -0.064 -0.039
## Ano2014 -0.159 -0.082 -0.119 -0.039 0.505
## Ano2015 -0.127 -0.048 -0.126 0.242 0.479 0.485
## Ano2016 -0.169 -0.033 -0.210 -0.115 0.503 0.512 0.468
## Ano2017 -0.180 0.038 -0.206 -0.747 0.359 0.365 0.149 0.432
## Ano2018 -0.187 -0.029 -0.261 -0.631 0.409 0.422 0.236 0.481 0.747
## Ano2019 -0.188 -0.003 -0.312 -0.554 0.430 0.444 0.281 0.503 0.713
## An2018
## Familis_PBF
## Matricls_nf
## PIB_D
## Ano2013
## Ano2014
## Ano2015
## Ano2016
## Ano2017
## Ano2018
## Ano2019 0.706
#--------------------------------------------------------------------
ctrl <- lmeControl(opt='optim')
library(nlme)
#Estimação do modelo com Tendência Linear e Interceptos Aleatórios
modelo_intercept_hlm3 <- lme(fixed = bc_Pessoas_Ocupadas ~ Ano + Familias_PBF + Valor_PBF + Matriculas_inf + Matriculas_med + PIB_D,
random = list(Estado = ~1),
data = Dataset2,
method = "REML")
summary(modelo_intercept_hlm3)
## Linear mixed-effects model fit by REML
## Data: Dataset2
## AIC BIC logLik
## 185.9016 235.5997 -77.95079
##
## Random effects:
## Formula: ~1 | Estado
## (Intercept) Residual
## StdDev: 1.04504 0.2347144
##
## Fixed effects: bc_Pessoas_Ocupadas ~ Ano + Familias_PBF + Valor_PBF + Matriculas_inf + Matriculas_med + PIB_D
## Value Std.Error DF t-value p-value
## (Intercept) 3.938876 0.20799643 177 18.937231 0.0000
## Ano2013 -0.341969 0.06410624 177 -5.334416 0.0000
## Ano2014 -0.417955 0.06472115 177 -6.457781 0.0000
## Ano2015 -0.497912 0.06700036 177 -7.431485 0.0000
## Ano2016 -0.170127 0.08069917 177 -2.108164 0.0364
## Ano2017 -0.177817 0.09999225 177 -1.778311 0.0771
## Ano2018 -0.130648 0.08779126 177 -1.488167 0.1385
## Ano2019 -0.109845 0.09127651 177 -1.203431 0.2304
## Familias_PBF 0.775444 0.16122355 177 4.809745 0.0000
## Valor_PBF -0.057946 0.02640062 177 -2.194888 0.0295
## Matriculas_inf 0.677327 0.15429590 177 4.389794 0.0000
## Matriculas_med -0.396272 0.15569620 177 -2.545163 0.0118
## PIB_D -0.102393 0.04903191 177 -2.088285 0.0382
## Correlation:
## (Intr) An2013 An2014 An2015 An2016 An2017 An2018 An2019 Fm_PBF
## Ano2013 -0.162
## Ano2014 -0.165 0.505
## Ano2015 -0.134 0.478 0.487
## Ano2016 -0.173 0.411 0.418 0.384
## Ano2017 -0.189 0.359 0.368 0.159 0.372
## Ano2018 -0.196 0.406 0.424 0.250 0.406 0.750
## Ano2019 -0.201 0.388 0.406 0.271 0.608 0.666 0.664
## Familias_PBF 0.004 -0.035 -0.086 -0.060 0.053 0.027 -0.048 0.031
## Valor_PBF 0.050 0.001 0.007 0.009 -0.573 -0.022 -0.009 -0.388 -0.155
## Matriculas_inf 0.068 -0.065 -0.126 -0.144 -0.186 -0.221 -0.286 -0.319 -0.037
## Matriculas_med -0.038 0.019 0.057 0.111 0.072 0.107 0.181 0.209 -0.124
## PIB_D 0.103 -0.039 -0.039 0.241 -0.106 -0.743 -0.620 -0.504 -0.018
## Vl_PBF Mtrcls_n Mtrcls_m
## Ano2013
## Ano2014
## Ano2015
## Ano2016
## Ano2017
## Ano2018
## Ano2019
## Familias_PBF
## Valor_PBF
## Matriculas_inf 0.004
## Matriculas_med 0.034 -0.218
## PIB_D 0.022 0.046 0.001
##
## Standardized Within-Group Residuals:
## Min Q1 Med Q3 Max
## -3.25259841 -0.51753391 0.05106863 0.57309050 2.51640999
##
## Number of Observations: 216
## Number of Groups: 27
#Comparsion between Logliks
data.frame(OLS_Nulo = logLik(modelo_ols_nulo),
HLM3_Nulo = logLik(modelo_nulo_hlm3),
HLM3_Intercept_Aleat = logLik(modelo_intercept_hlm3)) %>%
melt() %>%
ggplot(aes(x = variable, y = (abs(-value)), fill = factor(variable))) +
geom_bar(stat = "identity") +
geom_label(aes(label = (round(value,3))), hjust = 1.1, color = "white", size = 6) +
labs(title = "Comparação do LL",
y = "LogLik",
x = "Modelo Proposto") +
coord_flip() +
scale_fill_manual("Legenda:",
values = c("grey25","lightblue","lightpink")) +
theme(legend.title = element_blank(),
panel.background = element_rect("white"),
legend.position = "none",
axis.line = element_line())
## No id variables; using all as measure variables
pacotes <- c("plotly","tidyverse","reshape2","knitr","kableExtra",
"nlme","lmtest","fastDummies","msm","lmeInfo","jtools","gganimate",
"ggridges","viridis","hrbrthemes")
if(sum(as.numeric(!pacotes %in% installed.packages())) != 0){
instalador <- pacotes[!pacotes %in% installed.packages()]
for(i in 1:length(instalador)) {
install.packages(instalador, dependencies = T)
break()}
sapply(pacotes, require, character = T)
} else {
sapply(pacotes, require, character = T)
}
## plotly tidyverse reshape2 knitr kableExtra nlme
## TRUE TRUE TRUE TRUE TRUE TRUE
## lmtest fastDummies msm lmeInfo jtools gganimate
## TRUE TRUE TRUE TRUE TRUE TRUE
## ggridges viridis hrbrthemes
## TRUE TRUE TRUE
################################################################################
# MODEL ESTIMATION WITH RANDOM INTERCEPTS AND SLOPES HLM2 #
################################################################################
#Estimação do modelo com Interceptos e Inclinações Aleatórios
#Mtariculas_fund insignificant
modelo_intercept_inclin_hlm2 <- lmer(bc_Pessoas_Ocupadas ~ 1 + Familias_PBF + Matriculas_inf + Ano + (Ano|Estado), data = Dataset1, REML = FALSE, control = lmerControl(check.nobs.vs.nRE = "ignore"))
## Warning in checkConv(attr(opt, "derivs"), opt$par, ctrl = control$checkConv, :
## Model failed to converge with max|grad| = 0.0042761 (tol = 0.002, component 1)
coef(modelo_intercept_inclin_hlm2)
## $Estado
## (Intercept) Familias_PBF Matriculas_inf Ano2013
## ACRE 3.980293 0.5358777 0.5166905 -0.254768174
## ALAGOAS 4.996246 0.5358777 0.5166905 -0.143907857
## AMAPÁ 3.266220 0.5358777 0.5166905 -0.720024042
## AMAZONAS 5.336584 0.5358777 0.5166905 -0.326227928
## BAHIA 4.808881 0.5358777 0.5166905 -0.368683643
## CEARÁ 5.305058 0.5358777 0.5166905 -0.279734262
## DISTRITO FEDERAL 2.236571 0.5358777 0.5166905 -0.262379934
## ESPÍRITO SANTO 3.586391 0.5358777 0.5166905 -0.248336545
## GOIÁS 3.300757 0.5358777 0.5166905 -0.603698804
## MARANHÃO 5.630868 0.5358777 0.5166905 -0.287677351
## MATO GROSSO 3.004719 0.5358777 0.5166905 -0.520481109
## MATO GROSSO DO SUL 2.646494 0.5358777 0.5166905 -0.423599362
## MINAS GERAIS 4.013181 0.5358777 0.5166905 -0.251573180
## PARÁ 3.433959 0.5358777 0.5166905 -0.366770787
## PARAÍBA 5.282644 0.5358777 0.5166905 -0.440367851
## PARANÁ 5.941549 0.5358777 0.5166905 -0.186061622
## PERNAMBUCO 4.887186 0.5358777 0.5166905 -0.306655941
## PIAUÍ 5.639193 0.5358777 0.5166905 -0.275302111
## RIO DE JANEIRO 4.178727 0.5358777 0.5166905 -0.242857770
## RIO GRANDE DO NORTE 3.942147 0.5358777 0.5166905 -0.274515718
## RIO GRANDE DO SUL 3.295303 0.5358777 0.5166905 -0.483670796
## RONDÔNIA 4.019384 0.5358777 0.5166905 -0.339286197
## RORAIMA 2.266508 0.5358777 0.5166905 0.081602074
## SANTA CATARINA 2.730278 0.5358777 0.5166905 -0.635081759
## SÃO PAULO 3.787413 0.5358777 0.5166905 -0.476901637
## SERGIPE 1.958313 0.5358777 0.5166905 -0.525030999
## TOCANTINS 3.856089 0.5358777 0.5166905 -0.001025597
## Ano2014 Ano2015 Ano2016 Ano2017
## ACRE -0.17249297 -0.255222416 -0.02846258 -0.02931329
## ALAGOAS -0.23109558 -0.370531055 -0.27349734 -0.46485314
## AMAPÁ -0.81388180 -0.502429548 -0.10584473 -0.05837191
## AMAZONAS -0.22108875 -0.214280002 0.12709073 0.09979881
## BAHIA -0.41762993 -0.480775297 -0.43853668 -0.44852001
## CEARÁ -0.39907210 -0.500570333 -0.39458632 -0.39843204
## DISTRITO FEDERAL -0.30920835 -0.094841301 -0.10183590 -0.20241895
## ESPÍRITO SANTO -0.01690119 -0.406276980 -0.65126075 0.09856109
## GOIÁS -0.30801992 -0.485909337 -0.30180533 -0.06877417
## MARANHÃO -0.34241013 -0.374678406 -0.30892527 -0.59270197
## MATO GROSSO -0.88134098 -0.484210989 -0.45107767 -0.75347211
## MATO GROSSO DO SUL -0.21498441 -0.373847440 0.33048064 0.58064041
## MINAS GERAIS -0.20377911 -0.387371002 -0.16010282 -0.15004606
## PARÁ -0.68702629 -0.716479413 -0.64798591 -0.75440311
## PARAÍBA -0.55303443 -0.569730722 -0.39342735 -0.58473981
## PARANÁ -0.13847304 -0.075821491 -0.08113065 -0.13714856
## PERNAMBUCO -0.41953051 -0.404636587 -0.25735744 -0.45060989
## PIAUÍ -0.12980391 -0.318943397 -0.32795272 -0.29327942
## RIO DE JANEIRO -0.26187018 -0.302003850 -0.18670681 -0.25345987
## RIO GRANDE DO NORTE -0.24939647 -0.499591749 -0.28320213 -0.32335309
## RIO GRANDE DO SUL -0.64568890 -0.449403918 -0.26531548 -0.31272269
## RONDÔNIA -0.84325663 -0.813175847 -0.51803340 -0.94567992
## RORAIMA -0.06973257 0.002995581 -0.07059229 -0.19898997
## SANTA CATARINA -1.32780370 -1.084521153 -0.39742881 -0.80113607
## SÃO PAULO -0.55167275 -0.633029079 -0.39840698 -0.47656098
## SERGIPE -0.29385284 -0.695095282 -0.21332407 0.44932682
## TOCANTINS -0.18789412 -0.302662002 -0.16345288 -0.58498049
## Ano2018 Ano2019
## ACRE 0.03763780 0.003089549
## ALAGOAS -0.48561626 -0.575627049
## AMAPÁ -0.21178953 0.112172017
## AMAZONAS 0.11090242 0.072472105
## BAHIA -0.41053472 -0.463975907
## CEARÁ -0.32514682 -0.471966570
## DISTRITO FEDERAL 0.17112393 0.409546604
## ESPÍRITO SANTO 0.23871268 0.009403868
## GOIÁS 0.02233501 -0.043379707
## MARANHÃO -0.63328915 -0.656771126
## MATO GROSSO -0.31475490 -0.070930903
## MATO GROSSO DO SUL 0.36963259 0.373924083
## MINAS GERAIS -0.10202837 -0.204679489
## PARÁ -0.62337630 -0.644711179
## PARAÍBA -0.53211954 -0.618045151
## PARANÁ -0.08737384 -0.059914450
## PERNAMBUCO -0.28990615 -0.351552518
## PIAUÍ -0.31801794 -0.443806449
## RIO DE JANEIRO -0.05345104 -0.093516251
## RIO GRANDE DO NORTE -0.15925918 -0.390189826
## RIO GRANDE DO SUL -0.17510080 0.008028053
## RONDÔNIA -0.83422553 -0.873581568
## RORAIMA 0.35763366 0.429317544
## SANTA CATARINA -0.67700794 -0.623317688
## SÃO PAULO -0.49032500 -0.506729141
## SERGIPE 0.50829337 0.239358520
## TOCANTINS -0.16963753 -0.322337183
##
## attr(,"class")
## [1] "coef.mer"
#Parâmetros do modelo
summary(modelo_intercept_inclin_hlm2)
## Linear mixed model fit by maximum likelihood . t-tests use Satterthwaite's
## method [lmerModLmerTest]
## Formula: bc_Pessoas_Ocupadas ~ 1 + Familias_PBF + Matriculas_inf + Ano +
## (Ano | Estado)
## Data: Dataset1
## Control: lmerControl(check.nobs.vs.nRE = "ignore")
##
## AIC BIC logLik deviance df.resid
## 124.1 282.7 -15.1 30.1 169
##
## Scaled residuals:
## Min 1Q Median 3Q Max
## -1.98186 -0.37705 0.01176 0.37119 1.49766
##
## Random effects:
## Groups Name Variance Std.Dev. Corr
## Estado (Intercept) 1.24799 1.1171
## Ano2013 0.04197 0.2049 0.20
## Ano2014 0.10178 0.3190 0.20 0.66
## Ano2015 0.06188 0.2488 0.15 0.64 0.76
## Ano2016 0.05885 0.2426 -0.11 0.24 0.40 0.56
## Ano2017 0.13753 0.3709 -0.26 0.03 0.58 0.40 0.62
## Ano2018 0.12789 0.3576 -0.44 0.25 0.63 0.55 0.58 0.86
## Ano2019 0.13923 0.3731 -0.53 0.11 0.42 0.60 0.62 0.76
## Residual 0.01003 0.1002
##
##
##
##
##
##
##
##
## 0.90
##
## Number of obs: 216, groups: Estado, 27
##
## Fixed effects:
## Estimate Std. Error df t value Pr(>|t|)
## (Intercept) 3.97522 0.21603 21.06530 18.401 1.85e-14 ***
## Familias_PBF 0.53588 0.11997 33.73470 4.467 8.46e-05 ***
## Matriculas_inf 0.51669 0.11008 27.26613 4.694 6.79e-05 ***
## Ano2013 -0.33937 0.04805 27.21169 -7.063 1.30e-07 ***
## Ano2014 -0.40337 0.06752 27.53972 -5.974 2.09e-06 ***
## Ano2015 -0.43678 0.05553 27.37055 -7.866 1.69e-08 ***
## Ano2016 -0.25788 0.05495 28.41658 -4.693 6.22e-05 ***
## Ano2017 -0.29836 0.07740 27.77432 -3.855 0.000626 ***
## Ano2018 -0.18803 0.07546 27.50331 -2.492 0.019024 *
## Ano2019 -0.21325 0.07864 28.91460 -2.712 0.011151 *
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## Correlation of Fixed Effects:
## (Intr) Fm_PBF Mtrcl_ An2013 An2014 An2015 An2016 An2017 An2018
## Familis_PBF 0.004
## Matricls_nf 0.039 -0.114
## Ano2013 0.122 -0.031 -0.058
## Ano2014 0.149 -0.055 -0.077 0.611
## Ano2015 0.094 -0.033 -0.116 0.598 0.706
## Ano2016 -0.134 -0.022 -0.174 0.318 0.426 0.551
## Ano2017 -0.263 0.033 -0.159 0.129 0.573 0.424 0.601
## Ano2018 -0.430 -0.024 -0.188 0.298 0.611 0.544 0.570 0.813
## Ano2019 -0.515 0.001 -0.213 0.197 0.433 0.587 0.608 0.736 0.854
## optimizer (nloptwrap) convergence code: 0 (OK)
## Model failed to converge with max|grad| = 0.0042761 (tol = 0.002, component 1)
#------------------------------------------------------------------------------
modelo_intercept_inclin_hlm3 <- lme(fixed = bc_Pessoas_Ocupadas ~ Ano + Familias_PBF + Matriculas_inf + PIB_D,
random = list(Estado = ~Ano),
control=ctrl,
data = Dataset2,
method = "REML")
summary(modelo_intercept_inclin_hlm3)
## Linear mixed-effects model fit by REML
## Data: Dataset2
## AIC BIC logLik
## 166.1326 325.6371 -35.06632
##
## Random effects:
## Formula: ~Ano | Estado
## Structure: General positive-definite, Log-Cholesky parametrization
## StdDev Corr
## (Intercept) 1.1393093 (Intr) An2013 An2014 An2015 An2016 An2017 An2018
## Ano2013 0.2090605 0.198
## Ano2014 0.3245790 0.195 0.651
## Ano2015 0.2549485 0.127 0.602 0.743
## Ano2016 0.2511242 -0.133 0.247 0.393 0.524
## Ano2017 0.3672755 -0.305 0.106 0.610 0.395 0.683
## Ano2018 0.3740032 -0.446 0.299 0.619 0.508 0.605 0.870
## Ano2019 0.3852220 -0.555 0.165 0.416 0.582 0.650 0.770 0.901
## Residual 0.1017452
##
## Fixed effects: bc_Pessoas_Ocupadas ~ Ano + Familias_PBF + Matriculas_inf + PIB_D
## Value Std.Error DF t-value p-value
## (Intercept) 3.928607 0.22130686 179 17.751855 0.0000
## Ano2013 -0.333419 0.04903914 179 -6.799031 0.0000
## Ano2014 -0.396613 0.06876770 179 -5.767430 0.0000
## Ano2015 -0.468771 0.05881195 179 -7.970684 0.0000
## Ano2016 -0.239667 0.05718272 179 -4.191253 0.0000
## Ano2017 -0.140392 0.10519059 179 -1.334645 0.1837
## Ano2018 -0.070699 0.09488415 179 -0.745104 0.4572
## Ano2019 -0.114560 0.09232659 179 -1.240813 0.2163
## Familias_PBF 0.538770 0.12781219 179 4.215326 0.0000
## Matriculas_inf 0.475091 0.11663175 179 4.073432 0.0001
## PIB_D -0.102342 0.04724436 179 -2.166222 0.0316
## Correlation:
## (Intr) An2013 An2014 An2015 An2016 An2017 An2018 An2019 Fm_PBF
## Ano2013 0.118
## Ano2014 0.143 0.609
## Ano2015 0.094 0.540 0.659
## Ano2016 -0.161 0.327 0.422 0.471
## Ano2017 -0.289 0.176 0.464 0.122 0.564
## Ano2018 -0.413 0.310 0.525 0.266 0.563 0.882
## Ano2019 -0.513 0.231 0.401 0.361 0.613 0.803 0.891
## Familias_PBF 0.012 -0.037 -0.060 -0.014 -0.033 -0.027 -0.064 -0.036
## Matriculas_inf 0.047 -0.064 -0.083 -0.097 -0.186 -0.173 -0.199 -0.226 -0.105
## PIB_D 0.094 -0.054 -0.043 0.258 -0.134 -0.681 -0.559 -0.477 0.076
## Mtrcl_
## Ano2013
## Ano2014
## Ano2015
## Ano2016
## Ano2017
## Ano2018
## Ano2019
## Familias_PBF
## Matriculas_inf
## PIB_D 0.073
##
## Standardized Within-Group Residuals:
## Min Q1 Med Q3 Max
## -1.93644921 -0.34324933 0.01086983 0.37847537 1.55306851
##
## Number of Observations: 216
## Number of Groups: 27
coef(modelo_intercept_inclin_hlm3)
## (Intercept) Ano2013 Ano2014 Ano2015
## ACRE 3.899136 -0.25788408 -0.176455067 -0.28441692
## ALAGOAS 4.900414 -0.14276826 -0.232035202 -0.38187743
## AMAPÁ 3.186562 -0.71895143 -0.792535147 -0.52087839
## AMAZONAS 5.253704 -0.30090260 -0.213281845 -0.27443449
## BAHIA 4.743775 -0.36656267 -0.413391497 -0.50074031
## CEARÁ 5.240315 -0.25707273 -0.397194944 -0.55543264
## DISTRITO FEDERAL 2.145476 -0.26756150 -0.311590353 -0.09369740
## ESPÍRITO SANTO 3.469372 -0.22794932 0.001567643 -0.43955656
## GOIÁS 3.195565 -0.60715439 -0.313864466 -0.49492315
## MARANHÃO 5.547411 -0.29426052 -0.343781687 -0.37955564
## MATO GROSSO 2.916498 -0.51681380 -0.893726710 -0.50922657
## MATO GROSSO DO SUL 2.533411 -0.40384682 -0.211391599 -0.39807529
## MINAS GERAIS 3.973751 -0.23873720 -0.205703732 -0.41566092
## PARÁ 3.350298 -0.36711406 -0.690512613 -0.71953241
## PARAÍBA 5.197989 -0.43837957 -0.562159253 -0.60174345
## PARANÁ 5.855618 -0.16819295 -0.128292583 -0.11644480
## PERNAMBUCO 4.846977 -0.30040815 -0.418478492 -0.44393006
## PIAUÍ 5.625261 -0.28131877 -0.131007372 -0.37543317
## RIO DE JANEIRO 4.181555 -0.23128564 -0.249427106 -0.36384823
## RIO GRANDE DO NORTE 3.978476 -0.28577827 -0.231812871 -0.52955554
## RIO GRANDE DO SUL 3.304091 -0.47328945 -0.619186409 -0.48620421
## RONDÔNIA 3.997087 -0.33331767 -0.835429694 -0.85616741
## RORAIMA 2.223089 0.09432401 -0.064475022 -0.02965124
## SANTA CATARINA 2.702584 -0.61522134 -1.311066423 -1.12474574
## SÃO PAULO 3.742568 -0.48704860 -0.534428705 -0.65583635
## SERGIPE 2.079907 -0.49703494 -0.260592585 -0.72602245
## TOCANTINS 3.981509 -0.01777134 -0.168294952 -0.37923884
## Ano2016 Ano2017 Ano2018 Ano2019
## ACRE -0.01882895 0.11161356 0.134810606 0.0844641657
## ALAGOAS -0.27519167 -0.35490470 -0.426369892 -0.5265926736
## AMAPÁ -0.08145460 0.04831329 -0.122317933 0.1739514591
## AMAZONAS 0.15311200 0.22316802 0.226154783 0.1650986186
## BAHIA -0.42757752 -0.33818569 -0.330989586 -0.3953906553
## CEARÁ -0.38102160 -0.29919494 -0.231279005 -0.3964212035
## DISTRITO FEDERAL -0.09424634 -0.05122763 0.258125000 0.4938018686
## ESPÍRITO SANTO -0.62217602 0.16096725 0.314126276 0.0705325835
## GOIÁS -0.30540043 0.03853155 0.085947979 0.0235686880
## MARANHÃO -0.31089862 -0.48238585 -0.572264285 -0.6027169278
## MATO GROSSO -0.45537982 -0.63684470 -0.239983549 0.0001733207
## MATO GROSSO DO SUL 0.35049708 0.68264660 0.448634247 0.4435797339
## MINAS GERAIS -0.15009687 -0.01761727 -0.010237962 -0.1193155752
## PARÁ -0.64023796 -0.63794285 -0.553221259 -0.5745943184
## PARAÍBA -0.39820252 -0.48671771 -0.467079808 -0.5638302192
## PARANÁ -0.05621764 -0.04253109 0.011148521 0.0207232256
## PERNAMBUCO -0.24697436 -0.29989015 -0.175655036 -0.2590274289
## PIAUÍ -0.34663843 -0.17436316 -0.217358310 -0.3777530336
## RIO DE JANEIRO -0.17612891 -0.05017832 0.107666904 0.0299072253
## RIO GRANDE DO NORTE -0.24826784 -0.05511308 0.022569705 -0.2429205838
## RIO GRANDE DO SUL -0.21993585 -0.10281895 -0.006900603 0.1485218089
## RONDÔNIA -0.50157804 -0.71593279 -0.669928738 -0.7321331357
## RORAIMA -0.04409670 0.03120904 0.523234442 0.5759124873
## SANTA CATARINA -0.35377602 -0.58069486 -0.498858742 -0.4761025375
## SÃO PAULO -0.36518952 -0.25168825 -0.338883279 -0.3798566244
## SERGIPE -0.14993222 0.62884900 0.665853113 0.3737454022
## TOCANTINS -0.10517589 -0.13765381 0.154195669 -0.0504467559
## Familias_PBF Matriculas_inf PIB_D
## ACRE 0.53877 0.4750915 -0.1023418
## ALAGOAS 0.53877 0.4750915 -0.1023418
## AMAPÁ 0.53877 0.4750915 -0.1023418
## AMAZONAS 0.53877 0.4750915 -0.1023418
## BAHIA 0.53877 0.4750915 -0.1023418
## CEARÁ 0.53877 0.4750915 -0.1023418
## DISTRITO FEDERAL 0.53877 0.4750915 -0.1023418
## ESPÍRITO SANTO 0.53877 0.4750915 -0.1023418
## GOIÁS 0.53877 0.4750915 -0.1023418
## MARANHÃO 0.53877 0.4750915 -0.1023418
## MATO GROSSO 0.53877 0.4750915 -0.1023418
## MATO GROSSO DO SUL 0.53877 0.4750915 -0.1023418
## MINAS GERAIS 0.53877 0.4750915 -0.1023418
## PARÁ 0.53877 0.4750915 -0.1023418
## PARAÍBA 0.53877 0.4750915 -0.1023418
## PARANÁ 0.53877 0.4750915 -0.1023418
## PERNAMBUCO 0.53877 0.4750915 -0.1023418
## PIAUÍ 0.53877 0.4750915 -0.1023418
## RIO DE JANEIRO 0.53877 0.4750915 -0.1023418
## RIO GRANDE DO NORTE 0.53877 0.4750915 -0.1023418
## RIO GRANDE DO SUL 0.53877 0.4750915 -0.1023418
## RONDÔNIA 0.53877 0.4750915 -0.1023418
## RORAIMA 0.53877 0.4750915 -0.1023418
## SANTA CATARINA 0.53877 0.4750915 -0.1023418
## SÃO PAULO 0.53877 0.4750915 -0.1023418
## SERGIPE 0.53877 0.4750915 -0.1023418
## TOCANTINS 0.53877 0.4750915 -0.1023418
#Comparsion between Logliks
data.frame(OLS_Nulo = logLik(modelo_ols_nulo),
HLM3_Nulo = logLik(modelo_nulo_hlm3),
HLM2_Intercept_Aleat = logLik(modelo_intercept_hlm3),
HLM2_Intercept_Inclin_Aleat = logLik(modelo_intercept_inclin_hlm3)) %>%
melt() %>%
ggplot(aes(x = variable, y = (abs(-value)), fill = factor(variable))) +
geom_bar(stat = "identity") +
geom_label(aes(label = (round(value,3))), hjust = 1.1, color = "white", size = 4) +
labs(title = "Comparação do LL",
y = "LogLik",
x = "Modelo Proposto") +
coord_flip() +
scale_fill_manual("Legenda:",
values = c("lightblue","lightgreen","lightpink","grey25")) +
theme(legend.title = element_blank(),
panel.background = element_rect("white"),
legend.position = "none",
axis.line = element_line())
## No id variables; using all as measure variables
Conclusao
```r
#To graphically observe the behavior of the v0jk values, that is,
#of the random intercepts per state, we can command:
library(dplyr)
random.effects(modelo_intercept_inclin_hlm3) %>%
dplyr::rename(v0j = 1) %>%
rownames_to_column("Estado") %>%
mutate(color_v0j = ifelse(v0j < 0, "A", "B"),
hjust_v0j = ifelse(v0j > 0, 1.15, -0.15)) %>%
arrange(Estado) %>%
ggplot(aes(label = format(v0j, digits = 2),
hjust = hjust_v0j)) +
geom_bar(aes(x = fct_rev(Estado), y = v0j, fill = color_v0j),
stat = "identity", color = "black") +
geom_text(aes(x = Estado, y = 0), size = 4.1, color = "black") +
coord_flip() +
labs(x = "Estado",
y = expression(nu[0][j])) +
scale_fill_manual("oi", values = c("pink","lightblue")) +
theme(panel.background = element_rect("white"),
panel.border = element_rect(NA),
panel.grid = element_line("grey95"),
legend.position = "none")
```r
pacotes <- c("plotly", #plataforma gráfica
"tidyverse", #carregar outros pacotes do R
"ggrepel", #geoms de texto e rótulo para 'ggplot2' que ajudam a
#evitar sobreposição de textos
"knitr", "kableExtra", #formatação de tabelas
"reshape2", #função 'melt'
"misc3d", #gráficos 3D
"plot3D", #gráficos 3D
"cluster", #função 'agnes' para elaboração de clusters hierárquicos
"factoextra", #função 'fviz_dend' para construção de dendrogramas
"ade4") #função 'ade4' para matriz de distâncias em var. binárias
if(sum(as.numeric(!pacotes %in% installed.packages())) != 0){
instalador <- pacotes[!pacotes %in% installed.packages()]
for(i in 1:length(instalador)) {
install.packages(instalador, dependencies = T)
break()}
sapply(pacotes, require, character = T)
} else {
sapply(pacotes, require, character = T)
}
## Loading required package: misc3d
## Error: package or namespace load failed for 'misc3d':
## .onLoad failed in loadNamespace() for 'tcltk', details:
## call: fun(libname, pkgname)
## error: Tcl/Tk libraries are missing: install the Tcl/Tk component from the R installer
## Loading required package: plot3D
## Error: package or namespace load failed for 'plot3D':
## .onLoad failed in loadNamespace() for 'tcltk', details:
## call: fun(libname, pkgname)
## error: Tcl/Tk libraries are missing: install the Tcl/Tk component from the R installer
## Loading required package: cluster
## Loading required package: factoextra
## Welcome! Want to learn more? See two factoextra-related books at https://goo.gl/ve3WBa
## Loading required package: ade4
## Warning: package 'ade4' was built under R version 4.1.2
## plotly tidyverse ggrepel knitr kableExtra reshape2 misc3d
## TRUE TRUE TRUE TRUE TRUE TRUE FALSE
## plot3D cluster factoextra ade4
## FALSE TRUE TRUE TRUE
#Improved visualization of random intercepts and slopes by school,
#for the final HLM3 model
v_final <- data.frame(modelo_intercept_inclin_hlm3[["coefficients"]][["random"]][["Estado"]]) %>%
rename(v00 = 1,
v10 = 2)
v_final$Estado <- c(1:27)
v_final$Estado <- as.factor(v_final$Estado)
# Dissimilarity matrix
matriz_D <- v_final %>%
dplyr::select(v00, v10) %>%
dist(method = "euclidean")
# Method: parametrização da distância a ser utilizada
## "euclidean": distância euclidiana
## "euclidiana quadrática": elevar ao quadrado matriz_D (matriz_D^2)
## "maximum": distância de Chebychev;
## "manhattan": distância de Manhattan (ou distância absoluta ou bloco);
## "canberra": distância de Canberra;
## "minkowski": distância de Minkowski
# Visualizing Dissimilarity matrix
data.matrix(matriz_D) %>%
kable() %>%
kable_styling(bootstrap_options = "striped",
full_width = FALSE,
font_size = 20)
| ACRE | ALAGOAS | AMAPÁ | AMAZONAS | BAHIA | CEARÁ | DISTRITO FEDERAL | ESPÍRITO SANTO | GOIÁS | MARANHÃO | MATO GROSSO | MATO GROSSO DO SUL | MINAS GERAIS | PARÁ | PARAÍBA | PARANÁ | PERNAMBUCO | PIAUÍ | RIO DE JANEIRO | RIO GRANDE DO NORTE | RIO GRANDE DO SUL | RONDÔNIA | RORAIMA | SANTA CATARINA | SÃO PAULO | SERGIPE | TOCANTINS | |
|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|
| ACRE | 0.0000000 | 1.0078728 | 0.8487320 | 1.3552509 | 0.8516019 | 1.3411784 | 1.7536867 | 0.4308062 | 0.7854953 | 1.6486756 | 1.0161807 | 1.3735034 | 0.0770316 | 0.5596019 | 1.3113336 | 1.9585363 | 0.9487943 | 1.7262834 | 0.2836680 | 0.0841006 | 0.6328342 | 0.1236305 | 1.7126543 | 1.2487709 | 0.2775428 | 1.8348809 | 0.2538492 |
| ALAGOAS | 1.0078728 | 0.0000000 | 1.8081139 | 0.3870670 | 0.2731657 | 0.3586060 | 2.7577622 | 1.4335750 | 1.7669646 | 0.6644961 | 2.0188690 | 2.3813576 | 0.9316193 | 1.5662656 | 0.4194485 | 0.9555427 | 0.1664505 | 0.7379700 | 0.7242882 | 0.9329630 | 1.6301815 | 0.9232054 | 2.6878018 | 2.2480367 | 1.2079466 | 2.8426679 | 0.9273669 |
| AMAPÁ | 0.8487320 | 1.8081139 | 0.0000000 | 2.1089914 | 1.5965878 | 2.1050496 | 1.1347295 | 0.5666256 | 0.1121590 | 2.3987436 | 0.3373337 | 0.7251874 | 0.9221020 | 0.3880713 | 2.0309013 | 2.7252884 | 1.7123548 | 2.4776554 | 1.1080746 | 0.9026451 | 0.2723286 | 0.8975882 | 1.2608314 | 0.4949694 | 0.6024303 | 1.1286853 | 1.0599979 |
| AMAZONAS | 1.3552509 | 0.3870670 | 2.1089914 | 0.0000000 | 0.5141390 | 0.0458295 | 3.1084068 | 1.7858237 | 2.0808001 | 0.2937813 | 2.3471584 | 2.7222408 | 1.2814627 | 1.9045573 | 0.1483380 | 0.6163698 | 0.4067274 | 0.3720721 | 1.0744076 | 1.2753177 | 1.9572204 | 1.2570356 | 3.0562776 | 2.5704113 | 1.5225581 | 3.1798516 | 1.3033203 |
| BAHIA | 0.8516019 | 0.2731657 | 1.5965878 | 0.5141390 | 0.0000000 | 0.5084676 | 2.6001843 | 1.2819200 | 1.5667930 | 0.8068813 | 1.8334444 | 2.2106789 | 0.7805623 | 1.3934770 | 0.4598560 | 1.1294001 | 0.1225849 | 0.8855976 | 0.5782663 | 0.7695509 | 1.4436353 | 0.7474282 | 2.5624744 | 2.0562819 | 1.0084308 | 2.6670613 | 0.8382749 |
| CEARÁ | 1.3411784 | 0.3586060 | 2.1050496 | 0.0458295 | 0.5084676 | 0.0000000 | 3.0948560 | 1.7711826 | 2.0745021 | 0.3093395 | 2.3382878 | 2.7108801 | 1.2666968 | 1.8932169 | 0.1861818 | 0.6216895 | 0.3957174 | 0.3857090 | 1.0590739 | 1.2621647 | 1.9482591 | 1.2455636 | 3.0376189 | 2.5628792 | 1.5152998 | 3.1695041 | 1.2813493 |
| DISTRITO FEDERAL | 1.7536867 | 2.7577622 | 1.1347295 | 3.1084068 | 2.6001843 | 3.0948560 | 0.0000000 | 1.3244875 | 1.1036344 | 3.4020390 | 0.8103090 | 0.4111773 | 1.8285013 | 1.2089280 | 3.0572881 | 3.7114720 | 2.7017005 | 3.4798115 | 2.0364014 | 1.8330905 | 1.1767373 | 1.8527776 | 0.3701147 | 0.6566854 | 1.6121031 | 0.2386574 | 1.8529468 |
| ESPÍRITO SANTO | 0.4308062 | 1.4335750 | 0.5666256 | 1.7858237 | 1.2819200 | 1.7711826 | 1.3244875 | 0.0000000 | 0.4677249 | 2.0790969 | 0.6237884 | 0.9523457 | 0.5044944 | 0.1831535 | 1.7413783 | 2.3869946 | 1.3795101 | 2.1565498 | 0.7121910 | 0.5123788 | 0.2958202 | 0.5381319 | 1.2872760 | 0.8590363 | 0.3765220 | 1.4152801 | 0.5535882 |
| GOIÁS | 0.7854953 | 1.7669646 | 0.1121590 | 2.0808001 | 1.5667930 | 2.0745021 | 1.1036344 | 0.4677249 | 0.0000000 | 2.3725686 | 0.2933254 | 0.6926628 | 0.8609903 | 0.2855904 | 2.0095240 | 2.6960285 | 1.6796596 | 2.4514468 | 1.0552030 | 0.8463056 | 0.1723301 | 0.8470089 | 1.1990749 | 0.4930473 | 0.5600340 | 1.1210789 | 0.9823854 |
| MARANHÃO | 1.6486756 | 0.6644961 | 2.3987436 | 0.2937813 | 0.8068813 | 0.3093395 | 3.4020390 | 2.0790969 | 2.3725686 | 0.0000000 | 2.6403091 | 3.0159914 | 1.5746394 | 2.1983198 | 0.3779762 | 0.3329936 | 0.7004604 | 0.0789185 | 1.3673070 | 1.5689572 | 2.2504526 | 1.5508157 | 3.3469555 | 2.8628757 | 1.8151099 | 3.4734273 | 1.5901237 |
| MATO GROSSO | 1.0161807 | 2.0188690 | 0.3373337 | 2.3471584 | 1.8334444 | 2.3382878 | 0.8103090 | 0.6237884 | 0.2933254 | 2.6403091 | 0.0000000 | 0.3993960 | 1.0932108 | 0.4589041 | 2.2828388 | 2.9597236 | 1.9425711 | 2.7189804 | 1.2968790 | 1.0868192 | 0.3900288 | 1.0960581 | 0.9242861 | 0.2354642 | 0.8266064 | 0.8368243 | 1.1761347 |
| MATO GROSSO DO SUL | 1.3735034 | 2.3813576 | 0.7251874 | 2.7222408 | 2.2106789 | 2.7108801 | 0.4111773 | 0.9523457 | 0.6926628 | 3.0159914 | 0.3993960 | 0.0000000 | 1.4497722 | 0.8177131 | 2.6648017 | 3.3305544 | 2.3158776 | 3.0942768 | 1.6571528 | 1.4498809 | 0.7738019 | 1.4653743 | 0.5869188 | 0.2707371 | 1.2120164 | 0.4629789 | 1.4986806 |
| MINAS GERAIS | 0.0770316 | 0.9316193 | 0.9221020 | 1.2814627 | 0.7805623 | 1.2666968 | 1.8285013 | 0.5044944 | 0.8609903 | 1.5746394 | 1.0932108 | 1.4497722 | 0.0000000 | 0.6365321 | 1.2404097 | 1.8831892 | 0.8754018 | 1.6520591 | 0.2079377 | 0.0472779 | 0.7095486 | 0.0974169 | 1.7820619 | 1.3257473 | 0.3392696 | 1.9113764 | 0.2211020 |
| PARÁ | 0.5596019 | 1.5662656 | 0.3880713 | 1.9045573 | 1.3934770 | 1.8932169 | 1.2089280 | 0.1831535 | 0.2855904 | 2.1983198 | 0.4589041 | 0.8177131 | 0.6365321 | 0.0000000 | 1.8490641 | 2.5132042 | 1.4981646 | 2.2765795 | 0.8422804 | 0.6334217 | 0.1157946 | 0.6476708 | 1.2180007 | 0.6936079 | 0.4101948 | 1.2770173 | 0.7214343 |
| PARAÍBA | 1.3113336 | 0.4194485 | 2.0309013 | 0.1483380 | 0.4598560 | 0.1861818 | 3.0572881 | 1.7413783 | 2.0095240 | 0.3779762 | 2.2828388 | 2.6648017 | 1.2404097 | 1.8490641 | 0.0000000 | 0.7109690 | 0.3771540 | 0.4552247 | 1.0373168 | 1.2290230 | 1.8942200 | 1.2054889 | 3.0222178 | 2.5016635 | 1.4562341 | 3.1186331 | 1.2871416 |
| PARANÁ | 1.9585363 | 0.9555427 | 2.7252884 | 0.6163698 | 1.1294001 | 0.6216895 | 3.7114720 | 2.3869946 | 2.6960285 | 0.3329936 | 2.9597236 | 3.3305544 | 1.8831892 | 2.5132042 | 0.7109690 | 0.0000000 | 1.0172693 | 0.2566357 | 1.6752518 | 1.8808207 | 2.5697035 | 1.8658521 | 3.6420022 | 3.1845660 | 2.1369718 | 3.7900037 | 1.8801356 |
| PERNAMBUCO | 0.9487943 | 0.1664505 | 1.7123548 | 0.4067274 | 0.1225849 | 0.3957174 | 2.7017005 | 1.3795101 | 1.6796596 | 0.7004604 | 1.9425711 | 2.3158776 | 0.8754018 | 1.4981646 | 0.3771540 | 1.0172693 | 0.0000000 | 0.7785176 | 0.6690031 | 0.8686241 | 1.5525423 | 0.8505274 | 2.6534133 | 2.1673791 | 1.1200689 | 2.7740473 | 0.9104495 |
| PIAUÍ | 1.7262834 | 0.7379700 | 2.4776554 | 0.3720721 | 0.8855976 | 0.3857090 | 3.4798115 | 2.1565498 | 2.4514468 | 0.0789185 | 2.7189804 | 3.0942768 | 1.6520591 | 2.2765795 | 0.4552247 | 0.2566357 | 0.7785176 | 0.0000000 | 1.4445728 | 1.6467904 | 2.3290951 | 1.6290041 | 3.4228466 | 2.9416889 | 1.8938998 | 3.5519100 | 1.6647451 |
| RIO DE JANEIRO | 0.2836680 | 0.7242882 | 1.1080746 | 1.0744076 | 0.5782663 | 1.0590739 | 2.0364014 | 0.7121910 | 1.0552030 | 1.3673070 | 1.2968790 | 1.6571528 | 0.2079377 | 0.8422804 | 1.0373168 | 1.6752518 | 0.6690031 | 1.4445728 | 0.0000000 | 0.2102623 | 0.9102248 | 0.2108054 | 1.9853485 | 1.5279929 | 0.5080589 | 2.1183825 | 0.2925859 |
| RIO GRANDE DO NORTE | 0.0841006 | 0.9329630 | 0.9026451 | 1.2753177 | 0.7695509 | 1.2621647 | 1.8330905 | 0.5123788 | 0.8463056 | 1.5689572 | 1.0868192 | 1.4498809 | 0.0472779 | 0.6334217 | 1.2290230 | 1.8808207 | 0.8686241 | 1.6467904 | 0.2102623 | 0.0000000 | 0.6999691 | 0.0510524 | 1.7960684 | 1.3177387 | 0.3101007 | 1.9102864 | 0.2680241 |
| RIO GRANDE DO SUL | 0.6328342 | 1.6301815 | 0.2723286 | 1.9572204 | 1.4436353 | 1.9482591 | 1.1767373 | 0.2958202 | 0.1723301 | 2.2504526 | 0.3900288 | 0.7738019 | 0.7095486 | 0.1157946 | 1.8942200 | 2.5697035 | 1.5525423 | 2.3290951 | 0.9102248 | 0.6999691 | 0.0000000 | 0.7069908 | 1.2209622 | 0.6180253 | 0.4386934 | 1.2244135 | 0.8163290 |
| RONDÔNIA | 0.1236305 | 0.9232054 | 0.8975882 | 1.2570356 | 0.7474282 | 1.2455636 | 1.8527776 | 0.5381319 | 0.8470089 | 1.5508157 | 1.0960581 | 1.4653743 | 0.0974169 | 0.6476708 | 1.2054889 | 1.8658521 | 0.8505274 | 1.6290041 | 0.2108054 | 0.0510524 | 0.7069908 | 0.0000000 | 1.8248137 | 1.3248428 | 0.2973432 | 1.9241572 | 0.3159306 |
| RORAIMA | 1.7126543 | 2.6878018 | 1.2608314 | 3.0562776 | 2.5624744 | 3.0376189 | 0.3701147 | 1.2872760 | 1.1990749 | 3.3469555 | 0.9242861 | 0.5869188 | 1.7820619 | 1.2180007 | 3.0222178 | 3.6420022 | 2.6534133 | 3.4228466 | 1.9853485 | 1.7960684 | 1.2209622 | 1.8248137 | 0.0000000 | 0.8563699 | 1.6269020 | 0.6084459 | 1.7619894 |
| SANTA CATARINA | 1.2487709 | 2.2480367 | 0.4949694 | 2.5704113 | 2.0562819 | 2.5628792 | 0.6566854 | 0.8590363 | 0.4930473 | 2.8628757 | 0.2354642 | 0.2707371 | 1.3257473 | 0.6936079 | 2.5016635 | 3.1845660 | 2.1673791 | 2.9416889 | 1.5279929 | 1.3177387 | 0.6180253 | 1.3248428 | 0.8563699 | 0.0000000 | 1.0478532 | 0.6337931 | 1.4115940 |
| SÃO PAULO | 0.2775428 | 1.2079466 | 0.6024303 | 1.5225581 | 1.0084308 | 1.5152998 | 1.6121031 | 0.3765220 | 0.5600340 | 1.8151099 | 0.8266064 | 1.2120164 | 0.3392696 | 0.4101948 | 1.4562341 | 2.1369718 | 1.1200689 | 1.8938998 | 0.5080589 | 0.3101007 | 0.4386934 | 0.2973432 | 1.6269020 | 1.0478532 | 0.0000000 | 1.6626908 | 0.5266062 |
| SERGIPE | 1.8348809 | 2.8426679 | 1.1286853 | 3.1798516 | 2.6670613 | 3.1695041 | 0.2386574 | 1.4152801 | 1.1210789 | 3.4734273 | 0.8368243 | 0.4629789 | 1.9113764 | 1.2770173 | 3.1186331 | 3.7900037 | 2.7740473 | 3.5519100 | 2.1183825 | 1.9102864 | 1.2244135 | 1.9241572 | 0.6084459 | 0.6337931 | 1.6626908 | 0.0000000 | 1.9610670 |
| TOCANTINS | 0.2538492 | 0.9273669 | 1.0599979 | 1.3033203 | 0.8382749 | 1.2813493 | 1.8529468 | 0.5535882 | 0.9823854 | 1.5901237 | 1.1761347 | 1.4986806 | 0.2211020 | 0.7214343 | 1.2871416 | 1.8801356 | 0.9104495 | 1.6647451 | 0.2925859 | 0.2680241 | 0.8163290 | 0.3159306 | 1.7619894 | 1.4115940 | 0.5266062 | 1.9610670 | 0.0000000 |
# Development of hierarchical clustering
library(cluster)
cluster_hier <- agnes(x = matriz_D, method = "single")
# Method is the chaining type:
## "complete": encadeamento completo (furthest neighbor ou complete linkage)
## "single": encadeamento único (nearest neighbor ou single linkage)
## "average": encadeamento médio (between groups ou average linkage)
# # Definition of the hierarchical clustering scheme
# Distances for combinations at each stage
coeficientes <- sort(cluster_hier$height, decreasing = FALSE)
coeficientes
## [1] 0.04582952 0.04727786 0.05105235 0.07703156 0.07891849 0.11215898
## [7] 0.11579459 0.12258495 0.14833797 0.16645051 0.17233006 0.18315353
## [13] 0.20793772 0.22110203 0.23546417 0.23865743 0.25663569 0.27073710
## [19] 0.27754282 0.29332542 0.29378132 0.35860596 0.37011467 0.37652202
## [25] 0.41117735 0.57826631
# # Table with the clustering scheme. Output interpretation:
## The lines are the clustering stages
## In the columns Cluster1 and Cluster2, it is observed how the merging occurred
## When it is a negative number, it indicates isolated observation
## When it is a positive number, it indicates a previously formed cluster (stage)
## Coefficients: the distances for the combinations in each stage
esquema <- as.data.frame(cbind(cluster_hier$merge, coeficientes))
names(esquema) <- c("Cluster1", "Cluster2", "Coeficientes")
esquema
## Cluster1 Cluster2 Coeficientes
## 1 -4 -6 0.04582952
## 2 -13 -20 0.04727786
## 3 2 -22 0.05105235
## 4 -1 3 0.07703156
## 5 -10 -18 0.07891849
## 6 -3 -9 0.11215898
## 7 -14 -21 0.11579459
## 8 -5 -17 0.12258495
## 9 1 -15 0.14833797
## 10 -2 8 0.16645051
## 11 6 7 0.17233006
## 12 11 -8 0.18315353
## 13 4 -19 0.20793772
## 14 13 -27 0.22110203
## 15 -11 -24 0.23546417
## 16 -7 -26 0.23865743
## 17 5 -16 0.25663569
## 18 15 -12 0.27073710
## 19 14 -25 0.27754282
## 20 12 18 0.29332542
## 21 9 17 0.29378132
## 22 10 21 0.35860596
## 23 16 -23 0.37011467
## 24 19 20 0.37652202
## 25 24 23 0.41117735
## 26 25 22 0.57826631
# Visualization of the hierarchical clustering scheme
esquema %>%
kable(row.names = T) %>%
kable_styling(bootstrap_options = "striped",
full_width = FALSE,
font_size = 20)
| Cluster1 | Cluster2 | Coeficientes | |
|---|---|---|---|
| 1 | -4 | -6 | 0.0458295 |
| 2 | -13 | -20 | 0.0472779 |
| 3 | 2 | -22 | 0.0510524 |
| 4 | -1 | 3 | 0.0770316 |
| 5 | -10 | -18 | 0.0789185 |
| 6 | -3 | -9 | 0.1121590 |
| 7 | -14 | -21 | 0.1157946 |
| 8 | -5 | -17 | 0.1225849 |
| 9 | 1 | -15 | 0.1483380 |
| 10 | -2 | 8 | 0.1664505 |
| 11 | 6 | 7 | 0.1723301 |
| 12 | 11 | -8 | 0.1831535 |
| 13 | 4 | -19 | 0.2079377 |
| 14 | 13 | -27 | 0.2211020 |
| 15 | -11 | -24 | 0.2354642 |
| 16 | -7 | -26 | 0.2386574 |
| 17 | 5 | -16 | 0.2566357 |
| 18 | 15 | -12 | 0.2707371 |
| 19 | 14 | -25 | 0.2775428 |
| 20 | 12 | 18 | 0.2933254 |
| 21 | 9 | 17 | 0.2937813 |
| 22 | 10 | 21 | 0.3586060 |
| 23 | 16 | -23 | 0.3701147 |
| 24 | 19 | 20 | 0.3765220 |
| 25 | 24 | 23 | 0.4111773 |
| 26 | 25 | 22 | 0.5782663 |
# Construction of the dendrogram
dev.set(dev.next())
## quartz_off_screen
## 2
fviz_dend(x = cluster_hier)
## Warning: The `<scale>` argument of `guides()` cannot be `FALSE`. Use "none" instead as
## of ggplot2 3.3.4.
## ℹ The deprecated feature was likely used in the factoextra package.
## Please report the issue at <https://github.com/kassambara/factoextra/issues>.
## This warning is displayed once every 8 hours.
## Call `lifecycle::last_lifecycle_warnings()` to see where this warning was
## generated.
## Registered S3 methods overwritten by 'broom':
## method from
## tidy.glht jtools
## tidy.summary.glht jtools
# Dendrogram with visualization of clusters
# Defining 3 clusters to compare with regional ones
fviz_dend(x = cluster_hier,
k = 3,
k_colors = c("deeppink4", "darkviolet", "deeppink"),
color_labels_by_k = F,
rect = T,
rect_fill = T,
lwd = 1,
ggtheme = theme_bw())
# Creating a categorical variable to indicate the cluster in the database
## The 'k' argument indicates the number of clusters
v_final$cluster_H <- factor(cutree(tree = cluster_hier, k = 3))
#---------- --------- K-MEANS non-hierarchical clustering scheme ---------------------
# Elaboration of the non-hierarchical k-means clustering
cluster_kmeans <- kmeans(v_final[,1:2],
centers = 3)
# Creating a categorical variable to indicate the cluster in the database
v_final$cluster_K <- factor(cluster_kmeans$cluster)
# Elbow method to identify the optimal number of clusters
fviz_nbclust(v_final[,1:2], kmeans, method = "wss", k.max = 10)
# Visualization
v_final %>%
kable() %>%
kable_styling(bootstrap_options = "striped",
full_width = FALSE,
font_size = 20)
| v00 | v10 | Ano2014 | Ano2015 | Ano2016 | Ano2017 | Ano2018 | Ano2019 | Estado | cluster_H | cluster_K | |
|---|---|---|---|---|---|---|---|---|---|---|---|
| ACRE | -0.0294709 | 0.0755345 | 0.2201578 | 0.1843545 | 0.2208383 | 0.2520057 | 0.2055092 | 0.1990242 | 1 | 1 | 2 |
| ALAGOAS | 0.9718063 | 0.1906503 | 0.1645777 | 0.0868940 | -0.0355244 | -0.2145126 | -0.3556713 | -0.4120326 | 2 | 2 | 1 |
| AMAPÁ | -0.7420458 | -0.3855328 | -0.3959222 | -0.0521069 | 0.1582126 | 0.1887054 | -0.0516194 | 0.2885115 | 3 | 1 | 3 |
| AMAZONAS | 1.3250971 | 0.0325160 | 0.1833311 | 0.1943370 | 0.3927792 | 0.3635601 | 0.2968533 | 0.2796587 | 4 | 2 | 1 |
| BAHIA | 0.8151680 | -0.0331441 | -0.0167786 | -0.0319688 | -0.1879103 | -0.1977936 | -0.2602910 | -0.2808306 | 5 | 2 | 1 |
| CEARÁ | 1.3117073 | 0.0763459 | -0.0005820 | -0.0866612 | -0.1413544 | -0.1588028 | -0.1605805 | -0.2818612 | 6 | 2 | 1 |
| DISTRITO FEDERAL | -1.7831309 | 0.0658571 | 0.0850226 | 0.3750741 | 0.1454209 | 0.0891645 | 0.3288235 | 0.6083619 | 7 | 3 | 3 |
| ESPÍRITO SANTO | -0.4592358 | 0.1054693 | 0.3981806 | 0.0292149 | -0.3825088 | 0.3013594 | 0.3848248 | 0.1850926 | 8 | 1 | 2 |
| GOIÁS | -0.7330425 | -0.2737358 | 0.0827484 | -0.0261517 | -0.0657332 | 0.1789237 | 0.1566465 | 0.1381287 | 9 | 1 | 3 |
| MARANHÃO | 1.6188033 | 0.0391581 | 0.0528312 | 0.0892158 | -0.0712314 | -0.3419937 | -0.5015657 | -0.4881569 | 10 | 2 | 1 |
| MATO GROSSO | -1.0121095 | -0.1833952 | -0.4971138 | -0.0404551 | -0.2157126 | -0.4964526 | -0.1692850 | 0.1147334 | 11 | 1 | 3 |
| MATO GROSSO DO SUL | -1.3951965 | -0.0704282 | 0.1852213 | 0.0706962 | 0.5901643 | 0.8230387 | 0.5193328 | 0.5581398 | 12 | 1 | 3 |
| MINAS GERAIS | 0.0451432 | 0.0946814 | 0.1909092 | 0.0531106 | 0.0895704 | 0.1227749 | 0.0604606 | -0.0047555 | 13 | 1 | 2 |
| PARÁ | -0.5783089 | -0.0336955 | -0.2938997 | -0.2507609 | -0.4005707 | -0.4975507 | -0.4825227 | -0.4600343 | 14 | 1 | 2 |
| PARAÍBA | 1.2693814 | -0.1049610 | -0.1655463 | -0.1329720 | -0.1585353 | -0.3463256 | -0.3963813 | -0.4492702 | 15 | 2 | 1 |
| PARANÁ | 1.9270106 | 0.1652256 | 0.2683203 | 0.3523267 | 0.1834496 | 0.0978610 | 0.0818471 | 0.1352833 | 16 | 2 | 1 |
| PERNAMBUCO | 0.9183700 | 0.0330104 | -0.0218656 | 0.0248414 | -0.0073071 | -0.1594980 | -0.1049565 | -0.1444674 | 17 | 2 | 1 |
| PIAUÍ | 1.6966535 | 0.0520998 | 0.2656055 | 0.0933383 | -0.1069712 | -0.0339710 | -0.1466598 | -0.2631930 | 18 | 2 | 1 |
| RIO DE JANEIRO | 0.2529474 | 0.1021330 | 0.1471858 | 0.1049232 | 0.0635383 | 0.0902138 | 0.1783655 | 0.1444673 | 19 | 1 | 2 |
| RIO GRANDE DO NORTE | 0.0498691 | 0.0476403 | 0.1648000 | -0.0607841 | -0.0086006 | 0.0852790 | 0.0932683 | -0.1283605 | 20 | 1 | 2 |
| RIO GRANDE DO SUL | -0.6245168 | -0.1398709 | -0.2225735 | -0.0174327 | 0.0197314 | 0.0375732 | 0.0637979 | 0.2630818 | 21 | 1 | 2 |
| RONDÔNIA | 0.0684795 | 0.0001009 | -0.4388168 | -0.3873959 | -0.2619108 | -0.5755407 | -0.5992302 | -0.6175731 | 22 | 1 | 2 |
| RORAIMA | -1.7055181 | 0.4277426 | 0.3321379 | 0.4391202 | 0.1955705 | 0.1716012 | 0.5939330 | 0.6904725 | 23 | 3 | 3 |
| SANTA CATARINA | -1.2260238 | -0.2818027 | -0.9144535 | -0.6559743 | -0.1141088 | -0.4403027 | -0.4281602 | -0.3615425 | 24 | 1 | 3 |
| SÃO PAULO | -0.1860392 | -0.1536300 | -0.1378158 | -0.1870649 | -0.1255223 | -0.1112961 | -0.2681847 | -0.2652966 | 25 | 1 | 2 |
| SERGIPE | -1.8487000 | -0.1636163 | 0.1360203 | -0.2572510 | 0.0897350 | 0.7692411 | 0.7365517 | 0.4883054 | 26 | 3 | 3 |
| TOCANTINS | 0.0529020 | 0.3156473 | 0.2283180 | 0.0895326 | 0.1344913 | 0.0027383 | 0.2248942 | 0.0641133 | 27 | 1 | 2 |
# Descriptive statistics of clusters by variable
# Descriptive statistics of intercepts
group_by(v_final, cluster_K) %>%
summarise(
mean = mean(v00, na.rm = TRUE),
sd = sd(v00, na.rm = TRUE),
min = min(v00, na.rm = TRUE),
max = max(v00, na.rm = TRUE))
## # A tibble: 3 × 5
## cluster_K mean sd min max
## <fct> <dbl> <dbl> <dbl> <dbl>
## 1 1 1.32 0.377 0.815 1.93
## 2 2 -0.141 0.307 -0.625 0.253
## 3 3 -1.31 0.452 -1.85 -0.733
# One-way analysis of variance (ANOVA). Output interpretation:
## Mean Sq of cluster_K: indicates the variability between groups
## Mean Sq of Residuals: indicates the variability within groups
## F value: test statistic (Sum Sq of cluster_K / Sum Sq of Residuals)
## Pr(>F): p-value of the statistic
## p-value < 0.05: at least one cluster has a statistically different mean from the others
## The most discriminant variable of the groups has the highest F statistic (and is significant)
library(knitr)
# ANOVA of the intercept
summary(anova_atendimento <- aov(formula = v00 ~ cluster_K,
data = v_final))
## Df Sum Sq Mean Sq F value Pr(>F)
## cluster_K 2 29.451 14.725 103.4 1.59e-12 ***
## Residuals 24 3.417 0.142
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
# FIM!
v_final %>%
dplyr::select(Estado, cluster_K) %>%
arrange(Estado)
## Estado cluster_K
## ACRE 1 2
## ALAGOAS 2 1
## AMAPÁ 3 3
## AMAZONAS 4 1
## BAHIA 5 1
## CEARÁ 6 1
## DISTRITO FEDERAL 7 3
## ESPÍRITO SANTO 8 2
## GOIÁS 9 3
## MARANHÃO 10 1
## MATO GROSSO 11 3
## MATO GROSSO DO SUL 12 3
## MINAS GERAIS 13 2
## PARÁ 14 2
## PARAÍBA 15 1
## PARANÁ 16 1
## PERNAMBUCO 17 1
## PIAUÍ 18 1
## RIO DE JANEIRO 19 2
## RIO GRANDE DO NORTE 20 2
## RIO GRANDE DO SUL 21 2
## RONDÔNIA 22 2
## RORAIMA 23 3
## SANTA CATARINA 24 3
## SÃO PAULO 25 2
## SERGIPE 26 3
## TOCANTINS 27 2