library(readxl)
# Load the enrolment table: one row per state with the columns
# reg, est, cod, tot2019 and tot2020.
# NOTE(review): the path is absolute and user-specific, so the script only runs
# on the author's machine; consider a project-relative path.
dad_matr_INPE <- read_excel("C:/Users/eduma/OneDrive/mestrado/Estatistica/trabalho final/dad_matr_INPE.xlsx")
# View() opens a data-viewer window, which only makes sense in an interactive
# session; skip it when the script is knitted or run in batch mode.
if (interactive()) {
  View(dad_matr_INPE)
}
library(kableExtra)
## Warning: package 'kableExtra' was built under R version 4.0.4
# Render the whole data set as a styled HTML table inside a scrollable box.
# TRUE is spelled out because T is an ordinary variable that can be reassigned.
kable(dad_matr_INPE, row.names = FALSE) %>%
  kable_styling(
    full_width = TRUE,
    bootstrap_options = c("striped", "hover", "condensed", "responsive"),
    position = "center",
    fixed_thead = TRUE
  ) %>%
  scroll_box(width = "900px", height = "600px")
| reg | est | cod | tot2019 | tot2020 |
|---|---|---|---|---|
| Norte | Rondônia | 11 | 416212 | 406519 |
| Norte | Acre | 12 | 264593 | 260644 |
| Norte | Amazonas | 13 | 1165535 | 1157298 |
| Norte | Roraima | 14 | 166147 | 168378 |
| Norte | Pará | 15 | 2294276 | 2252916 |
| Norte | Amapá | 16 | 220269 | 213513 |
| Norte | Tocantins | 17 | 397631 | 392807 |
| Nordeste | Maranhão | 21 | 1993909 | 1927085 |
| Nordeste | Piauí | 22 | 895308 | 855397 |
| Nordeste | Ceará | 23 | 2161816 | 2132240 |
| Nordeste | Rio Grande do Norte | 24 | 820485 | 804775 |
| Nordeste | Paraíba | 25 | 969806 | 947860 |
| Nordeste | Pernambuco | 26 | 2232556 | 2206605 |
| Nordeste | Alagoas | 27 | 865501 | 827842 |
| Nordeste | Sergipe | 28 | 543464 | 533450 |
| Nordeste | Bahia | 29 | 3485631 | 3434828 |
| Sudeste | Minas Gerais | 31 | 4364668 | 4328917 |
| Sudeste | Espírito Santo | 32 | 881826 | 883113 |
| Sudeste | Rio de Janeiro | 33 | 3573417 | 3555949 |
| Sudeste | São Paulo | 35 | 10018115 | 9958883 |
| Sul | Paraná | 41 | 2572007 | 2554321 |
| Sul | Santa Catarina | 42 | 1610086 | 1619551 |
| Sul | Rio Grande do Sul | 43 | 2294325 | 2242547 |
| Centro-Oeste | Mato Grosso do Sul | 50 | 680108 | 667164 |
| Centro-Oeste | Mato Grosso | 51 | 880844 | 850915 |
| Centro-Oeste | Goiás | 52 | 1447842 | 1457872 |
| Centro-Oeste | Distrito Federal | 53 | 657869 | 653905 |
# Descriptive summary of every column (quartiles and mean for the numeric ones).
summary(dad_matr_INPE)
## reg est cod tot2019
## Length:27 Length:27 Min. :11.00 Min. : 166147
## Class :character Class :character 1st Qu.:19.00 1st Qu.: 668988
## Mode :character Mode :character Median :27.00 Median : 969806
## Mean :29.11 Mean : 1773120
## 3rd Qu.:38.00 3rd Qu.: 2263416
## Max. :53.00 Max. :10018115
## tot2020
## Min. : 168378
## 1st Qu.: 660534
## Median : 947860
## Mean :1751678
## 3rd Qu.:2224576
## Max. :9958883
# Box plots of the enrolment totals, grouped first by region and then by state,
# for 2019 and 2020.
# The fill colours are defined once and shared by the four plots. Boxes are
# drawn in alphabetical order of the grouping variable, so for `reg` the colours
# map to Centro-Oeste, Nordeste, Norte, Sudeste, Sul.
cores_boxplot <- c("red", "#faa005", "#1bc704", "#0f8efc", "yellow")
boxplot(tot2019 ~ reg, data = dad_matr_INPE, col = cores_boxplot,
        main = "Boxplot 1 \n Região x Matr 2019 \n")
boxplot(tot2020 ~ reg, data = dad_matr_INPE, col = cores_boxplot,
        main = "Boxplot 2 \n Região x Matr 2020 \n")
boxplot(tot2019 ~ est, data = dad_matr_INPE, col = cores_boxplot,
        main = "Boxplot 3 \n Estados x Matr.2019 \n")
boxplot(tot2020 ~ est, data = dad_matr_INPE, col = cores_boxplot,
        main = "Boxplot 4 \n Estados x Matr.2020 \n")
library(geobr)
## Warning: package 'geobr' was built under R version 4.0.4
## Loading required namespace: sf
# Download the geobr state boundaries for the whole country, 2010 edition.
estado <- read_state(code_state="all", year=2010)
## Using year 2010
## Loading data for the whole country
##
|
| | 0%
|
|=== | 4%
|
|===== | 7%
|
|======== | 11%
|
|========== | 15%
|
|============= | 19%
|
|================ | 22%
|
|================== | 26%
|
|===================== | 30%
|
|======================= | 33%
|
|========================== | 37%
|
|============================= | 41%
|
|=============================== | 44%
|
|================================== | 48%
|
|==================================== | 52%
|
|======================================= | 56%
|
|========================================= | 59%
|
|============================================ | 63%
|
|=============================================== | 67%
|
|================================================= | 70%
|
|==================================================== | 74%
|
|====================================================== | 78%
|
|========================================================= | 81%
|
|============================================================ | 85%
|
|============================================================== | 89%
|
|================================================================= | 93%
|
|=================================================================== | 96%
|
|======================================================================| 100%
# Download the geobr region boundaries, 2010 edition.
regiao <- read_region(year=2010)
## Using year 2010
##
|
| | 0%
|
|======================================================================| 100%
##
Downloading: 770 B
Downloading: 770 B
Downloading: 2.1 kB
Downloading: 2.1 kB
Downloading: 2.1 kB
Downloading: 2.1 kB
Downloading: 4.2 kB
Downloading: 4.2 kB
Downloading: 4.2 kB
Downloading: 4.2 kB
Downloading: 4.2 kB
Downloading: 4.2 kB
Downloading: 20 kB
Downloading: 20 kB
Downloading: 20 kB
Downloading: 20 kB
Downloading: 29 kB
Downloading: 29 kB
Downloading: 45 kB
Downloading: 45 kB
Downloading: 45 kB
Downloading: 45 kB
Downloading: 53 kB
Downloading: 53 kB
Downloading: 53 kB
Downloading: 53 kB
Downloading: 53 kB
Downloading: 53 kB
Downloading: 69 kB
Downloading: 69 kB
Downloading: 85 kB
Downloading: 85 kB
Downloading: 93 kB
Downloading: 93 kB
Downloading: 100 kB
Downloading: 100 kB
Downloading: 120 kB
Downloading: 120 kB
Downloading: 130 kB
Downloading: 130 kB
Downloading: 130 kB
Downloading: 130 kB
Downloading: 130 kB
Downloading: 130 kB
Downloading: 140 kB
Downloading: 140 kB
Downloading: 160 kB
Downloading: 160 kB
Downloading: 170 kB
Downloading: 170 kB
Downloading: 170 kB
Downloading: 170 kB
Downloading: 170 kB
Downloading: 170 kB
Downloading: 180 kB
Downloading: 180 kB
Downloading: 190 kB
Downloading: 190 kB
Downloading: 210 kB
Downloading: 210 kB
Downloading: 210 kB
Downloading: 210 kB
Downloading: 210 kB
Downloading: 210 kB
Downloading: 210 kB
Downloading: 210 kB
Downloading: 210 kB
Downloading: 210 kB
Downloading: 220 kB
Downloading: 220 kB
Downloading: 240 kB
Downloading: 240 kB
Downloading: 260 kB
Downloading: 260 kB
Downloading: 260 kB
Downloading: 260 kB
Downloading: 260 kB
Downloading: 260 kB
Downloading: 280 kB
Downloading: 280 kB
Downloading: 290 kB
Downloading: 290 kB
Downloading: 300 kB
Downloading: 300 kB
Downloading: 300 kB
Downloading: 300 kB
Downloading: 300 kB
Downloading: 300 kB
Downloading: 320 kB
Downloading: 320 kB
Downloading: 330 kB
Downloading: 330 kB
Downloading: 340 kB
Downloading: 340 kB
Downloading: 340 kB
Downloading: 340 kB
Downloading: 350 kB
Downloading: 350 kB
Downloading: 370 kB
Downloading: 370 kB
Downloading: 380 kB
Downloading: 380 kB
Downloading: 380 kB
Downloading: 380 kB
Downloading: 380 kB
Downloading: 380 kB
Downloading: 380 kB
Downloading: 380 kB
Downloading: 400 kB
Downloading: 400 kB
Downloading: 410 kB
Downloading: 410 kB
Downloading: 410 kB
Downloading: 410 kB
Downloading: 420 kB
Downloading: 420 kB
Downloading: 420 kB
Downloading: 420 kB
Downloading: 430 kB
Downloading: 430 kB
Downloading: 440 kB
Downloading: 440 kB
Downloading: 460 kB
Downloading: 460 kB
Downloading: 460 kB
Downloading: 460 kB
Downloading: 460 kB
Downloading: 460 kB
Downloading: 470 kB
Downloading: 470 kB
Downloading: 490 kB
Downloading: 490 kB
Downloading: 490 kB
Downloading: 490 kB
Downloading: 500 kB
Downloading: 500 kB
Downloading: 500 kB
Downloading: 500 kB
Downloading: 500 kB
Downloading: 500 kB
Downloading: 500 kB
Downloading: 500 kB
Downloading: 510 kB
Downloading: 510 kB
Downloading: 520 kB
Downloading: 520 kB
Downloading: 540 kB
Downloading: 540 kB
Downloading: 560 kB
Downloading: 560 kB
Downloading: 570 kB
Downloading: 570 kB
Downloading: 570 kB
Downloading: 570 kB
Downloading: 570 kB
Downloading: 570 kB
Downloading: 580 kB
Downloading: 580 kB
Downloading: 580 kB
Downloading: 580 kB
Downloading: 580 kB
Downloading: 580 kB
Downloading: 580 kB
Downloading: 580 kB
Downloading: 590 kB
Downloading: 590 kB
Downloading: 590 kB
Downloading: 590 kB
Downloading: 600 kB
Downloading: 600 kB
Downloading: 600 kB
Downloading: 600 kB
Downloading: 610 kB
Downloading: 610 kB
Downloading: 630 kB
Downloading: 630 kB
Downloading: 640 kB
Downloading: 640 kB
Downloading: 650 kB
Downloading: 650 kB
Downloading: 650 kB
Downloading: 650 kB
Downloading: 660 kB
Downloading: 660 kB
Downloading: 660 kB
Downloading: 660 kB
Downloading: 660 kB
Downloading: 660 kB
Downloading: 660 kB
Downloading: 660 kB
Downloading: 670 kB
Downloading: 670 kB
Downloading: 670 kB
Downloading: 670 kB
Downloading: 670 kB
Downloading: 670 kB
Downloading: 680 kB
Downloading: 680 kB
Downloading: 680 kB
Downloading: 680 kB
Downloading: 690 kB
Downloading: 690 kB
Downloading: 710 kB
Downloading: 710 kB
Downloading: 730 kB
Downloading: 730 kB
Downloading: 740 kB
Downloading: 740 kB
Downloading: 740 kB
Downloading: 740 kB
Downloading: 740 kB
Downloading: 740 kB
Downloading: 750 kB
Downloading: 750 kB
Downloading: 750 kB
Downloading: 750 kB
Downloading: 750 kB
Downloading: 750 kB
Downloading: 760 kB
Downloading: 760 kB
Downloading: 760 kB
Downloading: 760 kB
Downloading: 760 kB
Downloading: 760 kB
Downloading: 760 kB
Downloading: 760 kB
Downloading: 780 kB
Downloading: 780 kB
Downloading: 780 kB
Downloading: 780 kB
# Fetch geobr's metadata table (geo, year, code, download_path, code_abrev)
# and preview its first rows.
# NOTE(review): `metadata` is not used again in the visible part of the script.
metadata<-download_metadata()
head(metadata)
## geo year code
## 1 amazonia_legal 2012 am
## 2 amazonia_legal 2012 am
## 3 amc 1872 AM
## 4 amc 1872 AM
## 5 amc 1872 AM
## 6 amc 1872 AM
## download_path
## 1 http://www.ipea.gov.br/geobr/data_gpkg/amazonia_legal/2012/amazonia_legal.gpkg
## 2 http://www.ipea.gov.br/geobr/data_gpkg/amazonia_legal/2012/amazonia_legal_simplified.gpkg
## 3 http://www.ipea.gov.br/geobr/data_gpkg/amc/1872/AMC_1872_1900.gpkg
## 4 http://www.ipea.gov.br/geobr/data_gpkg/amc/1872/AMC_1872_1900_simplified.gpkg
## 5 http://www.ipea.gov.br/geobr/data_gpkg/amc/1872/AMC_1872_1911.gpkg
## 6 http://www.ipea.gov.br/geobr/data_gpkg/amc/1872/AMC_1872_1911_simplified.gpkg
## code_abrev
## 1 amazonia_legal
## 2 amazonia_legal
## 3 amc
## 4 amc
## 5 amc
## 6 amc
library(ggplot2)
# Map 1: the Brazilian regions, each drawn in a fixed colour and labelled with
# its name.
# The fill vector is positional: colours are applied to the rows of `regiao`
# in the order they appear.
# NOTE(review): presumably chosen to match the box-plot colours per region;
# confirm against the row order of `regiao`.
ggplot() +
  geom_sf(
    data = regiao,
    fill = c("#1bc704", "#faa005", "#0f8efc", "yellow", "red"),
    color = "BLACK", size = .15, show.legend = TRUE
  ) +
  geom_sf_text(data = regiao, aes(label = name_region), size = 4, color = "black") +
  # labs() takes aesthetic = label pairs, so no `size` argument here: it would
  # only name a size legend, not change the subtitle's font size.
  labs(subtitle = "Mapa 1 - Regiões do Brasil") +
  theme_minimal()
## Warning in st_point_on_surface.sfc(sf::st_zm(x)): st_point_on_surface may not
## give correct results for longitude/latitude data
# Rename the state-code column to `code_state`, the key later used to join the
# enrolment data to the state geometries.
# The column is located by its name ("cod") rather than by position, so
# reordering columns in the spreadsheet cannot rename the wrong one.
names(dad_matr_INPE)[names(dad_matr_INPE) == "cod"] <- "code_state"
# Coerce the key to numeric. summary() already reports the column as numeric,
# so this is a safeguard against the spreadsheet being read as text.
dad_matr_INPE$code_state <- as.numeric(dad_matr_INPE$code_state)
library(dplyr)
## Warning: package 'dplyr' was built under R version 4.0.4
##
## Attaching package: 'dplyr'
## The following object is masked from 'package:kableExtra':
##
## group_rows
## The following objects are masked from 'package:stats':
##
## filter, lag
## The following objects are masked from 'package:base':
##
## intersect, setdiff, setequal, union
# Attach the enrolment columns to the state geometries. A left join keeps every
# state polygon even if it has no matching enrolment row.
# The key is named explicitly so the join cannot silently start matching on
# other columns that happen to share a name in both tables.
BR <- estado %>%
  left_join(dad_matr_INPE, by = "code_state")
## Joining, by = "code_state"
# Largest and smallest 2019 totals; these bound the colour scale of the
# 2019 map.
max(dad_matr_INPE$tot2019)
## [1] 10018115
min(dad_matr_INPE$tot2019)
## [1] 166147
library(ggplot2)
# Choropleth of the 2019 enrolment totals by state, with each state labelled by
# its abbreviation.
ggplot() +
  geom_sf(data = BR, aes(fill = tot2019), color = "black", size = .15) +
  # labs() takes aesthetic = label pairs, so no `size` argument here: it would
  # only name a size legend, not change the subtitle's font size.
  labs(subtitle = "Ano 2019") +
  geom_sf_text(data = BR, aes(label = abbrev_state), size = 2, color = "black") +
  scale_fill_distiller(
    palette = "Blues",
    name = "Numero de Matriculas de 2019",
    # Limits are taken from the data instead of hand-copied constants, so they
    # stay correct if the spreadsheet is updated.
    limits = range(dad_matr_INPE$tot2019, na.rm = TRUE)
  ) +
  theme_minimal()
## Warning in st_point_on_surface.sfc(sf::st_zm(x)): st_point_on_surface may not
## give correct results for longitude/latitude data
# Largest and smallest 2020 totals; these bound the colour scale of the
# 2020 map.
max(dad_matr_INPE$tot2020)
## [1] 9958883
min(dad_matr_INPE$tot2020)
## [1] 168378
library(ggplot2)
# Choropleth of the 2020 enrolment totals by state, with each state labelled by
# its abbreviation.
ggplot() +
  geom_sf(data = BR, aes(fill = tot2020), color = "black", size = .15) +
  # labs() takes aesthetic = label pairs, so no `size` argument here: it would
  # only name a size legend, not change the subtitle's font size.
  labs(subtitle = "Ano 2020") +
  geom_sf_text(data = BR, aes(label = abbrev_state), size = 2, color = "black") +
  scale_fill_distiller(
    # ColorBrewer palette names are case-sensitive: "greens" is not a known
    # palette and triggers an "Unknown palette" warning.
    palette = "Greens",
    # Documented values are 1 and -1; 1 keeps the palette's own order.
    direction = 1,
    name = "Número de Matriculas de 2020",
    # Limits are taken from the data instead of hand-copied constants.
    limits = range(dad_matr_INPE$tot2020, na.rm = TRUE)
  ) +
  theme_minimal()
## Warning in pal_name(palette, type): Unknown palette greens
## Warning in st_point_on_surface.sfc(sf::st_zm(x)): st_point_on_surface may not
## give correct results for longitude/latitude data
H0: os dados seguem uma distribuição normal
H1: os dados NÃO seguem uma distribuição normal
alpha = 0.05
# Shapiro-Wilk normality test on the 2020 enrolment totals.
shapiro.test(dad_matr_INPE$tot2020)
##
## Shapiro-Wilk normality test
##
## data: dad_matr_INPE$tot2020
## W = 0.68025, p-value = 2.084e-06
# Shapiro-Wilk normality test on the 2019 enrolment totals.
shapiro.test(dad_matr_INPE$tot2019)
##
## Shapiro-Wilk normality test
##
## data: dad_matr_INPE$tot2019
## W = 0.68249, p-value = 2.234e-06
Para as variáveis tot2019 e tot2020, o p-valor < 0.05; logo, rejeita-se a hipótese nula H0.
Portanto, os dados não seguem uma distribuição normal e deve ser executado o teste de Kruskal-Wallis.
Para dados que NÃO seguem uma distribuição normal
H0: os grupos são amostrados de matrículas com distribuições idênticas.
H1: os grupos são amostrados de matrículas com diferentes distribuições.
# Kruskal-Wallis rank-sum test: do the 2019 totals differ across regions?
kruskal.test(dad_matr_INPE$tot2019~dad_matr_INPE$reg)
##
## Kruskal-Wallis rank sum test
##
## data: dad_matr_INPE$tot2019 by dad_matr_INPE$reg
## Kruskal-Wallis chi-squared = 12.577, df = 4, p-value = 0.01354
# Kruskal-Wallis rank-sum test: do the 2020 totals differ across regions?
kruskal.test(dad_matr_INPE$tot2020~dad_matr_INPE$reg)
##
## Kruskal-Wallis rank sum test
##
## data: dad_matr_INPE$tot2020 by dad_matr_INPE$reg
## Kruskal-Wallis chi-squared = 12.411, df = 4, p-value = 0.01455
Como p-valor < 0.05, rejeito H0:
os grupos são amostrados de populações com diferentes distribuições.
# Post-hoc pairwise Wilcoxon rank-sum tests between regions for the 2019 totals,
# with p-values adjusted by the false discovery rate ("fdr") method.
pmw <- pairwise.wilcox.test(dad_matr_INPE$tot2019,dad_matr_INPE$reg,p.adjust.method = "fdr")
# Print the matrix of adjusted p-values.
pmw
##
## Pairwise comparisons using Wilcoxon rank sum exact test
##
## data: dad_matr_INPE$tot2019 and dad_matr_INPE$reg
##
## Centro-Oeste Nordeste Norte Sudeste
## Nordeste 0.29 - - -
## Norte 0.29 0.11 - -
## Sudeste 0.11 0.13 0.11 -
## Sul 0.11 0.29 0.11 0.40
##
## P value adjustment method: fdr
# Post-hoc pairwise Wilcoxon rank-sum tests between regions for the 2020 totals,
# with p-values adjusted by the false discovery rate ("fdr") method.
pmw2 <- pairwise.wilcox.test(dad_matr_INPE$tot2020,dad_matr_INPE$reg,p.adjust.method = "fdr")
# Print the matrix of adjusted p-values.
pmw2
##
## Pairwise comparisons using Wilcoxon rank sum exact test
##
## data: dad_matr_INPE$tot2020 and dad_matr_INPE$reg
##
## Centro-Oeste Nordeste Norte Sudeste
## Nordeste 0.29 - - -
## Norte 0.29 0.11 - -
## Sudeste 0.11 0.11 0.11 -
## Sul 0.11 0.29 0.11 0.40
##
## P value adjustment method: fdr
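Observa-se que somente a comparação Sudeste × Nordeste variou entre 2019 (p = 0.13) e 2020 (p = 0.11); em ambos os anos, nenhuma comparação par a par é significativa a 5% após o ajuste FDR.
Teste se a região interfere na matrícula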
# One-way ANOVA of the 2019 totals with region as the single factor.
modelo <- aov(formula = tot2019 ~ reg, data = dad_matr_INPE)
# Per-state residuals of the fitted model; they feed the assumption checks
# (normality and homogeneity of variances) that follow.
residuos <- resid(modelo)
print(residuos)
## 1 2 3 4 5 6
## -287311.29 -438930.29 462011.71 -537376.29 1590752.71 -483254.29
## 7 8 9 10 11 12
## -305892.29 441856.11 -656744.89 609763.11 -731567.89 -582246.89
## 13 14 15 16 17 18
## 680503.11 -686551.89 -1008588.89 1933578.11 -344838.50 -3827680.50
## 19 20 21 22 23 24
## -1136089.50 5308608.50 413201.00 -548720.00 135519.00 -236557.75
## 25 26 27
## -35821.75 531176.25 -258796.75
Critérios:
H0: os dados seguem uma distribuição normal
H1: os dados NÃO seguem uma distribuição normal
alpha = 0.05
# Shapiro-Wilk normality test on the ANOVA residuals.
shapiro.test(residuos)
##
## Shapiro-Wilk normality test
##
## data: residuos
## W = 0.79349, p-value = 0.0001058
Como p-valor < 0.05, rejeito H0 (os resíduos não seguem distribuição normal).
H0: todas as variâncias são iguais
H1: pelo menos uma das variâncias é diferente
alpha = 0.05
# Bartlett test of homogeneity of variances of the ANOVA residuals across regions.
bartlett.test(residuos~dad_matr_INPE$reg)
##
## Bartlett test of homogeneity of variances
##
## data: residuos by dad_matr_INPE$reg
## Bartlett's K-squared = 22.259, df = 4, p-value = 0.000178
Como p-valor < 0.05, rejeito H0: as variâncias não são iguais.