bibliotecas

Preparação dos dados

Abrir bases originais:

# abrir base e criar amostras
#dados_finais_2000_2010 <- readRDS("~/RStudio/ENEM/2_Dado tratado/dados_finais_2000_2010.rds")
#df<-slice_sample(dados_finais_2000_2010,prop = 0.01) 
#saveRDS(df, file = "sample_00_10.rds")
#table(dados_finais_2000_2010$NU_ANO)
#table(df$NU_ANO)
#dados_finais_2000_2010 <- readRDS("~/RStudio/ENEM/2_Dado tratado/dados_finais_2012_2014.rds")

#df2<-slice_sample(dados_finais_2000_2010,prop = 0.01) 
#saveRDS(df2, file = "sample_12_14.rds")


#dados_finais_2015_2020 <- readRDS("~/RStudio/ENEM/2_Dado tratado/dados_finais_2015_2020.rds")
#df3<-slice_sample(dados_finais_2015_2020,prop = 0.01) 
#saveRDS(df3, file = "sample_15_20.rds")
# Load the pre-drawn ENEM samples, one file per range of exam editions.
# 2000-2010 editions
sample_00_10 <- readRDS("~/RStudio/ENEM/sample_00_10.rds")
# 2012-2014 editions
sample_12_14 <- readRDS("~/RStudio/ENEM/sample_12_14.rds")
# 2015-2020 editions
sample_15_20 <- readRDS("~/RStudio/ENEM/sample_15_20.rds")

# Environmental variables: one .rds file per region, stacked into a single
# data frame. The Centro file used to be read and then discarded without
# being stacked; it is now appended after the other four regions so the
# unified table covers every regional file.
# NOTE(review): assumes Centro.rds shares the column layout of the other
# regional files -- confirm before relying on the merged result.
env_dir <- "~/RStudio/ENEM/Data-20220615T014950Z-001/Data"
env_regions <- c("NE", "NO", "SE", "Sul", "Centro")
df_Am <- bind_rows(
  lapply(file.path(env_dir, paste0(env_regions, ".rds")), readRDS)
)
rm(env_dir, env_regions)
head(df_Am)

Preparar Bases:

Selecionar Variaveis de Interesse


# Recode sex to the letters "M"/"F" in the two older samples.
# 2000-2010: TP_SEXO is filled from column Q1 ("A" -> "M", "B" -> "F").
sample_00_10$TP_SEXO <- replace(
  sample_00_10$TP_SEXO, sample_00_10$Q1 == "A", "M"
)
sample_00_10$TP_SEXO <- replace(
  sample_00_10$TP_SEXO, sample_00_10$Q1 == "B", "F"
)

# 2012-2014: TP_SEXO is stored as "0"/"1" ("0" -> "M", "1" -> "F").
sample_12_14$TP_SEXO <- replace(
  sample_12_14$TP_SEXO, sample_12_14$TP_SEXO == "0", "M"
)
sample_12_14$TP_SEXO <- replace(
  sample_12_14$TP_SEXO, sample_12_14$TP_SEXO == "1", "F"
)

# 2000-2010 sample: keep only the variables of interest.
sample_00_10 <- sample_00_10 %>%
  select(
    NU_INSCRICAO, # registration number
    TP_SEXO,
    NU_ANO, # exam year
    SG_UF_RESIDENCIA, # state
    NO_MUNICIPIO_RESIDENCIA, # municipality name
    CO_MUNICIPIO_RESIDENCIA, # municipality code
    NU_NOTA_REDACAO, # essay score
    NU_NOTA_COMP1, # component scores 1-5
    NU_NOTA_COMP2,
    NU_NOTA_COMP3,
    NU_NOTA_COMP4,
    NU_NOTA_COMP5,
    Q9, # father's education
    Q10,
    # school administrative dependency
    # (1 Federal, 2 State, 3 Municipal, 4 Private)
    TP_DEPENDENCIA_ADM_ESC,
    TP_LOCALIZACAO_ESC, # school location/zone (1 Urban, 2 Rural)
    Q15 # income, categories A-Q
  )



# 2012-2014 sample: keep only the variables of interest.
# NOTE(review): this sample keeps the state column as UF_RESIDENCIA while the
# 2000-2010 and 2015-2020 selections keep SG_UF_RESIDENCIA, so stacking the
# three samples yields two separate state columns. Confirm whether a rename
# is intended; code that indexes columns by position would need to follow.
sample_12_14 <- sample_12_14 %>%
  select(
    NU_INSCRICAO, # registration number
    TP_SEXO,
    NU_ANO, # exam year
    UF_RESIDENCIA, # state
    NO_MUNICIPIO_RESIDENCIA, # municipality name
    CO_MUNICIPIO_RESIDENCIA, # municipality code
    NU_NOTA_REDACAO, # essay score
    NU_NOTA_COMP1, # component scores 1-5
    NU_NOTA_COMP2,
    NU_NOTA_COMP3,
    NU_NOTA_COMP4,
    NU_NOTA_COMP5,
    Q9, # father's education
    Q10,
    # school administrative dependency
    # (1 Federal, 2 State, 3 Municipal, 4 Private)
    TP_DEPENDENCIA_ADM_ESC,
    TP_LOCALIZACAO_ESC, # school location/zone (1 Urban, 2 Rural)
    Q15 # income, categories A-Q
    # (further questionnaire items can be appended here)
  )
# 2015-2020 sample: keep only the variables of interest.
sample_15_20 <- sample_15_20 %>%
  select(
    NU_INSCRICAO, # registration number
    TP_SEXO,
    NU_ANO, # exam year
    SG_UF_RESIDENCIA, # state
    NO_MUNICIPIO_RESIDENCIA, # municipality name
    CO_MUNICIPIO_RESIDENCIA, # municipality code
    NU_NOTA_REDACAO, # essay score
    NU_NOTA_COMP1, # component scores 1-5
    NU_NOTA_COMP2,
    NU_NOTA_COMP3,
    NU_NOTA_COMP4,
    NU_NOTA_COMP5,
    Q9, # father's education
    Q10,
    # school administrative dependency
    # (1 Federal, 2 State, 3 Municipal, 4 Private)
    TP_DEPENDENCIA_ADM_ESC,
    TP_LOCALIZACAO_ESC, # school location/zone (1 Urban, 2 Rural)
    Q15 # income, categories A-Q
    # (further questionnaire items can be appended here)
  )
#base de Variaveis Ambientais

Fundir Bases:

# List the distinct NU_ANO (exam year) values present in df_enem.
unique(df_enem$NU_ANO)
 [1] "2007" "2008" "2010" "2003" "2004" "2001" "2009" "2005" "2000" "2002" "2006" "2014" "2013" "2012" "2020" "2017" "2018"
[18] "2016" "2015" "2019"

Análise Exploratória

Sumário dos Dados

Resumo dos Dados

# Adjust column types by position.
# List-style indexing (df[j]) is used instead of df[, j]: on a plain
# data.frame, df[, 2] collapses to a bare vector, so lapply() would iterate
# over its elements rather than over the column. df[j] always yields a list of
# columns, for data.frames and tibbles alike.
# NOTE(review): positions presumably follow the select() order above
# (2 = TP_SEXO, 4:6 = state/municipality, 7:12 = scores) -- confirm against
# names(df).
df[2] <- lapply(df[2], as.factor)
df[4:6] <- lapply(df[4:6], as.factor)
# Non-numeric entries become NA here (R warns "NAs introduced by coercion").
df[7:12] <- lapply(df[7:12], as.numeric)
Warning in lapply(df[, 7:12], as.numeric) : NAs introduced by coercion
Warning in lapply(df[, 7:12], as.numeric) : NAs introduced by coercion
Warning in lapply(df[, 7:12], as.numeric) : NAs introduced by coercion
Warning in lapply(df[, 7:12], as.numeric) : NAs introduced by coercion
Warning in lapply(df[, 7:12], as.numeric) : NAs introduced by coercion
Warning in lapply(df[, 7:12], as.numeric) : NAs introduced by coercion
# Columns 13 to 20 are treated as categorical.
df[13:20] <- lapply(df[13:20], as.factor)

Análise univariada

# Numeric variables
  plot_num(df)
Warning: `guides(<scale> = FALSE)` is deprecated. Please use `guides(<scale> = "none")` instead.


# Categorical variables
freq(df)
Warning in freq_logic(data = data, input = input[i], plot, na.rm, path_out = path_out) :
  Skipping plot for variable 'NU_INSCRICAO' (more than 100 categories)
Warning: `guides(<scale> = FALSE)` is deprecated. Please use `guides(<scale> = "none")` instead.
Warning: `guides(<scale> = FALSE)` is deprecated. Please use `guides(<scale> = "none")` instead.
Warning in freq_logic(data = data, input = input[i], plot, na.rm, path_out = path_out) :
  Skipping plot for variable 'NO_MUNICIPIO_RESIDENCIA' (more than 100 categories)
Warning in freq_logic(data = data, input = input[i], plot, na.rm, path_out = path_out) :
  Skipping plot for variable 'CO_MUNICIPIO_RESIDENCIA' (more than 100 categories)
Warning: `guides(<scale> = FALSE)` is deprecated. Please use `guides(<scale> = "none")` instead.
Warning: `guides(<scale> = FALSE)` is deprecated. Please use `guides(<scale> = "none")` instead.
Warning: `guides(<scale> = FALSE)` is deprecated. Please use `guides(<scale> = "none")` instead.
Warning: `guides(<scale> = FALSE)` is deprecated. Please use `guides(<scale> = "none")` instead.
Warning: `guides(<scale> = FALSE)` is deprecated. Please use `guides(<scale> = "none")` instead.
Warning: `guides(<scale> = FALSE)` is deprecated. Please use `guides(<scale> = "none")` instead.
Warning: `guides(<scale> = FALSE)` is deprecated. Please use `guides(<scale> = "none")` instead.
Warning: `guides(<scale> = FALSE)` is deprecated. Please use `guides(<scale> = "none")` instead.

[1] "Variables processed: NU_INSCRICAO, TP_SEXO, SG_UF_RESIDENCIA, NO_MUNICIPIO_RESIDENCIA, CO_MUNICIPIO_RESIDENCIA, Q9, Q10, TP_DEPENDENCIA_ADM_ESC, TP_LOCALIZACAO_ESC, Q15, UF_RESIDENCIA, name_state, name_region"

Análise de Correlações

# Scatter plot of each score column (positions 7 to 12 of df1) against the
# exam year, one panel per state, with a smooth trend line.
for (i in 7:12) {
  print(i)
  score_col <- names(df1)[i]
  p <- ggplot(
    df1,
    # Columns are referenced through the .data pronoun rather than df1$...,
    # so each facet panel sees its own rows. NU_ANO is stored as text, which
    # made ggplot treat the x axis as discrete and every smooth fit fail; it
    # is converted to a number here.
    aes(
      x = as.numeric(as.character(.data$NU_ANO)),
      y = .data[[score_col]]
    )
  ) +
    facet_wrap(~name_state) +
    geom_point(alpha = 0.1) +
    # There are at most 20 distinct years, so the spline basis is kept small
    # (k = 5) to let panels with few observed years still be fitted.
    geom_smooth(method = "gam", formula = y ~ s(x, bs = "cs", k = 5)) +
    xlab("NU_ANO") +
    ylab(A[i])
  print(p)
  # Short pause between consecutive plots.
  Sys.sleep(1)
}
[1] 7
`geom_smooth()` using method = 'gam' and formula 'y ~ s(x, bs = "cs")'
Warning: Computation failed in `stat_smooth()`:
NA/NaN/Inf in foreign function call (arg 3)
Warning: Computation failed in `stat_smooth()`:
NA/NaN/Inf in foreign function call (arg 3)
Warning: Computation failed in `stat_smooth()`:
NA/NaN/Inf in foreign function call (arg 3)
Warning: Computation failed in `stat_smooth()`:
NA/NaN/Inf in foreign function call (arg 3)
Warning: Computation failed in `stat_smooth()`:
NA/NaN/Inf in foreign function call (arg 3)
Warning: Computation failed in `stat_smooth()`:
NA/NaN/Inf in foreign function call (arg 3)
Warning: Computation failed in `stat_smooth()`:
NA/NaN/Inf in foreign function call (arg 3)
Warning: Computation failed in `stat_smooth()`:
NA/NaN/Inf in foreign function call (arg 3)
Warning: Computation failed in `stat_smooth()`:
NA/NaN/Inf in foreign function call (arg 3)
Warning: Computation failed in `stat_smooth()`:
NA/NaN/Inf in foreign function call (arg 3)
Warning: Computation failed in `stat_smooth()`:
NA/NaN/Inf in foreign function call (arg 3)
Warning: Computation failed in `stat_smooth()`:
NA/NaN/Inf in foreign function call (arg 3)
Warning: Computation failed in `stat_smooth()`:
NA/NaN/Inf in foreign function call (arg 3)
Warning: Computation failed in `stat_smooth()`:
NA/NaN/Inf in foreign function call (arg 3)
Warning: Computation failed in `stat_smooth()`:
NA/NaN/Inf in foreign function call (arg 3)
Warning: Computation failed in `stat_smooth()`:
NA/NaN/Inf in foreign function call (arg 3)
Warning: Computation failed in `stat_smooth()`:
NA/NaN/Inf in foreign function call (arg 3)
Warning: Computation failed in `stat_smooth()`:
NA/NaN/Inf in foreign function call (arg 3)
Warning: Computation failed in `stat_smooth()`:
NA/NaN/Inf in foreign function call (arg 3)
Warning: Computation failed in `stat_smooth()`:
NA/NaN/Inf in foreign function call (arg 3)
Warning: Computation failed in `stat_smooth()`:
NA/NaN/Inf in foreign function call (arg 3)
Warning: Computation failed in `stat_smooth()`:
NA/NaN/Inf in foreign function call (arg 3)
[1] 8
`geom_smooth()` using method = 'gam' and formula 'y ~ s(x, bs = "cs")'
Warning: Computation failed in `stat_smooth()`:
NA/NaN/Inf in foreign function call (arg 3)
Warning: Computation failed in `stat_smooth()`:
NA/NaN/Inf in foreign function call (arg 3)
Warning: Computation failed in `stat_smooth()`:
NA/NaN/Inf in foreign function call (arg 3)
Warning: Computation failed in `stat_smooth()`:
NA/NaN/Inf in foreign function call (arg 3)
Warning: Computation failed in `stat_smooth()`:
NA/NaN/Inf in foreign function call (arg 3)
Warning: Computation failed in `stat_smooth()`:
NA/NaN/Inf in foreign function call (arg 3)
Warning: Computation failed in `stat_smooth()`:
NA/NaN/Inf in foreign function call (arg 3)
Warning: Computation failed in `stat_smooth()`:
NA/NaN/Inf in foreign function call (arg 3)
Warning: Computation failed in `stat_smooth()`:
NA/NaN/Inf in foreign function call (arg 3)
Warning: Computation failed in `stat_smooth()`:
NA/NaN/Inf in foreign function call (arg 3)
Warning: Computation failed in `stat_smooth()`:
NA/NaN/Inf in foreign function call (arg 3)
Warning: Computation failed in `stat_smooth()`:
NA/NaN/Inf in foreign function call (arg 3)
Warning: Computation failed in `stat_smooth()`:
NA/NaN/Inf in foreign function call (arg 3)
Warning: Computation failed in `stat_smooth()`:
NA/NaN/Inf in foreign function call (arg 3)
Warning: Computation failed in `stat_smooth()`:
NA/NaN/Inf in foreign function call (arg 3)
Warning: Computation failed in `stat_smooth()`:
NA/NaN/Inf in foreign function call (arg 3)
Warning: Computation failed in `stat_smooth()`:
NA/NaN/Inf in foreign function call (arg 3)
Warning: Computation failed in `stat_smooth()`:
NA/NaN/Inf in foreign function call (arg 3)
Warning: Computation failed in `stat_smooth()`:
NA/NaN/Inf in foreign function call (arg 3)
Warning: Computation failed in `stat_smooth()`:
NA/NaN/Inf in foreign function call (arg 3)
Warning: Computation failed in `stat_smooth()`:
NA/NaN/Inf in foreign function call (arg 3)
Warning: Computation failed in `stat_smooth()`:
NA/NaN/Inf in foreign function call (arg 3)
[1] 9
`geom_smooth()` using method = 'gam' and formula 'y ~ s(x, bs = "cs")'
Warning: Computation failed in `stat_smooth()`:
NA/NaN/Inf in foreign function call (arg 3)
Warning: Computation failed in `stat_smooth()`:
NA/NaN/Inf in foreign function call (arg 3)
Warning: Computation failed in `stat_smooth()`:
NA/NaN/Inf in foreign function call (arg 3)
Warning: Computation failed in `stat_smooth()`:
NA/NaN/Inf in foreign function call (arg 3)
Warning: Computation failed in `stat_smooth()`:
NA/NaN/Inf in foreign function call (arg 3)
Warning: Computation failed in `stat_smooth()`:
NA/NaN/Inf in foreign function call (arg 3)
Warning: Computation failed in `stat_smooth()`:
NA/NaN/Inf in foreign function call (arg 3)
Warning: Computation failed in `stat_smooth()`:
NA/NaN/Inf in foreign function call (arg 3)
Warning: Computation failed in `stat_smooth()`:
NA/NaN/Inf in foreign function call (arg 3)
Warning: Computation failed in `stat_smooth()`:
NA/NaN/Inf in foreign function call (arg 3)
Warning: Computation failed in `stat_smooth()`:
NA/NaN/Inf in foreign function call (arg 3)
Warning: Computation failed in `stat_smooth()`:
NA/NaN/Inf in foreign function call (arg 3)
Warning: Computation failed in `stat_smooth()`:
NA/NaN/Inf in foreign function call (arg 3)
Warning: Computation failed in `stat_smooth()`:
NA/NaN/Inf in foreign function call (arg 3)
Warning: Computation failed in `stat_smooth()`:
NA/NaN/Inf in foreign function call (arg 3)
Warning: Computation failed in `stat_smooth()`:
NA/NaN/Inf in foreign function call (arg 3)
Warning: Computation failed in `stat_smooth()`:
NA/NaN/Inf in foreign function call (arg 3)
Warning: Computation failed in `stat_smooth()`:
NA/NaN/Inf in foreign function call (arg 3)
Warning: Computation failed in `stat_smooth()`:
NA/NaN/Inf in foreign function call (arg 3)
Warning: Computation failed in `stat_smooth()`:
NA/NaN/Inf in foreign function call (arg 3)
Warning: Computation failed in `stat_smooth()`:
NA/NaN/Inf in foreign function call (arg 3)
Warning: Computation failed in `stat_smooth()`:
NA/NaN/Inf in foreign function call (arg 3)
[1] 10
`geom_smooth()` using method = 'gam' and formula 'y ~ s(x, bs = "cs")'
Warning: Computation failed in `stat_smooth()`:
NA/NaN/Inf in foreign function call (arg 3)
Warning: Computation failed in `stat_smooth()`:
NA/NaN/Inf in foreign function call (arg 3)
Warning: Computation failed in `stat_smooth()`:
NA/NaN/Inf in foreign function call (arg 3)
Warning: Computation failed in `stat_smooth()`:
NA/NaN/Inf in foreign function call (arg 3)
Warning: Computation failed in `stat_smooth()`:
NA/NaN/Inf in foreign function call (arg 3)
Warning: Computation failed in `stat_smooth()`:
NA/NaN/Inf in foreign function call (arg 3)
Warning: Computation failed in `stat_smooth()`:
NA/NaN/Inf in foreign function call (arg 3)
Warning: Computation failed in `stat_smooth()`:
NA/NaN/Inf in foreign function call (arg 3)
Warning: Computation failed in `stat_smooth()`:
NA/NaN/Inf in foreign function call (arg 3)
Warning: Computation failed in `stat_smooth()`:
NA/NaN/Inf in foreign function call (arg 3)
Warning: Computation failed in `stat_smooth()`:
NA/NaN/Inf in foreign function call (arg 3)
Warning: Computation failed in `stat_smooth()`:
NA/NaN/Inf in foreign function call (arg 3)
Warning: Computation failed in `stat_smooth()`:
NA/NaN/Inf in foreign function call (arg 3)
Warning: Computation failed in `stat_smooth()`:
NA/NaN/Inf in foreign function call (arg 3)
Warning: Computation failed in `stat_smooth()`:
NA/NaN/Inf in foreign function call (arg 3)
Warning: Computation failed in `stat_smooth()`:
NA/NaN/Inf in foreign function call (arg 3)
Warning: Computation failed in `stat_smooth()`:
NA/NaN/Inf in foreign function call (arg 3)
Warning: Computation failed in `stat_smooth()`:
NA/NaN/Inf in foreign function call (arg 3)
Warning: Computation failed in `stat_smooth()`:
NA/NaN/Inf in foreign function call (arg 3)
Warning: Computation failed in `stat_smooth()`:
NA/NaN/Inf in foreign function call (arg 3)
Warning: Computation failed in `stat_smooth()`:
NA/NaN/Inf in foreign function call (arg 3)
Warning: Computation failed in `stat_smooth()`:
NA/NaN/Inf in foreign function call (arg 3)
[1] 11
`geom_smooth()` using method = 'gam' and formula 'y ~ s(x, bs = "cs")'
Warning: Computation failed in `stat_smooth()`:
NA/NaN/Inf in foreign function call (arg 3)
Warning: Computation failed in `stat_smooth()`:
NA/NaN/Inf in foreign function call (arg 3)
Warning: Computation failed in `stat_smooth()`:
NA/NaN/Inf in foreign function call (arg 3)
Warning: Computation failed in `stat_smooth()`:
NA/NaN/Inf in foreign function call (arg 3)
Warning: Computation failed in `stat_smooth()`:
NA/NaN/Inf in foreign function call (arg 3)
Warning: Computation failed in `stat_smooth()`:
NA/NaN/Inf in foreign function call (arg 3)
Warning: Computation failed in `stat_smooth()`:
NA/NaN/Inf in foreign function call (arg 3)
Warning: Computation failed in `stat_smooth()`:
NA/NaN/Inf in foreign function call (arg 3)
Warning: Computation failed in `stat_smooth()`:
NA/NaN/Inf in foreign function call (arg 3)
Warning: Computation failed in `stat_smooth()`:
NA/NaN/Inf in foreign function call (arg 3)
Warning: Computation failed in `stat_smooth()`:
NA/NaN/Inf in foreign function call (arg 3)
Warning: Computation failed in `stat_smooth()`:
NA/NaN/Inf in foreign function call (arg 3)
Warning: Computation failed in `stat_smooth()`:
NA/NaN/Inf in foreign function call (arg 3)
Warning: Computation failed in `stat_smooth()`:
NA/NaN/Inf in foreign function call (arg 3)
Warning: Computation failed in `stat_smooth()`:
NA/NaN/Inf in foreign function call (arg 3)
Warning: Computation failed in `stat_smooth()`:
NA/NaN/Inf in foreign function call (arg 3)
Warning: Computation failed in `stat_smooth()`:
NA/NaN/Inf in foreign function call (arg 3)
Warning: Computation failed in `stat_smooth()`:
NA/NaN/Inf in foreign function call (arg 3)
Warning: Computation failed in `stat_smooth()`:
NA/NaN/Inf in foreign function call (arg 3)
Warning: Computation failed in `stat_smooth()`:
NA/NaN/Inf in foreign function call (arg 3)
Warning: Computation failed in `stat_smooth()`:
NA/NaN/Inf in foreign function call (arg 3)
Warning: Computation failed in `stat_smooth()`:
NA/NaN/Inf in foreign function call (arg 3)
[1] 12
`geom_smooth()` using method = 'gam' and formula 'y ~ s(x, bs = "cs")'
Warning: Computation failed in `stat_smooth()`:
NA/NaN/Inf in foreign function call (arg 3)
Warning: Computation failed in `stat_smooth()`:
NA/NaN/Inf in foreign function call (arg 3)
Warning: Computation failed in `stat_smooth()`:
NA/NaN/Inf in foreign function call (arg 3)
Warning: Computation failed in `stat_smooth()`:
NA/NaN/Inf in foreign function call (arg 3)
Warning: Computation failed in `stat_smooth()`:
NA/NaN/Inf in foreign function call (arg 3)
Warning: Computation failed in `stat_smooth()`:
NA/NaN/Inf in foreign function call (arg 3)
Warning: Computation failed in `stat_smooth()`:
NA/NaN/Inf in foreign function call (arg 3)
Warning: Computation failed in `stat_smooth()`:
NA/NaN/Inf in foreign function call (arg 3)
Warning: Computation failed in `stat_smooth()`:
NA/NaN/Inf in foreign function call (arg 3)
Warning: Computation failed in `stat_smooth()`:
NA/NaN/Inf in foreign function call (arg 3)
Warning: Computation failed in `stat_smooth()`:
NA/NaN/Inf in foreign function call (arg 3)
Warning: Computation failed in `stat_smooth()`:
NA/NaN/Inf in foreign function call (arg 3)
Warning: Computation failed in `stat_smooth()`:
NA/NaN/Inf in foreign function call (arg 3)
Warning: Computation failed in `stat_smooth()`:
NA/NaN/Inf in foreign function call (arg 3)
Warning: Computation failed in `stat_smooth()`:
NA/NaN/Inf in foreign function call (arg 3)
Warning: Computation failed in `stat_smooth()`:
NA/NaN/Inf in foreign function call (arg 3)
Warning: Computation failed in `stat_smooth()`:
NA/NaN/Inf in foreign function call (arg 3)
Warning: Computation failed in `stat_smooth()`:
NA/NaN/Inf in foreign function call (arg 3)
Warning: Computation failed in `stat_smooth()`:
NA/NaN/Inf in foreign function call (arg 3)
Warning: Computation failed in `stat_smooth()`:
NA/NaN/Inf in foreign function call (arg 3)
Warning: Computation failed in `stat_smooth()`:
NA/NaN/Inf in foreign function call (arg 3)
Warning: Computation failed in `stat_smooth()`:
NA/NaN/Inf in foreign function call (arg 3)

Pelos gráficos, parece que as escalas de notas são realmente diferentes entre os anos; por isso, o ano será tratado como um efeito fixo.

# Scatter plot of each score column (positions 7 to 12 of df1) against PM2.5
# (pm25_ugm3), one panel per state, with a smooth trend line.
for (i in 7:12) {
  print(i)
  score_col <- names(df1)[i]
  # Columns are referenced by name / the .data pronoun rather than df1$...,
  # so each facet panel sees its own rows and the x axis is labelled with the
  # column name.
  p <- ggplot(df1, aes(x = pm25_ugm3, y = .data[[score_col]])) +
    facet_wrap(~name_state) +
    geom_point(alpha = 0.1) +
    geom_smooth() +
    ylab(A[i])
  print(p)
  # Short pause between consecutive plots.
  Sys.sleep(1)
}
[1] 7
`geom_smooth()` using method = 'gam' and formula 'y ~ s(x, bs = "cs")'
Warning: Removed 2927 rows containing non-finite values (stat_smooth).
Warning: Computation failed in `stat_smooth()`:
x has insufficient unique values to support 10 knots: reduce k.
Warning: Removed 2927 rows containing missing values (geom_point).
[1] 8
`geom_smooth()` using method = 'gam' and formula 'y ~ s(x, bs = "cs")'
Warning: Removed 3001 rows containing non-finite values (stat_smooth).
Warning: Computation failed in `stat_smooth()`:
x has insufficient unique values to support 10 knots: reduce k.
Warning: Removed 3001 rows containing missing values (geom_point).
[1] 9
`geom_smooth()` using method = 'gam' and formula 'y ~ s(x, bs = "cs")'
Warning: Removed 3001 rows containing non-finite values (stat_smooth).
Warning: Computation failed in `stat_smooth()`:
x has insufficient unique values to support 10 knots: reduce k.
Warning: Removed 3001 rows containing missing values (geom_point).
[1] 10
`geom_smooth()` using method = 'gam' and formula 'y ~ s(x, bs = "cs")'
Warning: Removed 3001 rows containing non-finite values (stat_smooth).
Warning: Computation failed in `stat_smooth()`:
x has insufficient unique values to support 10 knots: reduce k.
Warning: Removed 3001 rows containing missing values (geom_point).
[1] 11
`geom_smooth()` using method = 'gam' and formula 'y ~ s(x, bs = "cs")'
Warning: Removed 3001 rows containing non-finite values (stat_smooth).
Warning: Computation failed in `stat_smooth()`:
x has insufficient unique values to support 10 knots: reduce k.
Warning: Removed 3001 rows containing missing values (geom_point).
[1] 12
`geom_smooth()` using method = 'gam' and formula 'y ~ s(x, bs = "cs")'
Warning: Removed 3001 rows containing non-finite values (stat_smooth).
Warning: Computation failed in `stat_smooth()`:
x has insufficient unique values to support 10 knots: reduce k.
Warning: Removed 3001 rows containing missing values (geom_point).


# Scatter plot of each score column (positions 7 to 12 of df1) against NO2
# (no2_ppb), one panel per state, with a smooth trend line.
for (i in 7:12) {
  print(i)
  score_col <- names(df1)[i]
  # Columns are referenced by name / the .data pronoun rather than df1$...,
  # so each facet panel sees its own rows and the x axis is labelled with the
  # column name.
  p <- ggplot(df1, aes(x = no2_ppb, y = .data[[score_col]])) +
    facet_wrap(~name_state) +
    geom_point(alpha = 0.1) +
    geom_smooth() +
    ylab(A[i])
  print(p)
  # Short pause between consecutive plots.
  Sys.sleep(1)
}
[1] 7
`geom_smooth()` using method = 'gam' and formula 'y ~ s(x, bs = "cs")'
Warning: Removed 3088 rows containing non-finite values (stat_smooth).
Warning: Computation failed in `stat_smooth()`:
x has insufficient unique values to support 10 knots: reduce k.
Warning: Removed 3088 rows containing missing values (geom_point).
[1] 8
`geom_smooth()` using method = 'gam' and formula 'y ~ s(x, bs = "cs")'
Warning: Removed 3162 rows containing non-finite values (stat_smooth).
Warning: Computation failed in `stat_smooth()`:
x has insufficient unique values to support 10 knots: reduce k.
Warning: Removed 3162 rows containing missing values (geom_point).
[1] 9
`geom_smooth()` using method = 'gam' and formula 'y ~ s(x, bs = "cs")'
Warning: Removed 3162 rows containing non-finite values (stat_smooth).
Warning: Computation failed in `stat_smooth()`:
x has insufficient unique values to support 10 knots: reduce k.
Warning: Removed 3162 rows containing missing values (geom_point).
[1] 10
`geom_smooth()` using method = 'gam' and formula 'y ~ s(x, bs = "cs")'
Warning: Removed 3162 rows containing non-finite values (stat_smooth).
Warning: Computation failed in `stat_smooth()`:
x has insufficient unique values to support 10 knots: reduce k.
Warning: Removed 3162 rows containing missing values (geom_point).
[1] 11
`geom_smooth()` using method = 'gam' and formula 'y ~ s(x, bs = "cs")'
Warning: Removed 3162 rows containing non-finite values (stat_smooth).
Warning: Computation failed in `stat_smooth()`:
x has insufficient unique values to support 10 knots: reduce k.
Warning: Removed 3162 rows containing missing values (geom_point).
[1] 12
`geom_smooth()` using method = 'gam' and formula 'y ~ s(x, bs = "cs")'
Warning: Removed 3162 rows containing non-finite values (stat_smooth).
Warning: Computation failed in `stat_smooth()`:
x has insufficient unique values to support 10 knots: reduce k.
Warning: Removed 3162 rows containing missing values (geom_point).


# Scatter plot of each score column (positions 7 to 12 of df1) against O3
# (o3_ppb), one panel per state, with a smooth trend line.
for (i in 7:12) {
  print(i)
  score_col <- names(df1)[i]
  # Columns are referenced by name / the .data pronoun rather than df1$...,
  # so each facet panel sees its own rows and the x axis is labelled with the
  # column name.
  p <- ggplot(df1, aes(x = o3_ppb, y = .data[[score_col]])) +
    facet_wrap(~name_state) +
    geom_point(alpha = 0.1) +
    geom_smooth() +
    ylab(A[i])
  print(p)
  # Short pause between consecutive plots.
  Sys.sleep(1)
}
[1] 7
`geom_smooth()` using method = 'gam' and formula 'y ~ s(x, bs = "cs")'
Warning: Removed 2719 rows containing non-finite values (stat_smooth).
Warning: Computation failed in `stat_smooth()`:
x has insufficient unique values to support 10 knots: reduce k.
Warning: Removed 2719 rows containing missing values (geom_point).
[1] 8
`geom_smooth()` using method = 'gam' and formula 'y ~ s(x, bs = "cs")'
Warning: Removed 2793 rows containing non-finite values (stat_smooth).
Warning: Computation failed in `stat_smooth()`:
x has insufficient unique values to support 10 knots: reduce k.
Warning: Removed 2793 rows containing missing values (geom_point).
[1] 9
`geom_smooth()` using method = 'gam' and formula 'y ~ s(x, bs = "cs")'
Warning: Removed 2793 rows containing non-finite values (stat_smooth).
Warning: Computation failed in `stat_smooth()`:
x has insufficient unique values to support 10 knots: reduce k.
Warning: Removed 2793 rows containing missing values (geom_point).
[1] 10
`geom_smooth()` using method = 'gam' and formula 'y ~ s(x, bs = "cs")'
Warning: Removed 2793 rows containing non-finite values (stat_smooth).
Warning: Computation failed in `stat_smooth()`:
x has insufficient unique values to support 10 knots: reduce k.
Warning: Removed 2793 rows containing missing values (geom_point).
[1] 11
`geom_smooth()` using method = 'gam' and formula 'y ~ s(x, bs = "cs")'
Warning: Removed 2793 rows containing non-finite values (stat_smooth).
Warning: Computation failed in `stat_smooth()`:
x has insufficient unique values to support 10 knots: reduce k.
Warning: Removed 2793 rows containing missing values (geom_point).
[1] 12
`geom_smooth()` using method = 'gam' and formula 'y ~ s(x, bs = "cs")'
Warning: Removed 2793 rows containing non-finite values (stat_smooth).
Warning: Computation failed in `stat_smooth()`:
x has insufficient unique values to support 10 knots: reduce k.
Warning: Removed 2793 rows containing missing values (geom_point).

Regressões

Modelo básico de referência

# Reference models: essay score (NU_NOTA_REDACAO) as a function of NO2.
# Ordinary linear model, ignoring the state grouping.
mainlm <- lm(NU_NOTA_REDACAO ~ no2_ppb, data = df1)
# Mixed-effects model: intercept and NO2 slope vary by state of residence.
mainlmer <- lmer(
  NU_NOTA_REDACAO ~ no2_ppb + (1 + no2_ppb | SG_UF_RESIDENCIA),
  data = df1
)
Warning in checkConv(attr(opt, "derivs"), opt$par, ctrl = control$checkConv,  :
  Model failed to converge with max|grad| = 0.0369633 (tol = 0.002, component 1)
# Print the summary of the ordinary linear reference model.
summary(mainlm)

Call:
lm(formula = NU_NOTA_REDACAO ~ no2_ppb, data = df1)

Residuals:
    Min      1Q  Median      3Q     Max 
-337.92 -281.06   46.86  237.44  669.20 

Coefficients:
            Estimate Std. Error t value Pr(>|t|)    
(Intercept) 338.5043     4.7211  71.701  < 2e-16 ***
no2_ppb      -5.1953     0.9078  -5.723  1.1e-08 ***
---
Signif. codes:  0 ‘***’ 0.001 ‘**’ 0.01 ‘*’ 0.05 ‘.’ 0.1 ‘ ’ 1

Residual standard error: 277.3 on 6068 degrees of freedom
  (3088 observations deleted due to missingness)
Multiple R-squared:  0.005368,  Adjusted R-squared:  0.005205 
F-statistic: 32.75 on 1 and 6068 DF,  p-value: 1.097e-08
# Print the summary of the mixed-effects reference model.
summary(mainlmer)
Linear mixed model fit by REML. t-tests use Satterthwaite's method ['lmerModLmerTest']
Formula: NU_NOTA_REDACAO ~ no2_ppb + (1 + no2_ppb | SG_UF_RESIDENCIA)
   Data: df1

REML criterion at convergence: 60691.2

Scaled residuals: 
    Min      1Q  Median      3Q     Max 
-1.3536 -0.8972 -0.5688  0.9095  2.5544 

Random effects:
 Groups           Name        Variance Std.Dev. Corr 
 SG_UF_RESIDENCIA (Intercept)  3617.0   60.14        
                  no2_ppb       131.7   11.48   -0.99
 Residual                     76992.9  277.48        
Number of obs: 4306, groups:  SG_UF_RESIDENCIA, 25

Fixed effects:
            Estimate Std. Error      df t value Pr(>|t|)    
(Intercept)  311.604     15.122  15.247  20.606 1.51e-12 ***
no2_ppb       -8.025      3.414   7.868  -2.351   0.0472 *  
---
Signif. codes:  0 ‘***’ 0.001 ‘**’ 0.01 ‘*’ 0.05 ‘.’ 0.1 ‘ ’ 1

Correlation of Fixed Effects:
        (Intr)
no2_ppb -0.868
optimizer (nloptwrap) convergence code: 0 (OK)
Model failed to converge with max|grad| = 0.0369633 (tol = 0.002, component 1)

extração de Coeficientes

# Collect the per-group coefficients of a mixed model together with their
# standard errors.
#
# modelo: a fitted mixed model with one intercept and one slope varying by
#   the first grouping factor (the result is expected to have 5 columns).
#
# Returns the column-bound table of: coefficients of the first grouping
# factor, the matching standard errors from
# standard_error(effects = "random"), and the group labels (row names of the
# coefficient table). Columns are named: <first coefficient name>, "SLOPE",
# "SE_ <name of third column>", "SE_ SLOPE", "fator".
coef_int <- function(modelo) {
  group_coefs <- coef(modelo)[[1]]
  group_se <- standard_error(modelo, effects = "random")[[1]]
  out <- cbind(group_coefs, group_se, rownames(group_coefs))
  colnames(out) <- c(
    colnames(out)[1],
    "SLOPE",
    paste("SE_", colnames(out)[3]),
    paste("SE_", "SLOPE"),
    "fator"
  )
  out
}
# Per-state coefficients and standard errors of the reference mixed model.
# Columns of G by position: 1 = intercept, 2 = slope, 3 = SE of intercept,
# 4 = SE of slope, 5 = state label.
G <- coef_int(mainlmer)

# Both plots show each state's estimate with a 90% interval
# (estimate +/- 1.645 * SE). The bounds are computed up front as named
# columns, so the lower bound is passed as xmin and the upper bound as xmax
# (they were swapped before) and aes() no longer indexes G directly.

# Intercept plot
data.frame(
  estado = G[, 5],
  estimativa = G[, 1],
  ic_inf = G[, 1] - 1.645 * G[, 3],
  ic_sup = G[, 1] + 1.645 * G[, 3]
) %>%
  ggplot(aes(x = estimativa, y = estado, xmin = ic_inf, xmax = ic_sup)) +
  geom_point() +
  geom_errorbar(size = 0.1) +
  geom_pointrange(size = 0.1) +
  # Reference line at zero.
  geom_vline(xintercept = 0, color = "red", size = 1.0) +
  labs(
    title = "Modelo mistos",
    subtitle = "Coeficientes(IC90%)",
    caption = "EPPG FGV"
  ) +
  xlab("Intercept") +
  ylab("Estado")

# Slope plot
data.frame(
  estado = G[, 5],
  estimativa = G[, 2],
  ic_inf = G[, 2] - 1.645 * G[, 4],
  ic_sup = G[, 2] + 1.645 * G[, 4]
) %>%
  ggplot(aes(x = estimativa, y = estado, xmin = ic_inf, xmax = ic_sup)) +
  geom_point() +
  geom_errorbar(size = 0.1) +
  geom_pointrange(size = 0.1) +
  # Reference line at zero.
  geom_vline(xintercept = 0, color = "red", size = 1.0) +
  labs(
    title = "Modelo mistos",
    subtitle = "Coeficientes(IC90%)",
    caption = "EPPG FGV"
  ) +
  xlab("Slope") +
  ylab("Estado")

NA
NA

todos os Modelos

df1$NU_NOTA_COMP5<-as.numeric(df1$NU_NOTA_COMP5)
  #Variação de Poluente
mod_Red_NO2<-lmer(NU_NOTA_REDACAO~ no2_ppb + (1+no2_ppb|SG_UF_RESIDENCIA),data =df1)
mod_Red_O3<-lmer(NU_NOTA_REDACAO~ o3_ppb + (1+o3_ppb|SG_UF_RESIDENCIA),data =df1 )
Warning in checkConv(attr(opt, "derivs"), opt$par, ctrl = control$checkConv,  :
  Model failed to converge with max|grad| = 0.0104493 (tol = 0.002, component 1)
mod_Red_PM25<-lmer(NU_NOTA_REDACAO~ pm25_ugm3 + (1+pm25_ugm3|SG_UF_RESIDENCIA),data =df1 )

mod_COMP1_NO2<-lmer(NU_NOTA_COMP1~ no2_ppb + (1+no2_ppb|SG_UF_RESIDENCIA),data =df1)
mod_COMP1_O3<-lmer(NU_NOTA_COMP1~ o3_ppb + (1+o3_ppb|SG_UF_RESIDENCIA),data =df1 )
Warning in checkConv(attr(opt, "derivs"), opt$par, ctrl = control$checkConv,  :
  Model failed to converge with max|grad| = 7.76049 (tol = 0.002, component 1)
mod_COMP1_PM25<-lmer(NU_NOTA_COMP1~ pm25_ugm3 + (1+pm25_ugm3|SG_UF_RESIDENCIA),data =df1 )
Warning in checkConv(attr(opt, "derivs"), opt$par, ctrl = control$checkConv,  :
  Model failed to converge with max|grad| = 0.137549 (tol = 0.002, component 1)
mod_COMP2_NO2<-lmer(NU_NOTA_COMP2~ no2_ppb + (1+no2_ppb|SG_UF_RESIDENCIA),data =df1)
mod_COMP2_O3<-lmer(NU_NOTA_COMP2~ o3_ppb + (1+o3_ppb|SG_UF_RESIDENCIA),data =df1 )
Warning in checkConv(attr(opt, "derivs"), opt$par, ctrl = control$checkConv,  :
  Model failed to converge with max|grad| = 0.682631 (tol = 0.002, component 1)
mod_COMP2_PM25<-lmer(NU_NOTA_COMP2~ pm25_ugm3 + (1+pm25_ugm3|SG_UF_RESIDENCIA),data =df1 )

mod_COMP3_NO2<-lmer(NU_NOTA_COMP3~ no2_ppb + (1+no2_ppb|SG_UF_RESIDENCIA),data =df1)
mod_COMP3_O3<-lmer(NU_NOTA_COMP3~ o3_ppb + (1+o3_ppb|SG_UF_RESIDENCIA),data =df1 )
Warning in checkConv(attr(opt, "derivs"), opt$par, ctrl = control$checkConv,  :
  Model failed to converge with max|grad| = 13.4513 (tol = 0.002, component 1)
mod_COMP3_PM25<-lmer(NU_NOTA_COMP3~ pm25_ugm3 + (1+pm25_ugm3|SG_UF_RESIDENCIA),data =df1 )
Warning in checkConv(attr(opt, "derivs"), opt$par, ctrl = control$checkConv,  :
  Model failed to converge with max|grad| = 0.0600179 (tol = 0.002, component 1)
mod_COMP4_NO2<-lmer(NU_NOTA_COMP4~ no2_ppb + (1+no2_ppb|SG_UF_RESIDENCIA),data =df1)
mod_COMP4_O3<-lmer(NU_NOTA_COMP4~ o3_ppb + (1+o3_ppb|SG_UF_RESIDENCIA),data =df1 )
Warning in checkConv(attr(opt, "derivs"), opt$par, ctrl = control$checkConv,  :
  Model failed to converge with max|grad| = 6.97955 (tol = 0.002, component 1)
mod_COMP4_PM25<-lmer(NU_NOTA_COMP4~ pm25_ugm3 + (1+pm25_ugm3|SG_UF_RESIDENCIA),data =df1 )
Warning in checkConv(attr(opt, "derivs"), opt$par, ctrl = control$checkConv,  :
  Model failed to converge with max|grad| = 0.0100228 (tol = 0.002, component 1)
mod_COMP5_NO2<-lmer(NU_NOTA_COMP5~ no2_ppb + (1+no2_ppb|SG_UF_RESIDENCIA),data =df1)
mod_COMP5_O3<-lmer(NU_NOTA_COMP5~ o3_ppb + (1+o3_ppb|SG_UF_RESIDENCIA),data =df1 )
Warning in checkConv(attr(opt, "derivs"), opt$par, ctrl = control$checkConv,  :
  unable to evaluate scaled gradient
Warning in checkConv(attr(opt, "derivs"), opt$par, ctrl = control$checkConv,  :
  Model failed to converge: degenerate  Hessian with 1 negative eigenvalues
Warning: Model failed to converge with 1 negative eigenvalue: -8.4e+00
mod_COMP5_PM25<-lmer(NU_NOTA_COMP5~ pm25_ugm3 + (1+pm25_ugm3|SG_UF_RESIDENCIA),data =df1 )
Warning in checkConv(attr(opt, "derivs"), opt$par, ctrl = control$checkConv,  :
  Model failed to converge with max|grad| = 0.00332599 (tol = 0.002, component 1)
Warning in checkConv(attr(opt, "derivs"), opt$par, ctrl = control$checkConv,  :
  Model is nearly unidentifiable: very large eigenvalue
 - Rescale variables?
Juntando resultados
A<-coef_int(mod_Red_NO2)
A$Poluente<-"NO2"
A$Nota<- "redação"

aB<-coef_int(mod_Red_O3)
aB$Poluente<-"O3"
aB$Nota<- "redação"

aC<-coef_int(mod_Red_PM25)
aC$Poluente<-"PM25"
aC$Nota<- "redação"


aD<-coef_int(mod_COMP1_NO2)
aD$Poluente <-"NO2"
aD$Nota<-"COMP1"

aE<-coef_int(mod_COMP1_O3)
aE$Poluente<-"O3"
aE$Nota<-"COMP1"

aF<-coef_int(mod_COMP1_PM25)
aF$Poluente<-"PM25"
aF$Nota <-"COMP1"

aG<-coef_int(mod_COMP2_NO2)
aG$Poluente<-"NO2"
aG$Nota<-"COMP2"

aH<-coef_int(mod_COMP2_O3)
aH$Poluente<-"O3"
aH$Nota<-"COMP2"

aI<-coef_int(mod_COMP2_PM25)
aI$Poluente<-"PM25"
aI$Nota<-"COMP2"

aJ<-coef_int(mod_COMP3_NO2)
aJ$Poluente<-"NO2"
aJ$Nota<-"COMP3"

aL<-coef_int(mod_COMP3_O3)
aL$Poluente<-"O3"
aL$Nota<-"COMP3"

aM<-coef_int(mod_COMP3_PM25)
aM$Poluente<-"PM25"
aM$Nota<-"COMP3"

aN<-coef_int(mod_COMP4_NO2)
aN$Poluente<-"NO2"
aN$Nota<-"COMP4"

aO<-coef_int(mod_COMP4_O3)
aO$Poluente<-"O3"
aO$Nota<-"COMP4"

aP<-coef_int(mod_COMP4_PM25)
aP$Poluente<-"PM25"
aP$Nota<-"COMP4"

aQ<-coef_int(mod_COMP5_NO2)
aQ$Poluente<-"NO2"
aQ$Nota<-"COMP5"

aR<-coef_int(mod_COMP5_O3)
aR$Poluente<-"O3"
aR$Nota<-"COMP5"

aT<-coef_int(mod_COMP5_PM25)
aT$Poluente <-"PM25"
aT$Nota<-"COMP5"

rbind(A[],aB[])

coef<-bind_rows(A,aB,aC,aD,aE,aF,aG,aH,aI,aJ,aL,aM,aN,aO,aP,aQ,aR,aT)
rm(A,aB,aC,aD,aE,aF,aG,aH,aI,aJ,aL,aM,aN,aO,aP,aQ,aR,aT)
#grafico Slope
G %>% 
  ggplot(aes(G$SLOPE,G$fator))+geom_point()+facet_grid(Nota~Poluente,scales = "free")+
  geom_errorbar(aes(xmin=(G[,2]+1.645*G[,4]),xmax=(G[,2]-1.645*G[,4])),size=0.1)+
  geom_pointrange(aes(xmin=(G[,2]+1.645*G[,4]),xmax=(G[,2]-1.645*G[,4])),size=0.1)+
  geom_vline(xintercept = 0, color = "red", size=1.0)+
  labs(title = "Modelo mistos",
       subtitle = "Coeficientes(IC90%)",
       caption = "EPPG FGV")+xlab("Slope")+ylab("Estado")
Warning: Use of `G$SLOPE` is discouraged. Use `SLOPE` instead.
Warning: Use of `G$fator` is discouraged. Use `fator` instead.
Warning: Use of `G$SLOPE` is discouraged. Use `SLOPE` instead.
Warning: Use of `G$fator` is discouraged. Use `fator` instead.
Warning: Use of `G$SLOPE` is discouraged. Use `SLOPE` instead.
Warning: Use of `G$fator` is discouraged. Use `fator` instead.

incluir outras variaveis

ANO

  #Variação de Poluente
mod_Red_NO2<-lmer(NU_NOTA_REDACAO~ no2_ppb +bs(NU_ANO)+ (1+no2_ppb|SG_UF_RESIDENCIA),data =df1)
Warning in checkConv(attr(opt, "derivs"), opt$par, ctrl = control$checkConv,  :
  Model failed to converge with max|grad| = 0.0238592 (tol = 0.002, component 1)
mod_Red_O3<-lmer(NU_NOTA_REDACAO~ o3_ppb +bs(NU_ANO)+ (1+o3_ppb|SG_UF_RESIDENCIA),data =df1 )
Warning in checkConv(attr(opt, "derivs"), opt$par, ctrl = control$checkConv,  :
  Model failed to converge with max|grad| = 1.28009 (tol = 0.002, component 1)
Warning in checkConv(attr(opt, "derivs"), opt$par, ctrl = control$checkConv,  :
  Model is nearly unidentifiable: very large eigenvalue
 - Rescale variables?
mod_Red_PM25<-lmer(NU_NOTA_REDACAO~ pm25_ugm3 +bs(NU_ANO)+ (1+pm25_ugm3|SG_UF_RESIDENCIA),data =df1 )
Warning in checkConv(attr(opt, "derivs"), opt$par, ctrl = control$checkConv,  :
  Model failed to converge with max|grad| = 22.717 (tol = 0.002, component 1)
Warning in checkConv(attr(opt, "derivs"), opt$par, ctrl = control$checkConv,  :
  Model is nearly unidentifiable: very large eigenvalue
 - Rescale variables?
mod_COMP1_NO2<-lmer(NU_NOTA_COMP1~ no2_ppb +bs(NU_ANO)+ (1+no2_ppb|SG_UF_RESIDENCIA),data =df1)
mod_COMP1_O3<-lmer(NU_NOTA_COMP1~ o3_ppb +bs(NU_ANO)+ (1+o3_ppb|SG_UF_RESIDENCIA),data =df1 )
Warning in checkConv(attr(opt, "derivs"), opt$par, ctrl = control$checkConv,  :
  Model failed to converge with max|grad| = 0.498451 (tol = 0.002, component 1)
mod_COMP1_PM25<-lmer(NU_NOTA_COMP1~ pm25_ugm3 +bs(NU_ANO)+ (1+pm25_ugm3|SG_UF_RESIDENCIA),data =df1 )
Warning in checkConv(attr(opt, "derivs"), opt$par, ctrl = control$checkConv,  :
  Model failed to converge with max|grad| = 0.0350266 (tol = 0.002, component 1)
mod_COMP2_NO2<-lmer(NU_NOTA_COMP2~ no2_ppb +bs(NU_ANO)+ (1+no2_ppb|SG_UF_RESIDENCIA),data =df1)
mod_COMP2_O3<-lmer(NU_NOTA_COMP2~ o3_ppb +bs(NU_ANO)+ (1+o3_ppb|SG_UF_RESIDENCIA),data =df1 )
Warning in checkConv(attr(opt, "derivs"), opt$par, ctrl = control$checkConv,  :
  Model failed to converge with max|grad| = 0.0253796 (tol = 0.002, component 1)
mod_COMP2_PM25<-lmer(NU_NOTA_COMP2~ pm25_ugm3 +bs(NU_ANO)+ (1+pm25_ugm3|SG_UF_RESIDENCIA),data =df1 )
Warning in checkConv(attr(opt, "derivs"), opt$par, ctrl = control$checkConv,  :
  Model failed to converge with max|grad| = 0.149335 (tol = 0.002, component 1)
mod_COMP3_NO2<-lmer(NU_NOTA_COMP3~ no2_ppb + bs(NU_ANO)+ (1+no2_ppb|SG_UF_RESIDENCIA),data =df1)
mod_COMP3_O3<-lmer(NU_NOTA_COMP3~ o3_ppb + bs(NU_ANO)+(1+o3_ppb|SG_UF_RESIDENCIA),data =df1 )
Warning in checkConv(attr(opt, "derivs"), opt$par, ctrl = control$checkConv,  :
  Model failed to converge with max|grad| = 0.700352 (tol = 0.002, component 1)
mod_COMP3_PM25<-lmer(NU_NOTA_COMP3~ pm25_ugm3 +bs(NU_ANO)+ (1+pm25_ugm3|SG_UF_RESIDENCIA),data =df1 )
Warning in checkConv(attr(opt, "derivs"), opt$par, ctrl = control$checkConv,  :
  Model failed to converge with max|grad| = 0.0114945 (tol = 0.002, component 1)
mod_COMP4_NO2<-lmer(NU_NOTA_COMP4~ no2_ppb + bs(NU_ANO)+(1+no2_ppb|SG_UF_RESIDENCIA),data =df1)
mod_COMP4_O3<-lmer(NU_NOTA_COMP4~ o3_ppb + bs(NU_ANO)+(1+o3_ppb|SG_UF_RESIDENCIA),data =df1 )
Warning in checkConv(attr(opt, "derivs"), opt$par, ctrl = control$checkConv,  :
  Model failed to converge with max|grad| = 0.0238664 (tol = 0.002, component 1)
mod_COMP4_PM25<-lmer(NU_NOTA_COMP4~ pm25_ugm3 + bs(NU_ANO)+(1+pm25_ugm3|SG_UF_RESIDENCIA),data =df1 )
Warning in checkConv(attr(opt, "derivs"), opt$par, ctrl = control$checkConv,  :
  Model failed to converge with max|grad| = 0.00434433 (tol = 0.002, component 1)
mod_COMP5_NO2<-lmer(NU_NOTA_COMP5~ no2_ppb +bs(NU_ANO)+ (1+no2_ppb|SG_UF_RESIDENCIA),data =df1)
boundary (singular) fit: see help('isSingular')
Warning: Model failed to converge with 1 negative eigenvalue: -4.2e+02
mod_COMP5_O3<-lmer(NU_NOTA_COMP5~ o3_ppb + bs(NU_ANO)+(1+o3_ppb|SG_UF_RESIDENCIA),data =df1 )
Warning in checkConv(attr(opt, "derivs"), opt$par, ctrl = control$checkConv,  :
  Model failed to converge with max|grad| = 19.0571 (tol = 0.002, component 1)
mod_COMP5_PM25<-lmer(NU_NOTA_COMP5~ pm25_ugm3 + bs(NU_ANO)+(1+pm25_ugm3|SG_UF_RESIDENCIA),data =df1 )
Warning in checkConv(attr(opt, "derivs"), opt$par, ctrl = control$checkConv,  :
  Model failed to converge with max|grad| = 0.00254503 (tol = 0.002, component 1)
#extração de coeficiente
# Per-group coefficient table restricted to the first two coefficients
# (intercept and the first fixed-effect term), so models with additional
# covariates can be summarised too.
# Output columns, by position: first coefficient, "SLOPE", the two values
# returned by standard_error(effects = "random"), and "fator" (group label).
coef_int <- function(modelo) {
  coefs <- coef(modelo)[[1]][1:2]
  SE <- standard_error(modelo, effects = "random")[[1]]
  RES <- cbind(coefs, SE, rownames(coefs))
  novos_nomes <- c(
    colnames(RES)[1],
    "SLOPE",
    paste("SE_", colnames(RES)[3]),
    paste("SE_", "SLOPE"),
    "fator"
  )
  colnames(RES) <- novos_nomes
  RES
}
#grafico Slope
G1 %>% 
  ggplot(aes(G1$SLOPE,G1$fator))+geom_point()+facet_grid(Nota~Poluente,scales = "free")+
  geom_errorbar(aes(xmin=(G1[,2]+1.645*G1[,4]),xmax=(G1[,2]-1.645*G1[,4])),size=0.1)+
  geom_pointrange(aes(xmin=(G1[,2]+1.645*G1[,4]),xmax=(G1[,2]-1.645*G1[,4])),size=0.1)+
  geom_vline(xintercept = 0, color = "red", size=1.0)+
  labs(title = "Modelo mistos",
       subtitle = "Coeficientes(IC90%)",
       caption = "EPPG FGV")+xlab("Slope")+ylab("Estado")
Warning: Use of `G1$SLOPE` is discouraged. Use `SLOPE` instead.
Warning: Use of `G1$fator` is discouraged. Use `fator` instead.
Warning: Use of `G1$SLOPE` is discouraged. Use `SLOPE` instead.
Warning: Use of `G1$fator` is discouraged. Use `fator` instead.
Warning: Use of `G1$SLOPE` is discouraged. Use `SLOPE` instead.
Warning: Use of `G1$fator` is discouraged. Use `fator` instead.

Renda

#Variação de Poluente
mod_Red_NO2<-lmer(NU_NOTA_REDACAO~ no2_ppb +bs(NU_ANO)+Q15 + (1+no2_ppb|SG_UF_RESIDENCIA),data =df1)
mod_Red_O3<-lmer(NU_NOTA_REDACAO~ o3_ppb +bs(NU_ANO)+Q15 + (1+o3_ppb|SG_UF_RESIDENCIA),data =df1 )
Warning in checkConv(attr(opt, "derivs"), opt$par, ctrl = control$checkConv,  :
  Model failed to converge with max|grad| = 1.36747 (tol = 0.002, component 1)
mod_Red_PM25<-lmer(NU_NOTA_REDACAO~ pm25_ugm3 +bs(NU_ANO)+Q15 + (1+pm25_ugm3|SG_UF_RESIDENCIA),data =df1 )
boundary (singular) fit: see help('isSingular')
mod_COMP1_NO2<-lmer(NU_NOTA_COMP1~ no2_ppb +bs(NU_ANO)+Q15 + (1+no2_ppb|SG_UF_RESIDENCIA),data =df1)
mod_COMP1_O3<-lmer(NU_NOTA_COMP1~ o3_ppb +bs(NU_ANO)+Q15 + (1+o3_ppb|SG_UF_RESIDENCIA),data =df1 )
Warning in checkConv(attr(opt, "derivs"), opt$par, ctrl = control$checkConv,  :
  Model failed to converge with max|grad| = 0.235558 (tol = 0.002, component 1)
mod_COMP1_PM25<-lmer(NU_NOTA_COMP1~ pm25_ugm3 +bs(NU_ANO)+Q15 + (1+pm25_ugm3|SG_UF_RESIDENCIA),data =df1 )
Warning in checkConv(attr(opt, "derivs"), opt$par, ctrl = control$checkConv,  :
  Model failed to converge with max|grad| = 1.53425 (tol = 0.002, component 1)
Warning in checkConv(attr(opt, "derivs"), opt$par, ctrl = control$checkConv,  :
  Model is nearly unidentifiable: very large eigenvalue
 - Rescale variables?
mod_COMP2_NO2<-lmer(NU_NOTA_COMP2~ no2_ppb +bs(NU_ANO)+Q15 + (1+no2_ppb|SG_UF_RESIDENCIA),data =df1)
mod_COMP2_O3<-lmer(NU_NOTA_COMP2~ o3_ppb +bs(NU_ANO)+Q15 + (1+o3_ppb|SG_UF_RESIDENCIA),data =df1 )
Warning in checkConv(attr(opt, "derivs"), opt$par, ctrl = control$checkConv,  :
  Model failed to converge with max|grad| = 0.0331485 (tol = 0.002, component 1)
mod_COMP2_PM25<-lmer(NU_NOTA_COMP2~ pm25_ugm3 +bs(NU_ANO)+Q15 + (1+pm25_ugm3|SG_UF_RESIDENCIA),data =df1 )
Warning in checkConv(attr(opt, "derivs"), opt$par, ctrl = control$checkConv,  :
  Model failed to converge with max|grad| = 0.897507 (tol = 0.002, component 1)
Warning in checkConv(attr(opt, "derivs"), opt$par, ctrl = control$checkConv,  :
  Model is nearly unidentifiable: very large eigenvalue
 - Rescale variables?
# Income-adjusted fits for COMP3-COMP5. Every model in this section must carry
# the income term Q15; seven of these calls had been copied from the year-only
# section without it and are corrected here.
# NOTE: the console messages kept below were produced by the earlier Q15-less
# calls and will change once the chunk is re-run.
mod_COMP3_NO2<-lmer(NU_NOTA_COMP3~ no2_ppb +bs(NU_ANO)+Q15 + (1+no2_ppb|SG_UF_RESIDENCIA),data =df1)
mod_COMP3_O3<-lmer(NU_NOTA_COMP3~ o3_ppb +bs(NU_ANO)+Q15 + (1+o3_ppb|SG_UF_RESIDENCIA),data =df1 )
Warning in checkConv(attr(opt, "derivs"), opt$par, ctrl = control$checkConv,  :
  Model failed to converge with max|grad| = 0.700352 (tol = 0.002, component 1)
mod_COMP3_PM25<-lmer(NU_NOTA_COMP3~ pm25_ugm3 +bs(NU_ANO)+Q15 + (1+pm25_ugm3|SG_UF_RESIDENCIA),data =df1 )
Warning in checkConv(attr(opt, "derivs"), opt$par, ctrl = control$checkConv,  :
  Model failed to converge with max|grad| = 0.480208 (tol = 0.002, component 1)
Warning in checkConv(attr(opt, "derivs"), opt$par, ctrl = control$checkConv,  :
  Model is nearly unidentifiable: very large eigenvalue
 - Rescale variables?
mod_COMP4_NO2<-lmer(NU_NOTA_COMP4~ no2_ppb +bs(NU_ANO)+Q15 + (1+no2_ppb|SG_UF_RESIDENCIA),data =df1)
mod_COMP4_O3<-lmer(NU_NOTA_COMP4~ o3_ppb +bs(NU_ANO)+Q15 + (1+o3_ppb|SG_UF_RESIDENCIA),data =df1 )
Warning in checkConv(attr(opt, "derivs"), opt$par, ctrl = control$checkConv,  :
  Model failed to converge with max|grad| = 0.0238664 (tol = 0.002, component 1)
mod_COMP4_PM25<-lmer(NU_NOTA_COMP4~ pm25_ugm3 +bs(NU_ANO)+Q15 + (1+pm25_ugm3|SG_UF_RESIDENCIA),data =df1 )
Warning in checkConv(attr(opt, "derivs"), opt$par, ctrl = control$checkConv,  :
  Model failed to converge with max|grad| = 0.00434433 (tol = 0.002, component 1)
mod_COMP5_NO2<-lmer(NU_NOTA_COMP5~ no2_ppb +bs(NU_ANO)+Q15 + (1+no2_ppb|SG_UF_RESIDENCIA),data =df1)
mod_COMP5_O3<-lmer(NU_NOTA_COMP5~ o3_ppb +bs(NU_ANO)+Q15 + (1+o3_ppb|SG_UF_RESIDENCIA),data =df1 )
Warning in checkConv(attr(opt, "derivs"), opt$par, ctrl = control$checkConv,  :
  Model failed to converge with max|grad| = 19.0571 (tol = 0.002, component 1)
mod_COMP5_PM25<-lmer(NU_NOTA_COMP5~ pm25_ugm3 +bs(NU_ANO)+Q15 + (1+pm25_ugm3|SG_UF_RESIDENCIA),data =df1 )
Warning in checkConv(attr(opt, "derivs"), opt$par, ctrl = control$checkConv,  :
  Model failed to converge with max|grad| = 0.00254503 (tol = 0.002, component 1)
# Coefficient plots for the income-adjusted models.
#
# The table is rebuilt from the models fitted in this section. Copying the
# existing `coef` object would plot the baseline models again, because `coef`
# was assembled before these models were fitted.
# expand.grid() varies `pol` fastest, which reproduces the row order of the
# original table (all pollutants of one score, then the next score).
grade_g3 <- expand.grid(
  pol = c("NO2", "O3", "PM25"),
  nota = c("Red", paste0("COMP", 1:5)),
  stringsAsFactors = FALSE
)
G3 <- bind_rows(unname(Map(
  function(nota, pol) {
    parte <- coef_int(get(paste0("mod_", nota, "_", pol)))
    parte$Poluente <- pol
    parte$Nota <- if (nota == "Red") "redação" else nota
    parte
  },
  grade_g3$nota,
  grade_g3$pol
)))

# 90% intervals computed once, as real columns, so the plots map data columns
# instead of external vectors. Lower bound goes to xmin, upper bound to xmax.
# Positional layout of G3: 1 = intercept, 2 = slope, 3/4 = their SEs,
# 5 = state label.
z90 <- 1.645
G3_ic <- data.frame(
  fator = G3[[5]],
  Poluente = G3$Poluente,
  Nota = G3$Nota,
  intercepto = G3[[1]],
  intercepto_inf = G3[[1]] - z90 * G3[[3]],
  intercepto_sup = G3[[1]] + z90 * G3[[3]],
  slope = G3[[2]],
  slope_inf = G3[[2]] - z90 * G3[[4]],
  slope_sup = G3[[2]] + z90 * G3[[4]]
)

# Intercept plot
ggplot(G3_ic, aes(intercepto, fator, col = Poluente)) +
  geom_point() +
  facet_grid(Nota ~ Poluente) +
  geom_errorbar(aes(xmin = intercepto_inf, xmax = intercepto_sup), size = 0.1) +
  geom_pointrange(aes(xmin = intercepto_inf, xmax = intercepto_sup), size = 0.1) +
  geom_vline(xintercept = 0, color = "red", size = 1.0) +
  labs(title = "Modelo mistos",
       subtitle = "Coeficientes(IC90%)",
       caption = "EPPG FGV") +
  xlab("Intercept") +
  ylab("Estado")

# Slope plot
ggplot(G3_ic, aes(slope, fator)) +
  geom_point() +
  facet_grid(Nota ~ Poluente, scales = "free") +
  geom_errorbar(aes(xmin = slope_inf, xmax = slope_sup), size = 0.1) +
  geom_pointrange(aes(xmin = slope_inf, xmax = slope_sup), size = 0.1) +
  geom_vline(xintercept = 0, color = "red", size = 1.0) +
  labs(title = "Modelo mistos",
       subtitle = "Coeficientes(IC90%)",
       caption = "EPPG FGV") +
  xlab("Slope") +
  ylab("Estado")
Warning: Use of `G3$SLOPE` is discouraged. Use `SLOPE` instead.
Warning: Use of `G3$fator` is discouraged. Use `fator` instead.
Warning: Use of `G3$SLOPE` is discouraged. Use `SLOPE` instead.
Warning: Use of `G3$fator` is discouraged. Use `fator` instead.
Warning: Use of `G3$SLOPE` is discouraged. Use `SLOPE` instead.
Warning: Use of `G3$fator` is discouraged. Use `fator` instead.

Modelo com controles variando por modalidade educacional

Modelo com controles variando por modalidade educacional

---
title: "Script para Nuvem2"
author:
- name: Thiago Noronha Gardin
  affiliation: Fundação Getulio Vargas
- name: Weeberb J. Réquia Jr.
  affiliation: Fundação Getulio Vargas
date: "`r format(Sys.time(), '%B %d, %Y')`"
output:
  html_notebook: 
    df_print: paged
    fig_width: 20
    fig_height: 10
  html_document:
    df_print: paged
  pdf_document: default
---

```{r setup, include=FALSE}
knitr::opts_chunk$set(echo = TRUE)
```

bibliotecas

```{r include=FALSE}
#manipulação de dados
library(tidyverse)
library(plotly)
#Analise exploratória
library(skimr)
library(funModeling)
library(corrplot)


#regressão
library(lmerTest)
library(lme4)
library(parameters)
library(splines)


```

# Preparação dos dados

## Abrir bases originais:

```{r}
# abrir base e criar amostras
#dados_finais_2000_2010 <- readRDS("~/RStudio/ENEM/2_Dado tratado/dados_finais_2000_2010.rds")
#df<-slice_sample(dados_finais_2000_2010,prop = 0.01) 
#saveRDS(df, file = "sample_00_10.rds")
#table(dados_finais_2000_2010$NU_ANO)
#table(df$NU_ANO)
#dados_finais_2000_2010 <- readRDS("~/RStudio/ENEM/2_Dado tratado/dados_finais_2012_2014.rds")

#df2<-slice_sample(dados_finais_2000_2010,prop = 0.01) 
#saveRDS(df2, file = "sample_12_14.rds")


#dados_finais_2015_2020 <- readRDS("~/RStudio/ENEM/2_Dado tratado/dados_finais_2015_2020.rds")
#df3<-slice_sample(dados_finais_2015_2020,prop = 0.01) 
#saveRDS(df3, file = "sample_15_20.rds")
```

```{r}
# Load the three ENEM sample files. The period labels below follow the file
# names - TODO confirm against the years actually present in each file.
sample_00_10 <- readRDS("~/RStudio/ENEM/sample_00_10.rds") # 2000-2010
sample_12_14 <- readRDS("~/RStudio/ENEM/sample_12_14.rds") # 2012-2014
sample_15_20 <- readRDS("~/RStudio/ENEM/sample_15_20.rds") # 2015-2020
#base de Variaveis Ambientais


```

```{r}
# Stack the regional environmental tables into a single data frame.
regiao_dir <- "~/RStudio/ENEM/Data-20220615T014950Z-001/Data"
Centro <- readRDS(file.path(regiao_dir, "Centro.rds"))
NE <- readRDS(file.path(regiao_dir, "NE.rds"))
NO <- readRDS(file.path(regiao_dir, "NO.rds"))
SE <- readRDS(file.path(regiao_dir, "SE.rds"))
SUL <- readRDS(file.path(regiao_dir, "Sul.rds"))
# `Centro` used to be read and then removed without being stacked, so its rows
# never reached df_Am. It is appended last to keep the existing row order of
# the other regions.
# NOTE(review): if Centro was excluded on purpose, drop it here and also stop
# reading the file.
df_Am <- bind_rows(NE, NO, SE, SUL, Centro)
rm(NE, NO, SE, SUL, Centro, regiao_dir)
head(df_Am)
```

## Preparar Bases:

Selecionar Variaveis de Interesse

```{r}

# Sex coding: the 2000-2010 sample is recoded from questionnaire item Q1
# (A -> "M", B -> "F"); the 2012-2014 sample from its 0/1 codes.
sample_00_10$TP_SEXO[sample_00_10$Q1 == "A"] <- "M"
sample_00_10$TP_SEXO[sample_00_10$Q1 == "B"] <- "F"

sample_12_14$TP_SEXO[sample_12_14$TP_SEXO == "0"] <- "M"
sample_12_14$TP_SEXO[sample_12_14$TP_SEXO == "1"] <- "F"

# Columns kept from every ENEM sample, in output order.
colunas_enem <- c(
  "NU_INSCRICAO",             # registration number
  "TP_SEXO",
  "NU_ANO",                   # exam year
  "SG_UF_RESIDENCIA",         # state of residence
  "NO_MUNICIPIO_RESIDENCIA",  # municipality name
  "CO_MUNICIPIO_RESIDENCIA",  # municipality code
  "NU_NOTA_REDACAO",          # essay score
  "NU_NOTA_COMP1",            # essay component scores 1-5
  "NU_NOTA_COMP2",
  "NU_NOTA_COMP3",
  "NU_NOTA_COMP4",
  "NU_NOTA_COMP5",
  "Q9",                       # father's schooling
  "Q10",
  "TP_DEPENDENCIA_ADM_ESC",   # school administration: 1 federal, 2 state, 3 municipal, 4 private
  "TP_LOCALIZACAO_ESC",       # school zone: 1 urban, 2 rural
  "Q15"                       # income bracket, A-Q
)

# The 2012-2014 sample stores the state in UF_RESIDENCIA instead.
colunas_12_14 <- replace(
  colunas_enem,
  colunas_enem == "SG_UF_RESIDENCIA",
  "UF_RESIDENCIA"
)

sample_00_10 <- select(sample_00_10, all_of(colunas_enem))
sample_12_14 <- select(sample_12_14, all_of(colunas_12_14))
sample_15_20 <- select(sample_15_20, all_of(colunas_enem))
#base de Variaveis Ambientais


```

```{r}
# Collapse the environmental records to one row per municipality, year, state
# and region, taking the mean of every pollutant and weather variable.
# `.groups = "drop"` returns an ungrouped table, so later steps do not inherit
# a leftover grouping; the former distinct() was a no-op because summarise()
# already yields one row per group.
# NOTE(review): mean() is called without na.rm, so a single missing reading
# makes the whole municipality-year NA - confirm this is intended.
df_Am <- df_Am %>%
  mutate(CO_MUNICIPIO_RESIDENCIA = code_muni) %>% # join key shared with the ENEM tables
  group_by(CO_MUNICIPIO_RESIDENCIA, Year, name_state, name_region) %>%
  summarise(
    across(
      c(
        co_ppb, no2_ppb, o3_ppb, pm25_ugm3, so2_ugm3,
        preciptation, temperature, humidity,
        wind_direction, wind_speed, wildfire
      ),
      mean
    ),
    .groups = "drop"
  )
saveRDS(df_Am, "baseambiental.rds")
# NOTE(review): the file is written relative to the working directory but read
# back from ~/RStudio/ENEM - these only match when that is the working directory.
df_Am <- readRDS("~/RStudio/ENEM/baseambiental.rds")

```

## Fundir Bases:

```{r}

# --- ENEM tables: last adjustments before stacking ---
# Give the 2012-2014 sample the same state column name as the other two.
sample_12_14 <- sample_12_14 %>%
  mutate(SG_UF_RESIDENCIA = UF_RESIDENCIA) %>%
  select(-UF_RESIDENCIA)

# Year is turned into text here and back into a number after stacking -
# presumably so the three samples share one type for bind_rows().
sample_15_20$NU_ANO <- as.character(sample_15_20$NU_ANO)

# Stack the three samples and keep a copy on disk.
df_enem <- bind_rows(sample_00_10, sample_12_14, sample_15_20)
saveRDS(df_enem, "Enemfundido.rds")
rm(sample_00_10, sample_12_14, sample_15_20)

# --- Attach the environmental variables ---
# Show the years available on each side of the join.
unique(df_Am$Year)
unique(df_enem$NU_ANO)
df_enem$NU_ANO <- as.numeric(df_enem$NU_ANO)
# Match on municipality code and year (NU_ANO on the ENEM side, Year on the
# environmental side); ENEM rows without a match are kept.
df <- left_join(
  df_enem,
  df_Am,
  by = c("CO_MUNICIPIO_RESIDENCIA", "NU_ANO" = "Year")
)
saveRDS(df, "base de referencia.rds")
rm(df_enem, df_Am)
df <- readRDS("~/RStudio/ENEM/base de referencia.rds")
```

# Analise Exploratória

## Sumário dos Dados

Resumo dos Dados

```{r}

# Remove the wildfire column, then print a summary of every remaining variable.
df <- select(df, -wildfire)
skim(df)
```

```{r}
# Set column types by name instead of by position.
# Positional ranges break silently whenever the column order changes. By the
# column order built earlier in this file, the old range 13:20 also turned the
# numeric pollutant co_ppb (20th column) into a factor; it now stays numeric.
# Indexing with a character vector works the same for tibbles and plain data
# frames, unlike the former `df[, 2]`.
fator_cols <- c(
  "TP_SEXO",
  "SG_UF_RESIDENCIA", "NO_MUNICIPIO_RESIDENCIA", "CO_MUNICIPIO_RESIDENCIA",
  "Q9", "Q10", "TP_DEPENDENCIA_ADM_ESC", "TP_LOCALIZACAO_ESC", "Q15",
  "name_state", "name_region"
)
nota_cols <- c("NU_NOTA_REDACAO", paste0("NU_NOTA_COMP", 1:5))

df[fator_cols] <- lapply(df[fator_cols], as.factor)
# Go through character for factors: as.numeric() on a factor would return the
# level codes, not the scores.
df[nota_cols] <- lapply(df[nota_cols], function(x) {
  if (is.factor(x)) as.numeric(as.character(x)) else as.numeric(x)
})

```

Análise univariada

```{r}
# Numeric variables: one plot per column via funModeling::plot_num()
plot_num(df)

```

\

```{r}


```

```{r}
# Categorical variables: frequency tables via funModeling::freq()
freq(df)

```

Analise de Correlações

```{r}
# Correlation among the numeric columns, using complete rows only.
A <- drop_na(select(df, where(is.numeric)))
matriz_cor <- cor(A)
matriz_cor
# Lower-triangle plot of the same matrix.
corrplot(
  matriz_cor,
  method = "square",
  type = "lower",
  tl.col = "black",
  cl.ratio = 0.2,
  tl.srt = 45,
  col = COL2("PuOr", 10)
)

```

```{r}
# Summarise the merged table, then draw a 10% random subsample (df1) that the
# exploratory plots and the models below work on.
# NOTE(review): no seed is set, so df1 differs on every run.
skim(df)
df1 <- df %>% slice_sample(prop = 0.1)
A <- colnames(df1)

# One figure per column in positions 7:12 (expected to be the six score
# columns - TODO confirm), plotted against exam year with one panel per state.
# Columns are mapped by name through `.data`, rather than passing `df1$...`
# vectors into aes(), which ggplot2 warns against.
for (i in 7:12) {
  print(i)
  print(
    ggplot(df1, aes(x = NU_ANO, y = .data[[A[i]]])) +
      facet_wrap(~name_state) +
      geom_point(alpha = 0.1) +
      geom_smooth() +
      ylab(A[i])
  )
  Sys.sleep(1)
}

```

Pelos gráficos, parece que as escalas das notas são realmente diferentes de um ano para outro, o que sugere tratar o ano como efeito fixo.

```{r}


# Each column in positions 7:12 of df1 against PM2.5, one panel per state.
# `A` holds colnames(df1); columns are mapped by name through `.data`, rather
# than passing `df1$...` vectors into aes().
for (i in 7:12) {
  print(i)
  print(
    ggplot(df1, aes(x = pm25_ugm3, y = .data[[A[i]]])) +
      facet_wrap(~name_state) +
      geom_point(alpha = 0.1) +
      geom_smooth() +
      ylab(A[i])
  )
  Sys.sleep(1)
}

#df1 %>% ggplot(aes(pm25_ugm3,NU_NOTA_REDACAO,col=name_state))+
#  geom_point()
  
```

```{r}

# Each column in positions 7:12 of df1 against NO2, one panel per state.
# `A` holds colnames(df1); columns are mapped by name through `.data`, rather
# than passing `df1$...` vectors into aes().
for (i in 7:12) {
  print(i)
  print(
    ggplot(df1, aes(x = no2_ppb, y = .data[[A[i]]])) +
      facet_wrap(~name_state) +
      geom_point(alpha = 0.1) +
      geom_smooth() +
      ylab(A[i])
  )
  Sys.sleep(1)
}
```

```{r}

# Each column in positions 7:12 of df1 against O3, one panel per state.
# `A` holds colnames(df1); columns are mapped by name through `.data`, rather
# than passing `df1$...` vectors into aes().
for (i in 7:12) {
  print(i)
  print(
    ggplot(df1, aes(x = o3_ppb, y = .data[[A[i]]])) +
      facet_wrap(~name_state) +
      geom_point(alpha = 0.1) +
      geom_smooth() +
      ylab(A[i])
  )
  Sys.sleep(1)
}
```

# Regressões

## Modelo basico de referencia

```{r}
# Reference fits of the essay score on NO2.
# Ordinary linear model, pooled over all states.
mainlm <- lm(NU_NOTA_REDACAO ~ no2_ppb, data = df1)

# Mixed model: intercept and NO2 slope both vary by state of residence.
mainlmer <- lmer(
  NU_NOTA_REDACAO ~ no2_ppb + (1 + no2_ppb | SG_UF_RESIDENCIA),
  data = df1
)

summary(mainlm)
summary(mainlmer)

```

extração de Coeficientes

```{r}
# Build a per-group coefficient table for a mixed model.
#
# modelo: a fitted model for which `coef()` returns a list of per-group
#   data frames and `standard_error(effects = "random")` returns a matching
#   list (e.g. the lmer fits in this notebook).
#
# Returns a data frame with five columns, in this order:
#   1. the first coefficient (keeps its original name, e.g. "(Intercept)")
#   2. "SLOPE"  - the second coefficient
#   3. "SE_ <name>" and 4. "SE_ SLOPE" - the values returned by
#      standard_error() (paste() inserts the space after "SE_"; the names are
#      kept as-is because later code addresses these columns by position)
#   5. "fator"  - the group label (row names of the coefficient table)
#
# Only the first two coefficient columns are kept, so the function also works
# for models that carry extra fixed-effect terms after the pollutant.
# NOTE(review): presumably standard_error() gives the SEs of the group-level
# effects, not of the combined coefficients - confirm before interpreting the
# intervals built from them.
coef_int <- function(modelo) {
  coefs <- coef(modelo)[[1]][1:2]
  SE <- standard_error(modelo, effects = "random")[[1]]
  RES <- cbind(coefs, SE, rownames(coefs))
  colnames(RES) <- c(
    colnames(RES)[1],
    "SLOPE",
    paste("SE_", colnames(RES)[3]),
    paste("SE_", "SLOPE"),
    "fator"
  )
  RES
}
# State-level coefficients of the reference mixed model, with 90% intervals.
G <- coef_int(mainlmer)

# Interval bounds are computed once as real columns so the plots map data
# columns instead of external `G[, k]` vectors. The lower bound goes to xmin
# and the upper bound to xmax (they were swapped before).
# Positional layout of G: 1 = intercept, 2 = slope, 3/4 = their SEs,
# 5 = state label.
z90 <- 1.645 # multiplier used for the 90% interval
G_ic <- data.frame(
  fator = G[[5]],
  intercepto = G[[1]],
  intercepto_inf = G[[1]] - z90 * G[[3]],
  intercepto_sup = G[[1]] + z90 * G[[3]],
  slope = G[[2]],
  slope_inf = G[[2]] - z90 * G[[4]],
  slope_sup = G[[2]] + z90 * G[[4]]
)

# Intercept plot
ggplot(G_ic, aes(intercepto, fator)) +
  geom_point() +
  geom_errorbar(aes(xmin = intercepto_inf, xmax = intercepto_sup), size = 0.1) +
  geom_pointrange(aes(xmin = intercepto_inf, xmax = intercepto_sup), size = 0.1) +
  geom_vline(xintercept = 0, color = "red", size = 1.0) +
  labs(title = "Modelo mistos",
       subtitle = "Coeficientes(IC90%)",
       caption = "EPPG FGV") +
  xlab("Intercept") +
  ylab("Estado")

# Slope plot
ggplot(G_ic, aes(slope, fator)) +
  geom_point() +
  geom_errorbar(aes(xmin = slope_inf, xmax = slope_sup), size = 0.1) +
  geom_pointrange(aes(xmin = slope_inf, xmax = slope_sup), size = 0.1) +
  geom_vline(xintercept = 0, color = "red", size = 1.0) +
  labs(title = "Modelo mistos",
       subtitle = "Coeficientes(IC90%)",
       caption = "EPPG FGV") +
  xlab("Slope") +
  ylab("Estado")


```

## todos os Modelos

```{r}
# Baseline mixed models: every score (essay total and components 1-5) is
# regressed on one pollutant at a time (NO2, O3, PM2.5), with the intercept and
# the pollutant slope varying by state of residence (SG_UF_RESIDENCIA).
# The 18 objects follow the pattern mod_<score>_<pollutant>; later chunks refer
# to them by these names.
# The rendered output earlier in this file shows convergence warnings for
# several of these fits.
# NOTE(review): if NU_NOTA_COMP5 is a factor at this point, as.numeric()
# returns level codes rather than the scores - confirm its type.
df1$NU_NOTA_COMP5<-as.numeric(df1$NU_NOTA_COMP5)
  # One block per score; within each block the pollutant varies
# Essay total
mod_Red_NO2<-lmer(NU_NOTA_REDACAO~ no2_ppb + (1+no2_ppb|SG_UF_RESIDENCIA),data =df1)
mod_Red_O3<-lmer(NU_NOTA_REDACAO~ o3_ppb + (1+o3_ppb|SG_UF_RESIDENCIA),data =df1 )
mod_Red_PM25<-lmer(NU_NOTA_REDACAO~ pm25_ugm3 + (1+pm25_ugm3|SG_UF_RESIDENCIA),data =df1 )

# Component 1
mod_COMP1_NO2<-lmer(NU_NOTA_COMP1~ no2_ppb + (1+no2_ppb|SG_UF_RESIDENCIA),data =df1)
mod_COMP1_O3<-lmer(NU_NOTA_COMP1~ o3_ppb + (1+o3_ppb|SG_UF_RESIDENCIA),data =df1 )
mod_COMP1_PM25<-lmer(NU_NOTA_COMP1~ pm25_ugm3 + (1+pm25_ugm3|SG_UF_RESIDENCIA),data =df1 )

# Component 2
mod_COMP2_NO2<-lmer(NU_NOTA_COMP2~ no2_ppb + (1+no2_ppb|SG_UF_RESIDENCIA),data =df1)
mod_COMP2_O3<-lmer(NU_NOTA_COMP2~ o3_ppb + (1+o3_ppb|SG_UF_RESIDENCIA),data =df1 )
mod_COMP2_PM25<-lmer(NU_NOTA_COMP2~ pm25_ugm3 + (1+pm25_ugm3|SG_UF_RESIDENCIA),data =df1 )

# Component 3
mod_COMP3_NO2<-lmer(NU_NOTA_COMP3~ no2_ppb + (1+no2_ppb|SG_UF_RESIDENCIA),data =df1)
mod_COMP3_O3<-lmer(NU_NOTA_COMP3~ o3_ppb + (1+o3_ppb|SG_UF_RESIDENCIA),data =df1 )
mod_COMP3_PM25<-lmer(NU_NOTA_COMP3~ pm25_ugm3 + (1+pm25_ugm3|SG_UF_RESIDENCIA),data =df1 )

# Component 4
mod_COMP4_NO2<-lmer(NU_NOTA_COMP4~ no2_ppb + (1+no2_ppb|SG_UF_RESIDENCIA),data =df1)
mod_COMP4_O3<-lmer(NU_NOTA_COMP4~ o3_ppb + (1+o3_ppb|SG_UF_RESIDENCIA),data =df1 )
mod_COMP4_PM25<-lmer(NU_NOTA_COMP4~ pm25_ugm3 + (1+pm25_ugm3|SG_UF_RESIDENCIA),data =df1 )

# Component 5
mod_COMP5_NO2<-lmer(NU_NOTA_COMP5~ no2_ppb + (1+no2_ppb|SG_UF_RESIDENCIA),data =df1)
mod_COMP5_O3<-lmer(NU_NOTA_COMP5~ o3_ppb + (1+o3_ppb|SG_UF_RESIDENCIA),data =df1 )
mod_COMP5_PM25<-lmer(NU_NOTA_COMP5~ pm25_ugm3 + (1+pm25_ugm3|SG_UF_RESIDENCIA),data =df1 )

```

##### Juntando resultados

```{r}
# Collect the state-level coefficients of all 18 baseline models into one
# table (`coef`), tagging each row with its pollutant and score.
# One loop over the score x pollutant grid replaces 18 hand-copied stanzas.
# expand.grid() varies `pol` fastest, so the rows keep the original order:
# all pollutants of one score, then the next score.
grade_modelos <- expand.grid(
  pol = c("NO2", "O3", "PM25"),
  nota = c("Red", paste0("COMP", 1:5)),
  stringsAsFactors = FALSE
)
partes <- unname(Map(
  function(nota, pol) {
    # Models are named mod_<score>_<pollutant>
    parte <- coef_int(get(paste0("mod_", nota, "_", pol)))
    parte$Poluente <- pol
    # The essay total is labelled "redação"; components keep their code
    parte$Nota <- if (nota == "Red") "redação" else nota
    parte
  },
  grade_modelos$nota,
  grade_modelos$pol
))

# Quick look at the first two pieces (essay total: NO2 and O3)
rbind(partes[[1]], partes[[2]])

coef <- bind_rows(partes)
rm(partes, grade_modelos)

```

```{r}
# Faceted coefficient plots (score x pollutant) for the baseline models.
coef
G <- coef
G$Poluente

# 90% intervals as real columns, so every layer maps data columns instead of
# external `G[, k]` / `G$...` vectors, which ggplot2 warns against. The lower
# bound goes to xmin and the upper bound to xmax (they were swapped before).
# Positional layout of G: 1 = intercept, 2 = slope, 3/4 = their SEs,
# 5 = state label.
z90 <- 1.645 # multiplier used for the 90% interval
G_ic <- data.frame(
  fator = G[[5]],
  Poluente = G$Poluente,
  Nota = G$Nota,
  intercepto = G[[1]],
  intercepto_inf = G[[1]] - z90 * G[[3]],
  intercepto_sup = G[[1]] + z90 * G[[3]],
  slope = G[[2]],
  slope_inf = G[[2]] - z90 * G[[4]],
  slope_sup = G[[2]] + z90 * G[[4]]
)

# Intercept plot
ggplot(G_ic, aes(intercepto, fator, col = Poluente)) +
  geom_point() +
  facet_grid(Nota ~ Poluente) +
  geom_errorbar(aes(xmin = intercepto_inf, xmax = intercepto_sup), size = 0.1) +
  geom_pointrange(aes(xmin = intercepto_inf, xmax = intercepto_sup), size = 0.1) +
  geom_vline(xintercept = 0, color = "red", size = 1.0) +
  labs(title = "Modelo mistos",
       subtitle = "Coeficientes(IC90%)",
       caption = "EPPG FGV") +
  xlab("Intercept") +
  ylab("Estado")

# Slope plot
ggplot(G_ic, aes(slope, fator)) +
  geom_point() +
  facet_grid(Nota ~ Poluente, scales = "free") +
  geom_errorbar(aes(xmin = slope_inf, xmax = slope_sup), size = 0.1) +
  geom_pointrange(aes(xmin = slope_inf, xmax = slope_sup), size = 0.1) +
  geom_vline(xintercept = 0, color = "red", size = 1.0) +
  labs(title = "Modelo mistos",
       subtitle = "Coeficientes(IC90%)",
       caption = "EPPG FGV") +
  xlab("Slope") +
  ylab("Estado")

```

## incluir outras variaveis

#### ANO

```{r}
  #Variação de Poluente
# Year-adjusted mixed models: same score x pollutant grid as the baseline
# fits, with a B-spline of the exam year, bs(NU_ANO), added as a fixed effect.
# Intercept and pollutant slope still vary by state of residence.
# These assignments overwrite the baseline objects of the same names.
# The rendered output earlier in this file shows convergence and
# singular-fit warnings for several of these fits.
# Essay total
mod_Red_NO2<-lmer(NU_NOTA_REDACAO~ no2_ppb +bs(NU_ANO)+ (1+no2_ppb|SG_UF_RESIDENCIA),data =df1)
mod_Red_O3<-lmer(NU_NOTA_REDACAO~ o3_ppb +bs(NU_ANO)+ (1+o3_ppb|SG_UF_RESIDENCIA),data =df1 )
mod_Red_PM25<-lmer(NU_NOTA_REDACAO~ pm25_ugm3 +bs(NU_ANO)+ (1+pm25_ugm3|SG_UF_RESIDENCIA),data =df1 )

# Component 1
mod_COMP1_NO2<-lmer(NU_NOTA_COMP1~ no2_ppb +bs(NU_ANO)+ (1+no2_ppb|SG_UF_RESIDENCIA),data =df1)
mod_COMP1_O3<-lmer(NU_NOTA_COMP1~ o3_ppb +bs(NU_ANO)+ (1+o3_ppb|SG_UF_RESIDENCIA),data =df1 )
mod_COMP1_PM25<-lmer(NU_NOTA_COMP1~ pm25_ugm3 +bs(NU_ANO)+ (1+pm25_ugm3|SG_UF_RESIDENCIA),data =df1 )

# Component 2
mod_COMP2_NO2<-lmer(NU_NOTA_COMP2~ no2_ppb +bs(NU_ANO)+ (1+no2_ppb|SG_UF_RESIDENCIA),data =df1)
mod_COMP2_O3<-lmer(NU_NOTA_COMP2~ o3_ppb +bs(NU_ANO)+ (1+o3_ppb|SG_UF_RESIDENCIA),data =df1 )
mod_COMP2_PM25<-lmer(NU_NOTA_COMP2~ pm25_ugm3 +bs(NU_ANO)+ (1+pm25_ugm3|SG_UF_RESIDENCIA),data =df1 )

# Component 3
mod_COMP3_NO2<-lmer(NU_NOTA_COMP3~ no2_ppb + bs(NU_ANO)+ (1+no2_ppb|SG_UF_RESIDENCIA),data =df1)
mod_COMP3_O3<-lmer(NU_NOTA_COMP3~ o3_ppb + bs(NU_ANO)+(1+o3_ppb|SG_UF_RESIDENCIA),data =df1 )
mod_COMP3_PM25<-lmer(NU_NOTA_COMP3~ pm25_ugm3 +bs(NU_ANO)+ (1+pm25_ugm3|SG_UF_RESIDENCIA),data =df1 )

# Component 4
mod_COMP4_NO2<-lmer(NU_NOTA_COMP4~ no2_ppb + bs(NU_ANO)+(1+no2_ppb|SG_UF_RESIDENCIA),data =df1)
mod_COMP4_O3<-lmer(NU_NOTA_COMP4~ o3_ppb + bs(NU_ANO)+(1+o3_ppb|SG_UF_RESIDENCIA),data =df1 )
mod_COMP4_PM25<-lmer(NU_NOTA_COMP4~ pm25_ugm3 + bs(NU_ANO)+(1+pm25_ugm3|SG_UF_RESIDENCIA),data =df1 )

# Component 5
mod_COMP5_NO2<-lmer(NU_NOTA_COMP5~ no2_ppb +bs(NU_ANO)+ (1+no2_ppb|SG_UF_RESIDENCIA),data =df1)
mod_COMP5_O3<-lmer(NU_NOTA_COMP5~ o3_ppb + bs(NU_ANO)+(1+o3_ppb|SG_UF_RESIDENCIA),data =df1 )
mod_COMP5_PM25<-lmer(NU_NOTA_COMP5~ pm25_ugm3 + bs(NU_ANO)+(1+pm25_ugm3|SG_UF_RESIDENCIA),data =df1 )

```

```{r}
# Coefficient extraction.
#
# Builds a per-group table from a fitted model:
#   - the first two columns of coef(modelo)[[1]] (the first one keeps its
#     original name, the second is renamed "SLOPE");
#   - the first element of standard_error(modelo, effects = "random"),
#     whose columns are renamed with an "SE_ " prefix;
#   - the row names of the coefficient table, stored in column "fator".
#
# modelo: a fitted model accepted by coef() and standard_error()
#         (presumably an lme4 mixed model; standard_error presumably comes
#         from the parameters package -- confirm).
# Returns the combined five-column table.
coef_int <- function(modelo) {
  # Only the first grouping factor and its first two coefficient columns
  # are kept; any additional fixed-effect columns are dropped.
  estimates <- coef(modelo)[[1]][1:2]
  std_errors <- standard_error(modelo, effects = "random")[[1]]
  tabela <- cbind(estimates, std_errors, rownames(estimates))

  first_name <- colnames(tabela)[1]
  third_name <- colnames(tabela)[3]
  # paste() uses its default separator, so the SE names contain a space.
  colnames(tabela) <- c(
    first_name,
    "SLOPE",
    paste("SE_", third_name),
    paste("SE_", "SLOPE"),
    "fator"
  )
  tabela
}

```

```{r}
# Working copy of the coefficient table used by both plots below.
# NOTE(review): `coef` is expected to be a data frame built upstream with
# columns `(Intercept)`, SLOPE, two standard-error columns (positions 3 and
# 4), fator, Poluente and Nota. It is not rebuilt in this chunk -- confirm it
# reflects the year-adjusted models fitted above.
G1 <- coef

# Intercept plot: per-state intercepts with +/- 1.645 * SE bars (labelled as
# a 90% interval in the subtitle), faceted by score (rows) and pollutant
# (columns). Column 1 is the intercept and column 3 its standard error.
# The interval bounds previously read the SE from `G` instead of `G1` and
# had xmin/xmax swapped; both now come from G1 with xmin as the lower bound.
G1 %>%
  ggplot(aes(`(Intercept)`, fator, col = Poluente)) +
  geom_point() +
  facet_grid(Nota ~ Poluente) +
  geom_errorbar(
    aes(xmin = G1[, 1] - 1.645 * G1[, 3], xmax = G1[, 1] + 1.645 * G1[, 3]),
    size = 0.1
  ) +
  geom_pointrange(
    aes(xmin = G1[, 1] - 1.645 * G1[, 3], xmax = G1[, 1] + 1.645 * G1[, 3]),
    size = 0.1
  ) +
  geom_vline(xintercept = 0, color = "red", size = 1.0) +
  labs(
    title = "Modelo mistos",
    subtitle = "Coeficientes(IC90%)",
    caption = "EPPG FGV"
  ) +
  xlab("Intercept") +
  ylab("Estado")

# Slope plot: per-state pollutant slopes with +/- 1.645 * SE bars, free
# scales per facet. Column 2 is the slope and column 4 its standard error.
# Columns are referenced by name inside aes() instead of through `G1$...`.
G1 %>%
  ggplot(aes(SLOPE, fator)) +
  geom_point() +
  facet_grid(Nota ~ Poluente, scales = "free") +
  geom_errorbar(
    aes(xmin = G1[, 2] - 1.645 * G1[, 4], xmax = G1[, 2] + 1.645 * G1[, 4]),
    size = 0.1
  ) +
  geom_pointrange(
    aes(xmin = G1[, 2] - 1.645 * G1[, 4], xmax = G1[, 2] + 1.645 * G1[, 4]),
    size = 0.1
  ) +
  geom_vline(xintercept = 0, color = "red", size = 1.0) +
  labs(
    title = "Modelo mistos",
    subtitle = "Coeficientes(IC90%)",
    caption = "EPPG FGV"
  ) +
  xlab("Slope") +
  ylab("Estado")
```

#### Renda

```{r}
# Pollutant variation, adjusted for year and income.
# Every model regresses one score on one pollutant plus bs(NU_ANO)
# (presumably splines::bs applied to the exam year) and Q15, with a random
# intercept and a random pollutant slope per state of residence
# (SG_UF_RESIDENCIA). All models are fitted on df1.
# NOTE(review): Q15 is taken to be the questionnaire item used as the income
# control for this section -- confirm against the data dictionary.
# Q15 is now included in every model; several component-3/4/5 fits
# previously omitted it and were therefore identical to the year-only models.

# Essay score (NU_NOTA_REDACAO)
mod_Red_NO2 <- lmer(
  NU_NOTA_REDACAO ~ no2_ppb + bs(NU_ANO) + Q15 +
    (1 + no2_ppb | SG_UF_RESIDENCIA),
  data = df1
)
mod_Red_O3 <- lmer(
  NU_NOTA_REDACAO ~ o3_ppb + bs(NU_ANO) + Q15 +
    (1 + o3_ppb | SG_UF_RESIDENCIA),
  data = df1
)
mod_Red_PM25 <- lmer(
  NU_NOTA_REDACAO ~ pm25_ugm3 + bs(NU_ANO) + Q15 +
    (1 + pm25_ugm3 | SG_UF_RESIDENCIA),
  data = df1
)

# Score component 1 (NU_NOTA_COMP1)
mod_COMP1_NO2 <- lmer(
  NU_NOTA_COMP1 ~ no2_ppb + bs(NU_ANO) + Q15 +
    (1 + no2_ppb | SG_UF_RESIDENCIA),
  data = df1
)
mod_COMP1_O3 <- lmer(
  NU_NOTA_COMP1 ~ o3_ppb + bs(NU_ANO) + Q15 +
    (1 + o3_ppb | SG_UF_RESIDENCIA),
  data = df1
)
mod_COMP1_PM25 <- lmer(
  NU_NOTA_COMP1 ~ pm25_ugm3 + bs(NU_ANO) + Q15 +
    (1 + pm25_ugm3 | SG_UF_RESIDENCIA),
  data = df1
)

# Score component 2 (NU_NOTA_COMP2)
mod_COMP2_NO2 <- lmer(
  NU_NOTA_COMP2 ~ no2_ppb + bs(NU_ANO) + Q15 +
    (1 + no2_ppb | SG_UF_RESIDENCIA),
  data = df1
)
mod_COMP2_O3 <- lmer(
  NU_NOTA_COMP2 ~ o3_ppb + bs(NU_ANO) + Q15 +
    (1 + o3_ppb | SG_UF_RESIDENCIA),
  data = df1
)
mod_COMP2_PM25 <- lmer(
  NU_NOTA_COMP2 ~ pm25_ugm3 + bs(NU_ANO) + Q15 +
    (1 + pm25_ugm3 | SG_UF_RESIDENCIA),
  data = df1
)

# Score component 3 (NU_NOTA_COMP3)
mod_COMP3_NO2 <- lmer(
  NU_NOTA_COMP3 ~ no2_ppb + bs(NU_ANO) + Q15 +
    (1 + no2_ppb | SG_UF_RESIDENCIA),
  data = df1
)
mod_COMP3_O3 <- lmer(
  NU_NOTA_COMP3 ~ o3_ppb + bs(NU_ANO) + Q15 +
    (1 + o3_ppb | SG_UF_RESIDENCIA),
  data = df1
)
mod_COMP3_PM25 <- lmer(
  NU_NOTA_COMP3 ~ pm25_ugm3 + bs(NU_ANO) + Q15 +
    (1 + pm25_ugm3 | SG_UF_RESIDENCIA),
  data = df1
)

# Score component 4 (NU_NOTA_COMP4)
mod_COMP4_NO2 <- lmer(
  NU_NOTA_COMP4 ~ no2_ppb + bs(NU_ANO) + Q15 +
    (1 + no2_ppb | SG_UF_RESIDENCIA),
  data = df1
)
mod_COMP4_O3 <- lmer(
  NU_NOTA_COMP4 ~ o3_ppb + bs(NU_ANO) + Q15 +
    (1 + o3_ppb | SG_UF_RESIDENCIA),
  data = df1
)
mod_COMP4_PM25 <- lmer(
  NU_NOTA_COMP4 ~ pm25_ugm3 + bs(NU_ANO) + Q15 +
    (1 + pm25_ugm3 | SG_UF_RESIDENCIA),
  data = df1
)

# Score component 5 (NU_NOTA_COMP5)
mod_COMP5_NO2 <- lmer(
  NU_NOTA_COMP5 ~ no2_ppb + bs(NU_ANO) + Q15 +
    (1 + no2_ppb | SG_UF_RESIDENCIA),
  data = df1
)
mod_COMP5_O3 <- lmer(
  NU_NOTA_COMP5 ~ o3_ppb + bs(NU_ANO) + Q15 +
    (1 + o3_ppb | SG_UF_RESIDENCIA),
  data = df1
)
mod_COMP5_PM25 <- lmer(
  NU_NOTA_COMP5 ~ pm25_ugm3 + bs(NU_ANO) + Q15 +
    (1 + pm25_ugm3 | SG_UF_RESIDENCIA),
  data = df1
)
```

```{r}
# Working copy of the coefficient table used by both plots below.
# NOTE(review): `coef` is expected to be a data frame built upstream with
# columns `(Intercept)`, SLOPE, two standard-error columns (positions 3 and
# 4), fator, Poluente and Nota. It is not rebuilt in this chunk -- confirm it
# reflects the income-adjusted models fitted above.
G3 <- coef

# Intercept plot: per-state intercepts with +/- 1.645 * SE bars (labelled as
# a 90% interval in the subtitle), faceted by score (rows) and pollutant
# (columns). Column 1 is the intercept and column 3 its standard error.
# xmin is now the lower bound and xmax the upper bound (they were swapped).
G3 %>%
  ggplot(aes(`(Intercept)`, fator, col = Poluente)) +
  geom_point() +
  facet_grid(Nota ~ Poluente) +
  geom_errorbar(
    aes(xmin = G3[, 1] - 1.645 * G3[, 3], xmax = G3[, 1] + 1.645 * G3[, 3]),
    size = 0.1
  ) +
  geom_pointrange(
    aes(xmin = G3[, 1] - 1.645 * G3[, 3], xmax = G3[, 1] + 1.645 * G3[, 3]),
    size = 0.1
  ) +
  geom_vline(xintercept = 0, color = "red", size = 1.0) +
  labs(
    title = "Modelo mistos",
    subtitle = "Coeficientes(IC90%)",
    caption = "EPPG FGV"
  ) +
  xlab("Intercept") +
  ylab("Estado")

# Slope plot: per-state pollutant slopes with +/- 1.645 * SE bars, free
# scales per facet. Column 2 is the slope and column 4 its standard error.
# Columns are referenced by name inside aes() instead of through `G3$...`.
G3 %>%
  ggplot(aes(SLOPE, fator)) +
  geom_point() +
  facet_grid(Nota ~ Poluente, scales = "free") +
  geom_errorbar(
    aes(xmin = G3[, 2] - 1.645 * G3[, 4], xmax = G3[, 2] + 1.645 * G3[, 4]),
    size = 0.1
  ) +
  geom_pointrange(
    aes(xmin = G3[, 2] - 1.645 * G3[, 4], xmax = G3[, 2] + 1.645 * G3[, 4]),
    size = 0.1
  ) +
  geom_vline(xintercept = 0, color = "red", size = 1.0) +
  labs(
    title = "Modelo mistos",
    subtitle = "Coeficientes(IC90%)",
    caption = "EPPG FGV"
  ) +
  xlab("Slope") +
  ylab("Estado")
```

## Modelo com controles variando por modalidade educacional

## Modelo com controles variando por modalidade educacional
