# Bibliotecas
library(knitr)
library(esquisse)
library(tidyverse)
library(skimr)
library(extrafont)
# Trigger a garbage collection; at top level the memory-usage table is printed.
gc()
used (Mb) gc trigger (Mb) max used (Mb)
Ncells 4125435 220.4 7984848 426.5 7984848 426.5
Vcells 117269465 894.7 171438110 1308.0 163239406 1245.5
# memory.limit() is no longer supported (it only emits a warning and returns
# Inf), so the former call `memory.limit(9999999999)` has been dropped.
Warning: 'memory.limit()' is no longer supported
[1] Inf
# Load the serialized sample that the rest of the script works on.
sample_00_10 <- readRDS(file = "~/RStudio/ENEM/sample_00_10.rds")
Warning in readRDS("~/RStudio/ENEM/sample_00_10.rds") :
error reading the file
Error in readRDS("~/RStudio/ENEM/sample_00_10.rds") :
error reading from connection
Observar Valores Únicos e Numéricos
# Inspect unique and numeric values.
# Coerce columns (selected by position) to their analysis types; columns 4-6
# are left as they are. Using across() keeps every selection column-wise, so
# the single-column case (column 10) is converted as a column whether
# sample_00_10 is a tibble or a base data.frame.
sample_00_10 <- sample_00_10 %>%
  mutate(
    across(c(1:3, 10, 13:19), as.numeric),
    across(c(7:9, 11:12, 20:83), as.factor)
  )

# Print the skim summary of the converted data.
skim_tee(sample_00_10)
-- Data Summary ------------------------
Values
Name data
Number of rows 30403
Number of columns 83
Key NULL
_______________________
Column type frequency:
character 3
factor 69
numeric 11
________________________
Group variables None
# Summarise every column with skim(); piping the data in makes the summary
# report its name as "Piped data".
sample_00_10 %>% skim()
-- Data Summary ------------------------
Values
Name Piped data
Number of rows 30403
Number of columns 83
Key NULL
_______________________
Column type frequency:
character 3
factor 69
numeric 11
________________________
Group variables None
# Interactive call: it warns that it will change the system locale and waits
# for a confirmation at the console before proceeding.
fix_windows_histograms()
This function will change your system locale. It may have other unintended effects.
y
Padronizar indicadores?
Abrir Bases:
# Build the unified environmental data set from the regional files.
# These reads must run: df_Am is used below, and leaving them commented out
# makes head(df_Am) fail with "object 'df_Am' not found".
Centro <- readRDS("~/RStudio/ENEM/Data-20220615T014950Z-001/Data/Centro.rds")
NE <- readRDS("~/RStudio/ENEM/Data-20220615T014950Z-001/Data/NE.rds")
NO <- readRDS("~/RStudio/ENEM/Data-20220615T014950Z-001/Data/NO.rds")
SE <- readRDS("~/RStudio/ENEM/Data-20220615T014950Z-001/Data/SE.rds")
SUL <- readRDS("~/RStudio/ENEM/Data-20220615T014950Z-001/Data/Sul.rds")

# NOTE(review): Centro is read but not bound into df_Am -- confirm whether
# that region should be part of the unified table.
df_Am <- bind_rows(NE, NO, SE, SUL)

# Drop the per-region objects once they have been combined.
rm(NE, NO, SE, SUL, Centro)
head(df_Am)
Error in h(simpleError(msg, call)) :
error in evaluating the argument 'x' in selecting a method for function 'head': object 'df_Am' not found
Fundir Bases
# Prepare the environmental data for merging: expose the municipality code
# under the join-key name, then average every environmental variable within
# each municipality / year (state and region are carried along as grouping
# columns).
# `wildfire` is now a named mean like the other variables; the previous
# `wildfire == mean(wildfire)` was a comparison that produced a logical column
# with several rows per group, which is why distinct() was needed before.
df_Am1 <- df_Am %>%
  mutate(CO_MUNICIPIO_RESIDENCIA = code_muni) %>%
  group_by(CO_MUNICIPIO_RESIDENCIA, Year, name_state, name_region) %>%
  summarise(
    across(
      c(
        co_ppb, no2_ppb, o3_ppb, pm25_ugm3, so2_ugm3,
        preciptation, temperature, humidity,
        wind_direction, wind_speed, wildfire
      ),
      mean
    ),
    .groups = "drop"
  )
head(df_Am1)
# One-off merge of the sample with the aggregated environmental data,
# presumably left commented out because the result is read from disk below.
# dftest<-left_join(sample_00_10,df_Am1,by="CO_MUNICIPIO_RESIDENCIA")
# table(dftest$Year,dftest$NU_ANO)
# skim(dftest$Year)
# dftest %>% filter(Year==NU_ANO+2000) -> dftest0
# saveRDS(dftest,"dftest0.rds")
# NOTE(review): the saveRDS() line above writes `dftest` (before the year
# filter), not `dftest0`, and uses a relative path while the read below uses
# ~/RStudio/ENEM -- confirm the cached file really holds the filtered data.
dftest0 <- readRDS("~/RStudio/ENEM/dftest0.rds")
Análise Preliminar
Esboço do modelo: gpa_mixed <- lmer(Nota ~ Variavel_Ambiental + (1 | ANO) + (1 | Municipio), data = dftest0)
# NU_NOTA_REDACAO against co_ppb, one panel per calendar year.
# vars() evaluates NU_ANO + 2000 as one expression; the formula form
# `~NU_ANO+2000` is split on `+` and facets by NU_ANO and the constant 2000.
# Assumes NU_ANO is numeric (it is used as NU_ANO + 2000 elsewhere in the file).
dftest0 %>%
  ggplot(aes(co_ppb, NU_NOTA_REDACAO)) +
  geom_point() +
  geom_smooth() +
  facet_wrap(vars(NU_ANO + 2000), scales = "free")
`geom_smooth()` using method = 'gam' and formula 'y ~ s(x, bs = "cs")'
Warning: Removed 3155 rows containing non-finite values (stat_smooth).
Warning: Removed 3155 rows containing missing values (geom_point).
# NU_NOTA_COMP1 against co_ppb, one panel per calendar year.
# vars() evaluates NU_ANO + 2000 as one expression; the formula form
# `~NU_ANO+2000` is split on `+` and facets by NU_ANO and the constant 2000.
# Assumes NU_ANO is numeric (it is used as NU_ANO + 2000 elsewhere in the file).
dftest0 %>%
  ggplot(aes(co_ppb, NU_NOTA_COMP1)) +
  geom_point() +
  geom_smooth() +
  facet_wrap(vars(NU_ANO + 2000), scales = "free")
`geom_smooth()` using method = 'gam' and formula 'y ~ s(x, bs = "cs")'
Warning: Removed 16854 rows containing non-finite values (stat_smooth).
Warning: Removed 16854 rows containing missing values (geom_point).
# NU_NOTA_COMP2 against co_ppb, one panel per calendar year.
# vars() evaluates NU_ANO + 2000 as one expression; the formula form
# `~NU_ANO+2000` is split on `+` and facets by NU_ANO and the constant 2000.
# Assumes NU_ANO is numeric (it is used as NU_ANO + 2000 elsewhere in the file).
dftest0 %>%
  ggplot(aes(co_ppb, NU_NOTA_COMP2)) +
  geom_point() +
  geom_smooth() +
  facet_wrap(vars(NU_ANO + 2000), scales = "free")
`geom_smooth()` using method = 'gam' and formula 'y ~ s(x, bs = "cs")'
Warning: Removed 16854 rows containing non-finite values (stat_smooth).
Warning: Removed 16854 rows containing missing values (geom_point).
# NU_NOTA_COMP3 against co_ppb, one panel per calendar year.
# vars() evaluates NU_ANO + 2000 as one expression; the formula form
# `~NU_ANO+2000` is split on `+` and facets by NU_ANO and the constant 2000.
# Assumes NU_ANO is numeric (it is used as NU_ANO + 2000 elsewhere in the file).
dftest0 %>%
  ggplot(aes(co_ppb, NU_NOTA_COMP3)) +
  geom_point() +
  geom_smooth() +
  facet_wrap(vars(NU_ANO + 2000), scales = "free")
`geom_smooth()` using method = 'gam' and formula 'y ~ s(x, bs = "cs")'
Warning: Removed 16854 rows containing non-finite values (stat_smooth).
Warning: Removed 16854 rows containing missing values (geom_point).
# NU_NOTA_COMP4 against co_ppb, one panel per calendar year.
# vars() evaluates NU_ANO + 2000 as one expression; the formula form
# `~NU_ANO+2000` is split on `+` and facets by NU_ANO and the constant 2000.
# Assumes NU_ANO is numeric (it is used as NU_ANO + 2000 elsewhere in the file).
dftest0 %>%
  ggplot(aes(co_ppb, NU_NOTA_COMP4)) +
  geom_point() +
  geom_smooth() +
  facet_wrap(vars(NU_ANO + 2000), scales = "free")
`geom_smooth()` using method = 'gam' and formula 'y ~ s(x, bs = "cs")'
Warning: Removed 16854 rows containing non-finite values (stat_smooth).
Warning: Removed 16854 rows containing missing values (geom_point).
# NU_NOTA_COMP5 against co_ppb, one panel per calendar year.
# vars() evaluates NU_ANO + 2000 as one expression; the formula form
# `~NU_ANO+2000` is split on `+` and facets by NU_ANO and the constant 2000.
# Assumes NU_ANO is numeric (it is used as NU_ANO + 2000 elsewhere in the file).
dftest0 %>%
  ggplot(aes(co_ppb, NU_NOTA_COMP5)) +
  geom_point() +
  geom_smooth() +
  facet_wrap(vars(NU_ANO + 2000), scales = "free")
`geom_smooth()` using method = 'gam' and formula 'y ~ s(x, bs = "cs")'
Warning: Removed 16854 rows containing non-finite values (stat_smooth).
Warning: Removed 16854 rows containing missing values (geom_point).
library(lme4)
# Linear mixed model for NU_NOTA_REDACAO with fixed effects for co_ppb,
# no2_ppb and NU_IDADE, plus a random intercept and co_ppb slope for both
# CO_UF_RESIDENCIA and NU_ANO.
# NOTE(review): this fit reports "Model failed to converge: degenerate
# Hessian"; the specification is kept as is here -- consider rescaling the
# predictors or simplifying the random slopes.
gpa_mixed <- lmer(
  NU_NOTA_REDACAO ~ co_ppb + no2_ppb + NU_IDADE +
    (1 + co_ppb | CO_UF_RESIDENCIA) +
    (1 + co_ppb | NU_ANO),
  data = dftest0
)
Warning in checkConv(attr(opt, "derivs"), opt$par, ctrl = control$checkConv, :
unable to evaluate scaled gradient
Warning in checkConv(attr(opt, "derivs"), opt$par, ctrl = control$checkConv, :
Model failed to converge: degenerate Hessian with 4 negative eigenvalues
# Print the fitted model summary (random-effect variances, fixed-effect
# estimates and any convergence messages).
summary(gpa_mixed)
Linear mixed model fit by REML ['lmerMod']
Formula:
NU_NOTA_REDACAO ~ co_ppb + no2_ppb + NU_IDADE + (1 + co_ppb |
CO_UF_RESIDENCIA) + (1 + co_ppb | NU_ANO)
Data: dftest0
REML criterion at convergence: 5582022
Scaled residuals:
Min 1Q Median 3Q Max
-2.6670 -0.2039 0.0322 0.2062 4.2815
Random effects:
Groups Name Variance Std.Dev. Corr
CO_UF_RESIDENCIA (Intercept) 8.381e-03 0.09155
co_ppb 4.230e-03 0.06504 0.63
NU_ANO (Intercept) 3.320e+04 182.19703
co_ppb 8.200e+04 286.36461 0.00
Residual 2.724e+04 165.05674
Number of obs: 427696, groups: CO_UF_RESIDENCIA, 27; NU_ANO, 11
Fixed effects:
Estimate Std. Error t value
(Intercept) 144.64178 54.95350 2.632
co_ppb -0.02048 86.35020 0.000
no2_ppb -0.10479 0.28394 -0.369
NU_IDADE -3.02018 0.03650 -82.755
Correlation of Fixed Effects:
(Intr) co_ppb n2_ppb
co_ppb 0.000
no2_ppb 0.015 0.000
NU_IDADE -0.009 0.000 -0.049
optimizer (nloptwrap) convergence code: 0 (OK)
unable to evaluate scaled gradient
Model failed to converge: degenerate Hessian with 4 negative eigenvalues
Extração de Coeficientes
Viés de Coleta
Atrito / Perda de dados
Dados Ausentes
Variáveis Pouco Claras
Avaliar Variáveis Q1, Q2, …, Qn
Avaliar Variáveis Q1, Q2, …, Qn
Avaliar autocorrelação das Variáveis Ambientais
Sugestões:
2.1 Estatística Descritiva das Variáveis
2.2 Associações, correlações
2.3 Modelo Lógico
melhores variáveis ambientais
melhores variáveis de Performance
Covariáveis, autocorrelação, efeitos fixos e aleatórios
2.1 Hipótese Básica do Modelo
2.2 Extração de Coeficientes
2.3 Hipóteses