bibliotecas
# abrir base e criar amostras
#dados_finais_2000_2010 <- readRDS("~/RStudio/ENEM/2_Dado tratado/dados_finais_2000_2010.rds")
#df<-slice_sample(dados_finais_2000_2010,prop = 0.01)
#saveRDS(df, file = "sample_00_10.rds")
#table(dados_finais_2000_2010$NU_ANO)
#table(df$NU_ANO)
#dados_finais_2012_2014 <- readRDS("~/RStudio/ENEM/2_Dado tratado/dados_finais_2012_2014.rds")
#df2<-slice_sample(dados_finais_2012_2014,prop = 0.01)
#saveRDS(df2, file = "sample_12_14.rds")
#dados_finais_2015_2020 <- readRDS("~/RStudio/ENEM/2_Dado tratado/dados_finais_2015_2020.rds")
#df3<-slice_sample(dados_finais_2015_2020,prop = 0.01)
#saveRDS(df3, file = "sample_15_20.rds")
# ENEM samples, one file per period (produced by the commented-out
# slice_sample(prop = 0.01) code above).
# Sample for 2000-2010
sample_00_10 <- readRDS("~/RStudio/ENEM/sample_00_10.rds")
# Sample for 2012-2014
sample_12_14 <- readRDS("~/RStudio/ENEM/sample_12_14.rds")
# Sample for 2015-2020
sample_15_20 <- readRDS("~/RStudio/ENEM/sample_15_20.rds")

# Environmental variables: one file per region, stacked into a single table.
Centro <- readRDS("~/RStudio/ENEM/Data-20220615T014950Z-001/Data/Centro.rds")
NE <- readRDS("~/RStudio/ENEM/Data-20220615T014950Z-001/Data/NE.rds")
NO <- readRDS("~/RStudio/ENEM/Data-20220615T014950Z-001/Data/NO.rds")
SE <- readRDS("~/RStudio/ENEM/Data-20220615T014950Z-001/Data/SE.rds")
SUL <- readRDS("~/RStudio/ENEM/Data-20220615T014950Z-001/Data/Sul.rds")

# Centro was read but left out of the original bind_rows() call, so its rows
# never reached df_Am; all five regional tables are stacked here.
df_Am <- bind_rows(Centro, NE, NO, SE, SUL)

# Drop the per-region tables once they have been combined.
rm(NE, NO, SE, SUL, Centro)
head(df_Am)
Selecionar Variaveis de Interesse
#nota componente X
# ,#Questão renda
# ,#Questão
# ,#Questão
# ,#Questão
)->sample_00_10
Error: unexpected ')' in ")"
rm(df_enem,df_Am)
Warning in rm(df_enem, df_Am) : object 'df_Am' not found
Resumo dos Dados
skim(df)
── Data Summary ────────────────────────
Values
Name df
Number of rows 915875
Number of columns 25
Key NULL
_______________________
Column type frequency:
character 2
factor 6
logical 1
numeric 16
________________________
Group variables None
# Convert the columns at positions 7 to 12 of df to numeric.
# as.numeric() applied to a factor returns the internal level codes rather
# than the labels, so factor columns go through as.character() first.
# Values that cannot be parsed as numbers become NA (R warns about the
# coercion).
df[, 7:12] <- lapply(df[, 7:12], function(col) {
  if (is.factor(col)) {
    as.numeric(as.character(col))
  } else {
    as.numeric(col)
  }
})
Warning in lapply(df[, 7:12], as.numeric) : NAs introduced by coercion
Warning in lapply(df[, 7:12], as.numeric) : NAs introduced by coercion
Warning in lapply(df[, 7:12], as.numeric) : NAs introduced by coercion
Warning in lapply(df[, 7:12], as.numeric) : NAs introduced by coercion
Warning in lapply(df[, 7:12], as.numeric) : NAs introduced by coercion
Warning in lapply(df[, 7:12], as.numeric) : NAs introduced by coercion
Análise univariada
# Numeric variables: pass the whole data frame to plot_num()
# (presumably funModeling::plot_num — confirm).
plot_num(df)
Warning: `guides(<scale> = FALSE)` is deprecated. Please use `guides(<scale> = "none")` instead.
# Categorical variables: pass the whole data frame to freq()
# (presumably funModeling::freq — confirm). The recorded output shows it
# skips the plot for variables with more than 100 categories.
freq(df)
Warning in freq_logic(data = data, input = input[i], plot, na.rm, path_out = path_out) :
Skipping plot for variable 'NU_INSCRICAO' (more than 100 categories)
Warning: `guides(<scale> = FALSE)` is deprecated. Please use `guides(<scale> = "none")` instead.
Warning: `guides(<scale> = FALSE)` is deprecated. Please use `guides(<scale> = "none")` instead.
Warning in freq_logic(data = data, input = input[i], plot, na.rm, path_out = path_out) :
Skipping plot for variable 'NO_MUNICIPIO_RESIDENCIA' (more than 100 categories)
Warning in freq_logic(data = data, input = input[i], plot, na.rm, path_out = path_out) :
Skipping plot for variable 'CO_MUNICIPIO_RESIDENCIA' (more than 100 categories)
Warning: `guides(<scale> = FALSE)` is deprecated. Please use `guides(<scale> = "none")` instead.
Warning: `guides(<scale> = FALSE)` is deprecated. Please use `guides(<scale> = "none")` instead.
Warning: `guides(<scale> = FALSE)` is deprecated. Please use `guides(<scale> = "none")` instead.
[1] "Variables processed: NU_INSCRICAO, TP_SEXO, SG_UF_RESIDENCIA, NO_MUNICIPIO_RESIDENCIA, CO_MUNICIPIO_RESIDENCIA, NU_NOTA_COMP5, name_state, name_region"
Analise de Correlações
# df1 is expected to exist already; the line below is the (disabled) code that
# draws it as a 1% sample of df.
#df1<-slice_sample(df,prop = 0.01)

# NU_NOTA_REDACAO against NU_ANO: smoothed trend plus the raw observations.
# The original used geom_dotplot(size = 5, alpha = 0.01), which ignores `size`
# and aborted rendering with a missing-aesthetic error; geom_point() accepts
# both x and y as well as the size/alpha settings.
df1 %>%
  ggplot(aes(x = NU_ANO, y = NU_NOTA_REDACAO)) + # geom_density2d_filled() +
  geom_smooth() +
  geom_point(size = 5, alpha = 0.01)
Warning: Ignoring unknown parameters: size
`geom_smooth()` using method = 'gam' and formula 'y ~ s(x, bs = "cs")'
Warning: Removed 1263 rows containing non-finite values (stat_smooth).
Bin width defaults to 1/30 of the range of the data. Pick better value with `binwidth`.
Error in `check_required_aesthetics()`:
! geom_dotplot requires the following missing aesthetics: y
Backtrace:
1. base `<fn>`(x)
2. ggplot2:::print.ggplot(x)
4. ggplot2:::ggplot_build.ggplot(x)
5. ggplot2 by_layer(function(l, d) l$compute_geom_1(d))
6. ggplot2 f(l = layers[[i]], d = data[[i]])
7. l$compute_geom_1(d)
8. ggplot2 f(..., self = self)
9. ggplot2:::check_required_aesthetics(...)
# One scatter plot per column at positions 7 to 12 of df: the column on the y
# axis against pm25_ugm3, coloured by name_state, drawn from df1.
for (i in 7:12) {
  print(i)
  a <- colnames(df)[i]
  print(a)
  # `a` holds a column NAME. A bare `a` inside aes() maps that constant string
  # to y; .data[[a]] maps the column it names.
  p <- df1 %>%
    ggplot(aes(pm25_ugm3, .data[[a]], col = name_state)) +
    geom_point() +
    labs(y = a)
  # %>% binds tighter than +, so `geom_point() %>% print()` printed only the
  # layer object and never drew anything. Plots are not auto-printed inside a
  # for loop, so the assembled plot is printed explicitly.
  print(p)
}
[1] 7
[1] "NU_NOTA_REDACAO"
geom_point: na.rm = FALSE
stat_identity: na.rm = FALSE
position_identity
[1] 8
[1] "NU_NOTA_COMP1"
geom_point: na.rm = FALSE
stat_identity: na.rm = FALSE
position_identity
[1] 9
[1] "NU_NOTA_COMP2"
geom_point: na.rm = FALSE
stat_identity: na.rm = FALSE
position_identity
[1] 10
[1] "NU_NOTA_COMP3"
geom_point: na.rm = FALSE
stat_identity: na.rm = FALSE
position_identity
[1] 11
[1] "NU_NOTA_COMP4"
geom_point: na.rm = FALSE
stat_identity: na.rm = FALSE
position_identity
[1] 12
[1] "NU_NOTA_COMP5"
geom_point: na.rm = FALSE
stat_identity: na.rm = FALSE
position_identity
i
[1] 12
#df1 %>% ggplot(aes(pm25_ugm3,NU_NOTA_REDACAO,col=name_state))+
# geom_point()
# Linear model.
# NOTE(review): unfilled template — the assignment target and the `data`
# argument are blank, so this line does not parse as R until both are supplied.
<-lm(Nota~ poluente,data = )
# Mixed effects.
# The term (1+poluente|Estado) requests an intercept and a poluente slope that
# vary by Estado (lme4 formula syntax, assuming lmer is lme4::lmer — confirm).
# The assignment target and `data` are blank here as well.
<-lmer(Nota~ poluente + (1+poluente|Estado),data = )
# NOTE(review): both summary() calls are missing their argument; presumably
# one per model fitted above — confirm.
summary()
summary()
extração de Coeficientes
# Coefficient extraction.
# NOTE(review): `ds` is a bare symbol with no visible definition — confirm
# what was intended.
ds
# Predicted value.
# NOTE(review): the pipe has no left-hand side and predict() has no model
# argument. `estado` is lower-case here but `Estado` in the lmer formula —
# confirm the column name.
%>% ggplot(
aes(
predict(),
col=estado
)
) +
geom_point()
# Linear model.
# NOTE(review): this section repeats, token for token, the unfilled model
# template that appears directly before it. The assignment target and the
# `data` argument are blank, so this line does not parse as R as written.
<-lm(Nota~ poluente,data = )
# Mixed effects.
# The term (1+poluente|Estado) requests an intercept and a poluente slope that
# vary by Estado (lme4 formula syntax, assuming lmer is lme4::lmer — confirm).
# The assignment target and `data` are blank here as well.
<-lmer(Nota~ poluente + (1+poluente|Estado),data = )
# NOTE(review): both summary() calls are missing their argument; presumably
# one per model fitted above — confirm.
summary()
summary()
extração de Coeficientes
# Coefficient extraction.
# NOTE(review): `ds` is a bare symbol with no visible definition — confirm
# what was intended.
ds
# Predicted value.
# NOTE(review): the pipe has no left-hand side and predict() has no model
# argument. `estado` is lower-case here but `Estado` in the lmer formula —
# confirm the column name.
%>% ggplot(
aes(
predict(),
col=estado
)
) +
geom_point()