Tabelas

# Cross-tabulate TP_SEXO (rows) against grp (columns), showing column
# percentages alongside the observed counts.
sjt.xtab(
  var.row = enem_2015$TP_SEXO,
  var.col = enem_2015$grp,
  show.col.prc = TRUE
)
## Argument `include.values` is deprecated. Please use `values` instead.
## Argument `include.non.labelled` is deprecated. Please use `non.labelled` instead.
## Argument `include.values` is deprecated. Please use `values` instead.
## Argument `include.non.labelled` is deprecated. Please use `non.labelled` instead.
## Argument `include.values` is deprecated. Please use `values` instead.
## Argument `include.non.labelled` is deprecated. Please use `non.labelled` instead.
| Sexo | grp = 0 | grp = 1 | Total |
|---|---|---|---|
| Feminino | 318585 (58.6 %) | 3485 (61.7 %) | 322070 (58.6 %) |
| Masculino | 225017 (41.4 %) | 2166 (38.3 %) | 227183 (41.4 %) |
| Total | 543602 (100 %) | 5651 (100 %) | 549253 (100 %) |

χ²=21.525 · df=1 · φ=0.006 · p=0.000
# Cross-tabulate TP_DEPENDENCIA_ADM_ESC (rows) against grp (columns), showing
# column percentages alongside the observed counts.
sjt.xtab(
  var.row = enem_2015$TP_DEPENDENCIA_ADM_ESC,
  var.col = enem_2015$grp,
  show.col.prc = TRUE
)
## Argument `include.values` is deprecated. Please use `values` instead.
## Argument `include.non.labelled` is deprecated. Please use `non.labelled` instead.
## Argument `include.values` is deprecated. Please use `values` instead.
## Argument `include.non.labelled` is deprecated. Please use `non.labelled` instead.
## Argument `include.values` is deprecated. Please use `values` instead.
## Argument `include.non.labelled` is deprecated. Please use `non.labelled` instead.
| Dependência administrativa (Escola) | grp = 0 | grp = 1 | Total |
|---|---|---|---|
| Federal | 3188 (2.3 %) | 11 (0.9 %) | 3199 (2.3 %) |
| Estadual | 107049 (77 %) | 1081 (88.6 %) | 108130 (77.1 %) |
| Municipal | 1312 (0.9 %) | 14 (1.1 %) | 1326 (0.9 %) |
| Privada | 27493 (19.8 %) | 114 (9.3 %) | 27607 (19.7 %) |
| Total | 139042 (100 %) | 1220 (100 %) | 140262 (100 %) |

χ²=98.792 · df=3 · Cramer’s V=0.027 · p=0.000

Primeira Questão

# Mean of NU_NOTA_MT within each TP_SEXO group. na.rm = TRUE keeps a single
# missing score from turning a whole group's mean into NA.
MEDIA_NU_NOTA_MT <- tapply(
  enem_2015$NU_NOTA_MT,
  enem_2015$TP_SEXO,
  mean,
  na.rm = TRUE
)

# Store on every row the mean of that row's group: the first tapply() cell
# when TP_SEXO is 0, the second otherwise (rows with NA TP_SEXO stay NA).
enem_2015$MEDIA_NU_NOTA_MT <- ifelse(
  enem_2015$TP_SEXO == 0,
  MEDIA_NU_NOTA_MT[1],
  MEDIA_NU_NOTA_MT[2]
)

 # Histogram of NU_NOTA_MT as a percentage of observations, one facet per
 # TP_SEXO value. Scores outside [250, 1000] are dropped by the x limits.
 # NOTE(review): sum(..count..) looks like it is evaluated over the whole
 # layer (both facets), so bars would be percentages of the full sample rather
 # than of each sex -- confirm this is the intended denominator.
 enem_2015 %>%
   ggplot(aes(x = NU_NOTA_MT, y = ..count.. / sum(..count..) * 100)) +
   ggtitle("Dos alunos que acertaram os itens dificeis, existe diferença entre os sexos?") +
   # bins = 30 is the value stat_bin() was already falling back to; stating it
   # keeps the plot unchanged and silences the "Pick better value" message.
   geom_histogram(bins = 30, fill = "blue", alpha = 0.5) +
   xlab("Nota") +
   ylab("Proporção (%)") +
   scale_x_continuous(limits = c(250, 1000)) +
   scale_y_continuous(limits = c(0, 8), breaks = 1:8) +
   # Disabled: dotted vertical line at each group's mean score. The original
   # draft passed `xintercept` twice inside one aes(); a working form would be
   # geom_vline(aes(xintercept = MEDIA_NU_NOTA_MT), linetype = 3) +
   facet_grid(. ~ TP_SEXO, labeller = labeller(TP_SEXO = labels))
## `stat_bin()` using `bins = 30`. Pick better value with `binwidth`.
## Warning: Removed 544 rows containing non-finite values (stat_bin).
## Warning: Removed 2 rows containing missing values (geom_bar).

Segunda Questão

 # Scatter plot of NU_IDADE against NU_NOTA_MT, with points coloured by
 # NU_NOTA_MT along a gradient built from the 7-colour "Paired" palette.
 ggplot(
   enem_2015,
   aes(x = NU_NOTA_MT, y = NU_IDADE, color = NU_NOTA_MT)
 ) +
   geom_point() +
   scale_color_gradientn(colours = brewer.pal(7, "Paired")) +
   labs(
     title = "Qual a relação entre nota e idade?",
     x = "Nota",
     y = "Idade",
     color = "Nota"
   )
## Warning: Removed 4 rows containing missing values (geom_point).

Terceira Questão

# Scatter of NU_NOTA_MT (y) against c1_meas3 (x), coloured by acrt_dif2, with
# a linear fit and reference lines at the mean of each variable.
#
# The x/y mapping is declared once at plot level so that geom_smooth() fits
# the same variables geom_point() draws. Previously the plot-level mapping was
# x = NU_NOTA_MT, y = mt_scores, which only geom_smooth() inherited: every row
# then fell outside the [-4, 4] x limits and no regression line was drawn.
# It also set the axis titles to the wrong variables.
enem_2015 %>%
  ggplot(aes(x = c1_meas3, y = NU_NOTA_MT)) +
  ggtitle("Quem acertou os itens mais difíceis tirou maior nota?") +
  # Heavy transparency because the points overplot strongly.
  geom_point(aes(color = acrt_dif2), alpha = 1 / 44) +
  scale_x_continuous(
    breaks = seq(-4, 4, 1),
    limits = c(-4, 4)
  ) +
  scale_y_continuous(
    breaks = seq(250, 1000, 50),
    limits = c(250, 1000)
  ) +
  scale_colour_gradient(
    low = "red",
    high = "blue"
  ) +
  geom_smooth(method = "lm", se = FALSE) +
  # Plain means (NA removed) instead of building a full describe() summary
  # just to read its $mean field.
  geom_hline(yintercept = mean(enem_2015$NU_NOTA_MT, na.rm = TRUE)) +
  geom_vline(xintercept = mean(enem_2015$c1_meas3, na.rm = TRUE))
## Warning: Removed 549253 rows containing non-finite values (stat_smooth).
## Warning: Removed 564 rows containing missing values (geom_point).