1 Códigos das Cotas

Código	Descrição
AC	Ampla Concorrência.
L01	Candidatos com renda familiar bruta per capita igual ou inferior a 1,5 salário mínimo, que tenham cursado integralmente o ensino médio em escolas públicas.
L02	Candidatos autodeclarados pretos, pardos ou indígenas, com renda familiar bruta per capita igual ou inferior a 1,5 salário mínimo e que tenham cursado integralmente o ensino médio em escola pública.
L05	Candidatos que, independentemente da renda, tenham cursado integralmente o ensino médio em escolas públicas.
L06	Candidatos autodeclarados pretos, pardos ou indígenas que, independentemente da renda, tenham cursado integralmente o ensino médio em escolas públicas.
L09	Candidatos com deficiência que tenham renda familiar bruta per capita igual ou inferior a 1,5 salário mínimo e que tenham cursado integralmente o ensino médio em escolas públicas.
L10	Candidatos com deficiência autodeclarados pretos, pardos ou indígenas, com renda familiar bruta per capita igual ou inferior a 1,5 salário mínimo e que tenham cursado integralmente o ensino médio em escola pública.
L13	Candidatos com deficiência que, independentemente da renda, tenham cursado integralmente o ensino médio em escolas públicas.
L14	Candidatos com deficiência autodeclarados pretos, pardos ou indígenas que, independentemente da renda, tenham cursado integralmente o ensino médio em escolas públicas.

2 Aprovados na 1ª chamada (Arquivo R1)

# PROGRAD - TRABALHO 01: INSTITUIÇÃO DA NOTA MÍNIMA NO ENEM
# VERIFICAR A REDUÇÃO NO NÚMERO DE CANDIDATOS.

# Pacotes -----------------------------------------------------------------

library(readr)
library(plyr)
library(ggplot2)
library(ggthemes)
library(plotly)
library(openxlsx)
library(readxl)
library(summarytools)

# Importando os dados -----------------------------------------------------

# Read the R1 CSV: semicolon-delimited, ISO-8859-1 encoded, whitespace trimmed.
# The columns listed in `colunas_r1` are skipped on import; every other column
# keeps readr's guessed type.
caminho_r1 <- "C:/Users/geisa.vasconcelos/workspace/trabalho/PROGRAD/nota_minima_enem/data/223_1_589R1_.csv"

colunas_r1 <- cols(
  CO_IES = col_skip(),
  DS_EMAIL = col_skip(),
  NO_IES = col_skip(),
  NO_MAE = col_skip(),
  NU_ETAPA = col_skip(),
  NU_FONE1 = col_skip(),
  NU_FONE2 = col_skip(),
  SG_IES = col_skip(),
  SG_UF_IES = col_skip()
)

R1 <- read_delim(
  caminho_r1,
  delim = ";",
  escape_double = FALSE,
  col_types = colunas_r1,
  locale = locale(encoding = "ISO-8859-1"),
  trim_ws = TRUE
)


# Renomeando as cotas  ----------------------------------------------------

# Map from the long quota description found in the data to its short code.
# The same map is applied to both quota columns, which are then stored as
# factors. The description strings must match the data exactly (note that the
# L10 description has no trailing period).
mapa_cotas <- c(
  "Ampla concorrência" = "AC",
  "Candidatos autodeclarados pretos, pardos ou indígenas que, independentemente da renda (art. 14, II, Portaria Normativa nº 18/2012), tenham cursado integralmente o ensino médio em escolas públicas (Lei nº 12.711/2012)." = "L06",
  "Candidatos autodeclarados pretos, pardos ou indígenas, com renda familiar bruta per capita igual ou inferior a 1,5 salário mínimo e que tenham cursado integralmente o ensino médio em escolas públicas (Lei nº 12.711/2012)." = "L02",
  "Candidatos com deficiência autodeclarados pretos, pardos ou indígenas que, independentemente da renda (art. 14, II, Portaria Normativa nº 18/2012), tenham cursado integralmente o ensino médio em escolas públicas (Lei nº 12.711/2012)." = "L14",
  "Candidatos com deficiência autodeclarados pretos, pardos ou indígenas, que tenham renda familiar bruta per capita igual ou inferior a 1,5 salário mínimo e que tenham cursado integralmente o ensino médio em escolas públicas (Lei nº 12.711/2012)" = "L10",
  "Candidatos com deficiência que tenham renda familiar bruta per capita igual ou inferior a 1,5 salário mínimo e que tenham cursado integralmente o ensino médio em escolas públicas (Lei nº 12.711/2012)." = "L09",
  "Candidatos com deficiência que, independentemente da renda (art. 14, II, Portaria Normativa nº 18/2012), tenham cursado integralmente o ensino médio em escolas públicas (Lei nº 12.711/2012)." = "L13",
  "Candidatos com renda familiar bruta per capita igual ou inferior a 1,5 salário mínimo que tenham cursado integralmente o ensino médio em escolas públicas (Lei nº 12.711/2012)." = "L01",
  "Candidatos que, independentemente da renda (art. 14, II, Portaria Normativa nº 18/2012), tenham cursado integralmente o ensino médio em escolas públicas (Lei nº 12.711/2012)." = "L05"
)

R1$NO_MODALIDADE_CONCORRENCIA <- as.factor(
  revalue(R1$NO_MODALIDADE_CONCORRENCIA, mapa_cotas)
)

R1$NO_MOD_CONCORRENCIA_ORIG <- as.factor(
  revalue(R1$NO_MOD_CONCORRENCIA_ORIG, mapa_cotas)
)


# Utilizando somente a parte do dataset que importa -----------------------

# Plain data.frame copy of R1, then a six-column working table: the five score
# columns under short names plus the (already recoded) quota column.
R1_dataframe <- as.data.frame(R1)

dados <- data.frame(
  Linguagens = R1_dataframe[["NU_NOTA_L"]],
  Humanas = R1_dataframe[["NU_NOTA_CH"]],
  Naturais = R1_dataframe[["NU_NOTA_CN"]],
  Matematica = R1_dataframe[["NU_NOTA_M"]],
  Redacao = R1_dataframe[["NU_NOTA_R"]],
  Cota = R1_dataframe[["NO_MODALIDADE_CONCORRENCIA"]]
)


# Rendered summary of the working table (summarytools).
print(
  dfSummary(dados, plain.ascii = FALSE, graph.magnif = 0.75, style = "grid", valid.col = FALSE, na.col = FALSE),
  method = 'render'
)

Data Frame Summary

dados

Dimensions: 2663 x 6
Duplicates: 0

Variable

Stats / Values

Freqs (% of Valid)

Graph

Linguagens [numeric]

Mean (sd) : 570.5 (54.3) min < med < max: 0 < 573.7 < 742.5 IQR (CV) : 64 (0.1)

1464 distinct values

Humanas [numeric]

Mean (sd) : 615.8 (59.1) min < med < max: 400 < 625.8 < 825.6 IQR (CV) : 62.1 (0.1)

1446 distinct values

Naturais [numeric]

Mean (sd) : 537.5 (70.9) min < med < max: 0 < 540.3 < 822 IQR (CV) : 93.8 (0.1)

1657 distinct values

Matematica [numeric]

Mean (sd) : 624.1 (92.1) min < med < max: 0 < 630.9 < 924 IQR (CV) : 130.6 (0.1)

1834 distinct values

Redacao [numeric]

Mean (sd) : 644.1 (146.5) min < med < max: 240 < 640 < 980 IQR (CV) : 180 (0.2)

38 distinct values

Cota [factor]

1. AC 2. L01 3. L02 4. L05 5. L06 6. L09 7. L10 8. L13 9. L14

1355	(	50.9%	)
253	(	9.5%	)
367	(	13.8%	)
253	(	9.5%	)
356	(	13.4%	)
14	(	0.5%	)
31	(	1.2%	)
14	(	0.5%	)
20	(	0.8%	)

Generated by summarytools 0.9.3 (R version 3.5.3)
2019-07-23

# Eliminado ou nao pela nota minima ---------------------------------------

# Classify one candidate row against a minimum-score cutoff.
#
# Args:
#   x: one row of the score table as a vector. Only the 5th element is read
#      (the data frames built in this file place Redacao there). It may arrive
#      as character: apply() over a data frame that has a non-numeric column
#      hands each row over as strings, so the value is converted to numeric
#      before comparing. Comparing the raw string would be lexicographic
#      ("1000" < "300" is TRUE).
#   nota_minima: numeric cutoff (if a vector, being below any of them counts).
# Returns:
#   "ELIMINADO" when x[5] is strictly below the cutoff, "PERMANECE" otherwise,
#   NA when the score or the cutoff is missing.
passa_nota_minima <- function(x, nota_minima) {
  nota <- as.numeric(x[5])
  abaixo_do_corte <- any(nota < nota_minima)
  if (is.na(abaixo_do_corte)) {
    return(NA_character_)
  }
  if (abaixo_do_corte) "ELIMINADO" else "PERMANECE"
}

# Nova coluna com a situacao -----------------------------------------------

# Proportion of candidates eliminated by each cutoff, overall and per quota.
#
# The score tested is the 5th column of `dados`, read by position (the data
# frames built in this file place Redacao there). A candidate is eliminated
# when that score is strictly below the cutoff, the same rule used by
# passa_nota_minima.
#
# Args:
#   dados: data frame (or tibble) with the score in column 5 and a `Cota`
#          column holding the quota code.
#   nota_minima: vector of cutoffs to evaluate.
# Returns:
#   Data frame with one row per cutoff. Columns GERAL (all rows), AC, L01, L02,
#   L05, L06, L09, L10, L13, L14 hold the proportion eliminated in that group
#   (NaN when the group has no rows); nota.minima holds the cutoff.
avaliacao_nota_minima <- function(dados, nota_minima) {
  codigo_cota <- c("GERAL", "AC", "L01", "L02", "L05", "L06", "L09", "L10", "L13", "L14")

  # Compare on the numeric column itself. Walking the rows with apply() would
  # coerce the whole data frame to a character matrix (Cota is not numeric),
  # turning the `<` test into a string comparison.
  notas <- as.numeric(as.character(dados[[5]]))
  cotas <- as.character(dados$Cota)

  proporcoes <- lapply(codigo_cota, function(cota) {
    # which() drops rows whose quota is NA instead of propagating them.
    notas_cota <- if (cota == "GERAL") notas else notas[which(cotas == cota)]
    vapply(
      nota_minima,
      function(corte) sum(notas_cota < corte) / length(notas_cota),
      numeric(1)
    )
  })
  names(proporcoes) <- codigo_cota

  dataframe_final <- as.data.frame(proporcoes)
  dataframe_final[["nota.minima"]] <- nota_minima
  dataframe_final
}

# Elimination proportions for every integer cutoff from 300 to 600.
resultados <- avaliacao_nota_minima(dados, nota_minima = 300:600)


# Long format: one row per (cutoff, quota) pair.
teste <- tidyr::gather(resultados, Cota, Proporcao, GERAL:L14)



# Criando gráfico ---------------------------------------------------------

# p <- ggplot(teste, aes(x=nota.minima, y=Proporcao, color=Cota)) 
# 
# p + geom_line(size=1.3) +
#   scale_x_continuous(name="Nota Mínima") +
#   scale_y_continuous(name="Proporção de Eliminados") +
#   ggtitle("Nota Mínima Enem - UFERSA (R1)") + 
#   theme_wsj()
# Análise exploratória ----------------------------------------------------

dados_hist <- dados

# Density histogram of one score column, one facet row per quota, with a
# dashed vertical line at each quota's mean.
#
# Args:
#   dados_plot: data frame with the score columns and a `Cota` column.
#   variavel:   name (string) of the score column to plot.
#   titulo:     plot title.
# Returns: a ggplot object.
histograma_por_cota <- function(dados_plot, variavel, titulo) {
  # Per-quota mean of the chosen column.
  mu <- ddply(dados_plot, "Cota", function(grupo) {
    data.frame(grp.mean = mean(grupo[[variavel]], na.rm = TRUE))
  })

  ggplot(dados_plot, aes(x = .data[[variavel]], y = ..density..)) +
    xlim(c(200, 1000)) +
    geom_histogram(color = "black", fill = "white") +
    facet_grid(Cota ~ .) +
    # color is set outside aes() so the line is actually drawn in red; inside
    # aes() the string is treated as a data value and mapped through the
    # default colour scale.
    geom_vline(data = mu, aes(xintercept = grp.mean),
               color = "red", linetype = "dashed", size = 1.4) +
    theme(legend.position = "none") +
    geom_density(alpha = .2, fill = "#FF6666") +
    xlab(variavel) +
    ggtitle(titulo)
}

# 1. Linguagens

p1 <- histograma_por_cota(dados_hist, "Linguagens", "Histogramas Linguagem Enem por cota")

p1

# 2. Humanas

p2 <- histograma_por_cota(dados_hist, "Humanas", "Histogramas Humanas Enem por cota")

p2

# 3. Naturais

p3 <- histograma_por_cota(dados_hist, "Naturais", "Histogramas ciências Naturais Enem por cota")

p3

# 4. Matematica

p4 <- histograma_por_cota(dados_hist, "Matematica", "Histogramas Matemática Enem por cota")

p4

# 5. Redacao

p5 <- histograma_por_cota(dados_hist, "Redacao", "Histogramas Redação Enem por cota")

p5

# plotly ------------------------------------------------------------------


# Interactive line chart of the proportion eliminated versus the cutoff.
# Only the AC series is visible at first; the others are shown by clicking
# their legend entry. A vertical segment marks the 350-point cutoff.
q <- plot_ly(resultados, x = ~nota.minima, y = ~AC, name = 'Ampla Concorrência', type = 'scatter', mode = 'lines') %>%
  add_trace(y = ~GERAL, name = 'GERAL', mode = 'lines', visible="legendonly") %>%
  add_trace(y = ~L01, name = 'L01', mode = 'lines', visible="legendonly") %>%
  add_trace(y = ~L02, name = 'L02', mode = 'lines', visible="legendonly") %>%
  add_trace(y = ~L05, name = 'L05', mode = 'lines', visible="legendonly") %>%
  add_trace(y = ~L06, name = 'L06', mode = 'lines', visible="legendonly") %>%
  add_trace(y = ~L09, name = 'L09', mode = 'lines', visible="legendonly") %>%
  add_trace(y = ~L10, name = 'L10', mode = 'lines', visible="legendonly") %>%
  add_trace(y = ~L13, name = 'L13', mode = 'lines', visible="legendonly") %>%
  add_trace(y = ~L14, name = 'L14', mode = 'lines', visible="legendonly") %>%
  # I() makes "red" a literal colour instead of a data value to be mapped.
  add_segments(x = 350, xend = 350, y = 0, yend = 1, name="350 pontos", color = I("red")) %>%
  layout(
    title = "Adoção de Nota Mínima Enem - UFERSA (R1)",
    # Axis titles go directly in layout(); `scene` only applies to 3D plots,
    # so titles placed under it are ignored on this 2D chart.
    xaxis = list(title = "Nota Mínima"),
    yaxis = list(title = "Proporção de Eliminados"),
    hovermode = 'compare'
    )


q

# NA VOLTA: AJEITAR O GRÁFICO E DEPOIS FAZER UMA TABELA COM OS RESULTADOS PARA ALGUMAS NOTAS MINIMAS
# ANÁLISE EXPLORATÓRIA


# Tabela para algumas escolhas de nota ------------------------------------

# Count and proportion of candidates eliminated by each cutoff, overall and
# per quota.
#
# The score tested is the 5th column of `dados`, read by position (the data
# frames built in this file place Redacao there). A candidate is eliminated
# when that score is strictly below the cutoff.
#
# Args:
#   dados: data frame (or tibble) with the score in column 5 and a `Cota`
#          column holding the quota code.
#   nota_minima: vector of cutoffs to evaluate.
# Returns:
#   Data frame with one row per cutoff. For each group in GERAL, AC, L01, L02,
#   L05, L06, L09, L10, L13, L14 (in that order) there are two columns,
#   total.eliminados<group> (integer count) and prop.eliminados<group>
#   (count / group size, NaN for an empty group), followed by nota.minima.
avaliacao_nota_minima2 <- function(dados, nota_minima) {
  codigo_cota <- c("GERAL", "AC", "L01", "L02", "L05", "L06", "L09", "L10", "L13", "L14")

  # Compare on the numeric column itself. Walking the rows with apply() would
  # coerce the whole data frame to a character matrix (Cota is not numeric),
  # turning the `<` test into a string comparison.
  notas <- as.numeric(as.character(dados[[5]]))
  cotas <- as.character(dados$Cota)

  blocos <- lapply(codigo_cota, function(cota) {
    # which() drops rows whose quota is NA instead of propagating them.
    notas_cota <- if (cota == "GERAL") notas else notas[which(cotas == cota)]
    total_eliminados <- vapply(
      nota_minima,
      function(corte) sum(notas_cota < corte),
      integer(1)
    )
    bloco <- data.frame(total_eliminados, total_eliminados / length(notas_cota))
    names(bloco) <- paste0(c("total.eliminados", "prop.eliminados"), cota)
    bloco
  })

  dataframe_final <- do.call(cbind, blocos)
  dataframe_final[["nota.minima"]] <- nota_minima
  dataframe_final
}

# Counts and proportions for every integer cutoff from 300 to 600, then only
# the five cutoffs reported in the tables.
resultados2 <- avaliacao_nota_minima2(dados, nota_minima = 300:600)

tabela_final <- filter(resultados2, resultados2$nota.minima %in% c(300, 350, 400, 450, 500))


# Cutoff plus the GERAL and AC columns. The first column is named explicitly
# so its header reads "nota.minima" rather than the expression text.
tabela_GERAL_AC <- cbind(nota.minima = tabela_final$nota.minima, tabela_final[, 1:4])
knitr::kable(tabela_GERAL_AC)

tabela_final$nota.minima	total.eliminadosGERAL	prop.eliminadosGERAL	total.eliminadosAC	prop.eliminadosAC
300	7	0.0026286	2	0.0014760
350	56	0.0210289	19	0.0140221
400	142	0.0533233	59	0.0435424
450	248	0.0931281	109	0.0804428
500	363	0.1363124	164	0.1210332

# Cutoff plus the L01 and L02 columns; first column named explicitly so its
# header reads "nota.minima" rather than the expression text.
tabela_GERAL_L1L2 <- cbind(nota.minima = tabela_final$nota.minima, tabela_final[, 5:8])
knitr::kable(tabela_GERAL_L1L2)

tabela_final$nota.minima	total.eliminadosL01	prop.eliminadosL01	total.eliminadosL02	prop.eliminadosL02
300	0	0.0000000	1	0.0027248
350	1	0.0039526	6	0.0163488
400	8	0.0316206	13	0.0354223
450	15	0.0592885	27	0.0735695
500	28	0.1106719	45	0.1226158

# Cutoff plus the L05 and L06 columns; first column named explicitly so its
# header reads "nota.minima" rather than the expression text.
tabela_GERAL_L5L6 <- cbind(nota.minima = tabela_final$nota.minima, tabela_final[, 9:12])
knitr::kable(tabela_GERAL_L5L6)

tabela_final$nota.minima	total.eliminadosL05	prop.eliminadosL05	total.eliminadosL06	prop.eliminadosL06
300	0	0.0000000	0	0.0000000
350	2	0.0079051	9	0.0252809
400	8	0.0316206	21	0.0589888
450	17	0.0671937	33	0.0926966
500	24	0.0948617	48	0.1348315

# Cutoff plus the L09 and L10 columns; first column named explicitly so its
# header reads "nota.minima" rather than the expression text.
tabela_GERAL_L9L10 <- cbind(nota.minima = tabela_final$nota.minima, tabela_final[, 13:16])
knitr::kable(tabela_GERAL_L9L10)

tabela_final$nota.minima	total.eliminadosL09	prop.eliminadosL09	total.eliminadosL10	prop.eliminadosL10
300	0	0.0000000	3	0.0967742
350	3	0.2142857	10	0.3225806
400	7	0.5000000	15	0.4838710
450	8	0.5714286	18	0.5806452
500	12	0.8571429	19	0.6129032

# Cutoff plus the L13 and L14 columns; first column named explicitly so its
# header reads "nota.minima" rather than the expression text.
tabela_GERAL_L13L14 <- cbind(nota.minima = tabela_final$nota.minima, tabela_final[, 17:20])
knitr::kable(tabela_GERAL_L13L14)

tabela_final$nota.minima	total.eliminadosL13	prop.eliminadosL13	total.eliminadosL14	prop.eliminadosL14
300	0	0.0000000	1	0.05
350	1	0.0714286	5	0.25
400	3	0.2142857	8	0.40
450	7	0.5000000	14	0.70
500	8	0.5714286	15	0.75

3 Confirmados Online

# PROGRAD - TRABALHO 01: INSTITUIÇÃO DA NOTA MÍNIMA NO ENEM
# VERIFICAR A REDUÇÃO NO NÚMERO DE CANDIDATOS.

# Importando os dados -----------------------------------------------------

# Spreadsheet of candidates confirmed online.
online <- read_excel(
  "C:/Users/geisa.vasconcelos/workspace/trabalho/PROGRAD/nota_minima_enem/data/Candidatos de TODAS as etapas confirmados online (2019-0).xls"
)


# Recuperando os dados de cotas -------------------------------------------

# Keep the five score columns and the quota column, under short names.
# Redacao must stay in position 5: avaliacao_nota_minima reads it by index.
colunas_origem <- c("NOTA CIENCIAS HUMANAS", "NOTA CIENCIAS NATURAIS",
                    "NOTA LINGUAGENS", "NOTA MATEMATICA", "NOTA REDACAO", "ACAO AFIRMATIVA")
online <- online[colunas_origem]
colnames(online) <- c("Humanas", "Naturais", "Linguagens", "Matematica", "Redacao", "Cota")

online$Cota <- as.factor(online$Cota)

# Scores are forced to numeric, going through character first.
for (coluna in c("Humanas", "Naturais", "Linguagens", "Matematica", "Redacao")) {
  online[[coluna]] <- as.numeric(as.character(online[[coluna]]))
}

resultados <- avaliacao_nota_minima(online, nota_minima = 300:600)

teste <- tidyr::gather(resultados, Cota, Proporcao, GERAL:L14)

# Rendered summary of the table (summarytools).
print(
  dfSummary(online, plain.ascii = FALSE, graph.magnif = 0.75, style = "grid"),
  method = 'render'
)

Data Frame Summary

online

Dimensions: 4012 x 6
Duplicates: 0

Variable

Stats / Values

Freqs (% of Valid)

Graph

Valid

Missing

Humanas [numeric]

Mean (sd) : 562.1 (78.5) min < med < max: 0 < 569.3 < 814.4 IQR (CV) : 111.9 (0.1)

2165 distinct values

4012 (100%)

0 (0%)

Naturais [numeric]

Mean (sd) : 488.9 (80.9) min < med < max: 0 < 475.1 < 818.3 IQR (CV) : 97.8 (0.2)

2078 distinct values

4012 (100%)

0 (0%)

Linguagens [numeric]

Mean (sd) : 517.1 (69.5) min < med < max: 318.8 < 514.6 < 763.3 IQR (CV) : 94.1 (0.1)

2088 distinct values

4012 (100%)

0 (0%)

Matematica [numeric]

Mean (sd) : 532.1 (98) min < med < max: 0 < 517.2 < 896.2 IQR (CV) : 129.2 (0.2)

2378 distinct values

4012 (100%)

0 (0%)

Redacao [numeric]

Mean (sd) : 512.8 (165.9) min < med < max: 120 < 500 < 980 IQR (CV) : 240 (0.3)

43 distinct values

4012 (100%)

0 (0%)

Cota [factor]

1. AC 2. L01 3. L02 4. L05 5. L06 6. L09 7. L10 8. L13 9. L14

1639	(	40.8%	)
487	(	12.1%	)
753	(	18.8%	)
441	(	11.0%	)
646	(	16.1%	)
4	(	0.1%	)
23	(	0.6%	)
3	(	0.1%	)
16	(	0.4%	)

4012 (100%)

0 (0%)

Generated by summarytools 0.9.3 (R version 3.5.3)
2019-07-23

# Criando gráfico ---------------------------------------------------------

# p <- ggplot(teste, aes(x=nota.minima, y=Proporcao, color=Cota)) 
# 
# p + geom_line(size=1.3) +
#   scale_x_continuous(name="Nota Mínima") +
#   scale_y_continuous(name="Proporção de Eliminados") +
#   ggtitle("Adoção de Nota Mínima Enem - UFERSA (online)") + 
#   theme_wsj()
# 
# p


# Análise exploratória ----------------------------------------------------


# Make sure the score columns are numeric (going through character first).
for (coluna in c("Humanas", "Naturais", "Linguagens", "Matematica", "Redacao")) {
  online[[coluna]] <- as.numeric(as.character(online[[coluna]]))
}

dados_hist <- online

# Score column -> plot title, in the order the plots are shown.
titulos_hist <- c(
  Linguagens = "Histogramas Linguagem Enem por cota",
  Humanas = "Histogramas Humanas Enem por cota",
  Naturais = "Histogramas ciências Naturais Enem por cota",
  Matematica = "Histogramas Matemática Enem por cota",
  Redacao = "Histogramas Redação Enem por cota"
)

# One density histogram per score column, faceted by quota, with a dashed
# vertical line at each quota's mean.
for (variavel in names(titulos_hist)) {
  mu <- ddply(dados_hist, "Cota", function(grupo) {
    data.frame(grp.mean = mean(grupo[[variavel]], na.rm = TRUE))
  })

  p <- ggplot(online, aes(x = .data[[variavel]], y = ..density..)) +
    xlim(c(200, 1000)) +
    geom_histogram(color = "black", fill = "white") +
    facet_grid(Cota ~ .) +
    # color is set outside aes() so the line is actually drawn in red; inside
    # aes() the string is treated as a data value and mapped through the
    # default colour scale.
    geom_vline(data = mu, aes(xintercept = grp.mean),
               color = "red", linetype = "dashed", size = 1.4) +
    theme(legend.position = "none") +
    geom_density(alpha = .2, fill = "#FF6666") +
    xlab(variavel) +
    ggtitle(titulos_hist[[variavel]])

  # Printed inside the loop: the x aesthetic is resolved when the plot is
  # drawn, so it must be drawn while `variavel` still names this column.
  print(p)
}

# plotly ------------------------------------------------------------------

# Interactive line chart of the proportion eliminated versus the cutoff.
# Only the AC series is visible at first; the others are shown by clicking
# their legend entry. A vertical segment marks the 350-point cutoff.
q <- plot_ly(resultados, x = ~nota.minima, y = ~AC, name = 'Ampla Concorrência', type = 'scatter', mode = 'lines') %>%
  add_trace(y = ~GERAL, name = 'GERAL', mode = 'lines', visible="legendonly") %>%
  add_trace(y = ~L01, name = 'L01', mode = 'lines', visible="legendonly") %>%
  add_trace(y = ~L02, name = 'L02', mode = 'lines', visible="legendonly") %>%
  add_trace(y = ~L05, name = 'L05', mode = 'lines', visible="legendonly") %>%
  add_trace(y = ~L06, name = 'L06', mode = 'lines', visible="legendonly") %>%
  add_trace(y = ~L09, name = 'L09', mode = 'lines', visible="legendonly") %>%
  add_trace(y = ~L10, name = 'L10', mode = 'lines', visible="legendonly") %>%
  add_trace(y = ~L13, name = 'L13', mode = 'lines', visible="legendonly") %>%
  add_trace(y = ~L14, name = 'L14', mode = 'lines', visible="legendonly") %>%
  # I() makes "red" a literal colour instead of a data value to be mapped.
  add_segments(x = 350, xend = 350, y = 0, yend = 1, name="350 pontos", color = I("red")) %>%
  layout(
    title = "Adoção de Nota Mínima Enem - UFERSA (Online)",
    # Axis titles go directly in layout(); `scene` only applies to 3D plots,
    # so titles placed under it are ignored on this 2D chart.
    xaxis = list(title = "Nota Mínima"),
    yaxis = list(title = "Proporção de Eliminados"),
    hovermode = 'compare'
    )

q

# Tabela para algumas escolhas de nota ------------------------------------

# Counts and proportions for every integer cutoff from 300 to 600, then only
# the five cutoffs reported in the tables.
resultados2 <- avaliacao_nota_minima2(online, nota_minima = 300:600)

tabela_final <- filter(resultados2, resultados2$nota.minima %in% c(300, 350, 400, 450, 500))

# Cutoff plus the GERAL and AC columns. The first column is named explicitly
# so its header reads "nota.minima" rather than the expression text.
tabela_GERAL_AC <- cbind(nota.minima = tabela_final$nota.minima, tabela_final[, 1:4])
knitr::kable(tabela_GERAL_AC)

tabela_final$nota.minima	total.eliminadosGERAL	prop.eliminadosGERAL	total.eliminadosAC	prop.eliminadosAC
300	148	0.0368893	50	0.0305064
350	650	0.1620140	233	0.1421599
400	1245	0.3103190	469	0.2861501
450	1638	0.4082752	619	0.3776693
500	1967	0.4902792	760	0.4636974

# Cutoff plus the L01 and L02 columns; first column named explicitly so its
# header reads "nota.minima" rather than the expression text.
tabela_GERAL_L1L2 <- cbind(nota.minima = tabela_final$nota.minima, tabela_final[, 5:8])
knitr::kable(tabela_GERAL_L1L2)

tabela_final$nota.minima	total.eliminadosL01	prop.eliminadosL01	total.eliminadosL02	prop.eliminadosL02
300	21	0.0431211	27	0.0358566
350	74	0.1519507	138	0.1832669
400	150	0.3080082	257	0.3413015
450	202	0.4147844	333	0.4422311
500	235	0.4825462	396	0.5258964

# Cutoff plus the L05 and L06 columns; first column named explicitly so its
# header reads "nota.minima" rather than the expression text.
tabela_GERAL_L5L6 <- cbind(nota.minima = tabela_final$nota.minima, tabela_final[, 9:12])
knitr::kable(tabela_GERAL_L5L6)

tabela_final$nota.minima	total.eliminadosL05	prop.eliminadosL05	total.eliminadosL06	prop.eliminadosL06
300	16	0.0362812	31	0.0479876
350	63	0.1428571	131	0.2027864
400	127	0.2879819	224	0.3467492
450	168	0.3809524	295	0.4566563
500	204	0.4625850	346	0.5356037

# Cutoff plus the L09 and L10 columns; first column named explicitly so its
# header reads "nota.minima" rather than the expression text.
tabela_GERAL_L9L10 <- cbind(nota.minima = tabela_final$nota.minima, tabela_final[, 13:16])
knitr::kable(tabela_GERAL_L9L10)

tabela_final$nota.minima	total.eliminadosL09	prop.eliminadosL09	total.eliminadosL10	prop.eliminadosL10
300	0	0.00	1	0.0434783
350	2	0.50	4	0.1739130
400	2	0.50	8	0.3478261
450	3	0.75	9	0.3913043
500	3	0.75	11	0.4782609

# Cutoff plus the L13 and L14 columns; first column named explicitly so its
# header reads "nota.minima" rather than the expression text.
tabela_GERAL_L13L14 <- cbind(nota.minima = tabela_final$nota.minima, tabela_final[, 17:20])
knitr::kable(tabela_GERAL_L13L14)

tabela_final$nota.minima	total.eliminadosL13	prop.eliminadosL13	total.eliminadosL14	prop.eliminadosL14
300	0	0.0000000	2	0.1250
350	0	0.0000000	5	0.3125
400	1	0.3333333	7	0.4375
450	1	0.3333333	8	0.5000
500	2	0.6666667	10	0.6250

4 Confirmados Presencialmente

# PROGRAD - TRABALHO 01: INSTITUIÇÃO DA NOTA MÍNIMA NO ENEM
# VERIFICAR A REDUÇÃO NO NÚMERO DE CANDIDATOS.

# Importando os dados -----------------------------------------------------

# Spreadsheet of candidates confirmed in person.
presencial <- read_excel(
  "C:/Users/geisa.vasconcelos/workspace/trabalho/PROGRAD/nota_minima_enem/data/Candidatos de TODAS as etapas confirmados presencialmente (2019-0).xls"
)


# Recuperando os dados de cotas -------------------------------------------

# Keep the five score columns and the quota column, under short names.
# Redacao must stay in position 5: avaliacao_nota_minima reads it by index.
colunas_origem <- c("NOTA CIENCIAS HUMANAS", "NOTA CIENCIAS NATURAIS",
                    "NOTA LINGUAGENS", "NOTA MATEMATICA", "NOTA REDACAO", "ACAO AFIRMATIVA")
presencial <- presencial[colunas_origem]
colnames(presencial) <- c("Humanas", "Naturais", "Linguagens", "Matematica", "Redacao", "Cota")

presencial$Cota <- as.factor(presencial$Cota)

# Scores are forced to numeric, going through character first.
for (coluna in c("Humanas", "Naturais", "Linguagens", "Matematica", "Redacao")) {
  presencial[[coluna]] <- as.numeric(as.character(presencial[[coluna]]))
}

resultados <- avaliacao_nota_minima(presencial, nota_minima = 300:600)

teste <- tidyr::gather(resultados, Cota, Proporcao, GERAL:L14)

# Rendered summary of the table (summarytools).
print(
  dfSummary(presencial, plain.ascii = FALSE, graph.magnif = 0.75, style = "grid"),
  method = 'render'
)

Data Frame Summary

presencial

Dimensions: 3227 x 6
Duplicates: 0

Variable

Stats / Values

Freqs (% of Valid)

Graph

Valid

Missing

Humanas [numeric]

Mean (sd) : 589.2 (73.3) min < med < max: 0 < 604 < 814.4 IQR (CV) : 92 (0.1)

1823 distinct values

3227 (100%)

0 (0%)

Naturais [numeric]

Mean (sd) : 514.6 (73.1) min < med < max: 0 < 512.6 < 822 IQR (CV) : 105.1 (0.1)

1874 distinct values

3227 (100%)

0 (0%)

Linguagens [numeric]

Mean (sd) : 545.3 (63.7) min < med < max: 324.8 < 550.6 < 737.3 IQR (CV) : 84.9 (0.1)

1773 distinct values

3227 (100%)

0 (0%)

Matematica [numeric]

Mean (sd) : 585.1 (101.2) min < med < max: 0 < 583.6 < 907.5 IQR (CV) : 156.2 (0.2)

2167 distinct values

3227 (100%)

0 (0%)

Redacao [numeric]

Mean (sd) : 588.9 (163.9) min < med < max: 120 < 580 < 980 IQR (CV) : 200 (0.3)

42 distinct values

3227 (100%)

0 (0%)

Cota [factor]

1. AC 2. L01 3. L02 4. L05 5. L06 6. L09 7. L10 8. L13 9. L14

1478	(	45.8%	)
346	(	10.7%	)
503	(	15.6%	)
357	(	11.1%	)
499	(	15.5%	)
8	(	0.2%	)
11	(	0.3%	)
11	(	0.3%	)
14	(	0.4%	)

3227 (100%)

0 (0%)

Generated by summarytools 0.9.3 (R version 3.5.3)
2019-07-23

# Criando gráfico ---------------------------------------------------------

# p <- ggplot(teste, aes(x=nota.minima, y=Proporcao, color=Cota)) 
# 
# p + geom_line(size=1.3) +
#   scale_x_continuous(name="Nota Mínima") +
#   scale_y_continuous(name="Proporção de Eliminados") +
#   ggtitle("Adoção de Nota Mínima Enem - UFERSA (Presencial)") + 
#   theme_wsj()
# 
# p


# Análise exploratória ----------------------------------------------------

dados_hist <- presencial

# Score column -> plot title, in the order the plots are shown.
titulos_hist <- c(
  Linguagens = "Histogramas Linguagem Enem por cota",
  Humanas = "Histogramas Humanas Enem por cota",
  Naturais = "Histogramas ciências Naturais Enem por cota",
  Matematica = "Histogramas Matemática Enem por cota",
  Redacao = "Histogramas Redação Enem por cota"
)

# One density histogram per score column, faceted by quota, with a dashed
# vertical line at each quota's mean.
for (variavel in names(titulos_hist)) {
  mu <- ddply(dados_hist, "Cota", function(grupo) {
    data.frame(grp.mean = mean(grupo[[variavel]], na.rm = TRUE))
  })

  p <- ggplot(presencial, aes(x = .data[[variavel]], y = ..density..)) +
    xlim(c(200, 1000)) +
    geom_histogram(color = "black", fill = "white") +
    facet_grid(Cota ~ .) +
    # color is set outside aes() so the line is actually drawn in red; inside
    # aes() the string is treated as a data value and mapped through the
    # default colour scale.
    geom_vline(data = mu, aes(xintercept = grp.mean),
               color = "red", linetype = "dashed", size = 1.4) +
    theme(legend.position = "none") +
    geom_density(alpha = .2, fill = "#FF6666") +
    xlab(variavel) +
    ggtitle(titulos_hist[[variavel]])

  # Printed inside the loop: the x aesthetic is resolved when the plot is
  # drawn, so it must be drawn while `variavel` still names this column.
  print(p)
}

# plotly ------------------------------------------------------------------

# Interactive line chart of the proportion eliminated versus the cutoff.
# Only the AC series is visible at first; the others are shown by clicking
# their legend entry. A vertical segment marks the 350-point cutoff.
q <- plot_ly(resultados, x = ~nota.minima, y = ~AC, name = 'Ampla Concorrência', type = 'scatter', mode = 'lines') %>%
  add_trace(y = ~GERAL, name = 'GERAL', mode = 'lines', visible="legendonly") %>%
  add_trace(y = ~L01, name = 'L01', mode = 'lines', visible="legendonly") %>%
  add_trace(y = ~L02, name = 'L02', mode = 'lines', visible="legendonly") %>%
  add_trace(y = ~L05, name = 'L05', mode = 'lines', visible="legendonly") %>%
  add_trace(y = ~L06, name = 'L06', mode = 'lines', visible="legendonly") %>%
  add_trace(y = ~L09, name = 'L09', mode = 'lines', visible="legendonly") %>%
  add_trace(y = ~L10, name = 'L10', mode = 'lines', visible="legendonly") %>%
  add_trace(y = ~L13, name = 'L13', mode = 'lines', visible="legendonly") %>%
  add_trace(y = ~L14, name = 'L14', mode = 'lines', visible="legendonly") %>%
  # I() makes "red" a literal colour instead of a data value to be mapped.
  add_segments(x = 350, xend = 350, y = 0, yend = 1, name="350 pontos", color = I("red")) %>%
  layout(
    title = "Adoção de Nota Mínima Enem - UFERSA (Presencial)",
    # Axis titles go directly in layout(); `scene` only applies to 3D plots,
    # so titles placed under it are ignored on this 2D chart.
    xaxis = list(title = "Nota Mínima"),
    yaxis = list(title = "Proporção de Eliminados"),
    hovermode = 'compare'
    )

q

# ANÁLISE EXPLORATÓRIA
# Tabela para algumas escolhas de nota ------------------------------------

# Counts and proportions for every integer cutoff from 300 to 600, then only
# the five cutoffs reported in the tables.
resultados2 <- avaliacao_nota_minima2(presencial, nota_minima = 300:600)

tabela_final <- filter(resultados2, resultados2$nota.minima %in% c(300, 350, 400, 450, 500))

# Cutoff plus the GERAL and AC columns. The first column is named explicitly
# so its header reads "nota.minima" rather than the expression text.
tabela_GERAL_AC <- cbind(nota.minima = tabela_final$nota.minima, tabela_final[, 1:4])
knitr::kable(tabela_GERAL_AC)

tabela_final$nota.minima	total.eliminadosGERAL	prop.eliminadosGERAL	total.eliminadosAC	prop.eliminadosAC
300	49	0.0151844	19	0.0128552
350	223	0.0691044	84	0.0568336
400	479	0.1484351	180	0.1217862
450	673	0.2085528	258	0.1745602
500	866	0.2683607	345	0.2334235

# Cutoff plus the L01 and L02 columns; first column named explicitly so its
# header reads "nota.minima" rather than the expression text.
tabela_GERAL_L1L2 <- cbind(nota.minima = tabela_final$nota.minima, tabela_final[, 5:8])
knitr::kable(tabela_GERAL_L1L2)

tabela_final$nota.minima	total.eliminadosL01	prop.eliminadosL01	total.eliminadosL02	prop.eliminadosL02
300	5	0.0144509	11	0.0218688
350	18	0.0520231	47	0.0934394
400	51	0.1473988	96	0.1908549
450	73	0.2109827	132	0.2624254
500	90	0.2601156	164	0.3260437

# Cutoff plus the L05 and L06 columns; first column named explicitly so its
# header reads "nota.minima" rather than the expression text.
tabela_GERAL_L5L6 <- cbind(nota.minima = tabela_final$nota.minima, tabela_final[, 9:12])
knitr::kable(tabela_GERAL_L5L6)

tabela_final$nota.minima	total.eliminadosL05	prop.eliminadosL05	total.eliminadosL06	prop.eliminadosL06
300	6	0.0168067	6	0.0120240
350	23	0.0644258	44	0.0881764
400	55	0.1540616	83	0.1663327
450	69	0.1932773	119	0.2384770
500	85	0.2380952	154	0.3086172

# Cutoff plus the L09 and L10 columns; first column named explicitly so its
# header reads "nota.minima" rather than the expression text.
tabela_GERAL_L9L10 <- cbind(nota.minima = tabela_final$nota.minima, tabela_final[, 13:16])
knitr::kable(tabela_GERAL_L9L10)

tabela_final$nota.minima	total.eliminadosL09	prop.eliminadosL09	total.eliminadosL10	prop.eliminadosL10
300	0	0.00	0	0.0000000
350	2	0.25	1	0.0909091
400	4	0.50	3	0.2727273
450	4	0.50	4	0.3636364
500	6	0.75	4	0.3636364

# Cutoff plus the L13 and L14 columns; first column named explicitly so its
# header reads "nota.minima" rather than the expression text.
tabela_GERAL_L13L14 <- cbind(nota.minima = tabela_final$nota.minima, tabela_final[, 17:20])
knitr::kable(tabela_GERAL_L13L14)

tabela_final$nota.minima	total.eliminadosL13	prop.eliminadosL13	total.eliminadosL14	prop.eliminadosL14
300	0	0.0000000	2	0.1428571
350	1	0.0909091	3	0.2142857
400	3	0.2727273	4	0.2857143
450	6	0.5454545	8	0.5714286
500	8	0.7272727	10	0.7142857

Desenvolvido por Kássio Camelo

kassio.silva@ufersa.edu.br

Impacto da Implantação de Nota Mínima para REDAÇÃO no Enem

Kássio Camelo Ferreira da Silva, Proplan - Ufersa

18 de julho de 2019

1 Códigos das Cotas

2 Aprovados na 1ª chamada (Arquivo R1)

Data Frame Summary

dados

3 Confirmados Online

Data Frame Summary

online

4 Confirmados Presencialmente

Data Frame Summary

presencial