1 Códigos das Cotas

Código	Descrição
AC	Ampla Concorrência.
L01	Candidatos com renda familiar bruta per capita igual ou inferior a 1,5 salário mínimo, que tenham cursado integralmente o ensino médio em escolas públicas.
L02	Candidatos autodeclarados pretos, pardos ou indígenas, com renda familiar bruta per capita igual ou inferior a 1,5 salário mínimo e que tenham cursado integralmente o ensino médio em escola pública.
L05	Candidatos que, independentemente da renda, tenham cursado integralmente o ensino médio em escolas públicas.
L06	Candidatos autodeclarados pretos, pardos ou indígenas que, independentemente da renda, tenham cursado integralmente o ensino médio em escolas públicas.
L09	Candidatos com deficiência que tenham renda familiar bruta per capita igual ou inferior a 1,5 salário mínimo e que tenham cursado integralmente o ensino médio em escolas públicas.
L10	Candidatos com deficiência autodeclarados pretos, pardos ou indígenas, com renda familiar bruta per capita igual ou inferior a 1,5 salário mínimo e que tenham cursado integralmente o ensino médio em escola pública.
L13	Candidatos com deficiência que, independentemente da renda, tenham cursado integralmente o ensino médio em escolas públicas.
L14	Candidatos com deficiência autodeclarados pretos, pardos ou indígenas que, independentemente da renda, tenham cursado integralmente o ensino médio em escolas públicas.

2 Aprovados na 1ª chamada (Arquivo R1)

# PROGRAD - TRABALHO 01: INSTITUIÇÃO DA NOTA MÍNIMA NO ENEM
# VERIFICAR A REDUÇÃO NO NÚMERO DE CANDIDATOS.

# Pacotes -----------------------------------------------------------------

library(readr)
library(plyr)
library(ggplot2)
library(ggthemes)
library(plotly)
library(openxlsx)
library(readxl)
library(summarytools)

# Importando os dados -----------------------------------------------------

# Read the semicolon-delimited R1 file (ISO-8859-1 encoded), skipping the
# institution and contact columns listed in `col_types`.
R1 <- read_delim(
  "C:/Users/geisa.vasconcelos/workspace/trabalho/PROGRAD/nota_minima_enem/data/223_1_589R1_.csv",
  delim = ";",
  escape_double = FALSE,
  col_types = cols(
    CO_IES = col_skip(),
    DS_EMAIL = col_skip(),
    NO_IES = col_skip(),
    NO_MAE = col_skip(),
    NU_ETAPA = col_skip(),
    NU_FONE1 = col_skip(),
    NU_FONE2 = col_skip(),
    SG_IES = col_skip(),
    SG_UF_IES = col_skip()
  ),
  locale = locale(encoding = "ISO-8859-1"),
  trim_ws = TRUE
)


# Renomeando as cotas  ----------------------------------------------------

# Lookup from the long quota description found in the R1 file to its short
# code. Kept in a single vector so both recoded columns always share exactly
# the same mapping.
codigos_cota <- c(
  "Ampla concorrência" = "AC",
  "Candidatos autodeclarados pretos, pardos ou indígenas que, independentemente da renda (art. 14, II, Portaria Normativa nº 18/2012), tenham cursado integralmente o ensino médio em escolas públicas (Lei nº 12.711/2012)." = "L06",
  "Candidatos autodeclarados pretos, pardos ou indígenas, com renda familiar bruta per capita igual ou inferior a 1,5 salário mínimo e que tenham cursado integralmente o ensino médio em escolas públicas (Lei nº 12.711/2012)." = "L02",
  "Candidatos com deficiência autodeclarados pretos, pardos ou indígenas que, independentemente da renda (art. 14, II, Portaria Normativa nº 18/2012), tenham cursado integralmente o ensino médio em escolas públicas (Lei nº 12.711/2012)." = "L14",
  "Candidatos com deficiência autodeclarados pretos, pardos ou indígenas, que tenham renda familiar bruta per capita igual ou inferior a 1,5 salário mínimo e que tenham cursado integralmente o ensino médio em escolas públicas (Lei nº 12.711/2012)" = "L10",
  "Candidatos com deficiência que tenham renda familiar bruta per capita igual ou inferior a 1,5 salário mínimo e que tenham cursado integralmente o ensino médio em escolas públicas (Lei nº 12.711/2012)." = "L09",
  "Candidatos com deficiência que, independentemente da renda (art. 14, II, Portaria Normativa nº 18/2012), tenham cursado integralmente o ensino médio em escolas públicas (Lei nº 12.711/2012)." = "L13",
  "Candidatos com renda familiar bruta per capita igual ou inferior a 1,5 salário mínimo que tenham cursado integralmente o ensino médio em escolas públicas (Lei nº 12.711/2012)." = "L01",
  "Candidatos que, independentemente da renda (art. 14, II, Portaria Normativa nº 18/2012), tenham cursado integralmente o ensino médio em escolas públicas (Lei nº 12.711/2012)." = "L05"
)

# Replace the descriptions by their codes in both quota columns and store
# the result as a factor.
R1$NO_MODALIDADE_CONCORRENCIA <- as.factor(
  revalue(R1$NO_MODALIDADE_CONCORRENCIA, codigos_cota)
)

R1$NO_MOD_CONCORRENCIA_ORIG <- as.factor(
  revalue(R1$NO_MOD_CONCORRENCIA_ORIG, codigos_cota)
)


# Utilizando somente a parte do dataset que importa -----------------------

# Plain data.frame copy of the imported table.
R1_dataframe <- as.data.frame(R1)

# Working dataset: the five ENEM scores followed by the quota code.
dados <- data.frame(
  Linguagens = R1_dataframe[["NU_NOTA_L"]],
  Humanas    = R1_dataframe[["NU_NOTA_CH"]],
  Naturais   = R1_dataframe[["NU_NOTA_CN"]],
  Matematica = R1_dataframe[["NU_NOTA_M"]],
  Redacao    = R1_dataframe[["NU_NOTA_R"]],
  Cota       = R1_dataframe[["NO_MODALIDADE_CONCORRENCIA"]]
)


# Rendered summary of the working dataset (valid/missing columns hidden).
print(
  dfSummary(
    dados,
    plain.ascii = FALSE,
    graph.magnif = 0.75,
    style = "grid",
    valid.col = FALSE,
    na.col = FALSE
  ),
  method = 'render'
)

Data Frame Summary

dados

Dimensions: 2663 x 6
Duplicates: 0

Variable

Stats / Values

Freqs (% of Valid)

Graph

Linguagens [numeric]

Mean (sd) : 570.5 (54.3) min < med < max: 0 < 573.7 < 742.5 IQR (CV) : 64 (0.1)

1464 distinct values

Humanas [numeric]

Mean (sd) : 615.8 (59.1) min < med < max: 400 < 625.8 < 825.6 IQR (CV) : 62.1 (0.1)

1446 distinct values

Naturais [numeric]

Mean (sd) : 537.5 (70.9) min < med < max: 0 < 540.3 < 822 IQR (CV) : 93.8 (0.1)

1657 distinct values

Matematica [numeric]

Mean (sd) : 624.1 (92.1) min < med < max: 0 < 630.9 < 924 IQR (CV) : 130.6 (0.1)

1834 distinct values

Redacao [numeric]

Mean (sd) : 644.1 (146.5) min < med < max: 240 < 640 < 980 IQR (CV) : 180 (0.2)

38 distinct values

Cota [factor]

1. AC 2. L01 3. L02 4. L05 5. L06 6. L09 7. L10 8. L13 9. L14

1355	(	50.9%	)
253	(	9.5%	)
367	(	13.8%	)
253	(	9.5%	)
356	(	13.4%	)
14	(	0.5%	)
31	(	1.2%	)
14	(	0.5%	)
20	(	0.8%	)

Generated by summarytools 0.9.3 (R version 3.5.3)
2019-07-18

# Eliminado ou nao pela nota minima ---------------------------------------

# Classify one candidate against a minimum-score cutoff.
#
# x            Vector whose first five elements are the candidate's scores;
#              any further elements (e.g. the quota code) are ignored.
# nota_minima  Cutoff; a score strictly below it eliminates the candidate.
#
# Returns "ELIMINADO" if any of the five scores is below the cutoff,
# "PERMANECE" if none is, and NA when that cannot be decided because of
# missing scores.
passa_nota_minima <- function(x, nota_minima){
  # apply() over a data frame that also holds a non-numeric column hands each
  # row over as character strings; without this conversion `<` would compare
  # text ("1000" < "300" is TRUE) instead of numbers.
  notas <- as.numeric(x[1:5])
  eliminado <- any(notas < nota_minima)
  if (is.na(eliminado)) {
    return(NA_character_)
  }
  if (eliminado) "ELIMINADO" else "PERMANECE"
}

# Nova coluna com a situacao -----------------------------------------------

# Proportion of candidates eliminated at each minimum-score cutoff, overall
# and per quota.
#
# dados        Data frame whose first five columns are the scores and which
#              has a `Cota` column holding the quota code.
# nota_minima  Vector of cutoffs to evaluate.
#
# Returns a data frame with one row per cutoff: one column per group
# ("GERAL" = all candidates, then each quota code) with the proportion of
# candidates having at least one score strictly below the cutoff, followed by
# `nota.minima`. A group with no candidates yields NaN.
avaliacao_nota_minima <- function(dados, nota_minima){

  codigo_cota <- c("GERAL", "AC", "L01", "L02", "L05", "L06", "L09", "L10", "L13", "L14")

  # Proportion eliminated at every cutoff for one subset of candidates.
  proporcao_eliminados <- function(dados_temp) {
    # Scores are compared as numbers. Iterating rows with apply() would turn
    # them into text because of the non-numeric Cota column.
    notas <- lapply(unname(as.list(dados_temp[1:5])), as.numeric)
    # A candidate has some score below the cutoff exactly when the lowest
    # score is below it, so one row minimum serves every cutoff.
    menor_nota <- do.call(pmin, c(notas, list(na.rm = TRUE)))
    tem_na <- Reduce(`|`, lapply(notas, is.na))
    vapply(nota_minima, function(limite) {
      eliminado <- menor_nota < limite
      # With a missing score and no known score below the cutoff the outcome
      # is unknown, so it stays NA (and makes the proportion NA).
      eliminado[tem_na & !(eliminado %in% TRUE)] <- NA
      sum(eliminado) / length(eliminado)
    }, numeric(1), USE.NAMES = FALSE)
  }

  proporcoes <- lapply(codigo_cota, function(cota) {
    if (cota == "GERAL") {
      dados_temp <- dados
    } else {
      dados_temp <- dados[which(dados$Cota == cota), , drop = FALSE]
    }
    proporcao_eliminados(dados_temp)
  })
  names(proporcoes) <- codigo_cota

  as.data.frame(
    c(proporcoes, list(nota.minima = nota_minima)),
    stringsAsFactors = FALSE
  )

}

# Elimination proportions for every cutoff from 300 to 600 (R1 data).
resultados <- avaliacao_nota_minima(dados, nota_minima = 300:600)


# Long format: one row per (cutoff, group) pair.
teste <- tidyr::gather(resultados, Cota, Proporcao, GERAL:L14)



# Criando gráfico ---------------------------------------------------------

# p <- ggplot(teste, aes(x=nota.minima, y=Proporcao, color=Cota)) 
# 
# p + geom_line(size=1.3) +
#   scale_x_continuous(name="Nota Mínima") +
#   scale_y_continuous(name="Proporção de Eliminados") +
#   ggtitle("Nota Mínima Enem - UFERSA (R1)") + 
#   theme_wsj()
# Análise exploratória ----------------------------------------------------

# Density histograms of each ENEM score, one facet per quota, with the
# quota mean drawn as a dashed red line. xlim() restricts the plotted range
# to 200-1000; the means in `mu` are computed from all rows.
dados_hist <- dados

# 1. Linguagens

mu <- ddply(dados_hist, "Cota", summarise, grp.mean = mean(Linguagens))

p1 <- ggplot(dados, aes(x = Linguagens, y = ..density..)) +
  xlim(c(200, 1000)) +
  geom_histogram(color = "black", fill = "white") +
  facet_grid(Cota ~ .) +
  # The line colour is a fixed value, so it is set outside aes(); inside
  # aes() the string "red" is treated as data and mapped to a scale colour.
  geom_vline(data = mu, aes(xintercept = grp.mean), color = "red",
             linetype = "dashed", size = 1.4) +
  theme(legend.position = "none") +
  geom_density(alpha = .2, fill = "#FF6666") +
  ggtitle("Histogramas Linguagem Enem por cota")

p1

# 2. Humanas

mu <- ddply(dados_hist, "Cota", summarise, grp.mean = mean(Humanas))

p2 <- ggplot(dados, aes(x = Humanas, y = ..density..)) +
  xlim(c(200, 1000)) +
  geom_histogram(color = "black", fill = "white") +
  facet_grid(Cota ~ .) +
  geom_vline(data = mu, aes(xintercept = grp.mean), color = "red",
             linetype = "dashed", size = 1.4) +
  theme(legend.position = "none") +
  geom_density(alpha = .2, fill = "#FF6666") +
  ggtitle("Histogramas Humanas Enem por cota")

p2

# 3. Naturais

mu <- ddply(dados_hist, "Cota", summarise, grp.mean = mean(Naturais))

p3 <- ggplot(dados, aes(x = Naturais, y = ..density..)) +
  xlim(c(200, 1000)) +
  geom_histogram(color = "black", fill = "white") +
  facet_grid(Cota ~ .) +
  geom_vline(data = mu, aes(xintercept = grp.mean), color = "red",
             linetype = "dashed", size = 1.4) +
  theme(legend.position = "none") +
  geom_density(alpha = .2, fill = "#FF6666") +
  ggtitle("Histogramas ciências Naturais Enem por cota")

p3

# 4. Matematica

mu <- ddply(dados_hist, "Cota", summarise, grp.mean = mean(Matematica))

p4 <- ggplot(dados, aes(x = Matematica, y = ..density..)) +
  xlim(c(200, 1000)) +
  geom_histogram(color = "black", fill = "white") +
  facet_grid(Cota ~ .) +
  geom_vline(data = mu, aes(xintercept = grp.mean), color = "red",
             linetype = "dashed", size = 1.4) +
  theme(legend.position = "none") +
  geom_density(alpha = .2, fill = "#FF6666") +
  ggtitle("Histogramas Matemática Enem por cota")

p4

# 5. Redacao

mu <- ddply(dados_hist, "Cota", summarise, grp.mean = mean(Redacao))

p5 <- ggplot(dados, aes(x = Redacao, y = ..density..)) +
  xlim(c(200, 1000)) +
  geom_histogram(color = "black", fill = "white") +
  facet_grid(Cota ~ .) +
  geom_vline(data = mu, aes(xintercept = grp.mean), color = "red",
             linetype = "dashed", size = 1.4) +
  theme(legend.position = "none") +
  geom_density(alpha = .2, fill = "#FF6666") +
  ggtitle("Histogramas Redação Enem por cota")

p5

# plotly ------------------------------------------------------------------


# Interactive chart of the proportion eliminated versus the cutoff. Only the
# "Ampla Concorrência" line is shown initially; the other groups can be
# switched on from the legend. A vertical red line marks the 350-point cutoff.
q <- plot_ly(resultados, x = ~nota.minima, y = ~AC, name = 'Ampla Concorrência', type = 'scatter', mode = 'lines') %>%
  add_trace(y = ~GERAL, name = 'GERAL', mode = 'lines', visible = "legendonly") %>%
  add_trace(y = ~L01, name = 'L01', mode = 'lines', visible = "legendonly") %>%
  add_trace(y = ~L02, name = 'L02', mode = 'lines', visible = "legendonly") %>%
  add_trace(y = ~L05, name = 'L05', mode = 'lines', visible = "legendonly") %>%
  add_trace(y = ~L06, name = 'L06', mode = 'lines', visible = "legendonly") %>%
  add_trace(y = ~L09, name = 'L09', mode = 'lines', visible = "legendonly") %>%
  add_trace(y = ~L10, name = 'L10', mode = 'lines', visible = "legendonly") %>%
  add_trace(y = ~L13, name = 'L13', mode = 'lines', visible = "legendonly") %>%
  add_trace(y = ~L14, name = 'L14', mode = 'lines', visible = "legendonly") %>%
  # I() sets the colour literally; a bare "red" is treated as a data value
  # and mapped through the colour scale.
  add_segments(x = 350, xend = 350, y = 0, yend = 1, name = "350 pontos", color = I("red")) %>%
  layout(
    title = "Adoção de Nota Mínima Enem - UFERSA (R1)",
    # 2D charts read axis settings from layout.xaxis / layout.yaxis;
    # `scene` only applies to 3D plots, so titles placed there are ignored.
    xaxis = list(title = "Nota Mínima"),
    yaxis = list(title = "Proporção de Eliminados"),
    hovermode = 'compare'
  )


q

# NA VOLTA: AJEITAR O GRÁFICO E DEPOIS FAZER UMA TABELA COM OS RESULTADOS PARA ALGUMAS NOTAS MINIMAS
# ANÁLISE EXPLORATÓRIA


# Tabela para algumas escolhas de nota ------------------------------------

# Number and proportion of candidates eliminated at each minimum-score
# cutoff, overall and per quota.
#
# dados        Data frame whose first five columns are the scores and which
#              has a `Cota` column holding the quota code.
# nota_minima  Vector of cutoffs to evaluate.
#
# Returns a data frame with one row per cutoff. For each group ("GERAL" = all
# candidates, then each quota code) it holds `total.eliminados<group>` (count
# of candidates with at least one score strictly below the cutoff) and
# `prop.eliminados<group>` (that count divided by the group size; NaN for an
# empty group), followed by `nota.minima`.
avaliacao_nota_minima2 <- function(dados, nota_minima){

  codigo_cota <- c("GERAL", "AC", "L01", "L02", "L05", "L06", "L09", "L10", "L13", "L14")

  # Count of eliminated candidates at every cutoff for one subset.
  total_eliminados <- function(dados_temp) {
    # Scores are compared as numbers. Iterating rows with apply() would turn
    # them into text because of the non-numeric Cota column.
    notas <- lapply(unname(as.list(dados_temp[1:5])), as.numeric)
    # A candidate has some score below the cutoff exactly when the lowest
    # score is below it, so one row minimum serves every cutoff.
    menor_nota <- do.call(pmin, c(notas, list(na.rm = TRUE)))
    tem_na <- Reduce(`|`, lapply(notas, is.na))
    vapply(nota_minima, function(limite) {
      eliminado <- menor_nota < limite
      # With a missing score and no known score below the cutoff the outcome
      # is unknown, so it stays NA (and makes the totals NA).
      eliminado[tem_na & !(eliminado %in% TRUE)] <- NA
      sum(eliminado)
    }, integer(1), USE.NAMES = FALSE)
  }

  colunas_por_cota <- lapply(codigo_cota, function(cota) {
    if (cota == "GERAL") {
      dados_temp <- dados
    } else {
      dados_temp <- dados[which(dados$Cota == cota), , drop = FALSE]
    }
    total <- total_eliminados(dados_temp)
    colunas <- list(total, total / nrow(dados_temp))
    names(colunas) <- c(paste0("total.eliminados", cota), paste0("prop.eliminados", cota))
    colunas
  })

  as.data.frame(
    c(do.call(c, colunas_por_cota), list(nota.minima = nota_minima)),
    stringsAsFactors = FALSE
  )

}

# Counts and proportions eliminated for every cutoff from 300 to 600 (R1).
resultados2 <- avaliacao_nota_minima2(dados, nota_minima = 300:600)

# Keep only the cutoffs reported in the tables.
tabela_final <- resultados2 %>% filter(nota.minima %in% c(300, 350, 400, 450, 500))


# Name the cutoff column explicitly; an unnamed cbind() argument gets the
# expression text ("tabela_final$nota.minima") as its header.
tabela_GERAL_AC <- cbind(nota.minima = tabela_final$nota.minima, tabela_final[, 1:4])
knitr::kable(tabela_GERAL_AC)

tabela_final$nota.minima	total.eliminadosGERAL	prop.eliminadosGERAL	total.eliminadosAC	prop.eliminadosAC
300	9	0.0033796	3	0.0022140
350	61	0.0229065	20	0.0147601
400	201	0.0754788	80	0.0590406
450	667	0.2504694	277	0.2044280
500	1270	0.4769057	543	0.4007380

# Quotas L01 and L02. The cutoff column is named explicitly so the header
# reads "nota.minima" instead of the expression text.
tabela_GERAL_L1L2 <- cbind(nota.minima = tabela_final$nota.minima, tabela_final[, 5:8])
knitr::kable(tabela_GERAL_L1L2)

tabela_final$nota.minima	total.eliminadosL01	prop.eliminadosL01	total.eliminadosL02	prop.eliminadosL02
300	0	0.0000000	1	0.0027248
350	1	0.0039526	6	0.0163488
400	11	0.0434783	20	0.0544959
450	53	0.2094862	105	0.2861035
500	125	0.4940711	207	0.5640327

# Quotas L05 and L06. The cutoff column is named explicitly so the header
# reads "nota.minima" instead of the expression text.
tabela_GERAL_L5L6 <- cbind(nota.minima = tabela_final$nota.minima, tabela_final[, 9:12])
knitr::kable(tabela_GERAL_L5L6)

tabela_final$nota.minima	total.eliminadosL05	prop.eliminadosL05	total.eliminadosL06	prop.eliminadosL06
300	0	0.0000000	1	0.0028090
350	2	0.0079051	11	0.0308989
400	14	0.0553360	29	0.0814607
450	55	0.2173913	105	0.2949438
500	109	0.4308300	210	0.5898876

# Quotas L09 and L10. The cutoff column is named explicitly so the header
# reads "nota.minima" instead of the expression text.
tabela_GERAL_L9L10 <- cbind(nota.minima = tabela_final$nota.minima, tabela_final[, 13:16])
knitr::kable(tabela_GERAL_L9L10)

tabela_final$nota.minima	total.eliminadosL09	prop.eliminadosL09	total.eliminadosL10	prop.eliminadosL10
300	0	0.0000000	3	0.0967742
350	4	0.2857143	11	0.3548387
400	10	0.7142857	21	0.6774194
450	13	0.9285714	28	0.9032258
500	14	1.0000000	29	0.9354839

# Quotas L13 and L14. The cutoff column is named explicitly so the header
# reads "nota.minima" instead of the expression text.
tabela_GERAL_L13L14 <- cbind(nota.minima = tabela_final$nota.minima, tabela_final[, 17:20])
knitr::kable(tabela_GERAL_L13L14)

tabela_final$nota.minima	total.eliminadosL13	prop.eliminadosL13	total.eliminadosL14	prop.eliminadosL14
300	0	0.0000000	1	0.05
350	1	0.0714286	5	0.25
400	4	0.2857143	12	0.60
450	13	0.9285714	18	0.90
500	14	1.0000000	19	0.95

3 Confirmados Online

# PROGRAD - TRABALHO 01: INSTITUIÇÃO DA NOTA MÍNIMA NO ENEM
# VERIFICAR A REDUÇÃO NO NÚMERO DE CANDIDATOS.

# Importando os dados -----------------------------------------------------

# Candidates confirmed online, read from the Excel export.
online <- read_excel(
  "C:/Users/geisa.vasconcelos/workspace/trabalho/PROGRAD/nota_minima_enem/data/Candidatos de TODAS as etapas confirmados online (2019-0).xls"
)


# Recuperando os dados de cotas -------------------------------------------

# Keep the five scores and the quota column, under short names.
online <- online[c("NOTA CIENCIAS HUMANAS", "NOTA CIENCIAS NATURAIS",
                   "NOTA LINGUAGENS", "NOTA MATEMATICA", "NOTA REDACAO",
                   "ACAO AFIRMATIVA")]
colnames(online) <- c("Humanas", "Naturais", "Linguagens", "Matematica", "Redacao", "Cota")

# Quota as a factor; the five score columns (positions 1 to 5) as numbers.
online$Cota <- as.factor(online$Cota)
online[1:5] <- lapply(online[1:5], function(nota) as.numeric(as.character(nota)))

# Elimination proportions for every cutoff from 300 to 600.
resultados <- avaliacao_nota_minima(online, nota_minima = 300:600)

# Long format: one row per (cutoff, group) pair.
teste <- tidyr::gather(resultados, Cota, Proporcao, GERAL:L14)

# Summary dos dados
print(
  dfSummary(online, plain.ascii = FALSE, graph.magnif = 0.75, style = "grid"),
  method = 'render'
)

Data Frame Summary

online

Dimensions: 4012 x 6
Duplicates: 0

Variable

Stats / Values

Freqs (% of Valid)

Graph

Valid

Missing

Humanas [numeric]

Mean (sd) : 562.1 (78.5) min < med < max: 0 < 569.3 < 814.4 IQR (CV) : 111.9 (0.1)

2165 distinct values

4012 (100%)

0 (0%)

Naturais [numeric]

Mean (sd) : 488.9 (80.9) min < med < max: 0 < 475.1 < 818.3 IQR (CV) : 97.8 (0.2)

2078 distinct values

4012 (100%)

0 (0%)

Linguagens [numeric]

Mean (sd) : 517.1 (69.5) min < med < max: 318.8 < 514.6 < 763.3 IQR (CV) : 94.1 (0.1)

2088 distinct values

4012 (100%)

0 (0%)

Matematica [numeric]

Mean (sd) : 532.1 (98) min < med < max: 0 < 517.2 < 896.2 IQR (CV) : 129.2 (0.2)

2378 distinct values

4012 (100%)

0 (0%)

Redacao [numeric]

Mean (sd) : 512.8 (165.9) min < med < max: 120 < 500 < 980 IQR (CV) : 240 (0.3)

43 distinct values

4012 (100%)

0 (0%)

Cota [factor]

1. AC 2. L01 3. L02 4. L05 5. L06 6. L09 7. L10 8. L13 9. L14

1639	(	40.8%	)
487	(	12.1%	)
753	(	18.8%	)
441	(	11.0%	)
646	(	16.1%	)
4	(	0.1%	)
23	(	0.6%	)
3	(	0.1%	)
16	(	0.4%	)

4012 (100%)

0 (0%)

Generated by summarytools 0.9.3 (R version 3.5.3)
2019-07-18

# Criando gráfico ---------------------------------------------------------

# p <- ggplot(teste, aes(x=nota.minima, y=Proporcao, color=Cota)) 
# 
# p + geom_line(size=1.3) +
#   scale_x_continuous(name="Nota Mínima") +
#   scale_y_continuous(name="Proporção de Eliminados") +
#   ggtitle("Adoção de Nota Mínima Enem - UFERSA (online)") + 
#   theme_wsj()
# 
# p


# Análise exploratória ----------------------------------------------------


# Make sure the five score columns are numeric before summarising them.
online$Humanas <- as.numeric(as.character(online$Humanas))
online$Naturais <- as.numeric(as.character(online$Naturais))
online$Linguagens <- as.numeric(as.character(online$Linguagens))
online$Matematica <- as.numeric(as.character(online$Matematica))
online$Redacao <- as.numeric(as.character(online$Redacao))

# Density histograms of each ENEM score, one facet per quota, with the
# quota mean drawn as a dashed red line. xlim() restricts the plotted range
# to 200-1000; the means in `mu` are computed from all rows.
dados_hist <- online

# 1. Linguagens

mu <- ddply(dados_hist, "Cota", summarise, grp.mean = mean(Linguagens))

p <- ggplot(online, aes(x = Linguagens, y = ..density..)) +
  xlim(c(200, 1000)) +
  geom_histogram(color = "black", fill = "white") +
  facet_grid(Cota ~ .) +
  # The line colour is a fixed value, so it is set outside aes(); inside
  # aes() the string "red" is treated as data and mapped to a scale colour.
  geom_vline(data = mu, aes(xintercept = grp.mean), color = "red",
             linetype = "dashed", size = 1.4) +
  theme(legend.position = "none") +
  geom_density(alpha = .2, fill = "#FF6666") +
  ggtitle("Histogramas Linguagem Enem por cota")

p

# 2. Humanas

mu <- ddply(dados_hist, "Cota", summarise, grp.mean = mean(Humanas))

p <- ggplot(online, aes(x = Humanas, y = ..density..)) +
  xlim(c(200, 1000)) +
  geom_histogram(color = "black", fill = "white") +
  facet_grid(Cota ~ .) +
  geom_vline(data = mu, aes(xintercept = grp.mean), color = "red",
             linetype = "dashed", size = 1.4) +
  theme(legend.position = "none") +
  geom_density(alpha = .2, fill = "#FF6666") +
  ggtitle("Histogramas Humanas Enem por cota")

p

# 3. Naturais

mu <- ddply(dados_hist, "Cota", summarise, grp.mean = mean(Naturais))


p <- ggplot(online, aes(x = Naturais, y = ..density..)) +
  xlim(c(200, 1000)) +
  geom_histogram(color = "black", fill = "white") +
  facet_grid(Cota ~ .) +
  geom_vline(data = mu, aes(xintercept = grp.mean), color = "red",
             linetype = "dashed", size = 1.4) +
  theme(legend.position = "none") +
  geom_density(alpha = .2, fill = "#FF6666") +
  ggtitle("Histogramas ciências Naturais Enem por cota")

p

# 4. Matematica

mu <- ddply(dados_hist, "Cota", summarise, grp.mean = mean(Matematica))

p <- ggplot(online, aes(x = Matematica, y = ..density..)) +
  xlim(c(200, 1000)) +
  geom_histogram(color = "black", fill = "white") +
  facet_grid(Cota ~ .) +
  geom_vline(data = mu, aes(xintercept = grp.mean), color = "red",
             linetype = "dashed", size = 1.4) +
  theme(legend.position = "none") +
  geom_density(alpha = .2, fill = "#FF6666") +
  ggtitle("Histogramas Matemática Enem por cota")

p

# 5. Redacao

mu <- ddply(dados_hist, "Cota", summarise, grp.mean = mean(Redacao))

p <- ggplot(online, aes(x = Redacao, y = ..density..)) +
  xlim(c(200, 1000)) +
  geom_histogram(color = "black", fill = "white") +
  facet_grid(Cota ~ .) +
  geom_vline(data = mu, aes(xintercept = grp.mean), color = "red",
             linetype = "dashed", size = 1.4) +
  theme(legend.position = "none") +
  geom_density(alpha = .2, fill = "#FF6666") +
  ggtitle("Histogramas Redação Enem por cota")

p

# plotly ------------------------------------------------------------------

# Interactive chart of the proportion eliminated versus the cutoff. Only the
# "Ampla Concorrência" line is shown initially; the other groups can be
# switched on from the legend. A vertical red line marks the 350-point cutoff.
q <- plot_ly(resultados, x = ~nota.minima, y = ~AC, name = 'Ampla Concorrência', type = 'scatter', mode = 'lines') %>%
  add_trace(y = ~GERAL, name = 'GERAL', mode = 'lines', visible = "legendonly") %>%
  add_trace(y = ~L01, name = 'L01', mode = 'lines', visible = "legendonly") %>%
  add_trace(y = ~L02, name = 'L02', mode = 'lines', visible = "legendonly") %>%
  add_trace(y = ~L05, name = 'L05', mode = 'lines', visible = "legendonly") %>%
  add_trace(y = ~L06, name = 'L06', mode = 'lines', visible = "legendonly") %>%
  add_trace(y = ~L09, name = 'L09', mode = 'lines', visible = "legendonly") %>%
  add_trace(y = ~L10, name = 'L10', mode = 'lines', visible = "legendonly") %>%
  add_trace(y = ~L13, name = 'L13', mode = 'lines', visible = "legendonly") %>%
  add_trace(y = ~L14, name = 'L14', mode = 'lines', visible = "legendonly") %>%
  # I() sets the colour literally; a bare "red" is treated as a data value
  # and mapped through the colour scale.
  add_segments(x = 350, xend = 350, y = 0, yend = 1, name = "350 pontos", color = I("red")) %>%
  layout(
    title = "Adoção de Nota Mínima Enem - UFERSA (Online)",
    # 2D charts read axis settings from layout.xaxis / layout.yaxis;
    # `scene` only applies to 3D plots, so titles placed there are ignored.
    xaxis = list(title = "Nota Mínima"),
    yaxis = list(title = "Proporção de Eliminados"),
    hovermode = 'compare'
  )

q

# Tabela para algumas escolhas de nota ------------------------------------

# Counts and proportions eliminated for every cutoff from 300 to 600
# (candidates confirmed online).
resultados2 <- avaliacao_nota_minima2(online, nota_minima = 300:600)

# Keep only the cutoffs reported in the tables.
tabela_final <- resultados2 %>% filter(nota.minima %in% c(300, 350, 400, 450, 500))

# Name the cutoff column explicitly; an unnamed cbind() argument gets the
# expression text ("tabela_final$nota.minima") as its header.
tabela_GERAL_AC <- cbind(nota.minima = tabela_final$nota.minima, tabela_final[, 1:4])
knitr::kable(tabela_GERAL_AC)

tabela_final$nota.minima	total.eliminadosGERAL	prop.eliminadosGERAL	total.eliminadosAC	prop.eliminadosAC
300	158	0.0393819	56	0.0341672
350	668	0.1665005	241	0.1470409
400	1578	0.3933200	609	0.3715680
450	2714	0.6764706	1033	0.6302624
500	3345	0.8337488	1297	0.7913362

# Quotas L01 and L02. The cutoff column is named explicitly so the header
# reads "nota.minima" instead of the expression text.
tabela_GERAL_L1L2 <- cbind(nota.minima = tabela_final$nota.minima, tabela_final[, 5:8])
knitr::kable(tabela_GERAL_L1L2)

tabela_final$nota.minima	total.eliminadosL01	prop.eliminadosL01	total.eliminadosL02	prop.eliminadosL02
300	21	0.0431211	30	0.0398406
350	76	0.1560575	143	0.1899070
400	191	0.3921971	316	0.4196547
450	339	0.6960986	552	0.7330677
500	408	0.8377823	666	0.8844622

# Quotas L05 and L06. The cutoff column is named explicitly so the header
# reads "nota.minima" instead of the expression text.
tabela_GERAL_L5L6 <- cbind(nota.minima = tabela_final$nota.minima, tabela_final[, 9:12])
knitr::kable(tabela_GERAL_L5L6)

tabela_final$nota.minima	total.eliminadosL05	prop.eliminadosL05	total.eliminadosL06	prop.eliminadosL06
300	16	0.0362812	32	0.0495356
350	63	0.1428571	134	0.2074303
400	162	0.3673469	280	0.4334365
450	295	0.6689342	467	0.7229102
500	365	0.8276644	573	0.8869969

# Quotas L09 and L10. The cutoff column is named explicitly so the header
# reads "nota.minima" instead of the expression text.
tabela_GERAL_L9L10 <- cbind(nota.minima = tabela_final$nota.minima, tabela_final[, 13:16])
knitr::kable(tabela_GERAL_L9L10)

tabela_final$nota.minima	total.eliminadosL09	prop.eliminadosL09	total.eliminadosL10	prop.eliminadosL10
300	0	0.00	1	0.0434783
350	2	0.50	4	0.1739130
400	3	0.75	9	0.3913043
450	4	1.00	13	0.5652174
500	4	1.00	16	0.6956522

# Quotas L13 and L14. The cutoff column is named explicitly so the header
# reads "nota.minima" instead of the expression text.
tabela_GERAL_L13L14 <- cbind(nota.minima = tabela_final$nota.minima, tabela_final[, 17:20])
knitr::kable(tabela_GERAL_L13L14)

tabela_final$nota.minima	total.eliminadosL13	prop.eliminadosL13	total.eliminadosL14	prop.eliminadosL14
300	0	0.0000000	2	0.1250
350	0	0.0000000	5	0.3125
400	1	0.3333333	7	0.4375
450	2	0.6666667	9	0.5625
500	2	0.6666667	14	0.8750

4 Confirmados Presencialmente

# PROGRAD - TRABALHO 01: INSTITUIÇÃO DA NOTA MÍNIMA NO ENEM
# VERIFICAR A REDUÇÃO NO NÚMERO DE CANDIDATOS.

# Importando os dados -----------------------------------------------------

# Candidates confirmed in person, read from the Excel export.
presencial <- read_excel(
  "C:/Users/geisa.vasconcelos/workspace/trabalho/PROGRAD/nota_minima_enem/data/Candidatos de TODAS as etapas confirmados presencialmente (2019-0).xls"
)


# Recuperando os dados de cotas -------------------------------------------

# Keep the five scores and the quota column, under short names.
presencial <- presencial[c("NOTA CIENCIAS HUMANAS", "NOTA CIENCIAS NATURAIS",
                           "NOTA LINGUAGENS", "NOTA MATEMATICA", "NOTA REDACAO",
                           "ACAO AFIRMATIVA")]
colnames(presencial) <- c("Humanas", "Naturais", "Linguagens", "Matematica", "Redacao", "Cota")

# Quota as a factor; the five score columns (positions 1 to 5) as numbers.
presencial$Cota <- as.factor(presencial$Cota)
presencial[1:5] <- lapply(presencial[1:5], function(nota) as.numeric(as.character(nota)))

# Elimination proportions for every cutoff from 300 to 600.
resultados <- avaliacao_nota_minima(presencial, nota_minima = 300:600)

# Long format: one row per (cutoff, group) pair.
teste <- tidyr::gather(resultados, Cota, Proporcao, GERAL:L14)

# Summary dos dados
print(
  dfSummary(presencial, plain.ascii = FALSE, graph.magnif = 0.75, style = "grid"),
  method = 'render'
)

Data Frame Summary

presencial

Dimensions: 3227 x 6
Duplicates: 0

Variable

Stats / Values

Freqs (% of Valid)

Graph

Valid

Missing

Humanas [numeric]

Mean (sd) : 589.2 (73.3) min < med < max: 0 < 604 < 814.4 IQR (CV) : 92 (0.1)

1823 distinct values

3227 (100%)

0 (0%)

Naturais [numeric]

Mean (sd) : 514.6 (73.1) min < med < max: 0 < 512.6 < 822 IQR (CV) : 105.1 (0.1)

1874 distinct values

3227 (100%)

0 (0%)

Linguagens [numeric]

Mean (sd) : 545.3 (63.7) min < med < max: 324.8 < 550.6 < 737.3 IQR (CV) : 84.9 (0.1)

1773 distinct values

3227 (100%)

0 (0%)

Matematica [numeric]

Mean (sd) : 585.1 (101.2) min < med < max: 0 < 583.6 < 907.5 IQR (CV) : 156.2 (0.2)

2167 distinct values

3227 (100%)

0 (0%)

Redacao [numeric]

Mean (sd) : 588.9 (163.9) min < med < max: 120 < 580 < 980 IQR (CV) : 200 (0.3)

42 distinct values

3227 (100%)

0 (0%)

Cota [factor]

1. AC 2. L01 3. L02 4. L05 5. L06 6. L09 7. L10 8. L13 9. L14

1478	(	45.8%	)
346	(	10.7%	)
503	(	15.6%	)
357	(	11.1%	)
499	(	15.5%	)
8	(	0.2%	)
11	(	0.3%	)
11	(	0.3%	)
14	(	0.4%	)

3227 (100%)

0 (0%)

Generated by summarytools 0.9.3 (R version 3.5.3)
2019-07-18

# Criando gráfico ---------------------------------------------------------

# p <- ggplot(teste, aes(x=nota.minima, y=Proporcao, color=Cota)) 
# 
# p + geom_line(size=1.3) +
#   scale_x_continuous(name="Nota Mínima") +
#   scale_y_continuous(name="Proporção de Eliminados") +
#   ggtitle("Adoção de Nota Mínima Enem - UFERSA (Presencial)") + 
#   theme_wsj()
# 
# p


# Análise exploratória ----------------------------------------------------

# Density histograms of each ENEM score, one facet per quota, with the
# quota mean drawn as a dashed red line. xlim() restricts the plotted range
# to 200-1000; the means in `mu` are computed from all rows.
dados_hist <- presencial

# 1. Linguagens

mu <- ddply(dados_hist, "Cota", summarise, grp.mean = mean(Linguagens))

p <- ggplot(presencial, aes(x = Linguagens, y = ..density..)) +
  xlim(c(200, 1000)) +
  geom_histogram(color = "black", fill = "white") +
  facet_grid(Cota ~ .) +
  # The line colour is a fixed value, so it is set outside aes(); inside
  # aes() the string "red" is treated as data and mapped to a scale colour.
  geom_vline(data = mu, aes(xintercept = grp.mean), color = "red",
             linetype = "dashed", size = 1.4) +
  theme(legend.position = "none") +
  geom_density(alpha = .2, fill = "#FF6666") +
  ggtitle("Histogramas Linguagem Enem por cota")

p

# 2. Humanas

mu <- ddply(dados_hist, "Cota", summarise, grp.mean = mean(Humanas))

p <- ggplot(presencial, aes(x = Humanas, y = ..density..)) +
  xlim(c(200, 1000)) +
  geom_histogram(color = "black", fill = "white") +
  facet_grid(Cota ~ .) +
  geom_vline(data = mu, aes(xintercept = grp.mean), color = "red",
             linetype = "dashed", size = 1.4) +
  theme(legend.position = "none") +
  geom_density(alpha = .2, fill = "#FF6666") +
  ggtitle("Histogramas Humanas Enem por cota")

p

# 3. Naturais

mu <- ddply(dados_hist, "Cota", summarise, grp.mean = mean(Naturais))

p <- ggplot(presencial, aes(x = Naturais, y = ..density..)) +
  xlim(c(200, 1000)) +
  geom_histogram(color = "black", fill = "white") +
  facet_grid(Cota ~ .) +
  geom_vline(data = mu, aes(xintercept = grp.mean), color = "red",
             linetype = "dashed", size = 1.4) +
  theme(legend.position = "none") +
  geom_density(alpha = .2, fill = "#FF6666") +
  ggtitle("Histogramas ciências Naturais Enem por cota")
p

# 4. Matematica

mu <- ddply(dados_hist, "Cota", summarise, grp.mean = mean(Matematica))

p <- ggplot(presencial, aes(x = Matematica, y = ..density..)) +
  xlim(c(200, 1000)) +
  geom_histogram(color = "black", fill = "white") +
  facet_grid(Cota ~ .) +
  geom_vline(data = mu, aes(xintercept = grp.mean), color = "red",
             linetype = "dashed", size = 1.4) +
  theme(legend.position = "none") +
  geom_density(alpha = .2, fill = "#FF6666") +
  ggtitle("Histogramas Matemática Enem por cota")
p

# 5. Redacao

mu <- ddply(dados_hist, "Cota", summarise, grp.mean = mean(Redacao))

p <- ggplot(presencial, aes(x = Redacao, y = ..density..)) +
  xlim(c(200, 1000)) +
  geom_histogram(color = "black", fill = "white") +
  facet_grid(Cota ~ .) +
  geom_vline(data = mu, aes(xintercept = grp.mean), color = "red",
             linetype = "dashed", size = 1.4) +
  theme(legend.position = "none") +
  geom_density(alpha = .2, fill = "#FF6666") +
  ggtitle("Histogramas Redação Enem por cota")

p

# plotly ------------------------------------------------------------------

# Interactive chart of the proportion eliminated versus the cutoff. Only the
# "Ampla Concorrência" line is shown initially; the other groups can be
# switched on from the legend. A vertical red line marks the 350-point cutoff.
q <- plot_ly(resultados, x = ~nota.minima, y = ~AC, name = 'Ampla Concorrência', type = 'scatter', mode = 'lines') %>%
  add_trace(y = ~GERAL, name = 'GERAL', mode = 'lines', visible = "legendonly") %>%
  add_trace(y = ~L01, name = 'L01', mode = 'lines', visible = "legendonly") %>%
  add_trace(y = ~L02, name = 'L02', mode = 'lines', visible = "legendonly") %>%
  add_trace(y = ~L05, name = 'L05', mode = 'lines', visible = "legendonly") %>%
  add_trace(y = ~L06, name = 'L06', mode = 'lines', visible = "legendonly") %>%
  add_trace(y = ~L09, name = 'L09', mode = 'lines', visible = "legendonly") %>%
  add_trace(y = ~L10, name = 'L10', mode = 'lines', visible = "legendonly") %>%
  add_trace(y = ~L13, name = 'L13', mode = 'lines', visible = "legendonly") %>%
  add_trace(y = ~L14, name = 'L14', mode = 'lines', visible = "legendonly") %>%
  # I() sets the colour literally; a bare "red" is treated as a data value
  # and mapped through the colour scale.
  add_segments(x = 350, xend = 350, y = 0, yend = 1, name = "350 pontos", color = I("red")) %>%
  layout(
    title = "Adoção de Nota Mínima Enem - UFERSA (Presencial)",
    # 2D charts read axis settings from layout.xaxis / layout.yaxis;
    # `scene` only applies to 3D plots, so titles placed there are ignored.
    xaxis = list(title = "Nota Mínima"),
    yaxis = list(title = "Proporção de Eliminados"),
    hovermode = 'compare'
  )

q

# ANÁLISE EXPLORATÓRIA
# Tabela para algumas escolhas de nota ------------------------------------

# Counts and proportions eliminated for every cutoff from 300 to 600
# (candidates confirmed in person).
resultados2 <- avaliacao_nota_minima2(presencial, nota_minima = 300:600)

# Keep only the cutoffs reported in the tables.
tabela_final <- resultados2 %>% filter(nota.minima %in% c(300, 350, 400, 450, 500))

# Name the cutoff column explicitly; an unnamed cbind() argument gets the
# expression text ("tabela_final$nota.minima") as its header.
tabela_GERAL_AC <- cbind(nota.minima = tabela_final$nota.minima, tabela_final[, 1:4])
knitr::kable(tabela_GERAL_AC)

tabela_final$nota.minima	total.eliminadosGERAL	prop.eliminadosGERAL	total.eliminadosAC	prop.eliminadosAC
300	53	0.0164239	21	0.0142084
350	231	0.0715835	87	0.0588633
400	624	0.1933685	233	0.1576455
450	1366	0.4233034	524	0.3545332
500	2065	0.6399132	822	0.5561570

# Quotas L01 and L02. The cutoff column is named explicitly so the header
# reads "nota.minima" instead of the expression text.
tabela_GERAL_L1L2 <- cbind(nota.minima = tabela_final$nota.minima, tabela_final[, 5:8])
knitr::kable(tabela_GERAL_L1L2)

tabela_final$nota.minima	total.eliminadosL01	prop.eliminadosL01	total.eliminadosL02	prop.eliminadosL02
300	5	0.0144509	13	0.0258449
350	18	0.0520231	50	0.0994036
400	69	0.1994220	126	0.2504970
450	157	0.4537572	269	0.5347913
500	233	0.6734104	379	0.7534791

# Quotas L05 and L06. The cutoff column is named explicitly so the header
# reads "nota.minima" instead of the expression text.
tabela_GERAL_L5L6 <- cbind(nota.minima = tabela_final$nota.minima, tabela_final[, 9:12])
knitr::kable(tabela_GERAL_L5L6)

tabela_final$nota.minima	total.eliminadosL05	prop.eliminadosL05	total.eliminadosL06	prop.eliminadosL06
300	6	0.0168067	6	0.0120240
350	23	0.0644258	45	0.0901804
400	68	0.1904762	109	0.2184369
450	141	0.3949580	244	0.4889780
500	227	0.6358543	367	0.7354709

# Quotas L09 and L10. The cutoff column is named explicitly so the header
# reads "nota.minima" instead of the expression text.
tabela_GERAL_L9L10 <- cbind(nota.minima = tabela_final$nota.minima, tabela_final[, 13:16])
knitr::kable(tabela_GERAL_L9L10)

tabela_final$nota.minima	total.eliminadosL09	prop.eliminadosL09	total.eliminadosL10	prop.eliminadosL10
300	0	0.000	0	0.0000000
350	3	0.375	1	0.0909091
400	6	0.750	3	0.2727273
450	8	1.000	4	0.3636364
500	8	1.000	6	0.5454545

# Quotas L13 and L14. The cutoff column is named explicitly so the header
# reads "nota.minima" instead of the expression text.
tabela_GERAL_L13L14 <- cbind(nota.minima = tabela_final$nota.minima, tabela_final[, 17:20])
knitr::kable(tabela_GERAL_L13L14)

tabela_final$nota.minima	total.eliminadosL13	prop.eliminadosL13	total.eliminadosL14	prop.eliminadosL14
300	0	0.0000000	2	0.1428571
350	1	0.0909091	3	0.2142857
400	3	0.2727273	7	0.5000000
450	9	0.8181818	10	0.7142857
500	10	0.9090909	13	0.9285714

Desenvolvido por Kássio Camelo

kassio.silva@ufersa.edu.br

Impacto da Implantação de Nota Mínima no Enem

Kássio Camelo Ferreira da Silva, Proplan - Ufersa

18 de julho de 2019

1 Códigos das Cotas

2 Aprovados na 1ª chamada (Arquivo R1)

Data Frame Summary

dados

3 Confirmados Online

Data Frame Summary

online

4 Confirmados Presencialmente

Data Frame Summary

presencial