Censo das Turmas de Graduação em Estatística

Prof. Fernando Bastos - Darah Moreira

2024-02-26

Introdução 1

O objetivo desses slides é apresentar um resumo tabular e gráfico dos dados coletados via questionário apresentado pelo professor Fernando Bastos aos estudantes de Estatística Básica da Universidade Federal de Viçosa.

Distribuição de Frequência da Residência dos Estudantes

# ---- Frequency distribution: RESIDENCIA --------------------------------------
# Builds the frequency table of the city of residence with absolute (Freq),
# relative (FreqR), cumulative (FreqACM) and cumulative relative (FreqRACM)
# frequencies, appends a "Total" row and renders it with htmlTable.
tab1 <- df %>%
  group_by(RESIDENCIA) %>%
  summarise(Freq = n(), .groups = "drop")

# Keep the exact proportions. Accumulating proportions that were already
# rounded to two digits piles up the rounding error, so the cumulative column
# and the total would stop short of 1.
freq_rel_exata <- tab1$Freq / sum(tab1$Freq)

tab1$FreqR <- round(freq_rel_exata, digits = 2)
tab1$FreqACM <- cumsum(tab1$Freq)
tab1$FreqRACM <- round(cumsum(freq_rel_exata), digits = 2)

# The total row is a character vector because it mixes a label, numbers and
# the "-" placeholders used for the cumulative columns.
Total <- c(
  "Total",
  sum(tab1$Freq),
  round(sum(freq_rel_exata), digits = 2),
  "-",
  "-"
)
tab1 <- rbind(tab1, Total)

tab1 %>%
  addHtmlTableStyle(
    col.rgroup = c("none", "#F9FAF0"),
    col.columns = c("none", "#F1F0FA")
  ) %>%
  htmlTable(
    total = TRUE,
    rnames = FALSE,
    caption = "Tabela de Frequencia"
  )

Observações: É possível notar que há estudantes que informaram como cidade de residência tanto "VICOSA" quanto "VICOSA MG". Ora, ambos são residentes de Viçosa; por isso, é necessária uma correção das respostas!

RESIDENCIA	Freq	FreqR	FreqACM	FreqRACM
Tabela de Frequencia
ABAETE	1	0.01	1	0.01
ALTO RIO DOCE	1	0.01	2	0.02
ARACUAI	1	0.01	3	0.03
ARARUAMA	1	0.01	4	0.04
ARAXA	1	0.01	5	0.05
BELO HORIZONTE	3	0.04	8	0.09
CACHOEIRO DE ITAPEMIRIM	1	0.01	9	0.1
CARANDAI	1	0.01	10	0.11
CASA GRANDES	1	0.01	11	0.12
CATAGUASES	1	0.01	12	0.13
COIMBRA	1	0.01	13	0.14
CONSELHEIRO LAFAIETE	2	0.02	15	0.16
CORONEL MURTA	1	0.01	16	0.17
DIVINOPOLIS	1	0.01	17	0.18
IPATINGA	2	0.02	19	0.2
ITABIRA	2	0.02	21	0.22
ITAPERUNA	1	0.01	22	0.23
LAVRAS	1	0.01	23	0.24
MANHUMIRIM	1	0.01	24	0.25
MUNIZ FREIRE	1	0.01	25	0.26
MURIAE	1	0.01	26	0.27
NASCIDO EM SETE LAGOAS (MG), MORANDO EM VICOSA.	1	0.01	27	0.28
PARAUAPEBAS	1	0.01	28	0.29
PAULA CANDIDO	1	0.01	29	0.3
PEDRA DO ANTA	1	0.01	30	0.31
PIRANGA	1	0.01	31	0.32
PRESIDENTE BERNARDES	1	0.01	32	0.33
SAO MATEUS	1	0.01	33	0.34
SETE LAGOAS	1	0.01	34	0.35
SOBRALIA	1	0.01	35	0.36
TAUBATE	1	0.01	36	0.37
TEIXEIRAS	1	0.01	37	0.38
TERESOPOLIS	1	0.01	38	0.39
TOCANTINS	1	0.01	39	0.4
UBA	2	0.02	41	0.42
UBAPORANGA	1	0.01	42	0.43
VICOSA	38	0.46	80	0.89
VICOSA MG	2	0.02	82	0.91
Total	82	0.91	-	-

Distribuição de Frequência da Residência dos Estudantes (Corrigida)

# ---- Cleaning of the RESIDENCIA column ----------------------------------------
# Maps abbreviations, misspellings and free-text answers onto one canonical
# city name. Values that are not listed are kept as they are, and the column
# is stored as character.
df <- df %>%
  mutate(
    RESIDENCIA = dplyr::recode(
      as.character(RESIDENCIA),
      "BH" = "BELO HORIZONTE",
      "TEXAS" = "TEIXEIRAS",
      "TEIXIERAS" = "TEIXEIRAS",
      "VICOSA MG" = "VICOSA",
      "NASCIDO EM SETE LAGOAS (MG), MORANDO EM VICOSA." = "VICOSA",
      "CASA GRANDES" = "CASA GRANDE"
    )
  )

RESIDENCIA	Freq	FreqR	FreqACM	FreqRACM
Tabela de Frequencia
ABAETE	1	0.01	1	0.01
ALTO RIO DOCE	1	0.01	2	0.02
ARACUAI	1	0.01	3	0.03
ARARUAMA	1	0.01	4	0.04
ARAXA	1	0.01	5	0.05
BELO HORIZONTE	3	0.04	8	0.09
CACHOEIRO DE ITAPEMIRIM	1	0.01	9	0.1
CARANDAI	1	0.01	10	0.11
CASA GRANDE	1	0.01	11	0.12
CATAGUASES	1	0.01	12	0.13
COIMBRA	1	0.01	13	0.14
CONSELHEIRO LAFAIETE	2	0.02	15	0.16
CORONEL MURTA	1	0.01	16	0.17
DIVINOPOLIS	1	0.01	17	0.18
IPATINGA	2	0.02	19	0.2
ITABIRA	2	0.02	21	0.22
ITAPERUNA	1	0.01	22	0.23
LAVRAS	1	0.01	23	0.24
MANHUMIRIM	1	0.01	24	0.25
MUNIZ FREIRE	1	0.01	25	0.26
MURIAE	1	0.01	26	0.27
PARAUAPEBAS	1	0.01	27	0.28
PAULA CANDIDO	1	0.01	28	0.29
PEDRA DO ANTA	1	0.01	29	0.3
PIRANGA	1	0.01	30	0.31
PRESIDENTE BERNARDES	1	0.01	31	0.32
SAO MATEUS	1	0.01	32	0.33
SETE LAGOAS	1	0.01	33	0.34
SOBRALIA	1	0.01	34	0.35
TAUBATE	1	0.01	35	0.36
TEIXEIRAS	1	0.01	36	0.37
TERESOPOLIS	1	0.01	37	0.38
TOCANTINS	1	0.01	38	0.39
UBA	2	0.02	40	0.41
UBAPORANGA	1	0.01	41	0.42
VICOSA	41	0.5	82	0.92
Total	82	0.92	-	-

Gráfico de Barras Horizontal

# ---- Horizontal bar chart: students per city of residence ---------------------
# Counts the students of each city, draws the bars ordered by count with the
# count printed next to each bar, and converts the chart to a plotly widget.
plot <- df %>%
  group_by(RESIDENCIA) %>%
  summarise(count = n(), .groups = "drop") %>%
  ggplot(aes(
    x = reorder(RESIDENCIA, count),
    y = count,
    fill = RESIDENCIA,
    # `text` feeds the tooltip requested from ggplotly() below. The label used
    # to read "Curso: " although the value shown is the city of residence.
    text = paste(
      "Residência: ",
      RESIDENCIA, "<br>",
      "Número de estudantes: ",
      count
    )
  )) +
  # FALSE is spelled out: `F` is an ordinary variable that can be reassigned.
  geom_col(position = "dodge", show.legend = FALSE) +
  geom_text(aes(label = count), nudge_y = 1, size = 5) +
  coord_flip() +
  xlab("Residência") +
  ylab("Nº de Estudantes")

plot %>%
  plotly::ggplotly(tooltip = "text") %>%
  plotly::layout(showlegend = FALSE)

Gráfico de Barras Horizontal Alterado

# ---- Static bar chart: students per city of residence -------------------------
# geom_bar() does the counting, one bar per city. The axes are flipped so the
# city names run along the vertical axis, and the legend is hidden.
ggplot(df, aes(x = RESIDENCIA, fill = RESIDENCIA)) +
  geom_bar(position = "dodge") +
  scale_fill_hue(direction = 1) +
  xlab("Residência") +
  ylab("Número de Estudantes") +
  ggtitle("Cidade de Residência dos Estudantes") +
  coord_flip() +
  theme_minimal() +
  theme(legend.position = "none")

Distribuição de Frequência do Estado dos Estudantes

Observação: Note que temos MG e MINAS GERAIS para representar o mesmo estado. O mesmo acontece com o estado do Rio de Janeiro (RJ e RIO DE JANEIRO). Portanto, é necessário que seja feita uma correção.

ESTADO	Freq	FreqR	FreqACM	FreqRACM
Tabela de Frequencia
BRASIL	1	0.01	1	0.01
ESPIRITO SANTO	3	0.04	4	0.05
MG	11	0.13	15	0.18
MG/VICOSA	1	0.01	16	0.19
MINAS GERAIS	61	0.74	77	0.93
PARA	1	0.01	78	0.94
RIO DE JANEIRO	2	0.02	80	0.96
RJ	1	0.01	81	0.97
SAO PAULO	1	0.01	82	0.98
Total	82	0.98	-	-

Distribuição de Frequência do Estado dos Estudantes (Corrigida)

# ---- Cleaning of the ESTADO column ---------------------------------------------
# Replaces the abbreviated and mixed answers by the full state name. Values
# that are not listed are kept as they are, and the column is stored as
# character.
df <- df %>%
  mutate(
    ESTADO = dplyr::recode(
      as.character(ESTADO),
      "MG" = "MINAS GERAIS",
      "MG/VICOSA" = "MINAS GERAIS",
      "RJ" = "RIO DE JANEIRO"
    )
  )

ESTADO	Freq	FreqR	FreqACM	FreqRACM
Tabela de Frequencia
BRASIL	1	0.01	1	0.01
ESPIRITO SANTO	3	0.04	4	0.05
MINAS GERAIS	73	0.89	77	0.94
PARA	1	0.01	78	0.95
RIO DE JANEIRO	3	0.04	81	0.99
SAO PAULO	1	0.01	82	1
Total	82	1	-	-

Gráfico de Barras Vertical

# ---- Vertical bar chart: students per state ------------------------------------
# Counts the students of each state, orders the bars by count and renders the
# chart as a plotly widget whose tooltip comes from the `text` aesthetic.
plot1 <- df %>%
  group_by(ESTADO) %>%
  summarise(count = n(), .groups = "drop") %>%
  ggplot(aes(
    x = reorder(ESTADO, count),
    y = count,
    fill = ESTADO,
    text = paste(
      "ESTADO: ", ESTADO, "<br>",
      "Número de estudantes: ",
      count
    )
  )) +
  # FALSE is spelled out: `F` is an ordinary variable that can be reassigned.
  geom_col(show.legend = FALSE) +
  geom_text(aes(label = count), nudge_y = 1, size = 5) +
  # Tilt the state names on the x axis.
  theme(axis.text.x = element_text(angle = 30, vjust = 1, hjust = 1)) +
  ylab("Quantidade de Estudantes") +
  xlab("Estado") +
  ggtitle("Estado dos Estudantes")
plotly::ggplotly(plot1, tooltip = "text") %>%
  plotly::layout(showlegend = FALSE)

Gráfico de Barras Horizontal

# Static bar chart of the number of students per state. geom_bar() does the
# counting, the axes are flipped and the legend is hidden.
ggplot(df, aes(x = ESTADO, fill = ESTADO)) +
  geom_bar(position = "dodge") +
  scale_fill_hue(direction = 1) +
  xlab("Estados") +
  ylab("Número de Estudantes") +
  ggtitle("Estado dos Estudantes") +
  coord_flip() +
  theme_minimal() +
  theme(legend.position = "none")

Distribuição de Frequência do Curso dos Estudantes

Observação: É possível notar que o curso Ciências Econômicas está escrito de três maneiras diferentes (além de aparecer como ECONOMIA), assim como o curso de Engenharia Agrícola e Ambiental aparece com grafias distintas. Isso é um erro a ser corrigido.

CURSO	Freq	FreqR	FreqACM	FreqRACM
Tabela de Frequencia
CIENCIA ECONOMICAS	1	0.01	1	0.01
CIENCIAS ECONOMICA	1	0.01	2	0.02
CIENCIAS ECONOMICAS	33	0.4	35	0.42
ECONOMIA	7	0.09	42	0.51
ENGENHARIA AGRICOLA AMBIETAL	1	0.01	43	0.52
ENGENHARIA AGRICOLA E AMBIENTAL	1	0.01	44	0.53
ENGENHARIA AMBIENTAL	5	0.06	49	0.59
ENGENHARIA CIVIL	18	0.22	67	0.81
ENGENHARIA DE AGRIMENSURA E CARTOGRAFICA	1	0.01	68	0.82
ENGENHARIA ELETRICA	10	0.12	78	0.94
ENGENHARIA MECANICA	3	0.04	81	0.98
LICENCIATURA EM MATEMATICA	1	0.01	82	0.99
Total	82	0.99	-	-

Distribuição de Frequência do Curso dos Estudantes (Corrigida)

# ---- Cleaning of the CURSO column ----------------------------------------------
# Folds the spelling variants of each course into a single name. Values that
# are not listed are kept as they are, and the column is stored as character.
# NOTE(review): "ENGENHARIA AMBIENTAL" is merged into "ENGENHARIA AGRICOLA E
# AMBIENTAL"; confirm these are not two distinct courses.
df <- df %>%
  mutate(
    CURSO = dplyr::recode(
      as.character(CURSO),
      "CIENCIA ECONOMICAS" = "CIENCIAS ECONOMICAS",
      "CIENCIAS ECONOMICA" = "CIENCIAS ECONOMICAS",
      "ECONOMIA" = "CIENCIAS ECONOMICAS",
      "ENGENHARIA AGRICOLA AMBIETAL" = "ENGENHARIA AGRICOLA E AMBIENTAL",
      "ENGENHARIA AMBIENTAL" = "ENGENHARIA AGRICOLA E AMBIENTAL"
    )
  )

CURSO	Freq	FreqR	FreqACM	FreqRACM
Tabela de Frequencia
CIENCIAS ECONOMICAS	42	0.51	42	0.51
ENGENHARIA AGRICOLA E AMBIENTAL	7	0.09	49	0.6
ENGENHARIA CIVIL	18	0.22	67	0.82
ENGENHARIA DE AGRIMENSURA E CARTOGRAFICA	1	0.01	68	0.83
ENGENHARIA ELETRICA	10	0.12	78	0.95
ENGENHARIA MECANICA	3	0.04	81	0.99
LICENCIATURA EM MATEMATICA	1	0.01	82	1
Total	82	1	-	-

Gráfico de Barras Vertical

# ---- Vertical bar chart: students per course -----------------------------------
# Counts the students of each course, orders the bars by count and renders the
# chart as a plotly widget whose tooltip comes from the `text` aesthetic.
plot1 <- df %>%
  group_by(CURSO) %>%
  summarise(count = n(), .groups = "drop") %>%
  ggplot(aes(
    x = reorder(CURSO, count),
    y = count,
    fill = CURSO,
    text = paste(
      "Curso: ",
      CURSO,
      "<br>",
      "Número de estudantes: ",
      count
    )
  )) +
  # FALSE is spelled out: `F` is an ordinary variable that can be reassigned.
  geom_col(show.legend = FALSE) +
  geom_text(aes(label = count), nudge_y = 1, size = 5) +
  # Tilt the course names on the x axis.
  theme(axis.text.x = element_text(angle = 30, vjust = 1, hjust = 1)) +
  ylab("Quantidade de Estudantes") +
  xlab("Cursos") +
  ggtitle("Número de Estudantes por curso da\n Disciplina Estatística")
plotly::ggplotly(plot1, tooltip = "text") %>%
  plotly::layout(showlegend = FALSE)

Gráfico de Barras Horizontal

# Horizontal bar chart of the number of students per course, ordered by count
# and rendered as a plotly widget (tooltip taken from the `text` aesthetic).
plot <- df %>%
  group_by(CURSO) %>%
  summarise(count = n(), .groups = "drop") %>%
  ggplot(aes(
    x = reorder(CURSO, count),
    y = count,
    fill = CURSO,
    text = paste(
      "Curso: ",
      CURSO, "<br>",
      "Número de estudantes: ",
      count
    )
  )) +
  # FALSE is spelled out: `F` is an ordinary variable that can be reassigned.
  geom_col(position = "dodge", show.legend = FALSE) +
  geom_text(aes(label = count), nudge_y = 1, size = 5) +
  coord_flip() +
  xlab("Curso") +
  ylab("Nº de Estudantes") +
  ggtitle("Número de Estudantes\n por Curso")

plot %>%
  plotly::ggplotly(tooltip = "text") %>%
  plotly::layout(showlegend = FALSE)

Treemap de alunos por curso

# ---- Treemap: students per course ----------------------------------------------
library(treemap)

# One row per course: number of students (quant) and how many of them
# answered "SIM" in FACILIDADE (fac).
df2 <- df %>%
  group_by(CURSO) %>%
  summarise(
    quant = n(),
    fac = sum(as.numeric(FACILIDADE == "SIM"))
  )

# Standardised version of `fac`, used only to colour the rectangles.
df2[["escala"]] <- scale(df2[["fac"]])

# Rectangle area follows the number of students; colour follows `escala`.
treemap(
  df2,
  index = c("CURSO"),
  vSize = "quant",
  vColor = "escala",
  type = "value",
  palette = "-RdGy",
  lowerbound.cex.labels = 0.1,
  overlap.labels = 0.05,
  title = "Treemap dos Cursos com Alunos Matriculados em EST 106"
)

Distribuição de Frequência do Ano de Ingresso dos Estudantes ao Curso

ANOINICIO	Freq	FreqR	FreqACM	FreqRACM
Tabela de Frequencia
2013	1	0.01	1	0.01
2019	2	0.02	3	0.03
2020	10	0.12	13	0.15
2021	5	0.06	18	0.21
2022	28	0.34	46	0.55
2023	32	0.39	78	0.94
2023/2	1	0.01	79	0.95
2024	3	0.04	82	0.99
Total	82	0.99	-	-

Distribuição de Frequência do Ano de Ingresso dos Estudantes ao Curso (Corrigida)

ANOINICIO	Freq	FreqR	FreqACM	FreqRACM
Tabela de Frequencia
2013	1	0.01	1	0.01
2019	2	0.02	3	0.03
2020	10	0.12	13	0.15
2021	5	0.06	18	0.21
2022	28	0.34	46	0.55
2023	33	0.4	79	0.95
2024	3	0.04	82	0.99
Total	82	0.99	-	-

Gráfico de Barras Vertical

# ---- Vertical bar chart: students per year of admission ------------------------
# Counts the students per ANOINICIO and renders the chart as a plotly widget.
# The bars are ordered by count, not chronologically.
plot1 <- df %>%
  group_by(ANOINICIO) %>%
  summarise(count = n(), .groups = "drop") %>%
  ggplot(aes(
    x = reorder(ANOINICIO, count),
    y = count,
    fill = ANOINICIO,
    text = paste(
      "Ano: ",
      ANOINICIO,
      "<br>",
      "Ingresso dos Estudantes: ",
      count
    )
  )) +
  # FALSE is spelled out: `F` is an ordinary variable that can be reassigned.
  geom_col(show.legend = FALSE) +
  geom_text(aes(label = count), nudge_y = 1, size = 5) +
  theme(axis.text.x = element_text(angle = 30, vjust = 1, hjust = 1)) +
  ylab("Nº de Estudantes") +
  xlab("Ano") +
  ggtitle("Ingresso dos Estudantes")
plotly::ggplotly(plot1, tooltip = "text") %>%
  plotly::layout(showlegend = FALSE)

Gráfico de Barras Horizontal

# Horizontal bar chart of the number of students per year of admission,
# ordered by count and rendered as a plotly widget.
plot <- df %>%
  group_by(ANOINICIO) %>%
  summarise(count = n(), .groups = "drop") %>%
  ggplot(aes(
    x = reorder(ANOINICIO, count),
    y = count,
    fill = ANOINICIO,
    text = paste(
      "Anoinicio: ",
      ANOINICIO, "<br>",
      "Número de estudantes: ",
      count
    )
  )) +
  # FALSE is spelled out: `F` is an ordinary variable that can be reassigned.
  geom_col(position = "dodge", show.legend = FALSE) +
  geom_text(aes(label = count), nudge_y = 1, size = 5) +
  coord_flip() +
  xlab("Ano") +
  ylab("Nº de Estudantes") +
  ggtitle("Ingresso dos Estudantes")

plot %>%
  plotly::ggplotly(tooltip = "text") %>%
  plotly::layout(showlegend = FALSE)

Distribuição de Frequência do Semestre dos Estudantes

Observação: Se repararmos, temos respostas em texto (caracteres), enquanto todas as outras respostas são numéricas, o que acaba gerando um erro a ser corrigido.

SEMESTRE	Freq	FreqR	FreqACM	FreqRACM
Tabela de Frequencia
1	2	0.02	2	0.02
2	2	0.02	4	0.04
3	33	0.4	37	0.44
4	29	0.35	66	0.79
5	10	0.12	76	0.91
6	2	0.02	78	0.93
7	2	0.02	80	0.95
Entre o terceiro, o quarto e o quinto.	1	0.01	81	0.96
Só deus sabe	1	0.01	82	0.97
Total	82	0.97	-	-

Distribuição de Frequência do Semestre dos Estudantes (Corrigida)

# ---- Cleaning of the SEMESTRE column -------------------------------------------
# str(df$SEMESTRE)  # uncomment to inspect the raw column
# Convert the answers to numbers. Answers that are not numeric become NA,
# which is what raises the "NAs introduced by coercion" warning.
df$SEMESTRE <- as.numeric(as.character(unlist(df$SEMESTRE)))
# Keep only the rows with a valid semester. The column is referenced through
# the pipe rather than through `df$`, so the filter acts on the piped data.
tab <- df %>% filter(!is.na(SEMESTRE))

## Warning: NAs introduced by coercion

SEMESTRE	Freq	FreqR	FreqACM	FreqRACM
Tabela de Frequencia
1	2	0.03	2	0.03
2	2	0.03	4	0.06
3	33	0.41	37	0.47
4	29	0.36	66	0.83
5	10	0.12	76	0.95
6	2	0.03	78	0.98
7	2	0.03	80	1.01
Total	80	1.01	-	-

Gráfico de Barras Horizontal

# Horizontal bar chart of the number of students per semester, built from
# `tab`, ordered by count and rendered as a plotly widget.
plot <- tab %>%
  group_by(SEMESTRE) %>%
  summarise(count = n(), .groups = "drop") %>%
  ggplot(aes(
    x = reorder(SEMESTRE, count),
    y = count,
    fill = SEMESTRE,
    text = paste(
      "Semestre: ",
      SEMESTRE, "<br>",
      "Número de estudantes: ",
      count
    )
  )) +
  # FALSE is spelled out: `F` is an ordinary variable that can be reassigned.
  geom_col(position = "dodge", show.legend = FALSE) +
  geom_text(aes(label = count), nudge_y = 1, size = 5) +
  coord_flip() +
  xlab("Semestre") +
  ylab("Nº de Estudantes") +
  ggtitle("Número de alunos por semestre")

plot %>%
  plotly::ggplotly(tooltip = "text") %>%
  plotly::layout(showlegend = FALSE)

Gráfico de Barras Vertical

# Vertical bar chart of the number of students per semester, built from `tab`,
# ordered by count and rendered as a plotly widget.
plot1 <- tab %>%
  group_by(SEMESTRE) %>%
  summarise(count = n(), .groups = "drop") %>%
  ggplot(aes(
    x = reorder(SEMESTRE, count),
    y = count,
    fill = SEMESTRE,
    text = paste(
      "Semestre: ",
      SEMESTRE,
      "<br>",
      "Estudantes por Semestre: ",
      count
    )
  )) +
  # FALSE is spelled out: `F` is an ordinary variable that can be reassigned.
  geom_col(show.legend = FALSE) +
  geom_text(aes(label = count), nudge_y = 1, size = 5) +
  theme(axis.text.x = element_text(angle = 30, vjust = 1, hjust = 1)) +
  ylab("Nº de Estudantes") +
  # The x axis shows the semester; it used to be labelled "Ano".
  xlab("Semestre") +
  ggtitle("Estudantes por Semestre")
plotly::ggplotly(plot1, tooltip = "text") %>%
  plotly::layout(showlegend = FALSE)

Distribuição de Frequência da Idade dos Estudantes

IDADE	Freq	FreqR	FreqACM	FreqRACM
Tabela de Frequencia
18	7	0.09	7	0.09
19	23	0.28	30	0.37
20	16	0.2	46	0.57
21	11	0.13	57	0.7
22	8	0.1	65	0.8
23	8	0.1	73	0.9
24	3	0.04	76	0.94
26	3	0.04	79	0.98
27	1	0.01	80	0.99
34	1	0.01	81	1
69	1	0.01	82	1.01
Total	82	1.01	-	-

Boxplot da Idade dos Alunos

# Boxplot of the students' age, with an extra error-bar layer that draws
# end caps on the whiskers.
ggplot(df, aes(x = IDADE)) +
  geom_boxplot(colour = "black") +
  stat_boxplot(geom = "errorbar", width = 0.15) +
  labs(title = "Boxplot Idade")

Gráfico de Barras Vertical

# Vertical bar chart of the number of students per age, rendered as a plotly
# widget. The bars are ordered by count, not by age.
plot1 <- df %>%
  group_by(IDADE) %>%
  summarise(count = n(), .groups = "drop") %>%
  ggplot(aes(
    x = reorder(IDADE, count),
    y = count,
    fill = IDADE,
    text = paste(
      "IDADE: ", IDADE, "<br>",
      "Número de estudantes: ",
      count
    )
  )) +
  # FALSE is spelled out: `F` is an ordinary variable that can be reassigned.
  geom_col(show.legend = FALSE) +
  geom_text(aes(label = count), nudge_y = 1, size = 5) +
  # Disabled: rotation of the x axis labels.
  # theme(axis.text.x = element_text(angle = 30, vjust = 1, hjust = 1)) +
  ylab("Quantidade de Estudantes") +
  xlab("Idade") +
  ggtitle("Idade dos Estudantes")
plotly::ggplotly(plot1, tooltip = "text") %>%
  plotly::layout(showlegend = FALSE)

Distribuição de Frequência do Tempo de Sono dos Estudantes

HORASSONO	Freq	FreqR	FreqACM	FreqRACM
Tabela de Frequencia
5	4	0.05	4	0.05
5.5	1	0.01	5	0.06
6	17	0.21	22	0.27
7	35	0.43	57	0.7
8	23	0.28	80	0.98
9	2	0.02	82	1
Total	82	1	-	-

Gráfico de Barras Vertical

# Vertical bar chart of the number of students per reported hours of sleep,
# ordered by count and rendered as a plotly widget.
plot1 <- df %>%
  # HORASSONO is turned into text so that each distinct value is one category.
  group_by(HORASSONO = as.character(HORASSONO)) %>%
  summarise(count = n(), .groups = "drop") %>%
  ggplot(aes(
    # HORASSONO is already character here, so no second conversion is needed.
    x = reorder(HORASSONO, count),
    y = count,
    fill = HORASSONO,
    text = paste(
      "HORASSONO:",
      HORASSONO,
      "<br>",
      "Número de estudantes: ",
      count
    )
  )) +
  # FALSE is spelled out: `F` is an ordinary variable that can be reassigned.
  geom_col(show.legend = FALSE) +
  geom_text(aes(label = count), nudge_y = 1, size = 5) +
  # Disabled: rotation of the x axis labels.
  # theme(axis.text.x = element_text(angle = 30, vjust = 1, hjust = 1)) +
  ylab("Quantidade de Estudantes") +
  xlab("Horas") +
  ggtitle("Tempo de Sono")
plotly::ggplotly(plot1, tooltip = "text") %>%
  plotly::layout(showlegend = FALSE)

Gráfico de Barras Horizontal

# Horizontal bar chart of the number of students per reported hours of sleep,
# ordered by count and rendered as a plotly widget.
plot <- df %>%
  # HORASSONO is turned into text so that each distinct value is one category.
  group_by(HORASSONO = as.character(HORASSONO)) %>%
  summarise(count = n(), .groups = "drop") %>%
  ggplot(aes(
    x = reorder(HORASSONO, count),
    y = count,
    fill = HORASSONO,
    text = paste(
      "Horas: ",
      HORASSONO, "<br>",
      "Número de estudantes: ",
      count
    )
  )) +
  # FALSE is spelled out: `F` is an ordinary variable that can be reassigned.
  geom_col(position = "dodge", show.legend = FALSE) +
  geom_text(aes(label = count), nudge_y = 1, size = 5) +
  coord_flip() +
  xlab("Horas") +
  ylab("Nº de Estudantes") +
  ggtitle("Tempo de Sono")

plot %>%
  plotly::ggplotly(tooltip = "text") %>%
  plotly::layout(showlegend = FALSE)

Distribuição de Frequência do Recebimento de Assistência Estudantil dos Estudantes

ASSISTENCIA	Freq	FreqR	FreqACM	FreqRACM
Tabela de Frequencia
NAO	66	0.8	66	0.8
SIM	16	0.2	82	1
Total	82	1	-	-

Gráfico de Setores

# ---- Pie chart: student assistance ---------------------------------------------
# Counts the students per ASSISTENCIA answer and draws the counts as a pie:
# a single stacked bar bent by coord_polar().
tab1 <- df %>%
  group_by(ASSISTENCIA) %>%
  summarise(Freq = n(), .groups = "drop")
library(scales)
pizza <- ggplot(tab1, aes(x = "", y = Freq, fill = ASSISTENCIA)) +
  geom_bar(width = 1, stat = "identity") +
  coord_polar("y") +
  ggtitle("Assistência Estudantil")

pizza +
  scale_fill_brewer(palette = "Dark2") +
  theme(axis.text.x = element_blank()) +
  # The slices are stacked, so the labels must be stacked as well.
  # position_stack(vjust = 0.5) centres each count inside its own slice,
  # whereas y = Freq / 2 ignores the slices stacked before it.
  geom_text(
    aes(label = Freq),
    position = position_stack(vjust = 0.5),
    size = 5
  )

Gráfico de Setores (Algumas Alterações)

# ---- Base-graphics pie chart: student assistance -------------------------------
ni <- table(df$ASSISTENCIA) # absolute frequencies (n_i)
fi <- prop.table(ni) # relative frequencies (f_i)
p_fi <- 100 * fi # percentages (100 f_i)

# Append the totals as a last element of each vector.
ni <- c(ni, sum(ni))
fi <- c(fi, sum(fi))
p_fi <- c(p_fi, sum(p_fi))
names(ni)[3] <- "Total"

# Summary matrix: counts, proportions and percentages rounded to two digits.
df2 <- cbind(
  ni,
  fi = round(fi, digits = 2),
  p_fi = round(p_fi, digits = 2)
)

# Labels of the form "<index>(<count>;<percentage>%)".
linhas <- 1:3
labs <- paste0(
  linhas, "(", df2[linhas, 1], ";", round(df2[linhas, 3], 1), "%)"
)

pie(
  table(df$ASSISTENCIA),
  labels = labs,
  main = "Gráfico em setores para Assistência Estudantil.",
  sub = "Fonte: Censo EST 106"
)
# Disabled title left over from another example:
# title("Figura 2.3: Gráfico em setores para a variável Y: grau de instrução")
legend(-1.1, -0.8, legend = c("1=Não, 2=Sim"), border = NA, box.col = NA)

Gráfico de Barras Horizontal

# ---- Stacked bar chart: student assistance by course ---------------------------
# Number of students for each combination of course and ASSISTENCIA answer.
# `.groups = "drop"` returns an ungrouped table, like the other summaries in
# this file, and avoids the regrouping message from summarise().
teste <- df %>%
  group_by(CURSO, ASSISTENCIA) %>%
  summarise(quant = n(), .groups = "drop")


# One horizontal bar per course, split by ASSISTENCIA, with the count of each
# segment printed as text.
ggplot(teste, aes(CURSO, quant, fill = ASSISTENCIA)) +
  geom_col() +
  coord_flip() +
  geom_text(aes(label = quant), position = position_stack(vjust = 1.2)) +
  labs(title = "Alunos com Assistência\n Estudantil por Curso")

Gráfico de Barras Horizontal (Alterado)

# ---- Variation: side-by-side bars -----------------------------------------------
# Same data as the stacked chart (`teste`), with the ASSISTENCIA answers drawn
# next to each other and the count printed beside each bar.
ggplot(teste, aes(x = CURSO, y = quant, fill = ASSISTENCIA)) +
  geom_col(position = "dodge") +
  coord_flip() +
  geom_text(
    aes(label = quant),
    position = position_dodge(width = 0.9),
    hjust = -1
  ) +
  ggtitle("Alunos com Assistência\n Estudantil por Curso")

Distribuição de Frequência do Estudo Antecipado dos Estudantes

ESTUDOANTECIPADO	Freq	FreqR	FreqACM	FreqRACM
Tabela de Frequencia
AS VEZES ESTUDO ANTECIPADAMENTE A PROVA	46	0.56	46	0.56
NUNCA ESTUDO ANTECIPADAMENTE AS PROVAS	1	0.01	47	0.57
SEMPRE ESTUDO ANTECIPADAMENTE AS PROVAS	35	0.43	82	1
Total	82	1	-	-

Gráfico de Barras Vertical

# ---- Vertical bar chart: studying ahead of exams -------------------------------
# Counts the students per ESTUDOANTECIPADO answer, orders the bars by count
# and renders the chart as a plotly widget.
plot1 <- df %>%
  group_by(ESTUDOANTECIPADO = as.character(ESTUDOANTECIPADO)) %>%
  summarise(count = n(), .groups = "drop") %>%
  ggplot(aes(
    # ESTUDOANTECIPADO is already character here; no second conversion needed.
    x = reorder(ESTUDOANTECIPADO, count),
    y = count,
    fill = ESTUDOANTECIPADO,
    text = paste(
      "ESTUDOANTECIPADO:",
      ESTUDOANTECIPADO,
      "<br>",
      "Número de estudantes: ",
      count
    )
  )) +
  # FALSE is spelled out: `F` is an ordinary variable that can be reassigned.
  geom_col(show.legend = FALSE) +
  geom_text(aes(label = count), nudge_y = 1, size = 5) +
  # Disabled: rotation of the x axis labels.
  # theme(axis.text.x = element_text(angle = 30, vjust = 1, hjust = 1)) +
  ylab("Quantidade de Estudantes") +
  xlab(" ") +
  ggtitle("Estudo Antecipado")
plotly::ggplotly(plot1, tooltip = "text") %>%
  plotly::layout(showlegend = FALSE)

Gráfico de Barras Vertical (Alterado)

# ---- Variation of the chart: rotated axis labels -------------------------------
# Same chart of ESTUDOANTECIPADO answers, now with the x axis labels tilted
# by 30 degrees.
plot1 <- df %>%
  group_by(ESTUDOANTECIPADO = as.character(ESTUDOANTECIPADO)) %>%
  summarise(count = n(), .groups = "drop") %>%
  ggplot(aes(
    # ESTUDOANTECIPADO is already character here; no second conversion needed.
    x = reorder(ESTUDOANTECIPADO, count),
    y = count,
    fill = ESTUDOANTECIPADO,
    text = paste(
      "ESTUDOANTECIPADO:",
      ESTUDOANTECIPADO,
      "<br>",
      "Número de estudantes: ",
      count
    )
  )) +
  # FALSE is spelled out: `F` is an ordinary variable that can be reassigned.
  geom_col(show.legend = FALSE) +
  geom_text(aes(label = count), nudge_y = 1, size = 5) +
  theme(axis.text.x = element_text(angle = 30, vjust = 1, hjust = 1)) +
  ylab("Quantidade de Estudantes") +
  xlab(" ") +
  ggtitle("Estudo Antecipado")
plotly::ggplotly(plot1, tooltip = "text") %>%
  plotly::layout(showlegend = FALSE)

Distribuição de Frequência do Tempo de Estudo dos Estudantes

HORASESTUDO	Freq	FreqR	FreqACM	FreqRACM
Tabela de Frequencia
0	1	0.01	1	0.01
2	1	0.01	2	0.02
3	2	0.02	4	0.04
4	4	0.05	8	0.09
5	6	0.07	14	0.16
6	11	0.13	25	0.29
7	3	0.04	28	0.33
8	7	0.09	35	0.42
9	1	0.01	36	0.43
10	13	0.16	49	0.59
12	7	0.09	56	0.68
13	1	0.01	57	0.69
14	2	0.02	59	0.71
16	3	0.04	62	0.75
20	10	0.12	72	0.87
21	1	0.01	73	0.88
25	2	0.02	75	0.9
30	3	0.04	78	0.94
32	1	0.01	79	0.95
36	1	0.01	80	0.96
40	1	0.01	81	0.97
50	1	0.01	82	0.98
Total	82	0.98	-	-

Histograma

# ---- Histogram of study hours (base graphics) ----------------------------------
# The column is read straight from `df` instead of through attach(df), which
# would leave a copy of every column on the search path.
# Bin counts are computed first so the y axis always leaves room for the
# tallest bar and its label; 50 is kept as the minimum upper limit.
contagem_horasestudo <- hist(df$HORASESTUDO, breaks = 5, plot = FALSE)$counts
hist(df$HORASESTUDO,
     # The x axis shows hours of study; the bar heights are the students.
     xlab = "Horas de Estudo",
     ylab = "Frequência", col = "red", border = "black",
     xlim = c(0, 50),
     ylim = c(0, max(50, 1.1 * max(contagem_horasestudo))),
     breaks = 5,
     main = "Histograma de Horas de Estudo", labels = TRUE)

Histograma com ggplot

# ---- Histogram of study hours (ggplot2) ----------------------------------------
# Five bins, with the number of students printed above each bar.
ggplot(data = df, aes(x = HORASESTUDO)) +
  geom_histogram(color = "black", bins = 5) +
  # after_stat(count) replaces the deprecated `..count..` notation.
  stat_bin(
    bins = 5, geom = "text",
    aes(label = after_stat(count)), vjust = -1.5
  ) +
  # No fixed ylim(0, 20): scale limits drop every bar taller than the limit.
  # The axis is expanded at the top instead, to make room for the labels.
  scale_y_continuous(expand = expansion(mult = c(0.05, 0.15))) +
  ggtitle("Histograma de Horas de Estudo") +
  xlab("Horas de Estudo") +
  ylab("Estudantes")

Gráfico de Barras Vertical

# ---- Vertical bar chart: weekly study hours ------------------------------------
# Counts the students per distinct HORASESTUDO value and renders the chart as
# a plotly widget. The bars are ordered by count, not by number of hours.
plot1 <- df %>%
  group_by(HORASESTUDO) %>%
  summarise(count = n(), .groups = "drop") %>%
  ggplot(aes(
    x = reorder(HORASESTUDO, count),
    y = count,
    fill = HORASESTUDO,
    text = paste(
      "HORASESTUDO: ",
      HORASESTUDO,
      "<br>",
      "Número de estudantes: ",
      count
    )
  )) +
  # FALSE is spelled out: `F` is an ordinary variable that can be reassigned.
  geom_col(show.legend = FALSE) +
  geom_text(aes(label = count), nudge_y = 1, size = 5) +
  # Disabled: rotation of the x axis labels.
  # theme(axis.text.x = element_text(angle = 30, vjust = 1, hjust = 1)) +
  ylab("Quantidade de Estudantes") +
  xlab("Horas") +
  ggtitle("Tempo de Estudo")
plotly::ggplotly(plot1, tooltip = "text") %>%
  plotly::layout(showlegend = FALSE)

Distribuição de Frequência do Número de Créditos Cursado pelos Estudantes

CREDITOS	Freq	FreqR	FreqACM	FreqRACM
Tabela de Frequencia
12	4	0.05	4	0.05
14	2	0.02	6	0.07
16	5	0.06	11	0.13
18	4	0.05	15	0.18
19	2	0.02	17	0.2
20	32	0.39	49	0.59
21	1	0.01	50	0.6
22	3	0.04	53	0.64
23	3	0.04	56	0.68
24	19	0.23	75	0.91
25	2	0.02	77	0.93
26	2	0.02	79	0.95
27	2	0.02	81	0.97
28	1	0.01	82	0.98
Total	82	0.98	-	-

Gráfico de Barras Vertical

# ---- Vertical bar chart: credits taken -----------------------------------------
# Counts the students per distinct CREDITOS value and renders the chart as a
# plotly widget. The bars are ordered by count, not by number of credits.
plot1 <- df %>%
  group_by(CREDITOS) %>%
  summarise(count = n(), .groups = "drop") %>%
  ggplot(aes(
    x = reorder(CREDITOS, count),
    y = count,
    fill = CREDITOS,
    text = paste(
      "CREDITOS: ",
      CREDITOS,
      "<br>",
      "Número de estudantes: ",
      count
    )
  )) +
  # FALSE is spelled out: `F` is an ordinary variable that can be reassigned.
  geom_col(show.legend = FALSE) +
  geom_text(aes(label = count), nudge_y = 1, size = 5) +
  # Disabled: rotation of the x axis labels.
  # theme(axis.text.x = element_text(angle = 30, vjust = 1, hjust = 1)) +
  ylab("Quantidade de Estudantes") +
  xlab("Creditos") +
  ggtitle("Numero de creditos cursados pelos alunos")
plotly::ggplotly(plot1, tooltip = "text") %>%
  plotly::layout(showlegend = FALSE)

Histograma

# ---- Histogram of the number of credits (base graphics) ------------------------
# The column is read straight from `df` instead of through attach(df), which
# would leave a copy of every column on the search path.
# Bin counts are computed first so the y axis always fits the tallest bar and
# its label; a fixed upper limit of 30 clips any bin holding more students.
contagem_creditos <- hist(df$CREDITOS, breaks = 5, plot = FALSE)$counts
hist(df$CREDITOS,
     xlab = "Créditos",
     ylab = "Nº de Estudantes", col = "blue", border = "black",
     xlim = c(10, 30),
     ylim = c(0, max(30, 1.1 * max(contagem_creditos))),
     breaks = 5,
     main = "Histograma de Número de Créditos", labels = TRUE)

Distribuição de Frequência do Interesse pela Área Acadêmica dos Estudantes

Observação: Considerando as respostas, podemos agrupá-las melhor em apenas três opções (SIM, NAO e NAO SEI). Veja a seguir.

INTERESSEAREAACADEMICA	Freq	FreqR	FreqACM	FreqRACM
Tabela de Frequencia
AINDA NAO	1	0.01	1	0.01
DEPENDE	1	0.01	2	0.02
E UMA POSSIBILIDADE, MAS AINDA ME ENCONTRO INDECISO COM RELACAO A ISSO.	1	0.01	3	0.03
INDEFINIDO	1	0.01	4	0.04
MAIS PARA DESINTERESSADO	1	0.01	5	0.05
NAO	41	0.5	46	0.55
NAO NESSE CURSO, VOU FAZER ENEM NOVAMENTE PARA TROCAR	1	0.01	47	0.56
NAO SEI	4	0.05	51	0.61
NAO SEI AINDA	1	0.01	52	0.62
SIM	27	0.33	79	0.95
TALVEZ	3	0.04	82	0.99
Total	82	0.99	-	-

Distribuição de Frequência do Interesse pela Área Acadêmica dos Estudantes (Corrigida)

# ---- Cleaning of the INTERESSEAREAACADEMICA column -----------------------------
# Folds equivalent free-text answers into "NAO" or "NAO SEI". Values that are
# not listed are kept as they are, and the column is stored as character.
# NOTE(review): the slide text announces three final options, but only these
# four answers are recoded; other free-text answers stay as separate
# categories. Confirm whether they should be mapped as well.
df <- df %>%
  mutate(
    INTERESSEAREAACADEMICA = dplyr::recode(
      as.character(INTERESSEAREAACADEMICA),
      "AINDA NAO" = "NAO",
      "DEPENDE" = "NAO SEI",
      "NAO SEI AINDA" = "NAO SEI",
      "TALVEZ" = "NAO SEI"
    )
  )

INTERESSEAREAACADEMICA	Freq	FreqR	FreqACM	FreqRACM
Tabela de Frequencia
E UMA POSSIBILIDADE, MAS AINDA ME ENCONTRO INDECISO COM RELACAO A ISSO.	1	0.01	1	0.01
INDEFINIDO	1	0.01	2	0.02
MAIS PARA DESINTERESSADO	1	0.01	3	0.03
NAO	42	0.51	45	0.54
NAO NESSE CURSO, VOU FAZER ENEM NOVAMENTE PARA TROCAR	1	0.01	46	0.55
NAO SEI	9	0.11	55	0.66
SIM	27	0.33	82	0.99
Total	82	0.99	-	-

Gráfico de Barras Vertical

# ---- Vertical bar chart: interest in an academic career ------------------------
# Counts the students per INTERESSEAREAACADEMICA answer, orders the bars by
# count and renders the chart as a plotly widget.
plot1 <- df %>%
  group_by(INTERESSEAREAACADEMICA) %>%
  summarise(count = n(), .groups = "drop") %>%
  ggplot(aes(
    x = reorder(INTERESSEAREAACADEMICA, count),
    y = count,
    fill = INTERESSEAREAACADEMICA,
    text = paste(
      "INTERESSEAREAACADEMICA: ",
      INTERESSEAREAACADEMICA, "<br>",
      "Número de estudantes: ",
      count
    )
  )) +
  # FALSE is spelled out: `F` is an ordinary variable that can be reassigned.
  geom_col(show.legend = FALSE) +
  geom_text(aes(label = count), nudge_y = 1, size = 5) +
  theme(axis.text.x = element_text(angle = 30, vjust = 1, hjust = 1)) +
  ylab("Quantidade de Estudantes") +
  xlab("") +
  ggtitle("Interesse dos Estudantes\n Pela Área Acadêmica")
plotly::ggplotly(plot1, tooltip = "text") %>%
  plotly::layout(showlegend = FALSE)

Gráfico de Barras Horizontal

# ---- Horizontal bar chart: interest in an academic career ----------------------
# Counts the students per INTERESSEAREAACADEMICA answer, orders the bars by
# count and renders the chart as a plotly widget.
plot <- df %>%
  group_by(INTERESSEAREAACADEMICA = as.character(INTERESSEAREAACADEMICA)) %>%
  summarise(count = n(), .groups = "drop") %>%
  ggplot(aes(
    x = reorder(INTERESSEAREAACADEMICA, count),
    y = count,
    fill = INTERESSEAREAACADEMICA,
    text = paste(
      "Interesse: ",
      INTERESSEAREAACADEMICA, "<br>",
      "Número de estudantes: ",
      count
    )
  )) +
  # FALSE is spelled out: `F` is an ordinary variable that can be reassigned.
  geom_col(position = "dodge", show.legend = FALSE) +
  geom_text(aes(label = count), nudge_y = 1, size = 5) +
  coord_flip() +
  xlab("") +
  ylab("Nº de Estudantes") +
  ggtitle("Interesse dos Estudantes pela Área Acadêmica")

plot %>%
  plotly::ggplotly(tooltip = "text") %>%
  plotly::layout(showlegend = FALSE)

Distribuição de Frequência da Expectativa na Disciplina Pelos Estudantes

RESULTADOEST	Freq	FreqR	FreqACM	FreqRACM
Tabela de Frequencia
DEPENDERA DO PROFESSOR!	9	0.11	9	0.11
NAO	9	0.11	18	0.22
SIM	64	0.78	82	1
Total	82	1	-	-

Gráfico de Barras Vertical

# Plot: vertical bar chart of RESULTADOEST answers -----------------------------

# Count respondents per RESULTADOEST answer and draw one bar per answer,
# ordered by ascending frequency. The `text` aesthetic is what ggplotly() is
# asked to show as the hover tooltip below.
plot1 <- df %>%
  group_by(RESULTADOEST) %>%
  summarise(count = n(), .groups = "drop") %>%
  ggplot(aes(
    x = reorder(RESULTADOEST, count),
    y = count,
    fill = RESULTADOEST,
    text = paste("RESULTADOEST: ",
                 RESULTADOEST, "<br>",
                 "Número de estudantes: ",
                 count)
  )) +
  # FALSE instead of F: F is an ordinary variable that can be reassigned.
  geom_col(show.legend = FALSE) +
  # Print the count just above each bar.
  geom_text(aes(label = count), nudge_y = 1, size = 5) +
  # Tilt the category labels so long answers do not overlap.
  theme(axis.text.x = element_text(angle = 30, vjust = 1, hjust = 1)) +
  ylab("Quantidade de Estudantes") +
  xlab("") +
  ggtitle("Expectativa dos Estudantes Pela Disciplina")

# Convert to an interactive widget whose tooltip shows only the custom text.
plotly::ggplotly(plot1, tooltip = "text") %>%
  plotly::layout(showlegend = FALSE)

Gráfico de Barras Horizontal

# Plot: horizontal bar chart of RESULTADOEST answers ---------------------------

# Same frequency summary as the vertical chart, with the grouping column
# coerced to character first, then flipped so answers run down the y axis.
# The `text` aesthetic is what ggplotly() is asked to show as the tooltip.
plot <- df %>%
  group_by(RESULTADOEST = as.character(RESULTADOEST)) %>%
  summarise(count = n(), .groups = "drop") %>%
  ggplot(aes(
    x = reorder(RESULTADOEST, count),
    y = count,
    fill = RESULTADOEST,
    text = paste("Resultado: ",
                 RESULTADOEST, "<br>",
                 "Número de estudantes: ",
                 count)
  )) +
  # FALSE instead of F: F is an ordinary variable that can be reassigned.
  geom_col(position = "dodge", show.legend = FALSE) +
  # Print the count just past the end of each bar.
  geom_text(aes(label = count), nudge_y = 1, size = 5) +
  coord_flip() +
  xlab("") +
  ylab("Nº de Estudantes") +
  ggtitle("Expectativa dos Alunos\n pela Disciplina")

# Convert to an interactive widget whose tooltip shows only the custom text.
plot %>%
  plotly::ggplotly(tooltip = "text") %>%
  plotly::layout(showlegend = FALSE)

Distribuição de Frequência do Salário Atual dos Estudantes

SALARIOHOJE	Freq	FreqR	FreqACM	FreqRACM
Tabela de Frequencia
0	65	0.79	65	0.79
1	1	0.01	66	0.8
400	1	0.01	67	0.81
500	1	0.01	68	0.82
600	3	0.04	71	0.86
660	1	0.01	72	0.87
750	1	0.01	73	0.88
800	1	0.01	74	0.89
900	2	0.02	76	0.91
1000	1	0.01	77	0.92
1100	1	0.01	78	0.93
1500	2	0.02	80	0.95
3500	1	0.01	81	0.96
20000	1	0.01	82	0.97
Total	82	0.97	-	-

Gráfico de Barras Vertical

# Plot: bar chart of current salary (SALARIOHOJE) ------------------------------

# Count respondents per distinct SALARIOHOJE value and draw one bar per value.
# The x axis is a money amount, so bars are ordered by the value itself
# (factor() sorts numeric levels ascending) rather than by frequency; ordering
# by count scattered the amounts along the "Valor em Real" axis.
# The `text` aesthetic is what ggplotly() is asked to show as the tooltip.
plot1 <- df %>%
  group_by(SALARIOHOJE) %>%
  summarise(count = n(), .groups = "drop") %>%
  ggplot(aes(
    x = factor(SALARIOHOJE),
    y = count,
    fill = SALARIOHOJE,
    text = paste("SALARIOHOJE: ",
                 SALARIOHOJE, "<br>",
                 "Número de estudantes: ",
                 count)
  )) +
  # FALSE instead of F: F is an ordinary variable that can be reassigned.
  geom_col(show.legend = FALSE) +
  # Print the count just above each bar.
  geom_text(aes(label = count), nudge_y = 1, size = 5) +
  # Tilt the value labels so neighbouring amounts do not overlap.
  theme(axis.text.x = element_text(angle = 30, vjust = 1, hjust = 1)) +
  ylab("Quantidade de Estudantes") +
  xlab("Valor em Real") +
  ggtitle("Renda dos Estudantes")

# Convert to an interactive widget whose tooltip shows only the custom text.
plotly::ggplotly(plot1, tooltip = "text") %>%
  plotly::layout(showlegend = FALSE)

Boxplot do Salário Atual dos Estudantes

# Plot: boxplot of current salary (SALARIOHOJE) --------------------------------

# Only the x aesthetic is mapped, so the boxplot is drawn horizontally.
# The whisker end caps come from a separate error-bar layer; it is added
# before the box so the box is painted over the error bar's stem instead of
# having that stem drawn across it.
ggplot(data = df, aes(x = SALARIOHOJE)) +
  stat_boxplot(geom = "errorbar", width = 0.15) +
  geom_boxplot(color = "black") +
  ggtitle("Boxplot Salário Atual")

Distribuição de Frequência do Salário Esperado pelos Estudantes

SALARIOPOSFORMADO	Freq	FreqR	FreqACM	FreqRACM
Tabela de Frequencia
0	1	0.01	1	0.01
2	13	0.16	14	0.17
3	29	0.35	43	0.52
4	15	0.18	58	0.7
5	11	0.13	69	0.83
6	5	0.06	74	0.89
7	3	0.04	77	0.93
8	1	0.01	78	0.94
10	3	0.04	81	0.98
4000	1	0.01	82	0.99
Total	82	0.99	-	-

Gráfico de Barras Vertical

# Plot: bar chart of expected salary (SALARIOPOSFORMADO) -----------------------

# Count respondents per distinct SALARIOPOSFORMADO value and draw one bar per
# value. The x axis is a numeric amount, so bars are ordered by the value
# itself (factor() sorts numeric levels ascending) rather than by frequency;
# ordering by count scattered the amounts along the axis.
# The `text` aesthetic is what ggplotly() is asked to show as the tooltip.
plot1 <- df %>%
  group_by(SALARIOPOSFORMADO) %>%
  summarise(count = n(), .groups = "drop") %>%
  ggplot(aes(
    x = factor(SALARIOPOSFORMADO),
    y = count,
    fill = SALARIOPOSFORMADO,
    text = paste("SALARIOPOSFORMADO: ",
                 SALARIOPOSFORMADO, "<br>",
                 "Número de estudantes: ",
                 count)
  )) +
  # FALSE instead of F: F is an ordinary variable that can be reassigned.
  geom_col(show.legend = FALSE) +
  # Print the count just above each bar.
  geom_text(aes(label = count), nudge_y = 1, size = 5) +
  # Axis labels stay horizontal here; the 30-degree rotation used by the
  # other bar charts is intentionally left disabled:
  # theme(axis.text.x = element_text(angle = 30, vjust = 1, hjust = 1)) +
  ylab("Quantidade de Estudantes") +
  xlab("Valor em Real") +
  ggtitle("Renda Esperada Pelos Estudantes")

# Convert to an interactive widget whose tooltip shows only the custom text.
plotly::ggplotly(plot1, tooltip = "text") %>%
  plotly::layout(showlegend = FALSE)

Histograma

# Table of raw answers ----
# Print how many respondents gave each SALARIOPOSFORMADO value.
table(df[["SALARIOPOSFORMADO"]])

# Keep only the answers below 100 before drawing the histogram.
SALARIOPOSFORMADO <- with(df, SALARIOPOSFORMADO[SALARIOPOSFORMADO < 100])

# Histogram of the retained answers, with the bin counts printed on the bars.
hist(
  SALARIOPOSFORMADO,
  breaks = 10,
  xlim = c(0, 20),
  ylim = c(0, 30),
  col = "blue",
  border = "black",
  labels = TRUE,
  main = "Histograma Renda Esperada\n Pelos Estudantes",
  xlab = "Valor em Real por Salários Mínimos",
  ylab = "Nº de Estudantes"
)

## 
##    0    2    3    4    5    6    7    8   10 4000 
##    1   13   29   15   11    5    3    1    3    1