rm(list = ls())
library(here)
## here() starts at /Users/ashish/files/research/projects/psych80_survey
library(tidyverse)
## ── Attaching core tidyverse packages ──────────────────────── tidyverse 2.0.0 ──
## ✔ dplyr     1.1.3     ✔ readr     2.1.4
## ✔ forcats   1.0.0     ✔ stringr   1.5.0
## ✔ ggplot2   3.4.1     ✔ tibble    3.2.1
## ✔ lubridate 1.9.2     ✔ tidyr     1.3.0
## ✔ purrr     1.0.2
## ── Conflicts ────────────────────────────────────────── tidyverse_conflicts() ──
## ✖ dplyr::filter() masks stats::filter()
## ✖ dplyr::lag()    masks stats::lag()
## ℹ Use the conflicted package (<http://conflicted.r-lib.org/>) to force all conflicts to become errors
library(janitor)
## 
## Attaching package: 'janitor'
## 
## The following objects are masked from 'package:stats':
## 
##     chisq.test, fisher.test
library(tidylog)
## 
## Attaching package: 'tidylog'
## 
## The following objects are masked from 'package:dplyr':
## 
##     add_count, add_tally, anti_join, count, distinct, distinct_all,
##     distinct_at, distinct_if, filter, filter_all, filter_at, filter_if,
##     full_join, group_by, group_by_all, group_by_at, group_by_if,
##     inner_join, left_join, mutate, mutate_all, mutate_at, mutate_if,
##     relocate, rename, rename_all, rename_at, rename_if, rename_with,
##     right_join, sample_frac, sample_n, select, select_all, select_at,
##     select_if, semi_join, slice, slice_head, slice_max, slice_min,
##     slice_sample, slice_tail, summarise, summarise_all, summarise_at,
##     summarise_if, summarize, summarize_all, summarize_at, summarize_if,
##     tally, top_frac, top_n, transmute, transmute_all, transmute_at,
##     transmute_if, ungroup
## 
## The following objects are masked from 'package:tidyr':
## 
##     drop_na, fill, gather, pivot_longer, pivot_wider, replace_na,
##     spread, uncount
## 
## The following object is masked from 'package:stats':
## 
##     filter
library(kableExtra)
## 
## Attaching package: 'kableExtra'
## 
## The following object is masked from 'package:dplyr':
## 
##     group_rows
theme_set(theme_bw())

1 Read data

gradebook_filepath <- "data/2024-03-24T1119_Grades-W24-PSYCH-80-01.csv"

df_gradebook_raw <- read_csv(here(gradebook_filepath)) %>% 
  clean_names 
## Rows: 100 Columns: 35
## ── Column specification ────────────────────────────────────────────────────────
## Delimiter: ","
## chr (34): Student, SIS User ID, SIS Login ID, Section, SU ID, Assignment #2 ...
## dbl  (1): ID
## 
## ℹ Use `spec()` to retrieve the full column specification for this data.
## ℹ Specify the column types or set `show_col_types = FALSE` to quiet this message.
df_gradebook <- df_gradebook_raw %>% 
  drop_na(id) %>% 
  rename(suid = su_id) %>% 
  drop_na(suid) %>% 
  filter(suid != "06778030") %>% # jahfari
  mutate(suid = as.numeric(suid)) %>%
  rename_all(~str_replace(., "_[0-9]{6}$", "")) %>% 
  rename(exam_2 = exam_number_2) %>% 
  mutate_at(vars(
    exam_1, 
    exam_1_for_traveling_athletes,
    exam_number_2_traveling_athletes,
    exam_2,
    final_exam,
    assignment_number_1,
    assignment_number_2,
    assignment_number_3,
    assignment_number_4,
    extra_credit,
    participation), 
    as.numeric) %>%
  mutate(exam_2 = ifelse(!is.na(exam_number_2_traveling_athletes),
                         exam_number_2_traveling_athletes,
                          exam_2)) %>% 
  select(-matches("traveling_athletes")) %>%
  rename_at(vars(matches("assignment_number_[1-4]")), 
            ~str_replace(., "number_", "")) %>% 
  rename(sunet = sis_login_id) %>% 
  # -- dropped class -- #
  filter(!str_detect(student, "Dietrich"))
## drop_na: removed 2 rows (2%), 98 rows remaining
## rename: renamed one variable (suid)
## drop_na: removed 3 rows (3%), 95 rows remaining
## filter: removed one row (1%), 94 rows remaining
## mutate: converted 'suid' from character to double (0 new NA)
## rename_all: renamed 11 variables (assignment_number_2, assignment_number_3, exam_1, exam_number_2_traveling_athletes, assignment_number_1, …)
## rename: renamed one variable (exam_2)
## Warning: There were 3 warnings in `mutate()`.
## The first warning was:
## ℹ In argument: `exam_1_for_traveling_athletes =
##   .Primitive("as.double")(exam_1_for_traveling_athletes)`.
## Caused by warning:
## ! NAs introduced by coercion
## ℹ Run `dplyr::last_dplyr_warnings()` to see the 2 remaining warnings.
## mutate_at: converted 'assignment_number_2' from character to double (0 new NA)
##            converted 'assignment_number_3' from character to double (0 new NA)
##            converted 'exam_1' from character to double (0 new NA)
##            converted 'exam_number_2_traveling_athletes' from character to double (93 new NA)
##            converted 'assignment_number_1' from character to double (0 new NA)
##            converted 'extra_credit' from character to double (0 new NA)
##            converted 'assignment_number_4' from character to double (0 new NA)
##            converted 'exam_2' from character to double (0 new NA)
##            converted 'final_exam' from character to double (92 new NA)
##            converted 'participation' from character to double (0 new NA)
##            converted 'exam_1_for_traveling_athletes' from character to double (92 new NA)
## mutate: no changes
## select: dropped 2 variables (exam_number_2_traveling_athletes, exam_1_for_traveling_athletes)
## rename_at: renamed 4 variables (assignment_2, assignment_3, assignment_1, assignment_4)
## rename: renamed one variable (sunet)
## filter: removed one row (1%), 93 rows remaining

2 Process data

exam_filepath <- "data/Final exam grading.xlsx"

df_exam_raw <- readxl::read_excel(here(exam_filepath), 
                                  sheet = "Update") %>% 
  clean_names


df_mc_key <- df_exam_raw %>% 
  slice(1) %>% 
  select(matches("mc[0-9]{,2}$")) %>% 
  pivot_longer(cols = everything(), 
               names_to = "mc",
               values_to = "key")
## slice: removed 96 rows (99%), one row remaining
## select: dropped 21 variables (exam_number, name, suid, sunet, mc_total, …)
## pivot_longer: reorganized (mc1, mc2, mc3, mc4, mc5, …) into (mc, key) [was 1x25, now 25x2]
df_mc <- df_exam_raw %>% 
  drop_na(suid) %>% 
  drop_na(total_score) %>% 
  select(name, suid, matches("mc[0-9]{,2}$")) %>% 
  pivot_longer(cols = -c(name, suid), 
               names_to = "mc",
               values_to = "response") %>%
  full_join(df_mc_key) %>% 
  mutate(score = if_else(response == key, 1, 0)) %>% 
  select(-key, -response) %>%
  mutate(mc = paste0(mc, "_score")) %>% 
  pivot_wider(id_cols = c(name, suid), 
              names_from = mc, 
              values_from = score) 
## drop_na: removed 3 rows (3%), 94 rows remaining
## drop_na: removed 3 rows (3%), 91 rows remaining
## select: dropped 19 variables (exam_number, sunet, mc_total, sa1, sa2, …)
## pivot_longer: reorganized (mc1, mc2, mc3, mc4, mc5, …) into (mc, response) [was 91x27, now 2275x4]
## Joining with `by = join_by(mc)`
## full_join: added one column (key)
## > rows only in x 0
## > rows only in y 0
## > matched rows 2,275
## > =======
## > rows total 2,275
## mutate: new variable 'score' (double) with 3 unique values and 1% NA
## select: dropped 2 variables (response, key)
## mutate: changed 2,275 values (100%) of 'mc' (0 new NA)
## pivot_wider: reorganized (mc, score) into (mc1_score, mc2_score, mc3_score,
## mc4_score, mc5_score, …) [was 2275x4, now 91x27]
df_exam <- df_exam_raw %>% 
  drop_na(suid) %>% 
  drop_na(total_score) %>% 
  left_join(df_mc) %>% 
  mutate(total_score = total_score + 3) %>%  # adding 3 points because of MC questions that most got wrong
  # left pad the numbers after mc in the column names to make them all the same length
  
  rename_at(vars(matches("mc[0-9]($|_)")), 
            function(x){
              x <- str_replace_all(x, "mc", "")
              x <- str_pad(x, width = 2, side = "left", pad = "0")
              paste0("mc", x)
            }) 
## drop_na: removed 3 rows (3%), 94 rows remaining
## drop_na: removed 3 rows (3%), 91 rows remaining
## Joining with `by = join_by(name, suid)`left_join: added 25 columns (mc1_score, mc2_score, mc3_score, mc4_score, mc5_score, …)
##            > rows only in x    0
##            > rows only in y  ( 0)
##            > matched rows     91
##            >                 ====
##            > rows total       91
## mutate: changed 91 values (100%) of 'total_score' (0 new NA)
## rename_at: renamed 9 variables (mc01, mc02, mc03, mc04, mc05, …)

3 Final exam

3.1 MC

3.1.1 Total MC Score

df_exam %>% 
  select(name, suid, matches("mc[0-9]{,2}_score$")) %>% 
  pivot_longer(cols = -c(name, suid), 
               names_to = "mc",
               values_to = "score") %>%
  drop_na(mc) %>% 
  group_by(name, suid) %>% 
  summarize(score = sum(score)) %>%
  ggplot(aes(x = score)) +
  geom_histogram(bins = 20) +
  geom_vline(aes(xintercept = mean(score, na.rm=T)), color = "red") +
  geom_label(
    aes(label = paste0("M = ", round(mean(score, na.rm=T), 2),
                       "\nSD = ", round(sd(score, na.rm=T), 2)
    )),
    x=15, y = 10,
    vjust = -1) +
  labs(title = "Multiple Choice Scores", x = "Score", y = "Count")
## select: dropped 44 variables (exam_number, sunet, mc01, mc02, mc03, …)
## pivot_longer: reorganized (mc1_score, mc2_score, mc3_score, mc4_score, mc5_score, …) into (mc, score) [was 91x27, now 2275x4]
## drop_na: no rows removed
## group_by: 2 grouping variables (name, suid)
## summarize: now 91 rows and 3 columns, one group variable remaining (name)
## Warning: Removed 4 rows containing non-finite values (`stat_bin()`).

3.1.2 By question

d <- df_exam %>% 
  select(name, suid, matches("mc[0-9]{,2}_score$")) %>% 
  pivot_longer(cols = -c(name, suid), 
               names_to = "mc",
               values_to = "score") %>%
  drop_na(mc) %>% 
  mutate(mc = str_replace_all(mc, "[a-z_]", "")) %>%
  mutate(mc = str_pad(mc, width = 2, side = "left", pad = "0")) 
## select: dropped 44 variables (exam_number, sunet, mc01, mc02, mc03, …)
## pivot_longer: reorganized (mc1_score, mc2_score, mc3_score, mc4_score, mc5_score, …) into (mc, score) [was 91x27, now 2275x4]
## drop_na: no rows removed
## mutate: changed 2,275 values (100%) of 'mc' (0 new NA)
## mutate: changed 819 values (36%) of 'mc' (0 new NA)
d %>%
  group_by(mc) %>%
  summarize(score = mean(score, na.rm=T)) %>%
  ggplot(aes(x = mc, y = score)) +
  geom_col() +
  geom_text(aes(label = round(score, 2)), vjust = -0.5, size = 3) +
  labs(title = "Multiple Choice by Question", y = "Proportion correct")
## group_by: one grouping variable (mc)
## summarize: now 25 rows and 2 columns, ungrouped

df_exam %>% 
  select(name, suid, matches("mc[0-9]{,2}$")) %>% 
  pivot_longer(cols = -c(name, suid), 
               names_to = "mc",
               values_to = "response") %>% 
  ggplot(aes(x = response)) + 
  geom_bar(stat = "count") + 
  facet_wrap(~mc, scales = "free") 
## select: dropped 44 variables (exam_number, sunet, mc_total, sa1, sa2, …)
## pivot_longer: reorganized (mc01, mc02, mc03, mc04, mc05, …) into (mc, response) [was 91x27, now 2275x4]

3.1.3 By grader

df_exam %>%
  ggplot(aes(x = mc_sa_grader, y = mc_total)) +
  geom_bar(stat = "summary", fun = "mean") +
  geom_errorbar(stat = "summary", 
                fun.data = mean_sdl,
                fun.args = list(mult = 1),
                width = 0.2) +
  geom_point( alpha = .2, 
              position = position_jitter(width = .2,
                                         height = 0))

3.2 SA

3.2.1 Total

df_exam %>% 
  ggplot(aes(x = sa_total)) + 
  geom_histogram(bins = 25) +
  labs(title = "Short Answer Scores", x = "Score", y = "Count") +
  geom_vline(aes(xintercept = mean(sa_total, na.rm=T)), color = "red") +
  geom_label(
    aes(label = paste0("M = ", round(mean(sa_total, na.rm=T), 2),
                       "\nSD = ", round(sd(sa_total, na.rm=T), 2)
    )),
    x=15, y = 10,
    vjust = -1)  +
  scale_x_continuous(breaks = seq(0, 25, 1))

3.2.2 By grader

df_exam %>%
  ggplot(aes(x = mc_sa_grader, y = sa_total)) + 
  geom_bar(stat = "summary", fun = "mean") +
  geom_point(alpha = .4, position=position_jitter(.2)) +
  geom_errorbar(stat = "summary", fun.data=mean_sdl, 
                fun.args = list(mult=1),
                width = 0.5) 

df_exam %>% 
  group_by(mc_sa_grader) %>% 
  summarize(mean_score = mean(sa_total),
            sd_score = sd(sa_total))
## group_by: one grouping variable (mc_sa_grader)
## summarize: now 4 rows and 3 columns, ungrouped
## # A tibble: 4 × 3
##   mc_sa_grader mean_score sd_score
##   <chr>             <dbl>    <dbl>
## 1 Allen              20.6     3.96
## 2 Arethea            19.6     3.90
## 3 Ashish             21.9     3.82
## 4 Kate               20.0     6.02

3.2.3 By question

d <- df_exam %>%
  select(name, suid, matches("sa[0-9]$"), mc_sa_grader) %>%
  pivot_longer(-c(name, suid, mc_sa_grader), 
               names_to = "sa",
               values_to = "score") %>%
  drop_na(score) 
## select: dropped 60 variables (exam_number, sunet, mc01, mc02, mc03, …)
## pivot_longer: reorganized (sa1, sa2, sa3, sa4, sa5, …) into (sa, score) [was 91x11, now 728x5]
## drop_na: removed 273 rows (38%), 455 rows remaining
d %>% 
  ggplot(aes(x = sa, y = score)) + 
  geom_bar(stat = "summary", fun = "mean") +
  geom_point(alpha = .4, position=position_jitter(.2)) +
  geom_errorbar(stat = "summary", fun.data=mean_sdl, 
                fun.args = list(mult=1),
                width = 0.5) 

d %>% 
  group_by(sa) %>% 
  summarize(mean_score = mean(score),
            sd_score = sd(score)) 
## group_by: one grouping variable (sa)
## summarize: now 8 rows and 3 columns, ungrouped
## # A tibble: 8 × 3
##   sa    mean_score sd_score
##   <chr>      <dbl>    <dbl>
## 1 sa1         4.14    1.30 
## 2 sa2         4.12    1.01 
## 3 sa3         4.35    0.724
## 4 sa4         4.15    1.25 
## 5 sa5         3.38    1.38 
## 6 sa6         4.02    1.23 
## 7 sa7         3.93    1.39 
## 8 sa8         4.52    0.903

3.2.4 By grader by question - 5

d <- df_exam %>%
  select(name, suid, matches("sa[0-9]$"), mc_sa_grader) %>%
  pivot_longer(-c(name, suid, mc_sa_grader), 
               names_to = "sa",
               values_to = "score") %>%
  filter(sa=="sa5") %>% 
  drop_na(score)
## select: dropped 60 variables (exam_number, sunet, mc01, mc02, mc03, …)
## pivot_longer: reorganized (sa1, sa2, sa3, sa4, sa5, …) into (sa, score) [was 91x11, now 728x5]
## filter: removed 637 rows (88%), 91 rows remaining
## drop_na: removed 40 rows (44%), 51 rows remaining
d %>% 
  ggplot(aes(x = mc_sa_grader, y = score)) + 
  geom_bar(stat = "summary", fun = "mean") +
  geom_point(alpha = .4, position=position_jitter(.2)) +
  geom_errorbar(stat = "summary", fun.data=mean_sdl, 
                fun.args = list(mult=1),
                width = 0.5) 

d %>% 
  group_by(mc_sa_grader) %>% 
  summarize(n = n())
## group_by: one grouping variable (mc_sa_grader)
## summarize: now 4 rows and 2 columns, ungrouped
## # A tibble: 4 × 2
##   mc_sa_grader     n
##   <chr>        <int>
## 1 Allen           13
## 2 Arethea         12
## 3 Ashish          16
## 4 Kate            10

3.2.5 By grader by question - all qs

d <- df_exam %>%
  select(name, suid, matches("sa[0-9]$"), mc_sa_grader) %>%
  pivot_longer(-c(name, suid, mc_sa_grader), 
               names_to = "sa",
               values_to = "score") %>%
  drop_na(score)
## select: dropped 60 variables (exam_number, sunet, mc01, mc02, mc03, …)
## pivot_longer: reorganized (sa1, sa2, sa3, sa4, sa5, …) into (sa, score) [was 91x11, now 728x5]
## drop_na: removed 273 rows (38%), 455 rows remaining
d %>% 
  ggplot(aes(x = mc_sa_grader, y = score)) + 
  geom_bar(stat = "summary", fun = "mean") +
  geom_point(alpha = .4, position=position_jitter(.2)) +
  geom_errorbar(stat = "summary", fun.data=mean_sdl, 
                fun.args = list(mult=1),
                width = 0.5) + 
  facet_wrap(~sa)

d %>% 
  group_by(mc_sa_grader) %>% 
  summarize(n = n())
## group_by: one grouping variable (mc_sa_grader)
## summarize: now 4 rows and 2 columns, ungrouped
## # A tibble: 4 × 2
##   mc_sa_grader     n
##   <chr>        <int>
## 1 Allen          120
## 2 Arethea        100
## 3 Ashish         115
## 4 Kate           120

3.2.6 By grader by question - 6

d <- df_exam %>%
  select(name, suid, matches("sa[0-9]$"), mc_sa_grader) %>%
  pivot_longer(-c(name, suid, mc_sa_grader), 
               names_to = "sa",
               values_to = "score") %>%
  filter(sa=="sa6") %>% 
  drop_na(score)
## select: dropped 60 variables (exam_number, sunet, mc01, mc02, mc03, …)
## pivot_longer: reorganized (sa1, sa2, sa3, sa4, sa5, …) into (sa, score) [was 91x11, now 728x5]
## filter: removed 637 rows (88%), 91 rows remaining
## drop_na: removed 20 rows (22%), 71 rows remaining
d %>% 
  ggplot(aes(x = mc_sa_grader, y = score)) + 
  geom_bar(stat = "summary", fun = "mean") +
  geom_point(alpha = .4, position=position_jitter(.2)) +
  geom_errorbar(stat = "summary", fun.data=mean_sdl, 
                fun.args = list(mult=1),
                width = 0.5) 

3.3 Essay

df_exam %>% 
  ggplot(aes(x = essay_total)) + 
  geom_histogram(bins = 25) +
  geom_label(
    aes(label = paste0("M = ", round(mean(essay_total, na.rm=T), 2),
                       "\nSD = ", round(sd(essay_total, na.rm=T), 2)
    )),
    x=15, y = 10,
    vjust = -1)  +
  labs(title = "Essay Scores", x = "Score", y = "Count") +
  scale_x_continuous(breaks = seq(0, 20, 1))

3.3.1 By grader

df_exam %>%
  ggplot(aes(x = essay_grader, y = essay_total)) + 
  geom_bar(stat = "summary", fun = "mean") + 
  geom_point(alpha = .4, position=position_jitter(.2)) +
  geom_errorbar(stat = "summary", fun.data=mean_sdl, 
                fun.args = list(mult=1),
                width = 0.5)

3.4 Total

df_exam %>% 
  ggplot(aes(x = total_score)) + 
  geom_histogram(bins = 25) +
  labs(title = "Total Scores", x = "Score", y = "Count") +
  scale_x_continuous(breaks = seq(0, 72, 5)) +
  geom_vline(aes(xintercept = mean(total_score, na.rm=T)), color = "red") +
  geom_label(
    aes(label = paste0("M = ", round(mean(total_score, na.rm=T), 2),
                       "\nSD = ", round(sd(total_score, na.rm=T), 2)
    )),
    x=40, y = 10,
    vjust = -1)

df_exam %>% 
  select(name, mc_total, sa_total, essay_total, total_score) %>% 
  view
## select: dropped 66 variables (exam_number, suid, sunet, mc01, mc02, …)

4 Class grades

# 5 + 5 + 5 + 5 + 40 + 40 + 70 + 5
df_gradebook_final <- df_gradebook %>% 
  left_join(df_exam %>% 
              select(suid, total_score) %>% 
              rename(final_exam_score = total_score)) %>% 
  mutate_at(vars(assignment_1,
                 assignment_2,
                 assignment_3,
                 assignment_4,
                 exam_1,
                 exam_2,
                 participation,
                 extra_credit,
                 final_exam_score), ~replace_na(as.numeric(.), 0)) %>% 
  mutate(total_class_score = 
           (assignment_1/5) * 5 + 
           (assignment_2/5) * 5  + 
           (assignment_3/5) * 5  + 
           (assignment_4/5) * 5  + 
           (exam_1 / 40) * 20 + 
           (exam_2 / 40) * 20 + 
           (participation / 5) * 5 + 
           extra_credit / 2 * 2 + 
           (final_exam_score / 70) * 35) %>% 
  select(student, sunet, suid, 
         assignment_1,
         assignment_2,
         assignment_3,
         assignment_4,
         exam_1,
         exam_2,
         participation,
         extra_credit,
         final_exam_score,
         total_class_score) 
## select: dropped 69 variables (exam_number, name, sunet, mc01, mc02, …)
## rename: renamed one variable (final_exam_score)
## Joining with `by = join_by(suid)`
## left_join: added one column (final_exam_score)
## > rows only in x 2
## > rows only in y ( 0)
## > matched rows 91
## > ====
## > rows total 93
## mutate_at: changed 2 values (2%) of 'assignment_2' (2 fewer NA)
## changed 2 values (2%) of 'assignment_3' (2 fewer NA)
## changed one value (1%) of 'exam_1' (1 fewer NA)
## changed 5 values (5%) of 'assignment_1' (5 fewer NA)
## changed 35 values (38%) of 'extra_credit' (35 fewer NA)
## changed 3 values (3%) of 'assignment_4' (3 fewer NA)
## changed 5 values (5%) of 'participation' (5 fewer NA)
## changed 2 values (2%) of 'final_exam_score' (2 fewer NA)
## mutate: new variable 'total_class_score' (double) with 90 unique values and 0%
## NA
## select: dropped 22 variables (id, sis_user_id, section, final_exam,
## assignments_current_points, …)

4.1 Total class scores

99+ = A+ 93-99 = A 90-92 = A- 87-89 = B+ 83-86 = B 80-82 = B- 77-79 = C+ 73-76 = C 70-72 = C- 67-69 = D+ 63-66 = D 60-62 = D- <60 = F

df_gradebook_final %>% 
  summarize(
    mean = mean(total_class_score),
    sd = sd(total_class_score)
  )
## summarize: now one row and 2 columns, ungrouped
## # A tibble: 1 × 2
##    mean    sd
##   <dbl> <dbl>
## 1  89.6  13.2
df_gradebook_final %>% 
  ggplot(aes(x = total_class_score)) +
  geom_histogram() + 
  geom_vline(aes(xintercept = mean(total_class_score)), color = "red") +
  geom_label(
    aes(label = paste0("M = ", round(mean(total_class_score), 2),
                       "\nSD = ", round(sd(total_class_score), 2)
    )),
    x=60, y = 10,
    vjust = -1) 
## `stat_bin()` using `bins = 30`. Pick better value with `binwidth`.

df_letter_grades <- df_gradebook_final %>% 
  mutate(grade = case_when(
    total_class_score >= 99 ~ "A+",
    total_class_score >= 93 & total_class_score < 99 ~ "A",
    total_class_score >= 90 & total_class_score < 93 ~ "A-",
    total_class_score >= 87 & total_class_score < 90 ~ "B+",
    total_class_score >= 83 & total_class_score < 87 ~ "B",
    total_class_score >= 80 & total_class_score < 83 ~ "B-",
    total_class_score >= 77 & total_class_score < 80 ~ "C+",
    total_class_score >= 73 & total_class_score < 77 ~ "C",
    total_class_score >= 70 & total_class_score < 73 ~ "C-",
    total_class_score >= 67 & total_class_score < 70 ~ "D+",
    total_class_score >= 63 & total_class_score < 67 ~ "D",
    total_class_score >= 60 & total_class_score < 63 ~ "D-",
    total_class_score < 60 ~ "F"
  )) %>% 
  arrange(desc(total_class_score)) %>% 
  mutate(rank = row_number()) %>% 
  group_by(grade) %>% 
  mutate(cumul_n = max(rank)) %>% 
  ungroup
## mutate: new variable 'grade' (character) with 12 unique values and 0% NA
## mutate: new variable 'rank' (integer) with 93 unique values and 0% NA
## group_by: one grouping variable (grade)
## mutate (grouped): new variable 'cumul_n' (integer) with 12 unique values and 0% NA
## ungroup: no grouping variables
df_letter_grades %>%
  # filter(final_exam_score > 0) %>% 
  mutate(grade = fct_reorder(grade, total_class_score, .desc = T)) %>%
  group_by(grade, cumul_n) %>% 
  summarize(
    n = n(),
    cumul_n = max(cumul_n),
    .groups = "drop"
  ) %>%
  ggplot(aes(x = grade, y = n)) +
  geom_col() + 
  geom_text(aes(label = n), vjust=-.5) +
  # geom_text(aes(label = cumul_n), y = 12) +
  labs(title = "Class Grades", x = "Grade", y = "Count")
## mutate: converted 'grade' from character to factor (0 new NA)
## group_by: 2 grouping variables (grade, cumul_n)
## summarize: now 12 rows and 3 columns, ungrouped

5 Each student’s final class grade

df_letter_grades %>% 
  select(-sunet, -suid, student, grade, total_class_score, everything()) %>% 
  kable()
## select: columns reordered (student, assignment_1, assignment_2, assignment_3, assignment_4, …)
student assignment_1 assignment_2 assignment_3 assignment_4 exam_1 exam_2 participation extra_credit final_exam_score total_class_score grade rank cumul_n sunet suid
Roghair, Aurora Jade 5 4.90 4.40 5.0 40.25 42.00 5 2 73.00 103.925 A+ 1 11 aroghair 6495846
Wang, Annabelle Anjie 5 5.00 5.00 5.0 41.25 41.50 5 2 69.00 102.875 A+ 2 11 aawang0 6778775
Salahifar, Aylin G 5 5.00 4.66 5.0 40.50 41.00 5 2 70.50 102.660 A+ 3 11 aysalah 6650094
Nguyen, Michelle 5 5.00 4.74 4.5 42.00 40.50 5 2 70.00 102.490 A+ 4 11 micheiie 6682990
Park, Emma 5 4.70 4.53 5.0 41.00 41.00 5 0 73.00 101.730 A+ 5 11 emmapark 6421329
Trockel, Rebecca Sabrina 5 4.50 5.00 5.0 40.25 39.50 5 2 70.25 101.500 A+ 6 11 rtrockel 6376569
Mallen, Susie Lynn 5 4.50 4.48 5.0 41.75 37.50 5 2 71.00 101.105 A+ 7 11 smallen 6640480
Barrientos, Isabella 5 5.00 4.83 5.0 40.00 41.00 5 2 67.50 101.080 A+ 8 11 isabp03 6504013
Huang, Emily 5 5.00 4.13 5.0 40.50 41.00 5 0 71.50 100.630 A+ 9 11 ehuang25 6603608
Seyahi, Lara Selin 5 5.00 4.40 5.0 38.50 40.00 5 2 68.50 99.900 A+ 10 11 seyahi 6545140
Tiu, Ekin Yokhan 5 4.50 4.48 5.0 40.00 40.00 5 0 70.50 99.230 A+ 11 11 ekintiu 6423010
Srinivasan, Maya Wells 5 5.00 4.66 5.0 41.25 36.00 5 2 67.00 98.785 A 12 49 mayasrin 6422613
Breschini, Alex Jean-Buenviaje 5 4.75 4.61 4.5 39.00 41.00 5 2 65.50 98.610 A 13 49 ajb2004 6683400
Citron, Emily 5 5.00 4.65 5.0 38.50 36.50 5 2 68.75 98.525 A 14 49 ecitron 6438897
Banks, Everett Samuel 5 2.45 4.22 5.0 39.88 39.50 5 2 70.00 98.360 A 15 49 evbanks 6731886
He, Shizhe 5 4.75 4.05 5.0 40.75 38.75 5 2 65.50 98.300 A 16 49 shizhehe 6661097
McNitt, Kaylee Sarah 5 4.25 4.66 5.0 37.00 38.50 5 2 69.00 98.160 A 17 49 ksmcnitt 6369630
Siggelkow, Katarina 5 5.00 4.66 5.0 41.00 39.50 5 2 62.50 98.160 A 18 49 ksig 6682708
Smith, Jerred Denali Villegas 5 5.00 4.74 5.0 39.50 41.00 5 0 66.00 97.990 A 19 49 jerred89 6512315
Movich-Fields, Tishrei Lee 5 5.00 4.66 5.0 39.25 38.50 5 0 68.75 97.910 A 20 49 tishrei 6420491
Sundar, Lavanya Marakatavalli 5 4.75 4.66 4.5 41.00 37.50 5 0 69.50 97.910 A 21 49 lsundar 6659969
Chang, Yujin 5 4.25 4.70 5.0 38.00 37.00 5 2 67.50 97.200 A 22 49 yujin9 6644961
Cortes, Nicole 5 4.75 4.74 5.0 37.25 38.00 5 2 66.00 97.115 A 23 49 nicortes 6504077
Mengheang, Marissa 5 5.00 4.27 5.0 41.25 34.50 5 2 65.50 96.895 A 24 49 mars7418 6683966
Torres Arsuaga, Alexandra Cecilia 5 4.75 4.58 5.0 35.50 41.25 5 2 64.00 96.705 A 25 49 torresar 6512328
Huang, Robin 5 3.50 4.70 5.0 40.00 33.50 5 2 69.50 96.700 A 26 49 rhuang42 6512167
Alvarado, Rodrigo 5 5.00 4.74 5.0 38.50 36.00 5 2 64.50 96.240 A 27 49 rodrigo0 6493795
Varlack, Vivianna Tiffany 5 5.00 3.97 5.0 40.50 37.75 5 0 66.00 96.095 A 28 49 vvarlack 6496691
Costa, Ana Ferreira da Motta 5 4.75 4.53 5.0 40.50 38.00 5 2 61.00 96.030 A 29 49 anafmc 6683414
Fung, Michael Benjamin 5 3.75 4.87 5.0 36.75 38.75 5 2 65.25 95.995 A 30 49 mikefung 6650042
Vachovska, Lora 5 5.00 4.54 5.0 36.25 39.00 5 0 67.50 95.915 A 31 49 lorapv 6544168
Dakad, Nazli Hilal 5 5.00 4.48 5.0 39.25 37.00 5 2 62.00 95.605 A 32 49 nhdakad 6455175
Alluri, Rishi 5 5.00 4.22 4.0 38.25 42.00 5 0 64.00 95.345 A 33 49 allurir 6543538
Jiang, Yuzhi 5 4.25 4.57 5.0 38.25 38.00 5 0 66.50 95.195 A 34 49 baggioj 6456134
Lu, Yuwen 5 5.00 4.57 5.0 38.75 37.00 5 0 65.50 95.195 A 35 49 tinaluyw 6486310
Marton, Selina Claire 5 4.75 3.53 5.0 39.25 35.50 5 2 65.00 95.155 A 36 49 selinacm 6743583
Anderson, Elijah Francisco 5 4.00 4.61 5.0 36.38 37.00 5 2 65.50 95.050 A 37 49 efa2003 6683927
Brindisi, Sophia Rose 5 5.00 4.83 5.0 40.38 36.00 5 2 60.00 95.020 A 38 49 sbrindis 6495775
Kisembo, Benita Kemigisa 5 5.00 3.92 5.0 39.88 37.50 5 0 64.50 94.860 A 39 49 kisemben 6504254
Martz, Eric 5 5.00 4.57 5.0 38.50 38.00 5 0 64.00 94.820 A 40 49 emartz 6448695
Garcia-Escobar, Eduardo 5 4.25 4.48 5.0 34.25 35.00 5 2 68.50 94.605 A 41 49 egarcia6 6504136
Lawson, Benjamin Richard 5 3.15 4.87 4.5 38.50 37.25 5 2 64.00 94.395 A 42 49 blawson3 6512205
Cushing, Stella Marie Reese 5 4.50 4.48 5.0 37.00 34.75 5 2 65.00 94.355 A 43 49 scushing 6745843
Sossa, Candice Rachel 5 5.00 3.41 5.0 39.25 38.75 5 0 63.00 93.910 A 44 49 csossa 6401980
Narayanan, Tejas 5 5.00 4.87 5.0 40.00 36.00 5 0 62.00 93.870 A 45 49 tejasn9 6458279
Temal, Anjali 5 4.50 4.46 5.0 37.25 38.75 5 0 63.50 93.710 A 46 49 anjali82 6456179
Ozoemelam, Liane 5 4.00 4.74 5.0 37.00 36.50 5 0 65.50 93.240 A 47 49 lianeozo 6544973
Tchakmakjian, Emmanuella Levon 5 5.00 4.27 4.5 38.25 38.00 5 2 58.50 93.145 A 48 49 etchak 6409933
Ballard, Tensaye Libra 5 4.50 4.57 5.0 38.75 30.50 5 2 64.75 93.070 A 49 49 tensayeb 6749550
Hernandez Leon, Brianna 5 3.75 4.31 5.0 34.88 38.00 5 2 62.00 92.500 A- 50 60 janay17 6647227
Ghimire, Samikshya 5 3.40 4.61 4.5 37.38 34.50 5 2 64.00 92.450 A- 51 60 samighim 6532372
Barone, Kamryn Regina 5 4.65 4.66 5.0 38.75 34.00 5 2 59.50 92.435 A- 52 60 kbarone 6739395
Morales-Madrid, Samuel 5 4.50 4.66 5.0 38.25 34.50 5 2 57.50 91.285 A- 53 60 samuelmm 6603236
Odell, Cherrial Ann 5 3.90 5.00 4.5 36.88 36.75 5 2 58.00 91.215 A- 54 60 cherrial 6360769
Tamor, Sydney Elizabeth 5 3.75 3.58 4.0 37.00 41.25 5 0 61.50 91.205 A- 55 60 sydtamor 6779178
Strutz, Patricia Marie 5 5.00 3.53 5.0 38.00 34.25 5 0 62.75 91.030 A- 56 60 pstrutz 6503711
MacCoun, Audrey 5 5.00 4.61 5.0 32.50 33.25 5 2 62.25 90.610 A- 57 60 amaccoun 6605504
Murphy, Cheyenne N 5 4.50 4.53 4.5 32.50 33.00 5 2 64.50 90.530 A- 58 60 cmurphy6 6493861
Rodriguez, Keila Samantha 5 4.50 4.00 5.0 35.38 36.00 5 2 58.50 90.440 A- 59 60 keila 6647252
Nordhagen, Andrew 5 4.75 4.48 5.0 33.00 35.50 5 0 63.50 90.230 A- 60 60 aenord 6504358
Pinciroli Pascual, Alissa 5 4.15 4.66 4.0 34.75 34.75 5 2 60.25 89.685 B+ 61 73 pasc061 6495841
Vincent, Dylan Michael 0 5.00 3.97 5.0 37.75 33.25 5 2 66.00 89.470 B+ 62 73 dylanv 6423286
Sturdza, Caroline Marie 5 4.25 4.40 5.0 33.75 29.00 5 2 64.50 89.275 B+ 63 73 carostu 6486322
Aguilar, Karla Patricia 5 5.00 4.24 5.0 37.00 33.00 5 2 55.50 88.990 B+ 64 73 kaguila5 6682225
Kehm, Andrew Thomas 5 5.00 4.18 5.0 33.75 33.25 5 0 62.00 88.680 B+ 65 73 atkehm 6683181
Stewart, Riley Grace 5 4.75 4.22 4.5 36.75 28.50 5 2 61.00 88.595 B+ 66 73 riley3 6644984
De Loera, Alan 5 4.00 4.74 5.0 40.50 39.00 5 0 50.00 88.490 B+ 67 73 adeloera 6496650
Ramirez Gonzalez, Mirna Guadalupe 5 4.25 4.66 4.5 37.00 26.50 5 2 62.50 88.410 B+ 68 73 mirna22 6649756
Shenoy, Thanh-Nga Catherine 5 3.75 5.00 5.0 36.00 37.50 5 2 51.50 88.250 B+ 69 73 tshenoy 6544608
Hansen, Luke 0 4.25 4.74 5.0 37.00 36.00 5 0 65.00 87.990 B+ 70 73 lrhansen 6252378
Garewal, Samuel Oliver 5 3.50 4.64 5.0 33.75 29.00 5 0 66.33 87.680 B+ 71 73 sgarewal 6827043
Heinemann, Abigail Grace 5 3.75 5.00 5.0 33.25 36.50 0 0 67.50 87.375 B+ 72 73 aheinem 6641596
Cortes, Maria 5 3.50 4.57 4.0 36.25 29.00 5 2 61.00 87.195 B+ 73 73 mcortesc 6657464
Powell, Taylor Janai 5 5.00 4.27 5.0 36.00 33.00 5 2 51.25 86.395 B 74 77 tjp2608 6682994
Rojas, Tania Lisseth 5 3.75 4.59 4.0 34.00 36.50 5 0 55.75 85.465 B 75 77 trojas18 6421966
Cantin, Lauren Elizabeth 5 3.00 4.14 5.0 34.50 32.00 5 2 56.00 85.390 B 76 77 lecantin 6509449
Mijat, Sophia Grace 5 5.00 4.31 4.5 31.88 28.25 5 2 59.00 85.375 B 77 77 smijat 6650818
Campbell, Maxwell James 5 3.50 4.83 5.0 28.25 34.00 5 0 56.75 82.830 B- 78 80 maxjcamp 6659964
Nzelu, Kene Simone 5 4.15 3.23 5.0 33.75 30.50 5 2 49.50 81.255 B- 79 80 kenen 6421098
Bailey, Quincy 5 4.75 3.84 4.0 36.25 23.75 5 2 51.25 80.215 B- 80 80 qbailey 6778990
Thompson, Joshua Issa 5 4.40 3.85 4.5 34.75 32.00 5 0 46.75 79.500 C+ 81 84 josht26 6640261
Taylor, Mikey Stephen 5 4.00 4.74 0.0 36.00 34.50 0 0 60.00 78.990 C+ 82 84 mikey686 6778059
Johnson, Leah Yolanda 5 3.25 4.60 5.0 35.75 29.25 5 0 46.00 78.350 C+ 83 84 bluerun2 6385807
Baker, Tahayla Brianna 5 3.00 4.01 5.0 27.88 28.75 5 2 52.00 78.325 C+ 84 84 tahayla 6416268
Pacheco Ramirez, Jessica 5 4.25 3.92 5.0 31.38 25.00 5 2 40.50 73.610 C 85 85 jessipr 6657509
Leiva, Carlos D 5 3.75 4.18 4.5 37.00 24.50 5 0 35.50 70.930 C- 86 86 cdleiva 6749505
Kinney, Jasmine Waukela 5 3.30 4.44 5.0 29.38 27.25 5 2 32.50 69.305 D+ 87 88 jwkinney 6455928
Ramos, Valeria 5 4.80 3.90 4.5 29.50 29.75 5 2 25.00 67.325 D+ 88 88 ramos03 6683972
Cloud, Anne Elizabeth 0 4.75 2.54 5.0 24.10 26.00 5 2 41.50 65.090 D 89 89 acloud 6643281
Reliford, Dominique 0 0.00 0.00 0.0 23.00 32.00 0 0 39.83 47.415 F 90 93 reliford 9656716
Sun, Lilly 5 1.20 1.15 3.5 13.00 27.75 0 0 28.00 45.225 F 91 93 lillysun 6250630
Hicks, Christina Helena Pau 5 4.00 3.79 5.0 0.00 33.50 5 0 0.00 39.540 F 92 93 chickshp 6407150
Briones, Julia 0 0.00 0.00 0.0 29.25 28.75 0 0 0.00 29.000 F 93 93 jmba812 6261749

5.1 All assignment scores

Note that the mean for the final exam doesn’t match the earlier statistics, because this includes people who got a 0.

d <- df_gradebook_final %>% 
  select(student, 
         assignment_1,
         assignment_2,
         assignment_3,
         assignment_4,
         exam_1,
         exam_2,
         final_exam_score
  ) %>% 
  pivot_longer(cols = -student, 
               names_to = "assignment",
               values_to = "score") 
## select: dropped 5 variables (sunet, suid, participation, extra_credit, total_class_score)
## pivot_longer: reorganized (assignment_1, assignment_2, assignment_3, assignment_4, exam_1, …) into (assignment, score) [was 93x8, now 651x3]
d %>% 
  ggplot(aes(x = score)) +
  geom_histogram(bins = 20) +
  facet_wrap(~assignment, scales = "free") +
  labs(title = "Assignment Scores", x = "Score", y = "Count")

d %>% 
  group_by(assignment) %>%
  summarize(mean_score = mean(score),
            sd_score = sd(score)) %>% 
  mutate_at(vars(mean_score, sd_score), ~round(., 2)) %>%
  kbl() %>%
  kable_styling()
## group_by: one grouping variable (assignment)
## summarize: now 7 rows and 3 columns, ungrouped
## mutate_at: changed 7 values (100%) of 'mean_score' (0 new NA)
##            changed 7 values (100%) of 'sd_score' (0 new NA)
assignment mean_score sd_score
assignment_1 4.73 1.13
assignment_2 4.31 0.94
assignment_3 4.29 0.84
assignment_4 4.68 0.92
exam_1 36.18 5.87
exam_2 35.29 4.47
final_exam_score 59.68 13.06

5.2 Missing grades

df_gradebook_final %>% 
  pivot_longer(cols = -c(student, sunet, suid), 
               names_to = "assignment",
               values_to = "score") %>% 
  filter(is.na(score) | score == 0) %>% 
  arrange(assignment) %>% 
  kbl() %>% 
  kable_styling()
## pivot_longer: reorganized (assignment_1, assignment_2, assignment_3, assignment_4, exam_1, …) into (assignment, score) [was 93x13, now 930x5]
## filter: removed 875 rows (94%), 55 rows remaining
student sunet suid assignment score
Briones, Julia jmba812 6261749 assignment_1 0
Cloud, Anne Elizabeth acloud 6643281 assignment_1 0
Hansen, Luke lrhansen 6252378 assignment_1 0
Reliford, Dominique reliford 9656716 assignment_1 0
Vincent, Dylan Michael dylanv 6423286 assignment_1 0
Briones, Julia jmba812 6261749 assignment_2 0
Reliford, Dominique reliford 9656716 assignment_2 0
Briones, Julia jmba812 6261749 assignment_3 0
Reliford, Dominique reliford 9656716 assignment_3 0
Briones, Julia jmba812 6261749 assignment_4 0
Reliford, Dominique reliford 9656716 assignment_4 0
Taylor, Mikey Stephen mikey686 6778059 assignment_4 0
Hicks, Christina Helena Pau chickshp 6407150 exam_1 0
Alluri, Rishi allurir 6543538 extra_credit 0
Briones, Julia jmba812 6261749 extra_credit 0
Campbell, Maxwell James maxjcamp 6659964 extra_credit 0
De Loera, Alan adeloera 6496650 extra_credit 0
Garewal, Samuel Oliver sgarewal 6827043 extra_credit 0
Hansen, Luke lrhansen 6252378 extra_credit 0
Heinemann, Abigail Grace aheinem 6641596 extra_credit 0
Hicks, Christina Helena Pau chickshp 6407150 extra_credit 0
Huang, Emily ehuang25 6603608 extra_credit 0
Jiang, Yuzhi baggioj 6456134 extra_credit 0
Johnson, Leah Yolanda bluerun2 6385807 extra_credit 0
Kehm, Andrew Thomas atkehm 6683181 extra_credit 0
Kisembo, Benita Kemigisa kisemben 6504254 extra_credit 0
Leiva, Carlos D cdleiva 6749505 extra_credit 0
Lu, Yuwen tinaluyw 6486310 extra_credit 0
Martz, Eric emartz 6448695 extra_credit 0
Movich-Fields, Tishrei Lee tishrei 6420491 extra_credit 0
Narayanan, Tejas tejasn9 6458279 extra_credit 0
Nordhagen, Andrew aenord 6504358 extra_credit 0
Ozoemelam, Liane lianeozo 6544973 extra_credit 0
Park, Emma emmapark 6421329 extra_credit 0
Reliford, Dominique reliford 9656716 extra_credit 0
Rojas, Tania Lisseth trojas18 6421966 extra_credit 0
Smith, Jerred Denali Villegas jerred89 6512315 extra_credit 0
Sossa, Candice Rachel csossa 6401980 extra_credit 0
Strutz, Patricia Marie pstrutz 6503711 extra_credit 0
Sun, Lilly lillysun 6250630 extra_credit 0
Sundar, Lavanya Marakatavalli lsundar 6659969 extra_credit 0
Tamor, Sydney Elizabeth sydtamor 6779178 extra_credit 0
Taylor, Mikey Stephen mikey686 6778059 extra_credit 0
Temal, Anjali anjali82 6456179 extra_credit 0
Thompson, Joshua Issa josht26 6640261 extra_credit 0
Tiu, Ekin Yokhan ekintiu 6423010 extra_credit 0
Vachovska, Lora lorapv 6544168 extra_credit 0
Varlack, Vivianna Tiffany vvarlack 6496691 extra_credit 0
Briones, Julia jmba812 6261749 final_exam_score 0
Hicks, Christina Helena Pau chickshp 6407150 final_exam_score 0
Briones, Julia jmba812 6261749 participation 0
Heinemann, Abigail Grace aheinem 6641596 participation 0
Reliford, Dominique reliford 9656716 participation 0
Sun, Lilly lillysun 6250630 participation 0
Taylor, Mikey Stephen mikey686 6778059 participation 0