rm(list = ls())
library(here)

## here() starts at /Users/ashish/files/research/projects/psych80_survey

library(tidyverse)

## ── Attaching core tidyverse packages ──────────────────────── tidyverse 2.0.0 ──
## ✔ dplyr     1.1.3     ✔ readr     2.1.4
## ✔ forcats   1.0.0     ✔ stringr   1.5.0
## ✔ ggplot2   3.4.1     ✔ tibble    3.2.1
## ✔ lubridate 1.9.2     ✔ tidyr     1.3.0
## ✔ purrr     1.0.2

## ── Conflicts ────────────────────────────────────────── tidyverse_conflicts() ──
## ✖ dplyr::filter() masks stats::filter()
## ✖ dplyr::lag()    masks stats::lag()
## ℹ Use the conflicted package (<http://conflicted.r-lib.org/>) to force all conflicts to become errors

library(janitor)

## 
## Attaching package: 'janitor'
## 
## The following objects are masked from 'package:stats':
## 
##     chisq.test, fisher.test

library(tidylog)

## 
## Attaching package: 'tidylog'
## 
## The following objects are masked from 'package:dplyr':
## 
##     add_count, add_tally, anti_join, count, distinct, distinct_all,
##     distinct_at, distinct_if, filter, filter_all, filter_at, filter_if,
##     full_join, group_by, group_by_all, group_by_at, group_by_if,
##     inner_join, left_join, mutate, mutate_all, mutate_at, mutate_if,
##     relocate, rename, rename_all, rename_at, rename_if, rename_with,
##     right_join, sample_frac, sample_n, select, select_all, select_at,
##     select_if, semi_join, slice, slice_head, slice_max, slice_min,
##     slice_sample, slice_tail, summarise, summarise_all, summarise_at,
##     summarise_if, summarize, summarize_all, summarize_at, summarize_if,
##     tally, top_frac, top_n, transmute, transmute_all, transmute_at,
##     transmute_if, ungroup
## 
## The following objects are masked from 'package:tidyr':
## 
##     drop_na, fill, gather, pivot_longer, pivot_wider, replace_na,
##     spread, uncount
## 
## The following object is masked from 'package:stats':
## 
##     filter

library(kableExtra)

## 
## Attaching package: 'kableExtra'
## 
## The following object is masked from 'package:dplyr':
## 
##     group_rows

theme_set(theme_bw())

1 Read data

gradebook_filepath <- "data/2024-03-24T1119_Grades-W24-PSYCH-80-01.csv"

df_gradebook_raw <- read_csv(here(gradebook_filepath)) %>% 
  clean_names

## Rows: 100 Columns: 35
## ── Column specification ────────────────────────────────────────────────────────
## Delimiter: ","
## chr (34): Student, SIS User ID, SIS Login ID, Section, SU ID, Assignment #2 ...
## dbl  (1): ID
## 
## ℹ Use `spec()` to retrieve the full column specification for this data.
## ℹ Specify the column types or set `show_col_types = FALSE` to quiet this message.

df_gradebook <- df_gradebook_raw %>% 
  drop_na(id) %>% 
  rename(suid = su_id) %>% 
  drop_na(suid) %>% 
  filter(suid != "06778030") %>% # jahfari
  mutate(suid = as.numeric(suid)) %>%
  rename_all(~str_replace(., "_[0-9]{6}$", "")) %>% 
  rename(exam_2 = exam_number_2) %>% 
  mutate_at(vars(
    exam_1, 
    exam_1_for_traveling_athletes,
    exam_number_2_traveling_athletes,
    exam_2,
    final_exam,
    assignment_number_1,
    assignment_number_2,
    assignment_number_3,
    assignment_number_4,
    extra_credit,
    participation), 
    as.numeric) %>%
  mutate(exam_2 = ifelse(!is.na(exam_number_2_traveling_athletes),
                         exam_number_2_traveling_athletes,
                          exam_2)) %>% 
  select(-matches("traveling_athletes")) %>%
  rename_at(vars(matches("assignment_number_[1-4]")), 
            ~str_replace(., "number_", "")) %>% 
  rename(sunet = sis_login_id) %>% 
  # -- dropped class -- #
  filter(!str_detect(student, "Dietrich"))

## drop_na: removed 2 rows (2%), 98 rows remaining
## rename: renamed one variable (suid)
## drop_na: removed 3 rows (3%), 95 rows remaining
## filter: removed one row (1%), 94 rows remaining
## mutate: converted 'suid' from character to double (0 new NA)
## rename_all: renamed 11 variables (assignment_number_2, assignment_number_3, exam_1, exam_number_2_traveling_athletes, assignment_number_1, …)
## rename: renamed one variable (exam_2)

## Warning: There were 3 warnings in `mutate()`.
## The first warning was:
## ℹ In argument: `exam_1_for_traveling_athletes =
##   .Primitive("as.double")(exam_1_for_traveling_athletes)`.
## Caused by warning:
## ! NAs introduced by coercion
## ℹ Run `dplyr::last_dplyr_warnings()` to see the 2 remaining warnings.

## mutate_at: converted 'assignment_number_2' from character to double (0 new NA)
##            converted 'assignment_number_3' from character to double (0 new NA)
##            converted 'exam_1' from character to double (0 new NA)
##            converted 'exam_number_2_traveling_athletes' from character to double (93 new NA)
##            converted 'assignment_number_1' from character to double (0 new NA)
##            converted 'extra_credit' from character to double (0 new NA)
##            converted 'assignment_number_4' from character to double (0 new NA)
##            converted 'exam_2' from character to double (0 new NA)
##            converted 'final_exam' from character to double (92 new NA)
##            converted 'participation' from character to double (0 new NA)
##            converted 'exam_1_for_traveling_athletes' from character to double (92 new NA)
## mutate: no changes
## select: dropped 2 variables (exam_number_2_traveling_athletes, exam_1_for_traveling_athletes)
## rename_at: renamed 4 variables (assignment_2, assignment_3, assignment_1, assignment_4)
## rename: renamed one variable (sunet)
## filter: removed one row (1%), 93 rows remaining

2 Process data

exam_filepath <- "data/Final exam grading.xlsx"

df_exam_raw <- readxl::read_excel(here(exam_filepath), 
                                  sheet = "Update") %>% 
  clean_names


df_mc_key <- df_exam_raw %>% 
  slice(1) %>% 
  select(matches("mc[0-9]{,2}$")) %>% 
  pivot_longer(cols = everything(), 
               names_to = "mc",
               values_to = "key")

## slice: removed 96 rows (99%), one row remaining

## select: dropped 21 variables (exam_number, name, suid, sunet, mc_total, …)

## pivot_longer: reorganized (mc1, mc2, mc3, mc4, mc5, …) into (mc, key) [was 1x25, now 25x2]

df_mc <- df_exam_raw %>% 
  drop_na(suid) %>% 
  drop_na(total_score) %>% 
  select(name, suid, matches("mc[0-9]{,2}$")) %>% 
  pivot_longer(cols = -c(name, suid), 
               names_to = "mc",
               values_to = "response") %>%
  full_join(df_mc_key) %>% 
  mutate(score = if_else(response == key, 1, 0)) %>% 
  select(-key, -response) %>%
  mutate(mc = paste0(mc, "_score")) %>% 
  pivot_wider(id_cols = c(name, suid), 
              names_from = mc, 
              values_from = score)

## drop_na: removed 3 rows (3%), 94 rows remaining

## drop_na: removed 3 rows (3%), 91 rows remaining

## select: dropped 19 variables (exam_number, sunet, mc_total, sa1, sa2, …)

## pivot_longer: reorganized (mc1, mc2, mc3, mc4, mc5, …) into (mc, response) [was 91x27, now 2275x4]

## Joining with `by = join_by(mc)`
## full_join: added one column (key)
## > rows only in x 0
## > rows only in y 0
## > matched rows 2,275
## > =======
## > rows total 2,275
## mutate: new variable 'score' (double) with 3 unique values and 1% NA
## select: dropped 2 variables (response, key)
## mutate: changed 2,275 values (100%) of 'mc' (0 new NA)
## pivot_wider: reorganized (mc, score) into (mc1_score, mc2_score, mc3_score,
## mc4_score, mc5_score, …) [was 2275x4, now 91x27]

df_exam <- df_exam_raw %>% 
  drop_na(suid) %>% 
  drop_na(total_score) %>% 
  left_join(df_mc) %>% 
  mutate(total_score = total_score + 3) %>%  # adding 3 points because of MC questions that most got wrong
  # left pad the numbers after mc in the column names to make them all the same length
  
  rename_at(vars(matches("mc[0-9]($|_)")), 
            function(x){
              x <- str_replace_all(x, "mc", "")
              x <- str_pad(x, width = 2, side = "left", pad = "0")
              paste0("mc", x)
            })

## drop_na: removed 3 rows (3%), 94 rows remaining
## drop_na: removed 3 rows (3%), 91 rows remaining
## Joining with `by = join_by(name, suid)`left_join: added 25 columns (mc1_score, mc2_score, mc3_score, mc4_score, mc5_score, …)
##            > rows only in x    0
##            > rows only in y  ( 0)
##            > matched rows     91
##            >                 ====
##            > rows total       91
## mutate: changed 91 values (100%) of 'total_score' (0 new NA)
## rename_at: renamed 9 variables (mc01, mc02, mc03, mc04, mc05, …)

3 Final exam

3.1 MC

3.1.1 Total MC Score

df_exam %>% 
  select(name, suid, matches("mc[0-9]{,2}_score$")) %>% 
  pivot_longer(cols = -c(name, suid), 
               names_to = "mc",
               values_to = "score") %>%
  drop_na(mc) %>% 
  group_by(name, suid) %>% 
  summarize(score = sum(score)) %>%
  ggplot(aes(x = score)) +
  geom_histogram(bins = 20) +
  geom_vline(aes(xintercept = mean(score, na.rm=T)), color = "red") +
  geom_label(
    aes(label = paste0("M = ", round(mean(score, na.rm=T), 2),
                       "\nSD = ", round(sd(score, na.rm=T), 2)
    )),
    x=15, y = 10,
    vjust = -1) +
  labs(title = "Multiple Choice Scores", x = "Score", y = "Count")

## select: dropped 44 variables (exam_number, sunet, mc01, mc02, mc03, …)

## pivot_longer: reorganized (mc1_score, mc2_score, mc3_score, mc4_score, mc5_score, …) into (mc, score) [was 91x27, now 2275x4]

## drop_na: no rows removed

## group_by: 2 grouping variables (name, suid)

## summarize: now 91 rows and 3 columns, one group variable remaining (name)

## Warning: Removed 4 rows containing non-finite values (`stat_bin()`).

3.1.2 By question

d <- df_exam %>% 
  select(name, suid, matches("mc[0-9]{,2}_score$")) %>% 
  pivot_longer(cols = -c(name, suid), 
               names_to = "mc",
               values_to = "score") %>%
  drop_na(mc) %>% 
  mutate(mc = str_replace_all(mc, "[a-z_]", "")) %>%
  mutate(mc = str_pad(mc, width = 2, side = "left", pad = "0"))

## select: dropped 44 variables (exam_number, sunet, mc01, mc02, mc03, …)

## pivot_longer: reorganized (mc1_score, mc2_score, mc3_score, mc4_score, mc5_score, …) into (mc, score) [was 91x27, now 2275x4]

## drop_na: no rows removed

## mutate: changed 2,275 values (100%) of 'mc' (0 new NA)

## mutate: changed 819 values (36%) of 'mc' (0 new NA)

d %>%
  group_by(mc) %>%
  summarize(score = mean(score, na.rm=T)) %>%
  ggplot(aes(x = mc, y = score)) +
  geom_col() +
  geom_text(aes(label = round(score, 2)), vjust = -0.5, size = 3) +
  labs(title = "Multiple Choice by Question", y = "Proportion correct")

## group_by: one grouping variable (mc)

## summarize: now 25 rows and 2 columns, ungrouped

df_exam %>% 
  select(name, suid, matches("mc[0-9]{,2}$")) %>% 
  pivot_longer(cols = -c(name, suid), 
               names_to = "mc",
               values_to = "response") %>% 
  ggplot(aes(x = response)) + 
  geom_bar(stat = "count") + 
  facet_wrap(~mc, scales = "free")

## select: dropped 44 variables (exam_number, sunet, mc_total, sa1, sa2, …)

## pivot_longer: reorganized (mc01, mc02, mc03, mc04, mc05, …) into (mc, response) [was 91x27, now 2275x4]

3.1.3 By grader

df_exam %>%
  ggplot(aes(x = mc_sa_grader, y = mc_total)) +
  geom_bar(stat = "summary", fun = "mean") +
  geom_errorbar(stat = "summary", 
                fun.data = mean_sdl,
                fun.args = list(mult = 1),
                width = 0.2) +
  geom_point( alpha = .2, 
              position = position_jitter(width = .2,
                                         height = 0))

3.2 SA

3.2.1 Total

df_exam %>% 
  ggplot(aes(x = sa_total)) + 
  geom_histogram(bins = 25) +
  labs(title = "Short Answer Scores", x = "Score", y = "Count") +
  geom_vline(aes(xintercept = mean(sa_total, na.rm=T)), color = "red") +
  geom_label(
    aes(label = paste0("M = ", round(mean(sa_total, na.rm=T), 2),
                       "\nSD = ", round(sd(sa_total, na.rm=T), 2)
    )),
    x=15, y = 10,
    vjust = -1)  +
  scale_x_continuous(breaks = seq(0, 25, 1))

3.2.2 By grader

df_exam %>%
  ggplot(aes(x = mc_sa_grader, y = sa_total)) + 
  geom_bar(stat = "summary", fun = "mean") +
  geom_point(alpha = .4, position=position_jitter(.2)) +
  geom_errorbar(stat = "summary", fun.data=mean_sdl, 
                fun.args = list(mult=1),
                width = 0.5)

df_exam %>% 
  group_by(mc_sa_grader) %>% 
  summarize(mean_score = mean(sa_total),
            sd_score = sd(sa_total))

## group_by: one grouping variable (mc_sa_grader)

## summarize: now 4 rows and 3 columns, ungrouped

## # A tibble: 4 × 3
##   mc_sa_grader mean_score sd_score
##   <chr>             <dbl>    <dbl>
## 1 Allen              20.6     3.96
## 2 Arethea            19.6     3.90
## 3 Ashish             21.9     3.82
## 4 Kate               20.0     6.02

3.2.3 By question

d <- df_exam %>%
  select(name, suid, matches("sa[0-9]$"), mc_sa_grader) %>%
  pivot_longer(-c(name, suid, mc_sa_grader), 
               names_to = "sa",
               values_to = "score") %>%
  drop_na(score)

## select: dropped 60 variables (exam_number, sunet, mc01, mc02, mc03, …)

## pivot_longer: reorganized (sa1, sa2, sa3, sa4, sa5, …) into (sa, score) [was 91x11, now 728x5]

## drop_na: removed 273 rows (38%), 455 rows remaining

d %>% 
  ggplot(aes(x = sa, y = score)) + 
  geom_bar(stat = "summary", fun = "mean") +
  geom_point(alpha = .4, position=position_jitter(.2)) +
  geom_errorbar(stat = "summary", fun.data=mean_sdl, 
                fun.args = list(mult=1),
                width = 0.5)

d %>% 
  group_by(sa) %>% 
  summarize(mean_score = mean(score),
            sd_score = sd(score))

## group_by: one grouping variable (sa)

## summarize: now 8 rows and 3 columns, ungrouped

## # A tibble: 8 × 3
##   sa    mean_score sd_score
##   <chr>      <dbl>    <dbl>
## 1 sa1         4.14    1.30 
## 2 sa2         4.12    1.01 
## 3 sa3         4.35    0.724
## 4 sa4         4.15    1.25 
## 5 sa5         3.38    1.38 
## 6 sa6         4.02    1.23 
## 7 sa7         3.93    1.39 
## 8 sa8         4.52    0.903

3.2.4 By grader by question - 5

d <- df_exam %>%
  select(name, suid, matches("sa[0-9]$"), mc_sa_grader) %>%
  pivot_longer(-c(name, suid, mc_sa_grader), 
               names_to = "sa",
               values_to = "score") %>%
  filter(sa=="sa5") %>% 
  drop_na(score)

## select: dropped 60 variables (exam_number, sunet, mc01, mc02, mc03, …)

## pivot_longer: reorganized (sa1, sa2, sa3, sa4, sa5, …) into (sa, score) [was 91x11, now 728x5]

## filter: removed 637 rows (88%), 91 rows remaining

## drop_na: removed 40 rows (44%), 51 rows remaining

d %>% 
  ggplot(aes(x = mc_sa_grader, y = score)) + 
  geom_bar(stat = "summary", fun = "mean") +
  geom_point(alpha = .4, position=position_jitter(.2)) +
  geom_errorbar(stat = "summary", fun.data=mean_sdl, 
                fun.args = list(mult=1),
                width = 0.5)

d %>% 
  group_by(mc_sa_grader) %>% 
  summarize(n = n())

## group_by: one grouping variable (mc_sa_grader)

## summarize: now 4 rows and 2 columns, ungrouped

## # A tibble: 4 × 2
##   mc_sa_grader     n
##   <chr>        <int>
## 1 Allen           13
## 2 Arethea         12
## 3 Ashish          16
## 4 Kate            10

3.2.5 By grader by question - all qs

d <- df_exam %>%
  select(name, suid, matches("sa[0-9]$"), mc_sa_grader) %>%
  pivot_longer(-c(name, suid, mc_sa_grader), 
               names_to = "sa",
               values_to = "score") %>%
  drop_na(score)

## select: dropped 60 variables (exam_number, sunet, mc01, mc02, mc03, …)

## pivot_longer: reorganized (sa1, sa2, sa3, sa4, sa5, …) into (sa, score) [was 91x11, now 728x5]

## drop_na: removed 273 rows (38%), 455 rows remaining

d %>% 
  ggplot(aes(x = mc_sa_grader, y = score)) + 
  geom_bar(stat = "summary", fun = "mean") +
  geom_point(alpha = .4, position=position_jitter(.2)) +
  geom_errorbar(stat = "summary", fun.data=mean_sdl, 
                fun.args = list(mult=1),
                width = 0.5) + 
  facet_wrap(~sa)

d %>% 
  group_by(mc_sa_grader) %>% 
  summarize(n = n())

## group_by: one grouping variable (mc_sa_grader)

## summarize: now 4 rows and 2 columns, ungrouped

## # A tibble: 4 × 2
##   mc_sa_grader     n
##   <chr>        <int>
## 1 Allen          120
## 2 Arethea        100
## 3 Ashish         115
## 4 Kate           120

3.2.6 By grader by question - 6

d <- df_exam %>%
  select(name, suid, matches("sa[0-9]$"), mc_sa_grader) %>%
  pivot_longer(-c(name, suid, mc_sa_grader), 
               names_to = "sa",
               values_to = "score") %>%
  filter(sa=="sa6") %>% 
  drop_na(score)

## select: dropped 60 variables (exam_number, sunet, mc01, mc02, mc03, …)

## pivot_longer: reorganized (sa1, sa2, sa3, sa4, sa5, …) into (sa, score) [was 91x11, now 728x5]

## filter: removed 637 rows (88%), 91 rows remaining

## drop_na: removed 20 rows (22%), 71 rows remaining

d %>% 
  ggplot(aes(x = mc_sa_grader, y = score)) + 
  geom_bar(stat = "summary", fun = "mean") +
  geom_point(alpha = .4, position=position_jitter(.2)) +
  geom_errorbar(stat = "summary", fun.data=mean_sdl, 
                fun.args = list(mult=1),
                width = 0.5)

3.3 Essay

df_exam %>% 
  ggplot(aes(x = essay_total)) + 
  geom_histogram(bins = 25) +
  geom_label(
    aes(label = paste0("M = ", round(mean(essay_total, na.rm=T), 2),
                       "\nSD = ", round(sd(essay_total, na.rm=T), 2)
    )),
    x=15, y = 10,
    vjust = -1)  +
  labs(title = "Essay Scores", x = "Score", y = "Count") +
  scale_x_continuous(breaks = seq(0, 20, 1))

3.3.1 By grader

df_exam %>%
  ggplot(aes(x = essay_grader, y = essay_total)) + 
  geom_bar(stat = "summary", fun = "mean") + 
  geom_point(alpha = .4, position=position_jitter(.2)) +
  geom_errorbar(stat = "summary", fun.data=mean_sdl, 
                fun.args = list(mult=1),
                width = 0.5)

3.4 Total

df_exam %>% 
  ggplot(aes(x = total_score)) + 
  geom_histogram(bins = 25) +
  labs(title = "Total Scores", x = "Score", y = "Count") +
  scale_x_continuous(breaks = seq(0, 72, 5)) +
  geom_vline(aes(xintercept = mean(total_score, na.rm=T)), color = "red") +
  geom_label(
    aes(label = paste0("M = ", round(mean(total_score, na.rm=T), 2),
                       "\nSD = ", round(sd(total_score, na.rm=T), 2)
    )),
    x=40, y = 10,
    vjust = -1)

df_exam %>% 
  select(name, mc_total, sa_total, essay_total, total_score) %>% 
  view

## select: dropped 66 variables (exam_number, suid, sunet, mc01, mc02, …)

4 Class grades

# 5 + 5 + 5 + 5 + 40 + 40 + 70 + 5
df_gradebook_final <- df_gradebook %>% 
  left_join(df_exam %>% 
              select(suid, total_score) %>% 
              rename(final_exam_score = total_score)) %>% 
  mutate_at(vars(assignment_1,
                 assignment_2,
                 assignment_3,
                 assignment_4,
                 exam_1,
                 exam_2,
                 participation,
                 extra_credit,
                 final_exam_score), ~replace_na(as.numeric(.), 0)) %>% 
  mutate(total_class_score = 
           (assignment_1/5) * 5 + 
           (assignment_2/5) * 5  + 
           (assignment_3/5) * 5  + 
           (assignment_4/5) * 5  + 
           (exam_1 / 40) * 20 + 
           (exam_2 / 40) * 20 + 
           (participation / 5) * 5 + 
           extra_credit / 2 * 2 + 
           (final_exam_score / 70) * 35) %>% 
  select(student, sunet, suid, 
         assignment_1,
         assignment_2,
         assignment_3,
         assignment_4,
         exam_1,
         exam_2,
         participation,
         extra_credit,
         final_exam_score,
         total_class_score)

## select: dropped 69 variables (exam_number, name, sunet, mc01, mc02, …)

## rename: renamed one variable (final_exam_score)

## Joining with `by = join_by(suid)`
## left_join: added one column (final_exam_score)
## > rows only in x 2
## > rows only in y ( 0)
## > matched rows 91
## > ====
## > rows total 93
## mutate_at: changed 2 values (2%) of 'assignment_2' (2 fewer NA)
## changed 2 values (2%) of 'assignment_3' (2 fewer NA)
## changed one value (1%) of 'exam_1' (1 fewer NA)
## changed 5 values (5%) of 'assignment_1' (5 fewer NA)
## changed 35 values (38%) of 'extra_credit' (35 fewer NA)
## changed 3 values (3%) of 'assignment_4' (3 fewer NA)
## changed 5 values (5%) of 'participation' (5 fewer NA)
## changed 2 values (2%) of 'final_exam_score' (2 fewer NA)
## mutate: new variable 'total_class_score' (double) with 90 unique values and 0%
## NA
## select: dropped 22 variables (id, sis_user_id, section, final_exam,
## assignments_current_points, …)

4.1 Total class scores

99+ = A+ 93-99 = A 90-92 = A- 87-89 = B+ 83-86 = B 80-82 = B- 77-79 = C+ 73-76 = C 70-72 = C- 67-69 = D+ 63-66 = D 60-62 = D- <60 = F

df_gradebook_final %>% 
  summarize(
    mean = mean(total_class_score),
    sd = sd(total_class_score)
  )

## summarize: now one row and 2 columns, ungrouped

## # A tibble: 1 × 2
##    mean    sd
##   <dbl> <dbl>
## 1  89.6  13.2

df_gradebook_final %>% 
  ggplot(aes(x = total_class_score)) +
  geom_histogram() + 
  geom_vline(aes(xintercept = mean(total_class_score)), color = "red") +
  geom_label(
    aes(label = paste0("M = ", round(mean(total_class_score), 2),
                       "\nSD = ", round(sd(total_class_score), 2)
    )),
    x=60, y = 10,
    vjust = -1)

## `stat_bin()` using `bins = 30`. Pick better value with `binwidth`.

df_letter_grades <- df_gradebook_final %>% 
  mutate(grade = case_when(
    total_class_score >= 99 ~ "A+",
    total_class_score >= 93 & total_class_score < 99 ~ "A",
    total_class_score >= 90 & total_class_score < 93 ~ "A-",
    total_class_score >= 87 & total_class_score < 90 ~ "B+",
    total_class_score >= 83 & total_class_score < 87 ~ "B",
    total_class_score >= 80 & total_class_score < 83 ~ "B-",
    total_class_score >= 77 & total_class_score < 80 ~ "C+",
    total_class_score >= 73 & total_class_score < 77 ~ "C",
    total_class_score >= 70 & total_class_score < 73 ~ "C-",
    total_class_score >= 67 & total_class_score < 70 ~ "D+",
    total_class_score >= 63 & total_class_score < 67 ~ "D",
    total_class_score >= 60 & total_class_score < 63 ~ "D-",
    total_class_score < 60 ~ "F"
  )) %>% 
  arrange(desc(total_class_score)) %>% 
  mutate(rank = row_number()) %>% 
  group_by(grade) %>% 
  mutate(cumul_n = max(rank)) %>% 
  ungroup

## mutate: new variable 'grade' (character) with 12 unique values and 0% NA

## mutate: new variable 'rank' (integer) with 93 unique values and 0% NA

## group_by: one grouping variable (grade)

## mutate (grouped): new variable 'cumul_n' (integer) with 12 unique values and 0% NA

## ungroup: no grouping variables

df_letter_grades %>%
  # filter(final_exam_score > 0) %>% 
  mutate(grade = fct_reorder(grade, total_class_score, .desc = T)) %>%
  group_by(grade, cumul_n) %>% 
  summarize(
    n = n(),
    cumul_n = max(cumul_n),
    .groups = "drop"
  ) %>%
  ggplot(aes(x = grade, y = n)) +
  geom_col() + 
  geom_text(aes(label = n), vjust=-.5) +
  # geom_text(aes(label = cumul_n), y = 12) +
  labs(title = "Class Grades", x = "Grade", y = "Count")

## mutate: converted 'grade' from character to factor (0 new NA)

## group_by: 2 grouping variables (grade, cumul_n)

## summarize: now 12 rows and 3 columns, ungrouped

5 Each student’s final class grade

df_letter_grades %>% 
  select(-sunet, -suid, student, grade, total_class_score, everything()) %>% 
  kable()

## select: columns reordered (student, assignment_1, assignment_2, assignment_3, assignment_4, …)

student	assignment_1	assignment_2	assignment_3	assignment_4	exam_1	exam_2	participation	extra_credit	final_exam_score	total_class_score	grade	rank	cumul_n	sunet	suid
Roghair, Aurora Jade	5	4.90	4.40	5.0	40.25	42.00	5	2	73.00	103.925	A+	1	11	aroghair	6495846
Wang, Annabelle Anjie	5	5.00	5.00	5.0	41.25	41.50	5	2	69.00	102.875	A+	2	11	aawang0	6778775
Salahifar, Aylin G	5	5.00	4.66	5.0	40.50	41.00	5	2	70.50	102.660	A+	3	11	aysalah	6650094
Nguyen, Michelle	5	5.00	4.74	4.5	42.00	40.50	5	2	70.00	102.490	A+	4	11	micheiie	6682990
Park, Emma	5	4.70	4.53	5.0	41.00	41.00	5	0	73.00	101.730	A+	5	11	emmapark	6421329
Trockel, Rebecca Sabrina	5	4.50	5.00	5.0	40.25	39.50	5	2	70.25	101.500	A+	6	11	rtrockel	6376569
Mallen, Susie Lynn	5	4.50	4.48	5.0	41.75	37.50	5	2	71.00	101.105	A+	7	11	smallen	6640480
Barrientos, Isabella	5	5.00	4.83	5.0	40.00	41.00	5	2	67.50	101.080	A+	8	11	isabp03	6504013
Huang, Emily	5	5.00	4.13	5.0	40.50	41.00	5	0	71.50	100.630	A+	9	11	ehuang25	6603608
Seyahi, Lara Selin	5	5.00	4.40	5.0	38.50	40.00	5	2	68.50	99.900	A+	10	11	seyahi	6545140
Tiu, Ekin Yokhan	5	4.50	4.48	5.0	40.00	40.00	5	0	70.50	99.230	A+	11	11	ekintiu	6423010
Srinivasan, Maya Wells	5	5.00	4.66	5.0	41.25	36.00	5	2	67.00	98.785	A	12	49	mayasrin	6422613
Breschini, Alex Jean-Buenviaje	5	4.75	4.61	4.5	39.00	41.00	5	2	65.50	98.610	A	13	49	ajb2004	6683400
Citron, Emily	5	5.00	4.65	5.0	38.50	36.50	5	2	68.75	98.525	A	14	49	ecitron	6438897
Banks, Everett Samuel	5	2.45	4.22	5.0	39.88	39.50	5	2	70.00	98.360	A	15	49	evbanks	6731886
He, Shizhe	5	4.75	4.05	5.0	40.75	38.75	5	2	65.50	98.300	A	16	49	shizhehe	6661097
McNitt, Kaylee Sarah	5	4.25	4.66	5.0	37.00	38.50	5	2	69.00	98.160	A	17	49	ksmcnitt	6369630
Siggelkow, Katarina	5	5.00	4.66	5.0	41.00	39.50	5	2	62.50	98.160	A	18	49	ksig	6682708
Smith, Jerred Denali Villegas	5	5.00	4.74	5.0	39.50	41.00	5	0	66.00	97.990	A	19	49	jerred89	6512315
Movich-Fields, Tishrei Lee	5	5.00	4.66	5.0	39.25	38.50	5	0	68.75	97.910	A	20	49	tishrei	6420491
Sundar, Lavanya Marakatavalli	5	4.75	4.66	4.5	41.00	37.50	5	0	69.50	97.910	A	21	49	lsundar	6659969
Chang, Yujin	5	4.25	4.70	5.0	38.00	37.00	5	2	67.50	97.200	A	22	49	yujin9	6644961
Cortes, Nicole	5	4.75	4.74	5.0	37.25	38.00	5	2	66.00	97.115	A	23	49	nicortes	6504077
Mengheang, Marissa	5	5.00	4.27	5.0	41.25	34.50	5	2	65.50	96.895	A	24	49	mars7418	6683966
Torres Arsuaga, Alexandra Cecilia	5	4.75	4.58	5.0	35.50	41.25	5	2	64.00	96.705	A	25	49	torresar	6512328
Huang, Robin	5	3.50	4.70	5.0	40.00	33.50	5	2	69.50	96.700	A	26	49	rhuang42	6512167
Alvarado, Rodrigo	5	5.00	4.74	5.0	38.50	36.00	5	2	64.50	96.240	A	27	49	rodrigo0	6493795
Varlack, Vivianna Tiffany	5	5.00	3.97	5.0	40.50	37.75	5	0	66.00	96.095	A	28	49	vvarlack	6496691
Costa, Ana Ferreira da Motta	5	4.75	4.53	5.0	40.50	38.00	5	2	61.00	96.030	A	29	49	anafmc	6683414
Fung, Michael Benjamin	5	3.75	4.87	5.0	36.75	38.75	5	2	65.25	95.995	A	30	49	mikefung	6650042
Vachovska, Lora	5	5.00	4.54	5.0	36.25	39.00	5	0	67.50	95.915	A	31	49	lorapv	6544168
Dakad, Nazli Hilal	5	5.00	4.48	5.0	39.25	37.00	5	2	62.00	95.605	A	32	49	nhdakad	6455175
Alluri, Rishi	5	5.00	4.22	4.0	38.25	42.00	5	0	64.00	95.345	A	33	49	allurir	6543538
Jiang, Yuzhi	5	4.25	4.57	5.0	38.25	38.00	5	0	66.50	95.195	A	34	49	baggioj	6456134
Lu, Yuwen	5	5.00	4.57	5.0	38.75	37.00	5	0	65.50	95.195	A	35	49	tinaluyw	6486310
Marton, Selina Claire	5	4.75	3.53	5.0	39.25	35.50	5	2	65.00	95.155	A	36	49	selinacm	6743583
Anderson, Elijah Francisco	5	4.00	4.61	5.0	36.38	37.00	5	2	65.50	95.050	A	37	49	efa2003	6683927
Brindisi, Sophia Rose	5	5.00	4.83	5.0	40.38	36.00	5	2	60.00	95.020	A	38	49	sbrindis	6495775
Kisembo, Benita Kemigisa	5	5.00	3.92	5.0	39.88	37.50	5	0	64.50	94.860	A	39	49	kisemben	6504254
Martz, Eric	5	5.00	4.57	5.0	38.50	38.00	5	0	64.00	94.820	A	40	49	emartz	6448695
Garcia-Escobar, Eduardo	5	4.25	4.48	5.0	34.25	35.00	5	2	68.50	94.605	A	41	49	egarcia6	6504136
Lawson, Benjamin Richard	5	3.15	4.87	4.5	38.50	37.25	5	2	64.00	94.395	A	42	49	blawson3	6512205
Cushing, Stella Marie Reese	5	4.50	4.48	5.0	37.00	34.75	5	2	65.00	94.355	A	43	49	scushing	6745843
Sossa, Candice Rachel	5	5.00	3.41	5.0	39.25	38.75	5	0	63.00	93.910	A	44	49	csossa	6401980
Narayanan, Tejas	5	5.00	4.87	5.0	40.00	36.00	5	0	62.00	93.870	A	45	49	tejasn9	6458279
Temal, Anjali	5	4.50	4.46	5.0	37.25	38.75	5	0	63.50	93.710	A	46	49	anjali82	6456179
Ozoemelam, Liane	5	4.00	4.74	5.0	37.00	36.50	5	0	65.50	93.240	A	47	49	lianeozo	6544973
Tchakmakjian, Emmanuella Levon	5	5.00	4.27	4.5	38.25	38.00	5	2	58.50	93.145	A	48	49	etchak	6409933
Ballard, Tensaye Libra	5	4.50	4.57	5.0	38.75	30.50	5	2	64.75	93.070	A	49	49	tensayeb	6749550
Hernandez Leon, Brianna	5	3.75	4.31	5.0	34.88	38.00	5	2	62.00	92.500	A-	50	60	janay17	6647227
Ghimire, Samikshya	5	3.40	4.61	4.5	37.38	34.50	5	2	64.00	92.450	A-	51	60	samighim	6532372
Barone, Kamryn Regina	5	4.65	4.66	5.0	38.75	34.00	5	2	59.50	92.435	A-	52	60	kbarone	6739395
Morales-Madrid, Samuel	5	4.50	4.66	5.0	38.25	34.50	5	2	57.50	91.285	A-	53	60	samuelmm	6603236
Odell, Cherrial Ann	5	3.90	5.00	4.5	36.88	36.75	5	2	58.00	91.215	A-	54	60	cherrial	6360769
Tamor, Sydney Elizabeth	5	3.75	3.58	4.0	37.00	41.25	5	0	61.50	91.205	A-	55	60	sydtamor	6779178
Strutz, Patricia Marie	5	5.00	3.53	5.0	38.00	34.25	5	0	62.75	91.030	A-	56	60	pstrutz	6503711
MacCoun, Audrey	5	5.00	4.61	5.0	32.50	33.25	5	2	62.25	90.610	A-	57	60	amaccoun	6605504
Murphy, Cheyenne N	5	4.50	4.53	4.5	32.50	33.00	5	2	64.50	90.530	A-	58	60	cmurphy6	6493861
Rodriguez, Keila Samantha	5	4.50	4.00	5.0	35.38	36.00	5	2	58.50	90.440	A-	59	60	keila	6647252
Nordhagen, Andrew	5	4.75	4.48	5.0	33.00	35.50	5	0	63.50	90.230	A-	60	60	aenord	6504358
Pinciroli Pascual, Alissa	5	4.15	4.66	4.0	34.75	34.75	5	2	60.25	89.685	B+	61	73	pasc061	6495841
Vincent, Dylan Michael	0	5.00	3.97	5.0	37.75	33.25	5	2	66.00	89.470	B+	62	73	dylanv	6423286
Sturdza, Caroline Marie	5	4.25	4.40	5.0	33.75	29.00	5	2	64.50	89.275	B+	63	73	carostu	6486322
Aguilar, Karla Patricia	5	5.00	4.24	5.0	37.00	33.00	5	2	55.50	88.990	B+	64	73	kaguila5	6682225
Kehm, Andrew Thomas	5	5.00	4.18	5.0	33.75	33.25	5	0	62.00	88.680	B+	65	73	atkehm	6683181
Stewart, Riley Grace	5	4.75	4.22	4.5	36.75	28.50	5	2	61.00	88.595	B+	66	73	riley3	6644984
De Loera, Alan	5	4.00	4.74	5.0	40.50	39.00	5	0	50.00	88.490	B+	67	73	adeloera	6496650
Ramirez Gonzalez, Mirna Guadalupe	5	4.25	4.66	4.5	37.00	26.50	5	2	62.50	88.410	B+	68	73	mirna22	6649756
Shenoy, Thanh-Nga Catherine	5	3.75	5.00	5.0	36.00	37.50	5	2	51.50	88.250	B+	69	73	tshenoy	6544608
Hansen, Luke	0	4.25	4.74	5.0	37.00	36.00	5	0	65.00	87.990	B+	70	73	lrhansen	6252378
Garewal, Samuel Oliver	5	3.50	4.64	5.0	33.75	29.00	5	0	66.33	87.680	B+	71	73	sgarewal	6827043
Heinemann, Abigail Grace	5	3.75	5.00	5.0	33.25	36.50	0	0	67.50	87.375	B+	72	73	aheinem	6641596
Cortes, Maria	5	3.50	4.57	4.0	36.25	29.00	5	2	61.00	87.195	B+	73	73	mcortesc	6657464
Powell, Taylor Janai	5	5.00	4.27	5.0	36.00	33.00	5	2	51.25	86.395	B	74	77	tjp2608	6682994
Rojas, Tania Lisseth	5	3.75	4.59	4.0	34.00	36.50	5	0	55.75	85.465	B	75	77	trojas18	6421966
Cantin, Lauren Elizabeth	5	3.00	4.14	5.0	34.50	32.00	5	2	56.00	85.390	B	76	77	lecantin	6509449
Mijat, Sophia Grace	5	5.00	4.31	4.5	31.88	28.25	5	2	59.00	85.375	B	77	77	smijat	6650818
Campbell, Maxwell James	5	3.50	4.83	5.0	28.25	34.00	5	0	56.75	82.830	B-	78	80	maxjcamp	6659964
Nzelu, Kene Simone	5	4.15	3.23	5.0	33.75	30.50	5	2	49.50	81.255	B-	79	80	kenen	6421098
Bailey, Quincy	5	4.75	3.84	4.0	36.25	23.75	5	2	51.25	80.215	B-	80	80	qbailey	6778990
Thompson, Joshua Issa	5	4.40	3.85	4.5	34.75	32.00	5	0	46.75	79.500	C+	81	84	josht26	6640261
Taylor, Mikey Stephen	5	4.00	4.74	0.0	36.00	34.50	0	0	60.00	78.990	C+	82	84	mikey686	6778059
Johnson, Leah Yolanda	5	3.25	4.60	5.0	35.75	29.25	5	0	46.00	78.350	C+	83	84	bluerun2	6385807
Baker, Tahayla Brianna	5	3.00	4.01	5.0	27.88	28.75	5	2	52.00	78.325	C+	84	84	tahayla	6416268
Pacheco Ramirez, Jessica	5	4.25	3.92	5.0	31.38	25.00	5	2	40.50	73.610	C	85	85	jessipr	6657509
Leiva, Carlos D	5	3.75	4.18	4.5	37.00	24.50	5	0	35.50	70.930	C-	86	86	cdleiva	6749505
Kinney, Jasmine Waukela	5	3.30	4.44	5.0	29.38	27.25	5	2	32.50	69.305	D+	87	88	jwkinney	6455928
Ramos, Valeria	5	4.80	3.90	4.5	29.50	29.75	5	2	25.00	67.325	D+	88	88	ramos03	6683972
Cloud, Anne Elizabeth	0	4.75	2.54	5.0	24.10	26.00	5	2	41.50	65.090	D	89	89	acloud	6643281
Reliford, Dominique	0	0.00	0.00	0.0	23.00	32.00	0	0	39.83	47.415	F	90	93	reliford	9656716
Sun, Lilly	5	1.20	1.15	3.5	13.00	27.75	0	0	28.00	45.225	F	91	93	lillysun	6250630
Hicks, Christina Helena Pau	5	4.00	3.79	5.0	0.00	33.50	5	0	0.00	39.540	F	92	93	chickshp	6407150
Briones, Julia	0	0.00	0.00	0.0	29.25	28.75	0	0	0.00	29.000	F	93	93	jmba812	6261749

5.1 All assignment scores

Note that the mean for the final exam doesn’t match the earlier statistics, because this includes people who got a 0.

d <- df_gradebook_final %>% 
  select(student, 
         assignment_1,
         assignment_2,
         assignment_3,
         assignment_4,
         exam_1,
         exam_2,
         final_exam_score
  ) %>% 
  pivot_longer(cols = -student, 
               names_to = "assignment",
               values_to = "score")

## select: dropped 5 variables (sunet, suid, participation, extra_credit, total_class_score)

## pivot_longer: reorganized (assignment_1, assignment_2, assignment_3, assignment_4, exam_1, …) into (assignment, score) [was 93x8, now 651x3]

d %>% 
  ggplot(aes(x = score)) +
  geom_histogram(bins = 20) +
  facet_wrap(~assignment, scales = "free") +
  labs(title = "Assignment Scores", x = "Score", y = "Count")

d %>% 
  group_by(assignment) %>%
  summarize(mean_score = mean(score),
            sd_score = sd(score)) %>% 
  mutate_at(vars(mean_score, sd_score), ~round(., 2)) %>%
  kbl() %>%
  kable_styling()

## group_by: one grouping variable (assignment)

## summarize: now 7 rows and 3 columns, ungrouped

## mutate_at: changed 7 values (100%) of 'mean_score' (0 new NA)

##            changed 7 values (100%) of 'sd_score' (0 new NA)

assignment	mean_score	sd_score
assignment_1	4.73	1.13
assignment_2	4.31	0.94
assignment_3	4.29	0.84
assignment_4	4.68	0.92
exam_1	36.18	5.87
exam_2	35.29	4.47
final_exam_score	59.68	13.06

5.2 Missing grades

df_gradebook_final %>% 
  pivot_longer(cols = -c(student, sunet, suid), 
               names_to = "assignment",
               values_to = "score") %>% 
  filter(is.na(score) | score == 0) %>% 
  arrange(assignment) %>% 
  kbl() %>% 
  kable_styling()

## pivot_longer: reorganized (assignment_1, assignment_2, assignment_3, assignment_4, exam_1, …) into (assignment, score) [was 93x13, now 930x5]

## filter: removed 875 rows (94%), 55 rows remaining

student	sunet	suid	assignment
Briones, Julia	jmba812	6261749	assignment_1
Cloud, Anne Elizabeth	acloud	6643281	assignment_1
Hansen, Luke	lrhansen	6252378	assignment_1
Reliford, Dominique	reliford	9656716	assignment_1
Vincent, Dylan Michael	dylanv	6423286	assignment_1
Briones, Julia	jmba812	6261749	assignment_2
Reliford, Dominique	reliford	9656716	assignment_2
Briones, Julia	jmba812	6261749	assignment_3
Reliford, Dominique	reliford	9656716	assignment_3
Briones, Julia	jmba812	6261749	assignment_4
Reliford, Dominique	reliford	9656716	assignment_4
Taylor, Mikey Stephen	mikey686	6778059	assignment_4
Hicks, Christina Helena Pau	chickshp	6407150	exam_1
Alluri, Rishi	allurir	6543538	extra_credit
Briones, Julia	jmba812	6261749	extra_credit
Campbell, Maxwell James	maxjcamp	6659964	extra_credit
De Loera, Alan	adeloera	6496650	extra_credit
Garewal, Samuel Oliver	sgarewal	6827043	extra_credit
Hansen, Luke	lrhansen	6252378	extra_credit
Heinemann, Abigail Grace	aheinem	6641596	extra_credit
Hicks, Christina Helena Pau	chickshp	6407150	extra_credit
Huang, Emily	ehuang25	6603608	extra_credit
Jiang, Yuzhi	baggioj	6456134	extra_credit
Johnson, Leah Yolanda	bluerun2	6385807	extra_credit
Kehm, Andrew Thomas	atkehm	6683181	extra_credit
Kisembo, Benita Kemigisa	kisemben	6504254	extra_credit
Leiva, Carlos D	cdleiva	6749505	extra_credit
Lu, Yuwen	tinaluyw	6486310	extra_credit
Martz, Eric	emartz	6448695	extra_credit
Movich-Fields, Tishrei Lee	tishrei	6420491	extra_credit
Narayanan, Tejas	tejasn9	6458279	extra_credit
Nordhagen, Andrew	aenord	6504358	extra_credit
Ozoemelam, Liane	lianeozo	6544973	extra_credit
Park, Emma	emmapark	6421329	extra_credit
Reliford, Dominique	reliford	9656716	extra_credit
Rojas, Tania Lisseth	trojas18	6421966	extra_credit
Smith, Jerred Denali Villegas	jerred89	6512315	extra_credit
Sossa, Candice Rachel	csossa	6401980	extra_credit
Strutz, Patricia Marie	pstrutz	6503711	extra_credit
Sun, Lilly	lillysun	6250630	extra_credit
Sundar, Lavanya Marakatavalli	lsundar	6659969	extra_credit
Tamor, Sydney Elizabeth	sydtamor	6779178	extra_credit
Taylor, Mikey Stephen	mikey686	6778059	extra_credit
Temal, Anjali	anjali82	6456179	extra_credit
Thompson, Joshua Issa	josht26	6640261	extra_credit
Tiu, Ekin Yokhan	ekintiu	6423010	extra_credit
Vachovska, Lora	lorapv	6544168	extra_credit
Varlack, Vivianna Tiffany	vvarlack	6496691	extra_credit
Briones, Julia	jmba812	6261749	final_exam_score
Hicks, Christina Helena Pau	chickshp	6407150	final_exam_score
Briones, Julia	jmba812	6261749	participation
Heinemann, Abigail Grace	aheinem	6641596	participation
Reliford, Dominique	reliford	9656716	participation
Sun, Lilly	lillysun	6250630	participation
Taylor, Mikey Stephen	mikey686	6778059	participation

Grade analysis

Ashish

2024-03-25