Arithmetic tests data and analysis

Prepare data

Set up

Code
library(dplyr)
library(tidyr)
library(lubridate)
library(tibble)
library(here)
library(ggplot2)
library(stringr)
library(purrr)
library(stringr)
library(readr)
library(moments)
library(patchwork)
library(scales)
library(kableExtra)
library(knitr)
library(glue)
library(readxl)
library(forcats)
library(openxlsx)


probe_dir <- "02-Arithmetic"
output_dir <- here(probe_dir, "Shared-output")

Question presentation

Load & clean student responses

Code
results_df <- read_csv(here(probe_dir,"arithmetic_raw.csv")) %>% select(-starts_with("..."))  

student_df <- read_csv(here(".","01-Students","Student_Assessment.csv"))%>% select(-starts_with("..."))

student_clean <- student_df %>% 
  mutate(
    GradeGroup = case_when(
      str_detect(`Assessment Event Identifier`, "^year-1a-2025$")          ~ "Year 1A",
      str_detect(`Assessment Event Identifier`, "^year-1b-2025$")          ~ "Year 1B",
      str_detect(`Assessment Event Identifier`, "^foundation-a-2025$")     ~ "Foundation A",
      str_detect(`Assessment Event Identifier`, "^foundation-b-2025$")     ~ "Foundation B",
      TRUE                                                                 ~ NA_character_  
    )
  ) %>% 
  select(Identifier, GradeGroup) %>%          
  distinct()

results_df <- results_df %>% 
  left_join(student_clean, by = "Identifier") 

# Re-allocate students to correct GradeGroup
results_df <- results_df %>% 
  mutate(
    GradeGroup = case_when(
      `Test Identifier` == "AADD_2025" & GradeGroup == "Foundation A" ~ "Year 1A",
      `Test Identifier` == "AADD_2025" & GradeGroup == "Foundation B" ~ "Year 1B",
      `Test Identifier` == "ASDD_2025" & GradeGroup == "Foundation A" ~ "Year 1A",
      `Test Identifier` == "ASDD_2025" & GradeGroup == "Foundation B" ~ "Year 1B",
      `Test Identifier` == "DMT10_2025" & GradeGroup == "Year 1A" ~ "Foundation A",
      `Test Identifier` == "DMT10_2025" & GradeGroup == "Year 1B" ~ "Foundation B",
      `Test Identifier` == "DMT5_2025" & GradeGroup == "Year 1A" ~ "Foundation A",
      `Test Identifier` == "DMT5_2025" & GradeGroup == "Year 1B" ~ "Foundation B",
      TRUE ~ GradeGroup
    )
  ) %>% 
  select(-`Assessment Event Identifier`)

# Remove practice and NA response
results_cleaned_df <- results_df %>% 
  filter(!str_detect(`Question Identifier`, "TRANS_INTRO|_Prac|_Tch|-Prac|_prac")) %>% 
  filter(!is.na(`Answer Response`)) 

# Relabel DMT
results_cleaned_df <- results_cleaned_df %>%
  mutate(
    `Question Identifier` = str_replace(
      `Question Identifier`,
      "^d(5|10)_(\\d{3})$",
      "DMT\\1_\\2"
    )
  )

# De-duplicate: keep only the latest per student × test × question
tmp <- results_cleaned_df %>%
  mutate(.ts = dmy_hm(`Date Completed`))

deduped <- tmp %>%
  group_by(Identifier, `Test Identifier`, `Question Identifier`) %>%
  slice_max(.ts, n = 1, with_ties = FALSE) %>%
  ungroup()

n_removed <- nrow(tmp) - nrow(deduped)
message(n_removed, " duplicate rows removed; keeping only the latest per Identifier/Test/Question")

results_cleaned_df <- deduped %>% select(-.ts)

# Split drag-drop and DMT
arithmetic_DD_df <- results_cleaned_df %>% 
  filter(`Test Identifier` %in% c("AADD_2025", "ASDD_2025")) %>% 
  filter(`Answer Duration` > 0) 

arithmetic_DMT_df <- results_cleaned_df %>% 
  filter(`Test Identifier` %in% c("DMT10_2025", "DMT5_2025")) %>% 
  filter(`Answer Duration` != 0 | `Answer Response` != 9)

Map distractor identity

Drag and drop

Code
distractor_file <- here(probe_dir,"arithmetic_answer_key.xlsx")
DD_distractor_df <- read_excel(distractor_file, sheet = "DD") 

# Prase and join drag and drop
arithmetic_DD_df <- arithmetic_DD_df %>% 
  mutate(
    Student_Response = case_when(
      str_detect(`Answer Response`, "^\\[\\]$") ~ NA_integer_,
      TRUE ~ str_extract(`Answer Response`, "(?<=btn-)(\\d+)(?=\")") %>% as.integer()
    )
  ) %>%
  select(-`Answer Response`) %>%
  left_join(
    DD_distractor_df %>%
      select(
        `Question Identifier`,
        `Question Body`,
        `Correct response`,
        `Question Group ID`,
        `Question Group`
      ),
    by = "Question Identifier"
  )

# Recompute score
arithmetic_DD_df <- arithmetic_DD_df %>%
  mutate(
    `Is Answer Correct` = coalesce(
      Student_Response == `Correct response`,
      FALSE
    ),
    `Raw Score` = if_else(
      `Is Answer Correct`,
      1L,
      0L
    )
  )

arithmetic_DD_df <- arithmetic_DD_df %>%
  select(
    Test, `Test Identifier`,
    `Question Identifier`, `Question Name`, `Question Type`, `Question Body`,
    `Distractor Count`, `Correct Answer`, `Correct response`,
    `Question Group ID`, `Question Group`,
    Identifier, GradeGroup, `Answer Duration`, `Is Answer Correct`,
    `Raw Score`, Student_Response,
    `Date Completed`
  )

DMT

Code
DMT_distractor_df <- read_excel(distractor_file, sheet = "DMT") 

arithmetic_DMT_df <- arithmetic_DMT_df %>%
  select(-`Correct Answer`) %>% 
  left_join(
    DMT_distractor_df %>%
      mutate(
        `Correct Distractor` = str_remove(`Correct Distractor`, "^Distractor\\s+") %>% 
          as.integer()
      ) %>% 
      select(
        `Question Identifier`,
        Image_desc,
        `Correct Distractor`,
        `Correct Answer`,
        starts_with("Distractor"),
        `Decomposition task`
      ),
    by = c("Question Identifier")
  ) %>%
  mutate(
    Number_selected = case_when(
      `Answer Response` == 1 ~ `Distractor 1`,
      `Answer Response` == 2 ~ `Distractor 2`,
      `Answer Response` == 3 ~ `Distractor 3`,
      `Answer Response` == 4 ~ `Distractor 4`,
      TRUE                  ~ NA_real_
    ),
    
    `Is Answer Correct` = coalesce(
      Number_selected == `Correct Answer`,
      FALSE
    ),
    `Raw Score` = if_else(
      `Is Answer Correct`,
      1L,
      0L
    )
  )

# Check discrepancy
discrepancy_logical <- with(arithmetic_DMT_df, {
  Answer_Ord    <- as.integer(`Answer Response`)
  picked_correct_value   <- Number_selected == `Correct Answer`
  ord_vs_key            <- Answer_Ord == `Correct Distractor`
  (ord_vs_key & !picked_correct_value) |
  (!ord_vs_key & picked_correct_value)
})

if (any(discrepancy_logical, na.rm = TRUE)) {
  warning(
    sprintf(
      "Found %d discrepancies between ordinal pick and key values.",
      sum(discrepancy_logical, na.rm = TRUE)
    )
  )
}

# Re-arrange columns

arithmetic_DMT_df <- arithmetic_DMT_df %>%
  select(
    Test, `Test Identifier`,
    `Question Identifier`, `Question Name`, `Question Type`,
    Image_desc, `Decomposition task`, `Distractor Count`,
    `Correct Distractor`, `Correct Answer`,
    `Distractor 1`, `Distractor 2`, `Distractor 3`, `Distractor 4`,
    Identifier, GradeGroup,
    `Answer Duration`, `Answer Response`, Number_selected,
    `Is Answer Correct`, `Raw Score`,
    `Date Completed`
  )

Explore

Drag and drop

Correct vs incorrect

Code
# Summarise counts & proportions by Test × Item
dd_summary <- arithmetic_DD_df %>%
  group_by(Test, `Question Identifier`) %>%
  summarise(
    total     = n(),
    correct   = sum(`Is Answer Correct`),
    incorrect = total - correct,
    .groups = "drop"
  ) %>%
  mutate(
    pct_corr = correct   / total,
    pct_inc  = incorrect / total
  ) %>%
  pivot_longer(
    cols      = c(pct_corr, pct_inc),
    names_to  = "Outcome",
    values_to = "Proportion"
  ) %>%
  mutate(
    Outcome = recode(Outcome,
                     pct_corr = "Correct",
                     pct_inc  = "Incorrect"),
    # make sure Correct is drawn on the left
    Outcome = factor(Outcome, levels = c("Incorrect", "Correct"))
  )

q_levels <- sort(unique(dd_summary$`Question Identifier`))
dd_summary <- dd_summary %>%
  mutate(`Question Identifier` = factor(`Question Identifier`, levels = q_levels))

build_plot <- function(test_name) {

  dat <- dd_summary %>% filter(Test == test_name)

  lab_dat <- dat %>%
    distinct(`Question Identifier`, total) %>%
    mutate(label = paste0("n = ", scales::comma(total)))

  ggplot(dat, aes(Proportion,
                  `Question Identifier`,
                  fill = Outcome)) +
    geom_col(position = "fill", colour = "grey90") +
    geom_text(
      data = lab_dat,
      inherit.aes = FALSE,         
      aes(x = 1.01,
          y = `Question Identifier`,
          label = label),
      hjust = 0,
      size  = 3
    ) +
    coord_cartesian(xlim = c(0, 1.12), clip = "off") +
    scale_x_continuous(
      labels       = scales::percent_format(accuracy = 1),
      breaks       = seq(0, 1, .25),
      minor_breaks = seq(0, 1, .05),
      expand       = c(.01, .01)
    ) +
    scale_y_discrete(labels = \(x) stringr::str_remove(x, "^.*_")) +
    scale_fill_manual(values = c("Incorrect" = "#d95f02",
                                 "Correct"   = "#1b9e77")) +
    labs(title = test_name,
         x     = NULL,
         y     = "Question",
         fill  = NULL) +
    theme_minimal(base_size = 12) +
    theme(
    axis.ticks         = element_blank(),
    panel.grid.major.y = element_blank(),
    panel.grid.minor   = element_blank(),
    legend.position    = "bottom",
    legend.direction   = "vertical",
  
    axis.text.y = element_text(
      hjust  = 1,             # right-align text
      margin = margin(r = -12) # negative = pull toward bars
    ),
  
    plot.margin = margin(5.5, 25, 5.5, 5.5)  
  )
}

p_add <- build_plot("Arithmetic facts - Addition drag and drop")
p_sub <- build_plot("Arithmetic facts - Subtraction drag and drop")

final_plot <- p_add / p_sub +
  plot_layout(guides = "collect") &
  theme(legend.justification = "centre") &
  labs(x = "Proportion of responses")

final_plot

By question type

Code
# 1. Summarise counts & proportions by Test × Question Group
dd_summary_group <- arithmetic_DD_df %>%
  group_by(Test, `Question Group ID`) %>%
  summarise(
    total     = n(),
    correct   = sum(`Is Answer Correct`),
    incorrect = total - correct,
    .groups   = "drop"
  ) %>%
  mutate(
    pct_corr = correct   / total,
    pct_inc  = incorrect / total
  ) %>%
  pivot_longer(
    cols      = c(pct_corr, pct_inc),
    names_to  = "Outcome",
    values_to = "Proportion"
  ) %>%
  mutate(
    Outcome = recode(Outcome,
                     pct_corr = "Correct",
                     pct_inc  = "Incorrect"),
    # draw Incorrect on the left, Correct on the right
    Outcome = factor(Outcome, levels = c("Incorrect", "Correct"))
  )

# 2. Lock in the order of the groups
group_levels <- dd_summary_group %>%
  distinct(`Question Group ID`) %>%
  pull() %>%
  sort()
dd_summary_group <- dd_summary_group %>%
  mutate(`Question Group ID` = factor(`Question Group ID`, levels = group_levels))

# 3. Plot builder for grouped view

group_caption <- arithmetic_DD_df %>% 
  distinct(`Question Group ID`, `Question Group`) %>% 
  arrange(`Question Group ID`) %>% 
  mutate(pair = paste0(
    `Question Group ID`, ": ", `Question Group`
  )) %>% 
  pull(pair) %>% 
  str_c(collapse = " | ")

build_group_plot <- function(test_name) {
  dat     <- dd_summary_group %>% filter(Test == test_name)
  lab_dat <- dat %>%
    distinct(`Question Group ID`, total) %>%
    mutate(label = paste0("n = ", comma(total)))
  
  ggplot(dat, aes(
      x    = Proportion,
      y    = `Question Group ID`,
      fill = Outcome
    )) +
    geom_col(position = "fill", colour = "grey90") +
    geom_text(
      data        = lab_dat,
      inherit.aes = FALSE,
      aes(
        x     = 1.01,
        y     = `Question Group ID`,
        label = label
      ),
      hjust = 0, size = 3
    ) +
    coord_cartesian(xlim = c(0,1.12), clip = "off") +
    scale_x_continuous(
      labels       = percent_format(accuracy = 1),
      breaks       = seq(0,1,0.25),
      minor_breaks = seq(0,1,0.05),
      expand       = c(0,0)
    ) +
    scale_y_discrete(
      labels = function(x) x  # already the group IDs
    ) +
    scale_fill_manual(values = c(
      "Incorrect" = "#d95f02",
      "Correct"   = "#1b9e77"
    )) +
    labs(
      title = test_name,
      x     = NULL,
      y     = "Question Group",
      fill  = NULL
    ) +
    theme_minimal(base_size = 12) +
    theme(
      axis.ticks         = element_blank(),
      panel.grid.major.y = element_blank(),
      panel.grid.minor   = element_blank(),
      legend.position    = "bottom",
      legend.direction   = "vertical",
      axis.text.y = element_text(
        hjust  = 1,
        margin = margin(r = 0)
      ),
      plot.margin = margin(5.5, 25, 5.5, 5.5)
    )
}

# 4. Draw for each test
p_add_group <- build_group_plot("Arithmetic facts - Addition drag and drop")
p_sub_group <- build_group_plot("Arithmetic facts - Subtraction drag and drop")

# 5. Stack them with a shared legend
final_grouped_plot <- (p_add_group / p_sub_group) +
  plot_layout(guides = "collect") +
  plot_annotation(
    title   = "Proportion Correct vs Incorrect by Question Group",
    caption = group_caption
  ) &
  theme(
    legend.justification = "centre",
    plot.caption         = element_text(hjust = 0)  # left-align caption
  )

final_grouped_plot

Distribution of selected distractor

Code
dd_prop <- arithmetic_DD_df %>%
  # keep only the columns we need
  transmute(
    Test,
    item         = `Question Identifier`,
    duration     = `Answer Duration`,
    resp_raw     = Student_Response,
    correct_val  = `Correct response`
  ) %>%
  filter(duration > 0 | !is.na(resp_raw)) %>%
  # convert to character so we can add an "NA" level
  mutate(
    resp        = if_else(is.na(resp_raw), "NA", as.character(resp_raw)),
    correct_val = as.character(correct_val)
  ) %>%
  # count selections
  count(Test, item, resp, correct_val, name = "n") %>%
  group_by(Test, item) %>%
  mutate(
    total = sum(n),
    pct   = n / total,
    flag  = if_else(resp == correct_val, "Correct", "Incorrect")
  ) %>%
  ungroup()

plot_dd <- function(test_name, facets_per_row = 5) {
  
  dat <- dd_prop %>% filter(Test == test_name)
  
  facet_labs <- dat %>%
    distinct(item, total) %>%
    mutate(strip = paste0(
      str_remove(item, "^.*_"), " (n = ", comma(total), ")"
    )) %>%
    select(item, strip) %>%
    deframe()
  
  ggplot(
    dat,
    aes(
      x = factor(
      resp,
      levels = c(
        sort(as.numeric(setdiff(unique(resp), "NA"))),   # numeric order
        "NA"                                             # put blank last
      )
    ),
      y    = pct,
      fill = flag
    )
  ) +
    geom_col(width = .8) +
    facet_wrap(
      ~ item,
      ncol     = facets_per_row,
      scales   = "free_x",         
      labeller = labeller(item = facet_labs)
    ) +
    scale_y_continuous(
      labels = percent_format(accuracy = 1),
      expand = expansion(mult = c(0, .05))
    ) +
    scale_fill_manual(
      values = c("Correct"   = "steelblue",
                 "Incorrect" = "grey70")
    ) +
    labs(
      title = test_name,
      x     = "Answer chosen (\"NA\" = blank)",
      y     = "% of students",
      fill  = NULL
    ) +
    theme_minimal(base_size = 11) +
    theme(
      panel.grid.major.x = element_blank(),
      legend.position    = "bottom",
      legend.direction   = "vertical"
    )
}

p_AADD <- plot_dd("Arithmetic facts - Addition drag and drop")
p_ASDD <- plot_dd("Arithmetic facts - Subtraction drag and drop")

p_AADD

Code
p_ASDD

Compute accuracy and RCPM

Code
student_score <- arithmetic_DD_df %>%
  filter(Test %in% c(
    "Arithmetic facts - Addition drag and drop",
    "Arithmetic facts - Subtraction drag and drop"
  )) %>%
  group_by(Identifier, Test) %>%
  summarise(
    Attempts     = n(),
    Correct      = sum(`Is Answer Correct`),
    TotalTimeMin = sum(`Answer Duration`) / 60,
    Accuracy     = Correct / Attempts,
    RCPM         = if_else(TotalTimeMin > 0,
                           Correct / TotalTimeMin,
                           NA_real_),    # avoid Inf
    .groups = "drop"
  )

p_dd_1 <- student_score %>%
  filter(Test == "Arithmetic facts - Addition drag and drop") %>%
  ggplot(aes(x = Accuracy,
             y = ..count.. / sum(..count..))) +
    geom_histogram(
      binwidth = 0.05,    # 20 bins over [0,1]
      center   = 0.025,   # shift by half a bin
      fill     = "steelblue",
      alpha    = 0.7
    ) +
    scale_x_continuous(
      breaks = seq(0, 1, by = 0.25),
      labels = percent_format(accuracy = 1)
    ) +
    scale_y_continuous(
      labels = percent_format(accuracy = 1)
    ) +
    labs(
      title = "AADD: Accuracy",
      x     = "Accuracy",
      y     = "% of students"
    ) +
    theme_minimal()

p_dd_2 <- student_score %>%
  filter(Test == "Arithmetic facts - Subtraction drag and drop") %>%
  ggplot(aes(x = Accuracy,
             y = ..count.. / sum(..count..))) +
    geom_histogram(
      binwidth = 0.05,
      center   = 0.025,
      fill     = "steelblue",
      alpha    = 0.7
    ) +
    scale_x_continuous(
      breaks = seq(0, 1, by = 0.25),
      labels = percent_format(accuracy = 1)
    ) +
    scale_y_continuous(
      labels = percent_format(accuracy = 1)
    ) +
    labs(
      title = "ASDD: Accuracy",
      x     = "Accuracy",
      y     = "% of students"
    ) +
    theme_minimal()

p_dd_3 <- student_score %>%
  filter(Test == "Arithmetic facts - Addition drag and drop") %>%
  ggplot(aes(x = RCPM,
             y = ..count.. / sum(..count..))) +
    geom_histogram(
      binwidth = 1,     
      center   = 0.5,    
      fill     = "steelblue",
      alpha    = 0.7
    ) +
    scale_y_continuous(
      labels = percent_format(accuracy = 1)
    ) +
    labs(
      title = "AADD: Fluency",
      x     = "RCPM",
      y     = "% of students"
    ) +
    theme_minimal()

p_dd_4 <- student_score %>%
  filter(Test == "Arithmetic facts - Subtraction drag and drop") %>%
  ggplot(aes(x = RCPM,
             y = ..count.. / sum(..count..))) +
    geom_histogram(
      binwidth = 1,
      center   = 0.5,
      fill     = "steelblue",
      alpha    = 0.7
    ) +
    scale_y_continuous(
      labels = percent_format(accuracy = 1)
    ) +
    labs(
      title = "ASDD: Fluency",
      x     = "RCPM",
      y     = "% of students"
    ) +
    theme_minimal()

(p_dd_1 | p_dd_2) /
(p_dd_3 | p_dd_4) +
  plot_annotation(
    title = "Distributions of Accuracy and RCPM by Test",
    theme = theme(plot.title = element_text(face = "bold", size = 14))
  )

Decomposition

Correct vs incorrect

Code
# 1. Summarise counts & proportions by Test × Item for DMT
dmt_summary <- arithmetic_DMT_df %>%
  group_by(Test, `Question Identifier`) %>%
  summarise(
    total     = n(),
    correct   = sum(`Is Answer Correct`),
    incorrect = total - correct,
    .groups   = "drop"
  ) %>%
  mutate(
    pct_corr = correct / total,
    pct_inc  = incorrect / total
  ) %>%
  pivot_longer(
    cols      = c(pct_corr, pct_inc),
    names_to  = "Outcome",
    values_to = "Proportion"
  ) %>%
  mutate(
    Outcome = recode(Outcome,
                     pct_corr = "Correct",
                     pct_inc  = "Incorrect"),
    Outcome = factor(Outcome, levels = c("Incorrect", "Correct"))
  )

# 2. Lock in ascending item order 001 → 010 (or 001 → 005)
q_levels <- sort(unique(dmt_summary$`Question Identifier`))
dmt_summary <- dmt_summary %>%
  mutate(`Question Identifier` = factor(`Question Identifier`, levels = q_levels))

# 3. Plot builder
build_dmt_plot <- function(test_name) {
  dat    <- dmt_summary %>% filter(Test == test_name)
  lab_dat <- dat %>%
    distinct(`Question Identifier`, total) %>%
    mutate(label = paste0("n = ", comma(total)))
  
  ggplot(dat, aes(x = Proportion,
                  y = `Question Identifier`,
                  fill = Outcome)) +
    geom_col(position = "fill", colour = "grey90") +
    # sample‐size labels on the right
    geom_text(
      data        = lab_dat,
      inherit.aes = FALSE,
      aes(x     = 1.01,
          y     = `Question Identifier`,
          label = label),
      hjust = 0, size = 3
    ) +
    coord_cartesian(xlim = c(0, 1.12), clip = "off") +
    scale_x_continuous(
      labels       = percent_format(accuracy = 1),
      breaks       = seq(0, 1, 0.25),
      minor_breaks = seq(0, 1, 0.05),
      expand       = c(0, 0)
    ) +
    scale_y_discrete(labels = function(x) str_remove(x, "^.*_")) +
    scale_fill_manual(values = c(
      "Incorrect" = "#d95f02",
      "Correct"   = "#1b9e77"
    )) +
    labs(
      title = test_name,
      x     = NULL,
      y     = "Question",
      fill  = NULL
    ) +
    theme_minimal(base_size = 12) +
    theme(
      axis.ticks         = element_blank(),
      panel.grid.major.y = element_blank(),
      panel.grid.minor   = element_blank(),
      legend.position    = "bottom",
      legend.direction   = "vertical",
      axis.text.y        = element_text(
                             hjust  = 1,
                             margin = margin(r = 0)
                           ),
      plot.margin        = margin(5.5, 25, 5.5, 5.5)
    )
}

# 4. Generate the two panels
p_dmt10 <- build_dmt_plot("Decomposing Measure to 10")
p_dmt5  <- build_dmt_plot("Decomposing Measure to 5")

# 5. Stack with a shared legend
final_dmt_plot <- p_dmt10 / p_dmt5 +
  plot_layout(guides = "collect") &
  theme(legend.justification = "centre") &
  labs(x = "Proportion of responses")

final_dmt_plot

Distribution of selected distractor

Code
# 1. build long table of proportions
dmt_prop <- arithmetic_DMT_df %>%
  transmute(
    Test,
    item        = `Question Identifier`,
    duration    = `Answer Duration`,
    resp_raw    = Number_selected,
    correct_val = `Correct Answer`
  ) %>%
  filter(duration > 0 | !is.na(resp_raw)) %>%
  mutate(
    resp        = if_else(is.na(resp_raw), "NA", as.character(resp_raw)),
    correct_val = as.character(correct_val)
  ) %>%
  count(Test, item, resp, correct_val, name = "n") %>%
  group_by(Test, item) %>%
  mutate(
    total = sum(n),
    pct   = n / total,
    flag  = if_else(resp == correct_val, "Correct", "Incorrect")
  ) %>%
  ungroup()

# 2. plotting helper
plot_dmt <- function(test_name, facets_per_row = 5) {
  # pick up exactly the 4 distractor values for this test,
  # in ordinal order, then append "NA"
  prefix <- if_else(str_detect(test_name, "to 10"), "DMT10_", "DMT5_")
  levels_distractors <- DMT_distractor_df %>%
    filter(str_detect(`Question Identifier`, prefix)) %>%
    arrange(`Question Identifier`) %>%
    select(`Distractor 1`:`Distractor 4`) %>%
    as.matrix() %>% 
    c() %>% 
    unique() %>% 
    as.character()
  levels_resp <- c(levels_distractors, "NA")
  
  # build facet labels with sample sizes
  dat       <- dmt_prop %>% filter(Test == test_name)
  facet_labs <- dat %>%
    distinct(item, total) %>%
    mutate(strip = paste0(
      str_remove(item, "^.*_"), " (n = ", comma(total), ")"
    )) %>%
    select(item, strip) %>%
    deframe()
  
  ggplot(dat, aes(
      x    = factor(resp, levels = levels_resp),
      y    = pct,
      fill = flag
    )) +
    geom_col(width = 0.8, colour = "grey90") +
    facet_wrap(
      ~ item,
      ncol     = facets_per_row,
      scales   = "free_x",
      labeller = labeller(item = facet_labs)
    ) +
    scale_y_continuous(
      labels = percent_format(accuracy = 1),
      expand = expansion(mult = c(0, .05))
    ) +
    scale_fill_manual(
      values = c("Correct" = "steelblue",
                 "Incorrect" = "grey70")
    ) +
    labs(
      title = test_name,
      x     = "Number selected",
      y     = "% of students",
      fill  = NULL
    ) +
    theme_minimal(base_size = 11) +
    theme(
      panel.grid.major.x = element_blank(),
      legend.position    = "bottom",
      legend.direction   = "vertical"
    )
}

# 3. draw the two plots
p_dmt10 <- plot_dmt("Decomposing Measure to 10")
p_dmt5  <- plot_dmt("Decomposing Measure to 5")

# show them stacked with a shared legend
final_dmt_plot <- (p_dmt10 / p_dmt5) +
  plot_layout(guides = "collect") &
  theme(legend.justification = "centre")

final_dmt_plot

Accuracy and fluency

Code
# 1. Compute per-student performance on DMT
student_score_dmt <- arithmetic_DMT_df %>%
  filter(Test %in% c(
    "Decomposing Measure to 10",
    "Decomposing Measure to 5"
  )) %>%
  group_by(Identifier, Test) %>%
  summarise(
    Attempts     = n(),
    Correct      = sum(`Is Answer Correct`),
    TotalTimeMin = sum(`Answer Duration`) / 60,
    Accuracy     = Correct / Attempts,
    RCPM         = if_else(TotalTimeMin > 0,
                           Correct / TotalTimeMin,
                           NA_real_),
    .groups = "drop"
  )

# 2. Build the four histograms (percent on y-axis)
p_dmt10_acc <- student_score_dmt %>%
  filter(Test == "Decomposing Measure to 10") %>%
  ggplot(aes(x = Accuracy,
             y = ..count.. / sum(..count..))) +
    geom_histogram(
      binwidth = 0.05,          # 20 bins over [0,1]
      center   = 0.025,         # shift them by half a bin
      fill     = "steelblue",
      alpha    = 0.7
    ) +
    scale_x_continuous(
      breaks = seq(0,1,by=0.25),            # ticks at 0%,25%,50%…
      labels = scales::percent_format(1)
    ) +
    scale_y_continuous(labels = scales::percent_format(1)) +
    labs(
      title = "DMT10: Accuracy",
      x     = "Accuracy",
      y     = "% of students"
    ) +
    theme_minimal()

p_dmt5_acc <- student_score_dmt %>%
  filter(Test == "Decomposing Measure to 5") %>%
  ggplot(aes(x = Accuracy,
             y = ..count.. / sum(..count..))) +
    geom_histogram(binwidth=0.05, center=0.025,
                   fill="steelblue", alpha=0.7) +
    scale_x_continuous(breaks = seq(0,1,0.25),
                       labels = scales::percent_format(1)) +
    scale_y_continuous(labels = scales::percent_format(1)) +
    labs(title="DMT5: Accuracy", x="Accuracy", y="% of students") +
    theme_minimal()

p_dmt10_rcpm <- student_score_dmt %>%
  filter(Test == "Decomposing Measure to 10") %>%
  ggplot(aes(x = RCPM,
             y = ..count.. / sum(..count..))) +
    geom_histogram(binwidth=1,   # adjust binwidth for RCPM as needed
                   center=0.5,     # same idea: shift by half a bin
                   fill="steelblue", alpha=0.7) +
    scale_y_continuous(labels = scales::percent_format(1)) +
    labs(title="DMT10: Fluency (RCPM)",
         x="Correct Responses per Minute", y="% of students") +
    theme_minimal()

p_dmt5_rcpm <- student_score_dmt %>%
  filter(Test == "Decomposing Measure to 5") %>%
  ggplot(aes(x = RCPM,
             y = ..count.. / sum(..count..))) +
    geom_histogram(binwidth=1, center=0.5,
                   fill="steelblue", alpha=0.7) +
    scale_y_continuous(labels = scales::percent_format(1)) +
    labs(title="DMT5: Fluency (RCPM)",
         x="Correct Responses per Minute", y="% of students") +
    theme_minimal()

# 3. Arrange in a 2×2 grid
(p_dmt10_acc | p_dmt5_acc) /
(p_dmt10_rcpm | p_dmt5_rcpm) +
  plot_annotation(
    title = "Distributions of Accuracy and Fluency (RCPM) on DMT",
    theme = theme(plot.title = element_text(face = "bold", size = 14))
  )

Export

Code
ggsave(
  filename = file.path(output_dir, "AADD_by_item.png"),
  plot     = p_add,
  width    = 8,
  height   = 4,
  bg       = "white"
)

ggsave(
  filename = file.path(output_dir, "ASDD_by_item.png"),
  plot     = p_sub,
  width    = 8,
  height   = 4,
  bg       = "white"
)

ggsave(
  filename = file.path(output_dir, "DD_by_group.png"),
  plot     = final_grouped_plot,
  width    = 8,
  height   = 6,
  bg       = "white"
)

ggsave(
  filename = file.path(output_dir, "DMT10_by_item.png"),
  plot     = p_dmt10,
  width    = 8,
  height   = 4,
  bg       = "white"
)

ggsave(
  filename = file.path(output_dir, "DMT5_by_item.png"),
  plot     = p_dmt5,
  width    = 8,
  height   = 4,
  bg       = "white"
)

ggsave(
  filename = file.path(output_dir, "DD_distractors.png"),
  plot     = p_AADD,
  width    = 8,
  height   = 12,
  bg       = "white"
)

ggsave(
  filename = file.path(output_dir, "ASDD_distractors.png"),
  plot     = p_ASDD,
  width    = 8,
  height   = 12,
  bg       = "white"
)

ggsave(
  filename = file.path(output_dir, "AADD_accuracy.png"),
  plot     = p_dd_1,
  width    = 8,
  height   = 4,
  bg       = "white"
)

ggsave(
  filename = file.path(output_dir, "ASDD_accuracy.png"),
  plot     = p_dd_2,
  width    = 8,
  height   = 4,
  bg       = "white"
)

ggsave(
  filename = file.path(output_dir, "AADD_fluency.png"),
  plot     = p_dd_3,
  width    = 8,
  height   = 4,
  bg       = "white"
)

ggsave(
  filename = file.path(output_dir, "ASDD_fluency.png"),
  plot     = p_dd_4,
  width    = 8,
  height   = 4,
  bg       = "white"
)

ggsave(
  filename = file.path(output_dir, "DMT10_accuracy.png"),
  plot     = p_dmt10_acc,
  width    = 8,
  height   = 4,
  bg       = "white"
)

ggsave(
  filename = file.path(output_dir, "DMT5_accuracy.png"),
  plot     = p_dmt5_acc,
  width    = 8,
  height   = 4,
  bg       = "white"
)

ggsave(
  filename = file.path(output_dir, "DMT10_fluency.png"),
  plot     = p_dmt10_rcpm,
  width    = 8,
  height   = 4,
  bg       = "white"
)

ggsave(
  filename = file.path(output_dir, "DMT5_fluency.png"),
  plot     = p_dmt5_rcpm,
  width    = 8,
  height   = 4,
  bg       = "white"
)

ggsave(
  filename = file.path(output_dir, "DMT10_distractors.png"),
  plot     = p_dmt10,
  width    = 8,
  height   = 6,
  bg       = "white"
)

ggsave(
  filename = file.path(output_dir, "DMT5_distractors.png"),
  plot     = p_dmt5,
  width    = 8,
  height   = 6,
  bg       = "white"
)


toc_sheets <- c(
  "Clean_DD_Responses",
  "Clean_DMT_Responses",
  "DD_Item",
  "DD_Group",
  "DD_Distractors",
  "DD_Score",
  "DMT_Item",
  "DMT_Distractors",
  "DMT_Score"
)
toc_titles <- c(
  "Cleaned DD responses",
  "Cleaned DMT responses",
  "DD: Correct vs Incorrect by Question",
  "DD: Correct vs Incorrect by Question Type",
  "DD: Distractor Distribution",
  "DD: Accuracy & Fluency",
  "DMT: Correct vs Incorrect by Question",
  "DMT: Distractor Distribution",
  "DMT: Accuracy & Fluency"
)

wb <- createWorkbook()
addWorksheet(wb, "Table of Contents")
writeData(
  wb,
  sheet = "Table of Contents",
  tibble(Sheet = toc_sheets, Title = toc_titles),
  withFilter = FALSE
)

for (i in seq_along(toc_sheets)) {
  writeFormula(
    wb,
    sheet    = "Table of Contents",
    x        = sprintf(
                 "HYPERLINK(\"#'%s'!A1\",\"%s\")",
                 toc_sheets[i],
                 toc_titles[i]
               ),
    startCol = 3,
    startRow = i + 1
  )
}


# Cleaned DD responses
addWorksheet(wb, "Clean_DD_Responses")
writeData(wb, "Clean_DD_Responses", arithmetic_DD_df, withFilter = TRUE)

# Cleaned DMT responses
addWorksheet(wb, "Clean_DMT_Responses")
writeData(wb, "Clean_DMT_Responses", arithmetic_DMT_df, withFilter = TRUE)

# DD by question
addWorksheet(wb, "DD_Item")
writeData(wb, "DD_Item", dd_summary, withFilter = TRUE)
insertImage(
  wb,
  sheet    = "DD_Item",
  file      = file.path(output_dir, "AADD_by_item.png"),
  width     = 6,
  height    = 3,
  startRow  = nrow(dd_summary) + 4
)

# DD by question type
addWorksheet(wb, "DD_Group")
writeData(wb, "DD_Group", dd_summary_group, withFilter = TRUE)
insertImage(
  wb,
  sheet    = "DD_Group",
  file      = file.path(output_dir, "DD_by_group.png"),
  width     = 6,
  height    = 4,
  startRow  = nrow(dd_summary_group) + 4
)

# DD distractors
addWorksheet(wb, "DD_Distractors")
writeData(wb, "DD_Distractors", dd_prop %>% select(-pct, -flag), withFilter = TRUE)
insertImage(
  wb,
  sheet    = "DD_Distractors",
  file      = file.path(output_dir, "DD_distractors.png"),
  width     = 6,
  height    = 6,
  startRow  = nrow(dd_prop) + 4
)

# DD Accuracy & Fluency
addWorksheet(wb, "DD_Score")
writeData(wb, "DD_Score", student_score, withFilter = TRUE)
insertImage(
  wb,
  sheet    = "DD_Score",
  file      = file.path(output_dir, "AADD_accuracy.png"),
  width     = 6,
  height    = 3,
  startRow  = nrow(student_score) + 4
)
insertImage(
  wb,
  sheet    = "DD_Score",
  file      = file.path(output_dir, "AADD_fluency.png"),
  width     = 6,
  height    = 3,
  startRow  = nrow(student_score) + 20
)

# DMT by question
addWorksheet(wb, "DMT_Item")
writeData(wb, "DMT_Item", dmt_summary, withFilter = TRUE)
insertImage(
  wb,
  sheet    = "DMT_Item",
  file      = file.path(output_dir, "DMT10_by_item.png"),
  width     = 6,
  height    = 3,
  startRow  = nrow(dmt_summary) + 4
)

# DMT distractors
addWorksheet(wb, "DMT_Distractors")
writeData(wb, "DMT_Distractors", dmt_prop %>% select(-pct, -flag), withFilter = TRUE)
insertImage(
  wb,
  sheet    = "DMT_Distractors",
  file      = file.path(output_dir, "DMT10_distractors.png"),
  width     = 6,
  height    = 6,
  startRow  = nrow(dmt_prop) + 4
)
insertImage(
  wb,
  sheet    = "DMT_Distractors",
  file      = file.path(output_dir, "DMT5_distractors.png"),
  width     = 6,
  height    = 6,
  startRow  = nrow(dmt_prop) + 20
)

# DMT Accuracy & Fluency
addWorksheet(wb, "DMT_Score")
writeData(wb, "DMT_Score", student_score_dmt, withFilter = TRUE)
insertImage(
  wb,
  sheet    = "DMT_Score",
  file      = file.path(output_dir, "DMT10_accuracy.png"),
  width     = 6,
  height    = 3,
  startRow  = nrow(student_score_dmt) + 4
)
insertImage(
  wb,
  sheet    = "DMT_Score",
  file      = file.path(output_dir, "DMT10_fluency.png"),
  width     = 6,
  height    = 3,
  startRow  = nrow(student_score_dmt) + 20
)


saveWorkbook(
  wb,
  file      = file.path(output_dir, "arithmetic_analysis.xlsx"),
  overwrite = TRUE
)