Missing Number Summary Stats

Prepare data

Set up

Code

library(dplyr)
library(tidyr)
library(tibble)
library(here)
library(ggplot2)
library(stringr)
library(purrr)
library(stringr)
library(readr)
library(moments)
library(patchwork)
library(scales)
library(kableExtra)
library(knitr)
library(glue)
library(lubridate)
library(readxl)
library(openxlsx)

probe_dir <- "02-Missing_number"

Question presentation

Load and clean

Removed practice questions
Filtered non-NA responses

Code

results_df <- read_csv(here(probe_dir,"missing_number_raw.csv")) %>% select(-starts_with("..."))  

student_df <- read_csv(here(".","01-Students","Student_Assessment.csv"))%>% select(-starts_with("..."))

student_clean <- student_df %>% 
  mutate(
    GradeGroup = case_when(
      str_detect(`Assessment Event Identifier`, "^year-1a-2025$")          ~ "Year 1A",
      str_detect(`Assessment Event Identifier`, "^year-1b-2025$")          ~ "Year 1B",
      str_detect(`Assessment Event Identifier`, "^foundation-a-2025$")     ~ "Foundation A",
      str_detect(`Assessment Event Identifier`, "^foundation-b-2025$")     ~ "Foundation B",
      TRUE                                                                 ~ NA_character_  
    )
  ) %>% 
  select(Identifier, GradeGroup) %>%          
  distinct()

results_df <- results_df %>% 
  left_join(student_clean, by = "Identifier") 

# Count number of students per grade group
#results_df %>% group_by(`Test Identifier`, GradeGroup) %>% 
#  summarise(student_no = n_distinct(Identifier))

# Fix grade group
results_df <- results_df %>% 
  mutate(
    GradeGroup = case_when(
      `Test Identifier` == "MNC0-100_2025" & GradeGroup == "Foundation B" ~ "Year 1B", 
      `Test Identifier` == "MNA0-20_2025" & GradeGroup == "Year 1B" ~ "Foundation B",
      `Test Identifier` == "MNC0-20_2025" & GradeGroup == "Year 1A" ~ "Foundation A", 
      TRUE ~ GradeGroup
    )
  ) %>% 
  select(-`Assessment Event Identifier`)

# Remove practice items and invalid responses
results_cleaned_df <- results_df %>% 
  filter(!str_detect(`Question Identifier`, "TRANS_INTRO|_Prac|_Tch|-Prac|_prac")) %>% 
  filter( !(`Answer Duration` == 0 & `Answer Response` == 9)) %>% 
  filter( !(is.na(`Answer Duration`) &`Answer Response` == 9))

# De-duplicate: keep only the latest per student × test × question
tmp <- results_cleaned_df %>%
  mutate(.ts = parse_date_time(`Date Completed`, orders = "dmy HM p"))

deduped <- tmp %>%
  group_by(Identifier, `Test Identifier`, `Question Identifier`) %>%
  slice_max(.ts, n = 1, with_ties = FALSE) %>%
  ungroup()

n_removed <- nrow(tmp) - nrow(deduped)
message(n_removed, " duplicate rows removed; keeping only the most recent per Identifier/Test/Question")

results_cleaned_df <- deduped %>% select(-.ts)

# Inspect unique answer responses
#results_cleaned_df %>% 
#  select(`Test Identifier`, `Answer Response`) %>% 
#  arrange(`Test Identifier`, `Answer Response`) %>% 
#  distinct()

# Clean question identifiers for MNA
results_cleaned_df <- results_cleaned_df %>%
  mutate(
    `Question Identifier` = `Question Identifier` %>%
      # remove “s” in MNA0–100 items
      str_replace("^MNAs0-100", "MNA0-100") %>%
      # drop “new” from MNA0–20 items
      str_remove("new$")
  )

# Split probes
results_MNA_df <- results_cleaned_df %>% 
  filter(`Test Identifier` %in% c("MNA0-20_2025", "MNA0-100_2025"))

results_MNC_df <- results_cleaned_df %>% 
  filter(`Test Identifier` %in% c("MNC0-20_2025",  "MNC0-100_2025"))

Map distractor identity

Code

distractor_MNA_df <- read_excel(here(probe_dir,"missing_number_distractor_map.xlsx"), sheet = "MNA") %>% select(-starts_with("..."))  

distractor_MNC_df <- read_excel(here(probe_dir,"missing_number_distractor_map.xlsx"), sheet = "MNC") %>% select(-starts_with("..."))  

# Merge distractor dfs
results_MNA_df <- results_MNA_df %>%
  left_join(
    distractor_MNA_df %>% select(`Question Identifier`,
                             `Question Body`,
                             Correct_Distractor,
                             Correct_Distractor_Desc,
                             Error_type_label,
                             Error_type_desc,
                             Error_position_label,
                             Error_position_desc,
                             Range_label,
                             Range_desc,
                             Cross_decade,
                             Cross_decade_desc),
    by = "Question Identifier"
  )

results_MNC_df <- results_MNC_df %>%
  left_join(
    distractor_MNC_df %>% select(`Question Identifier`,
                             `Question Body`,
                             starts_with("Distractor "),
                             `Correct Distractor`,
                             `Position of blank`,
                             Range_label,
                             Range_desc,
                             Cross_decade_label,
                             Cross_decade_desc,
                             `Missing Number`),
    by = "Question Identifier"
  )

# Map distractor identity 
results_MNC_df <- results_MNC_df %>%
  mutate(
    Number_Selected = case_when(
      `Answer Response` == 1 ~ `Distractor 1`,
      `Answer Response` == 2 ~ `Distractor 2`,
      `Answer Response` == 3 ~ `Distractor 3`,
      `Answer Response` == 4 ~ `Distractor 4`,
      `Answer Response` == 9 ~ NA_real_
    ),
    # for plotting convenience
    Selected = case_when(
      `Answer Response` == 9     ~ "blank",
      TRUE                        ~ as.character(Number_Selected)
    )
  )

# Recompute `Is Answer Correct` and `Raw Score`
results_MNA_df <- results_MNA_df %>% 
  mutate(
    `Is Answer Correct` = (`Answer Response` == Correct_Distractor),
    `Raw Score` = as.integer(`Answer Response` == Correct_Distractor)
  ) %>% select(- `Correct Answer`)

results_MNC_df <- results_MNC_df %>% 
  mutate(
    `Is Answer Correct` = (`Answer Response` == `Correct Distractor`),
    `Raw Score` = as.integer(`Answer Response` == `Correct Distractor`)
  ) %>% select(- `Correct Answer`)

# Re-arrange columns
results_MNA_df <- results_MNA_df %>%
  select(
    Test,
    `Test Identifier`,
    `Question Identifier`,
    `Question Name`,
    `Question Type`,
    `Question Body`,
    Identifier,
    GradeGroup,
    `Answer Response`,
    `Answer Duration`,
    `Is Answer Correct`,
    `Raw Score`,
    `Distractor Count`,
    `Date Completed`,
    Correct_Distractor,
    Correct_Distractor_Desc,
    Error_type_label,
    Error_type_desc,
    Error_position_label,
    Error_position_desc,
    Range_label,
    Range_desc,
    Cross_decade,
    Cross_decade_desc,
    everything()
  )

results_MNC_df <- results_MNC_df %>%
  select(
    Test,
    `Test Identifier`,
    `Question Identifier`,
    `Question Name`,
    `Question Type`,
    `Question Body`,
    Identifier,
    GradeGroup,
    `Answer Response`,
    `Answer Duration`,
    `Is Answer Correct`,
    `Raw Score`,
    `Distractor Count`,
    `Missing Number`,
    `Number_Selected`,
    Selected,
    `Date Completed`,
    `Distractor 1`,
    `Distractor 2`,
    `Distractor 3`,
    `Distractor 4`,
    `Correct Distractor`,
    `Position of blank`,
    Range_label,
    Range_desc,
    Cross_decade_label,
    Cross_decade_desc,
    everything()
  )

Explore

Overview - item

Missing number ascending (MNA)

Code

mna_summary <- results_MNA_df %>%
  group_by(Test, `Test Identifier`, `Question Identifier`) %>%
  summarise(
    total     = n(),
    correct   = sum(`Is Answer Correct`),
    incorrect = total - correct,
    .groups = "drop"
  ) %>%
  mutate(
    pct_corr = correct   / total,
    pct_inc  = incorrect / total
  ) %>%
  pivot_longer(
    cols      = c(pct_corr, pct_inc),
    names_to  = "Outcome",
    values_to = "Proportion"
  ) %>%
  mutate(
    Outcome = recode(Outcome,
                     pct_corr = "Correct",
                     pct_inc  = "Incorrect"),
    Outcome = factor(Outcome, levels = c("Incorrect","Correct"))
  )

q_levels <- sort(unique(mna_summary$`Question Identifier`))
mna_summary <- mna_summary %>%
  mutate(`Question Identifier` = factor(`Question Identifier`, levels = q_levels))


build_overview_plot <- function(df, test_id, plot_title) {
  dat <- df %>% filter(`Test Identifier` == test_id)
  lab_dat <- dat %>%
    distinct(`Question Identifier`, total) %>%
    mutate(label = paste0("n = ", scales::comma(total)))
  
  ggplot(dat, aes(Proportion, `Question Identifier`, fill = Outcome)) +
    geom_col(position = "fill", colour = "grey90") +
    geom_text(
      data = lab_dat, inherit.aes = FALSE,
      aes(x = 1.01, y = `Question Identifier`, label = label),
      hjust = 0, size = 3
    ) +
    coord_cartesian(xlim = c(0, 1.12), clip = "off") +
    scale_x_continuous(
      labels       = percent_format(accuracy = 1),
      breaks       = seq(0, 1, .25),
      minor_breaks = seq(0, 1, .05),
      expand       = c(.01, .01)
    ) +
    scale_y_discrete(labels = \(x) str_remove(x, "^.*_")) +
    scale_fill_manual(values = c("Incorrect" = "#d95f02", "Correct" = "#1b9e77")) +
    labs(title = plot_title,
         x     = "Proportion of responses",
         y     = "Question",
         fill  = NULL) +
    theme_minimal(base_size = 12) +
    theme(
      axis.ticks         = element_blank(),
      panel.grid.major.y = element_blank(),
      panel.grid.minor   = element_blank(),
      legend.position    = "bottom",
      legend.direction   = "horizontal",
      axis.text.y = element_text(
        hjust  = 1,
        margin = margin(r = -12)
      ),
      plot.margin = margin(5.5, 25, 5.5, 5.5)
    )
}

p_mna100 <- build_overview_plot(mna_summary, "MNA0-100_2025", "Missing Number Ascending 0-100")
p_mna20  <- build_overview_plot(mna_summary, "MNA0-20_2025", "Missing Number Ascending 0-20")

print(p_mna20)

Code

print(p_mna100)

Missing number choice (MNC)

Code

mnc_summary <- results_MNC_df %>%
  group_by(Test, `Test Identifier`, `Question Identifier`) %>%
  summarise(
    total     = n(),
    correct   = sum(`Is Answer Correct`),
    incorrect = total - correct,
    .groups = "drop"
  ) %>%
  mutate(
    pct_corr = correct   / total,
    pct_inc  = incorrect / total
  ) %>%
  pivot_longer(
    cols      = c(pct_corr, pct_inc),
    names_to  = "Outcome",
    values_to = "Proportion"
  ) %>%
  mutate(
    Outcome = recode(Outcome,
                     pct_corr = "Correct",
                     pct_inc  = "Incorrect"),
    Outcome = factor(Outcome, levels = c("Incorrect","Correct"))
  )

q_levels <- sort(unique(mnc_summary$`Question Identifier`))
mnc_summary <- mnc_summary %>%
  mutate(`Question Identifier` = factor(`Question Identifier`, levels = q_levels))


p_mnc100 <- build_overview_plot(mnc_summary, "MNC0-100_2025", "Missing Number Choice 0-100")
p_mnc20  <- build_overview_plot(mnc_summary, "MNC0-20_2025" , "Missing Number Choice 0-20")

print(p_mnc100)

Code

print(p_mnc20)

Overview - question type

Missing number ascending

Code

# 1. Summarise across both Test types & any grouping column
summarise_group_all <- function(df, group_id) {
  df %>%
    group_by(Test, !!sym(group_id)) %>%
    summarise(
      total     = n(),
      correct   = sum(`Is Answer Correct`),
      incorrect = total - correct,
      .groups   = "drop"
    ) %>%
    mutate(
      pct_corr = correct   / total,
      pct_inc  = incorrect / total
    ) %>%
    pivot_longer(
      c(pct_inc, pct_corr),
      names_to  = "Outcome",
      values_to = "Proportion"
    ) %>%
    mutate(
      Outcome = factor(
        if_else(Outcome == "pct_inc", "Incorrect", "Correct"),
        levels = c("Incorrect", "Correct")
      ),
      # lock in whatever order the IDs appear
      !!sym(group_id) := factor(!!sym(group_id),
                                levels = unique(!!sym(group_id)))
    )
}

# 2. Build the combined facet plot + annotate n + neat theme
build_combined_group_plot <- function(df, group_id, group_desc, plot_title) {
  # 1. Replace NA labels/descriptions with "Other"
  df2 <- df %>%
    mutate(
      !!sym(group_desc) := replace_na(!!sym(group_desc), "Other")
    )
  
  # 2. Summarise across both Test types
  sum_df <- df2 %>%
    group_by(Test, !!sym(group_id)) %>%
    summarise(
      total     = n(),
      correct   = sum(`Is Answer Correct`),
      incorrect = total - correct,
      .groups   = "drop"
    ) %>%
    mutate(
      pct_corr = correct   / total,
      pct_inc  = incorrect / total
    ) %>%
    pivot_longer(
      c(pct_inc, pct_corr),
      names_to  = "Outcome",
      values_to = "Proportion"
    ) %>%
    mutate(
      Outcome = factor(
        if_else(Outcome == "pct_inc", "Incorrect", "Correct"),
        levels = c("Incorrect","Correct")
      ),
      !!sym(group_id) := factor(!!sym(group_id),
                                levels = unique(!!sym(group_id)))
    )
  
  # 3. Build the caption
  cap <- df2 %>%
    distinct(!!sym(group_id), !!sym(group_desc)) %>%
    arrange(!!sym(group_id)) %>%
    transmute(pair = paste0(!!sym(group_id), ": ", !!sym(group_desc))) %>%
    pull(pair) %>%
    str_c(collapse = " | ")
  
  # 4. Label data for n
  lab_df <- sum_df %>%
    distinct(Test, !!sym(group_id), total) %>%
    mutate(label = paste0("n = ", comma(total)))
  
  # 5. Plot
  ggplot(sum_df,
         aes(x = Proportion,
             y = !!sym(group_id),
             fill = Outcome)) +
    geom_col(position = "fill", colour = "grey90") +
    geom_text(
      data        = lab_df,
      inherit.aes = FALSE,
      aes(x = 1.01,
          y = !!sym(group_id),
          label = label),
      hjust = 0, size = 3
    ) +
    facet_wrap(~Test, scales = "free_y") +
    coord_cartesian(xlim = c(0, 1.12), clip = "off") +
    scale_x_continuous(
      labels       = percent_format(accuracy = 1),
      breaks       = seq(0, 1, .25),
      minor_breaks = seq(0, 1, .05),
      expand       = c(0.01, 0.01)
    ) +
    scale_y_discrete(labels = \(x) str_remove(x, "^.*_")) +
    scale_fill_manual(values = c("Incorrect" = "#d95f02",
                                 "Correct"   = "#1b9e77")) +
    geom_text(
      aes(label = scales::percent(Proportion, accuracy = 1)),
      position = position_fill(vjust = 0.5),
      colour   = "white",
      size     = 3
    ) + 
    labs(
      title = plot_title,
      x       = NULL,
      y       = str_replace_all(group_id, "_", " "),
      fill    = NULL,
      caption = cap
    ) +
    theme_minimal(base_size = 12) +
    theme(
      axis.ticks         = element_blank(),
      panel.grid.major.y = element_blank(),
      panel.grid.minor   = element_blank(),
      legend.position    = "bottom",
      legend.direction   = "horizontal",
      axis.text.y = element_text(
        hjust  = 1,
        margin = margin(r = -6)
      ),
      plot.margin  = margin(5.5, 25, 5.5, 5.5),
      plot.caption = element_text(hjust = 0)
    )
}

# 3. Apply it to each grouping type
plot_error_type     <- build_combined_group_plot(results_MNA_df,
                                                 "Error_type_label",
                                                 "Error_type_desc",
                                                 "Performance by error type")
plot_error_position <- build_combined_group_plot(results_MNA_df,
                                                 "Error_position_label",
                                                 "Error_position_desc",
                                                 "Performance by error position")
plot_range          <- build_combined_group_plot(results_MNA_df,
                                                 "Range_label",
                                                 "Range_desc",
                                                 "Performance by range")
plot_cross_decade   <- build_combined_group_plot(results_MNA_df,
                                                 "Cross_decade",
                                                 "Cross_decade_desc",
                                                 "Performance by cross decade")


plot_error_type

Code

plot_error_position

Code

plot_range

Code

plot_cross_decade

Missing number choice

Code

# 1. Range
plot_mnc_range <- build_combined_group_plot(
  df         = results_MNC_df,
  group_id   = "Range_label",
  group_desc = "Range_desc",
  "Performance by range"
)

# 2. Cross-decade
plot_mnc_cross <- build_combined_group_plot(
  df         = results_MNC_df,
  group_id   = "Cross_decade_label",
  group_desc = "Cross_decade_desc",
  "Performance by cross decade"
)

# 3. Display them
plot_mnc_range

Code

plot_mnc_cross

Distractor distribution

Code

# 1. Build a “response proportions” table
response_prop_mnc <- results_MNC_df %>%
  transmute(
    Test        = Test,
    Item        = `Question Identifier`,
    Duration    = `Answer Duration`,
    RawResp     = Number_Selected,
    CorrectAns  = as.character(`Missing Number`)
  ) %>%
  mutate(
    Resp = case_when(
      is.na(RawResp)       ~ "na",
      TRUE               ~ as.character(RawResp)
    )
  ) %>%
  # count by test × item × response
  count(Test, Item, Resp, CorrectAns, name = "n") %>%
  group_by(Test, Item) %>%
  mutate(
    total = sum(n),
    pct   = n / total,
    Outcome = if_else(Resp == CorrectAns, "Correct", "Incorrect")
  ) %>%
  ungroup()

# 2. Plotting function
plot_response_distribution <- function(test_name, facets_per_row = 5) {
  dat <- response_prop_mnc %>% filter(Test == test_name)
  
  # friendly facet strips
  strip_labels <- dat %>%
    distinct(Item, total) %>%
    mutate(strip = paste0(str_remove(Item, "^.*_"), " (n = ", comma(total), ")")) %>%
    select(Item, strip) %>%
    deframe()
  
  # build the plot
  ggplot(dat, aes(
      x    = factor(Resp, levels = c(
                sort(setdiff(unique(dat$Resp), "blank")), 
                "blank"
              )),
      y    = pct,
      fill = Outcome   
    )) +
    geom_col(width = 0.8) +
    facet_wrap(
      ~ Item,
      ncol     = facets_per_row,
      scales   = "free_x",
      labeller = labeller(Item = strip_labels)
    ) +
    scale_y_continuous(
      labels = percent_format(accuracy = 1),
      expand = expansion(mult = c(0, 0.05))
    ) +
    scale_fill_manual(
      values = c(Correct = "steelblue", Incorrect = "grey70")
    ) +
    labs(
      title = test_name,
      x     = "Answer chosen (“blank” = no choice)",
      y     = "% of students",
      fill  = NULL
    ) +
    theme_minimal(base_size = 11) +
    theme(
      panel.grid.major.x = element_blank(),
      legend.position    = "bottom",
      legend.direction   = "horizontal",
      axis.text.x        = element_text(angle = 0, hjust = 1)
    )
}

# 3. Example usage
plot_mnc_choice_20 <- plot_response_distribution("Missing Number Choice 0-20")
plot_mnc_choice_100 <- plot_response_distribution("Missing Number Choice 0-100")

plot_mnc_choice_20

Code

plot_mnc_choice_100

Test duration stats (seconds)

Code

student_time <- results_cleaned_df %>%
  group_by(Identifier, `Test Identifier`, Test) %>%
  summarise(
    total_minutes = sum(`Answer Duration`, na.rm = TRUE),
    .groups = "drop"
  )

student_time %>% 
  group_by(`Test Identifier`, Test) %>% 
  summarise(
    mean_t = mean(total_minutes),
    median_t = median(total_minutes),
    sd_t = sd(total_minutes),
    max_t = max(total_minutes),
    min_t = min(total_minutes)
    )

# A tibble: 4 × 7
# Groups:   Test Identifier [4]
  `Test Identifier` Test                       mean_t median_t  sd_t max_t min_t
  <chr>             <chr>                       <dbl>    <dbl> <dbl> <dbl> <dbl>
1 MNA0-100_2025     Missing Number Ascending …   56.1       56  2.58    82    13
2 MNA0-20_2025      Missing Number Ascending …   57.8       57 44.1   1572    23
3 MNC0-100_2025     Missing Number Choice 0-1…   55.9       56  3.18    68    15
4 MNC0-20_2025      Missing Number Choice 0-20   56.9       57  3.42   100    24

Performance

Compute accuracy and response correct per minute (RCPM) for each student x test

Code

student_metrics <- results_cleaned_df %>%
  group_by(Identifier, `Test Identifier`, Test) %>%
  summarise(
    attempted      = n(),
    correct_count = sum(`Raw Score`, na.rm = TRUE),
    total_minutes  = sum(`Answer Duration`, na.rm = TRUE) / 60,
    .groups = "drop"
  ) %>%
  mutate(
    Accuracy = correct_count / attempted,
    RCPM     = correct_count / total_minutes
  )

test_summary <- student_metrics %>% 
  group_by(`Test Identifier`) %>% 
  summarise(
    Acc_mean   = mean(Accuracy, na.rm = TRUE),
    Acc_sd     = sd(Accuracy,   na.rm = TRUE),
    Acc_skew   = skewness(Accuracy, na.rm = TRUE),
    Acc_kurt   = kurtosis(Accuracy, na.rm = TRUE),
    RCPM_mean  = mean(RCPM, na.rm = TRUE),
    RCPM_sd    = sd(RCPM,   na.rm = TRUE),
    RCPM_skew  = skewness(RCPM, na.rm = TRUE),
    RCPM_kurt  = kurtosis(RCPM, na.rm = TRUE),
    .groups    = "drop"
  ) 

knitr::kable(
  test_summary,
  digits     = 3,
  caption    = "Test results summary"
)

Test results summary
Test Identifier	Acc_mean	Acc_sd	Acc_skew	Acc_kurt	RCPM_mean	RCPM_sd	RCPM_skew	RCPM_kurt
MNA0-100_2025	0.753	0.186	-1.124	4.629	7.082	3.389	0.779	4.326
MNA0-20_2025	0.652	0.235	-0.720	3.139	5.481	3.627	1.887	12.241
MNC0-100_2025	0.764	0.233	-1.343	3.977	7.219	3.899	0.594	3.171
MNC0-20_2025	0.573	0.315	-0.330	1.884	4.316	3.357	1.077	4.542

Export

Code

# 1. Create workbook
wb <- createWorkbook()

# 2. Table of Contents
addWorksheet(wb, "Table_of_Contents")
toc <- data.frame(
  Sheet       = c(
    "MNA_cleaned", 
    "MNC_cleaned", 
    "MNA_summary", 
    "MNC_summary", 
    "MNC_distractors",
    "student_metrics",
    "test_summary"
  ),
  Description = c(
    "Cleaned MNA data (both MNA0-20 and MNA0-100)",
    "Cleaned MNC data (both MNC0-20 and MNC0-100)",
    "MNA item‐level summary plus overview plots for MNA0-20 & MNA0-100",
    "MNC item‐level summary plus overview plots for MNC0-20 & MNC0-100",
    "MNC response proportions & distractor‐distribution plots for 0–20 & 0–100",
    "Per‐student accuracy & RCPM metrics for each test",
    "Aggregated test‐level summary of Accuracy & RCPM"
  ),
  stringsAsFactors = FALSE
)
writeData(wb, "Table_of_Contents", toc)

# 3. Cleaned data sheets
addWorksheet(wb, "MNA_cleaned")
writeData(wb, "MNA_cleaned", results_MNA_df)

addWorksheet(wb, "MNC_cleaned")
writeData(wb, "MNC_cleaned", results_MNC_df)

# 4. MNA summary + plots
addWorksheet(wb, "MNA_summary")
writeData(wb, "MNA_summary", mna_summary)

# Print and insert p_mna20
print(p_mna20)
insertPlot(
  wb, 
  sheet    = "MNA_summary", 
  startRow = 5, 
  startCol = 12, 
  width    = 6, 
  height   = 4, 
  units    = "in"
)
# Print and insert p_mna100
print(p_mna100)
insertPlot(
  wb, 
  sheet    = "MNA_summary", 
  startRow = 20, 
  startCol = 12, 
  width    = 6, 
  height   = 4, 
  units    = "in"
)

# 5. MNC summary + plots
addWorksheet(wb, "MNC_summary")
writeData(wb, "MNC_summary", mnc_summary)

print(p_mnc20)
insertPlot(
  wb, 
  sheet    = "MNC_summary", 
  startRow = 5, 
  startCol = 12, 
  width    = 6, 
  height   = 4, 
  units    = "in"
)
print(p_mnc100)
insertPlot(
  wb, 
  sheet    = "MNC_summary", 
  startRow = 20, 
  startCol = 12, 
  width    = 6, 
  height   = 4, 
  units    = "in"
)

# 6. MNC distractor distributions
addWorksheet(wb, "MNC_distractors")
writeData(wb, "MNC_distractors", response_prop_mnc)

print(plot_mnc_choice_20)
insertPlot(
  wb, 
  sheet    = "MNC_distractors", 
  startRow = 5, 
  startCol = 12, 
  width    = 8, 
  height   = 10, 
  units    = "in"
)
print(plot_mnc_choice_100)
insertPlot(
  wb, 
  sheet    = "MNC_distractors", 
  startRow = 20, 
  startCol = 12, 
  width    = 8, 
  height   = 10, 
  units    = "in"
)

# 7. Student‐level metrics
addWorksheet(wb, "student_metrics")
writeData(wb, "student_metrics", student_metrics)

# 8. Test‐level summary
addWorksheet(wb, "test_summary")
writeData(wb, "test_summary", test_summary)

# 9. Save workbook
saveWorkbook(
  wb, 
  file      = here(probe_dir, "Missing_Number_Summary_Stats.xlsx"), 
  overwrite = TRUE
)