Magnitude Comparison

Question presentation

Prepare

Set up

Code
library(dplyr)
library(tidyr)
library(tibble)
library(here)
library(ggplot2)
library(stringr)
library(purrr)
library(stringr)
library(readr)
library(moments)
library(patchwork)
library(scales)
library(kableExtra)
library(knitr)
library(glue)
library(lubridate)
library(readxl)
library(openxlsx)
library(car)       
library(ggpubr) 

probe_dir <- "02-Magnitude_comparison"

Load and clean

  • Removed practice questions

  • Removed (response duration = 0 sec & response is NA), and (response duration is NA)

Code
results_df <- read_csv(here(probe_dir,"magnitude_comp_raw.csv")) %>% select(-starts_with("..."))  

student_df <- read_csv(here(".","01-Students","Student_Assessment.csv"))%>% select(-starts_with("..."))

student_clean <- student_df %>% 
  mutate(
    GradeGroup = case_when(
      str_detect(`Assessment Event Identifier`, "^year-1a-2025$")          ~ "Year 1A",
      str_detect(`Assessment Event Identifier`, "^year-1b-2025$")          ~ "Year 1B",
      str_detect(`Assessment Event Identifier`, "^foundation-a-2025$")     ~ "Foundation A",
      str_detect(`Assessment Event Identifier`, "^foundation-b-2025$")     ~ "Foundation B",
      TRUE                                                                 ~ NA_character_  
    )
  ) %>% 
  select(Identifier, GradeGroup) %>%          
  distinct()

results_df <- results_df %>% 
  left_join(student_clean, by = "Identifier") 

# Count number of students per grade group
#results_df %>% group_by(`Test Identifier`, GradeGroup) %>% 
#  summarise(student_no = n_distinct(Identifier))


# Fix typo MG --> MC
results_df <- results_df %>% 
  mutate(
    `Test Identifier` = str_replace_all(
      `Test Identifier`, 
      c("^MG0-20" = "MC0-20",
        "^MG0-100" = "MC0-100")
    )
  )
  

# Fix grade group
results_df <- results_df %>% 
  mutate(
    GradeGroup = case_when(
      `Test Identifier` == "MC0-100_2025" & GradeGroup == "Foundation B" ~ "Year 1B", 
      `Test Identifier` == "MC0-20_2025" & GradeGroup == "Year 1B" ~ "Foundation B",
      `Test Identifier` == "MC0-20_2025" & GradeGroup == "Year 1A" ~ "Foundation A", 
      TRUE ~ GradeGroup
    )
  ) %>% 
  select(-`Assessment Event Identifier`)

# Remove practice items and invalid responses
results_cleaned_df <- results_df %>% 
  filter(!str_detect(`Question Identifier`, "TRANS_INTRO|_Prac|_Tch|-Prac|_prac")) %>% 
  filter( !(`Answer Duration` == 0 & is.na(`Answer Response`) )) %>% 
  filter( !is.na(`Answer Duration`))

# De-duplicate: keep only the latest per student × test × question
tmp <- results_cleaned_df %>%
  mutate(.ts = parse_date_time(`Date Completed`, orders = "dmy HM p"))

deduped <- tmp %>%
  group_by(Identifier, `Test Identifier`, `Question Identifier`) %>%
  slice_max(.ts, n = 1, with_ties = FALSE) %>%
  ungroup()

n_removed <- nrow(tmp) - nrow(deduped)
message(n_removed, " duplicate rows removed; keeping only the most recent per Identifier/Test/Question")

results_cleaned_df <- deduped %>% select(-.ts)

# Inspect unique answer responses
#results_cleaned_df %>% 
#  select(`Test Identifier`, `Answer Response`) %>% 
#  arrange(`Test Identifier`, `Answer Response`) %>% 
#  distinct()

Map distractor identity

Code
distractor_df <- read_csv(here(probe_dir,"magnitude_comp_answer_key.csv")) %>% select(-starts_with("..."))

# Merge distractor dfs
results_cleaned_df <- results_cleaned_df %>%
  left_join(
    distractor_df %>% select(`Question Identifier`,
                             Hotspot1,
                             Hotspot2,
                             CorrectHotspot,
                             Ratio_label,
                             Ratio_desc,
                             Congruence_label,
                             Congruence_desc,
                             Decade_label,
                             Decade_desc),
    by = "Question Identifier"
  )

## Check answer key consistency
#results_cleaned_df %>%
#     count(`Correct Answer`, CorrectHotspot, name = "n") %>%
#     arrange(`Correct Answer`, CorrectHotspot)

#results_cleaned_df %>%
#  filter(`Correct Answer` == "10", CorrectHotspot ==2) %>%
#  distinct(`Question Identifier`) 

# "MC0-20_026" has wrong `Correct Answer`

# Recompute `Is Answer Correct` and `Raw Score`
results_cleaned_df <- results_cleaned_df %>% 
  mutate(
    AnswerResponse = case_when(
      `Answer Response` == "01" ~ 2L,
      `Answer Response` == "10" ~ 1L,
      TRUE                      ~ NA_integer_
    ),
    Is.Answer.Correct = if_else(
      is.na(AnswerResponse),
      FALSE,
      AnswerResponse == CorrectHotspot
    ),
    RawScore = if_else(
      is.na(AnswerResponse),
      0L,
      as.integer(AnswerResponse == CorrectHotspot)
    )
  ) %>% 
  select(-`Correct Answer`, -`Answer Response`)

results_cleaned_df <- results_cleaned_df %>%
  select(
    Identifier,
    Test,
    `Test Identifier`,
    GradeGroup,
    `Question Identifier`,
    `Question Name`,
    `Question Type`,
    Hotspot1,
    Hotspot2,
    AnswerResponse,
    Is.Answer.Correct,
    RawScore,
    `Answer Duration`,
    `Distractor Count`,
    CorrectHotspot,
    Ratio_label,
    Ratio_desc,
    Congruence_label,
    Congruence_desc,
    Decade_label,
    Decade_desc,
    `Date Completed`
  )

head(results_cleaned_df)
# A tibble: 6 × 22
  Identifier            Test  `Test Identifier` GradeGroup `Question Identifier`
  <chr>                 <chr> <chr>             <chr>      <chr>                
1 00107783-010a-f011-b… Magn… MC0-20_2025       Foundatio… MC0-20_001           
2 00107783-010a-f011-b… Magn… MC0-20_2025       Foundatio… MC0-20_002           
3 00107783-010a-f011-b… Magn… MC0-20_2025       Foundatio… MC0-20_003           
4 00107783-010a-f011-b… Magn… MC0-20_2025       Foundatio… MC0-20_004           
5 00107783-010a-f011-b… Magn… MC0-20_2025       Foundatio… MC0-20_005           
6 00107783-010a-f011-b… Magn… MC0-20_2025       Foundatio… MC0-20_006           
# ℹ 17 more variables: `Question Name` <chr>, `Question Type` <chr>,
#   Hotspot1 <dbl>, Hotspot2 <dbl>, AnswerResponse <int>,
#   Is.Answer.Correct <lgl>, RawScore <int>, `Answer Duration` <dbl>,
#   `Distractor Count` <dbl>, CorrectHotspot <dbl>, Ratio_label <chr>,
#   Ratio_desc <chr>, Congruence_label <chr>, Congruence_desc <chr>,
#   Decade_label <chr>, Decade_desc <chr>, `Date Completed` <chr>

Explore

Overview

Student breakdown by test

Code
results_cleaned_df %>%  
  group_by(`Test Identifier`, GradeGroup) %>% 
  summarise(student_count = n_distinct(Identifier))
# A tibble: 4 × 3
# Groups:   Test Identifier [2]
  `Test Identifier` GradeGroup   student_count
  <chr>             <chr>                <int>
1 MC0-100_2025      Year 1A               1295
2 MC0-100_2025      Year 1B               1089
3 MC0-20_2025       Foundation A          1263
4 MC0-20_2025       Foundation B          1203

Tests x Items

Code
results_cleaned_df %>% group_by(`Test Identifier`) %>% summarise(No_items = n_distinct(`Question Identifier`))
# A tibble: 2 × 2
  `Test Identifier` No_items
  <chr>                <int>
1 MC0-100_2025            49
2 MC0-20_2025             44

Item correct vs incorrect

Code
# 1) Summarise per question
mc_summary <- results_cleaned_df %>%
  group_by(Test, `Test Identifier`, `Question Identifier`) %>%
  summarise(
    # counts
    total     = n(),
    correct   = sum(`Is.Answer.Correct`),
    incorrect = total - correct,
    # duration: all students
    mean_dur_all = mean(`Answer Duration`, na.rm = TRUE),
    sd_dur_all   = sd(  `Answer Duration`, na.rm = TRUE),
    # duration: only correct
    mean_dur_corr = mean(`Answer Duration`[`Is.Answer.Correct`], na.rm = TRUE),
    sd_dur_corr   = sd(  `Answer Duration`[`Is.Answer.Correct`], na.rm = TRUE),
    # duration: only incorrect
    mean_dur_inc = mean(`Answer Duration`[!`Is.Answer.Correct`], na.rm = TRUE),
    sd_dur_inc   = sd(  `Answer Duration`[!`Is.Answer.Correct`], na.rm = TRUE),
    .groups = "drop"
  ) %>%
  mutate(
    pct_corr = correct   / total,
    pct_inc  = incorrect / total
  ) %>%
  pivot_longer(
    cols      = c(pct_corr, pct_inc),
    names_to  = "Outcome",
    values_to = "Proportion"
  ) %>%
  mutate(
    Outcome = dplyr::recode(Outcome,
                     pct_corr = "Correct",
                     pct_inc  = "Incorrect"),
    Outcome = factor(Outcome, levels = c("Incorrect","Correct"))
  )


# 2) Lock in a sensible question‐order on the y‐axis
q_levels <- mc_summary %>% 
  pull(`Question Identifier`) %>% 
  unique() %>% 
  sort()
mc_summary <- mc_summary %>%
  mutate(`Question Identifier` = factor(`Question Identifier`, levels = q_levels))

# 3) Plotting function
build_mc_plot <- function(df, test_id, plot_title) {
  dat <- df %>% filter(`Test Identifier` == test_id)
  lab_dat <- dat %>%
    distinct(`Question Identifier`, total) %>%
    mutate(label = paste0("n = ", scales::comma(total)))
  
  ggplot(dat, aes(x = Proportion, y = `Question Identifier`, fill = Outcome)) +
    geom_col(position = "fill", colour = "grey90") +
    geom_text(
      data = lab_dat, inherit.aes = FALSE,
      aes(x = 1.01, y = `Question Identifier`, label = label),
      hjust = 0, size = 3
    ) +
    coord_cartesian(xlim = c(0, 1.12), clip = "off") +
    scale_x_continuous(
      labels       = percent_format(accuracy = 1),
      breaks       = seq(0, 1, .25),
      minor_breaks = seq(0, 1, .05),
      expand       = c(.01, .01)
    ) +
    scale_y_discrete(labels = \(x) str_remove(x, "^.*_")) +
    labs(
      title = plot_title,
      x     = "Proportion of responses",
      y     = "Question",
      fill  = NULL
    ) +
    theme_minimal(base_size = 12) +
    theme(
      axis.ticks         = element_blank(),
      panel.grid.major.y = element_blank(),
      panel.grid.minor   = element_blank(),
      legend.position    = "bottom",
      legend.direction   = "horizontal",
      axis.text.y = element_text(
        hjust  = 1,
        margin = margin(r = -12)
      ),
      plot.margin = margin(5.5, 25, 5.5, 5.5)
    )
}

# 4) Plot 
p_mc20 <- build_mc_plot(
  mc_summary,
  test_id    = "MC0-20_2025",
  plot_title = "Magnitude Comparison 0–20: % Correct vs Incorrect by Question"
)

print(p_mc20)

Code
p_mc100 <- build_mc_plot(
  mc_summary,
  test_id    = "MC0-100_2025",
  plot_title = "Magnitude Comparison 0–100: % Correct vs Incorrect by Question"
)

print(p_mc100)

Code
# Flag floor/ceiling
item_difficulty <- results_cleaned_df %>%
  group_by(`Test Identifier`, `Question Identifier`) %>%
  summarise(
    count = n_distinct(Identifier),
    mean_score = mean(RawScore),  
    .groups      = "drop"
  ) %>%
  mutate(
    flag = case_when(
      mean_score >= 0.90 ~ "Ceiling (>90%)",
      mean_score <= 0.10 ~ "Floor (<10%)",
      TRUE                 ~ "OK"
    )
  ) %>% 
  arrange(
    flag
  )

item_difficulty %>% select(-`Test Identifier`) %>% 
  knitr::kable(caption = "Item difficulty (flag >90% | <10%)", digits = 2)
Item difficulty (flag >90% | <10%)
Question Identifier count mean_score flag
MC0-100_001 2383 0.95 Ceiling (>90%)
MC0-100_006 2323 0.94 Ceiling (>90%)
MC0-100_007 2301 0.93 Ceiling (>90%)
MC0-100_011 2091 0.94 Ceiling (>90%)
MC0-100_045 1 1.00 Ceiling (>90%)
MC0-100_046 1 1.00 Ceiling (>90%)
MC0-100_047 1 1.00 Ceiling (>90%)
MC0-100_048 1 1.00 Ceiling (>90%)
MC0-100_049 1 1.00 Ceiling (>90%)
MC0-20_001 2465 0.93 Ceiling (>90%)
MC0-20_002 2458 0.94 Ceiling (>90%)
MC0-20_004 2443 0.92 Ceiling (>90%)
MC0-20_012 1975 0.92 Ceiling (>90%)
MC0-20_013 1870 0.91 Ceiling (>90%)
MC0-20_014 1760 0.92 Ceiling (>90%)
MC0-20_017 1315 0.91 Ceiling (>90%)
MC0-20_019 930 0.90 Ceiling (>90%)
MC0-20_032 23 0.91 Ceiling (>90%)
MC0-20_034 15 0.93 Ceiling (>90%)
MC0-20_035 11 0.91 Ceiling (>90%)
MC0-20_037 7 1.00 Ceiling (>90%)
MC0-20_038 6 1.00 Ceiling (>90%)
MC0-20_039 4 1.00 Ceiling (>90%)
MC0-20_041 4 1.00 Ceiling (>90%)
MC0-20_042 4 1.00 Ceiling (>90%)
MC0-20_043 4 1.00 Ceiling (>90%)
MC0-20_044 1 1.00 Ceiling (>90%)
MC0-100_002 2374 0.85 OK
MC0-100_003 2365 0.70 OK
MC0-100_004 2359 0.86 OK
MC0-100_005 2349 0.83 OK
MC0-100_008 2285 0.86 OK
MC0-100_009 2237 0.89 OK
MC0-100_010 2171 0.54 OK
MC0-100_012 2007 0.90 OK
MC0-100_013 1875 0.89 OK
MC0-100_014 1739 0.88 OK
MC0-100_015 1600 0.80 OK
MC0-100_016 1429 0.82 OK
MC0-100_017 1268 0.82 OK
MC0-100_018 1105 0.86 OK
MC0-100_019 965 0.81 OK
MC0-100_020 843 0.86 OK
MC0-100_021 724 0.88 OK
MC0-100_022 604 0.88 OK
MC0-100_023 506 0.84 OK
MC0-100_024 402 0.81 OK
MC0-100_025 310 0.75 OK
MC0-100_026 231 0.78 OK
MC0-100_027 170 0.84 OK
MC0-100_028 140 0.68 OK
MC0-100_029 97 0.74 OK
MC0-100_030 64 0.78 OK
MC0-100_031 45 0.69 OK
MC0-100_032 29 0.69 OK
MC0-100_033 19 0.63 OK
MC0-100_034 12 0.58 OK
MC0-100_035 8 0.38 OK
MC0-100_036 7 0.43 OK
MC0-100_037 5 0.40 OK
MC0-100_038 4 0.75 OK
MC0-100_039 3 0.33 OK
MC0-100_040 2 0.50 OK
MC0-100_041 2 0.50 OK
MC0-100_042 2 0.50 OK
MC0-100_043 2 0.50 OK
MC0-100_044 2 0.50 OK
MC0-20_003 2453 0.86 OK
MC0-20_005 2437 0.78 OK
MC0-20_006 2415 0.87 OK
MC0-20_007 2397 0.67 OK
MC0-20_008 2319 0.58 OK
MC0-20_009 2236 0.89 OK
MC0-20_010 2166 0.84 OK
MC0-20_011 2061 0.87 OK
MC0-20_015 1632 0.86 OK
MC0-20_016 1496 0.82 OK
MC0-20_018 1167 0.77 OK
MC0-20_020 784 0.79 OK
MC0-20_021 628 0.86 OK
MC0-20_022 520 0.89 OK
MC0-20_023 421 0.80 OK
MC0-20_024 321 0.84 OK
MC0-20_025 239 0.74 OK
MC0-20_026 161 0.80 OK
MC0-20_027 114 0.88 OK
MC0-20_028 79 0.70 OK
MC0-20_029 58 0.86 OK
MC0-20_030 41 0.61 OK
MC0-20_031 31 0.71 OK
MC0-20_033 16 0.88 OK
MC0-20_036 7 0.86 OK
MC0-20_040 4 0.50 OK
Code
ggplot(item_difficulty,
       aes(mean_score)) +
  geom_histogram(binwidth = .05, colour = "white") +
  geom_vline(xintercept = c(0.10, 0.90), linetype = "dashed") +
  facet_wrap(~ `Test Identifier`, ncol = 1) +
  scale_x_continuous(labels = percent_format(accuracy = 1)) +
  labs(
    x     = "Item Mean Score",
    y     = "Number of Items",
    title = "Item-difficulty Distribution\n(dashed lines = floor / ceiling cut-offs)"
  ) +
  theme_minimal()

Item response count

Code
# 1. Summarise per question: counts of correct, incorrect, and non-responses
response_summary <- results_cleaned_df %>%
  group_by(`Test Identifier`, `Question Identifier`) %>%
  summarise(
    correct     = sum(RawScore, na.rm = TRUE),
    nonresponse = sum(is.na(AnswerResponse)),
    incorrect   = n() - correct - nonresponse,
    .groups     = "drop"
  )

# 2. Define plotting function
build_response_stack_plot <- function(df, test_id, plot_title) {
  df_sum <- df %>%
    filter(`Test Identifier` == test_id) %>%
    arrange(`Question Identifier`) %>%
    mutate(
      `Question Identifier` = factor(
        `Question Identifier`,
        levels = unique(`Question Identifier`)
      )
    ) %>%
    pivot_longer(
      cols      = c(nonresponse, incorrect, correct),
      names_to  = "Outcome",
      values_to = "Count"
    ) %>%
    mutate(
      Outcome = factor(Outcome, levels = c("correct", "incorrect", "nonresponse"))
    )
  
  ggplot(df_sum, aes(x = Count, y = `Question Identifier`, fill = Outcome)) +
    geom_col(width = 0.6) +
    scale_fill_manual(
      values = c(correct     = "#1b9e77",
                 incorrect   = "#d95f02",
                 nonresponse = "grey80"),
      labels = c("Correct", "Incorrect", "Non-response")
    ) +
    scale_y_discrete(labels = function(x) str_remove(x, "^.*_")) +
    scale_x_continuous(expand = expansion(add = c(0, 0.05))) +
    labs(
      title = plot_title,
      x     = "Count",
      y     = "Question",
      fill  = NULL
    ) +
    coord_flip() +
    theme_minimal(base_size = 12) +
    theme(
      panel.grid       = element_blank(),
      axis.ticks       = element_blank(),
      legend.position  = "bottom",
      legend.direction = "horizontal"
    )
}

# 3. Generate and display plots for each test
plot_mc20_stack <- build_response_stack_plot(
  response_summary,
  "MC0-20_2025",
  "MC 0–20: Correct, Incorrect & Non-response by Question"
)
plot_mc100_stack <- build_response_stack_plot(
  response_summary,
  "MC0-100_2025",
  "MC 0–100: Correct, Incorrect & Non-response by Question"
)

# 4. Print the plots
print(plot_mc20_stack)

Code
print(plot_mc100_stack)

Item response time (seconds)

Code
# 1. Prep data: extract numeric QID & factor
df_rt <- results_cleaned_df %>%
  mutate(
    Qnum     = as.integer(str_remove(`Question Identifier`, "^.*_")),
    Question = factor(Qnum, levels = sort(unique(Qnum)))
  )

# 2. Box‐plot constructor with a single flag
build_rt_box_plot <- function(df, test_id, title, show_outliers = TRUE) {
  d <- df %>% filter(`Test Identifier` == test_id)
  
  # if we don't want outliers, drop them first
  if (!show_outliers) {
    d <- d %>%
      group_by(Question) %>%
      filter(
        between(
          `Answer Duration`,
          quantile(`Answer Duration`, .25) - 1.5 * IQR(`Answer Duration`),
          quantile(`Answer Duration`, .75) + 1.5 * IQR(`Answer Duration`)
        )
      ) %>%
      ungroup()
  }
  
  ggplot(d, aes(y = Question, x = `Answer Duration`)) +
    geom_boxplot(
      width         = 0.6,
      outlier.shape = if (show_outliers) 19 else NA
    ) +
    labs(
      title = title,
      x     = "Answer Duration (s)",
      y     = "Question"
    ) +
    coord_flip() +
    theme_minimal(base_size = 12) +
    theme(
      panel.grid.major.x = element_blank(),
      panel.grid.minor   = element_blank(),
      axis.ticks         = element_blank()
    )
}

# 3. Build all four plots
p20_with     <- build_rt_box_plot(df_rt, "MC0-20_2025",  "MC 0–20 (with outliers)",     TRUE)
p20_without  <- build_rt_box_plot(df_rt, "MC0-20_2025",  "MC 0–20 (no outliers)",       FALSE)
p100_with    <- build_rt_box_plot(df_rt, "MC0-100_2025", "MC 0–100 (with outliers)",    TRUE)
p100_without <- build_rt_box_plot(df_rt, "MC0-100_2025", "MC 0–100 (no outliers)",      FALSE)

# 4. Stack them per test (independent x‐axes by default)
combined_mc20  <- p20_with  / p20_without
combined_mc100 <- p100_with / p100_without

# 5. Display
print(p20_with)

Code
print(p100_with)

Code
print(p20_without / p100_without)

Data table

Code
mc_summary %>%
  mutate(
    `Correct pct`   = correct/total * 100
  ) %>%
  select(
    `Question Identifier`,
    n            = total,
    `Correct pct`,
    mean_dur_all,
    sd_dur_all,
    mean_dur_corr,
    sd_dur_corr,
    mean_dur_inc,
    sd_dur_inc
  ) %>%
  kable(
    digits  = 1,
    caption = "Item response time",
    format  = "html"
  ) %>%
  kable_styling(full_width = FALSE) %>%
  scroll_box(width = "100%", height = "400px")
Item response time
Question Identifier n Correct pct mean_dur_all sd_dur_all mean_dur_corr sd_dur_corr mean_dur_inc sd_dur_inc
MC0-20_001 2465 93.3 6.4 6.2 6.2 5.6 9.5 11.6
MC0-20_001 2465 93.3 6.4 6.2 6.2 5.6 9.5 11.6
MC0-20_002 2458 93.7 3.5 3.1 3.4 2.9 4.9 5.2
MC0-20_002 2458 93.7 3.5 3.1 3.4 2.9 4.9 5.2
MC0-20_003 2453 86.1 3.6 3.3 3.4 2.7 4.9 5.4
MC0-20_003 2453 86.1 3.6 3.3 3.4 2.7 4.9 5.4
MC0-20_004 2443 92.1 2.7 2.2 2.6 2.0 3.7 3.1
MC0-20_004 2443 92.1 2.7 2.2 2.6 2.0 3.7 3.1
MC0-20_005 2437 77.5 3.8 3.3 3.7 3.0 4.4 4.0
MC0-20_005 2437 77.5 3.8 3.3 3.7 3.0 4.4 4.0
MC0-20_006 2415 87.3 3.1 2.4 2.9 2.2 4.0 3.4
MC0-20_006 2415 87.3 3.1 2.4 2.9 2.2 4.0 3.4
MC0-20_007 2397 67.2 4.4 4.0 4.2 3.4 4.9 4.9
MC0-20_007 2397 67.2 4.4 4.0 4.2 3.4 4.9 4.9
MC0-20_008 2319 57.5 3.8 2.9 3.8 2.9 3.8 3.0
MC0-20_008 2319 57.5 3.8 2.9 3.8 2.9 3.8 3.0
MC0-20_009 2236 89.2 2.8 1.9 2.7 1.7 3.4 3.0
MC0-20_009 2236 89.2 2.8 1.9 2.7 1.7 3.4 3.0
MC0-20_010 2166 84.2 2.8 1.8 2.8 1.6 2.9 2.4
MC0-20_010 2166 84.2 2.8 1.8 2.8 1.6 2.9 2.4
MC0-20_011 2061 87.3 3.0 1.8 3.0 1.6 2.9 2.7
MC0-20_011 2061 87.3 3.0 1.8 3.0 1.6 2.9 2.7
MC0-20_012 1975 92.4 2.2 1.4 2.2 1.3 2.5 2.4
MC0-20_012 1975 92.4 2.2 1.4 2.2 1.3 2.5 2.4
MC0-20_013 1870 91.0 2.2 1.5 2.2 1.2 2.8 3.3
MC0-20_013 1870 91.0 2.2 1.5 2.2 1.2 2.8 3.3
MC0-20_014 1760 92.4 2.0 1.1 2.0 1.1 2.2 1.5
MC0-20_014 1760 92.4 2.0 1.1 2.0 1.1 2.2 1.5
MC0-20_015 1632 86.2 2.4 1.4 2.3 1.2 2.7 2.1
MC0-20_015 1632 86.2 2.4 1.4 2.3 1.2 2.7 2.1
MC0-20_016 1496 82.0 2.5 1.3 2.5 1.2 2.4 1.7
MC0-20_016 1496 82.0 2.5 1.3 2.5 1.2 2.4 1.7
MC0-20_017 1315 91.3 2.0 1.1 2.0 1.0 2.3 2.0
MC0-20_017 1315 91.3 2.0 1.1 2.0 1.0 2.3 2.0
MC0-20_018 1167 77.3 2.5 1.4 2.5 1.2 2.6 1.8
MC0-20_018 1167 77.3 2.5 1.4 2.5 1.2 2.6 1.8
MC0-20_019 930 90.2 1.7 0.9 1.7 0.8 1.8 1.7
MC0-20_019 930 90.2 1.7 0.9 1.7 0.8 1.8 1.7
MC0-20_020 784 79.1 2.2 1.4 2.2 1.1 2.3 2.3
MC0-20_020 784 79.1 2.2 1.4 2.2 1.1 2.3 2.3
MC0-20_021 628 86.5 1.7 1.0 1.7 1.0 1.6 1.1
MC0-20_021 628 86.5 1.7 1.0 1.7 1.0 1.6 1.1
MC0-20_022 520 89.4 1.5 1.0 1.5 0.7 1.8 2.1
MC0-20_022 520 89.4 1.5 1.0 1.5 0.7 1.8 2.1
MC0-20_023 421 79.8 1.7 1.1 1.6 0.9 1.9 1.6
MC0-20_023 421 79.8 1.7 1.1 1.6 0.9 1.9 1.6
MC0-20_024 321 84.1 1.6 0.8 1.5 0.7 1.8 1.4
MC0-20_024 321 84.1 1.6 0.8 1.5 0.7 1.8 1.4
MC0-20_025 239 74.1 1.8 0.9 1.8 0.9 1.7 1.0
MC0-20_025 239 74.1 1.8 0.9 1.8 0.9 1.7 1.0
MC0-20_026 161 79.5 1.9 0.9 2.0 0.9 1.4 0.8
MC0-20_026 161 79.5 1.9 0.9 2.0 0.9 1.4 0.8
MC0-20_027 114 87.7 1.4 0.8 1.4 0.6 1.7 1.4
MC0-20_027 114 87.7 1.4 0.8 1.4 0.6 1.7 1.4
MC0-20_028 79 69.6 1.6 0.9 1.6 0.8 1.6 1.1
MC0-20_028 79 69.6 1.6 0.9 1.6 0.8 1.6 1.1
MC0-20_029 58 86.2 1.5 1.5 1.4 0.8 2.2 3.5
MC0-20_029 58 86.2 1.5 1.5 1.4 0.8 2.2 3.5
MC0-20_030 41 61.0 1.3 0.6 1.4 0.6 1.3 0.6
MC0-20_030 41 61.0 1.3 0.6 1.4 0.6 1.3 0.6
MC0-20_031 31 71.0 1.4 0.7 1.4 0.8 1.3 0.5
MC0-20_031 31 71.0 1.4 0.7 1.4 0.8 1.3 0.5
MC0-20_032 23 91.3 1.2 0.4 1.1 0.4 1.5 0.7
MC0-20_032 23 91.3 1.2 0.4 1.1 0.4 1.5 0.7
MC0-20_033 16 87.5 1.2 0.4 1.1 0.4 1.5 0.7
MC0-20_033 16 87.5 1.2 0.4 1.1 0.4 1.5 0.7
MC0-20_034 15 93.3 1.1 0.3 1.1 0.3 1.0 NA
MC0-20_034 15 93.3 1.1 0.3 1.1 0.3 1.0 NA
MC0-20_035 11 90.9 1.3 0.5 1.2 0.4 2.0 NA
MC0-20_035 11 90.9 1.3 0.5 1.2 0.4 2.0 NA
MC0-20_036 7 85.7 1.3 0.5 1.2 0.4 2.0 NA
MC0-20_036 7 85.7 1.3 0.5 1.2 0.4 2.0 NA
MC0-20_037 7 100.0 1.0 0.0 1.0 0.0 NaN NA
MC0-20_037 7 100.0 1.0 0.0 1.0 0.0 NaN NA
MC0-20_038 6 100.0 0.8 0.4 0.8 0.4 NaN NA
MC0-20_038 6 100.0 0.8 0.4 0.8 0.4 NaN NA
MC0-20_039 4 100.0 1.0 0.0 1.0 0.0 NaN NA
MC0-20_039 4 100.0 1.0 0.0 1.0 0.0 NaN NA
MC0-20_040 4 50.0 1.0 0.0 1.0 0.0 1.0 0.0
MC0-20_040 4 50.0 1.0 0.0 1.0 0.0 1.0 0.0
MC0-20_041 4 100.0 2.0 1.4 2.0 1.4 NaN NA
MC0-20_041 4 100.0 2.0 1.4 2.0 1.4 NaN NA
MC0-20_042 4 100.0 1.2 0.5 1.2 0.5 NaN NA
MC0-20_042 4 100.0 1.2 0.5 1.2 0.5 NaN NA
MC0-20_043 4 100.0 1.2 0.5 1.2 0.5 NaN NA
MC0-20_043 4 100.0 1.2 0.5 1.2 0.5 NaN NA
MC0-20_044 1 100.0 1.0 NA 1.0 NA NaN NA
MC0-20_044 1 100.0 1.0 NA 1.0 NA NaN NA
MC0-100_001 2383 95.3 5.8 6.7 5.6 6.1 9.8 14.2
MC0-100_001 2383 95.3 5.8 6.7 5.6 6.1 9.8 14.2
MC0-100_002 2374 84.7 3.9 4.4 3.6 3.9 5.7 6.0
MC0-100_002 2374 84.7 3.9 4.4 3.6 3.9 5.7 6.0
MC0-100_003 2365 69.8 3.6 2.9 3.5 2.9 3.7 3.0
MC0-100_003 2365 69.8 3.6 2.9 3.5 2.9 3.7 3.0
MC0-100_004 2359 85.8 3.2 2.7 3.1 2.6 3.9 2.9
MC0-100_004 2359 85.8 3.2 2.7 3.1 2.6 3.9 2.9
MC0-100_005 2349 82.5 3.3 2.8 3.1 2.6 4.0 3.5
MC0-100_005 2349 82.5 3.3 2.8 3.1 2.6 4.0 3.5
MC0-100_006 2323 93.6 2.9 2.0 2.8 1.7 4.0 4.5
MC0-100_006 2323 93.6 2.9 2.0 2.8 1.7 4.0 4.5
MC0-100_007 2301 93.3 2.6 1.7 2.5 1.6 3.0 2.8
MC0-100_007 2301 93.3 2.6 1.7 2.5 1.6 3.0 2.8
MC0-100_008 2285 85.9 3.3 2.2 3.2 2.2 3.8 2.6
MC0-100_008 2285 85.9 3.3 2.2 3.2 2.2 3.8 2.6
MC0-100_009 2237 88.9 3.2 2.1 3.2 1.9 3.4 2.8
MC0-100_009 2237 88.9 3.2 2.1 3.2 1.9 3.4 2.8
MC0-100_010 2171 54.2 3.3 2.4 3.6 2.7 3.1 2.0
MC0-100_010 2171 54.2 3.3 2.4 3.6 2.7 3.1 2.0
MC0-100_011 2091 94.0 2.5 1.5 2.4 1.5 2.6 2.1
MC0-100_011 2091 94.0 2.5 1.5 2.4 1.5 2.6 2.1
MC0-100_012 2007 89.6 2.8 1.5 2.8 1.5 2.9 1.9
MC0-100_012 2007 89.6 2.8 1.5 2.8 1.5 2.9 1.9
MC0-100_013 1875 89.5 2.2 1.3 2.2 1.2 2.5 2.1
MC0-100_013 1875 89.5 2.2 1.3 2.2 1.2 2.5 2.1
MC0-100_014 1739 88.1 2.2 1.6 2.1 1.5 2.6 1.8
MC0-100_014 1739 88.1 2.2 1.6 2.1 1.5 2.6 1.8
MC0-100_015 1600 79.9 2.6 1.6 2.6 1.5 2.7 1.9
MC0-100_015 1600 79.9 2.6 1.6 2.6 1.5 2.7 1.9
MC0-100_016 1429 82.3 2.5 1.5 2.4 1.4 2.7 1.8
MC0-100_016 1429 82.3 2.5 1.5 2.4 1.4 2.7 1.8
MC0-100_017 1268 82.3 2.3 1.2 2.2 1.0 2.4 1.9
MC0-100_017 1268 82.3 2.3 1.2 2.2 1.0 2.4 1.9
MC0-100_018 1105 85.9 2.2 1.2 2.1 1.0 2.3 1.9
MC0-100_018 1105 85.9 2.2 1.2 2.1 1.0 2.3 1.9
MC0-100_019 965 81.5 2.1 1.2 2.1 1.0 2.0 1.7
MC0-100_019 965 81.5 2.1 1.2 2.1 1.0 2.0 1.7
MC0-100_020 843 86.5 1.7 0.9 1.7 0.9 1.8 1.1
MC0-100_020 843 86.5 1.7 0.9 1.7 0.9 1.8 1.1
MC0-100_021 724 88.3 1.7 0.8 1.7 0.8 1.5 0.8
MC0-100_021 724 88.3 1.7 0.8 1.7 0.8 1.5 0.8
MC0-100_022 604 88.1 1.5 0.8 1.5 0.7 1.5 0.9
MC0-100_022 604 88.1 1.5 0.8 1.5 0.7 1.5 0.9
MC0-100_023 506 83.6 1.7 0.9 1.7 0.9 1.5 0.9
MC0-100_023 506 83.6 1.7 0.9 1.7 0.9 1.5 0.9
MC0-100_024 402 81.1 1.5 0.7 1.5 0.7 1.5 1.0
MC0-100_024 402 81.1 1.5 0.7 1.5 0.7 1.5 1.0
MC0-100_025 310 74.8 1.7 0.9 1.8 0.9 1.5 1.0
MC0-100_025 310 74.8 1.7 0.9 1.8 0.9 1.5 1.0
MC0-100_026 231 78.4 1.8 1.2 1.7 0.9 2.1 2.1
MC0-100_026 231 78.4 1.8 1.2 1.7 0.9 2.1 2.1
MC0-100_027 170 84.1 1.3 0.6 1.4 0.6 1.3 0.5
MC0-100_027 170 84.1 1.3 0.6 1.4 0.6 1.3 0.5
MC0-100_028 140 67.9 1.6 0.9 1.7 1.0 1.5 0.8
MC0-100_028 140 67.9 1.6 0.9 1.7 1.0 1.5 0.8
MC0-100_029 97 74.2 1.6 0.8 1.5 0.7 1.7 1.2
MC0-100_029 97 74.2 1.6 0.8 1.5 0.7 1.7 1.2
MC0-100_030 64 78.1 1.5 1.0 1.6 1.1 1.2 0.6
MC0-100_030 64 78.1 1.5 1.0 1.6 1.1 1.2 0.6
MC0-100_031 45 68.9 1.2 0.5 1.2 0.6 1.2 0.4
MC0-100_031 45 68.9 1.2 0.5 1.2 0.6 1.2 0.4
MC0-100_032 29 69.0 1.3 0.8 1.2 0.7 1.4 0.9
MC0-100_032 29 69.0 1.3 0.8 1.2 0.7 1.4 0.9
MC0-100_033 19 63.2 1.3 0.6 1.4 0.7 1.1 0.4
MC0-100_033 19 63.2 1.3 0.6 1.4 0.7 1.1 0.4
MC0-100_034 12 58.3 1.2 0.5 1.3 0.5 1.2 0.4
MC0-100_034 12 58.3 1.2 0.5 1.3 0.5 1.2 0.4
MC0-100_035 8 37.5 1.4 1.1 2.0 1.7 1.0 0.0
MC0-100_035 8 37.5 1.4 1.1 2.0 1.7 1.0 0.0
MC0-100_036 7 42.9 1.6 1.1 2.3 1.5 1.0 0.0
MC0-100_036 7 42.9 1.6 1.1 2.3 1.5 1.0 0.0
MC0-100_037 5 40.0 1.8 1.3 3.0 1.4 1.0 0.0
MC0-100_037 5 40.0 1.8 1.3 3.0 1.4 1.0 0.0
MC0-100_038 4 75.0 1.5 1.0 1.7 1.2 1.0 NA
MC0-100_038 4 75.0 1.5 1.0 1.7 1.2 1.0 NA
MC0-100_039 3 33.3 1.3 0.6 2.0 NA 1.0 0.0
MC0-100_039 3 33.3 1.3 0.6 2.0 NA 1.0 0.0
MC0-100_040 2 50.0 1.5 0.7 2.0 NA 1.0 NA
MC0-100_040 2 50.0 1.5 0.7 2.0 NA 1.0 NA
MC0-100_041 2 50.0 1.5 0.7 2.0 NA 1.0 NA
MC0-100_041 2 50.0 1.5 0.7 2.0 NA 1.0 NA
MC0-100_042 2 50.0 1.5 0.7 2.0 NA 1.0 NA
MC0-100_042 2 50.0 1.5 0.7 2.0 NA 1.0 NA
MC0-100_043 2 50.0 1.5 0.7 2.0 NA 1.0 NA
MC0-100_043 2 50.0 1.5 0.7 2.0 NA 1.0 NA
MC0-100_044 2 50.0 3.5 2.1 2.0 NA 5.0 NA
MC0-100_044 2 50.0 3.5 2.1 2.0 NA 5.0 NA
MC0-100_045 1 100.0 3.0 NA 3.0 NA NaN NA
MC0-100_045 1 100.0 3.0 NA 3.0 NA NaN NA
MC0-100_046 1 100.0 3.0 NA 3.0 NA NaN NA
MC0-100_046 1 100.0 3.0 NA 3.0 NA NaN NA
MC0-100_047 1 100.0 3.0 NA 3.0 NA NaN NA
MC0-100_047 1 100.0 3.0 NA 3.0 NA NaN NA
MC0-100_048 1 100.0 2.0 NA 2.0 NA NaN NA
MC0-100_048 1 100.0 2.0 NA 2.0 NA NaN NA
MC0-100_049 1 100.0 3.0 NA 3.0 NA NaN NA
MC0-100_049 1 100.0 3.0 NA 3.0 NA NaN NA

Performance by item type

Code
summarise_group_all <- function(df, group_id) {
  df %>%
    group_by(Test, !!sym(group_id)) %>%
    summarise(
      total     = n(),
      correct   = sum(`Is.Answer.Correct`),
      incorrect = total - correct,
      .groups   = "drop"
    ) %>%
    mutate(
      pct_corr = correct/total,
      pct_inc  = incorrect/total
    ) %>%
    pivot_longer(
      c(pct_inc, pct_corr),
      names_to  = "Outcome",
      values_to = "Proportion"
    ) %>%
    mutate(
      Outcome = factor(if_else(Outcome=="pct_inc","Incorrect","Correct"),
                       levels = c("Incorrect","Correct")),
      !!sym(group_id) := factor(!!sym(group_id), levels = unique(!!sym(group_id)))
    )
}

build_combined_group_plot <- function(df, group_id, group_desc, title) {
  df2 <- df %>%
    mutate(
      !!sym(group_id)   := replace_na(as.character(!!sym(group_id)),   "Other"),
      !!sym(group_desc) := replace_na(as.character(!!sym(group_desc)), "Other")
    )
  sum_df <- df2 %>%
    group_by(Test, !!sym(group_id)) %>%
    summarise(
      total     = n(),
      correct   = sum(`Is.Answer.Correct`),
      incorrect = total - correct,
      .groups   = "drop"
    ) %>%
    mutate(
      pct_corr = correct/total,
      pct_inc  = incorrect/total
    ) %>%
    pivot_longer(c(pct_inc, pct_corr),
                 names_to="Outcome",
                 values_to="Proportion") %>%
    mutate(
      Outcome = factor(if_else(Outcome=="pct_inc","Incorrect","Correct"),
                       levels=c("Incorrect","Correct")),
      !!sym(group_id) := factor(!!sym(group_id),
                                levels=unique(!!sym(group_id)))
    )
  pairs <- df2 %>%
    filter(!!sym(group_id) != "Other") %>%
    distinct(!!sym(group_id), !!sym(group_desc)) %>%
    arrange(!!sym(group_id)) %>%
    transmute(pair = paste0(!!sym(group_id), ": ", !!sym(group_desc))) %>%
    pull(pair)
  cap <- str_c(pairs, collapse = "\n")
  lab_df <- sum_df %>%
    distinct(Test, !!sym(group_id), total) %>%
    mutate(label = paste0("n = ", comma(total)))
  
  ggplot(sum_df, aes(x=Proportion, y=!!sym(group_id), fill=Outcome)) +
    geom_col(position="fill", colour="grey90") +
    geom_text(data=lab_df, inherit.aes=FALSE,
              aes(x=1.01, y=!!sym(group_id), label=label),
              hjust=0, size=3) +
    facet_wrap(~Test, scales="free_y") +
    coord_cartesian(xlim=c(0,1.12), clip="off") +
    scale_x_continuous(labels=percent_format(accuracy=1),
                       breaks=seq(0,1,0.25),
                       minor_breaks=seq(0,1,0.05),
                       expand=c(0.01,0.01)) +
    scale_y_discrete(labels=\(x) str_remove(x,"^.*_")) +
    scale_fill_manual(values=c("Incorrect"="#d95f02","Correct"="#1b9e77")) +
    geom_text(aes(label=percent(Proportion,accuracy=1)),
              position=position_fill(vjust=0.5),
              colour="white", size=3) +
    labs(title=title,
         x=NULL,
         y=str_replace_all(group_id,"_"," "),
         fill=NULL,
         caption=cap) +
    theme_minimal(base_size=12) +
    theme(
      plot.caption.position = "plot",
      axis.ticks           = element_blank(),
      panel.grid.major.y   = element_blank(),
      panel.grid.minor     = element_blank(),
      legend.position      = "bottom",
      legend.direction     = "horizontal",
      axis.text.y          = element_text(hjust=1, margin=margin(r=-6)),
      plot.margin          = margin(5.5,25,5.5,5.5),
      plot.caption         = element_text(hjust=0)
    )
}

plot_ratio <- build_combined_group_plot(
  results_cleaned_df, "Ratio_label", "Ratio_desc", "Performance by Ratio"
)
plot_congruence <- build_combined_group_plot(
  results_cleaned_df, "Congruence_label", "Congruence_desc", "Performance by Congruence"
)
plot_decade <- build_combined_group_plot(
  results_cleaned_df, "Decade_label", "Decade_desc", "Performance by Decade"
)

print(plot_ratio)

Code
print(plot_congruence)

Code
print(plot_decade)

Test Performance

Compute accuracy and response correct per minute (RCPM) for each student x test

Code
student_metrics <- results_cleaned_df %>%
  group_by(Identifier, `Test Identifier`, Test) %>%
  summarise(
    attempted      = n(),
    correct_count  = sum(RawScore, na.rm = TRUE),
    total_minutes  = sum(`Answer Duration`, na.rm = TRUE) / 60,
    .groups        = "drop"
  ) %>%
  mutate(
    Accuracy = correct_count / attempted,
    RCPM     = correct_count / total_minutes
  )

test_summary <- student_metrics %>%
  group_by(`Test Identifier`) %>%
  summarise(
    Acc_mean   = mean(Accuracy, na.rm = TRUE),
    Acc_sd     = sd(Accuracy,   na.rm = TRUE),
    Acc_skew   = skewness(Accuracy, na.rm = TRUE),
    Acc_kurt   = kurtosis(Accuracy, na.rm = TRUE),
    RCPM_mean  = mean(RCPM, na.rm = TRUE),
    RCPM_sd    = sd(RCPM,   na.rm = TRUE),
    RCPM_skew  = skewness(RCPM, na.rm = TRUE),
    RCPM_kurt  = kurtosis(RCPM, na.rm = TRUE),
    .groups    = "drop"
  )

test_summary_t <- test_summary %>%
  pivot_longer(-`Test Identifier`, names_to = "Metric", values_to = "Value") %>%
  pivot_wider(names_from = `Test Identifier`, values_from = Value)

knitr::kable(
  test_summary_t,
  digits  = 2,
  caption = "Test results summary"
)
Test results summary
Metric MC0-100_2025 MC0-20_2025
Acc_mean 0.83 0.83
Acc_sd 0.17 0.18
Acc_skew -1.41 -1.59
Acc_kurt 4.99 5.63
RCPM_mean 17.76 17.04
RCPM_sd 8.82 8.41
RCPM_skew 0.62 0.57
RCPM_kurt 3.26 3.73

Distribution of accuracy/fluency

Code
student_metrics_long <- student_metrics %>%
  select(Identifier, `Test Identifier`, Accuracy, RCPM) %>%
  pivot_longer(
    cols      = c(Accuracy, RCPM),
    names_to  = "Metric",
    values_to = "Value"
  )

ggplot(student_metrics_long, aes(x = `Test Identifier`, y = Value, fill = `Test Identifier`)) +
  geom_violin(alpha = 0.5, colour = NA) +
  geom_boxplot(width = 0.1, position = position_dodge(width = 0.9)) +
  facet_wrap(~ Metric, scales = "free_y") +
  labs(
    title = "Distribution of Accuracy and RCPM by Test",
    x     = NULL,
    y     = NULL,
    fill  = "Test"
  ) +
  theme_minimal(base_size = 12) +
  theme(
    legend.position = "none",
    axis.text.x     = element_text(angle = 45, hjust = 1)
  )

Mean and spread test

  • Welch’s two-sample t-test in case of non-homogeneity of variance, and the two groups are independent (no overlap in students). Levene’s test for spread as distribution is not normal (required for F-test).

  • No evidence of difference in average accuracy and Levene’s more robust test for variance finds no significant spread difference. Both groups have essentially the same mean and the same variability in accuracy.

  • Small but statistically significant speed difference (RCPM) and borderline (p≈0.06) variance difference on Levene’s test. i.e. Statistically significant though small difference in mean speed (MC 0–100 > MC 0–20), and any difference in spread is at best marginal.

Code
ggqqplot(
  student_metrics,
  x        = "Accuracy",
  color    = "Test Identifier",
  facet.by = "Test Identifier",
  title    = "Q–Q Plot of Accuracy by Test"
)

Code
ggqqplot(
  student_metrics,
  x        = "RCPM",
  color    = "Test Identifier",
  facet.by = "Test Identifier",
  title    = "Q–Q Plot of RCPM by Test"
)

Code
t_acc <- t.test(
  Accuracy ~ `Test Identifier`,
  data      = student_metrics,
  var.equal = FALSE    # Welch’s t-test
)

t_rcpm <- t.test(
  RCPM ~ `Test Identifier`,
  data      = student_metrics,
  var.equal = FALSE
)

f_acc <- var.test(
  Accuracy ~ `Test Identifier`,
  data = student_metrics
)

f_rcpm <- var.test(
  RCPM ~ `Test Identifier`,
  data = student_metrics
)

lev_acc <- leveneTest(
  Accuracy ~ `Test Identifier`,
  data       = student_metrics
)

lev_rcpm <- leveneTest(
  RCPM ~ `Test Identifier`,
  data       = student_metrics
)

list(
  t_acc,
  t_rcpm,
  f_acc,
  f_rcpm,
  lev_acc,
  lev_rcpm
)
[[1]]

    Welch Two Sample t-test

data:  Accuracy by Test Identifier
t = -0.1235, df = 4847.7, p-value = 0.9017
alternative hypothesis: true difference in means between group MC0-100_2025 and group MC0-20_2025 is not equal to 0
95 percent confidence interval:
 -0.010627170  0.009367607
sample estimates:
mean in group MC0-100_2025  mean in group MC0-20_2025 
                 0.8272641                  0.8278939 


[[2]]

    Welch Two Sample t-test

data:  RCPM by Test Identifier
t = 2.9179, df = 4816.3, p-value = 0.00354
alternative hypothesis: true difference in means between group MC0-100_2025 and group MC0-20_2025 is not equal to 0
95 percent confidence interval:
 0.2371036 1.2080688
sample estimates:
mean in group MC0-100_2025  mean in group MC0-20_2025 
                  17.76444                   17.04185 


[[3]]

    F test to compare two variances

data:  Accuracy by Test Identifier
F = 0.91889, num df = 2383, denom df = 2465, p-value = 0.03746
alternative hypothesis: true ratio of variances is not equal to 1
95 percent confidence interval:
 0.8485680 0.9951034
sample estimates:
ratio of variances 
         0.9188944 


[[4]]

    F test to compare two variances

data:  RCPM by Test Identifier
F = 1.0995, num df = 2383, denom df = 2465, p-value = 0.01957
alternative hypothesis: true ratio of variances is not equal to 1
95 percent confidence interval:
 1.015334 1.190667
sample estimates:
ratio of variances 
          1.099481 


[[5]]
Levene's Test for Homogeneity of Variance (center = median)
        Df F value Pr(>F)
group    1  0.2647 0.6069
      4848               

[[6]]
Levene's Test for Homogeneity of Variance (center = median)
        Df F value  Pr(>F)  
group    1  3.4659 0.06271 .
      4848                  
---
Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1

Student total score

student_totals <- results_cleaned_df %>%
  group_by(`Test Identifier`, Identifier) %>%
  summarise(
    prop_correct = mean(RawScore),
    .groups      = "drop"
  )

student_totals %>%
  mutate(flag = case_when(
    prop_correct >= 0.9 ~ "90% & above",
    prop_correct <= 0.2 ~ "20% & below",
    TRUE              ~ "Middle"
  )) %>%
  count(`Test Identifier`, flag) %>%
  mutate(pct = n/sum(n) * 100) %>% 
  knitr::kable(caption = "Students at total ceiling / floor", digits = 1)
Students at total ceiling / floor
Test Identifier flag n pct
MC0-100_2025 20% & below 11 0.2
MC0-100_2025 90% & above 1124 23.2
MC0-100_2025 Middle 1249 25.8
MC0-20_2025 20% & below 17 0.4
MC0-20_2025 90% & above 1138 23.5
MC0-20_2025 Middle 1311 27.0

Export

Code
# 1) Create a new workbook
wb <- createWorkbook()

# 2) Table of Contents sheet
addWorksheet(wb, "TOC")
toc <- data.frame(
  Sheet       = c(
    "Results raw",
    "MC summary",
    "Response summary",
    "Stacked responses",
    "MC times",
    "Meta plots",
    "Student metrics",
    "Test summary"
  ),
  Description = c(
    "All individual item‐level responses",
    "MC summary table plus p_mc20 and p_mc100 plots",
    "Response summary table plus plot_mc20_stack & plot_mc100_stack",
    "Bullet‐chart plots p20_with,p100_with,p20_without,p100_without",
    "Response‐time boxplots with/without outliers (4 panels)",
    "Performance by Ratio/Congruence/Decade (3 facet plots)",
    "Student metrics table plus Accuracy/RCPM violin‐boxplot",
    "Test‐level summary (means, SD, skewness, kurtosis)"
  ),
  stringsAsFactors = FALSE
)
writeData(wb, "TOC", toc, startCol = 1, startRow = 1)

# 3) Raw data
addWorksheet(wb, "Results raw")
writeData(wb, "Results raw", results_cleaned_df)

# 4) MC summary + p_mc20, p_mc100
addWorksheet(wb, "MC summary")
writeData(wb, "MC summary", mc_summary)
# export the two plots to PNG and insert
png(tmp <- tempfile(fileext = ".png"), width = 800, height = 600)
print(p_mc20)
dev.off()
insertImage(wb, "MC summary", tmp, startRow = nrow(mc_summary) + 3, startCol = 1, width = 6, height = 4)

png(tmp <- tempfile(fileext = ".png"), width = 800, height = 600)
print(p_mc100)
dev.off()
insertImage(wb, "MC summary", tmp, startRow = nrow(mc_summary) + 3, startCol = 9, width = 6, height = 4)

# 5) Response summary + plot_mc20_stack & plot_mc100_stack
addWorksheet(wb, "Response summary")
writeData(wb, "Response summary", response_summary)
png(tmp <- tempfile(fileext = ".png"), width = 800, height = 600)
print(plot_mc20_stack)
dev.off()
insertImage(wb, "Response summary", tmp, startRow = nrow(response_summary) + 3, startCol = 1, width = 6, height = 4)
png(tmp <- tempfile(fileext = ".png"), width = 800, height = 600)
print(plot_mc100_stack)
dev.off()
insertImage(wb, "Response summary", tmp, startRow = nrow(response_summary) + 3, startCol = 9, width = 6, height = 4)

# 6) Bullet‐chart sheet
addWorksheet(wb, "Stacked responses")
# no table, just the four plots
plots <- list(p20_with, p100_with, p20_without, p100_without)
cols  <- c(1, 9, 1, 9)
rows  <- c(1, 1, 20, 20)
for(i in seq_along(plots)) {
  png(tmp <- tempfile(fileext = ".png"), width = 800, height = 400)
  print(plots[[i]])
  dev.off()
  insertImage(wb, "Stacked responses", tmp,
              startRow = rows[i], startCol = cols[i],
              width = 6, height = 4)
}

# 7) MC times table
addWorksheet(wb, "MC times")
times_tbl <- mc_summary %>%
  mutate(`Correct pct` = correct/total*100) %>%
  select(
    `Question Identifier`,
    n            = total,
    `Correct pct`,
    mean_dur_all, sd_dur_all,
    mean_dur_corr, sd_dur_corr,
    mean_dur_inc,  sd_dur_inc
  )
writeData(wb, "MC times", times_tbl)

# 8) Meta plots (Ratio/Congruence/Decade)
addWorksheet(wb, "Meta plots")
meta_plots <- list(plot_ratio, plot_congruence, plot_decade)
for(i in seq_along(meta_plots)) {
  png(tmp <- tempfile(fileext = ".png"), width = 800, height = 400)
  print(meta_plots[[i]])
  dev.off()
  insertImage(wb, "Meta plots", tmp,
              startRow = (i-1)*20 + 1, startCol = 1,
              width = 8, height = 4)
}

# 9) Student metrics + violin‐boxplot
addWorksheet(wb, "Student metrics")
writeData(wb, "Student metrics", student_metrics)
png(tmp <- tempfile(fileext = ".png"), width = 800, height = 600)
print({
  student_metrics_long %>%
    ggplot(aes(x = `Test Identifier`, y = Value, fill = `Test Identifier`)) +
    geom_violin(alpha = .5) +
    geom_boxplot(width = .1, position = position_dodge(.9)) +
    facet_wrap(~Metric, scales="free_y") +
    theme_minimal()
})
dev.off()
insertImage(wb, "Student metrics", tmp, startRow = nrow(student_metrics)+3, startCol = 1, width = 8, height = 6)

# 10) Test summary
addWorksheet(wb, "Test summary")
writeData(wb, "Test summary", test_summary)

# 11) Save workbook
saveWorkbook(wb, "analysis_export.xlsx", overwrite = TRUE)

# 12) Download link
cat("Workbook exported")