Magnitude Comparison

Question presentation

Prepare

Set up

Code

library(dplyr)
library(tidyr)
library(tibble)
library(here)
library(ggplot2)
library(stringr)
library(purrr)
library(stringr)
library(readr)
library(moments)
library(patchwork)
library(scales)
library(kableExtra)
library(knitr)
library(glue)
library(lubridate)
library(readxl)
library(openxlsx)
library(car)       
library(ggpubr) 

probe_dir <- "02-Magnitude_comparison"

Load and clean

Removed practice questions
Removed (response duration = 0 sec & response is NA), and (response duration is NA)

Code

results_df <- read_csv(here(probe_dir,"magnitude_comp_raw.csv")) %>% select(-starts_with("..."))  

student_df <- read_csv(here(".","01-Students","Student_Assessment.csv"))%>% select(-starts_with("..."))

student_clean <- student_df %>% 
  mutate(
    GradeGroup = case_when(
      str_detect(`Assessment Event Identifier`, "^year-1a-2025$")          ~ "Year 1A",
      str_detect(`Assessment Event Identifier`, "^year-1b-2025$")          ~ "Year 1B",
      str_detect(`Assessment Event Identifier`, "^foundation-a-2025$")     ~ "Foundation A",
      str_detect(`Assessment Event Identifier`, "^foundation-b-2025$")     ~ "Foundation B",
      TRUE                                                                 ~ NA_character_  
    )
  ) %>% 
  select(Identifier, GradeGroup) %>%          
  distinct()

results_df <- results_df %>% 
  left_join(student_clean, by = "Identifier") 

# Count number of students per grade group
#results_df %>% group_by(`Test Identifier`, GradeGroup) %>% 
#  summarise(student_no = n_distinct(Identifier))


# Fix typo MG --> MC
results_df <- results_df %>% 
  mutate(
    `Test Identifier` = str_replace_all(
      `Test Identifier`, 
      c("^MG0-20" = "MC0-20",
        "^MG0-100" = "MC0-100")
    )
  )
  

# Fix grade group
results_df <- results_df %>% 
  mutate(
    GradeGroup = case_when(
      `Test Identifier` == "MC0-100_2025" & GradeGroup == "Foundation B" ~ "Year 1B", 
      `Test Identifier` == "MC0-20_2025" & GradeGroup == "Year 1B" ~ "Foundation B",
      `Test Identifier` == "MC0-20_2025" & GradeGroup == "Year 1A" ~ "Foundation A", 
      TRUE ~ GradeGroup
    )
  ) %>% 
  select(-`Assessment Event Identifier`)

# Remove practice items and invalid responses
results_cleaned_df <- results_df %>% 
  filter(!str_detect(`Question Identifier`, "TRANS_INTRO|_Prac|_Tch|-Prac|_prac")) %>% 
  filter( !(`Answer Duration` == 0 & is.na(`Answer Response`) )) %>% 
  filter( !is.na(`Answer Duration`))

# De-duplicate: keep only the latest per student × test × question
tmp <- results_cleaned_df %>%
  mutate(.ts = parse_date_time(`Date Completed`, orders = "dmy HM p"))

deduped <- tmp %>%
  group_by(Identifier, `Test Identifier`, `Question Identifier`) %>%
  slice_max(.ts, n = 1, with_ties = FALSE) %>%
  ungroup()

n_removed <- nrow(tmp) - nrow(deduped)
message(n_removed, " duplicate rows removed; keeping only the most recent per Identifier/Test/Question")

results_cleaned_df <- deduped %>% select(-.ts)

# Inspect unique answer responses
#results_cleaned_df %>% 
#  select(`Test Identifier`, `Answer Response`) %>% 
#  arrange(`Test Identifier`, `Answer Response`) %>% 
#  distinct()

Map distractor identity

Code

distractor_df <- read_csv(here(probe_dir,"magnitude_comp_answer_key.csv")) %>% select(-starts_with("..."))

# Merge distractor dfs
results_cleaned_df <- results_cleaned_df %>%
  left_join(
    distractor_df %>% select(`Question Identifier`,
                             Hotspot1,
                             Hotspot2,
                             CorrectHotspot,
                             Ratio_label,
                             Ratio_desc,
                             Congruence_label,
                             Congruence_desc,
                             Decade_label,
                             Decade_desc),
    by = "Question Identifier"
  )

## Check answer key consistency
#results_cleaned_df %>%
#     count(`Correct Answer`, CorrectHotspot, name = "n") %>%
#     arrange(`Correct Answer`, CorrectHotspot)

#results_cleaned_df %>%
#  filter(`Correct Answer` == "10", CorrectHotspot ==2) %>%
#  distinct(`Question Identifier`) 

# "MC0-20_026" has wrong `Correct Answer`

# Recompute `Is Answer Correct` and `Raw Score`
results_cleaned_df <- results_cleaned_df %>% 
  mutate(
    AnswerResponse = case_when(
      `Answer Response` == "01" ~ 2L,
      `Answer Response` == "10" ~ 1L,
      TRUE                      ~ NA_integer_
    ),
    Is.Answer.Correct = if_else(
      is.na(AnswerResponse),
      FALSE,
      AnswerResponse == CorrectHotspot
    ),
    RawScore = if_else(
      is.na(AnswerResponse),
      0L,
      as.integer(AnswerResponse == CorrectHotspot)
    )
  ) %>% 
  select(-`Correct Answer`, -`Answer Response`)

results_cleaned_df <- results_cleaned_df %>%
  select(
    Identifier,
    Test,
    `Test Identifier`,
    GradeGroup,
    `Question Identifier`,
    `Question Name`,
    `Question Type`,
    Hotspot1,
    Hotspot2,
    AnswerResponse,
    Is.Answer.Correct,
    RawScore,
    `Answer Duration`,
    `Distractor Count`,
    CorrectHotspot,
    Ratio_label,
    Ratio_desc,
    Congruence_label,
    Congruence_desc,
    Decade_label,
    Decade_desc,
    `Date Completed`
  )

head(results_cleaned_df)

# A tibble: 6 × 22
  Identifier            Test  `Test Identifier` GradeGroup `Question Identifier`
  <chr>                 <chr> <chr>             <chr>      <chr>                
1 00107783-010a-f011-b… Magn… MC0-20_2025       Foundatio… MC0-20_001           
2 00107783-010a-f011-b… Magn… MC0-20_2025       Foundatio… MC0-20_002           
3 00107783-010a-f011-b… Magn… MC0-20_2025       Foundatio… MC0-20_003           
4 00107783-010a-f011-b… Magn… MC0-20_2025       Foundatio… MC0-20_004           
5 00107783-010a-f011-b… Magn… MC0-20_2025       Foundatio… MC0-20_005           
6 00107783-010a-f011-b… Magn… MC0-20_2025       Foundatio… MC0-20_006           
# ℹ 17 more variables: `Question Name` <chr>, `Question Type` <chr>,
#   Hotspot1 <dbl>, Hotspot2 <dbl>, AnswerResponse <int>,
#   Is.Answer.Correct <lgl>, RawScore <int>, `Answer Duration` <dbl>,
#   `Distractor Count` <dbl>, CorrectHotspot <dbl>, Ratio_label <chr>,
#   Ratio_desc <chr>, Congruence_label <chr>, Congruence_desc <chr>,
#   Decade_label <chr>, Decade_desc <chr>, `Date Completed` <chr>

Explore

Overview

Student breakdown by test

Code

results_cleaned_df %>%  
  group_by(`Test Identifier`, GradeGroup) %>% 
  summarise(student_count = n_distinct(Identifier))

# A tibble: 4 × 3
# Groups:   Test Identifier [2]
  `Test Identifier` GradeGroup   student_count
  <chr>             <chr>                <int>
1 MC0-100_2025      Year 1A               1295
2 MC0-100_2025      Year 1B               1089
3 MC0-20_2025       Foundation A          1263
4 MC0-20_2025       Foundation B          1203

Tests x Items

Code

results_cleaned_df %>% group_by(`Test Identifier`) %>% summarise(No_items = n_distinct(`Question Identifier`))

# A tibble: 2 × 2
  `Test Identifier` No_items
  <chr>                <int>
1 MC0-100_2025            49
2 MC0-20_2025             44

Item correct vs incorrect

Code

# 1) Summarise per question
mc_summary <- results_cleaned_df %>%
  group_by(Test, `Test Identifier`, `Question Identifier`) %>%
  summarise(
    # counts
    total     = n(),
    correct   = sum(`Is.Answer.Correct`),
    incorrect = total - correct,
    # duration: all students
    mean_dur_all = mean(`Answer Duration`, na.rm = TRUE),
    sd_dur_all   = sd(  `Answer Duration`, na.rm = TRUE),
    # duration: only correct
    mean_dur_corr = mean(`Answer Duration`[`Is.Answer.Correct`], na.rm = TRUE),
    sd_dur_corr   = sd(  `Answer Duration`[`Is.Answer.Correct`], na.rm = TRUE),
    # duration: only incorrect
    mean_dur_inc = mean(`Answer Duration`[!`Is.Answer.Correct`], na.rm = TRUE),
    sd_dur_inc   = sd(  `Answer Duration`[!`Is.Answer.Correct`], na.rm = TRUE),
    .groups = "drop"
  ) %>%
  mutate(
    pct_corr = correct   / total,
    pct_inc  = incorrect / total
  ) %>%
  pivot_longer(
    cols      = c(pct_corr, pct_inc),
    names_to  = "Outcome",
    values_to = "Proportion"
  ) %>%
  mutate(
    Outcome = dplyr::recode(Outcome,
                     pct_corr = "Correct",
                     pct_inc  = "Incorrect"),
    Outcome = factor(Outcome, levels = c("Incorrect","Correct"))
  )


# 2) Lock in a sensible question‐order on the y‐axis
q_levels <- mc_summary %>% 
  pull(`Question Identifier`) %>% 
  unique() %>% 
  sort()
mc_summary <- mc_summary %>%
  mutate(`Question Identifier` = factor(`Question Identifier`, levels = q_levels))

# 3) Plotting function
build_mc_plot <- function(df, test_id, plot_title) {
  dat <- df %>% filter(`Test Identifier` == test_id)
  lab_dat <- dat %>%
    distinct(`Question Identifier`, total) %>%
    mutate(label = paste0("n = ", scales::comma(total)))
  
  ggplot(dat, aes(x = Proportion, y = `Question Identifier`, fill = Outcome)) +
    geom_col(position = "fill", colour = "grey90") +
    geom_text(
      data = lab_dat, inherit.aes = FALSE,
      aes(x = 1.01, y = `Question Identifier`, label = label),
      hjust = 0, size = 3
    ) +
    coord_cartesian(xlim = c(0, 1.12), clip = "off") +
    scale_x_continuous(
      labels       = percent_format(accuracy = 1),
      breaks       = seq(0, 1, .25),
      minor_breaks = seq(0, 1, .05),
      expand       = c(.01, .01)
    ) +
    scale_y_discrete(labels = \(x) str_remove(x, "^.*_")) +
    labs(
      title = plot_title,
      x     = "Proportion of responses",
      y     = "Question",
      fill  = NULL
    ) +
    theme_minimal(base_size = 12) +
    theme(
      axis.ticks         = element_blank(),
      panel.grid.major.y = element_blank(),
      panel.grid.minor   = element_blank(),
      legend.position    = "bottom",
      legend.direction   = "horizontal",
      axis.text.y = element_text(
        hjust  = 1,
        margin = margin(r = -12)
      ),
      plot.margin = margin(5.5, 25, 5.5, 5.5)
    )
}

# 4) Plot 
p_mc20 <- build_mc_plot(
  mc_summary,
  test_id    = "MC0-20_2025",
  plot_title = "Magnitude Comparison 0–20: % Correct vs Incorrect by Question"
)

print(p_mc20)

Code

p_mc100 <- build_mc_plot(
  mc_summary,
  test_id    = "MC0-100_2025",
  plot_title = "Magnitude Comparison 0–100: % Correct vs Incorrect by Question"
)

print(p_mc100)

Code

# Flag floor/ceiling
item_difficulty <- results_cleaned_df %>%
  group_by(`Test Identifier`, `Question Identifier`) %>%
  summarise(
    count = n_distinct(Identifier),
    mean_score = mean(RawScore),  
    .groups      = "drop"
  ) %>%
  mutate(
    flag = case_when(
      mean_score >= 0.90 ~ "Ceiling (>90%)",
      mean_score <= 0.10 ~ "Floor (<10%)",
      TRUE                 ~ "OK"
    )
  ) %>% 
  arrange(
    flag
  )

item_difficulty %>% select(-`Test Identifier`) %>% 
  knitr::kable(caption = "Item difficulty (flag >90% | <10%)", digits = 2)

Item difficulty (flag >90% | <10%)
Question Identifier	count	mean_score	flag
MC0-100_001	2383	0.95	Ceiling (>90%)
MC0-100_006	2323	0.94	Ceiling (>90%)
MC0-100_007	2301	0.93	Ceiling (>90%)
MC0-100_011	2091	0.94	Ceiling (>90%)
MC0-100_045	1	1.00	Ceiling (>90%)
MC0-100_046	1	1.00	Ceiling (>90%)
MC0-100_047	1	1.00	Ceiling (>90%)
MC0-100_048	1	1.00	Ceiling (>90%)
MC0-100_049	1	1.00	Ceiling (>90%)
MC0-20_001	2465	0.93	Ceiling (>90%)
MC0-20_002	2458	0.94	Ceiling (>90%)
MC0-20_004	2443	0.92	Ceiling (>90%)
MC0-20_012	1975	0.92	Ceiling (>90%)
MC0-20_013	1870	0.91	Ceiling (>90%)
MC0-20_014	1760	0.92	Ceiling (>90%)
MC0-20_017	1315	0.91	Ceiling (>90%)
MC0-20_019	930	0.90	Ceiling (>90%)
MC0-20_032	23	0.91	Ceiling (>90%)
MC0-20_034	15	0.93	Ceiling (>90%)
MC0-20_035	11	0.91	Ceiling (>90%)
MC0-20_037	7	1.00	Ceiling (>90%)
MC0-20_038	6	1.00	Ceiling (>90%)
MC0-20_039	4	1.00	Ceiling (>90%)
MC0-20_041	4	1.00	Ceiling (>90%)
MC0-20_042	4	1.00	Ceiling (>90%)
MC0-20_043	4	1.00	Ceiling (>90%)
MC0-20_044	1	1.00	Ceiling (>90%)
MC0-100_002	2374	0.85	OK
MC0-100_003	2365	0.70	OK
MC0-100_004	2359	0.86	OK
MC0-100_005	2349	0.83	OK
MC0-100_008	2285	0.86	OK
MC0-100_009	2237	0.89	OK
MC0-100_010	2171	0.54	OK
MC0-100_012	2007	0.90	OK
MC0-100_013	1875	0.89	OK
MC0-100_014	1739	0.88	OK
MC0-100_015	1600	0.80	OK
MC0-100_016	1429	0.82	OK
MC0-100_017	1268	0.82	OK
MC0-100_018	1105	0.86	OK
MC0-100_019	965	0.81	OK
MC0-100_020	843	0.86	OK
MC0-100_021	724	0.88	OK
MC0-100_022	604	0.88	OK
MC0-100_023	506	0.84	OK
MC0-100_024	402	0.81	OK
MC0-100_025	310	0.75	OK
MC0-100_026	231	0.78	OK
MC0-100_027	170	0.84	OK
MC0-100_028	140	0.68	OK
MC0-100_029	97	0.74	OK
MC0-100_030	64	0.78	OK
MC0-100_031	45	0.69	OK
MC0-100_032	29	0.69	OK
MC0-100_033	19	0.63	OK
MC0-100_034	12	0.58	OK
MC0-100_035	8	0.38	OK
MC0-100_036	7	0.43	OK
MC0-100_037	5	0.40	OK
MC0-100_038	4	0.75	OK
MC0-100_039	3	0.33	OK
MC0-100_040	2	0.50	OK
MC0-100_041	2	0.50	OK
MC0-100_042	2	0.50	OK
MC0-100_043	2	0.50	OK
MC0-100_044	2	0.50	OK
MC0-20_003	2453	0.86	OK
MC0-20_005	2437	0.78	OK
MC0-20_006	2415	0.87	OK
MC0-20_007	2397	0.67	OK
MC0-20_008	2319	0.58	OK
MC0-20_009	2236	0.89	OK
MC0-20_010	2166	0.84	OK
MC0-20_011	2061	0.87	OK
MC0-20_015	1632	0.86	OK
MC0-20_016	1496	0.82	OK
MC0-20_018	1167	0.77	OK
MC0-20_020	784	0.79	OK
MC0-20_021	628	0.86	OK
MC0-20_022	520	0.89	OK
MC0-20_023	421	0.80	OK
MC0-20_024	321	0.84	OK
MC0-20_025	239	0.74	OK
MC0-20_026	161	0.80	OK
MC0-20_027	114	0.88	OK
MC0-20_028	79	0.70	OK
MC0-20_029	58	0.86	OK
MC0-20_030	41	0.61	OK
MC0-20_031	31	0.71	OK
MC0-20_033	16	0.88	OK
MC0-20_036	7	0.86	OK
MC0-20_040	4	0.50	OK

Code

ggplot(item_difficulty,
       aes(mean_score)) +
  geom_histogram(binwidth = .05, colour = "white") +
  geom_vline(xintercept = c(0.10, 0.90), linetype = "dashed") +
  facet_wrap(~ `Test Identifier`, ncol = 1) +
  scale_x_continuous(labels = percent_format(accuracy = 1)) +
  labs(
    x     = "Item Mean Score",
    y     = "Number of Items",
    title = "Item-difficulty Distribution\n(dashed lines = floor / ceiling cut-offs)"
  ) +
  theme_minimal()

Item response count

Code

# 1. Summarise per question: counts of correct, incorrect, and non-responses
response_summary <- results_cleaned_df %>%
  group_by(`Test Identifier`, `Question Identifier`) %>%
  summarise(
    correct     = sum(RawScore, na.rm = TRUE),
    nonresponse = sum(is.na(AnswerResponse)),
    incorrect   = n() - correct - nonresponse,
    .groups     = "drop"
  )

# 2. Define plotting function
build_response_stack_plot <- function(df, test_id, plot_title) {
  df_sum <- df %>%
    filter(`Test Identifier` == test_id) %>%
    arrange(`Question Identifier`) %>%
    mutate(
      `Question Identifier` = factor(
        `Question Identifier`,
        levels = unique(`Question Identifier`)
      )
    ) %>%
    pivot_longer(
      cols      = c(nonresponse, incorrect, correct),
      names_to  = "Outcome",
      values_to = "Count"
    ) %>%
    mutate(
      Outcome = factor(Outcome, levels = c("correct", "incorrect", "nonresponse"))
    )
  
  ggplot(df_sum, aes(x = Count, y = `Question Identifier`, fill = Outcome)) +
    geom_col(width = 0.6) +
    scale_fill_manual(
      values = c(correct     = "#1b9e77",
                 incorrect   = "#d95f02",
                 nonresponse = "grey80"),
      labels = c("Correct", "Incorrect", "Non-response")
    ) +
    scale_y_discrete(labels = function(x) str_remove(x, "^.*_")) +
    scale_x_continuous(expand = expansion(add = c(0, 0.05))) +
    labs(
      title = plot_title,
      x     = "Count",
      y     = "Question",
      fill  = NULL
    ) +
    coord_flip() +
    theme_minimal(base_size = 12) +
    theme(
      panel.grid       = element_blank(),
      axis.ticks       = element_blank(),
      legend.position  = "bottom",
      legend.direction = "horizontal"
    )
}

# 3. Generate and display plots for each test
plot_mc20_stack <- build_response_stack_plot(
  response_summary,
  "MC0-20_2025",
  "MC 0–20: Correct, Incorrect & Non-response by Question"
)
plot_mc100_stack <- build_response_stack_plot(
  response_summary,
  "MC0-100_2025",
  "MC 0–100: Correct, Incorrect & Non-response by Question"
)

# 4. Print the plots
print(plot_mc20_stack)

Code

print(plot_mc100_stack)

Item response time (seconds)

Code

# 1. Prep data: extract numeric QID & factor
df_rt <- results_cleaned_df %>%
  mutate(
    Qnum     = as.integer(str_remove(`Question Identifier`, "^.*_")),
    Question = factor(Qnum, levels = sort(unique(Qnum)))
  )

# 2. Box‐plot constructor with a single flag
build_rt_box_plot <- function(df, test_id, title, show_outliers = TRUE) {
  d <- df %>% filter(`Test Identifier` == test_id)
  
  # if we don't want outliers, drop them first
  if (!show_outliers) {
    d <- d %>%
      group_by(Question) %>%
      filter(
        between(
          `Answer Duration`,
          quantile(`Answer Duration`, .25) - 1.5 * IQR(`Answer Duration`),
          quantile(`Answer Duration`, .75) + 1.5 * IQR(`Answer Duration`)
        )
      ) %>%
      ungroup()
  }
  
  ggplot(d, aes(y = Question, x = `Answer Duration`)) +
    geom_boxplot(
      width         = 0.6,
      outlier.shape = if (show_outliers) 19 else NA
    ) +
    labs(
      title = title,
      x     = "Answer Duration (s)",
      y     = "Question"
    ) +
    coord_flip() +
    theme_minimal(base_size = 12) +
    theme(
      panel.grid.major.x = element_blank(),
      panel.grid.minor   = element_blank(),
      axis.ticks         = element_blank()
    )
}

# 3. Build all four plots
p20_with     <- build_rt_box_plot(df_rt, "MC0-20_2025",  "MC 0–20 (with outliers)",     TRUE)
p20_without  <- build_rt_box_plot(df_rt, "MC0-20_2025",  "MC 0–20 (no outliers)",       FALSE)
p100_with    <- build_rt_box_plot(df_rt, "MC0-100_2025", "MC 0–100 (with outliers)",    TRUE)
p100_without <- build_rt_box_plot(df_rt, "MC0-100_2025", "MC 0–100 (no outliers)",      FALSE)

# 4. Stack them per test (independent x‐axes by default)
combined_mc20  <- p20_with  / p20_without
combined_mc100 <- p100_with / p100_without

# 5. Display
print(p20_with)

Code

print(p100_with)

Code

print(p20_without / p100_without)

Data table

Code

mc_summary %>%
  mutate(
    `Correct pct`   = correct/total * 100
  ) %>%
  select(
    `Question Identifier`,
    n            = total,
    `Correct pct`,
    mean_dur_all,
    sd_dur_all,
    mean_dur_corr,
    sd_dur_corr,
    mean_dur_inc,
    sd_dur_inc
  ) %>%
  kable(
    digits  = 1,
    caption = "Item response time",
    format  = "html"
  ) %>%
  kable_styling(full_width = FALSE) %>%
  scroll_box(width = "100%", height = "400px")

Item response time
Question Identifier	n	Correct pct	mean_dur_all	sd_dur_all	mean_dur_corr	sd_dur_corr	mean_dur_inc	sd_dur_inc
MC0-20_001	2465	93.3	6.4	6.2	6.2	5.6	9.5	11.6
MC0-20_001	2465	93.3	6.4	6.2	6.2	5.6	9.5	11.6
MC0-20_002	2458	93.7	3.5	3.1	3.4	2.9	4.9	5.2
MC0-20_002	2458	93.7	3.5	3.1	3.4	2.9	4.9	5.2
MC0-20_003	2453	86.1	3.6	3.3	3.4	2.7	4.9	5.4
MC0-20_003	2453	86.1	3.6	3.3	3.4	2.7	4.9	5.4
MC0-20_004	2443	92.1	2.7	2.2	2.6	2.0	3.7	3.1
MC0-20_004	2443	92.1	2.7	2.2	2.6	2.0	3.7	3.1
MC0-20_005	2437	77.5	3.8	3.3	3.7	3.0	4.4	4.0
MC0-20_005	2437	77.5	3.8	3.3	3.7	3.0	4.4	4.0
MC0-20_006	2415	87.3	3.1	2.4	2.9	2.2	4.0	3.4
MC0-20_006	2415	87.3	3.1	2.4	2.9	2.2	4.0	3.4
MC0-20_007	2397	67.2	4.4	4.0	4.2	3.4	4.9	4.9
MC0-20_007	2397	67.2	4.4	4.0	4.2	3.4	4.9	4.9
MC0-20_008	2319	57.5	3.8	2.9	3.8	2.9	3.8	3.0
MC0-20_008	2319	57.5	3.8	2.9	3.8	2.9	3.8	3.0
MC0-20_009	2236	89.2	2.8	1.9	2.7	1.7	3.4	3.0
MC0-20_009	2236	89.2	2.8	1.9	2.7	1.7	3.4	3.0
MC0-20_010	2166	84.2	2.8	1.8	2.8	1.6	2.9	2.4
MC0-20_010	2166	84.2	2.8	1.8	2.8	1.6	2.9	2.4
MC0-20_011	2061	87.3	3.0	1.8	3.0	1.6	2.9	2.7
MC0-20_011	2061	87.3	3.0	1.8	3.0	1.6	2.9	2.7
MC0-20_012	1975	92.4	2.2	1.4	2.2	1.3	2.5	2.4
MC0-20_012	1975	92.4	2.2	1.4	2.2	1.3	2.5	2.4
MC0-20_013	1870	91.0	2.2	1.5	2.2	1.2	2.8	3.3
MC0-20_013	1870	91.0	2.2	1.5	2.2	1.2	2.8	3.3
MC0-20_014	1760	92.4	2.0	1.1	2.0	1.1	2.2	1.5
MC0-20_014	1760	92.4	2.0	1.1	2.0	1.1	2.2	1.5
MC0-20_015	1632	86.2	2.4	1.4	2.3	1.2	2.7	2.1
MC0-20_015	1632	86.2	2.4	1.4	2.3	1.2	2.7	2.1
MC0-20_016	1496	82.0	2.5	1.3	2.5	1.2	2.4	1.7
MC0-20_016	1496	82.0	2.5	1.3	2.5	1.2	2.4	1.7
MC0-20_017	1315	91.3	2.0	1.1	2.0	1.0	2.3	2.0
MC0-20_017	1315	91.3	2.0	1.1	2.0	1.0	2.3	2.0
MC0-20_018	1167	77.3	2.5	1.4	2.5	1.2	2.6	1.8
MC0-20_018	1167	77.3	2.5	1.4	2.5	1.2	2.6	1.8
MC0-20_019	930	90.2	1.7	0.9	1.7	0.8	1.8	1.7
MC0-20_019	930	90.2	1.7	0.9	1.7	0.8	1.8	1.7
MC0-20_020	784	79.1	2.2	1.4	2.2	1.1	2.3	2.3
MC0-20_020	784	79.1	2.2	1.4	2.2	1.1	2.3	2.3
MC0-20_021	628	86.5	1.7	1.0	1.7	1.0	1.6	1.1
MC0-20_021	628	86.5	1.7	1.0	1.7	1.0	1.6	1.1
MC0-20_022	520	89.4	1.5	1.0	1.5	0.7	1.8	2.1
MC0-20_022	520	89.4	1.5	1.0	1.5	0.7	1.8	2.1
MC0-20_023	421	79.8	1.7	1.1	1.6	0.9	1.9	1.6
MC0-20_023	421	79.8	1.7	1.1	1.6	0.9	1.9	1.6
MC0-20_024	321	84.1	1.6	0.8	1.5	0.7	1.8	1.4
MC0-20_024	321	84.1	1.6	0.8	1.5	0.7	1.8	1.4
MC0-20_025	239	74.1	1.8	0.9	1.8	0.9	1.7	1.0
MC0-20_025	239	74.1	1.8	0.9	1.8	0.9	1.7	1.0
MC0-20_026	161	79.5	1.9	0.9	2.0	0.9	1.4	0.8
MC0-20_026	161	79.5	1.9	0.9	2.0	0.9	1.4	0.8
MC0-20_027	114	87.7	1.4	0.8	1.4	0.6	1.7	1.4
MC0-20_027	114	87.7	1.4	0.8	1.4	0.6	1.7	1.4
MC0-20_028	79	69.6	1.6	0.9	1.6	0.8	1.6	1.1
MC0-20_028	79	69.6	1.6	0.9	1.6	0.8	1.6	1.1
MC0-20_029	58	86.2	1.5	1.5	1.4	0.8	2.2	3.5
MC0-20_029	58	86.2	1.5	1.5	1.4	0.8	2.2	3.5
MC0-20_030	41	61.0	1.3	0.6	1.4	0.6	1.3	0.6
MC0-20_030	41	61.0	1.3	0.6	1.4	0.6	1.3	0.6
MC0-20_031	31	71.0	1.4	0.7	1.4	0.8	1.3	0.5
MC0-20_031	31	71.0	1.4	0.7	1.4	0.8	1.3	0.5
MC0-20_032	23	91.3	1.2	0.4	1.1	0.4	1.5	0.7
MC0-20_032	23	91.3	1.2	0.4	1.1	0.4	1.5	0.7
MC0-20_033	16	87.5	1.2	0.4	1.1	0.4	1.5	0.7
MC0-20_033	16	87.5	1.2	0.4	1.1	0.4	1.5	0.7
MC0-20_034	15	93.3	1.1	0.3	1.1	0.3	1.0	NA
MC0-20_034	15	93.3	1.1	0.3	1.1	0.3	1.0	NA
MC0-20_035	11	90.9	1.3	0.5	1.2	0.4	2.0	NA
MC0-20_035	11	90.9	1.3	0.5	1.2	0.4	2.0	NA
MC0-20_036	7	85.7	1.3	0.5	1.2	0.4	2.0	NA
MC0-20_036	7	85.7	1.3	0.5	1.2	0.4	2.0	NA
MC0-20_037	7	100.0	1.0	0.0	1.0	0.0	NaN	NA
MC0-20_037	7	100.0	1.0	0.0	1.0	0.0	NaN	NA
MC0-20_038	6	100.0	0.8	0.4	0.8	0.4	NaN	NA
MC0-20_038	6	100.0	0.8	0.4	0.8	0.4	NaN	NA
MC0-20_039	4	100.0	1.0	0.0	1.0	0.0	NaN	NA
MC0-20_039	4	100.0	1.0	0.0	1.0	0.0	NaN	NA
MC0-20_040	4	50.0	1.0	0.0	1.0	0.0	1.0	0.0
MC0-20_040	4	50.0	1.0	0.0	1.0	0.0	1.0	0.0
MC0-20_041	4	100.0	2.0	1.4	2.0	1.4	NaN	NA
MC0-20_041	4	100.0	2.0	1.4	2.0	1.4	NaN	NA
MC0-20_042	4	100.0	1.2	0.5	1.2	0.5	NaN	NA
MC0-20_042	4	100.0	1.2	0.5	1.2	0.5	NaN	NA
MC0-20_043	4	100.0	1.2	0.5	1.2	0.5	NaN	NA
MC0-20_043	4	100.0	1.2	0.5	1.2	0.5	NaN	NA
MC0-20_044	1	100.0	1.0	NA	1.0	NA	NaN	NA
MC0-20_044	1	100.0	1.0	NA	1.0	NA	NaN	NA
MC0-100_001	2383	95.3	5.8	6.7	5.6	6.1	9.8	14.2
MC0-100_001	2383	95.3	5.8	6.7	5.6	6.1	9.8	14.2
MC0-100_002	2374	84.7	3.9	4.4	3.6	3.9	5.7	6.0
MC0-100_002	2374	84.7	3.9	4.4	3.6	3.9	5.7	6.0
MC0-100_003	2365	69.8	3.6	2.9	3.5	2.9	3.7	3.0
MC0-100_003	2365	69.8	3.6	2.9	3.5	2.9	3.7	3.0
MC0-100_004	2359	85.8	3.2	2.7	3.1	2.6	3.9	2.9
MC0-100_004	2359	85.8	3.2	2.7	3.1	2.6	3.9	2.9
MC0-100_005	2349	82.5	3.3	2.8	3.1	2.6	4.0	3.5
MC0-100_005	2349	82.5	3.3	2.8	3.1	2.6	4.0	3.5
MC0-100_006	2323	93.6	2.9	2.0	2.8	1.7	4.0	4.5
MC0-100_006	2323	93.6	2.9	2.0	2.8	1.7	4.0	4.5
MC0-100_007	2301	93.3	2.6	1.7	2.5	1.6	3.0	2.8
MC0-100_007	2301	93.3	2.6	1.7	2.5	1.6	3.0	2.8
MC0-100_008	2285	85.9	3.3	2.2	3.2	2.2	3.8	2.6
MC0-100_008	2285	85.9	3.3	2.2	3.2	2.2	3.8	2.6
MC0-100_009	2237	88.9	3.2	2.1	3.2	1.9	3.4	2.8
MC0-100_009	2237	88.9	3.2	2.1	3.2	1.9	3.4	2.8
MC0-100_010	2171	54.2	3.3	2.4	3.6	2.7	3.1	2.0
MC0-100_010	2171	54.2	3.3	2.4	3.6	2.7	3.1	2.0
MC0-100_011	2091	94.0	2.5	1.5	2.4	1.5	2.6	2.1
MC0-100_011	2091	94.0	2.5	1.5	2.4	1.5	2.6	2.1
MC0-100_012	2007	89.6	2.8	1.5	2.8	1.5	2.9	1.9
MC0-100_012	2007	89.6	2.8	1.5	2.8	1.5	2.9	1.9
MC0-100_013	1875	89.5	2.2	1.3	2.2	1.2	2.5	2.1
MC0-100_013	1875	89.5	2.2	1.3	2.2	1.2	2.5	2.1
MC0-100_014	1739	88.1	2.2	1.6	2.1	1.5	2.6	1.8
MC0-100_014	1739	88.1	2.2	1.6	2.1	1.5	2.6	1.8
MC0-100_015	1600	79.9	2.6	1.6	2.6	1.5	2.7	1.9
MC0-100_015	1600	79.9	2.6	1.6	2.6	1.5	2.7	1.9
MC0-100_016	1429	82.3	2.5	1.5	2.4	1.4	2.7	1.8
MC0-100_016	1429	82.3	2.5	1.5	2.4	1.4	2.7	1.8
MC0-100_017	1268	82.3	2.3	1.2	2.2	1.0	2.4	1.9
MC0-100_017	1268	82.3	2.3	1.2	2.2	1.0	2.4	1.9
MC0-100_018	1105	85.9	2.2	1.2	2.1	1.0	2.3	1.9
MC0-100_018	1105	85.9	2.2	1.2	2.1	1.0	2.3	1.9
MC0-100_019	965	81.5	2.1	1.2	2.1	1.0	2.0	1.7
MC0-100_019	965	81.5	2.1	1.2	2.1	1.0	2.0	1.7
MC0-100_020	843	86.5	1.7	0.9	1.7	0.9	1.8	1.1
MC0-100_020	843	86.5	1.7	0.9	1.7	0.9	1.8	1.1
MC0-100_021	724	88.3	1.7	0.8	1.7	0.8	1.5	0.8
MC0-100_021	724	88.3	1.7	0.8	1.7	0.8	1.5	0.8
MC0-100_022	604	88.1	1.5	0.8	1.5	0.7	1.5	0.9
MC0-100_022	604	88.1	1.5	0.8	1.5	0.7	1.5	0.9
MC0-100_023	506	83.6	1.7	0.9	1.7	0.9	1.5	0.9
MC0-100_023	506	83.6	1.7	0.9	1.7	0.9	1.5	0.9
MC0-100_024	402	81.1	1.5	0.7	1.5	0.7	1.5	1.0
MC0-100_024	402	81.1	1.5	0.7	1.5	0.7	1.5	1.0
MC0-100_025	310	74.8	1.7	0.9	1.8	0.9	1.5	1.0
MC0-100_025	310	74.8	1.7	0.9	1.8	0.9	1.5	1.0
MC0-100_026	231	78.4	1.8	1.2	1.7	0.9	2.1	2.1
MC0-100_026	231	78.4	1.8	1.2	1.7	0.9	2.1	2.1
MC0-100_027	170	84.1	1.3	0.6	1.4	0.6	1.3	0.5
MC0-100_027	170	84.1	1.3	0.6	1.4	0.6	1.3	0.5
MC0-100_028	140	67.9	1.6	0.9	1.7	1.0	1.5	0.8
MC0-100_028	140	67.9	1.6	0.9	1.7	1.0	1.5	0.8
MC0-100_029	97	74.2	1.6	0.8	1.5	0.7	1.7	1.2
MC0-100_029	97	74.2	1.6	0.8	1.5	0.7	1.7	1.2
MC0-100_030	64	78.1	1.5	1.0	1.6	1.1	1.2	0.6
MC0-100_030	64	78.1	1.5	1.0	1.6	1.1	1.2	0.6
MC0-100_031	45	68.9	1.2	0.5	1.2	0.6	1.2	0.4
MC0-100_031	45	68.9	1.2	0.5	1.2	0.6	1.2	0.4
MC0-100_032	29	69.0	1.3	0.8	1.2	0.7	1.4	0.9
MC0-100_032	29	69.0	1.3	0.8	1.2	0.7	1.4	0.9
MC0-100_033	19	63.2	1.3	0.6	1.4	0.7	1.1	0.4
MC0-100_033	19	63.2	1.3	0.6	1.4	0.7	1.1	0.4
MC0-100_034	12	58.3	1.2	0.5	1.3	0.5	1.2	0.4
MC0-100_034	12	58.3	1.2	0.5	1.3	0.5	1.2	0.4
MC0-100_035	8	37.5	1.4	1.1	2.0	1.7	1.0	0.0
MC0-100_035	8	37.5	1.4	1.1	2.0	1.7	1.0	0.0
MC0-100_036	7	42.9	1.6	1.1	2.3	1.5	1.0	0.0
MC0-100_036	7	42.9	1.6	1.1	2.3	1.5	1.0	0.0
MC0-100_037	5	40.0	1.8	1.3	3.0	1.4	1.0	0.0
MC0-100_037	5	40.0	1.8	1.3	3.0	1.4	1.0	0.0
MC0-100_038	4	75.0	1.5	1.0	1.7	1.2	1.0	NA
MC0-100_038	4	75.0	1.5	1.0	1.7	1.2	1.0	NA
MC0-100_039	3	33.3	1.3	0.6	2.0	NA	1.0	0.0
MC0-100_039	3	33.3	1.3	0.6	2.0	NA	1.0	0.0
MC0-100_040	2	50.0	1.5	0.7	2.0	NA	1.0	NA
MC0-100_040	2	50.0	1.5	0.7	2.0	NA	1.0	NA
MC0-100_041	2	50.0	1.5	0.7	2.0	NA	1.0	NA
MC0-100_041	2	50.0	1.5	0.7	2.0	NA	1.0	NA
MC0-100_042	2	50.0	1.5	0.7	2.0	NA	1.0	NA
MC0-100_042	2	50.0	1.5	0.7	2.0	NA	1.0	NA
MC0-100_043	2	50.0	1.5	0.7	2.0	NA	1.0	NA
MC0-100_043	2	50.0	1.5	0.7	2.0	NA	1.0	NA
MC0-100_044	2	50.0	3.5	2.1	2.0	NA	5.0	NA
MC0-100_044	2	50.0	3.5	2.1	2.0	NA	5.0	NA
MC0-100_045	1	100.0	3.0	NA	3.0	NA	NaN	NA
MC0-100_045	1	100.0	3.0	NA	3.0	NA	NaN	NA
MC0-100_046	1	100.0	3.0	NA	3.0	NA	NaN	NA
MC0-100_046	1	100.0	3.0	NA	3.0	NA	NaN	NA
MC0-100_047	1	100.0	3.0	NA	3.0	NA	NaN	NA
MC0-100_047	1	100.0	3.0	NA	3.0	NA	NaN	NA
MC0-100_048	1	100.0	2.0	NA	2.0	NA	NaN	NA
MC0-100_048	1	100.0	2.0	NA	2.0	NA	NaN	NA
MC0-100_049	1	100.0	3.0	NA	3.0	NA	NaN	NA
MC0-100_049	1	100.0	3.0	NA	3.0	NA	NaN	NA

Performance by item type

Code

summarise_group_all <- function(df, group_id) {
  df %>%
    group_by(Test, !!sym(group_id)) %>%
    summarise(
      total     = n(),
      correct   = sum(`Is.Answer.Correct`),
      incorrect = total - correct,
      .groups   = "drop"
    ) %>%
    mutate(
      pct_corr = correct/total,
      pct_inc  = incorrect/total
    ) %>%
    pivot_longer(
      c(pct_inc, pct_corr),
      names_to  = "Outcome",
      values_to = "Proportion"
    ) %>%
    mutate(
      Outcome = factor(if_else(Outcome=="pct_inc","Incorrect","Correct"),
                       levels = c("Incorrect","Correct")),
      !!sym(group_id) := factor(!!sym(group_id), levels = unique(!!sym(group_id)))
    )
}

build_combined_group_plot <- function(df, group_id, group_desc, title) {
  df2 <- df %>%
    mutate(
      !!sym(group_id)   := replace_na(as.character(!!sym(group_id)),   "Other"),
      !!sym(group_desc) := replace_na(as.character(!!sym(group_desc)), "Other")
    )
  sum_df <- df2 %>%
    group_by(Test, !!sym(group_id)) %>%
    summarise(
      total     = n(),
      correct   = sum(`Is.Answer.Correct`),
      incorrect = total - correct,
      .groups   = "drop"
    ) %>%
    mutate(
      pct_corr = correct/total,
      pct_inc  = incorrect/total
    ) %>%
    pivot_longer(c(pct_inc, pct_corr),
                 names_to="Outcome",
                 values_to="Proportion") %>%
    mutate(
      Outcome = factor(if_else(Outcome=="pct_inc","Incorrect","Correct"),
                       levels=c("Incorrect","Correct")),
      !!sym(group_id) := factor(!!sym(group_id),
                                levels=unique(!!sym(group_id)))
    )
  pairs <- df2 %>%
    filter(!!sym(group_id) != "Other") %>%
    distinct(!!sym(group_id), !!sym(group_desc)) %>%
    arrange(!!sym(group_id)) %>%
    transmute(pair = paste0(!!sym(group_id), ": ", !!sym(group_desc))) %>%
    pull(pair)
  cap <- str_c(pairs, collapse = "\n")
  lab_df <- sum_df %>%
    distinct(Test, !!sym(group_id), total) %>%
    mutate(label = paste0("n = ", comma(total)))
  
  ggplot(sum_df, aes(x=Proportion, y=!!sym(group_id), fill=Outcome)) +
    geom_col(position="fill", colour="grey90") +
    geom_text(data=lab_df, inherit.aes=FALSE,
              aes(x=1.01, y=!!sym(group_id), label=label),
              hjust=0, size=3) +
    facet_wrap(~Test, scales="free_y") +
    coord_cartesian(xlim=c(0,1.12), clip="off") +
    scale_x_continuous(labels=percent_format(accuracy=1),
                       breaks=seq(0,1,0.25),
                       minor_breaks=seq(0,1,0.05),
                       expand=c(0.01,0.01)) +
    scale_y_discrete(labels=\(x) str_remove(x,"^.*_")) +
    scale_fill_manual(values=c("Incorrect"="#d95f02","Correct"="#1b9e77")) +
    geom_text(aes(label=percent(Proportion,accuracy=1)),
              position=position_fill(vjust=0.5),
              colour="white", size=3) +
    labs(title=title,
         x=NULL,
         y=str_replace_all(group_id,"_"," "),
         fill=NULL,
         caption=cap) +
    theme_minimal(base_size=12) +
    theme(
      plot.caption.position = "plot",
      axis.ticks           = element_blank(),
      panel.grid.major.y   = element_blank(),
      panel.grid.minor     = element_blank(),
      legend.position      = "bottom",
      legend.direction     = "horizontal",
      axis.text.y          = element_text(hjust=1, margin=margin(r=-6)),
      plot.margin          = margin(5.5,25,5.5,5.5),
      plot.caption         = element_text(hjust=0)
    )
}

plot_ratio <- build_combined_group_plot(
  results_cleaned_df, "Ratio_label", "Ratio_desc", "Performance by Ratio"
)
plot_congruence <- build_combined_group_plot(
  results_cleaned_df, "Congruence_label", "Congruence_desc", "Performance by Congruence"
)
plot_decade <- build_combined_group_plot(
  results_cleaned_df, "Decade_label", "Decade_desc", "Performance by Decade"
)

print(plot_ratio)

Code

print(plot_congruence)

Code

print(plot_decade)

Test Performance

Compute accuracy and response correct per minute (RCPM) for each student x test

Code

student_metrics <- results_cleaned_df %>%
  group_by(Identifier, `Test Identifier`, Test) %>%
  summarise(
    attempted      = n(),
    correct_count  = sum(RawScore, na.rm = TRUE),
    total_minutes  = sum(`Answer Duration`, na.rm = TRUE) / 60,
    .groups        = "drop"
  ) %>%
  mutate(
    Accuracy = correct_count / attempted,
    RCPM     = correct_count / total_minutes
  )

test_summary <- student_metrics %>%
  group_by(`Test Identifier`) %>%
  summarise(
    Acc_mean   = mean(Accuracy, na.rm = TRUE),
    Acc_sd     = sd(Accuracy,   na.rm = TRUE),
    Acc_skew   = skewness(Accuracy, na.rm = TRUE),
    Acc_kurt   = kurtosis(Accuracy, na.rm = TRUE),
    RCPM_mean  = mean(RCPM, na.rm = TRUE),
    RCPM_sd    = sd(RCPM,   na.rm = TRUE),
    RCPM_skew  = skewness(RCPM, na.rm = TRUE),
    RCPM_kurt  = kurtosis(RCPM, na.rm = TRUE),
    .groups    = "drop"
  )

test_summary_t <- test_summary %>%
  pivot_longer(-`Test Identifier`, names_to = "Metric", values_to = "Value") %>%
  pivot_wider(names_from = `Test Identifier`, values_from = Value)

knitr::kable(
  test_summary_t,
  digits  = 2,
  caption = "Test results summary"
)

Test results summary
Metric	MC0-100_2025	MC0-20_2025
Acc_mean	0.83	0.83
Acc_sd	0.17	0.18
Acc_skew	-1.41	-1.59
Acc_kurt	4.99	5.63
RCPM_mean	17.76	17.04
RCPM_sd	8.82	8.41
RCPM_skew	0.62	0.57
RCPM_kurt	3.26	3.73

Distribution of accuracy/fluency

Code

student_metrics_long <- student_metrics %>%
  select(Identifier, `Test Identifier`, Accuracy, RCPM) %>%
  pivot_longer(
    cols      = c(Accuracy, RCPM),
    names_to  = "Metric",
    values_to = "Value"
  )

ggplot(student_metrics_long, aes(x = `Test Identifier`, y = Value, fill = `Test Identifier`)) +
  geom_violin(alpha = 0.5, colour = NA) +
  geom_boxplot(width = 0.1, position = position_dodge(width = 0.9)) +
  facet_wrap(~ Metric, scales = "free_y") +
  labs(
    title = "Distribution of Accuracy and RCPM by Test",
    x     = NULL,
    y     = NULL,
    fill  = "Test"
  ) +
  theme_minimal(base_size = 12) +
  theme(
    legend.position = "none",
    axis.text.x     = element_text(angle = 45, hjust = 1)
  )

Mean and spread test

Welch’s two-sample t-test in case of non-homogeneity of variance, and the two groups are independent (no overlap in students). Levene’s test for spread as distribution is not normal (required for F-test).
No evidence of difference in average accuracy and Levene’s more robust test for variance finds no significant spread difference. Both groups have essentially the same mean and the same variability in accuracy.
Small but statistically significant speed difference (RCPM) and borderline (p≈0.06) variance difference on Levene’s test. i.e. Statistically significant though small difference in mean speed (MC 0–100 > MC 0–20), and any difference in spread is at best marginal.

Code

ggqqplot(
  student_metrics,
  x        = "Accuracy",
  color    = "Test Identifier",
  facet.by = "Test Identifier",
  title    = "Q–Q Plot of Accuracy by Test"
)

Code

ggqqplot(
  student_metrics,
  x        = "RCPM",
  color    = "Test Identifier",
  facet.by = "Test Identifier",
  title    = "Q–Q Plot of RCPM by Test"
)

Code

t_acc <- t.test(
  Accuracy ~ `Test Identifier`,
  data      = student_metrics,
  var.equal = FALSE    # Welch’s t-test
)

t_rcpm <- t.test(
  RCPM ~ `Test Identifier`,
  data      = student_metrics,
  var.equal = FALSE
)

f_acc <- var.test(
  Accuracy ~ `Test Identifier`,
  data = student_metrics
)

f_rcpm <- var.test(
  RCPM ~ `Test Identifier`,
  data = student_metrics
)

lev_acc <- leveneTest(
  Accuracy ~ `Test Identifier`,
  data       = student_metrics
)

lev_rcpm <- leveneTest(
  RCPM ~ `Test Identifier`,
  data       = student_metrics
)

list(
  t_acc,
  t_rcpm,
  f_acc,
  f_rcpm,
  lev_acc,
  lev_rcpm
)

[[1]]

    Welch Two Sample t-test

data:  Accuracy by Test Identifier
t = -0.1235, df = 4847.7, p-value = 0.9017
alternative hypothesis: true difference in means between group MC0-100_2025 and group MC0-20_2025 is not equal to 0
95 percent confidence interval:
 -0.010627170  0.009367607
sample estimates:
mean in group MC0-100_2025  mean in group MC0-20_2025 
                 0.8272641                  0.8278939 


[[2]]

    Welch Two Sample t-test

data:  RCPM by Test Identifier
t = 2.9179, df = 4816.3, p-value = 0.00354
alternative hypothesis: true difference in means between group MC0-100_2025 and group MC0-20_2025 is not equal to 0
95 percent confidence interval:
 0.2371036 1.2080688
sample estimates:
mean in group MC0-100_2025  mean in group MC0-20_2025 
                  17.76444                   17.04185 


[[3]]

    F test to compare two variances

data:  Accuracy by Test Identifier
F = 0.91889, num df = 2383, denom df = 2465, p-value = 0.03746
alternative hypothesis: true ratio of variances is not equal to 1
95 percent confidence interval:
 0.8485680 0.9951034
sample estimates:
ratio of variances 
         0.9188944 


[[4]]

    F test to compare two variances

data:  RCPM by Test Identifier
F = 1.0995, num df = 2383, denom df = 2465, p-value = 0.01957
alternative hypothesis: true ratio of variances is not equal to 1
95 percent confidence interval:
 1.015334 1.190667
sample estimates:
ratio of variances 
          1.099481 


[[5]]
Levene's Test for Homogeneity of Variance (center = median)
        Df F value Pr(>F)
group    1  0.2647 0.6069
      4848               

[[6]]
Levene's Test for Homogeneity of Variance (center = median)
        Df F value  Pr(>F)  
group    1  3.4659 0.06271 .
      4848                  
---
Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1

Student total score

student_totals <- results_cleaned_df %>%
  group_by(`Test Identifier`, Identifier) %>%
  summarise(
    prop_correct = mean(RawScore),
    .groups      = "drop"
  )

student_totals %>%
  mutate(flag = case_when(
    prop_correct >= 0.9 ~ "90% & above",
    prop_correct <= 0.2 ~ "20% & below",
    TRUE              ~ "Middle"
  )) %>%
  count(`Test Identifier`, flag) %>%
  mutate(pct = n/sum(n) * 100) %>% 
  knitr::kable(caption = "Students at total ceiling / floor", digits = 1)

Students at total ceiling / floor
Test Identifier	flag	n	pct
MC0-100_2025	20% & below	11	0.2
MC0-100_2025	90% & above	1124	23.2
MC0-100_2025	Middle	1249	25.8
MC0-20_2025	20% & below	17	0.4
MC0-20_2025	90% & above	1138	23.5
MC0-20_2025	Middle	1311	27.0

Export

Code

# 1) Create a new workbook
wb <- createWorkbook()

# 2) Table of Contents sheet
addWorksheet(wb, "TOC")
toc <- data.frame(
  Sheet       = c(
    "Results raw",
    "MC summary",
    "Response summary",
    "Stacked responses",
    "MC times",
    "Meta plots",
    "Student metrics",
    "Test summary"
  ),
  Description = c(
    "All individual item‐level responses",
    "MC summary table plus p_mc20 and p_mc100 plots",
    "Response summary table plus plot_mc20_stack & plot_mc100_stack",
    "Bullet‐chart plots p20_with,p100_with,p20_without,p100_without",
    "Response‐time boxplots with/without outliers (4 panels)",
    "Performance by Ratio/Congruence/Decade (3 facet plots)",
    "Student metrics table plus Accuracy/RCPM violin‐boxplot",
    "Test‐level summary (means, SD, skewness, kurtosis)"
  ),
  stringsAsFactors = FALSE
)
writeData(wb, "TOC", toc, startCol = 1, startRow = 1)

# 3) Raw data
addWorksheet(wb, "Results raw")
writeData(wb, "Results raw", results_cleaned_df)

# 4) MC summary + p_mc20, p_mc100
addWorksheet(wb, "MC summary")
writeData(wb, "MC summary", mc_summary)
# export the two plots to PNG and insert
png(tmp <- tempfile(fileext = ".png"), width = 800, height = 600)
print(p_mc20)
dev.off()
insertImage(wb, "MC summary", tmp, startRow = nrow(mc_summary) + 3, startCol = 1, width = 6, height = 4)

png(tmp <- tempfile(fileext = ".png"), width = 800, height = 600)
print(p_mc100)
dev.off()
insertImage(wb, "MC summary", tmp, startRow = nrow(mc_summary) + 3, startCol = 9, width = 6, height = 4)

# 5) Response summary + plot_mc20_stack & plot_mc100_stack
addWorksheet(wb, "Response summary")
writeData(wb, "Response summary", response_summary)
png(tmp <- tempfile(fileext = ".png"), width = 800, height = 600)
print(plot_mc20_stack)
dev.off()
insertImage(wb, "Response summary", tmp, startRow = nrow(response_summary) + 3, startCol = 1, width = 6, height = 4)
png(tmp <- tempfile(fileext = ".png"), width = 800, height = 600)
print(plot_mc100_stack)
dev.off()
insertImage(wb, "Response summary", tmp, startRow = nrow(response_summary) + 3, startCol = 9, width = 6, height = 4)

# 6) Bullet‐chart sheet
addWorksheet(wb, "Stacked responses")
# no table, just the four plots
plots <- list(p20_with, p100_with, p20_without, p100_without)
cols  <- c(1, 9, 1, 9)
rows  <- c(1, 1, 20, 20)
for(i in seq_along(plots)) {
  png(tmp <- tempfile(fileext = ".png"), width = 800, height = 400)
  print(plots[[i]])
  dev.off()
  insertImage(wb, "Stacked responses", tmp,
              startRow = rows[i], startCol = cols[i],
              width = 6, height = 4)
}

# 7) MC times table
addWorksheet(wb, "MC times")
times_tbl <- mc_summary %>%
  mutate(`Correct pct` = correct/total*100) %>%
  select(
    `Question Identifier`,
    n            = total,
    `Correct pct`,
    mean_dur_all, sd_dur_all,
    mean_dur_corr, sd_dur_corr,
    mean_dur_inc,  sd_dur_inc
  )
writeData(wb, "MC times", times_tbl)

# 8) Meta plots (Ratio/Congruence/Decade)
addWorksheet(wb, "Meta plots")
meta_plots <- list(plot_ratio, plot_congruence, plot_decade)
for(i in seq_along(meta_plots)) {
  png(tmp <- tempfile(fileext = ".png"), width = 800, height = 400)
  print(meta_plots[[i]])
  dev.off()
  insertImage(wb, "Meta plots", tmp,
              startRow = (i-1)*20 + 1, startCol = 1,
              width = 8, height = 4)
}

# 9) Student metrics + violin‐boxplot
addWorksheet(wb, "Student metrics")
writeData(wb, "Student metrics", student_metrics)
png(tmp <- tempfile(fileext = ".png"), width = 800, height = 600)
print({
  student_metrics_long %>%
    ggplot(aes(x = `Test Identifier`, y = Value, fill = `Test Identifier`)) +
    geom_violin(alpha = .5) +
    geom_boxplot(width = .1, position = position_dodge(.9)) +
    facet_wrap(~Metric, scales="free_y") +
    theme_minimal()
})
dev.off()
insertImage(wb, "Student metrics", tmp, startRow = nrow(student_metrics)+3, startCol = 1, width = 8, height = 6)

# 10) Test summary
addWorksheet(wb, "Test summary")
writeData(wb, "Test summary", test_summary)

# 11) Save workbook
saveWorkbook(wb, "analysis_export.xlsx", overwrite = TRUE)

# 12) Download link
cat("Workbook exported")