1. Data Loading & Preparation

We load the two fluency datasets computed by the pipeline:

  1. Mixed Model (MM): Accounts for student and item random effects.
  2. Log Difference (LogDiff): The difference between an item's log response time and a baseline log response time (a minimal sketch follows).
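For orientation, a minimal sketch of how a LogDiff score could be derived (illustrative only; the pipeline's actual computation, baseline definition, and fast/slow cutoff may differ):

# Hypothetical sketch of LogDiff scoring (not the pipeline code):
# compare each response time (RT) with an item baseline on the log scale,
# then score 0 = incorrect, 1 = correct but slow, 2 = correct and fast.
sketch_logdiff_score <- function(rt, baseline_rt, is_correct, fast_cutoff = 0) {
  log_diff <- log(rt) - log(baseline_rt)  # negative = faster than baseline
  dplyr::case_when(
    !is_correct             ~ 0L,  # incorrect
    log_diff <= fast_cutoff ~ 2L,  # correct fast
    TRUE                    ~ 1L   # correct slow
  )
}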
# Define paths relative to project root
path_mm <- "data/processed/metrics/timed_item_fluency.parquet"
path_ld <- "data/processed/metrics/timed_item_fluency_logdiff.parquet"

# Check if files exist
if (!file.exists(path_mm) || !file.exists(path_ld)) {
  stop("Data files not found. Please ensure the pipeline has run and paths are correct.")
}

# Load and tag datasets
df_mm <- read_parquet(path_mm) %>% 
  mutate(Method = "Mixed_Model", Score = fluency_mm) %>%
  select(-fluency_mm) 

df_ld <- read_parquet(path_ld) %>% 
  mutate(Method = "LogDiff", Score = fluency_logdiff) %>%
  select(-fluency_logdiff)

# Combine for initial processing
# Filter for Term 1 as per analysis requirements
df_all <- bind_rows(df_mm, df_ld) %>%
  filter(term == 1) %>% 
  mutate(
    # Pool cohorts: combine "Year 1 A/B" -> "Year 1", etc.
    Cohort = case_when(
      str_detect(exam_group_cohort, "(?i)(Year 1|Y1)") ~ "Year 1",
      str_detect(exam_group_cohort, "(?i)(Foundation|F-)") ~ "Foundation",
      TRUE ~ NA_character_
    ),
    Item_ID = question_id
  ) %>%
  filter(!is.na(Cohort), !is.na(Score))

# Extract Accuracy Scores (0/1) for the network analysis
# Accuracy is identical across methods, so we derive it from the MM dataset
df_acc <- df_mm %>% 
  filter(term == 1) %>%
  mutate(
    Cohort = case_when(
      str_detect(exam_group_cohort, "(?i)(Year 1|Y1)") ~ "Year 1",
      str_detect(exam_group_cohort, "(?i)(Foundation|F-)") ~ "Foundation",
      TRUE ~ NA_character_
    ),
    Acc_Score = as.integer(is_ans_correct)
  ) %>%
  filter(!is.na(Cohort), !is.na(Acc_Score))

2. Data Filtering

We applied the following filtering criteria:

  1. Items: Must have at least 30 responses.
  2. Students: Must have at least 20 responses to the retained items.
# Calculate initial counts (note: computed on the full MM dataset, before the term filter)
counts_raw <- df_mm %>% group_by(student_id) %>% summarise(n_items = n()) 
item_counts_raw <- df_mm %>% group_by(question_id) %>% summarise(n_students = n())

# Visualise raw distributions
par(mfrow=c(1,2))
hist(counts_raw$n_items, main="Items per Student (Raw)", xlab="Count", col="lightblue", border="white")
hist(item_counts_raw$n_students, main="Students per Item (Raw)", xlab="Count", col="lightgreen", border="white")

# 1. Identify Valid Items
valid_items <- df_mm %>%
  count(question_id) %>%
  filter(n >= 30) %>%
  pull(question_id)

# 2. Identify Valid Students (based on valid items)
valid_students <- df_mm %>%
  filter(question_id %in% valid_items) %>%
  count(student_id) %>%
  filter(n >= 20) %>%
  pull(student_id)

# Apply filters to main datasets
df_filtered <- df_all %>%
  filter(question_id %in% valid_items, student_id %in% valid_students)

df_acc_filtered <- df_acc %>%
  filter(question_id %in% valid_items, student_id %in% valid_students)

# Summary stats (before/after filtering)
# Note: df_all stacks both methods, so response totals count method-level rows, not unique responses
n_stud_raw <- length(unique(df_all$student_id))
n_item_raw <- length(unique(df_all$question_id))
n_resp_raw <- nrow(df_all)

n_stud_rem <- length(unique(df_filtered$student_id))
n_item_rem <- length(unique(df_filtered$question_id))
n_resp_rem <- nrow(df_filtered)

Post-filtering Summary:

tribble(
  ~Metric, ~`Before Filter`, ~`After Filter`,
  "Students", comma(n_stud_raw), comma(n_stud_rem),
  "Items", comma(n_item_raw), comma(n_item_rem),
  "Responses", comma(n_resp_raw), comma(n_resp_rem)
) %>%
  kable(align = c("l", "r", "r")) %>%
  kable_styling(bootstrap_options = "striped", full_width = FALSE, position = "left") %>%
  row_spec(0, bold = TRUE, color = "#1b1b1b", background = "#e5e5e5")
Metric Before Filter After Filter
Students 4,926 4,691
Items 325 234
Responses 339,920 331,720

3. Score Distributions

Comparison of score categories (0 = Incorrect, 1 = Correct Slow, 2 = Correct Fast) across methods and cohorts.

score_dist <- df_filtered %>%
  group_by(Method, Cohort, Score) %>%
  summarise(Count = n(), .groups = "drop_last") %>%
  mutate(Percentage = Count / sum(Count) * 100) %>%
  pivot_wider(id_cols = c(Method, Cohort), names_from = Score, values_from = Percentage, names_prefix = "Score_")

kable(score_dist, digits=1, caption = "Percentage of Fluency Scores by Method and Cohort") %>%
  kable_styling(bootstrap_options = "striped", full_width = F, position = "left") %>%
  row_spec(0, bold = TRUE, color = "#1b1b1b", background = "#e5e5e5")
Percentage of Fluency Scores by Method and Cohort
Method Cohort Score_0 Score_1 Score_2
LogDiff Foundation 16.3 40.4 43.3
LogDiff Year 1 14.1 35.1 50.9
Mixed_Model Foundation 16.3 41.6 42.0
Mixed_Model Year 1 14.1 42.8 43.1

4. Test Overlap Heatmap

Based on the overlapping test participation, we pool exam groups within each cohort (e.g. the Year 1 A and B groups become a single "Year 1" group), and we treat related test families, for example MNA0-100 (missing number ascending) and MNC0-100 (missing number choice), as similar items.

# Prepare data for heatmap
clean_fluency_hm <- df_mm %>%
  filter(term == 1) %>% 
  mutate(
    # Clean up test IDs to get 'Family' (e.g., AADD, STPM)
    standard_family = str_remove(standard_test_id, "(_2025.*|-MOY.*|ABR.*)")
  ) %>%
  distinct(student_id, exam_group, term, standard_family)

# Build presence matrix
family_matrix <- clean_fluency_hm %>%
  mutate(has_item = 1) %>%
  pivot_wider(names_from = standard_family, values_from = has_item, values_fill = 0)

# Calculate proportions
family_counts <- family_matrix %>%
  group_by(exam_group, term) %>%
  summarise(across(where(is.numeric), sum), .groups = "drop")

group_sizes <- family_matrix %>%
  count(exam_group, term, name = "n_students")

family_prop <- family_counts %>%
  left_join(group_sizes, by = c("exam_group", "term")) %>%
  mutate(across(-c(exam_group, term, n_students), ~ .x / n_students))

# Pivot for plotting
heatmap_df <- family_prop %>%
  select(-n_students) %>%
  pivot_longer(-c(exam_group, term), names_to = "test_family", values_to = "prop") %>%
  filter(prop > 0) 

# Plot
ggplot(heatmap_df, aes(x = test_family, y = exam_group, fill = prop)) +
  geom_tile(colour = "grey80") +
  scale_fill_gradient(low = "white", high = "steelblue", labels = scales::percent) +
  scale_x_discrete(position = "bottom") +
  theme_minimal(base_size = 12) +
  theme(
    axis.text.x = element_text(angle = 45, hjust = 1, size = 11, face = "bold"),
    axis.text.y = element_text(size = 11, face = "bold"),
    panel.grid = element_blank(),
    strip.background = element_rect(fill = "grey90", colour = NA),
    axis.title.x = element_blank(),
    axis.title.y = element_blank()
  ) +
  labs(title = "Test Family Participation by Exam Group", fill = "Participation")

5. Network Analysis

We build item-item correlation networks: nodes are items, edges carry the absolute pairwise correlation between item scores, edges below a threshold (0.3) are dropped, and communities are detected with the Louvain algorithm.

# Function to build network graph and compute summary statistics
prepare_network <- function(data, score_col, threshold = 0.3) {

  if (nrow(data) == 0) {
    return(list(graph = make_empty_graph(), summary = NULL, communities = NULL))
  }

  wide_mat <- data %>%
    select(student_id, question_id, all_of(score_col)) %>%
    pivot_wider(names_from = question_id, values_from = all_of(score_col)) %>%
    select(-student_id) %>%
    as.matrix()

  cor_mat <- cor(wide_mat, use = "pairwise.complete.obs")
  cor_mat[is.na(cor_mat)] <- 0
  diag(cor_mat) <- 0

  adj_mat <- cor_mat
  adj_mat[abs(adj_mat) < threshold] <- 0
  adj_mat <- abs(adj_mat)

  g <- graph_from_adjacency_matrix(adj_mat, mode = "undirected", weighted = TRUE)

  if (gorder(g) == 0) {
    return(list(graph = g, summary = NULL, communities = NULL))
  }

  # Communities
  comm <- if (gsize(g) > 0) cluster_louvain(g) else NULL

  # Edge stats (retain only edges above threshold)
  edge_weights <- if (gsize(g) > 0) E(g)$weight else numeric()
  n_nodes <- gorder(g)
  possible_edges <- n_nodes * (n_nodes - 1) / 2
  n_edges <- gsize(g)
  pct_retained <- if (possible_edges > 0) n_edges / possible_edges else NA_real_
  pct_removed <- if (!is.na(pct_retained)) 1 - pct_retained else NA_real_

  # For a simple undirected graph, density equals the proportion of possible edges retained
  density_val <- pct_retained

  cor_stats <- tibble(
    Mean_Abs_Corr = if (length(edge_weights) > 0) mean(edge_weights) else NA_real_,
    Median_Abs_Corr = if (length(edge_weights) > 0) median(edge_weights) else NA_real_,
    Min_Abs_Corr = if (length(edge_weights) > 0) min(edge_weights) else NA_real_,
    Max_Abs_Corr = if (length(edge_weights) > 0) max(edge_weights) else NA_real_
  )

  top_degree <- if (gsize(g) > 0) {
    deg <- sort(degree(g), decreasing = TRUE)
    head(names(deg), 5)
  } else character()

  top_betweenness <- if (gsize(g) > 0) {
    # igraph treats edge weights as distances in betweenness(), so invert the correlation weights
    bt <- sort(betweenness(g, directed = FALSE, weights = 1 / (E(g)$weight + 1e-6)), decreasing = TRUE)
    head(names(bt), 3)
  } else character()

  summary_tbl <- tibble(
    Metric = c(
      "Number of items (nodes)",
      "Number of edges above threshold",
      "Percentage of possible edges retained",
      "Density",
      "Threshold used",
      "Edges removed by threshold",
      "Mean abs(correlation)",
      "Median abs(correlation)",
      "Minimum abs(correlation)",
      "Maximum abs(correlation)",
      "Number of communities (Louvain)",
      "Modularity",
      "Top 5 items by degree",
      "Top 3 by betweenness"
    ),
    Value = c(
      n_nodes,
      n_edges,
      percent(pct_retained, accuracy = 0.1),
      percent(density_val, accuracy = 0.1),
      threshold,
      percent(pct_removed, accuracy = 0.1),
      round(cor_stats$Mean_Abs_Corr, 3),
      round(cor_stats$Median_Abs_Corr, 3),
      round(cor_stats$Min_Abs_Corr, 3),
      round(cor_stats$Max_Abs_Corr, 3),
      if (!is.null(comm)) length(unique(membership(comm))) else NA_integer_,
      if (!is.null(comm)) round(modularity(comm), 3) else NA_real_,
      paste(top_degree, collapse = ", "),
      paste(top_betweenness, collapse = ", ")
    )
  )

  list(graph = g, summary = summary_tbl, communities = comm)
}

plot_network <- function(net_obj, title_suffix) {
  g <- net_obj$graph
  comm <- net_obj$communities

  if (gorder(g) == 0) {
    plot.new()
    title(main = paste0("No data for: ", title_suffix))
    return(NULL)
  }

  if (gsize(g) > 0 && !is.null(comm)) {
    num_comm <- length(unique(membership(comm)))
    plot(comm, g,
         layout = layout_with_fr(g),
         vertex.size = 5,
         vertex.label.cex = 0.6,
         vertex.label.color = "black",
         edge.width = E(g)$weight * 2,
         main = paste0("Network: ", title_suffix, "\n(", num_comm, " Communities)"))
  } else {
    plot(g, vertex.size = 5, vertex.label.cex = 0.6,
         main = paste0("Network: ", title_suffix, " (No strong edges)"))
  }
}

Foundation Network Analysis

Accuracy Score Network (0/1)

df_sub <- df_acc_filtered %>% filter(Cohort == "Foundation")
net_found_acc <- prepare_network(df_sub, "Acc_Score", threshold = 0.3)
plot_network(net_found_acc, "Foundation Accuracy")

if (!is.null(net_found_acc$summary)) {
  net_found_acc$summary %>%
    kable(caption = "Network Summary — Foundation Accuracy", align = c("l", "l")) %>%
    kable_styling(bootstrap_options = c("striped", "hover"), full_width = FALSE, position = "left") %>%
    row_spec(0, bold = TRUE, color = "#1b1b1b", background = "#e5e5e5")
}
Network Summary — Foundation Accuracy
Metric Value
Number of items (nodes) 133
Number of edges above threshold 1210
Percentage of possible edges retained 13.8%
Density 13.8%
Threshold used 0.3
Edges removed by threshold 86.2%
Mean abs(correlation) 0.474
Median abs(correlation) 0.417
Minimum abs(correlation) 0.3
Maximum abs(correlation) 1
Number of communities (Louvain) 7
Modularity 0.375
Top 5 items by degree MC0-20_031, MC0-20_030, MC0-20_029, MC0-20_023, MC0-20_028
Top 3 by betweenness MC0-20_029, MC0-20_036, MC0-20_031

Fluency Score Network (Mixed Model)

df_sub <- df_filtered %>% filter(Cohort == "Foundation", Method == "Mixed_Model")
net_found_mm <- prepare_network(df_sub, "Score", threshold = 0.3)
plot_network(net_found_mm, "Foundation MM Fluency")

if (!is.null(net_found_mm$summary)) {
  net_found_mm$summary %>%
    kable(caption = "Network Summary — Foundation MM Fluency", align = c("l", "l")) %>%
    kable_styling(bootstrap_options = c("striped", "hover"), full_width = FALSE, position = "left") %>%
    row_spec(0, bold = TRUE, color = "#1b1b1b", background = "#e5e5e5")
}
Network Summary — Foundation MM Fluency
Metric Value
Number of items (nodes) 133
Number of edges above threshold 1149
Percentage of possible edges retained 13.1%
Density 13.1%
Threshold used 0.3
Edges removed by threshold 86.9%
Mean abs(correlation) 0.493
Median abs(correlation) 0.434
Minimum abs(correlation) 0.3
Maximum abs(correlation) 1
Number of communities (Louvain) 3
Modularity 0.321
Top 5 items by degree MC0-20_031, MC0-20_035, MC0-20_033, MC0-20_032, MC0-20_040
Top 3 by betweenness MC0-20_031, MC0-20_040, MC0-20_032

Year 1 Network Analysis

Accuracy Score Network (0/1)

df_sub <- df_acc_filtered %>% filter(Cohort == "Year 1")
net_y1_acc <- prepare_network(df_sub, "Acc_Score", threshold = 0.3)
plot_network(net_y1_acc, "Year 1 Accuracy")

if (!is.null(net_y1_acc$summary)) {
  net_y1_acc$summary %>%
    kable(caption = "Network Summary — Year 1 Accuracy", align = c("l", "l")) %>%
    kable_styling(bootstrap_options = c("striped", "hover"), full_width = FALSE, position = "left") %>%
    row_spec(0, bold = TRUE, color = "#1b1b1b", background = "#e5e5e5")
}
Network Summary — Year 1 Accuracy
Metric Value
Number of items (nodes) 101
Number of edges above threshold 2014
Percentage of possible edges retained 39.9%
Density 39.9%
Threshold used 0.3
Edges removed by threshold 60.1%
Mean abs(correlation) 0.503
Median abs(correlation) 0.464
Minimum abs(correlation) 0.3
Maximum abs(correlation) 1
Number of communities (Louvain) 4
Modularity 0.137
Top 5 items by degree ASDD_011, ASDD_012, AADD_013, AADD_014, ASDD_013
Top 3 by betweenness MC0-100_032, ASDD_015, AADD_014

Fluency Score Network (Mixed Model)

df_sub <- df_filtered %>% filter(Cohort == "Year 1", Method == "Mixed_Model")
net_y1_mm <- prepare_network(df_sub, "Score", threshold = 0.3)
plot_network(net_y1_mm, "Year 1 MM Fluency")

if (!is.null(net_y1_mm$summary)) {
  net_y1_mm$summary %>%
    kable(caption = "Network Summary — Year 1 MM Fluency", align = c("l", "l")) %>%
    kable_styling(bootstrap_options = c("striped", "hover"), full_width = FALSE, position = "left") %>%
    row_spec(0, bold = TRUE, color = "#1b1b1b", background = "#e5e5e5")
}
Network Summary — Year 1 MM Fluency
Metric Value
Number of items (nodes) 101
Number of edges above threshold 1176
Percentage of possible edges retained 23.3%
Density 23.3%
Threshold used 0.3
Edges removed by threshold 76.7%
Mean abs(correlation) 0.464
Median abs(correlation) 0.425
Minimum abs(correlation) 0.3
Maximum abs(correlation) 1
Number of communities (Louvain) 3
Modularity 0.144
Top 5 items by degree ASDD_015, ASDD_013, ASDD_014, ASDD_012, ASDD_011
Top 3 by betweenness MC0-100_033, ASDD_015, MNAs0-100_019

6. IRT Analysis
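As background for the code below: we fit a partial credit model (PCM) with the TAM package and estimate person abilities with Warm's weighted likelihood estimates (WLE). In standard notation (with the empty sum for $k = 0$ taken as zero, and the step parameters $\delta_{jh}$ corresponding to TAM's xsi values), the model and the WLE reliability computed in run_irt_analysis are:

\[
P(X_{ij} = k \mid \theta_i)
  = \frac{\exp\!\left(\sum_{h=1}^{k} (\theta_i - \delta_{jh})\right)}
         {\sum_{m=0}^{K_j} \exp\!\left(\sum_{h=1}^{m} (\theta_i - \delta_{jh})\right)},
\qquad
\mathrm{Rel}_{\mathrm{WLE}}
  = 1 - \frac{\overline{\mathrm{SE}^2(\hat\theta)}}{\widehat{\mathrm{Var}}(\hat\theta)}
\]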

# Function to run IRT (Partial Credit Model)
run_irt_analysis <- function(data, score_col, cohort_label, method_label) {

  # Wide Format
  wide_df <- data %>%
    select(student_id, question_id, all_of(score_col)) %>%
    pivot_wider(names_from = question_id, values_from = all_of(score_col))

  resp_matrix <- wide_df %>% select(-student_id)

  # Run TAM (MML); capture.output() suppresses TAM's console progress messages
  mod <- suppressWarnings({
    captured_mod <- NULL
    capture.output({
      captured_mod <- tam.mml(resp = resp_matrix, verbose = FALSE)
    })
    captured_mod
  })

  # WLE Estimates
  wle_mod <- suppressWarnings({
    captured_wle <- NULL
    capture.output({
      captured_wle <- tam.wle(mod)
    })
    captured_wle
  })

  # Reliability & Error
  wle_reliability <- 1 - (mean(wle_mod$error^2) / var(wle_mod$theta))
  theta_mean <- mean(wle_mod$theta)
  theta_sd   <- sd(wle_mod$theta)
  avg_sem    <- mean(wle_mod$error)
  error_pct  <- (avg_sem / theta_sd) * 100

  # Item Stats
  # Use mod$item to get one difficulty per item (xsi.item) rather than step parameters
  diff_lookup <- setNames(mod$item$xsi.item, mod$item$item)

  # Discrimination
  calc_disc <- function(item_vec, theta) {
    cor(item_vec, theta, use = "pairwise.complete.obs")
  }
  disc <- sapply(resp_matrix, calc_disc, theta = wle_mod$theta)

  # Props
  prop_correct <- colMeans(resp_matrix >= 1, na.rm=TRUE)
  prop_fast    <- colMeans(resp_matrix == 2, na.rm=TRUE)

  # Construct Table
  # 1. Get IDs
  item_ids <- names(prop_correct)

  # 2. Build mapping for numerical ID (Sort alphabetically first)
  item_order <- data.frame(Question_ID = sort(item_ids)) %>%
    mutate(Item_Num = row_number())

  # 3. Build main stats frame
  item_stats <- data.frame(
    Question_ID = item_ids,
    Prop_Correct = round(prop_correct, 2),
    Prop_Fast = round(prop_fast, 2),
    Difficulty = round(diff_lookup[item_ids], 2),
    Discrimination = round(disc, 2)
  )

  # 4. Join and Reorder
  item_stats <- item_stats %>%
    left_join(item_order, by = "Question_ID") %>%
    arrange(Item_Num) %>%
    select(
      Item_Num,
      Question_ID,
      Prop_Correct,
      Prop_Fast,
      Difficulty,
      Discrimination
    )

  # Formatted Text
  txt_rel <- paste0("Reliability (WLE): ", round(wle_reliability, 3),
                    " (True variance: ", round(wle_reliability*100, 1), "%)")

  txt_err <- paste0("Mean Ability: ", round(theta_mean, 2), " (SD ", round(theta_sd, 2), "). ",
                    "Avg SEM: ", round(avg_sem, 2), " (", round(error_pct, 1), "% of SD).")

  return(list(
    model = mod,
    wle = wle_mod,
    item_stats = item_stats,
    text_rel = txt_rel,
    text_err = txt_err,
    title = paste(cohort_label, "-", method_label)
  ))
}

# Reliability helpers -----------------------------------------------------

make_matrix <- function(df, score_col) {
  mat <- tapply(df[[score_col]], list(df$student_id, df$question_id), mean)

  # drop rows with all NA
  keep <- apply(mat, 1, function(x) any(!is.na(x)))
  mat[keep, , drop = FALSE]
}

cronbach_alpha <- function(mat) {
  valid_cols <- apply(mat, 2, function(x) {
    nx <- sum(!is.na(x))
    vx <- if (nx > 1) var(x, na.rm = TRUE) else 0
    nx > 1 && vx > 0
  })
  mat2 <- mat[, valid_cols, drop = FALSE]

  k <- ncol(mat2)
  if (k < 2) return(NA_real_)

  item_vars <- apply(mat2, 2, var, na.rm = TRUE)
  total_scores <- rowSums(mat2, na.rm = TRUE)
  total_var <- var(total_scores)

  k / (k - 1) * (1 - sum(item_vars) / total_var)
}
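For reference, the helper above implements the standard formula (person totals are taken over whatever items each student answered, so under missing data this is an approximation):

\[
\alpha = \frac{k}{k-1}\left(1 - \frac{\sum_{j=1}^{k} \sigma^2_j}{\sigma^2_{\mathrm{total}}}\right)
\]

where $\sigma^2_j$ is the variance of item $j$ and $\sigma^2_{\mathrm{total}}$ is the variance of the students' total scores.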

item_total_cor <- function(mat) {
  valid_cols <- apply(mat, 2, function(x) {
    nx <- sum(!is.na(x))
    vx <- if (nx > 1) var(x, na.rm = TRUE) else 0
    nx > 1 && vx > 0
  })
  mat2 <- mat[, valid_cols, drop = FALSE]

  cors <- numeric(ncol(mat2))
  for (j in seq_len(ncol(mat2))) {
    item <- mat2[, j]
    rest <- rowSums(mat2[, -j, drop = FALSE], na.rm = TRUE)
    cors[j] <- cor(item, rest, use = "complete.obs")
  }
  cors
}

# Format combined IRT table with multi-level headers
combine_item_tables <- function(mm_stats, ld_stats) {
  mm_fmt <- mm_stats %>%
    rename(`Item #` = Item_Num, mm_pct_correct = Prop_Correct, mm_pct_fast = Prop_Fast,
           mm_difficulty = Difficulty, mm_discrimination = Discrimination)

  ld_fmt <- ld_stats %>%
    rename(`Item #` = Item_Num, ld_pct_correct = Prop_Correct, ld_pct_fast = Prop_Fast,
           ld_difficulty = Difficulty, ld_discrimination = Discrimination)

  mm_fmt %>%
    full_join(ld_fmt, by = c("Item #", "Question_ID")) %>%
    arrange(`Item #`) %>%
    mutate(
      Pct_Correct = percent(coalesce(mm_pct_correct, ld_pct_correct), accuracy = 0.1),
      mm_pct_fast = percent(mm_pct_fast, accuracy = 0.1),
      ld_pct_fast = percent(ld_pct_fast, accuracy = 0.1)
    ) %>%
    select(
      `Item #`, Question_ID,
      Pct_Correct,
      mm_pct_fast, mm_difficulty, mm_discrimination,
      ld_pct_fast, ld_difficulty, ld_discrimination
    )
}

# Wright map helper to show two methods side-by-side
plot_wright_side_by_side <- function(wle_mm, model_mm, wle_ld, model_ld, main_prefix) {
  op <- par(no.readonly = TRUE)
  on.exit({
    try(close.screen(all.screens = TRUE), silent = TRUE)
    par(op)
  })

  invisible(capture.output({
    wrightMap(wle_mm$theta, model_mm$xsi$xsi,
              label.items.srt = 90, main.title = paste(main_prefix, "- Mixed Model"),
              axis.items = "", item.side = itemClassic)
  }))

  try(close.screen(all.screens = TRUE), silent = TRUE)
  par(op)

  invisible(capture.output({
    wrightMap(wle_ld$theta, model_ld$xsi$xsi,
              label.items.srt = 90, main.title = paste(main_prefix, "- LogDiff"),
              axis.items = "", item.side = itemClassic)
  }))

  invisible(NULL)
}

Foundation

Reliability & validity

# Subset by method
found_mm <- df_filtered %>% filter(Cohort == "Foundation", Method == "Mixed_Model")
found_ld <- df_filtered %>% filter(Cohort == "Foundation", Method == "LogDiff")

# Cronbach's alpha
alpha_found <- tibble(
  Method = c("Mixed Model", "LogDiff"),
  Cronbach_alpha = c(
    cronbach_alpha(make_matrix(found_mm, "Score")),
    cronbach_alpha(make_matrix(found_ld, "Score"))
  )
)

# Correlation with accuracy
acc_found <- df_acc_filtered %>%
  filter(Cohort == "Foundation") %>%
  group_by(student_id) %>%
  summarise(prop_correct = mean(Acc_Score), .groups = "drop")

score_found <- bind_rows(
  found_mm %>% mutate(Method = "Mixed Model"),
  found_ld %>% mutate(Method = "LogDiff")
) %>%
  group_by(Method, student_id) %>%
  summarise(mean_score = mean(Score), .groups = "drop") %>%
  left_join(acc_found, by = "student_id") %>%
  group_by(Method) %>%
  summarise(accuracy_corr = cor(mean_score, prop_correct, use = "complete.obs"), .groups = "drop")

# Item-total correlations (compute once per method rather than repeating the call)
itc_vals_f_mm <- item_total_cor(make_matrix(found_mm, "Score"))
itc_vals_f_ld <- item_total_cor(make_matrix(found_ld, "Score"))

itc_found <- tibble(
  Method = rep(c("Mixed Model", "LogDiff"), c(length(itc_vals_f_mm), length(itc_vals_f_ld))),
  Item_Total_Cor = c(itc_vals_f_mm, itc_vals_f_ld)
) %>%
  group_by(Method) %>%
  summarise(
    Min = min(Item_Total_Cor, na.rm = TRUE),
    Q1 = quantile(Item_Total_Cor, 0.25, na.rm = TRUE),
    Median = median(Item_Total_Cor, na.rm = TRUE),
    Mean = mean(Item_Total_Cor, na.rm = TRUE),
    Q3 = quantile(Item_Total_Cor, 0.75, na.rm = TRUE),
    Max = max(Item_Total_Cor, na.rm = TRUE),
    .groups = "drop"
  )

kable(alpha_found, digits = 3, caption = "Cronbach's Alpha by Method (Foundation)") %>%
  kable_styling(bootstrap_options = c("striped", "hover"), full_width = FALSE, position = "left") %>%
  row_spec(0, bold = TRUE, color = "#1b1b1b", background = "#e5e5e5")
Cronbach’s Alpha by Method (Foundation)
Method Cronbach_alpha
Mixed Model 0.775
LogDiff 0.805
kable(score_found, digits = 3, caption = "Correlation with Accuracy by Method (Foundation)") %>%
  kable_styling(bootstrap_options = c("striped", "hover"), full_width = FALSE, position = "left") %>%
  row_spec(0, bold = TRUE, color = "#1b1b1b", background = "#e5e5e5")
Correlation with Accuracy by Method (Foundation)
Method accuracy_corr
LogDiff 0.755
Mixed Model 0.955
kable(itc_found, digits = 3, caption = "Item-Total Correlation Summary (Foundation)") %>%
  kable_styling(bootstrap_options = c("striped", "hover"), full_width = FALSE, position = "left") %>%
  row_spec(0, bold = TRUE, color = "#1b1b1b", background = "#e5e5e5")
Item-Total Correlation Summary (Foundation)
Method Min Q1 Median Mean Q3 Max
LogDiff -0.311 0.173 0.251 0.257 0.343 0.934
Mixed Model -0.720 0.110 0.188 0.194 0.309 0.611

IRT Summary (Mixed Model vs LogDiff)

# Run Analyses
data_f_mm <- df_filtered %>% filter(Cohort == "Foundation", Method == "Mixed_Model")
data_f_ld <- df_filtered %>% filter(Cohort == "Foundation", Method == "LogDiff")

res_f_mm <- run_irt_analysis(data_f_mm, "Score", "Foundation", "Mixed Model")
res_f_ld <- run_irt_analysis(data_f_ld, "Score", "Foundation", "LogDiff")

irt_table_found <- combine_item_tables(res_f_mm$item_stats, res_f_ld$item_stats)

summary_found <- tibble(
  Metric = c("Reliability (WLE)", "Error Summary"),
  `Mixed Model` = c(res_f_mm$text_rel, res_f_mm$text_err),
  `Log Diff` = c(res_f_ld$text_rel, res_f_ld$text_err)
)

kable(summary_found, caption = "Foundation IRT Reliability and Error") %>%
  kable_styling(bootstrap_options = c("striped", "hover"), full_width = FALSE, position = "left") %>%
  row_spec(0, bold = TRUE, color = "#1b1b1b", background = "#e5e5e5")
Foundation IRT Reliability and Error
Metric Mixed Model Log Diff
Reliability (WLE) Reliability (WLE): 0.735 (True variance: 73.5%) Reliability (WLE): 0.829 (True variance: 82.9%)
Error Summary Mean Ability: 0 (SD 0.54). Avg SEM: 0.27 (50.5% of SD). Mean Ability: -0.01 (SD 0.7). Avg SEM: 0.28 (40.4% of SD).
kable(
  irt_table_found,
  col.names = c(
    "Item #", "Question_ID", "Pct Correct",
    "Pct Fast", "Item Difficulty", "Item Discrim.",
    "Pct Fast", "Item Difficulty", "Item Discrim."
  ),
  caption = "Foundation IRT Item Statistics"
) %>%
  add_header_above(c(" " = 3, "Mixed Model" = 3, "Log Diff" = 3)) %>%
  kable_styling(bootstrap_options = c("striped", "hover"), full_width = FALSE, position = "left", fixed_thead = TRUE) %>%
  row_spec(0, bold = TRUE, color = "#1b1b1b", background = "#e5e5e5") %>%
  scroll_box(width = "100%", height = "500px") %>%
  footnote(
    general = c(
      "Item Difficulty (Xsi): Latent difficulty parameter from the TAM partial credit model.",
      "Item Discrim. (PBIs): Point-biserial correlation between item scores and ability estimates."
    ),
    general_title = "Notes: ",
    footnote_as_chunk = TRUE
  )
Foundation IRT Item Statistics
Mixed Model
Log Diff
Item # Question_ID Pct Correct Pct Fast Item Difficulty Item Discrim. Pct Fast Item Difficulty Item Discrim.
1 MC0-20_001 96.0% 48.0% -1.30 0.25 51.0% -1.41 0.36
2 MC0-20_002 96.0% 49.0% -1.28 0.35 43.0% -1.28 0.46
3 MC0-20_003 88.0% 45.0% -0.72 0.36 43.0% -0.76 0.44
4 MC0-20_004 95.0% 48.0% -1.19 0.39 59.0% -1.42 0.49
5 MC0-20_005 80.0% 40.0% -0.35 0.39 57.0% -0.65 0.43
6 MC0-20_006 90.0% 45.0% -0.81 0.34 44.0% -0.85 0.44
7 MC0-20_007 71.0% 35.0% -0.08 0.34 43.0% -0.24 0.40
8 MC0-20_008 60.0% 30.0% 0.19 0.27 38.0% 0.03 0.35
9 MC0-20_009 92.0% 46.0% -0.92 0.27 46.0% -0.99 0.41
10 MC0-20_010 87.0% 44.0% -0.64 0.38 39.0% -0.63 0.46
11 MC0-20_011 90.0% 45.0% -0.78 0.28 44.0% -0.83 0.43
12 MC0-20_012 95.0% 48.0% -1.18 0.31 62.0% -1.43 0.46
13 MC0-20_013 94.0% 47.0% -1.07 0.31 60.0% -1.30 0.44
14 MC0-20_014 96.0% 48.0% -1.25 0.25 44.0% -1.25 0.30
15 MC0-20_015 90.0% 45.0% -0.76 0.25 49.0% -0.87 0.37
16 MC0-20_016 90.0% 45.0% -0.77 0.23 47.0% -0.84 0.41
17 MC0-20_017 95.0% 48.0% -1.15 0.28 61.0% -1.37 0.47
18 MC0-20_018 84.0% 42.0% -0.45 0.34 40.0% -0.44 0.46
19 MC0-20_019 96.0% 48.0% -1.29 0.29 51.0% -1.36 0.30
20 MC0-20_020 85.0% 43.0% -0.51 0.12 42.0% -0.51 0.32
21 MC0-20_021 92.0% 46.0% -0.86 0.36 44.0% -0.86 0.35
22 MC0-20_022 95.0% 47.0% -1.12 0.27 56.0% -1.25 0.31
23 MC0-20_023 88.0% 44.0% -0.61 0.43 43.0% -0.59 0.35
24 MC0-20_024 89.0% 45.0% -0.70 0.42 46.0% -0.74 0.37
25 MC0-20_025 83.0% 42.0% -0.40 0.30 44.0% -0.46 0.38
26 MC0-20_026 89.0% 44.0% -0.60 0.22 37.0% -0.51 0.40
27 MC0-20_027 93.0% 46.0% -0.83 0.31 54.0% -0.97 0.38
28 MC0-20_028 74.0% 38.0% -0.01 0.33 39.0% -0.07 0.43
29 MC0-20_029 89.0% 45.0% -0.65 0.36 52.0% -0.84 0.50
30 MC0-20_030 64.0% 33.0% 0.21 0.39 31.0% 0.22 0.50
31 MC0-20_031 79.0% 39.0% -0.29 0.59 32.0% -0.21 0.53
32 MC0-20_032 91.0% 48.0% -0.98 0.13 43.0% -0.99 0.35
33 MC0-20_033 93.0% 47.0% -0.87 0.30 33.0% -0.81 0.55
34 MC0-20_034 93.0% 47.0% -0.87 0.49 33.0% -0.81 0.65
35 MC0-20_035 91.0% 45.0% -0.72 0.25 27.0% -0.45 0.64
36 MC0-20_036 86.0% 43.0% -0.39 0.33 29.0% -0.31 0.70
37 MC0-20_037 100.0% 57.0% -15.62 -0.60 29.0% -17.56 0.80
38 MC0-20_038 100.0% 60.0% -12.10 0.34 20.0% -17.28 0.97
39 MC0-20_039 100.0% 50.0% -14.22 -0.64 0.0% -37.68 NA
40 MC0-20_040 50.0% 25.0% 0.47 0.38 0.0% -0.19 0.30
41 MNA0-20_001new 80.0% 41.0% -0.39 0.35 36.0% -0.37 0.36
42 MNA0-20_002new 83.0% 42.0% -0.49 0.33 38.0% -0.49 0.32
43 MNA0-20_003new 72.0% 36.0% -0.10 0.34 32.0% -0.08 0.35
44 MNA0-20_004new 81.0% 41.0% -0.40 0.36 38.0% -0.40 0.39
45 MNA0-20_005new 91.0% 46.0% -0.84 0.30 44.0% -0.88 0.38
46 MNA0-20_006new 78.0% 39.0% -0.23 0.29 40.0% -0.28 0.35
47 MNA0-20_007new 82.0% 41.0% -0.39 0.24 46.0% -0.50 0.33
48 MNA0-20_008new 84.0% 42.0% -0.51 0.36 45.0% -0.59 0.37
49 MNA0-20_009new 35.0% 17.0% 0.79 -0.13 20.0% 0.75 -0.07
50 MNA0-20_010new 69.0% 35.0% -0.09 0.17 42.0% -0.20 0.26
51 MNA0-20_011new 84.0% 42.0% -0.65 0.31 47.0% -0.72 0.29
52 MNA0-20_012new 54.0% 27.0% 0.19 0.13 32.0% 0.10 0.13
53 MNA0-20_013new 49.0% 25.0% 0.25 0.25 31.0% 0.19 0.33
54 MNA0-20_014new 61.0% 31.0% -0.08 0.37 39.0% -0.21 0.33
55 MNA0-20_015new 70.0% 35.0% -0.26 -0.12 48.0% -0.43 -0.11
56 MNA0-20_016new 60.0% 31.0% -0.04 0.18 44.0% -0.21 0.10
57 MNA0-20_017new 50.0% 25.0% 0.19 0.40 36.0% 0.02 0.23
58 MNA0-20_018new 59.0% 31.0% -0.02 -0.23 34.0% -0.03 -0.12
59 MNA0-20_019new 64.0% 32.0% -0.09 0.05 40.0% -0.18 0.17
60 MNC0-20_001 64.0% 32.0% 0.09 0.45 29.0% 0.16 0.42
61 MNC0-20_002 81.0% 41.0% -0.40 0.50 36.0% -0.36 0.47
62 MNC0-20_003 74.0% 37.0% -0.16 0.54 35.0% -0.15 0.50
63 MNC0-20_004 74.0% 37.0% -0.15 0.57 36.0% -0.16 0.57
64 MNC0-20_005 68.0% 34.0% 0.04 0.53 35.0% 0.03 0.55
65 MNC0-20_006 81.0% 41.0% -0.36 0.55 38.0% -0.32 0.53
66 MNC0-20_007 70.0% 35.0% 0.00 0.53 35.0% 0.01 0.55
67 MNC0-20_008 74.0% 37.0% -0.16 0.40 40.0% -0.22 0.39
68 MNC0-20_009 60.0% 30.0% 0.20 0.46 32.0% 0.19 0.51
69 MNC0-20_010 47.0% 24.0% 0.44 0.65 22.0% 0.56 0.66
70 MNC0-20_011 50.0% 26.0% 0.23 0.45 31.0% 0.16 0.42
71 MNC0-20_012 55.0% 28.0% 0.05 0.22 33.0% -0.07 0.28
72 MNC0-20_013 44.0% 22.0% 0.31 0.31 29.0% 0.15 0.28
73 MNC0-20_014 34.0% 18.0% 0.49 0.59 19.0% 0.48 0.53
74 MNC0-20_015 52.0% 27.0% 0.05 0.31 29.0% 0.05 0.24
75 MNC0-20_016 36.0% 19.0% 0.43 0.32 14.0% 0.76 0.03
76 MNC0-20_017 42.0% 23.0% 0.13 0.00 26.0% 0.16 0.02
77 MNC0-20_018 21.0% 11.0% 0.61 0.11 14.0% 0.63 0.22
78 MQ1-10_001 97.0% 49.0% -1.56 0.30 48.0% -1.61 0.39
79 MQ1-10_002 91.0% 46.0% -0.87 0.38 40.0% -0.84 0.42
80 MQ1-10_003 98.0% 50.0% -1.74 0.25 45.0% -1.78 0.39
81 MQ1-10_004 83.0% 42.0% -0.47 0.32 40.0% -0.49 0.40
82 MQ1-10_005 79.0% 39.0% -0.30 0.35 38.0% -0.32 0.41
83 MQ1-10_006 99.0% 49.0% -2.03 0.13 47.0% -2.07 0.29
84 MQ1-10_007 90.0% 45.0% -0.78 0.31 41.0% -0.78 0.39
85 MQ1-10_008 85.0% 43.0% -0.53 0.26 40.0% -0.53 0.35
86 MQ1-10_009 98.0% 50.0% -1.78 0.27 49.0% -1.85 0.41
87 MQ1-10_010 89.0% 45.0% -0.72 0.35 43.0% -0.74 0.43
88 MQ1-10_011 85.0% 42.0% -0.47 0.35 44.0% -0.53 0.42
89 MQ1-10_012 97.0% 49.0% -1.43 0.27 54.0% -1.57 0.43
90 MQ1-10_013 95.0% 48.0% -1.12 0.28 47.0% -1.18 0.42
91 MQ1-10_014 83.0% 42.0% -0.36 0.37 44.0% -0.42 0.45
92 MQ1-10_015 83.0% 42.0% -0.35 0.38 41.0% -0.36 0.37
93 MQ1-10_016 88.0% 44.0% -0.58 0.10 49.0% -0.68 0.30
94 MQ1-10_017 95.0% 48.0% -1.13 0.48 50.0% -1.21 0.53
95 MQ1-10_018 70.0% 35.0% 0.02 0.45 40.0% -0.05 0.46
96 MQ1-10_019 97.0% 48.0% -1.40 0.47 63.0% -1.64 0.42
97 MQ1-10_020 79.0% 40.0% -0.32 0.41 41.0% -0.36 0.50
98 MQ1-10_021 87.0% 44.0% -0.63 0.56 49.0% -0.75 0.55
99 MQ1-10_022 79.0% 40.0% -0.33 0.54 41.0% -0.31 0.57
100 MQ1-10_023 83.0% 42.0% -0.69 0.21 42.0% -0.67 0.17
101 MQ1-10_024 82.0% 42.0% -0.73 0.63 42.0% -0.71 0.55
102 MQ1-10_025 78.0% 40.0% -0.61 0.39 42.0% -0.62 0.46
103 MQ1-10_026 86.0% 44.0% -0.93 0.54 44.0% -0.90 0.60
104 MQ1-20-001 89.0% 45.0% -0.75 0.35 40.0% -0.75 0.34
105 MQ1-20-002 71.0% 36.0% -0.09 0.42 33.0% -0.10 0.42
106 MQ1-20-003 71.0% 36.0% -0.10 0.45 34.0% -0.11 0.44
107 MQ1-20-004 62.0% 31.0% 0.14 0.35 28.0% 0.18 0.36
108 MQ1-20-005 66.0% 34.0% 0.04 0.45 31.0% 0.05 0.43
109 MQ1-20-006 66.0% 33.0% 0.06 0.34 30.0% 0.08 0.33
110 MQ1-20-007 60.0% 30.0% 0.21 0.46 30.0% 0.18 0.46
111 MQ1-20-008 98.0% 49.0% -1.76 0.35 54.0% -1.89 0.35
112 MQ1-20-009 72.0% 36.0% -0.09 0.47 37.0% -0.13 0.43
113 MQ1-20-010 57.0% 29.0% 0.25 0.21 25.0% 0.31 0.16
114 MQ1-20-011 49.0% 25.0% 0.38 0.39 24.0% 0.38 0.41
115 MQ1-20-012 91.0% 45.0% -0.94 0.54 46.0% -0.99 0.43
116 MQ1-20-013 69.0% 34.0% -0.16 0.49 36.0% -0.18 0.42
117 MQ1-20-014 61.0% 31.0% 0.03 0.52 30.0% 0.05 0.41
118 MQ1-20-015 69.0% 35.0% -0.18 0.60 36.0% -0.19 0.55
119 MQ1-20-016 43.0% 21.0% 0.42 0.29 23.0% 0.42 0.33
120 MQ1-20-017 90.0% 45.0% -0.98 0.46 52.0% -1.11 0.35
121 MQ1-20-018 44.0% 22.0% 0.37 0.17 24.0% 0.34 0.21
122 MQ1-20-019 57.0% 28.0% 0.09 0.51 33.0% 0.02 0.52
123 MQ1-20-020 29.0% 15.0% 0.69 0.35 17.0% 0.65 0.28
124 MQ1-20-021 41.0% 20.0% 0.42 0.30 24.0% 0.35 0.30
125 MQ1-20-022 34.0% 17.0% 0.58 0.16 19.0% 0.56 0.23
126 MQ1-20-023 46.0% 24.0% 0.26 0.23 27.0% 0.16 0.17
127 MQ1-20-024 55.0% 27.0% 0.06 0.38 29.0% -0.02 0.34
128 MQ1-20-025 90.0% 45.0% -0.99 0.44 57.0% -1.19 0.31
129 MQ1-20-026 38.0% 19.0% 0.45 0.25 21.0% 0.39 0.09
130 MQ1-20-027 48.0% 24.0% 0.23 0.48 37.0% -0.04 0.40
131 MQ1-20-028 17.0% 10.0% 1.01 -0.06 10.0% 0.98 -0.11
132 MQ1-20-029 19.0% 11.0% 0.91 0.15 11.0% 0.85 0.01
133 MQ1-20-030 26.0% 13.0% 0.77 0.13 13.0% 0.70 0.25
Notes: Item Difficulty (Xsi): Latent difficulty parameter from the TAM partial credit model. Item Discrim. (PBIs): Point-biserial correlation between item scores and ability estimates.
plot_wright_side_by_side(res_f_mm$wle, res_f_mm$model, res_f_ld$wle, res_f_ld$model, "Foundation")

Year 1

Reliability & validity

# Subset by method
y1_mm <- df_filtered %>% filter(Cohort == "Year 1", Method == "Mixed_Model")
y1_ld <- df_filtered %>% filter(Cohort == "Year 1", Method == "LogDiff")

# Cronbach's alpha
alpha_y1 <- tibble(
  Method = c("Mixed Model", "LogDiff"),
  Cronbach_alpha = c(
    cronbach_alpha(make_matrix(y1_mm, "Score")),
    cronbach_alpha(make_matrix(y1_ld, "Score"))
  )
)

# Correlation with accuracy
acc_y1 <- df_acc_filtered %>%
  filter(Cohort == "Year 1") %>%
  group_by(student_id) %>%
  summarise(prop_correct = mean(Acc_Score), .groups = "drop")

score_y1 <- bind_rows(
  y1_mm %>% mutate(Method = "Mixed Model"),
  y1_ld %>% mutate(Method = "LogDiff")
) %>%
  group_by(Method, student_id) %>%
  summarise(mean_score = mean(Score), .groups = "drop") %>%
  left_join(acc_y1, by = "student_id") %>%
  group_by(Method) %>%
  summarise(accuracy_corr = cor(mean_score, prop_correct, use = "complete.obs"), .groups = "drop")

# Item-total correlations (compute once per method rather than repeating the call)
itc_vals_y1_mm <- item_total_cor(make_matrix(y1_mm, "Score"))
itc_vals_y1_ld <- item_total_cor(make_matrix(y1_ld, "Score"))

itc_y1 <- tibble(
  Method = rep(c("Mixed Model", "LogDiff"), c(length(itc_vals_y1_mm), length(itc_vals_y1_ld))),
  Item_Total_Cor = c(itc_vals_y1_mm, itc_vals_y1_ld)
) %>%
  group_by(Method) %>%
  summarise(
    Min = min(Item_Total_Cor, na.rm = TRUE),
    Q1 = quantile(Item_Total_Cor, 0.25, na.rm = TRUE),
    Median = median(Item_Total_Cor, na.rm = TRUE),
    Mean = mean(Item_Total_Cor, na.rm = TRUE),
    Q3 = quantile(Item_Total_Cor, 0.75, na.rm = TRUE),
    Max = max(Item_Total_Cor, na.rm = TRUE),
    .groups = "drop"
  )

kable(alpha_y1, digits = 3, caption = "Cronbach's Alpha by Method (Year 1)") %>%
  kable_styling(bootstrap_options = c("striped", "hover"), full_width = FALSE, position = "left") %>%
  row_spec(0, bold = TRUE, color = "#1b1b1b", background = "#e5e5e5")
Cronbach’s Alpha by Method (Year 1)
Method Cronbach_alpha
Mixed Model 0.883
LogDiff 0.908
kable(score_y1, digits = 3, caption = "Correlation with Accuracy by Method (Year 1)") %>%
  kable_styling(bootstrap_options = c("striped", "hover"), full_width = FALSE, position = "left") %>%
  row_spec(0, bold = TRUE, color = "#1b1b1b", background = "#e5e5e5")
Correlation with Accuracy by Method (Year 1)
Method accuracy_corr
LogDiff 0.794
Mixed Model 0.952
kable(itc_y1, digits = 3, caption = "Item-Total Correlation Summary (Year 1)") %>%
  kable_styling(bootstrap_options = c("striped", "hover"), full_width = FALSE, position = "left") %>%
  row_spec(0, bold = TRUE, color = "#1b1b1b", background = "#e5e5e5")
Item-Total Correlation Summary (Year 1)
Method Min Q1 Median Mean Q3 Max
LogDiff -0.382 0.294 0.361 0.374 0.455 0.862
Mixed Model -0.347 0.195 0.305 0.303 0.398 0.815

IRT Summary (Mixed Model vs LogDiff)

# Run Analyses
data_y_mm <- df_filtered %>% filter(Cohort == "Year 1", Method == "Mixed_Model")
data_y_ld <- df_filtered %>% filter(Cohort == "Year 1", Method == "LogDiff")

res_y_mm <- run_irt_analysis(data_y_mm, "Score", "Year 1", "Mixed Model")
res_y_ld <- run_irt_analysis(data_y_ld, "Score", "Year 1", "LogDiff")

irt_table_y1 <- combine_item_tables(res_y_mm$item_stats, res_y_ld$item_stats)

summary_y1 <- tibble(
  Metric = c("Reliability (WLE)", "Error Summary"),
  `Mixed Model` = c(res_y_mm$text_rel, res_y_mm$text_err),
  `Log Diff` = c(res_y_ld$text_rel, res_y_ld$text_err)
)

kable(summary_y1, caption = "Year 1 IRT Reliability and Error") %>%
  kable_styling(bootstrap_options = c("striped", "hover"), full_width = FALSE, position = "left") %>%
  row_spec(0, bold = TRUE, color = "#1b1b1b", background = "#e5e5e5")
Year 1 IRT Reliability and Error
Metric Mixed Model Log Diff
Reliability (WLE) Reliability (WLE): 0.795 (True variance: 79.5%) Reliability (WLE): 0.877 (True variance: 87.7%)
Error Summary Mean Ability: -0.02 (SD 0.62). Avg SEM: 0.27 (44.5% of SD). Mean Ability: -0.03 (SD 0.83). Avg SEM: 0.29 (34.2% of SD).
kable(
  irt_table_y1,
  col.names = c(
    "Item #", "Question_ID", "Pct Correct",
    "Pct Fast", "Item Difficulty", "Item Discrim.",
    "Pct Fast", "Item Difficulty", "Item Discrim."
  ),
  caption = "Year 1 IRT Item Statistics"
) %>%
  add_header_above(c(" " = 3, "Mixed Model" = 3, "Log Diff" = 3)) %>%
  kable_styling(bootstrap_options = c("striped", "hover"), full_width = FALSE, position = "left", fixed_thead = TRUE) %>%
  row_spec(0, bold = TRUE, color = "#1b1b1b", background = "#e5e5e5") %>%
  scroll_box(width = "100%", height = "500px") %>%
  footnote(
    general = c(
      "Item Difficulty (Xsi): Latent difficulty parameter from the TAM partial credit model.",
      "Item Discrim. (PBIs): Point-biserial correlation between item scores and ability estimates."
    ),
    general_title = "Notes: ",
    footnote_as_chunk = TRUE
  )
Year 1 IRT Item Statistics
Mixed Model
Log Diff
Item # Question_ID Pct Correct Pct Fast Item Difficulty Item Discrim. Pct Fast Item Difficulty Item Discrim.
1 AADD_001 98.0% 50.0% -1.84 0.11 51.0% -1.95 0.28
2 AADD_002 94.0% 47.0% -1.15 0.45 49.0% -1.26 0.50
3 AADD_003 92.0% 46.0% -1.00 0.39 53.0% -1.18 0.49
4 AADD_004 93.0% 47.0% -1.06 0.38 50.0% -1.19 0.48
5 AADD_005 80.0% 40.0% -0.27 0.42 40.0% -0.30 0.48
6 AADD_006 95.0% 47.0% -1.02 0.36 51.0% -1.12 0.45
7 AADD_007 92.0% 46.0% -0.79 0.43 54.0% -0.93 0.52
8 AADD_008 97.0% 49.0% -1.26 0.29 49.0% -1.28 0.42
9 AADD_009 85.0% 43.0% -0.29 0.51 48.0% -0.32 0.54
10 AADD_010 69.0% 35.0% 0.26 0.43 35.0% 0.40 0.42
11 AADD_011 90.0% 45.0% -0.57 0.53 62.0% -0.74 0.58
12 AADD_012 92.0% 46.0% -0.63 0.60 47.0% -0.60 0.63
13 AADD_013 86.0% 44.0% -0.36 0.58 44.0% -0.30 0.70
14 AADD_014 85.0% 43.0% -0.36 0.69 63.0% -0.59 0.64
15 AADD_015 94.0% 48.0% -0.92 0.10 59.0% -0.97 0.25
16 ASDD_001 83.0% 42.0% -0.51 0.49 44.0% -0.61 0.51
17 ASDD_002 71.0% 36.0% -0.09 0.45 37.0% -0.14 0.48
18 ASDD_003 63.0% 32.0% 0.14 0.45 33.0% 0.12 0.47
19 ASDD_004 60.0% 30.0% 0.24 0.50 32.0% 0.23 0.53
20 ASDD_005 77.0% 39.0% -0.16 0.54 43.0% -0.26 0.55
21 ASDD_006 73.0% 36.0% 0.01 0.56 41.0% -0.03 0.60
22 ASDD_007 76.0% 38.0% -0.03 0.63 44.0% -0.05 0.66
23 ASDD_008 75.0% 37.0% 0.03 0.65 42.0% 0.08 0.66
24 ASDD_009 73.0% 37.0% 0.03 0.51 44.0% 0.07 0.56
25 ASDD_010 72.0% 36.0% 0.10 0.53 36.0% 0.31 0.60
26 ASDD_011 72.0% 37.0% -0.01 0.74 48.0% -0.05 0.77
27 ASDD_012 68.0% 34.0% 0.08 0.76 48.0% 0.00 0.76
28 ASDD_013 56.0% 28.0% 0.41 0.73 33.0% 0.57 0.76
29 ASDD_014 54.0% 27.0% 0.33 0.74 34.0% 0.48 0.83
30 ASDD_015 50.0% 25.0% 0.42 0.82 36.0% 0.36 0.91
31 MC0-100_001 97.0% 49.0% -1.46 0.10 53.0% -1.62 0.26
32 MC0-100_002 86.0% 44.0% -0.64 0.55 49.0% -0.82 0.61
33 MC0-100_003 72.0% 36.0% -0.12 0.39 41.0% -0.26 0.46
34 MC0-100_004 88.0% 44.0% -0.73 0.47 52.0% -0.95 0.56
35 MC0-100_005 84.0% 43.0% -0.57 0.41 52.0% -0.79 0.49
36 MC0-100_006 95.0% 47.0% -1.20 0.21 54.0% -1.41 0.41
37 MC0-100_007 94.0% 47.0% -1.17 0.28 63.0% -1.50 0.46
38 MC0-100_008 89.0% 45.0% -0.75 0.45 49.0% -0.91 0.56
39 MC0-100_009 91.0% 46.0% -0.89 0.26 71.0% -1.32 0.39
40 MC0-100_010 58.0% 29.0% 0.27 0.37 40.0% 0.07 0.46
41 MC0-100_011 97.0% 48.0% -1.44 0.16 64.0% -1.75 0.33
42 MC0-100_012 94.0% 47.0% -1.06 0.30 54.0% -1.23 0.46
43 MC0-100_013 93.0% 46.0% -0.97 0.36 68.0% -1.32 0.49
44 MC0-100_014 92.0% 46.0% -0.90 0.27 50.0% -1.00 0.39
45 MC0-100_015 85.0% 43.0% -0.50 0.35 56.0% -0.70 0.46
46 MC0-100_016 87.0% 43.0% -0.56 0.24 54.0% -0.72 0.35
47 MC0-100_017 88.0% 44.0% -0.64 0.45 57.0% -0.80 0.52
48 MC0-100_018 91.0% 46.0% -0.80 0.31 61.0% -0.99 0.42
49 MC0-100_019 87.0% 44.0% -0.56 0.40 59.0% -0.73 0.46
50 MC0-100_020 92.0% 46.0% -0.86 0.34 51.0% -0.86 0.38
51 MC0-100_021 93.0% 47.0% -0.97 0.34 51.0% -0.95 0.36
52 MC0-100_022 93.0% 47.0% -0.92 0.27 63.0% -1.05 0.36
53 MC0-100_023 91.0% 45.0% -0.77 0.38 50.0% -0.72 0.34
54 MC0-100_024 89.0% 45.0% -0.65 0.42 55.0% -0.67 0.42
55 MC0-100_025 84.0% 42.0% -0.37 0.24 56.0% -0.48 0.40
56 MC0-100_026 87.0% 44.0% -0.54 0.14 64.0% -0.73 0.38
57 MC0-100_027 91.0% 46.0% -0.72 0.29 68.0% -1.00 0.37
58 MC0-100_028 77.0% 38.0% -0.15 0.26 54.0% -0.35 0.37
59 MC0-100_029 82.0% 41.0% -0.33 0.44 46.0% -0.33 0.48
60 MC0-100_030 85.0% 42.0% -0.61 0.34 53.0% -0.66 0.31
61 MC0-100_031 82.0% 42.0% -0.61 0.33 70.0% -1.02 0.34
62 MC0-100_032 83.0% 42.0% -0.40 0.41 74.0% -0.86 0.57
63 MC0-100_033 80.0% 40.0% -0.37 0.56 57.0% -0.61 0.43
64 MNAs0-100_001 82.0% 41.0% -0.46 0.38 48.0% -0.64 0.43
65 MNAs0-100_002 89.0% 45.0% -0.77 0.26 52.0% -0.98 0.37
66 MNAs0-100_003 87.0% 44.0% -0.69 0.39 52.0% -0.90 0.49
67 MNAs0-100_004 97.0% 48.0% -1.47 0.21 62.0% -1.79 0.33
68 MNAs0-100_005 78.0% 39.0% -0.29 0.19 49.0% -0.48 0.26
69 MNAs0-100_006 90.0% 45.0% -0.77 0.30 57.0% -0.99 0.39
70 MNAs0-100_007 92.0% 46.0% -0.89 0.31 66.0% -1.18 0.45
71 MNAs0-100_008 93.0% 47.0% -0.94 0.30 60.0% -1.12 0.39
72 MNAs0-100_009 72.0% 36.0% 0.00 0.22 53.0% -0.18 0.25
73 MNAs0-100_010 89.0% 45.0% -0.68 0.25 65.0% -0.94 0.45
74 MNAs0-100_011 75.0% 38.0% -0.12 0.37 50.0% -0.21 0.41
75 MNAs0-100_012 86.0% 43.0% -0.60 0.35 69.0% -0.93 0.45
76 MNAs0-100_013 84.0% 42.0% -0.54 0.26 62.0% -0.82 0.20
77 MNAs0-100_014 70.0% 35.0% -0.11 0.31 53.0% -0.34 0.42
78 MNAs0-100_015 63.0% 31.0% -0.03 0.40 45.0% -0.21 0.35
79 MNAs0-100_016 59.0% 31.0% -0.09 0.49 41.0% -0.20 0.43
80 MNAs0-100_017 52.0% 26.0% 0.08 0.58 37.0% -0.10 0.58
81 MNAs0-100_018 42.0% 21.0% 0.07 0.51 42.0% -0.39 0.49
82 MNAs0-100_019 59.0% 29.0% -0.42 0.69 35.0% -0.55 0.72
83 MNC0-100_001 95.0% 49.0% -1.24 0.36 51.0% -1.36 0.43
84 MNC0-100_002 89.0% 45.0% -0.77 0.49 45.0% -0.84 0.56
85 MNC0-100_003 84.0% 42.0% -0.55 0.52 41.0% -0.60 0.57
86 MNC0-100_004 83.0% 42.0% -0.47 0.57 43.0% -0.54 0.63
87 MNC0-100_005 84.0% 42.0% -0.49 0.60 47.0% -0.61 0.65
88 MNC0-100_006 92.0% 46.0% -0.89 0.31 57.0% -1.09 0.38
89 MNC0-100_007 90.0% 45.0% -0.75 0.48 52.0% -0.88 0.54
90 MNC0-100_008 86.0% 43.0% -0.52 0.55 49.0% -0.59 0.63
91 MNC0-100_009 88.0% 44.0% -0.61 0.44 52.0% -0.69 0.51
92 MNC0-100_010 83.0% 41.0% -0.38 0.59 48.0% -0.39 0.64
93 MNC0-100_011 81.0% 41.0% -0.41 0.44 44.0% -0.33 0.46
94 MNC0-100_012 71.0% 35.0% -0.15 0.61 38.0% -0.03 0.65
95 MNC0-100_013 62.0% 31.0% 0.05 0.78 41.0% 0.01 0.82
96 MNC0-100_014 64.0% 32.0% -0.19 0.63 49.0% -0.38 0.64
97 MNC0-100_015 62.0% 32.0% -0.28 0.69 29.0% -0.04 0.62
98 MNC0-100_016 59.0% 30.0% -0.31 0.44 37.0% -0.36 0.43
99 MNC0-100_017 22.0% 11.0% 0.95 -0.34 18.0% 0.95 -0.36
100 MNC0-100_018 16.0% 8.0% 1.24 -0.33 11.0% 1.38 -0.31
101 MNC0-100_019 17.0% 9.0% 0.78 0.06 13.0% 0.79 -0.02
Notes: Item Difficulty (Xsi): Latent difficulty parameter from the TAM partial credit model. Item Discrim. (PBIs): Point-biserial correlation between item scores and ability estimates.
plot_wright_side_by_side(res_y_mm$wle, res_y_mm$model, res_y_ld$wle, res_y_ld$model, "Year 1")