1. Data Loading & Preparation

We load the two fluency datasets computed by the pipeline:

  1. Mixed Model (MM): Accounts for student and item random effects.
  2. Log Difference (LogDiff): The difference between an item's log response time and a baseline log response time (a minimal sketch follows).
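For orientation, a minimal sketch of how a LogDiff score could be derived (illustrative only; the pipeline's actual computation, baseline definition, and fast/slow cutoff may differ):

# Hypothetical sketch of LogDiff scoring (not the pipeline code):
# compare each response time (RT) with an item baseline on the log scale,
# then score 0 = incorrect, 1 = correct but slow, 2 = correct and fast.
sketch_logdiff_score <- function(rt, baseline_rt, is_correct, fast_cutoff = 0) {
  log_diff <- log(rt) - log(baseline_rt)  # negative = faster than baseline
  dplyr::case_when(
    !is_correct             ~ 0L,  # incorrect
    log_diff <= fast_cutoff ~ 2L,  # correct fast
    TRUE                    ~ 1L   # correct slow
  )
}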
# Define paths relative to project root
path_mm <- "data/processed/metrics/timed_item_fluency.parquet"
path_ld <- "data/processed/metrics/timed_item_fluency_logdiff.parquet"

# Check if files exist
if (!file.exists(path_mm) || !file.exists(path_ld)) {
  stop("Data files not found. Please ensure the pipeline has run and paths are correct.")
}

# Load and tag datasets
df_mm <- read_parquet(path_mm) %>% 
  mutate(Method = "Mixed_Model", Score = fluency_mm) %>%
  select(-fluency_mm) 

df_ld <- read_parquet(path_ld) %>% 
  mutate(Method = "LogDiff", Score = fluency_logdiff) %>%
  select(-fluency_logdiff)

# Combine for initial processing
# Filter for Term 1 as per analysis requirements
df_all <- bind_rows(df_mm, df_ld) %>%
  filter(term == 1) %>% 
  mutate(
    # Pool cohorts: combine "Year 1 A/B" -> "Year 1", etc.
    Cohort = case_when(
      str_detect(exam_group_cohort, "(?i)(Year 1|Y1)") ~ "Year 1",
      str_detect(exam_group_cohort, "(?i)(Foundation|F-)") ~ "Foundation",
      TRUE ~ NA_character_
    ),
    Item_ID = question_id
  ) %>%
  filter(!is.na(Cohort), !is.na(Score))

# Extract Accuracy Scores (0/1) for the network analysis
# Accuracy is identical across methods, so we derive it from the MM dataset
df_acc <- df_mm %>% 
  filter(term == 1) %>%
  mutate(
    Cohort = case_when(
      str_detect(exam_group_cohort, "(?i)(Year 1|Y1)") ~ "Year 1",
      str_detect(exam_group_cohort, "(?i)(Foundation|F-)") ~ "Foundation",
      TRUE ~ NA_character_
    ),
    Acc_Score = as.integer(is_ans_correct)
  ) %>%
  filter(!is.na(Cohort), !is.na(Acc_Score))

2. Data Filtering

We applied the following filtering criteria:

  1. Items: Must have at least 30 responses.
  2. Students: Must have at least 20 responses to the retained items.
# Calculate initial counts (note: computed on the full MM dataset, before the term filter)
counts_raw <- df_mm %>% group_by(student_id) %>% summarise(n_items = n()) 
item_counts_raw <- df_mm %>% group_by(question_id) %>% summarise(n_students = n())

# Visualise raw distributions
par(mfrow=c(1,2))
hist(counts_raw$n_items, main="Items per Student (Raw)", xlab="Count", col="lightblue", border="white")
hist(item_counts_raw$n_students, main="Students per Item (Raw)", xlab="Count", col="lightgreen", border="white")

# 1. Identify Valid Items
valid_items <- df_mm %>%
  count(question_id) %>%
  filter(n >= 30) %>%
  pull(question_id)

# 2. Identify Valid Students (based on valid items)
valid_students <- df_mm %>%
  filter(question_id %in% valid_items) %>%
  count(student_id) %>%
  filter(n >= 20) %>%
  pull(student_id)

# Apply filters to main datasets
df_filtered <- df_all %>%
  filter(question_id %in% valid_items, student_id %in% valid_students)

df_acc_filtered <- df_acc %>%
  filter(question_id %in% valid_items, student_id %in% valid_students)

# Summary stats (before/after filtering)
# Note: df_all stacks both methods, so response totals count method-level rows, not unique responses
n_stud_raw <- length(unique(df_all$student_id))
n_item_raw <- length(unique(df_all$question_id))
n_resp_raw <- nrow(df_all)

n_stud_rem <- length(unique(df_filtered$student_id))
n_item_rem <- length(unique(df_filtered$question_id))
n_resp_rem <- nrow(df_filtered)

Post-filtering Summary:

tribble(
  ~Metric, ~`Before Filter`, ~`After Filter`,
  "Students", comma(n_stud_raw), comma(n_stud_rem),
  "Items", comma(n_item_raw), comma(n_item_rem),
  "Responses", comma(n_resp_raw), comma(n_resp_rem)
) %>%
  kable(align = c("l", "r", "r")) %>%
  kable_styling(bootstrap_options = "striped", full_width = FALSE, position = "left") %>%
  row_spec(0, bold = TRUE, color = "#1b1b1b", background = "#e5e5e5")
Metric Before Filter After Filter
Students 4,926 4,691
Items 325 234
Responses 339,920 331,720

3. Score Distributions

Comparison of score categories (0 = Incorrect, 1 = Correct Slow, 2 = Correct Fast) across methods and cohorts.

score_dist <- df_filtered %>%
  group_by(Method, Cohort, Score) %>%
  summarise(Count = n(), .groups = "drop_last") %>%
  mutate(Percentage = Count / sum(Count) * 100) %>%
  pivot_wider(id_cols = c(Method, Cohort), names_from = Score, values_from = Percentage, names_prefix = "Score_")

kable(score_dist, digits=1, caption = "Percentage of Fluency Scores by Method and Cohort") %>%
  kable_styling(bootstrap_options = "striped", full_width = F, position = "left") %>%
  row_spec(0, bold = TRUE, color = "#1b1b1b", background = "#e5e5e5")
Percentage of Fluency Scores by Method and Cohort
Method Cohort Score_0 Score_1 Score_2
LogDiff Foundation 16.3 40.4 43.3
LogDiff Year 1 14.1 35.1 50.9
Mixed_Model Foundation 16.3 41.6 42.0
Mixed_Model Year 1 14.1 42.8 43.1

4. Test Overlap Heatmap

Based on the overlapping test participation, we pool exam groups within each cohort (e.g. the Year 1 A and B groups become a single "Year 1" group), and we treat related test families, for example MNA0-100 (missing number ascending) and MNC0-100 (missing number choice), as similar items.

# Prepare data for heatmap
clean_fluency_hm <- df_mm %>%
  filter(term == 1) %>% 
  mutate(
    # Clean up test IDs to get 'Family' (e.g., AADD, STPM)
    standard_family = str_remove(standard_test_id, "(_2025.*|-MOY.*|ABR.*)")
  ) %>%
  distinct(student_id, exam_group, term, standard_family)

# Build presence matrix
family_matrix <- clean_fluency_hm %>%
  mutate(has_item = 1) %>%
  pivot_wider(names_from = standard_family, values_from = has_item, values_fill = 0)

# Calculate proportions
family_counts <- family_matrix %>%
  group_by(exam_group, term) %>%
  summarise(across(where(is.numeric), sum), .groups = "drop")

group_sizes <- family_matrix %>%
  count(exam_group, term, name = "n_students")

family_prop <- family_counts %>%
  left_join(group_sizes, by = c("exam_group", "term")) %>%
  mutate(across(-c(exam_group, term, n_students), ~ .x / n_students))

# Pivot for plotting
heatmap_df <- family_prop %>%
  select(-n_students) %>%
  pivot_longer(-c(exam_group, term), names_to = "test_family", values_to = "prop") %>%
  filter(prop > 0) 

# Plot
ggplot(heatmap_df, aes(x = test_family, y = exam_group, fill = prop)) +
  geom_tile(colour = "grey80") +
  scale_fill_gradient(low = "white", high = "steelblue", labels = scales::percent) +
  scale_x_discrete(position = "bottom") +
  theme_minimal(base_size = 12) +
  theme(
    axis.text.x = element_text(angle = 45, hjust = 1, size = 11, face = "bold"),
    axis.text.y = element_text(size = 11, face = "bold"),
    panel.grid = element_blank(),
    strip.background = element_rect(fill = "grey90", colour = NA),
    axis.title.x = element_blank(),
    axis.title.y = element_blank()
  ) +
  labs(title = "Test Family Participation by Exam Group", fill = "Participation")

5. Network Analysis

We build item-item correlation networks: nodes are items, edges carry the absolute pairwise correlation between item scores, edges below a threshold (0.3) are dropped, and communities are detected with the Louvain algorithm.

# Function to build network graph and compute summary statistics
prepare_network <- function(data, score_col, threshold = 0.3) {

  if (nrow(data) == 0) {
    return(list(graph = make_empty_graph(), summary = NULL, communities = NULL))
  }

  wide_mat <- data %>%
    select(student_id, question_id, all_of(score_col)) %>%
    pivot_wider(names_from = question_id, values_from = all_of(score_col)) %>%
    select(-student_id) %>%
    as.matrix()

  cor_mat <- cor(wide_mat, use = "pairwise.complete.obs")
  cor_mat[is.na(cor_mat)] <- 0
  diag(cor_mat) <- 0

  adj_mat <- cor_mat
  adj_mat[abs(adj_mat) < threshold] <- 0
  adj_mat <- abs(adj_mat)

  g <- graph_from_adjacency_matrix(adj_mat, mode = "undirected", weighted = TRUE)

  if (gorder(g) == 0) {
    return(list(graph = g, summary = NULL, communities = NULL))
  }

  # Communities
  comm <- if (gsize(g) > 0) cluster_louvain(g) else NULL

  # Edge stats (retain only edges above threshold)
  edge_weights <- if (gsize(g) > 0) E(g)$weight else numeric()
  n_nodes <- gorder(g)
  possible_edges <- n_nodes * (n_nodes - 1) / 2
  n_edges <- gsize(g)
  pct_retained <- if (possible_edges > 0) n_edges / possible_edges else NA_real_
  pct_removed <- if (!is.na(pct_retained)) 1 - pct_retained else NA_real_

  # For a simple undirected graph, density equals the proportion of possible edges retained
  density_val <- pct_retained

  cor_stats <- tibble(
    Mean_Abs_Corr = if (length(edge_weights) > 0) mean(edge_weights) else NA_real_,
    Median_Abs_Corr = if (length(edge_weights) > 0) median(edge_weights) else NA_real_,
    Min_Abs_Corr = if (length(edge_weights) > 0) min(edge_weights) else NA_real_,
    Max_Abs_Corr = if (length(edge_weights) > 0) max(edge_weights) else NA_real_
  )

  top_degree <- if (gsize(g) > 0) {
    deg <- sort(degree(g), decreasing = TRUE)
    head(names(deg), 5)
  } else character()

  top_betweenness <- if (gsize(g) > 0) {
    # igraph treats edge weights as distances in betweenness(), so invert the correlation weights
    bt <- sort(betweenness(g, directed = FALSE, weights = 1 / (E(g)$weight + 1e-6)), decreasing = TRUE)
    head(names(bt), 3)
  } else character()

  summary_tbl <- tibble(
    Metric = c(
      "Number of items (nodes)",
      "Number of edges above threshold",
      "Percentage of possible edges retained",
      "Density",
      "Threshold used",
      "Edges removed by threshold",
      "Mean abs(correlation)",
      "Median abs(correlation)",
      "Minimum abs(correlation)",
      "Maximum abs(correlation)",
      "Number of communities (Louvain)",
      "Modularity",
      "Top 5 items by degree",
      "Top 3 by betweenness"
    ),
    Value = c(
      n_nodes,
      n_edges,
      percent(pct_retained, accuracy = 0.1),
      percent(density_val, accuracy = 0.1),
      threshold,
      percent(pct_removed, accuracy = 0.1),
      round(cor_stats$Mean_Abs_Corr, 3),
      round(cor_stats$Median_Abs_Corr, 3),
      round(cor_stats$Min_Abs_Corr, 3),
      round(cor_stats$Max_Abs_Corr, 3),
      if (!is.null(comm)) length(unique(membership(comm))) else NA_integer_,
      if (!is.null(comm)) round(modularity(comm), 3) else NA_real_,
      paste(top_degree, collapse = ", "),
      paste(top_betweenness, collapse = ", ")
    )
  )

  list(graph = g, summary = summary_tbl, communities = comm)
}

plot_network <- function(net_obj, title_suffix) {
  g <- net_obj$graph
  comm <- net_obj$communities

  if (gorder(g) == 0) {
    plot.new()
    title(main = paste0("No data for: ", title_suffix))
    return(NULL)
  }

  if (gsize(g) > 0 && !is.null(comm)) {
    num_comm <- length(unique(membership(comm)))
    plot(comm, g,
         layout = layout_with_fr(g),
         vertex.size = 5,
         vertex.label.cex = 0.6,
         vertex.label.color = "black",
         edge.width = E(g)$weight * 2,
         main = paste0("Network: ", title_suffix, "\n(", num_comm, " Communities)"))
  } else {
    plot(g, vertex.size = 5, vertex.label.cex = 0.6,
         main = paste0("Network: ", title_suffix, " (No strong edges)"))
  }
}

Foundation Network Analysis

Accuracy Score Network (0/1)

df_sub <- df_acc_filtered %>% filter(Cohort == "Foundation")
net_found_acc <- prepare_network(df_sub, "Acc_Score", threshold = 0.3)
plot_network(net_found_acc, "Foundation Accuracy")

if (!is.null(net_found_acc$summary)) {
  net_found_acc$summary %>%
    kable(caption = "Network Summary — Foundation Accuracy", align = c("l", "l")) %>%
    kable_styling(bootstrap_options = c("striped", "hover"), full_width = FALSE, position = "left") %>%
    row_spec(0, bold = TRUE, color = "#1b1b1b", background = "#e5e5e5")
}
Network Summary — Foundation Accuracy
Metric Value
Number of items (nodes) 133
Number of edges above threshold 1210
Percentage of possible edges retained 13.8%
Density 13.8%
Threshold used 0.3
Edges removed by threshold 86.2%
Mean abs(correlation) 0.474
Median abs(correlation) 0.417
Minimum abs(correlation) 0.3
Maximum abs(correlation) 1
Number of communities (Louvain) 7
Modularity 0.375
Top 5 items by degree MC0-20_031, MC0-20_030, MC0-20_029, MC0-20_023, MC0-20_028
Top 3 by betweenness MC0-20_029, MC0-20_036, MC0-20_031

Fluency Score Network (Mixed Model)

df_sub <- df_filtered %>% filter(Cohort == "Foundation", Method == "Mixed_Model")
net_found_mm <- prepare_network(df_sub, "Score", threshold = 0.3)
plot_network(net_found_mm, "Foundation MM Fluency")

if (!is.null(net_found_mm$summary)) {
  net_found_mm$summary %>%
    kable(caption = "Network Summary — Foundation MM Fluency", align = c("l", "l")) %>%
    kable_styling(bootstrap_options = c("striped", "hover"), full_width = FALSE, position = "left") %>%
    row_spec(0, bold = TRUE, color = "#1b1b1b", background = "#e5e5e5")
}
Network Summary — Foundation MM Fluency
Metric Value
Number of items (nodes) 133
Number of edges above threshold 1149
Percentage of possible edges retained 13.1%
Density 13.1%
Threshold used 0.3
Edges removed by threshold 86.9%
Mean abs(correlation) 0.493
Median abs(correlation) 0.434
Minimum abs(correlation) 0.3
Maximum abs(correlation) 1
Number of communities (Louvain) 3
Modularity 0.321
Top 5 items by degree MC0-20_031, MC0-20_035, MC0-20_033, MC0-20_032, MC0-20_040
Top 3 by betweenness MC0-20_031, MC0-20_040, MC0-20_032

Year 1 Network Analysis

Accuracy Score Network (0/1)

df_sub <- df_acc_filtered %>% filter(Cohort == "Year 1")
net_y1_acc <- prepare_network(df_sub, "Acc_Score", threshold = 0.3)
plot_network(net_y1_acc, "Year 1 Accuracy")

if (!is.null(net_y1_acc$summary)) {
  net_y1_acc$summary %>%
    kable(caption = "Network Summary — Year 1 Accuracy", align = c("l", "l")) %>%
    kable_styling(bootstrap_options = c("striped", "hover"), full_width = FALSE, position = "left") %>%
    row_spec(0, bold = TRUE, color = "#1b1b1b", background = "#e5e5e5")
}
Network Summary — Year 1 Accuracy
Metric Value
Number of items (nodes) 101
Number of edges above threshold 2014
Percentage of possible edges retained 39.9%
Density 39.9%
Threshold used 0.3
Edges removed by threshold 60.1%
Mean abs(correlation) 0.503
Median abs(correlation) 0.464
Minimum abs(correlation) 0.3
Maximum abs(correlation) 1
Number of communities (Louvain) 4
Modularity 0.137
Top 5 items by degree ASDD_011, ASDD_012, AADD_013, AADD_014, ASDD_013
Top 3 by betweenness MC0-100_032, ASDD_015, AADD_014

Fluency Score Network (Mixed Model)

df_sub <- df_filtered %>% filter(Cohort == "Year 1", Method == "Mixed_Model")
net_y1_mm <- prepare_network(df_sub, "Score", threshold = 0.3)
plot_network(net_y1_mm, "Year 1 MM Fluency")

if (!is.null(net_y1_mm$summary)) {
  net_y1_mm$summary %>%
    kable(caption = "Network Summary — Year 1 MM Fluency", align = c("l", "l")) %>%
    kable_styling(bootstrap_options = c("striped", "hover"), full_width = FALSE, position = "left") %>%
    row_spec(0, bold = TRUE, color = "#1b1b1b", background = "#e5e5e5")
}
Network Summary — Year 1 MM Fluency
Metric Value
Number of items (nodes) 101
Number of edges above threshold 1176
Percentage of possible edges retained 23.3%
Density 23.3%
Threshold used 0.3
Edges removed by threshold 76.7%
Mean abs(correlation) 0.464
Median abs(correlation) 0.425
Minimum abs(correlation) 0.3
Maximum abs(correlation) 1
Number of communities (Louvain) 3
Modularity 0.144
Top 5 items by degree ASDD_015, ASDD_013, ASDD_014, ASDD_012, ASDD_011
Top 3 by betweenness MC0-100_033, ASDD_015, MNAs0-100_019

6. IRT Analysis
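As background for the code below: we fit a partial credit model (PCM) with the TAM package and estimate person abilities with Warm's weighted likelihood estimates (WLE). In standard notation (with the empty sum for $k = 0$ taken as zero, and the step parameters $\delta_{jh}$ corresponding to TAM's xsi values), the model and the WLE reliability computed in run_irt_analysis are:

\[
P(X_{ij} = k \mid \theta_i)
  = \frac{\exp\!\left(\sum_{h=1}^{k} (\theta_i - \delta_{jh})\right)}
         {\sum_{m=0}^{K_j} \exp\!\left(\sum_{h=1}^{m} (\theta_i - \delta_{jh})\right)},
\qquad
\mathrm{Rel}_{\mathrm{WLE}}
  = 1 - \frac{\overline{\mathrm{SE}^2(\hat\theta)}}{\widehat{\mathrm{Var}}(\hat\theta)}
\]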

# Function to run IRT (Partial Credit Model)
run_irt_analysis <- function(data, score_col, cohort_label, method_label) {

  # Wide Format
  wide_df <- data %>%
    select(student_id, question_id, all_of(score_col)) %>%
    pivot_wider(names_from = question_id, values_from = all_of(score_col))

  resp_matrix <- wide_df %>% select(-student_id)

  # Run TAM (MML); capture.output() suppresses TAM's console progress messages
  mod <- suppressWarnings({
    captured_mod <- NULL
    capture.output({
      captured_mod <- tam.mml(resp = resp_matrix, verbose = FALSE)
    })
    captured_mod
  })

  # WLE Estimates
  wle_mod <- suppressWarnings({
    captured_wle <- NULL
    capture.output({
      captured_wle <- tam.wle(mod)
    })
    captured_wle
  })

  # Reliability & Error
  wle_reliability <- 1 - (mean(wle_mod$error^2) / var(wle_mod$theta))
  theta_mean <- mean(wle_mod$theta)
  theta_sd   <- sd(wle_mod$theta)
  avg_sem    <- mean(wle_mod$error)
  error_pct  <- (avg_sem / theta_sd) * 100

  # Item Stats
  # Use mod$item to get one difficulty per item (xsi.item) rather than step parameters
  diff_lookup <- setNames(mod$item$xsi.item, mod$item$item)

  # Discrimination
  calc_disc <- function(item_vec, theta) {
    cor(item_vec, theta, use = "pairwise.complete.obs")
  }
  disc <- sapply(resp_matrix, calc_disc, theta = wle_mod$theta)

  # Props
  prop_correct <- colMeans(resp_matrix >= 1, na.rm=TRUE)
  prop_fast    <- colMeans(resp_matrix == 2, na.rm=TRUE)

  # Construct Table
  # 1. Get IDs
  item_ids <- names(prop_correct)

  # 2. Build mapping for numerical ID (Sort alphabetically first)
  item_order <- data.frame(Question_ID = sort(item_ids)) %>%
    mutate(Item_Num = row_number())

  # 3. Build main stats frame
  item_stats <- data.frame(
    Question_ID = item_ids,
    Prop_Correct = round(prop_correct, 2),
    Prop_Fast = round(prop_fast, 2),
    Difficulty = round(diff_lookup[item_ids], 2),
    Discrimination = round(disc, 2)
  )

  # 4. Join and Reorder
  item_stats <- item_stats %>%
    left_join(item_order, by = "Question_ID") %>%
    arrange(Item_Num) %>%
    select(
      Item_Num,
      Question_ID,
      Prop_Correct,
      Prop_Fast,
      Difficulty,
      Discrimination
    )

  # Formatted Text
  txt_rel <- paste0("Reliability (WLE): ", round(wle_reliability, 3),
                    " (True variance: ", round(wle_reliability*100, 1), "%)")

  txt_err <- paste0("Mean Ability: ", round(theta_mean, 2), " (SD ", round(theta_sd, 2), "). ",
                    "Avg SEM: ", round(avg_sem, 2), " (", round(error_pct, 1), "% of SD).")

  return(list(
    model = mod,
    wle = wle_mod,
    item_stats = item_stats,
    text_rel = txt_rel,
    text_err = txt_err,
    title = paste(cohort_label, "-", method_label)
  ))
}

# Reliability helpers -----------------------------------------------------

make_matrix <- function(df, score_col) {
  mat <- tapply(df[[score_col]], list(df$student_id, df$question_id), mean)

  # drop rows with all NA
  keep <- apply(mat, 1, function(x) any(!is.na(x)))
  mat[keep, , drop = FALSE]
}

cronbach_alpha <- function(mat) {
  valid_cols <- apply(mat, 2, function(x) {
    nx <- sum(!is.na(x))
    vx <- if (nx > 1) var(x, na.rm = TRUE) else 0
    nx > 1 && vx > 0
  })
  mat2 <- mat[, valid_cols, drop = FALSE]

  k <- ncol(mat2)
  if (k < 2) return(NA_real_)

  item_vars <- apply(mat2, 2, var, na.rm = TRUE)
  total_scores <- rowSums(mat2, na.rm = TRUE)
  total_var <- var(total_scores)

  k / (k - 1) * (1 - sum(item_vars) / total_var)
}
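For reference, the helper above implements the standard formula (person totals are taken over whatever items each student answered, so under missing data this is an approximation):

\[
\alpha = \frac{k}{k-1}\left(1 - \frac{\sum_{j=1}^{k} \sigma^2_j}{\sigma^2_{\mathrm{total}}}\right)
\]

where $\sigma^2_j$ is the variance of item $j$ and $\sigma^2_{\mathrm{total}}$ is the variance of the students' total scores.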

item_total_cor <- function(mat) {
  valid_cols <- apply(mat, 2, function(x) {
    nx <- sum(!is.na(x))
    vx <- if (nx > 1) var(x, na.rm = TRUE) else 0
    nx > 1 && vx > 0
  })
  mat2 <- mat[, valid_cols, drop = FALSE]

  cors <- numeric(ncol(mat2))
  for (j in seq_len(ncol(mat2))) {
    item <- mat2[, j]
    rest <- rowSums(mat2[, -j, drop = FALSE], na.rm = TRUE)
    cors[j] <- cor(item, rest, use = "complete.obs")
  }
  cors
}

# Format combined IRT table with multi-level headers
combine_item_tables <- function(mm_stats, ld_stats) {
  mm_fmt <- mm_stats %>%
    rename(`Item #` = Item_Num, mm_pct_correct = Prop_Correct, mm_pct_fast = Prop_Fast,
           mm_difficulty = Difficulty, mm_discrimination = Discrimination)

  ld_fmt <- ld_stats %>%
    rename(`Item #` = Item_Num, ld_pct_correct = Prop_Correct, ld_pct_fast = Prop_Fast,
           ld_difficulty = Difficulty, ld_discrimination = Discrimination)

  mm_fmt %>%
    full_join(ld_fmt, by = c("Item #", "Question_ID")) %>%
    arrange(`Item #`) %>%
    mutate(
      Pct_Correct = percent(coalesce(mm_pct_correct, ld_pct_correct), accuracy = 0.1),
      mm_pct_fast = percent(mm_pct_fast, accuracy = 0.1),
      ld_pct_fast = percent(ld_pct_fast, accuracy = 0.1)
    ) %>%
    select(
      `Item #`, Question_ID,
      Pct_Correct,
      mm_pct_fast, mm_difficulty, mm_discrimination,
      ld_pct_fast, ld_difficulty, ld_discrimination
    )
}

# Wright map helper to show two methods side-by-side
plot_wright_side_by_side <- function(wle_mm, model_mm, wle_ld, model_ld, main_prefix) {
  op <- par(no.readonly = TRUE)
  on.exit({
    try(close.screen(all.screens = TRUE), silent = TRUE)
    par(op)
  })

  invisible(capture.output({
    wrightMap(wle_mm$theta, model_mm$xsi$xsi,
              label.items.srt = 90, main.title = paste(main_prefix, "- Mixed Model"),
              axis.items = "", item.side = itemClassic)
  }))

  try(close.screen(all.screens = TRUE), silent = TRUE)
  par(op)

  invisible(capture.output({
    wrightMap(wle_ld$theta, model_ld$xsi$xsi,
              label.items.srt = 90, main.title = paste(main_prefix, "- LogDiff"),
              axis.items = "", item.side = itemClassic)
  }))

  invisible(NULL)
}

Foundation

Reliability & validity

# Subset by method
found_mm <- df_filtered %>% filter(Cohort == "Foundation", Method == "Mixed_Model")
found_ld <- df_filtered %>% filter(Cohort == "Foundation", Method == "LogDiff")

# Cronbach's alpha
alpha_found <- tibble(
  Method = c("Mixed Model", "LogDiff"),
  Cronbach_alpha = c(
    cronbach_alpha(make_matrix(found_mm, "Score")),
    cronbach_alpha(make_matrix(found_ld, "Score"))
  )
)

# Correlation with accuracy
acc_found <- df_acc_filtered %>%
  filter(Cohort == "Foundation") %>%
  group_by(student_id) %>%
  summarise(prop_correct = mean(Acc_Score), .groups = "drop")

score_found <- bind_rows(
  found_mm %>% mutate(Method = "Mixed Model"),
  found_ld %>% mutate(Method = "LogDiff")
) %>%
  group_by(Method, student_id) %>%
  summarise(mean_score = mean(Score), .groups = "drop") %>%
  left_join(acc_found, by = "student_id") %>%
  group_by(Method) %>%
  summarise(accuracy_corr = cor(mean_score, prop_correct, use = "complete.obs"), .groups = "drop")

# Item-total correlations (compute once per method rather than repeating the call)
itc_vals_f_mm <- item_total_cor(make_matrix(found_mm, "Score"))
itc_vals_f_ld <- item_total_cor(make_matrix(found_ld, "Score"))

itc_found <- tibble(
  Method = rep(c("Mixed Model", "LogDiff"), c(length(itc_vals_f_mm), length(itc_vals_f_ld))),
  Item_Total_Cor = c(itc_vals_f_mm, itc_vals_f_ld)
) %>%
  group_by(Method) %>%
  summarise(
    Min = min(Item_Total_Cor, na.rm = TRUE),
    Q1 = quantile(Item_Total_Cor, 0.25, na.rm = TRUE),
    Median = median(Item_Total_Cor, na.rm = TRUE),
    Mean = mean(Item_Total_Cor, na.rm = TRUE),
    Q3 = quantile(Item_Total_Cor, 0.75, na.rm = TRUE),
    Max = max(Item_Total_Cor, na.rm = TRUE),
    .groups = "drop"
  )

kable(alpha_found, digits = 3, caption = "Cronbach's Alpha by Method (Foundation)") %>%
  kable_styling(bootstrap_options = c("striped", "hover"), full_width = FALSE, position = "left") %>%
  row_spec(0, bold = TRUE, color = "#1b1b1b", background = "#e5e5e5")
Cronbach’s Alpha by Method (Foundation)
Method Cronbach_alpha
Mixed Model 0.775
LogDiff 0.805
kable(score_found, digits = 3, caption = "Correlation with Accuracy by Method (Foundation)") %>%
  kable_styling(bootstrap_options = c("striped", "hover"), full_width = FALSE, position = "left") %>%
  row_spec(0, bold = TRUE, color = "#1b1b1b", background = "#e5e5e5")
Correlation with Accuracy by Method (Foundation)
Method accuracy_corr
LogDiff 0.755
Mixed Model 0.955
kable(itc_found, digits = 3, caption = "Item-Total Correlation Summary (Foundation)") %>%
  kable_styling(bootstrap_options = c("striped", "hover"), full_width = FALSE, position = "left") %>%
  row_spec(0, bold = TRUE, color = "#1b1b1b", background = "#e5e5e5")
Item-Total Correlation Summary (Foundation)
Method Min Q1 Median Mean Q3 Max
LogDiff -0.311 0.173 0.251 0.257 0.343 0.934
Mixed Model -0.720 0.110 0.188 0.194 0.309 0.611

IRT Summary (Mixed Model vs LogDiff)

# Run Analyses
data_f_mm <- df_filtered %>% filter(Cohort == "Foundation", Method == "Mixed_Model")
data_f_ld <- df_filtered %>% filter(Cohort == "Foundation", Method == "LogDiff")

res_f_mm <- run_irt_analysis(data_f_mm, "Score", "Foundation", "Mixed Model")
res_f_ld <- run_irt_analysis(data_f_ld, "Score", "Foundation", "LogDiff")

irt_table_found <- combine_item_tables(res_f_mm$item_stats, res_f_ld$item_stats)

summary_found <- tibble(
  Metric = c("Reliability (WLE)", "Error Summary"),
  `Mixed Model` = c(res_f_mm$text_rel, res_f_mm$text_err),
  `Log Diff` = c(res_f_ld$text_rel, res_f_ld$text_err)
)

kable(summary_found, caption = "Foundation IRT Reliability and Error") %>%
  kable_styling(bootstrap_options = c("striped", "hover"), full_width = FALSE, position = "left") %>%
  row_spec(0, bold = TRUE, color = "#1b1b1b", background = "#e5e5e5")
Foundation IRT Reliability and Error
Metric Mixed Model Log Diff
Reliability (WLE) Reliability (WLE): 0.735 (True variance: 73.5%) Reliability (WLE): 0.829 (True variance: 82.9%)
Error Summary Mean Ability: 0 (SD 0.54). Avg SEM: 0.27 (50.5% of SD). Mean Ability: -0.01 (SD 0.7). Avg SEM: 0.28 (40.4% of SD).
kable(
  irt_table_found,
  col.names = c(
    "Item #", "Question_ID", "Pct Correct",
    "Pct Fast", "Item Difficulty", "Item Discrim.",
    "Pct Fast", "Item Difficulty", "Item Discrim."
  ),
  caption = "Foundation IRT Item Statistics"
) %>%
  add_header_above(c(" " = 3, "Mixed Model" = 3, "Log Diff" = 3)) %>%
  kable_styling(bootstrap_options = c("striped", "hover"), full_width = FALSE, position = "left", fixed_thead = TRUE) %>%
  row_spec(0, bold = TRUE, color = "#1b1b1b", background = "#e5e5e5") %>%
  scroll_box(width = "100%", height = "500px") %>%
  footnote(
    general = c(
      "Item Difficulty (Xsi): Latent difficulty parameter from the TAM partial credit model.",
      "Item Discrim. (PBIs): Point-biserial correlation between item scores and ability estimates."
    ),
    general_title = "Notes: ",
    footnote_as_chunk = TRUE
  )
Foundation IRT Item Statistics
Mixed Model
Log Diff
Item # Question_ID Pct Correct Pct Fast Item Difficulty Item Discrim. Pct Fast Item Difficulty Item Discrim.
1 MC0-20_001 96.0% 48.0% -1.30 0.25 51.0% -1.41 0.36
2 MC0-20_002 96.0% 49.0% -1.28 0.35 43.0% -1.28 0.46
3 MC0-20_003 88.0% 45.0% -0.72 0.36 43.0% -0.76 0.44
4 MC0-20_004 95.0% 48.0% -1.19 0.39 59.0% -1.42 0.49
5 MC0-20_005 80.0% 40.0% -0.35 0.39 57.0% -0.65 0.43
6 MC0-20_006 90.0% 45.0% -0.81 0.34 44.0% -0.85 0.44
7 MC0-20_007 71.0% 35.0% -0.08 0.34 43.0% -0.24 0.40
8 MC0-20_008 60.0% 30.0% 0.19 0.27 38.0% 0.03 0.35
9 MC0-20_009 92.0% 46.0% -0.92 0.27 46.0% -0.99 0.41
10 MC0-20_010 87.0% 44.0% -0.64 0.38 39.0% -0.63 0.46
11 MC0-20_011 90.0% 45.0% -0.78 0.28 44.0% -0.83 0.43
12 MC0-20_012 95.0% 48.0% -1.18 0.31 62.0% -1.43 0.46
13 MC0-20_013 94.0% 47.0% -1.07 0.31 60.0% -1.30 0.44
14 MC0-20_014 96.0% 48.0% -1.25 0.25 44.0% -1.25 0.30
15 MC0-20_015 90.0% 45.0% -0.76 0.25 49.0% -0.87 0.37
16 MC0-20_016 90.0% 45.0% -0.77 0.23 47.0% -0.84 0.41
17 MC0-20_017 95.0% 48.0% -1.15 0.28 61.0% -1.37 0.47
18 MC0-20_018 84.0% 42.0% -0.45 0.34 40.0% -0.44 0.46
19 MC0-20_019 96.0% 48.0% -1.29 0.29 51.0% -1.36 0.30
20 MC0-20_020 85.0% 43.0% -0.51 0.12 42.0% -0.51 0.32
21 MC0-20_021 92.0% 46.0% -0.86 0.36 44.0% -0.86 0.35
22 MC0-20_022 95.0% 47.0% -1.12 0.27 56.0% -1.25 0.31
23 MC0-20_023 88.0% 44.0% -0.61 0.43 43.0% -0.59 0.35
24 MC0-20_024 89.0% 45.0% -0.70 0.42 46.0% -0.74 0.37
25 MC0-20_025 83.0% 42.0% -0.40 0.30 44.0% -0.46 0.38
26 MC0-20_026 89.0% 44.0% -0.60 0.22 37.0% -0.51 0.40
27 MC0-20_027 93.0% 46.0% -0.83 0.31 54.0% -0.97 0.38
28 MC0-20_028 74.0% 38.0% -0.01 0.33 39.0% -0.07 0.43
29 MC0-20_029 89.0% 45.0% -0.65 0.36 52.0% -0.84 0.50
30 MC0-20_030 64.0% 33.0% 0.21 0.39 31.0% 0.22 0.50
31 MC0-20_031 79.0% 39.0% -0.29 0.59 32.0% -0.21 0.53
32 MC0-20_032 91.0% 48.0% -0.98 0.13 43.0% -0.99 0.35
33 MC0-20_033 93.0% 47.0% -0.87 0.30 33.0% -0.81 0.55
34 MC0-20_034 93.0% 47.0% -0.87 0.49 33.0% -0.81 0.65
35 MC0-20_035 91.0% 45.0% -0.72 0.25 27.0% -0.45 0.64
36 MC0-20_036 86.0% 43.0% -0.39 0.33 29.0% -0.31 0.70
37 MC0-20_037 100.0% 57.0% -15.62 -0.60 29.0% -17.56 0.80
38 MC0-20_038 100.0% 60.0% -12.10 0.34 20.0% -17.28 0.97
39 MC0-20_039 100.0% 50.0% -14.22 -0.64 0.0% -37.68 NA
40 MC0-20_040 50.0% 25.0% 0.47 0.38 0.0% -0.19 0.30
41 MNA0-20_001new 80.0% 41.0% -0.39 0.35 36.0% -0.37 0.36
42 MNA0-20_002new 83.0% 42.0% -0.49 0.33 38.0% -0.49 0.32
43 MNA0-20_003new 72.0% 36.0% -0.10 0.34 32.0% -0.08 0.35
44 MNA0-20_004new 81.0% 41.0% -0.40 0.36 38.0% -0.40 0.39
45 MNA0-20_005new 91.0% 46.0% -0.84 0.30 44.0% -0.88 0.38
46 MNA0-20_006new 78.0% 39.0% -0.23 0.29 40.0% -0.28 0.35
47 MNA0-20_007new 82.0% 41.0% -0.39 0.24 46.0% -0.50 0.33
48 MNA0-20_008new 84.0% 42.0% -0.51 0.36 45.0% -0.59 0.37
49 MNA0-20_009new 35.0% 17.0% 0.79 -0.13 20.0% 0.75 -0.07
50 MNA0-20_010new 69.0% 35.0% -0.09 0.17 42.0% -0.20 0.26
51 MNA0-20_011new 84.0% 42.0% -0.65 0.31 47.0% -0.72 0.29
52 MNA0-20_012new 54.0% 27.0% 0.19 0.13 32.0% 0.10 0.13
53 MNA0-20_013new 49.0% 25.0% 0.25 0.25 31.0% 0.19 0.33
54 MNA0-20_014new 61.0% 31.0% -0.08 0.37 39.0% -0.21 0.33
55 MNA0-20_015new 70.0% 35.0% -0.26 -0.12 48.0% -0.43 -0.11
56 MNA0-20_016new 60.0% 31.0% -0.04 0.18 44.0% -0.21 0.10
57 MNA0-20_017new 50.0% 25.0% 0.19 0.40 36.0% 0.02 0.23
58 MNA0-20_018new 59.0% 31.0% -0.02 -0.23 34.0% -0.03 -0.12
59 MNA0-20_019new 64.0% 32.0% -0.09 0.05 40.0% -0.18 0.17
60 MNC0-20_001 64.0% 32.0% 0.09 0.45 29.0% 0.16 0.42
61 MNC0-20_002 81.0% 41.0% -0.40 0.50 36.0% -0.36 0.47
62 MNC0-20_003 74.0% 37.0% -0.16 0.54 35.0% -0.15 0.50
63 MNC0-20_004 74.0% 37.0% -0.15 0.57 36.0% -0.16 0.57
64 MNC0-20_005 68.0% 34.0% 0.04 0.53 35.0% 0.03 0.55
65 MNC0-20_006 81.0% 41.0% -0.36 0.55 38.0% -0.32 0.53
66 MNC0-20_007 70.0% 35.0% 0.00 0.53 35.0% 0.01 0.55
67 MNC0-20_008 74.0% 37.0% -0.16 0.40 40.0% -0.22 0.39
68 MNC0-20_009 60.0% 30.0% 0.20 0.46 32.0% 0.19 0.51
69 MNC0-20_010 47.0% 24.0% 0.44 0.65 22.0% 0.56 0.66
70 MNC0-20_011 50.0% 26.0% 0.23 0.45 31.0% 0.16 0.42
71 MNC0-20_012 55.0% 28.0% 0.05 0.22 33.0% -0.07 0.28
72 MNC0-20_013 44.0% 22.0% 0.31 0.31 29.0% 0.15 0.28
73 MNC0-20_014 34.0% 18.0% 0.49 0.59 19.0% 0.48 0.53
74 MNC0-20_015 52.0% 27.0% 0.05 0.31 29.0% 0.05 0.24
75 MNC0-20_016 36.0% 19.0% 0.43 0.32 14.0% 0.76 0.03
76 MNC0-20_017 42.0% 23.0% 0.13 0.00 26.0% 0.16 0.02
77 MNC0-20_018 21.0% 11.0% 0.61 0.11 14.0% 0.63 0.22
78 MQ1-10_001 97.0% 49.0% -1.56 0.30 48.0% -1.61 0.39
79 MQ1-10_002 91.0% 46.0% -0.87 0.38 40.0% -0.84 0.42
80 MQ1-10_003 98.0% 50.0% -1.74 0.25 45.0% -1.78 0.39
81 MQ1-10_004 83.0% 42.0% -0.47 0.32 40.0% -0.49 0.40
82 MQ1-10_005 79.0% 39.0% -0.30 0.35 38.0% -0.32 0.41
83 MQ1-10_006 99.0% 49.0% -2.03 0.13 47.0% -2.07 0.29
84 MQ1-10_007 90.0% 45.0% -0.78 0.31 41.0% -0.78 0.39
85 MQ1-10_008 85.0% 43.0% -0.53 0.26 40.0% -0.53 0.35
86 MQ1-10_009 98.0% 50.0% -1.78 0.27 49.0% -1.85 0.41
87 MQ1-10_010 89.0% 45.0% -0.72 0.35 43.0% -0.74 0.43
88 MQ1-10_011 85.0% 42.0% -0.47 0.35 44.0% -0.53 0.42
89 MQ1-10_012 97.0% 49.0% -1.43 0.27 54.0% -1.57 0.43
90 MQ1-10_013 95.0% 48.0% -1.12 0.28 47.0% -1.18 0.42
91 MQ1-10_014 83.0% 42.0% -0.36 0.37 44.0% -0.42 0.45
92 MQ1-10_015 83.0% 42.0% -0.35 0.38 41.0% -0.36 0.37
93 MQ1-10_016 88.0% 44.0% -0.58 0.10 49.0% -0.68 0.30
94 MQ1-10_017 95.0% 48.0% -1.13 0.48 50.0% -1.21 0.53
95 MQ1-10_018 70.0% 35.0% 0.02 0.45 40.0% -0.05 0.46
96 MQ1-10_019 97.0% 48.0% -1.40 0.47 63.0% -1.64 0.42
97 MQ1-10_020 79.0% 40.0% -0.32 0.41 41.0% -0.36 0.50
98 MQ1-10_021 87.0% 44.0% -0.63 0.56 49.0% -0.75 0.55
99 MQ1-10_022 79.0% 40.0% -0.33 0.54 41.0% -0.31 0.57
100 MQ1-10_023 83.0% 42.0% -0.69 0.21 42.0% -0.67 0.17
101 MQ1-10_024 82.0% 42.0% -0.73 0.63 42.0% -0.71 0.55
102 MQ1-10_025 78.0% 40.0% -0.61 0.39 42.0% -0.62 0.46
103 MQ1-10_026 86.0% 44.0% -0.93 0.54 44.0% -0.90 0.60
104 MQ1-20-001 89.0% 45.0% -0.75 0.35 40.0% -0.75 0.34
105 MQ1-20-002 71.0% 36.0% -0.09 0.42 33.0% -0.10 0.42
106 MQ1-20-003 71.0% 36.0% -0.10 0.45 34.0% -0.11 0.44
107 MQ1-20-004 62.0% 31.0% 0.14 0.35 28.0% 0.18 0.36
108 MQ1-20-005 66.0% 34.0% 0.04 0.45 31.0% 0.05 0.43
109 MQ1-20-006 66.0% 33.0% 0.06 0.34 30.0% 0.08 0.33
110 MQ1-20-007 60.0% 30.0% 0.21 0.46 30.0% 0.18 0.46
111 MQ1-20-008 98.0% 49.0% -1.76 0.35 54.0% -1.89 0.35
112 MQ1-20-009 72.0% 36.0% -0.09 0.47 37.0% -0.13 0.43
113 MQ1-20-010 57.0% 29.0% 0.25 0.21 25.0% 0.31 0.16
114 MQ1-20-011 49.0% 25.0% 0.38 0.39 24.0% 0.38 0.41
115 MQ1-20-012 91.0% 45.0% -0.94 0.54 46.0% -0.99 0.43
116 MQ1-20-013 69.0% 34.0% -0.16 0.49 36.0% -0.18 0.42
117 MQ1-20-014 61.0% 31.0% 0.03 0.52 30.0% 0.05 0.41
118 MQ1-20-015 69.0% 35.0% -0.18 0.60 36.0% -0.19 0.55
119 MQ1-20-016 43.0% 21.0% 0.42 0.29 23.0% 0.42 0.33
120 MQ1-20-017 90.0% 45.0% -0.98 0.46 52.0% -1.11 0.35
121 MQ1-20-018 44.0% 22.0% 0.37 0.17 24.0% 0.34 0.21
122 MQ1-20-019 57.0% 28.0% 0.09 0.51 33.0% 0.02 0.52
123 MQ1-20-020 29.0% 15.0% 0.69 0.35 17.0% 0.65 0.28
124 MQ1-20-021 41.0% 20.0% 0.42 0.30 24.0% 0.35 0.30
125 MQ1-20-022 34.0% 17.0% 0.58 0.16 19.0% 0.56 0.23
126 MQ1-20-023 46.0% 24.0% 0.26 0.23 27.0% 0.16 0.17
127 MQ1-20-024 55.0% 27.0% 0.06 0.38 29.0% -0.02 0.34
128 MQ1-20-025 90.0% 45.0% -0.99 0.44 57.0% -1.19 0.31
129 MQ1-20-026 38.0% 19.0% 0.45 0.25 21.0% 0.39 0.09
130 MQ1-20-027 48.0% 24.0% 0.23 0.48 37.0% -0.04 0.40
131 MQ1-20-028 17.0% 10.0% 1.01 -0.06 10.0% 0.98 -0.11
132 MQ1-20-029 19.0% 11.0% 0.91 0.15 11.0% 0.85 0.01
133 MQ1-20-030 26.0% 13.0% 0.77 0.13 13.0% 0.70 0.25
Notes: Item Difficulty (Xsi): Latent difficulty parameter from the TAM partial credit model. Item Discrim. (PBIs): Point-biserial correlation between item scores and ability estimates.
plot_wright_side_by_side(res_f_mm$wle, res_f_mm$model, res_f_ld$wle, res_f_ld$model, "Foundation")

Year 1

Reliability & validity

# Subset by method
y1_mm <- df_filtered %>% filter(Cohort == "Year 1", Method == "Mixed_Model")
y1_ld <- df_filtered %>% filter(Cohort == "Year 1", Method == "LogDiff")

# Cronbach's alpha
alpha_y1 <- tibble(
  Method = c("Mixed Model", "LogDiff"),
  Cronbach_alpha = c(
    cronbach_alpha(make_matrix(y1_mm, "Score")),
    cronbach_alpha(make_matrix(y1_ld, "Score"))
  )
)

# Correlation with accuracy
acc_y1 <- df_acc_filtered %>%
  filter(Cohort == "Year 1") %>%
  group_by(student_id) %>%
  summarise(prop_correct = mean(Acc_Score), .groups = "drop")

score_y1 <- bind_rows(
  y1_mm %>% mutate(Method = "Mixed Model"),
  y1_ld %>% mutate(Method = "LogDiff")
) %>%
  group_by(Method, student_id) %>%
  summarise(mean_score = mean(Score), .groups = "drop") %>%
  left_join(acc_y1, by = "student_id") %>%
  group_by(Method) %>%
  summarise(accuracy_corr = cor(mean_score, prop_correct, use = "complete.obs"), .groups = "drop")

# Item-total correlations (compute once per method rather than repeating the call)
itc_vals_y1_mm <- item_total_cor(make_matrix(y1_mm, "Score"))
itc_vals_y1_ld <- item_total_cor(make_matrix(y1_ld, "Score"))

itc_y1 <- tibble(
  Method = rep(c("Mixed Model", "LogDiff"), c(length(itc_vals_y1_mm), length(itc_vals_y1_ld))),
  Item_Total_Cor = c(itc_vals_y1_mm, itc_vals_y1_ld)
) %>%
  group_by(Method) %>%
  summarise(
    Min = min(Item_Total_Cor, na.rm = TRUE),
    Q1 = quantile(Item_Total_Cor, 0.25, na.rm = TRUE),
    Median = median(Item_Total_Cor, na.rm = TRUE),
    Mean = mean(Item_Total_Cor, na.rm = TRUE),
    Q3 = quantile(Item_Total_Cor, 0.75, na.rm = TRUE),
    Max = max(Item_Total_Cor, na.rm = TRUE),
    .groups = "drop"
  )

kable(alpha_y1, digits = 3, caption = "Cronbach's Alpha by Method (Year 1)") %>%
  kable_styling(bootstrap_options = c("striped", "hover"), full_width = FALSE, position = "left") %>%
  row_spec(0, bold = TRUE, color = "#1b1b1b", background = "#e5e5e5")
Cronbach’s Alpha by Method (Year 1)
Method Cronbach_alpha
Mixed Model 0.883
LogDiff 0.908
kable(score_y1, digits = 3, caption = "Correlation with Accuracy by Method (Year 1)") %>%
  kable_styling(bootstrap_options = c("striped", "hover"), full_width = FALSE, position = "left") %>%
  row_spec(0, bold = TRUE, color = "#1b1b1b", background = "#e5e5e5")
Correlation with Accuracy by Method (Year 1)
Method accuracy_corr
LogDiff 0.794
Mixed Model 0.952
kable(itc_y1, digits = 3, caption = "Item-Total Correlation Summary (Year 1)") %>%
  kable_styling(bootstrap_options = c("striped", "hover"), full_width = FALSE, position = "left") %>%
  row_spec(0, bold = TRUE, color = "#1b1b1b", background = "#e5e5e5")
Item-Total Correlation Summary (Year 1)
Method Min Q1 Median Mean Q3 Max
LogDiff -0.382 0.294 0.361 0.374 0.455 0.862
Mixed Model -0.347 0.195 0.305 0.303 0.398 0.815

IRT Summary (Mixed Model vs LogDiff)

# Run Analyses
data_y_mm <- df_filtered %>% filter(Cohort == "Year 1", Method == "Mixed_Model")
data_y_ld <- df_filtered %>% filter(Cohort == "Year 1", Method == "LogDiff")

res_y_mm <- run_irt_analysis(data_y_mm, "Score", "Year 1", "Mixed Model")
res_y_ld <- run_irt_analysis(data_y_ld, "Score", "Year 1", "LogDiff")

irt_table_y1 <- combine_item_tables(res_y_mm$item_stats, res_y_ld$item_stats)

summary_y1 <- tibble(
  Metric = c("Reliability (WLE)", "Error Summary"),
  `Mixed Model` = c(res_y_mm$text_rel, res_y_mm$text_err),
  `Log Diff` = c(res_y_ld$text_rel, res_y_ld$text_err)
)

kable(summary_y1, caption = "Year 1 IRT Reliability and Error") %>%
  kable_styling(bootstrap_options = c("striped", "hover"), full_width = FALSE, position = "left") %>%
  row_spec(0, bold = TRUE, color = "#1b1b1b", background = "#e5e5e5")
Year 1 IRT Reliability and Error
Metric Mixed Model Log Diff
Reliability (WLE) Reliability (WLE): 0.795 (True variance: 79.5%) Reliability (WLE): 0.877 (True variance: 87.7%)
Error Summary Mean Ability: -0.02 (SD 0.62). Avg SEM: 0.27 (44.5% of SD). Mean Ability: -0.03 (SD 0.83). Avg SEM: 0.29 (34.2% of SD).
kable(
  irt_table_y1,
  col.names = c(
    "Item #", "Question_ID", "Pct Correct",
    "Pct Fast", "Item Difficulty", "Item Discrim.",
    "Pct Fast", "Item Difficulty", "Item Discrim."
  ),
  caption = "Year 1 IRT Item Statistics"
) %>%
  add_header_above(c(" " = 3, "Mixed Model" = 3, "Log Diff" = 3)) %>%
  kable_styling(bootstrap_options = c("striped", "hover"), full_width = FALSE, position = "left", fixed_thead = TRUE) %>%
  row_spec(0, bold = TRUE, color = "#1b1b1b", background = "#e5e5e5") %>%
  scroll_box(width = "100%", height = "500px") %>%
  footnote(
    general = c(
      "Item Difficulty (Xsi): Latent difficulty parameter from the TAM partial credit model.",
      "Item Discrim. (PBIs): Point-biserial correlation between item scores and ability estimates."
    ),
    general_title = "Notes: ",
    footnote_as_chunk = TRUE
  )
Year 1 IRT Item Statistics
Mixed Model
Log Diff
Item # Question_ID Pct Correct Pct Fast Item Difficulty Item Discrim. Pct Fast Item Difficulty Item Discrim.
1 AADD_001 98.0% 50.0% -1.84 0.11 51.0% -1.95 0.28
2 AADD_002 94.0% 47.0% -1.15 0.45 49.0% -1.26 0.50
3 AADD_003 92.0% 46.0% -1.00 0.39 53.0% -1.18 0.49
4 AADD_004 93.0% 47.0% -1.06 0.38 50.0% -1.19 0.48
5 AADD_005 80.0% 40.0% -0.27 0.42 40.0% -0.30 0.48
6 AADD_006 95.0% 47.0% -1.02 0.36 51.0% -1.12 0.45
7 AADD_007 92.0% 46.0% -0.79 0.43 54.0% -0.93 0.52
8 AADD_008 97.0% 49.0% -1.26 0.29 49.0% -1.28 0.42
9 AADD_009 85.0% 43.0% -0.29 0.51 48.0% -0.32 0.54
10 AADD_010 69.0% 35.0% 0.26 0.43 35.0% 0.40 0.42
11 AADD_011 90.0% 45.0% -0.57 0.53 62.0% -0.74 0.58
12 AADD_012 92.0% 46.0% -0.63 0.60 47.0% -0.60 0.63
13 AADD_013 86.0% 44.0% -0.36 0.58 44.0% -0.30 0.70
14 AADD_014 85.0% 43.0% -0.36 0.69 63.0% -0.59 0.64
15 AADD_015 94.0% 48.0% -0.92 0.10 59.0% -0.97 0.25
16 ASDD_001 83.0% 42.0% -0.51 0.49 44.0% -0.61 0.51
17 ASDD_002 71.0% 36.0% -0.09 0.45 37.0% -0.14 0.48
18 ASDD_003 63.0% 32.0% 0.14 0.45 33.0% 0.12 0.47
19 ASDD_004 60.0% 30.0% 0.24 0.50 32.0% 0.23 0.53
20 ASDD_005 77.0% 39.0% -0.16 0.54 43.0% -0.26 0.55
21 ASDD_006 73.0% 36.0% 0.01 0.56 41.0% -0.03 0.60
22 ASDD_007 76.0% 38.0% -0.03 0.63 44.0% -0.05 0.66
23 ASDD_008 75.0% 37.0% 0.03 0.65 42.0% 0.08 0.66
24 ASDD_009 73.0% 37.0% 0.03 0.51 44.0% 0.07 0.56
25 ASDD_010 72.0% 36.0% 0.10 0.53 36.0% 0.31 0.60
26 ASDD_011 72.0% 37.0% -0.01 0.74 48.0% -0.05 0.77
27 ASDD_012 68.0% 34.0% 0.08 0.76 48.0% 0.00 0.76
28 ASDD_013 56.0% 28.0% 0.41 0.73 33.0% 0.57 0.76
29 ASDD_014 54.0% 27.0% 0.33 0.74 34.0% 0.48 0.83
30 ASDD_015 50.0% 25.0% 0.42 0.82 36.0% 0.36 0.91
31 MC0-100_001 97.0% 49.0% -1.46 0.10 53.0% -1.62 0.26
32 MC0-100_002 86.0% 44.0% -0.64 0.55 49.0% -0.82 0.61
33 MC0-100_003 72.0% 36.0% -0.12 0.39 41.0% -0.26 0.46
34 MC0-100_004 88.0% 44.0% -0.73 0.47 52.0% -0.95 0.56
35 MC0-100_005 84.0% 43.0% -0.57 0.41 52.0% -0.79 0.49
36 MC0-100_006 95.0% 47.0% -1.20 0.21 54.0% -1.41 0.41
37 MC0-100_007 94.0% 47.0% -1.17 0.28 63.0% -1.50 0.46
38 MC0-100_008 89.0% 45.0% -0.75 0.45 49.0% -0.91 0.56
39 MC0-100_009 91.0% 46.0% -0.89 0.26 71.0% -1.32 0.39
40 MC0-100_010 58.0% 29.0% 0.27 0.37 40.0% 0.07 0.46
41 MC0-100_011 97.0% 48.0% -1.44 0.16 64.0% -1.75 0.33
42 MC0-100_012 94.0% 47.0% -1.06 0.30 54.0% -1.23 0.46
43 MC0-100_013 93.0% 46.0% -0.97 0.36 68.0% -1.32 0.49
44 MC0-100_014 92.0% 46.0% -0.90 0.27 50.0% -1.00 0.39
45 MC0-100_015 85.0% 43.0% -0.50 0.35 56.0% -0.70 0.46
46 MC0-100_016 87.0% 43.0% -0.56 0.24 54.0% -0.72 0.35
47 MC0-100_017 88.0% 44.0% -0.64 0.45 57.0% -0.80 0.52
48 MC0-100_018 91.0% 46.0% -0.80 0.31 61.0% -0.99 0.42
49 MC0-100_019 87.0% 44.0% -0.56 0.40 59.0% -0.73 0.46
50 MC0-100_020 92.0% 46.0% -0.86 0.34 51.0% -0.86 0.38
51 MC0-100_021 93.0% 47.0% -0.97 0.34 51.0% -0.95 0.36
52 MC0-100_022 93.0% 47.0% -0.92 0.27 63.0% -1.05 0.36
53 MC0-100_023 91.0% 45.0% -0.77 0.38 50.0% -0.72 0.34
54 MC0-100_024 89.0% 45.0% -0.65 0.42 55.0% -0.67 0.42
55 MC0-100_025 84.0% 42.0% -0.37 0.24 56.0% -0.48 0.40
56 MC0-100_026 87.0% 44.0% -0.54 0.14 64.0% -0.73 0.38
57 MC0-100_027 91.0% 46.0% -0.72 0.29 68.0% -1.00 0.37
58 MC0-100_028 77.0% 38.0% -0.15 0.26 54.0% -0.35 0.37
59 MC0-100_029 82.0% 41.0% -0.33 0.44 46.0% -0.33 0.48
60 MC0-100_030 85.0% 42.0% -0.61 0.34 53.0% -0.66 0.31
61 MC0-100_031 82.0% 42.0% -0.61 0.33 70.0% -1.02 0.34
62 MC0-100_032 83.0% 42.0% -0.40 0.41 74.0% -0.86 0.57
63 MC0-100_033 80.0% 40.0% -0.37 0.56 57.0% -0.61 0.43
64 MNAs0-100_001 82.0% 41.0% -0.46 0.38 48.0% -0.64 0.43
65 MNAs0-100_002 89.0% 45.0% -0.77 0.26 52.0% -0.98 0.37
66 MNAs0-100_003 87.0% 44.0% -0.69 0.39 52.0% -0.90 0.49
67 MNAs0-100_004 97.0% 48.0% -1.47 0.21 62.0% -1.79 0.33
68 MNAs0-100_005 78.0% 39.0% -0.29 0.19 49.0% -0.48 0.26
69 MNAs0-100_006 90.0% 45.0% -0.77 0.30 57.0% -0.99 0.39
70 MNAs0-100_007 92.0% 46.0% -0.89 0.31 66.0% -1.18 0.45
71 MNAs0-100_008 93.0% 47.0% -0.94 0.30 60.0% -1.12 0.39
72 MNAs0-100_009 72.0% 36.0% 0.00 0.22 53.0% -0.18 0.25
73 MNAs0-100_010 89.0% 45.0% -0.68 0.25 65.0% -0.94 0.45
74 MNAs0-100_011 75.0% 38.0% -0.12 0.37 50.0% -0.21 0.41
75 MNAs0-100_012 86.0% 43.0% -0.60 0.35 69.0% -0.93 0.45
76 MNAs0-100_013 84.0% 42.0% -0.54 0.26 62.0% -0.82 0.20
77 MNAs0-100_014 70.0% 35.0% -0.11 0.31 53.0% -0.34 0.42
78 MNAs0-100_015 63.0% 31.0% -0.03 0.40 45.0% -0.21 0.35
79 MNAs0-100_016 59.0% 31.0% -0.09 0.49 41.0% -0.20 0.43
80 MNAs0-100_017 52.0% 26.0% 0.08 0.58 37.0% -0.10 0.58
81 MNAs0-100_018 42.0% 21.0% 0.07 0.51 42.0% -0.39 0.49
82 MNAs0-100_019 59.0% 29.0% -0.42 0.69 35.0% -0.55 0.72
83 MNC0-100_001 95.0% 49.0% -1.24 0.36 51.0% -1.36 0.43
84 MNC0-100_002 89.0% 45.0% -0.77 0.49 45.0% -0.84 0.56
85 MNC0-100_003 84.0% 42.0% -0.55 0.52 41.0% -0.60 0.57
86 MNC0-100_004 83.0% 42.0% -0.47 0.57 43.0% -0.54 0.63
87 MNC0-100_005 84.0% 42.0% -0.49 0.60 47.0% -0.61 0.65
88 MNC0-100_006 92.0% 46.0% -0.89 0.31 57.0% -1.09 0.38
89 MNC0-100_007 90.0% 45.0% -0.75 0.48 52.0% -0.88 0.54
90 MNC0-100_008 86.0% 43.0% -0.52 0.55 49.0% -0.59 0.63
91 MNC0-100_009 88.0% 44.0% -0.61 0.44 52.0% -0.69 0.51
92 MNC0-100_010 83.0% 41.0% -0.38 0.59 48.0% -0.39 0.64
93 MNC0-100_011 81.0% 41.0% -0.41 0.44 44.0% -0.33 0.46
94 MNC0-100_012 71.0% 35.0% -0.15 0.61 38.0% -0.03 0.65
95 MNC0-100_013 62.0% 31.0% 0.05 0.78 41.0% 0.01 0.82
96 MNC0-100_014 64.0% 32.0% -0.19 0.63 49.0% -0.38 0.64
97 MNC0-100_015 62.0% 32.0% -0.28 0.69 29.0% -0.04 0.62
98 MNC0-100_016 59.0% 30.0% -0.31 0.44 37.0% -0.36 0.43
99 MNC0-100_017 22.0% 11.0% 0.95 -0.34 18.0% 0.95 -0.36
100 MNC0-100_018 16.0% 8.0% 1.24 -0.33 11.0% 1.38 -0.31
101 MNC0-100_019 17.0% 9.0% 0.78 0.06 13.0% 0.79 -0.02
Notes: Item Difficulty (Xsi): Latent difficulty parameter from the TAM partial credit model. Item Discrim. (PBIs): Point-biserial correlation between item scores and ability estimates.
plot_wright_side_by_side(res_y_mm$wle, res_y_mm$model, res_y_ld$wle, res_y_ld$model, "Year 1")