setwd("/Users/isaiahmireles/Desktop/Misconceptions/S24_Final_Exam_Version_Set_Scores")
dfA <- 
  read.csv("Final_Exam_Version_A_scores.csv", header=T)

dfB <- 
  read.csv("Final_Exam_Version_B_scores.csv", header=T)

1 Comb. Dat

library(dplyr)
dfA <-
  dfA[, -(1:5)] |> mutate(student = paste0("s", row_number(), "_A")) |> select(student, Total.Score, Max.Points, contains("pts")) |>
  rename_with(
    ~ paste0("q", seq_along(.)),
    contains("pts")
  )

dfB <- 
  dfB[, -(1:5)] |> mutate(student = paste0("s", row_number(), "_B")) |> select(student, Total.Score, Max.Points, contains("pts")) |>
  rename_with(
    ~ paste0("q", seq_along(.)),
    contains("pts")
  )
library(stringr)
df_all <-
  bind_rows(dfA, dfB) |>
  mutate(
    Version = str_extract(student, "[A-Z]$")
  ) |>
  select(student, Version, everything())
df_all |> head(3)

2 PerQ-Pct

library(tidyr)
pct_tbl <-
  df_all |> select(contains("q")) |>
  summarise(across(everything(), ~ mean(.x, na.rm = TRUE) * 100)) |>
  pivot_longer(
    everything(),
    names_to = "Question",
    values_to = "Percent_Correct"
  )

pct_tbl
dfA |> select(contains("q")) |>
  summarise(across(everything(), ~ mean(.x, na.rm = TRUE) * 100)) |>
  pivot_longer(
    everything(),
    names_to = "Question",
    values_to = "Percent_Correct"
  )
dfB |> select(contains("q")) |>
  summarise(across(everything(), ~ mean(.x, na.rm = TRUE) * 100)) |>
  pivot_longer(
    everything(),
    names_to = "Question",
    values_to = "Percent_Correct"
  )
library(dplyr)
library(tidyr)
library(ggplot2)

question_perf <-

  df_all |>

  select(starts_with("q")) |>

  summarise(
    across(everything(), ~ mean(.x, na.rm = TRUE) * 100)
  ) |>

  pivot_longer(
    everything(),
    names_to = "Question",
    values_to = "Percent_Correct"
  )

ggplot(
  question_perf,
  aes(
    x = reorder(Question, Percent_Correct),
    y = Percent_Correct
  )
) +
  geom_col() +
  coord_flip() +
  labs(
    title = "Percent Correct by Question",
    x = "Question",
    y = "Percent Correct (%)"
  ) +
  theme_minimal()

typical score :

mean(df_all$Total.Score, na.rm=T)/32
## [1] 0.7544949
xbar <- mean(df_all$Total.Score, na.rm=T)/32

student level score :

df_all <- df_all|> mutate(pct = Total.Score/Max.Points)
df_all |> select(pct) 
  • highly varied performance
pct_tbl |> 
  mutate(
    below_avr = Percent_Correct < mean(Percent_Correct, na.rm = TRUE)
  ) |> 
  arrange(Percent_Correct) 
focus_q <-
  pct_tbl |> 
  mutate(
    below_avr = Percent_Correct < mean(Percent_Correct, na.rm = TRUE)
  ) |> 
  arrange(Percent_Correct) |> filter(below_avr==T)

focus_q

3 Focus Q

  • defined as below avr percent on overall exam
focus_q <- focus_q |> arrange(Question) 
focus_q$Question
##  [1] "q12" "q13" "q14" "q15" "q17" "q18" "q2"  "q20" "q24" "q25" "q27" "q28"
## [13] "q30" "q31" "q7"

4 Q-specific bar plt

barplot(
  height = pct_tbl$Percent_Correct,
  names.arg = pct_tbl$Question,
  las = 2,
  ylim = c(0, 100),
  ylab = "Percent Correct",
  main = "Percent Correct by Question"
)

abline(
  h = mean(pct_tbl$Percent_Correct),
  lty = 2
)

library(dplyr)

student_paths <-

  df_all |>

  na.omit() |>

  select(student, starts_with("q")) |>

  rowwise() |>

  mutate(
    path = list(cumsum(c_across(starts_with("q"))))
  ) |>

  ungroup()

5 student-specific path

library(animation)
saveGIF({

  for (i in seq_len(nrow(student_paths))) {

    plot(
      student_paths$path[[i]],
      type = "o",
      xlab = "Question",
      ylab = "Cumulative Correct",
      main = student_paths$student[i],
      ylim = c(0, 32)
    )

    abline(a = 0, b = 1, col = "green", lwd = 2)

  }

}, movie.name = "student_paths.gif")
## [1] TRUE
knitr::include_graphics("student_paths.gif")

library(plotly)

p <- plot_ly()

for(i in seq_len(nrow(student_paths))) {

  p <- p |>
    add_trace(
      x = 1:32,
      y = student_paths$path[[i]],
      frame = student_paths$student[i],
      type = "scatter",
      mode = "lines+markers",
      showlegend = FALSE
    )

}

p |>
  animation_opts(
    frame = 800,
    transition = 0
  ) |>
  animation_slider() |>
  animation_button()

6 avr path

paths <- do.call(rbind, student_paths$path)
avg_path <- colMeans(paths)

plot(
  avg_path,
  type = "o",
  lwd = 3,
  xlab = "Question",
  ylab = "Average Cumulative Correct",
  main = "Average Student Path",
  ylim = c(0, 32)
)

7 all-paths

focus_idx <- as.numeric(sub("q", "", focus_q$Question))

plot(
  NA,
  xlim = c(1, ncol(paths)),
  ylim = c(0, 32),
  xlab = "Question",
  ylab = "Cumulative Correct",
  main = "Student Paths Through Exam",
  xaxt = "n"
)

axis(
  side = 1,
  at = 1:32,
  labels = FALSE
)

text(
  x = 1:32,
  y = par("usr")[3] - 1,
  labels = 1:32,
  srt = 90,
  adj = 1,
  xpd = TRUE,
  cex = 0.6
)

for (i in seq_len(nrow(paths))) {

  y <- paths[i, ]

  for (j in 2:length(y)) {

    col <- if ((y[j] - y[j - 1]) == 0)
      rgb(1, 0, 0, 0.15)
    else
      rgb(0, 0, 0, 0.05)

    segments(
      j - 1, y[j - 1],
      j,     y[j],
      col = col,
      lwd = 2
    )
  }
}

# Perfect score trajectory
abline(a = 0, b = 1, col = "green", lwd = 3)

# Focus questions
abline(
  v = focus_idx,
  col = "blue",
  lty = 2,
  lwd = 1
)

legend(
  "topleft",
  legend = c(
    "Correct Segment",
    "Incorrect Segment",
    "Perfect Score Path",
    "Focus Question"
  ),
  col = c(
    "black",
    "red",
    "green",
    "blue"
  ),
  lty = c(1, 1, 1, 2),
  lwd = c(2, 2, 3, 1),
  cex = 0.7,      # smaller text
  pt.cex = 0.7,   # smaller symbols (if points are used)
  x.intersp = 0.5,
  y.intersp = 0.8,
  bty = "n"
)

library(tibble)
library(dplyr)

item_disc <-

  sapply(
    df_all |> select(starts_with("q")),
    function(x)
      cor(
        x,
        df_all$Total.Score - x,
        use = "complete.obs"
      )
  )

item_disc_tbl <-

  enframe(
    item_disc,
    name = "Question",
    value = "Item_Discrimination"
  ) |>

  mutate(
    Disc_Level = case_when(
      Item_Discrimination >= 0.50 ~ "Very High",
      Item_Discrimination >= 0.40 ~ "High",
      Item_Discrimination >= 0.30 ~ "Moderate",
      Item_Discrimination >= 0.20 ~ "Low",
      TRUE ~ "Very Low"
    )
  ) |>

  arrange(desc(Item_Discrimination))

item_disc_tbl
  • Questions with high correlations are answered correctly mainly by high-performing students and incorrectly by low-performing students.
disc_colors <-

  item_disc_tbl |>

  arrange(
    as.numeric(sub("q", "", Question))
  ) |>

  mutate(
    col = case_when(
      Disc_Level == "Very High" ~ "darkgreen",
      Disc_Level == "High"      ~ "blue",
      Disc_Level == "Moderate"  ~ "orange",
      Disc_Level == "Low"       ~ "red",
      Disc_Level == "Very Low"  ~ "purple"
    )
  )

label_cols <- disc_colors$col

8 all path + descrimination

par(mar = c(8, 4, 4, 2))

focus_idx <- as.numeric(sub("q", "", focus_q$Question))

plot(
  NA,
  xlim = c(1, ncol(paths)),
  ylim = c(0, 32),
  xlab = "Question",
  ylab = "Cumulative Correct",
  main = "Student Paths Through Exam",
  xaxt = "n"
)

axis(
  side = 1,
  at = 1:32,
  labels = FALSE
)

text(
  x = 1:32,
  y = par("usr")[3] - 1,
  labels = 1:32,
  srt = 90,
  adj = 1,
  xpd = TRUE,
  cex = 0.7,
  col = label_cols
)

for (i in seq_len(nrow(paths))) {

  y <- paths[i, ]

  for (j in 2:length(y)) {

    col <- if ((y[j] - y[j - 1]) == 0)
      rgb(1, 0, 0, 0.15)
    else
      rgb(0, 0, 0, 0.05)

    segments(
      j - 1, y[j - 1],
      j,     y[j],
      col = col,
      lwd = 2
    )
  }
}

# Perfect score trajectory
abline(a = 0, b = 1, col = "green", lwd = 3)

# Focus questions
abline(
  v = focus_idx,
  col = "blue",
  lty = 2,
  lwd = 1
)

legend(
  "topleft",
  legend = c(
    "Correct Segment",
    "Incorrect Segment",
    "Perfect Score Path",
    "Focus Question"
  ),
  col = c(
    "black",
    "red",
    "green",
    "blue"
  ),
  lty = c(1, 1, 1, 2),
  lwd = c(2, 2, 3, 1),
  cex = 0.7,      # smaller text
  pt.cex = 0.7,   # smaller symbols (if points are used)
  x.intersp = 0.5,
  y.intersp = 0.8,
  bty = "n"
)

legend(
  x = 1,
  y = -6,
  legend = c(
    "Very High",
    "High",
    "Moderate",
    "Low",
    "Very Low"
  ),
  col = c(
    "darkgreen",
    "blue",
    "orange",
    "red",
    "purple"
  ),
  pch = 19,
  horiz = TRUE,
  cex = 0.7,
  bty = "n",
  xpd = TRUE
)