setwd("/Users/isaiahmireles/Desktop/Misconceptions/S24_Final_Exam_Version_Set_Scores")
dfA <-
read.csv("Final_Exam_Version_A_scores.csv", header=T)
dfB <-
read.csv("Final_Exam_Version_B_scores.csv", header=T)
library(dplyr)
dfA <-
dfA[, -(1:5)] |> mutate(student = paste0("s", row_number(), "_A")) |> select(student, Total.Score, Max.Points, contains("pts")) |>
rename_with(
~ paste0("q", seq_along(.)),
contains("pts")
)
dfB <-
dfB[, -(1:5)] |> mutate(student = paste0("s", row_number(), "_B")) |> select(student, Total.Score, Max.Points, contains("pts")) |>
rename_with(
~ paste0("q", seq_along(.)),
contains("pts")
)
library(stringr)
df_all <-
bind_rows(dfA, dfB) |>
mutate(
Version = str_extract(student, "[A-Z]$")
) |>
select(student, Version, everything())
df_all |> head(3)
library(tidyr)
pct_tbl <-
df_all |> select(contains("q")) |>
summarise(across(everything(), ~ mean(.x, na.rm = TRUE) * 100)) |>
pivot_longer(
everything(),
names_to = "Question",
values_to = "Percent_Correct"
)
pct_tbl
dfA |> select(contains("q")) |>
summarise(across(everything(), ~ mean(.x, na.rm = TRUE) * 100)) |>
pivot_longer(
everything(),
names_to = "Question",
values_to = "Percent_Correct"
)
dfB |> select(contains("q")) |>
summarise(across(everything(), ~ mean(.x, na.rm = TRUE) * 100)) |>
pivot_longer(
everything(),
names_to = "Question",
values_to = "Percent_Correct"
)
library(dplyr)
library(tidyr)
library(ggplot2)
question_perf <-
df_all |>
select(starts_with("q")) |>
summarise(
across(everything(), ~ mean(.x, na.rm = TRUE) * 100)
) |>
pivot_longer(
everything(),
names_to = "Question",
values_to = "Percent_Correct"
)
ggplot(
question_perf,
aes(
x = reorder(Question, Percent_Correct),
y = Percent_Correct
)
) +
geom_col() +
coord_flip() +
labs(
title = "Percent Correct by Question",
x = "Question",
y = "Percent Correct (%)"
) +
theme_minimal()
typical score :
mean(df_all$Total.Score, na.rm=T)/32
## [1] 0.7544949
xbar <- mean(df_all$Total.Score, na.rm=T)/32
student level score :
df_all <- df_all|> mutate(pct = Total.Score/Max.Points)
df_all |> select(pct)
pct_tbl |>
mutate(
below_avr = Percent_Correct < mean(Percent_Correct, na.rm = TRUE)
) |>
arrange(Percent_Correct)
focus_q <-
pct_tbl |>
mutate(
below_avr = Percent_Correct < mean(Percent_Correct, na.rm = TRUE)
) |>
arrange(Percent_Correct) |> filter(below_avr==T)
focus_q
focus_q <- focus_q |> arrange(Question)
focus_q$Question
## [1] "q12" "q13" "q14" "q15" "q17" "q18" "q2" "q20" "q24" "q25" "q27" "q28"
## [13] "q30" "q31" "q7"
barplot(
height = pct_tbl$Percent_Correct,
names.arg = pct_tbl$Question,
las = 2,
ylim = c(0, 100),
ylab = "Percent Correct",
main = "Percent Correct by Question"
)
abline(
h = mean(pct_tbl$Percent_Correct),
lty = 2
)
library(dplyr)
student_paths <-
df_all |>
na.omit() |>
select(student, starts_with("q")) |>
rowwise() |>
mutate(
path = list(cumsum(c_across(starts_with("q"))))
) |>
ungroup()
library(animation)
saveGIF({
for (i in seq_len(nrow(student_paths))) {
plot(
student_paths$path[[i]],
type = "o",
xlab = "Question",
ylab = "Cumulative Correct",
main = student_paths$student[i],
ylim = c(0, 32)
)
abline(a = 0, b = 1, col = "green", lwd = 2)
}
}, movie.name = "student_paths.gif")
## [1] TRUE
knitr::include_graphics("student_paths.gif")
library(plotly)
p <- plot_ly()
for(i in seq_len(nrow(student_paths))) {
p <- p |>
add_trace(
x = 1:32,
y = student_paths$path[[i]],
frame = student_paths$student[i],
type = "scatter",
mode = "lines+markers",
showlegend = FALSE
)
}
p |>
animation_opts(
frame = 800,
transition = 0
) |>
animation_slider() |>
animation_button()
paths <- do.call(rbind, student_paths$path)
avg_path <- colMeans(paths)
plot(
avg_path,
type = "o",
lwd = 3,
xlab = "Question",
ylab = "Average Cumulative Correct",
main = "Average Student Path",
ylim = c(0, 32)
)
focus_idx <- as.numeric(sub("q", "", focus_q$Question))
plot(
NA,
xlim = c(1, ncol(paths)),
ylim = c(0, 32),
xlab = "Question",
ylab = "Cumulative Correct",
main = "Student Paths Through Exam",
xaxt = "n"
)
axis(
side = 1,
at = 1:32,
labels = FALSE
)
text(
x = 1:32,
y = par("usr")[3] - 1,
labels = 1:32,
srt = 90,
adj = 1,
xpd = TRUE,
cex = 0.6
)
for (i in seq_len(nrow(paths))) {
y <- paths[i, ]
for (j in 2:length(y)) {
col <- if ((y[j] - y[j - 1]) == 0)
rgb(1, 0, 0, 0.15)
else
rgb(0, 0, 0, 0.05)
segments(
j - 1, y[j - 1],
j, y[j],
col = col,
lwd = 2
)
}
}
# Perfect score trajectory
abline(a = 0, b = 1, col = "green", lwd = 3)
# Focus questions
abline(
v = focus_idx,
col = "blue",
lty = 2,
lwd = 1
)
legend(
"topleft",
legend = c(
"Correct Segment",
"Incorrect Segment",
"Perfect Score Path",
"Focus Question"
),
col = c(
"black",
"red",
"green",
"blue"
),
lty = c(1, 1, 1, 2),
lwd = c(2, 2, 3, 1),
cex = 0.7, # smaller text
pt.cex = 0.7, # smaller symbols (if points are used)
x.intersp = 0.5,
y.intersp = 0.8,
bty = "n"
)
library(tibble)
library(dplyr)
item_disc <-
sapply(
df_all |> select(starts_with("q")),
function(x)
cor(
x,
df_all$Total.Score - x,
use = "complete.obs"
)
)
item_disc_tbl <-
enframe(
item_disc,
name = "Question",
value = "Item_Discrimination"
) |>
mutate(
Disc_Level = case_when(
Item_Discrimination >= 0.50 ~ "Very High",
Item_Discrimination >= 0.40 ~ "High",
Item_Discrimination >= 0.30 ~ "Moderate",
Item_Discrimination >= 0.20 ~ "Low",
TRUE ~ "Very Low"
)
) |>
arrange(desc(Item_Discrimination))
item_disc_tbl
disc_colors <-
item_disc_tbl |>
arrange(
as.numeric(sub("q", "", Question))
) |>
mutate(
col = case_when(
Disc_Level == "Very High" ~ "darkgreen",
Disc_Level == "High" ~ "blue",
Disc_Level == "Moderate" ~ "orange",
Disc_Level == "Low" ~ "red",
Disc_Level == "Very Low" ~ "purple"
)
)
label_cols <- disc_colors$col
par(mar = c(8, 4, 4, 2))
focus_idx <- as.numeric(sub("q", "", focus_q$Question))
plot(
NA,
xlim = c(1, ncol(paths)),
ylim = c(0, 32),
xlab = "Question",
ylab = "Cumulative Correct",
main = "Student Paths Through Exam",
xaxt = "n"
)
axis(
side = 1,
at = 1:32,
labels = FALSE
)
text(
x = 1:32,
y = par("usr")[3] - 1,
labels = 1:32,
srt = 90,
adj = 1,
xpd = TRUE,
cex = 0.7,
col = label_cols
)
for (i in seq_len(nrow(paths))) {
y <- paths[i, ]
for (j in 2:length(y)) {
col <- if ((y[j] - y[j - 1]) == 0)
rgb(1, 0, 0, 0.15)
else
rgb(0, 0, 0, 0.05)
segments(
j - 1, y[j - 1],
j, y[j],
col = col,
lwd = 2
)
}
}
# Perfect score trajectory
abline(a = 0, b = 1, col = "green", lwd = 3)
# Focus questions
abline(
v = focus_idx,
col = "blue",
lty = 2,
lwd = 1
)
legend(
"topleft",
legend = c(
"Correct Segment",
"Incorrect Segment",
"Perfect Score Path",
"Focus Question"
),
col = c(
"black",
"red",
"green",
"blue"
),
lty = c(1, 1, 1, 2),
lwd = c(2, 2, 3, 1),
cex = 0.7, # smaller text
pt.cex = 0.7, # smaller symbols (if points are used)
x.intersp = 0.5,
y.intersp = 0.8,
bty = "n"
)
legend(
x = 1,
y = -6,
legend = c(
"Very High",
"High",
"Moderate",
"Low",
"Very Low"
),
col = c(
"darkgreen",
"blue",
"orange",
"red",
"purple"
),
pch = 19,
horiz = TRUE,
cex = 0.7,
bty = "n",
xpd = TRUE
)