This is an RMarkdown document displaying R code for generating Wright plots. Cut-scores, score distributions, and item difficulties are simulated for 20 items (of two different types), 3 cut-scores, 1000 test-takers across 3 separate tests.
Difficulty values are rounded to allow for stacking in a dotplot style. Furthermore, the color and symbol of the item difficulty points will depend on whether it is one of the two item types. Score distributions are aligned to a histogram. Both difficulties and score estimates will be placed on the same vertical axis (in Logits). The horizontal axis will reflect the two unique axis units for difficulties and scores. Cut-scores will be represented by horizontal dashed lines.
The final result is a plot that compared distribution of difficulties against both score distribution and test cut-scores.
The first block of code accomplished the following:
library(ggplot2)
library(dplyr)
library(vcdExtra)
Test <- c(1,2,3)
Test <- data.frame(Test)
Cut_1 <- rnorm(3,-.75,.15)
Cut_2 <- rnorm(3,0,.15)
Cut_3 <- rnorm(3,.75,.15)
Cut_set <- cbind(Test, Cut_1, Cut_2, Cut_3)
Item difficulties are simulated for two item types (15 operational items and 5 field items) for each of the three tests. These are then coerced into a data frame.
diff_11 <- rnorm(15,0,1)
diff_12 <- rnorm(5,-.25,.75)
diff_21 <- rnorm(15,.25,1)
diff_22 <- rnorm(5,-.5,1.75)
diff_31 <- rnorm(15,1,.5)
diff_32 <- rnorm(5,-.25,.5)
diff_11 <- data.frame(diff_11)
diff_12 <- data.frame(diff_12)
diff_21 <- data.frame(diff_21)
diff_22 <- data.frame(diff_22)
diff_31 <- data.frame(diff_31)
diff_32 <- data.frame(diff_32)
colnames(diff_11) <- "Difficulty"
colnames(diff_12) <- "Difficulty"
colnames(diff_21) <- "Difficulty"
colnames(diff_22) <- "Difficulty"
colnames(diff_31) <- "Difficulty"
colnames(diff_32) <- "Difficulty"
diffset <- rbind(diff_11, diff_12, diff_21, diff_22, diff_31, diff_32)
In order to discretize difficulty values for the purpose of creating a stacked dotplot, specialized rounding is needed. The following code creates the needed function.
mround <- function(number, multiple) {
# if number and multiple have different sign, returning an error.
if (sign(number) != sign(multiple)) {
stop("number and multiple have different sign")
}
n = number / multiple
if (abs(n - trunc(n)) == 0.5) {
if (sign(n) > 0) {
n = n + 0.1
} else {
n = n - 0.1
}
}
round(n) * multiple
}
The mround function is then applied using a loop. This will overwrite the diffset data frame with the newly rounded values.
for (i in seq(1,60,1))
{
diffset[i,1] <- ifelse(diffset[i,1] < 0, mround(diffset[i,1],-.1818), mround(diffset[i,1],.1818))
}
The data frame containing the difficulty data is then created. This includes creating a running tally of duplicates of difficulty values within a given test. This is instrumental in helping create the dotplot element of the Wright plot later on.
Test <- c(rep(1,20), rep(2,20), rep(3,20))
Test <- data.frame(Test)
Type <- c(rep(1,15), rep(2,5), rep(1,15), rep(2,5), rep(1,15), rep(2,5))
Type <- data.frame(Type)
Difficulty_set <- cbind(Test, Type, diffset)
Difficulty_set <- Difficulty_set %>% arrange(Test, Difficulty, Type)
Difficulty_set[1,4] <- 1
colnames(Difficulty_set)[4] <- "Count"
for (i in seq(2,60,1))
{
Difficulty_set[i,4] <- ifelse(Difficulty_set[i,3] == Difficulty_set[i-1,3], Difficulty_set[i-1,4] + 1, 1)
}
Score distributions are simulated (n = 1000) for each test. A number of steps are then taken to create an appropriate data frame with defined factors.
Test <- c(rep(1,1000), rep(2,1000), rep(3,1000))
Test <- data.frame(Test)
Score_1 <- rnorm(1000,-.25,1)
Score_2 <- rnorm(1000,0,.9)
Score_3 <- rnorm(1000,.25,.9)
Score_1 <- data.frame(Score_1)
Score_2 <- data.frame(Score_2)
Score_3 <- data.frame(Score_3)
colnames(Score_1) <- "Logits"
colnames(Score_2) <- "Logits"
colnames(Score_3) <- "Logits"
Score_set <- rbind(Score_1, Score_2, Score_3)
Score_set_full <- cbind(Test, Score_set)
Score_set_full$Test <- as.factor(Score_set_full$Test)
Cut_set$Test <- as.factor(Cut_set$Test)
Difficulty_set$Test <- as.factor(Difficulty_set$Test)
Difficulty_set$Type <- factor(Difficulty_set$Type, levels = c(1,2), labels = c("Operational","Field"))
Data frames specific to a single test are created via filtering. Specifically, for each test there will be a data frame for: difficulties, score distribution, and cut-scores.
Diff_1 <- Difficulty_set %>% filter(Test == 1)
Diff_1 <- Diff_1 %>% mutate(CountNew = Count*(-.02))
Diff_2 <- Difficulty_set %>% filter(Test == 2)
Diff_2 <- Diff_2 %>% mutate(CountNew = Count*(-.02))
Diff_3 <- Difficulty_set %>% filter(Test == 3)
Diff_3 <- Diff_3 %>% mutate(CountNew = Count*(-.02))
Score_1 <- Score_set_full %>% filter(Test == 1)
Score_2 <- Score_set_full %>% filter(Test == 2)
Score_3 <- Score_set_full %>% filter(Test == 3)
Cut1_set <- Cut_set %>% filter(Test == 1)
Cut2_set <- Cut_set %>% filter(Test == 2)
Cut3_set <- Cut_set %>% filter(Test == 3)
A plot is then created for each of the three tests. From each plot, the following can be inspected:
wr_plot1 <- ggplot() + labs(title = "Wright Plot; Test 1") +
geom_histogram(data = Score_1, aes(x = Logits, y = ..density..), color = "red", fill = "red", binwidth = .33, alpha = .5) +
coord_flip() + scale_x_continuous(name = "Logits", breaks = seq(-3,3,1), limits = c(-3.5,3.5)) +
scale_y_reverse(name = "Density", limits = c(.6, -.2), breaks = seq(0,.6,.1), sec.axis = sec_axis(~.*(50), name = "Count of Items", breaks = seq(-10,-1,1), labels = c("10","9","8","7","6","5","4","3","2","1"))) +
geom_point(data = Diff_1, aes(x = Difficulty, y = CountNew, shape = Type, fill = Type), size = 3, color = "black", stroke = 1, alpha = .5) +
scale_shape_manual(values = c(22,24)) +
scale_fill_manual(values = c("darkslateblue","orange")) +
geom_vline(data = Cut1_set, aes(xintercept = Cut_1), linetype = "longdash") +
geom_vline(data = Cut1_set, aes(xintercept = Cut_2), linetype = "longdash") +
geom_vline(data = Cut1_set, aes(xintercept = Cut_3), linetype = "longdash") +
geom_hline(yintercept = 0, color = "black") + theme_bw() +
theme(plot.title = element_text(face = "bold", hjust = 0.5))
wr_plot1
## Warning: Removed 2 rows containing missing values (geom_bar).
wr_plot2 <- ggplot() + labs(title = "Wright Plot; Test 2") +
geom_histogram(data = Score_2, aes(x = Logits, y = ..density..), color = "red", fill = "red", binwidth = .33, alpha = .5) +
coord_flip() + scale_x_continuous(name = "Logits", breaks = seq(-3,3,1), limits = c(-3.5,3.5)) +
scale_y_reverse(name = "Density", limits = c(.6, -.2), breaks = seq(0,.6,.1), sec.axis = sec_axis(~ . *(50), name = "Count of Items", breaks = seq(-10,-1,1), labels = c("10","9","8","7","6","5","4","3","2","1"))) +
geom_point(data = Diff_2, aes(x = Difficulty, y = CountNew, shape = Type, fill = Type), size = 3, color = "black", stroke = 1, alpha = .5) +
scale_shape_manual(values = c(22,24)) +
scale_fill_manual(values = c("darkslateblue","orange")) +
geom_vline(data = Cut2_set, aes(xintercept = Cut_1), linetype = "longdash") +
geom_vline(data = Cut2_set, aes(xintercept = Cut_2), linetype = "longdash") +
geom_vline(data = Cut2_set, aes(xintercept = Cut_3), linetype = "longdash") +
geom_hline(yintercept = 0, color = "black") + theme_bw() +
theme(plot.title = element_text(face = "bold", hjust = 0.5))
wr_plot2
## Warning: Removed 2 rows containing missing values (geom_bar).
wr_plot3 <- ggplot() + labs(title = "Wright Plot; Test 3") +
geom_histogram(data = Score_3, aes(x = Logits, y = ..density..), color = "red", fill = "red", binwidth = .33, alpha = .5) +
coord_flip() + scale_x_continuous(name = "Logits", breaks = seq(-3,3,1), limits = c(-3.5,3.5)) +
scale_y_reverse(name = "Density", limits = c(.6, -.2), breaks = seq(0,.6,.1), sec.axis = sec_axis(~ . *(50), name = "Count of Items", breaks = seq(-10,-1,1), labels = c("10","9","8","7","6","5","4","3","2","1"))) +
geom_point(data = Diff_3, aes(x = Difficulty, y = CountNew, shape = Type, fill = Type), size = 3, color = "black", stroke = 1, alpha = .5) +
scale_shape_manual(values = c(22,24)) +
scale_fill_manual(values = c("darkslateblue","orange")) +
geom_vline(data = Cut3_set, aes(xintercept = Cut_1), linetype = "longdash") +
geom_vline(data = Cut3_set, aes(xintercept = Cut_2), linetype = "longdash") +
geom_vline(data = Cut3_set, aes(xintercept = Cut_3), linetype = "longdash") +
geom_hline(yintercept = 0, color = "black") + theme_bw() +
theme(plot.title = element_text(face = "bold", hjust = 0.5))
wr_plot3
## Warning: Removed 1 rows containing non-finite values (stat_bin).
## Warning: Removed 2 rows containing missing values (geom_bar).