Preamble
# clear memory
rm(list = ls())
ls() # check memory
## character(0)
# set the default working directory
setwd("/Users/salvadorcastro/Desktop/RFiles/IRT/Part2")
# the number of digits to print
options(digits = 5)
# turn off warning messages
options(warn = -1)
Simulate data for a 3PL model where 5000 persons take a 50-item test. The test should be somewhat easy (average examinee score greater than 50% correct). Include a reasonable, realistic amount of guessing. Estimate parameters for the 1PL, 2PL, and 3PL models.
Question: Do the item difficulty parameter estimates for the 2-PL model correlate more strongly with the estimates for the 3-PL model or with the true values?
The Algorithm:
Step 1: Simulate the true item parameters (discrimination, difficulty, and guessing) and person locations (ability).
Step 2: Simulate response data using the true item and person parameters produced in step 1.
Step 3: Estimate item parameters and person locations with the 1PL, 2PL, and 3PL models using the responses produced in step 2.
Step 4: Correlate the true item parameters with their estimates using the data produced in step 3.
Step 5: Estimate person locations (ability) using the responses produced in step 2 and the item parameters produced in step 3.
Step 6: Correlate true ability with its estimates using the data produced in step 5.
# Specify seed for Random Number Generation to reproduce same results
set.seed(566568, kind = "Mersenne-Twister", normal.kind = "Inversion")
# CONSTANTS
npersons <- 5000 # number of test-takers
numitems <- 50 # number of test questions
# True ability parameters
# A vector of values of the latent variable ("abilities").
true_theta <- rnorm(npersons, mean = 0, sd = 1)
# check
summary(true_theta)
## Min. 1st Qu. Median Mean 3rd Qu. Max.
## -3.5800 -0.6620 -0.0014 0.0071 0.6700 3.3100
str(true_theta)
## num [1:5000] -0.57 0.17 1.969 -1.255 -0.103 ...
## save true thetas
save(true_theta, file = "true_theta.RData")
# True item parameters
set.seed(4123)
true_items <- as.matrix(cbind(alpha = runif(numitems, .5, 2.5),
delta = rnorm(numitems, -.5, 1.0),
chi = runif(numitems, 0, .2)))
# check
head(true_items)
## alpha delta chi
## [1,] 2.37939 0.17337 0.043726
## [2,] 1.35841 -2.15516 0.085615
## [3,] 0.79187 0.21949 0.053041
## [4,] 1.58943 -1.73346 0.036032
## [5,] 0.95027 0.13704 0.194842
## [6,] 1.22624 -1.79317 0.106150
summary(true_items)
## alpha delta chi
## Min. :0.591 Min. :-2.757 Min. :0.00527
## 1st Qu.:0.955 1st Qu.:-1.374 1st Qu.:0.07475
## Median :1.377 Median :-0.508 Median :0.10777
## Mean :1.460 Mean :-0.582 Mean :0.11313
## 3rd Qu.:2.026 3rd Qu.: 0.208 3rd Qu.:0.16036
## Max. :2.415 Max. : 1.040 Max. :0.19964
## save true item parameters
save(true_items, file = "true_items.RData")
Simulate two sets (T1 and T2) of dichotomous (1 = correct, 0 = incorrect) responses to the same test (items as columns) by the same persons (as rows).
require(irtoys, warn.conflicts = FALSE, quietly = TRUE)
## Package 'sm', version 2.2-5.4: type help(sm) for summary information
##
## Attaching package: 'MASS'
## The following object is masked from 'package:sm':
##
## muscle
# Test
set.seed(86576574)
T1 <- irtoys::sim(ip = true_items, x = true_theta)
# column and row names
dimnames(T1) <- list(paste("p", 1:nrow(T1), sep = ""),
paste("i", 1:ncol(T1), sep = ""))
# check
str(T1)
## num [1:5000, 1:50] 1 0 1 0 0 1 0 1 0 0 ...
## - attr(*, "dimnames")=List of 2
## ..$ : chr [1:5000] "p1" "p2" "p3" "p4" ...
## ..$ : chr [1:50] "i1" "i2" "i3" "i4" ...
## save
save(T1, file = "T1.RData")
# Retest
set.seed(56744445)
T2 <- irtoys::sim(ip = true_items, x = true_theta)
# column and row names
dimnames(T2) <- list(paste("p", 1:nrow(T2), sep = ""),
paste("i", 1:ncol(T2), sep = ""))
# check
str(T2)
## num [1:5000, 1:50] 0 1 1 0 0 0 0 0 0 0 ...
## - attr(*, "dimnames")=List of 2
## ..$ : chr [1:5000] "p1" "p2" "p3" "p4" ...
## ..$ : chr [1:50] "i1" "i2" "i3" "i4" ...
## save
save(T2, file = "T2.RData")
Calculate test scores
rawScores <- apply(T1, 1, sum)
summary(rawScores)
## Min. 1st Qu. Median Mean 3rd Qu. Max.
## 5.0 26.0 33.0 32.7 40.0 50.0
scores <- rawScores/numitems
summary(scores)
## Min. 1st Qu. Median Mean 3rd Qu. Max.
## 0.100 0.520 0.660 0.655 0.800 1.000
rawScores <- apply(T2, 1, sum)
summary(rawScores)
## Min. 1st Qu. Median Mean 3rd Qu. Max.
## 2.0 26.0 34.0 32.8 40.0 50.0
scores <- rawScores/numitems
summary(scores)
## Min. 1st Qu. Median Mean 3rd Qu. Max.
## 0.040 0.520 0.680 0.656 0.800 1.000
The average score is about 66%, so the tests are somewhat easy, as required.
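As a quick cross-check, the expected score implied by the true parameters can be computed directly: averaging the 3PL probability of a correct response over all persons and items should land near the observed 66%. A minimal sketch (the helper p3pl is introduced here for illustration and assumes the standard 3PL form used by irtoys::sim):
# expected proportion correct under the true 3PL parameters
p3pl <- function(theta, a, b, c) c + (1 - c) / (1 + exp(-a * (theta - b)))
mean(sapply(1:numitems, function(j)
  mean(p3pl(true_theta, true_items[j, 1], true_items[j, 2], true_items[j, 3]))))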
One-Parameter IRT Model:
require(irtoys, warn.conflicts = FALSE, quietly = TRUE)
# Test 1PL
set.seed(234511)
est1PLT1 <- irtoys::est(resp = T1,
model = "1PL",
engine = "ltm")
# column and row names
dimnames(est1PLT1$est) <- list(paste("i", 1:nrow(est1PLT1$est), sep = ""),
c("alpha", "delta", "chi"))
# check
# Notice the discrimination parameter (slope), a,
# is constant across items (first column), and
# the pseudo-guessing parameter (chance), c,
# is fixed at 0 (third column) in a 1PL model
head(est1PLT1$est)
## alpha delta chi
## i1 1.1338 0.115695 0
## i2 1.1338 -2.544986 0
## i3 1.1338 -0.021374 0
## i4 1.1338 -2.166389 0
## i5 1.1338 -0.306825 0
## i6 1.1338 -1.972535 0
## save
write.table(est1PLT1$est, file = "est1PLT1.csv")
# Retest
set.seed(879976)
est1PLT2 <- irtoys::est(resp = T2,
model = "1PL",
engine = "ltm")
# column and row names
dimnames(est1PLT2$est) <- list(paste("i", 1:nrow(est1PLT2$est), sep = ""),
c("alpha", "delta", "chi"))
# check
# Notice the discrimination parameter (slope), a,
# is constant across items (first column), and
# the pseudo-guessing parameter (chance), c,
# is fixed at 0 (third column) in a 1PL model
head(est1PLT2$est)
## alpha delta chi
## i1 1.1234 0.115324 0
## i2 1.1234 -2.519014 0
## i3 1.1234 0.014573 0
## i4 1.1234 -2.175353 0
## i5 1.1234 -0.415983 0
## i6 1.1234 -2.053933 0
## save
write.table(est1PLT2$est, file = "est1PLT2.csv")
Item response function (irf) plot
palette(rainbow(numitems))
plot(irf(est1PLT1$est),
main = "Item characteristic curve 1PL",
label = TRUE,
co = NA)
The Two-Parameter IRT Model:
require(irtoys, warn.conflicts = FALSE, quietly = TRUE)
# Test 2PL
set.seed(65784)
est2PLT1 <- irtoys::est(resp = T1,
model = "2PL",
engine = "ltm")
# column and row names
dimnames(est2PLT1$est) <- list(paste("i", 1:nrow(est2PLT1$est), sep = ""),
c("alpha", "delta", "chi"))
# check
# the pseudo-guessing parameter (chance), c,
# is fixed at 0 (third column) in a 2PL model
head(est2PLT1$est)
## alpha delta chi
## i1 2.12013 0.077001 0
## i2 1.44098 -2.162225 0
## i3 0.70202 -0.011481 0
## i4 1.61249 -1.736991 0
## i5 0.73165 -0.404192 0
## i6 1.18807 -1.899755 0
## save
write.table(est2PLT1$est, file = "est2PLT1.csv")
# Retest
set.seed(324545)
est2PLT2 <- irtoys::est(resp = T2,
model = "2PL",
engine = "ltm")
# column and row names
dimnames(est2PLT2$est) <- list(paste("i", 1:nrow(est2PLT2$est), sep = ""),
c("alpha", "delta", "chi"))
# check
# the pseudo-guessing parameter (chance), c,
# is fixed at 0 (third column) in a 2PL model
head(est2PLT2$est)
## alpha delta chi
## i1 2.06141 0.069252 0
## i2 1.40302 -2.164243 0
## i3 0.74243 0.027506 0
## i4 1.62877 -1.726873 0
## i5 0.67670 -0.596560 0
## i6 1.31101 -1.848405 0
## save
write.table(est2PLT2$est, file = "est2PLT2.csv")
Item response function (irf) plot
palette(rainbow(numitems))
plot(irf(est2PLT1$est),
main = "Item characteristic curve 2PL",
label = TRUE,
co = NA)
The Three-Parameter IRT Model:
require(irtoys, warn.conflicts = FALSE, quietly = TRUE)
# Test 3PL
set.seed(890675)
est3PLT1 <- irtoys::est(resp = T1,
model = "3PL",
engine = "ltm")
# column and row names
dimnames(est3PLT1$est) <- list(paste("i", 1:nrow(est3PLT1$est), sep = ""),
c("alpha", "delta", "chi"))
# check
head(est3PLT1$est)
## alpha delta chi
## i1 2.3926 0.15457 0.031971
## i2 1.4339 -1.97947 0.206108
## i3 0.7927 0.25316 0.082807
## i4 1.5121 -1.80484 0.008511
## i5 1.0711 0.37575 0.250223
## i6 1.1583 -1.85026 0.073251
## save
write.table(est3PLT1$est, file = "est3PLT1.csv")
# Retest
set.seed(859687)
est3PLT2 <- irtoys::est(resp = T2,
model = "3PL",
engine = "ltm")
# column and row names
dimnames(est3PLT2$est) <- list(paste("i", 1:nrow(est3PLT2$est), sep = ""),
c("alpha", "delta", "chi"))
# check
head(est3PLT2$est)
## alpha delta chi
## i1 2.33985 0.140151 0.0377388
## i2 1.33121 -2.215677 0.0622647
## i3 0.74915 0.037647 0.0067368
## i4 1.66427 -1.531452 0.2038462
## i5 0.69827 -0.468834 0.0443080
## i6 1.37931 -1.489873 0.2660486
## save
write.table(est3PLT2$est, file = "est3PLT2.csv")
Item response function (irf) plot
palette(rainbow(numitems))
plot(irf(est3PLT1$est),
main = "Item characteristic curve 3PL",
label = TRUE,
co = NA)
Combine Item Difficulty Estimates:
# a data frame of the true item difficulties and their estimates (T1)
deltasT1 <- data.frame(true_delta = true_items[,2],
estimate_1PL = est1PLT1$est[,2],
estimate_2PL = est2PLT1$est[,2],
estimate_3PL = est3PLT1$est[,2])
head(deltasT1)
## true_delta estimate_1PL estimate_2PL estimate_3PL
## i1 0.17337 0.115695 0.077001 0.15457
## i2 -2.15516 -2.544986 -2.162225 -1.97947
## i3 0.21949 -0.021374 -0.011481 0.25316
## i4 -1.73346 -2.166389 -1.736991 -1.80484
## i5 0.13704 -0.306825 -0.404192 0.37575
## i6 -1.79317 -1.972535 -1.899755 -1.85026
summary(deltasT1)
## true_delta estimate_1PL estimate_2PL estimate_3PL
## Min. :-2.757 Min. :-3.283 Min. :-2.7886 Min. :-2.607
## 1st Qu.:-1.374 1st Qu.:-1.629 1st Qu.:-1.5882 1st Qu.:-1.433
## Median :-0.508 Median :-0.675 Median :-0.7490 Median :-0.405
## Mean :-0.582 Mean :-0.851 Mean :-0.8032 Mean :-0.537
## 3rd Qu.: 0.208 3rd Qu.: 0.041 3rd Qu.: 0.0549 3rd Qu.: 0.318
## Max. : 1.040 Max. : 0.976 Max. : 0.8980 Max. : 1.043
# save
write.table(deltasT1,
file = "deltasT1.csv",
sep = ",",
row.names = TRUE,
qmethod = "double")
# a data frame of the true item difficulties and their estimates (T2)
deltasT2 <- data.frame(true_delta = true_items[,2],
estimate_1PL = est1PLT2$est[,2],
estimate_2PL = est2PLT2$est[,2],
estimate_3PL = est3PLT2$est[,2])
head(deltasT2)
## true_delta estimate_1PL estimate_2PL estimate_3PL
## i1 0.17337 0.115324 0.069252 0.140151
## i2 -2.15516 -2.519014 -2.164243 -2.215677
## i3 0.21949 0.014573 0.027506 0.037647
## i4 -1.73346 -2.175353 -1.726873 -1.531452
## i5 0.13704 -0.415983 -0.596560 -0.468834
## i6 -1.79317 -2.053933 -1.848405 -1.489873
summary(deltasT2)
## true_delta estimate_1PL estimate_2PL estimate_3PL
## Min. :-2.757 Min. :-3.2568 Min. :-2.6494 Min. :-2.637
## 1st Qu.:-1.374 1st Qu.:-1.6143 1st Qu.:-1.6052 1st Qu.:-1.493
## Median :-0.508 Median :-0.6923 Median :-0.8851 Median :-0.487
## Mean :-0.582 Mean :-0.8634 Mean :-0.8161 Mean :-0.596
## 3rd Qu.: 0.208 3rd Qu.: 0.0901 3rd Qu.: 0.0588 3rd Qu.: 0.257
## Max. : 1.040 Max. : 0.9977 Max. : 0.9158 Max. : 1.033
# save
write.table(deltasT2,
file = "deltasT2.csv",
sep = ",",
row.names = TRUE,
qmethod = "double")
cor(deltasT1)
## true_delta estimate_1PL estimate_2PL estimate_3PL
## true_delta 1.00000 0.95044 0.98382 0.96832
## estimate_1PL 0.95044 1.00000 0.95146 0.95378
## estimate_2PL 0.98382 0.95146 1.00000 0.94501
## estimate_3PL 0.96832 0.95378 0.94501 1.00000
The item difficulty parameter estimates for the 2PL and 3PL models correlate somewhat more strongly with the true values (r = 0.98 and r = 0.97, respectively) than they do with each other (r = 0.95).
Functions for the Correlation Matrix Plots
Histograms on the diagonal:
panel.hist.density <-
function(x,...)
{
usr <- par("usr")
on.exit(par(usr))
par(usr = c(usr[1:2], 0, 1.5) )
h <- hist(x, plot = FALSE)
breaks <- h$breaks
nB <- length(breaks)
y <- h$counts/max(h$counts)
rect(breaks[-nB], 0, breaks[-1], y, col = "dodgerblue")
tryd <- try( d <- density(x, na.rm = TRUE, bw = "nrd", adjust = 1.2),
silent = TRUE)
if (!inherits(tryd, "try-error")) {
d$y <- d$y/max(d$y)
lines(d, lwd = 2, col = "tomato")
}
} # end panel.hist.density
dump("panel.hist.density", file = "panel.hist.density.R")
Scatter plot with tolerance ellipsoids and lowess line on the lower-triangle:
panel.smooth <-
function(x, y,
col = par("col"),
bg = NA,
pch = 21,
cex = .5,
col.smooth = "tomato",
span = 2/3,
iter = 3, ...){
require(cluster, warn.conflicts = FALSE, quietly = TRUE)
points(x, y,
pch = pch,
col = col,
bg = bg,
cex = cex)
ok <- is.finite(x) & is.finite(y)
if (any(ok))
lines(stats::lowess(x[ok], y[ok], f = span, iter = iter),
col = col.smooth,
lwd = 2, ...)
# classical and robust cov.matrix ellipsoids
X <- cbind(x,y)
C.ls <- cov(X)
m.ls <- colMeans(X)
# calculates a 99% ellipsoid
d2.99 <- qchisq(0.99, df = 2)
# classical cov.matrix ellipsoids
lines(ellipsoidPoints(C.ls, d2.99, loc = m.ls),
lwd = 2,
lty = 3,
col = "purple")
if (require(MASS)) {
Cxy <- cov.rob(cbind(x,y))
# robust cov.matrix ellipsoids
lines(ellipsoidPoints(Cxy$cov, d2 = d2.99, loc = Cxy$center),
lty = 3,
lwd = 2,
col = "brown")
}
} # end panel.smooth
dump("panel.smooth.R", file = "panel.smooth.R")
Correlations with significance on the upper triangle:
panel.cor <-
function(x, y,
method = "pearson",
digits = 3,
cex.cor = 1,
no.col = FALSE) {
usr <- par("usr")
on.exit(par(usr))
par(usr = c(0, 1, 0, 1))
r <- cor(x, y, method = method)
ra <- cor.test(x, y, method = method)$p.value
txt <- format(c(r, 0.123456789), digits = digits)[1]
# mark significant correlations
prefix <- ""
if (ra <= 0.1) prefix <- "*"
if (ra <= 0.05) prefix <- "**"
if (ra <= 0.01) prefix <- "***"
if (ra <= 0.001) prefix <- "****"
if (no.col)
{
color <- "tomato"
if (r < 0) {if (ra <= 0.001) sig <- 4 else sig <- 3}
else {if (ra <= 0.001) sig <- 2 else sig <- 1}
}
else
{
sig <- 1
if (ra <= 0.001) sig <- 2
color <- 2
if (r < 0) color <- 4
}
txt <- paste(txt, prefix, sep = "\n")
if (missing(cex.cor)) cex.cor <- 0.5/strwidth(txt)
text(0.5, 0.5, txt, cex = cex.cor * abs(r), col = "maroon") # scale text by |r|
} # end panel.cor
dump("panel.cor", file = "panel.cor.R")
The Legend for Significance:
Prefix | Significance |
---|---|
* | 0.1 |
** | 0.05 |
*** | 0.01 |
**** | 0.001 |
pairs(~true_delta + estimate_1PL + estimate_2PL + estimate_3PL,
data = deltasT1,
lower.panel = panel.smooth,
upper.panel = panel.cor,
diag.panel = panel.hist.density,
main = "Scatterplot Matrix with Correlations",
na.action = stats::na.omit)
There are no large differences among the 1PL, 2PL, and 3PL models in recovering the true item locations (correlations of .95, .98, and .97, respectively), though the 2PL estimates correlate slightly more strongly with the true item parameters.
Simulate data for the 1PL, 2PL, and 3PL IRT models where 5000 persons take a 50-item test.
Question: If the same persons take the same test a second time, do the person ability estimates on the second testing correlate more strongly with the ability estimates from the first testing or with the true thetas?
1PL Model
require(irtoys, warn.conflicts = FALSE, quietly = TRUE)
set.seed(897098)
# EAP estimation of ability for T1
theta1PLT1 <- irtoys::eap(resp = T1,
ip = est1PLT1$est,
qu = normal.qu())
summary(theta1PLT1[,"est"])
## Min. 1st Qu. Median Mean 3rd Qu. Max.
## -2.81000 -0.66400 -0.05120 0.00324 0.64700 2.47000
# EAP estimation of ability for T2
set.seed(897098)
theta1PLT2 <- irtoys::eap(resp = T2,
ip = est1PLT2$est,
qu = normal.qu())
summary(theta1PLT2[,"est"])
## Min. 1st Qu. Median Mean 3rd Qu. Max.
## -3.3300 -0.6760 0.0248 0.0014 0.6470 2.4800
thetas1PL <- data.frame(true_theta = true_theta,
est1PLT1 = theta1PLT1[,"est"],
est1PLT2 = theta1PLT2[,"est"])
rownames(thetas1PL) <- paste("p", 1:nrow(thetas1PL), sep = "")
head(thetas1PL)
## true_theta est1PLT1 est1PLT2
## p1 -0.56966 -0.051198 -0.76617
## p2 0.16974 0.324797 0.43297
## p3 1.96851 1.908844 1.50076
## p4 -1.25510 -0.751414 -0.96688
## p5 -0.10308 0.115432 -0.24779
## p6 0.38167 0.435954 0.21084
write.table(thetas1PL, file = "thetas1PL.csv")
2PL Model
require(irtoys, warn.conflicts = FALSE, quietly = TRUE)
set.seed(678976)
# EAP estimation of ability for T1
theta2PLT1 <- irtoys::eap(resp = T1,
ip = est2PLT1$est,
qu = normal.qu())
summary(theta2PLT1[,"est"])
## Min. 1st Qu. Median Mean 3rd Qu. Max.
## -2.8100 -0.6500 -0.0114 0.0169 0.6740 2.4500
set.seed(7456465)
# EAP estimation of ability for T2
theta2PLT2 <- irtoys::eap(resp = T2,
ip = est2PLT2$est,
qu = normal.qu())
summary(theta2PLT2[,"est"])
## Min. 1st Qu. Median Mean 3rd Qu. Max.
## -3.1900 -0.6450 -0.0310 0.0087 0.6740 2.4500
thetas2PL <- data.frame(true_theta = true_theta,
est2PLT1 = theta2PLT1[,"est"],
est2PLT2 = theta2PLT2[,"est"])
rownames(thetas2PL) <- paste("p", 1:nrow(thetas2PL), sep = "")
head(thetas2PL)
## true_theta est2PLT1 est2PLT2
## p1 -0.56966 -0.122703 -0.78784
## p2 0.16974 0.273782 0.23458
## p3 1.96851 1.940733 1.83598
## p4 -1.25510 -0.819787 -0.91290
## p5 -0.10308 0.053516 -0.24302
## p6 0.38167 0.498853 0.15467
write.table(thetas2PL, file = "thetas2PL.csv")
3PL Model
require(irtoys, warn.conflicts = FALSE, quietly = TRUE)
set.seed(12344123)
# EAP estimation of ability
theta3PLT1 <- irtoys::eap(resp = T1,
ip = est3PLT1$est,
qu = normal.qu())
summary(theta3PLT1[,"est"])
## Min. 1st Qu. Median Mean 3rd Qu. Max.
## -3.2500 -0.6340 0.0137 0.0126 0.6620 2.3900
# EAP estimation of ability for T2
set.seed(897098)
theta3PLT2 <- irtoys::eap(resp = T2,
ip = est3PLT2$est,
qu = normal.qu())
summary(theta3PLT2[,"est"])
## Min. 1st Qu. Median Mean 3rd Qu. Max.
## -3.16000 -0.64100 -0.01460 -0.00854 0.65800 2.39000
thetas3PL <- data.frame(true_theta = true_theta,
est3PLT1 = theta3PLT1[,"est"],
est3PLT2 = theta3PLT2[,"est"])
rownames(thetas3PL) <- paste("p", 1:nrow(thetas3PL), sep = "")
head(thetas3PL)
## true_theta est3PLT1 est3PLT2
## p1 -0.56966 -0.073388 -0.75197
## p2 0.16974 0.312012 0.25481
## p3 1.96851 1.793685 1.78200
## p4 -1.25510 -0.761053 -0.91889
## p5 -0.10308 0.062358 -0.20043
## p6 0.38167 0.490221 0.16476
write.table(thetas3PL, file = "thetas3PL.csv")
Combine true person location and its estimates in a data frame
thetasT1 <- data.frame(true_theta = true_theta,
est1PLT1 = theta1PLT1[,"est"],
est2PLT1 = theta2PLT1[,"est"],
est3PLT1 = theta3PLT1[,"est"])
rownames(thetasT1) <- paste("p", 1:nrow(thetasT1), sep = "")
head(thetasT1)
## true_theta est1PLT1 est2PLT1 est3PLT1
## p1 -0.56966 -0.051198 -0.122703 -0.073388
## p2 0.16974 0.324797 0.273782 0.312012
## p3 1.96851 1.908844 1.940733 1.793685
## p4 -1.25510 -0.751414 -0.819787 -0.761053
## p5 -0.10308 0.115432 0.053516 0.062358
## p6 0.38167 0.435954 0.498853 0.490221
write.table(thetasT1, file = "thetasT1.csv")
thetasT2 <- data.frame(true_theta = true_theta,
est1PLT2 = theta1PLT2[,"est"],
est2PLT2 = theta2PLT2[,"est"],
est3PLT2 = theta3PLT2[,"est"])
rownames(thetasT2) <- paste("p", 1:nrow(thetasT2), sep = "")
head(thetasT2)
## true_theta est1PLT2 est2PLT2 est3PLT2
## p1 -0.56966 -0.76617 -0.78784 -0.75197
## p2 0.16974 0.43297 0.23458 0.25481
## p3 1.96851 1.50076 1.83598 1.78200
## p4 -1.25510 -0.96688 -0.91290 -0.91889
## p5 -0.10308 -0.24779 -0.24302 -0.20043
## p6 0.38167 0.21084 0.15467 0.16476
write.table(thetasT2, file = "thetasT2.csv")
pairs(~true_theta + est1PLT1 + est2PLT1 + est3PLT1,
data = thetasT1,
lower.panel = panel.smooth,
upper.panel = panel.cor,
diag.panel = panel.hist.density,
main = "Scatterplot Matrix with Correlations",
na.action = stats::na.omit)
pairs(~true_theta + est1PLT2 + est2PLT2 + est3PLT2,
data = thetasT2,
lower.panel = panel.smooth,
upper.panel = panel.cor,
diag.panel = panel.hist.density,
main = "Scatterplot Matrix with Correlations",
na.action = stats::na.omit)
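The correlations displayed in these two scatterplot matrices can also be confirmed numerically (a sketch):
# correlation matrices underlying the two scatterplot matrices above
round(cor(thetasT1), 2)
round(cor(thetasT2), 2)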
All three IRT models performed well in recovering the respondents’ abilities: the correlations between the true person locations and the estimates from all three models (1PL, 2PL, and 3PL) were about 0.95 to 0.96.
pairs(~true_theta + est1PLT1 + est1PLT2,
data = thetas1PL,
lower.panel = panel.smooth,
upper.panel = panel.cor,
diag.panel = panel.hist.density,
main = "Scatterplot Matrix with Correlations",
na.action = stats::na.omit)
pairs(~true_theta + est2PLT1 + est2PLT2,
data = thetas2PL,
lower.panel = panel.smooth,
upper.panel = panel.cor,
diag.panel = panel.hist.density,
main = "Scatterplot Matrix with Correlations",
na.action = stats::na.omit)
pairs(~true_theta + est3PLT1 + est3PLT2,
data = thetas3PL,
lower.panel = panel.smooth,
upper.panel = panel.cor,
diag.panel = panel.hist.density,
main = "Scatterplot Matrix with Correlations",
na.action = stats::na.omit)
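The test-retest correlations quoted below can likewise be read off directly (a sketch):
# correlations among true theta and the two administrations, per model
round(cor(thetas1PL), 2)
round(cor(thetas2PL), 2)
round(cor(thetas3PL), 2)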
When the same persons take the same test a second time, the two sets of person ability estimates correlate more weakly with each other (r = 0.90 to 0.91) than each correlates with the true thetas (r = 0.95 to 0.96). This makes sense, since the estimates reflect the randomness within each simulated administration while the true values do not.
Correlation coefficients for the item parameter estimates do not take the interplay among the item parameters into account. The root-mean-square difference (RMSD) summarizes the discrepancy (the area) between two ICCs and thus reflects the joint behavior of the item parameter estimates in the 2PL and 3PL models. Because the estimates are scale dependent, subsample metrics should be linked prior to RMSD computation. The \(RMSD_j\) statistic for item j is given by
\[ RMSD_j = \sqrt{\frac{\sum(p_{js}-p_{jt})^2}{n}} \]
where n is the number of \(\Theta\) values used in computing the \(p_{js}\) and \(p_{jt}\). For example, if \(\Theta\) ranges over (-4, 4) in 0.01 increments, then n = 801.
RMSD has a range of 0 to 1 (inclusive), with small values indicating good agreement between the two IRFs.
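The RMSD() function below sources a helper file, logistic.R, whose contents are not shown in this document. A minimal sketch of what that helper is assumed to contain, matching the call signature used below, is the standard 3PL response function:
# assumed contents of logistic.R: 3PL probability of a correct response
logistic <- function(a, b, c, D = 1, theta) {
  c + (1 - c) / (1 + exp(-D * a * (theta - b)))
}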
RMSD <-
function(P1, P2, seed = NULL, model = NULL, plot = FALSE) {
source("logistic.R")
require(ltm, warn.conflicts = FALSE, quietly = TRUE)
require(irtoys, warn.conflicts = FALSE, quietly = TRUE)
# to produce the same instance of randomization process
if (!is.null(seed)) {set.seed(seed)}
# default model
if (is.null(model)) {model <- "2PL"}
P1 <- as.matrix(P1)
P2 <- as.matrix(P2)
if (nrow(P1) != nrow(P2))
stop("tests must have equal number of items")
# the latent trait continuum (theta)
thetas <- seq(from = -4, to = 4, by = 0.1)
n <- length(thetas) # the number of thetas
# initialize largest RMSD
largest.rmsd <- 0
# compute RMSD
rmsd <- rep(NA, nrow(P1))
for (j in 1:nrow(P1)) {
# probability of a correct response
Pt <- logistic(a = P1[j, 1],
b = P1[j, 2],
c = P1[j, 3],
D = 1,
theta = thetas)
Ps <- logistic(a = P2[j,1],
b = P2[j,2],
c = P2[j,3],
D = 1,
theta = thetas)
rmsd[j] <- sqrt(sum((Pt - Ps)^2)/n)
# item pairs with the largest RMSD
if (rmsd[j] > largest.rmsd) {
largest.rmsd <- rmsd[j]
item.number <- j
} # end if
} # end for j
# plots
if (plot) {
# irf plot
plot(irtoys::irf(rbind(P1[item.number,], P2[item.number,])), co = NA,
main = paste("IRF plots for the largest RMSD \n item #", item.number))
# trf plot
plot(irtoys::trf(P1, thetas),
co = adjustcolor("dodgerblue", alpha.f = 0.5),
main = "TRF plots for the subsets")
par(new = TRUE)
plot(irtoys::trf(P2, thetas),
co = adjustcolor("tomato", alpha.f = 0.5),
main = NULL)
} # end if
return(rmsd)
} # end RMSD
dump("RMSD", file = "RMSD.R")
# change default color scheme
palette(adjustcolor(c("tomato", "dodgerblue"), alpha.f = 0.5))
# 2PL RMSDs
summary(RMSD(P1 = true_items , P2 = est2PLT1$est, model = "2PL", plot = TRUE))
## Min. 1st Qu. Median Mean 3rd Qu. Max.
## 0.00488 0.02720 0.04170 0.04310 0.05660 0.08810
summary(RMSD(P1 = true_items , P2 = est2PLT2$est, model = "2PL", plot = TRUE))
## Min. 1st Qu. Median Mean 3rd Qu. Max.
## 0.00143 0.02330 0.04250 0.04310 0.06100 0.09000
# 3PL RMSDs
summary(RMSD(P1 = true_items , P2 = est3PLT1$est, model = "3PL", plot = TRUE))
## Min. 1st Qu. Median Mean 3rd Qu. Max.
## 0.00257 0.00710 0.01530 0.01810 0.02680 0.04710
summary(RMSD(P1 = true_items , P2 = est3PLT2$est, model = "3PL", plot = TRUE))
## Min. 1st Qu. Median Mean 3rd Qu. Max.
## 0.00217 0.00672 0.01210 0.01670 0.02080 0.05880
The largest RMSD value is below 0.1 for both models. The mean RMSD values for the 3PL model (0.0181 and 0.0167) are smaller than those for the 2PL model (0.0431 for both administrations).
The Theory and Practice of Item Response Theory (Ayala 2009):
A likelihood ratio test is a statistical test used to compare the fit of two models, one of which (the null or reduced model) is a special case of the other (the alternative or full model). The test is based on the likelihood ratio, which expresses how many times more likely the data are under one model than the other. This likelihood ratio, or equivalently its logarithm, can then be used to compute a p-value, or compared to a critical value, to decide whether to reject the null model in favor of the alternative model. Each of the two competing models is fitted to the data separately and its log-likelihood recorded. The test statistic, \(G^2\), is twice the difference in these log-likelihoods:
\[\Delta G^2 = -2ln(L_R) - (-2ln(L_F))\]
The degrees of freedom for evaluating the significance of the D statistic is the difference in the number of parameters between the full model and the reduced model. The D statistic is approximately distributed as a \(\chi^2\) when the sample size is large. A significant result (p < 0.05) favors the full model; a nonsignificant result indicates that the additional complexity (the increased number of item parameters) is unnecessary. For example, if the comparison of the 2PL and 3PL models is not significant, then the pseudo-guessing parameter is not needed to improve model-data fit over and above that obtained with the 2PL model.
The df for a model is given by \(2^K - (\text{number of parameters}) - 1\), where K is the number of items on the instrument and the number of parameters depends on the model and the number of items.
For the 3PL model, there are three item parameters \((\alpha_j, \delta_j, \chi_j)\) and for a 100-item instrument the number of item parameters is \(3*100=300\). Therefore, the \(df = 2^{100} - 300 - 1 = 1.267651 \times 10^{30}\).
For the 2PL model, there are two item parameters \((\alpha_j, \delta_j)\) and for a 100-item instrument the number of item parameters is \(2*100=200\). Therefore, the \(df = 2^{200} - 200 - 1\).
For the 1PL model, each item has a location \((\delta_j)\) and a common discrimination \((\alpha_j)\) and for a 100-item instrument the number of item parameters is \(100 + 1 = 101\). Therefore, the \(df = 2^{100} - 101 - 1\).
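These df values for the 100-item example can be reproduced directly; note that \(2^{100}\) far exceeds integer range, so R reports them as doubles (a sketch; Kex is a throwaway name chosen here to avoid clobbering the constants used elsewhere):
# df for the 1PL, 2PL, and 3PL models on a hypothetical 100-item instrument
Kex <- 100
c(df1PL = 2^Kex - (Kex + 1) - 1,
  df2PL = 2^Kex - 2*Kex - 1,
  df3PL = 2^Kex - 3*Kex - 1)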
D_1PL <- est1PLT1$est[,2]
head(D_1PL)
## i1 i2 i3 i4 i5 i6
## 0.115695 -2.544986 -0.021374 -2.166389 -0.306825 -1.972535
D_2PL <- est2PLT1$est[,2]
head(D_2PL)
## i1 i2 i3 i4 i5 i6
## 0.077001 -2.162225 -0.011481 -1.736991 -0.404192 -1.899755
D_3PL <- est3PLT1$est[,2]
head(D_3PL)
## i1 i2 i3 i4 i5 i6
## 0.15457 -1.97947 0.25316 -1.80484 0.37575 -1.85026
source("likelihood.R")
source("logistic.R")
# CONSTANTS
(K <- ncol(T1)) # Number of items
## [1] 50
(N <- nrow(T1)) # Number of examinees
## [1] 5000
(NumParams <- c(K + 1, 2*K, 3*K))
## [1] 51 100 150
(DegreesFreedom <- c(2^K - (K + 1) - 1, 2^K - (2*K) - 1, 2^K - (3*K) - 1))
## [1] 1.1259e+15 1.1259e+15 1.1259e+15
# D = -2 ln(L)
D1PL <- -2 * log(max(likelihood(par.mat = est1PLT1$est, resp.mat = T1)))
D2PL <- -2 * log(max(likelihood(par.mat = est2PLT1$est, resp.mat = T1)))
D3PL <- -2 * log(max(likelihood(par.mat = est3PLT1$est, resp.mat = T1)))
## D = D.reduced - D.full = -2ln(Lr) - (-2ln(Lf))
deltaD12 <- D1PL - D2PL
deltaD23 <- D2PL - D3PL
# deltaRSQ = (GSQr - GSQf)/GSQr
deltaR12 <- (D1PL - D2PL)/D1PL
deltaR23 <- (D2PL - D3PL)/D2PL
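Following the description above, each \(\Delta G^2\) can be referred to a \(\chi^2\) distribution with df equal to the difference in the number of parameters between the competing models. A sketch (these p-values are not part of the original output):
# chi-square tests for the nested comparisons; df = difference in parameters
pchisq(deltaD12, df = 2*K - (K + 1), lower.tail = FALSE) # 1PL vs 2PL, df = 49
pchisq(deltaD23, df = 3*K - 2*K, lower.tail = FALSE)     # 2PL vs 3PL, df = 50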
\[AIC = -2lnL + 2\times(\text{number of parameters})\]
# AIC = -2lnL + 2*Nparm
AIC1PL <- D1PL + 2*(K+1)
AIC2PL <- D2PL + 2*(K*2)
AIC3PL <- D3PL + 2*(K*3)
AIC <- c(AIC1PL, AIC2PL, AIC3PL)
\[ BIC = -2lnL + ln(N) \times (\text{number of parameters}) \]
The model with the smallest BIC has the best comparative fit; BIC tends to favor constrained (reduced) models.
# BIC = -2lnL + ln(N)*Nparm
BIC1PL <- D1PL + log(N)*(K+1)
BIC2PL <- D2PL + log(N)*(K*2)
BIC3PL <- D3PL + log(N)*(K*3)
BIC <- c(BIC1PL, BIC2PL, BIC3PL)
(Model <- c("1PL","2PL","3PL"))
## [1] "1PL" "2PL" "3PL"
Negative2LL <- c(D1PL, D2PL, D3PL)
RelativeChange <- c(NA, deltaD12, deltaD23)
(comparison <-
data.frame(Model, Negative2LL, DegreesFreedom, RelativeChange, NumParams, AIC, BIC))
## Model Negative2LL DegreesFreedom RelativeChange NumParams AIC BIC
## 1 1PL 2.7552 1.1259e+15 NA 51 104.76 437.13
## 2 2PL 5.4232 1.1259e+15 -2.668 100 205.42 857.14
## 3 3PL -38.2064 1.1259e+15 43.630 150 261.79 1239.37
Using the more complex 2PL model results in a deterioration of fit by 27% over the 1PL model, while the 3PL model yields a slight improvement of fit, by 9%, over the 2PL model. Overall, the 1PL model fits better than the 2PL and 3PL models, with the smallest AIC and BIC.
To evaluate the prevalence of guessing, we take the lowest-scoring examinees and see how they performed on the most difficult items. If guessing is not a factor, these persons should have little success on these items.
# clear memory
rm(list = ls())
ls() # check memory
## character(0)
# set the default working directory
setwd("/Users/salvadorcastro/Desktop/RFiles/IRT/Part2")
# the number of digits to print
options(digits = 5)
# turn off warning messages
options(warn = -1)
Import the data file
# A data frame of responses: persons (450) as rows, items (15) as columns,
# entries are either 0 or 1, no missing values
mytest <- read.csv("test.csv", header = TRUE, na.strings = " ")
# CONSTANTS
(npersons <- nrow(mytest))
## [1] 450
(numitems <- ncol(mytest))
## [1] 15
# Check
str(mytest)
## 'data.frame': 450 obs. of 15 variables:
## $ V1 : int 0 0 0 1 0 0 1 0 0 0 ...
## $ V2 : int 0 1 1 1 1 1 1 1 0 1 ...
## $ V3 : int 1 1 1 1 1 0 1 0 0 1 ...
## $ V4 : int 0 1 0 0 0 0 1 0 0 1 ...
## $ V5 : int 1 0 0 0 0 1 0 1 1 0 ...
## $ V6 : int 0 0 1 1 1 1 1 0 0 1 ...
## $ V7 : int 0 1 1 1 1 1 1 0 0 1 ...
## $ V8 : int 0 0 1 1 1 0 1 0 0 1 ...
## $ V9 : int 0 0 1 1 1 0 1 0 0 1 ...
## $ V10: int 1 1 1 1 1 1 1 0 1 1 ...
## $ V11: int 0 1 1 1 1 1 1 1 1 1 ...
## $ V12: int 0 1 0 0 0 0 1 0 0 1 ...
## $ V13: int 0 0 1 1 0 0 1 0 0 1 ...
## $ V14: int 0 0 0 1 0 0 1 0 0 1 ...
## $ V15: int 1 1 1 1 1 0 1 0 1 1 ...
Estimate the IRT item parameters using ltm (Latent Trait Models).
library(irtoys, warn.conflicts = FALSE, quietly = TRUE)
est1PL <- irtoys::est(resp = mytest,
model = "1PL",
engine = "ltm")
head(est1PL$est)
## [,1] [,2] [,3]
## V1 1.1925 1.904672 0
## V2 1.1925 -1.956884 0
## V3 1.1925 -1.626909 0
## V4 1.1925 1.816504 0
## V5 1.1925 0.091406 0
## V6 1.1925 -0.531918 0
Estimates of the expectation of the posterior distribution of the latent variable (“ability”) for each person.
theta1PL <- irtoys::eap(resp = mytest,
ip = est1PL$est,
qu = normal.qu())
head(theta1PL)
## est sem n
## [1,] -1.30373 0.50181 15
## [2,] -0.13636 0.49487 15
## [3,] 0.45816 0.50510 15
## [4,] 1.08973 0.52656 15
## [5,] 0.15786 0.49892 15
## [6,] -0.71640 0.49293 15
head(mydf <- data.frame(mytest, thetahat = theta1PL[,1]))
## V1 V2 V3 V4 V5 V6 V7 V8 V9 V10 V11 V12 V13 V14 V15 thetahat
## 1 0 0 1 0 1 0 0 0 0 1 0 0 0 0 1 -1.30373
## 2 0 1 1 1 0 0 1 0 0 1 1 1 0 0 1 -0.13636
## 3 0 1 1 0 0 1 1 1 1 1 1 0 1 0 1 0.45816
## 4 1 1 1 0 0 1 1 1 1 1 1 0 1 1 1 1.08973
## 5 0 1 1 0 0 1 1 1 1 1 1 0 0 0 1 0.15786
## 6 0 1 0 0 1 1 1 0 0 1 1 0 0 0 0 -0.71640
Create an ordinal variable of the estimates of ability split into quintiles
(quintiles <- quantile(theta1PL[,1], probs = seq(from = 0.2, to = 1.0, by = 0.2)))
## 20% 40% 60% 80% 100%
## -0.71640 -0.13636 0.15786 0.76743 1.80146
x <- cut(mydf$thetahat, c(-Inf, quintiles))
summary(x)
## (-Inf,-0.716] (-0.716,-0.136] (-0.136,0.158] (0.158,0.767]
## 98 85 88 92
## (0.767,1.8]
## 87
mydf <- data.frame(mytest,
thetahat = theta1PL[,1],
group = as.numeric(x))
head(mydf)
## V1 V2 V3 V4 V5 V6 V7 V8 V9 V10 V11 V12 V13 V14 V15 thetahat group
## 1 0 0 1 0 1 0 0 0 0 1 0 0 0 0 1 -1.30373 1
## 2 0 1 1 1 0 0 1 0 0 1 1 1 0 0 1 -0.13636 3
## 3 0 1 1 0 0 1 1 1 1 1 1 0 1 0 1 0.45816 4
## 4 1 1 1 0 0 1 1 1 1 1 1 0 1 1 1 1.08973 5
## 5 0 1 1 0 0 1 1 1 1 1 1 0 0 0 1 0.15786 3
## 6 0 1 0 0 1 1 1 0 0 1 1 0 0 0 0 -0.71640 1
# Create a data frame of item difficulty estimates
delta <- data.frame(item = c(paste("V", 1:numitems, sep = "")),
delta = est1PL$est[,2])
# order item difficulties in ascending order
delta[order(delta$delta, decreasing = FALSE), ]
## item delta
## V10 V10 -3.107648
## V11 V11 -1.976310
## V2 V2 -1.956884
## V3 V3 -1.626909
## V7 V7 -1.595678
## V15 V15 -1.068847
## V8 V8 -0.794759
## V6 V6 -0.531918
## V5 V5 0.091406
## V9 V9 0.129619
## V13 V13 0.638424
## V14 V14 1.290031
## V12 V12 1.381420
## V4 V4 1.816504
## V1 V1 1.904672
# Put item difficulty, person location estimates and ability group together in a data frame
mydfsort <- data.frame(item = delta$item,
delta = delta$delta,
theta = mydf$thetahat,
grp = mydf$group)
head(mydfsort)
## item delta theta grp
## 1 V1 1.904672 -1.30373 1
## 2 V2 -1.956884 -0.13636 3
## 3 V3 -1.626909 0.45816 4
## 4 V4 1.816504 1.08973 5
## 5 V5 0.091406 0.15786 3
## 6 V6 -0.531918 -0.71640 1
mean.estimates <- data.frame(Q1 = sapply(mydfsort[mydfsort$grp == 1,
c("delta", "theta")], mean),
Q2 = sapply(mydfsort[mydfsort$grp == 2,
c("delta", "theta")], mean),
Q3 = sapply(mydfsort[mydfsort$grp == 3,
c("delta", "theta")], mean),
Q4 = sapply(mydfsort[mydfsort$grp == 4,
c("delta", "theta")], mean),
Q5 = sapply(mydfsort[mydfsort$grp == 5,
c("delta", "theta")], mean))
mean.estimates
## Q1 Q2 Q3 Q4 Q5
## delta -0.35357 -0.062349 -0.253789 -0.63037 -0.48194
## theta -1.23991 -0.351627 0.064248 0.51886 1.12668
Function: Produces a Sunflower Scatter Plot for identification of guessing and carelessness
sunflower.plot <-
function(x, y){
## add extra space to right margin of plot within frame
par(mar = c(4, 4.5, 5.5, 0) + 0.1)
# Sunflower Scatter Plot
# Multiple points are plotted as sunflowers with multiple leaves (petals)
# such that overplotting is visualized instead of accidental and invisible.
sunflowerplot(x, y,
xlim = c(-3, 3),
ylim = c(-0.5, 1.5),
ylab = "",
xlab = "",
axes = FALSE)
box()
# x-axis
axis(1, pretty(c(-3, 3), 7))
axis(3, pretty(c(-3, 3), 7))
mtext("Ability",
side = 1,
line = 2.5)
# y-axis
axis(2, at = c(0, 1),
labels = c("incorrect", "correct"),
col.axis = "tomato",
col.ticks = "tomato")
mtext("Responses",
side = 2,
line = 2.5)
# Simple Regression Line
# a, b: the intercept and slope, single values
fit <- lm(y ~ x)
abline(a = coef(fit)[1], # intercept
b = coef(fit)[2], # slope
col = "dodgerblue",
lwd = 3)
# Quadrant tracing lines
abline(a = .5, # intercept
b = -.5, # slope
h = .5, # horizontal divider
col = "green",
lwd = 3,
lty = "dotted")
# Quadrant labels
points(c(1, -2.5, -1, 2.5),
c(1.25, 1.25, -0.25, -0.25),
pch = 21,
bg = "white",
cex = 4)
text(c(1, -2.5, -1, 2.5),
c(1.25, 1.25, -0.25, -0.25),
c("I", "II", "III", "IV"))
# gridlines
grid(nx = NULL, ny = NULL,
col = "lightgray",
lty = 3,
lwd = .5,
equilogs = TRUE)
}
dump("sunflower.plot", file = "sunflower.plot.R")
If low-performing students \((\Theta < -1)\) give a correct response to the most difficult questions, there is evidence of guessing.
# Most difficult items
head(delta[order(delta$delta, decreasing = TRUE), ], 4)
## item delta
## V1 V1 1.9047
## V4 V4 1.8165
## V12 V12 1.3814
## V14 V14 1.2900
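A quick numeric check of the guessing criterion is the success rate of the low-ability examinees \((\Theta < -1)\) on these four items (a sketch):
# proportion correct on the hardest items among low-ability examinees
low <- mydf$thetahat < -1
colMeans(mydf[low, c("V1", "V4", "V12", "V14")])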
Item 1
There aren’t any sunflowers in the second quadrant (low-performing students giving a correct response), so there is no evidence of guessing.
lm(mydf$V1 ~ mydf$thetahat)
##
## Call:
## lm(formula = mydf$V1 ~ mydf$thetahat)
##
## Coefficients:
## (Intercept) mydf$thetahat
## 0.138 0.189
sunflower.plot(mydf$thetahat, mydf$V1)
title(main = "Item 1", col.main = "dodgerblue")
Item 4
lm(mydf$V4 ~ mydf$thetahat)
##
## Call:
## lm(formula = mydf$V4 ~ mydf$thetahat)
##
## Coefficients:
## (Intercept) mydf$thetahat
## 0.149 0.163
sunflower.plot(mydf$thetahat, mydf$V4)
title(main = "Item 4", col.main = "dodgerblue")
Item 14
lm(mydf$V14 ~ mydf$thetahat)
##
## Call:
## lm(formula = mydf$V14 ~ mydf$thetahat)
##
## Coefficients:
## (Intercept) mydf$thetahat
## 0.229 0.275
sunflower.plot(mydf$thetahat, mydf$V14)
title(main = "Item 14", col.main = "dodgerblue")
Item 12
lm(mydf$V12 ~ mydf$thetahat)
##
## Call:
## lm(formula = mydf$V12 ~ mydf$thetahat)
##
## Coefficients:
## (Intercept) mydf$thetahat
## 0.213 0.194
sunflower.plot(mydf$thetahat, mydf$V12)
title(main = "Item 12", col.main = "dodgerblue")
If high-performing students \((\Theta > 1)\) give an incorrect response to the easiest questions, there is evidence of carelessness.
# Easiest items
head(delta[order(delta$delta, decreasing = FALSE), ], 4)
## item delta
## V10 V10 -3.1076
## V11 V11 -1.9763
## V2 V2 -1.9569
## V3 V3 -1.6269
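Analogously, the carelessness criterion can be checked numerically as the failure rate of the high-ability examinees \((\Theta > 1)\) on these four items (a sketch):
# proportion incorrect on the easiest items among high-ability examinees
high <- mydf$thetahat > 1
1 - colMeans(mydf[high, c("V10", "V11", "V2", "V3")])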
Item 10
lm(mydf$V10 ~ mydf$thetahat)
##
## Call:
## lm(formula = mydf$V10 ~ mydf$thetahat)
##
## Coefficients:
## (Intercept) mydf$thetahat
## 0.956 0.089
sunflower.plot(mydf$thetahat, mydf$V10)
title(main = "Item 10", col.main = "dodgerblue")
Item 11
lm(mydf$V11 ~ mydf$thetahat)
##
## Call:
## lm(formula = mydf$V11 ~ mydf$thetahat)
##
## Coefficients:
## (Intercept) mydf$thetahat
## 0.869 0.215
sunflower.plot(mydf$thetahat, mydf$V11)
title(main = "Item 11", col.main = "dodgerblue")
Item 2
lm(mydf$V2 ~ mydf$thetahat)
##
## Call:
## lm(formula = mydf$V2 ~ mydf$thetahat)
##
## Coefficients:
## (Intercept) mydf$thetahat
## 0.867 0.209
sunflower.plot(mydf$thetahat, mydf$V2)
title(main = "Item 2", col.main = "dodgerblue")
Item 3
lm(mydf$V3 ~ mydf$thetahat)
##
## Call:
## lm(formula = mydf$V3 ~ mydf$thetahat)
##
## Coefficients:
## (Intercept) mydf$thetahat
## 0.824 0.260
sunflower.plot(mydf$thetahat, mydf$V3)
title(main = "Item 3", col.main = "dodgerblue")
An item with average difficulty, V9
delta[9,]
## item delta
## V9 V9 0.12962
sunflower.plot(mydf$thetahat, mydf$V9)
title(main = "Item 9", col.main = "dodgerblue")
A miscoded item, V5
Most of the low-ability students gave a correct response (second quadrant), indicating guessing, while almost all of the high-ability students gave an incorrect response (fourth quadrant). The linear regression line also has a negative slope, indicating an item that is not performing the way it was intended.
delta[5,]
## item delta
## V5 V5 0.091406
## add extra space to right margin of plot within frame
par(mar = c(4, 4.5, 2, 0) + 0.1)
# Sunflower Scatter Plot
# Multiple points are plotted as sunflowers with multiple leaves (petals)
# such that overplotting is visualized instead of accidental and invisible.
sunflowerplot(mydf$thetahat, mydf$V5,
xlim = c(-3, 3),
ylim = c(-0.5, 1.5),
ylab = "",
xlab = "",
axes = FALSE,
main = "Ability vs. Responses for Item 5")
box()
# x-axis
axis(1, pretty(c(-3, 3), 7))
mtext("Ability",
side = 1,
col = "black",
line = 2.5)
# y-axis
axis(2, at = c(0, 1),
labels = c("incorrect", "correct"),
col.axis = "tomato",
col.ticks = "tomato")
mtext("Responses",
side = 2,
line = 2.5)
# Simple Regression Line
# a, b: the intercept and slope, single values
fit5 <- lm(mydf$V5 ~ mydf$thetahat)
abline(a = coef(fit5)[1], # intercept
b = coef(fit5)[2], # slope
col = "dodgerblue",
lwd = 3)
# Quadrant tracing lines
abline(a = .5, # intercept
b = .5, # slope
h = .5, # horizontal divider
col = "green",
lwd = 3,
lty = "dotted")
# Quadrant labels
points(c(2.5, -1, -2.5, 1),
c(1.25, 1.25, -0.25, -0.25),
pch = 21,
bg = "white",
cex = 4)
text(c(2.5, -1, -2.5, 1),
c(1.25, 1.25, -0.25, -0.25),
c("I", "II", "III", "IV"))
The Rasch model (Rasch 1960) is mathematically equivalent to the 1PL IRT model and, more generally, is a special case of a generalized linear model:
\[ \begin{aligned} P(x=1|\beta, \delta_i) &= \frac{e^{(\beta-\delta_i)}}{1+e^{(\beta-\delta_i)}} \\ \\ ln \big(\frac{P(\beta)}{1-P(\beta)}\big) &= \beta-\delta_i \end{aligned} \]
where \(P(x=1|\beta, \delta)\) is the probability of correct response given latent trait ability, \(\beta\), and item difficulty, \(\delta\).
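The GLM equivalence can be made concrete by fitting the Rasch model as a logistic regression with person and item factors. This is joint (unconditional) maximum likelihood, shown only as an illustration of the equivalence; the conditional ML estimation via eRm below is the appropriate tool:
# reshape the responses to long format: one row per person-item pair
long <- data.frame(resp = unlist(mytest),
  person = factor(rep(seq_len(nrow(mytest)), times = ncol(mytest))),
  item = factor(rep(names(mytest), each = nrow(mytest))))
# logit P(resp = 1) = person effect + item effect; the item coefficients are
# (sign-flipped) difficulties relative to item V1; may take a few seconds
rasch.glm <- glm(resp ~ 0 + person + item, family = binomial, data = long)
head(coef(rasch.glm)[grep("^item", names(coef(rasch.glm)))])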
Just as the proportion of correct responses (P) serves as the item difficulty statistic in CTT, the number of correct responses is a sufficient statistic for \(\beta\) in the Rasch model. In other words, all examinees with the same raw score have the same estimated ability \((\beta)\) under the Rasch model (“Rasch Measurement: The Dichotomous Model. in E. V. Smith, Jr., & R. M. Smith (Eds.), Introduction to Rasch Measurement: Theory, Models, and Applications.” 2004; B. D. Wright 1997; M. Wright B. D. & Stone 1979, 20). In contrast, in the 2PL and 3PL IRT models, examinees with the same number of correct responses but different response patterns will have somewhat different estimates of \(\beta\). Similarly, when the number of examinees responding correctly is the same for two items, those items will have the same Rasch difficulty estimate, regardless of which examinees responded correctly (DeMars 2010).
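Sufficiency can be verified numerically with the 1PL estimates obtained earlier: within each raw-score group, the EAP ability estimates should be identical (a sketch):
# range of 1PL EAP estimates within each raw-score group (should be ~0)
raw <- rowSums(mytest)
tapply(theta1PL[, "est"], raw, function(z) diff(range(z)))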
library(eRm, warn.conflicts = FALSE, quietly = TRUE)
Rasch <- eRm::RM(mytest, se = TRUE)
(delta <- Rasch$etapar) # item difficulty
## V2 V3 V4 V5 V6 V7 V8 V9
## -1.92357 -1.51335 2.57022 0.57056 -0.17129 -1.47458 -0.48924 0.61552
## V10 V11 V12 V13 V14 V15
## -3.32610 -1.94774 2.06894 1.20968 1.96351 -0.82398
# plot
eRm::plotINFO(Rasch, type = "item") # information plots
eRm::plotjointICC(Rasch, legend = FALSE) # ICC curves
(beta <- eRm::person.parameter(Rasch)) # person parameters
##
## Person Parameters:
##
## Raw Score Estimate Std.Error
## 1 -3.77861 1.12536
## 2 -2.82610 0.86801
## 3 -2.16961 0.76400
## 4 -1.62902 0.71186
## 5 -1.14314 0.68526
## 6 -0.68297 0.67327
## 7 -0.23247 0.67034
## 8 0.21849 0.67365
## 9 0.67744 0.68213
## 10 1.15185 0.69655
## 11 1.65238 0.72052
## 12 2.19996 0.76393
## 13 2.84533 0.85410
## 14 3.75727 1.09934
# log-likelihood of person parameter estimation
(log.likelihood <- stats::logLik(beta))
## Unconditional (joint) log Lik.: -86.828 (df=14)
# Information Criteria
(eRm::IC(beta)) #log-likelihood, AIC, BIC
##
## Information Criteria:
## value npar AIC BIC cAIC
## joint log-lik -2638.1 28 5332.3 5447.3 5475.3
## marginal log-lik -3158.7 14 6345.3 6402.9 6416.9
## conditional log-lik -2069.3 14 4166.6 4224.1 4238.1
# itemfit statistics
(fit_of_items <- eRm::itemfit(beta))
##
## Itemfit Statistics:
## Chisq df p-value Outfit MSQ Infit MSQ Outfit t Infit t
## V1 226.26 449 1.000 0.503 0.726 -2.31 -3.47
## V2 274.08 449 1.000 0.609 0.763 -1.88 -2.62
## V3 234.45 449 1.000 0.521 0.761 -3.05 -3.12
## V4 349.65 449 1.000 0.777 0.858 -0.91 -1.78
## V5 2105.43 449 0.000 4.679 2.290 21.43 21.12
## V6 276.52 449 1.000 0.614 0.738 -4.52 -5.55
## V7 391.12 449 0.977 0.869 0.898 -0.68 -1.27
## V8 271.72 449 1.000 0.604 0.754 -4.13 -4.69
## V9 272.42 449 1.000 0.605 0.725 -4.82 -6.74
## V10 253.62 449 1.000 0.564 0.694 -0.92 -1.95
## V11 218.94 449 1.000 0.487 0.738 -2.63 -2.90
## V12 411.40 449 0.898 0.914 0.944 -0.41 -0.85
## V13 418.88 449 0.843 0.931 0.931 -0.55 -1.46
## V14 250.77 449 1.000 0.557 0.733 -2.96 -4.70
## V15 286.39 449 1.000 0.636 0.783 -3.16 -3.63
# MSQ
MSQ <- cbind(rownames(fit_of_items),
outfitMSQ = fit_of_items$i.outfitMSQ,
infitMSQ = fit_of_items$i.infitMSQ)
# Sort by outfit MSQ, decreasing
head(MSQ[order(abs(fit_of_items$i.outfitMSQ), decreasing = TRUE),], 5)
## outfitMSQ infitMSQ
## V5 4.67874 2.28981
## V13 0.93083 0.93076
## V12 0.91422 0.94421
## V7 0.86916 0.89793
## V4 0.77701 0.85780
# Sort by infit MSQ, decreasing
head(MSQ[order(abs(fit_of_items$i.infitMSQ), decreasing = TRUE),], 5)
## outfitMSQ infitMSQ
## V5 4.67874 2.28981
## V12 0.91422 0.94421
## V13 0.93083 0.93076
## V7 0.86916 0.89793
## V4 0.77701 0.85780
# Person fit
fit_of_persons <- eRm::personfit(beta)
person_fit <- data.frame(outMSQ = fit_of_persons$p.outfitMSQ,
inMSQ = fit_of_persons$p.infitMSQ,
outZ = fit_of_persons$p.outfitZ,
inZ = fit_of_persons$p.infitZ)
# pull out just those VERY largest values using MSQ and then sort
misfit_persons <- subset(person_fit,
subset = (person_fit$outMSQ > 4 |
person_fit$inMSQ > 4 |
abs(person_fit$outZ) > 4 |
abs(person_fit$inZ) > 4))
# Sort by outfit MSQ, decreasing
misfit_persons[order(abs(misfit_persons$outMSQ), decreasing = TRUE),]
## outMSQ inMSQ outZ inZ
## P70 7.1720 4.25848 4.7749 5.27815
## P208 6.1596 0.89829 3.1767 -0.16385
## P111 5.9872 1.93888 1.9054 1.62972
## P15 5.2477 1.50072 1.7545 0.81773
## P65 5.2477 1.50072 1.7545 0.81773
## P123 5.2477 1.50072 1.7545 0.81773
## P172 5.2477 1.50072 1.7545 0.81773
## P210 5.2477 1.50072 1.7545 0.81773
## P249 5.2477 1.50072 1.7545 0.81773
## P243 4.1594 1.18583 2.8453 0.59988
# Sort by infit MSQ, decreasing
misfit_persons[order(abs(misfit_persons$inMSQ), decreasing = TRUE),]
## outMSQ inMSQ outZ inZ
## P70 7.1720 4.25848 4.7749 5.27815
## P111 5.9872 1.93888 1.9054 1.62972
## P15 5.2477 1.50072 1.7545 0.81773
## P65 5.2477 1.50072 1.7545 0.81773
## P123 5.2477 1.50072 1.7545 0.81773
## P172 5.2477 1.50072 1.7545 0.81773
## P210 5.2477 1.50072 1.7545 0.81773
## P249 5.2477 1.50072 1.7545 0.81773
## P243 4.1594 1.18583 2.8453 0.59988
## P208 6.1596 0.89829 3.1767 -0.16385
eRm::plotICC(Rasch,
empICC = list("raw", type = "b", col = "tomato", lty = 1),
item.subset = c(10, 5, 6, 1),
ask = FALSE,
empCI = list(gamma = 0.95, col = "dodgerblue", lty = 1),
col = "green",
lwd = 3)
# Goodness-of-Fit Tests
(gof.res <- eRm::gofIRT(beta))
##
## Goodness-of-Fit Results:
## Collapsed Deviance = 956 (df = 210, p-value = 0)
## Pearson R2: 0.499
## Area Under ROC: 0.905
summary(gof.res)
##
## Goodness-of-Fit Tests
## value df p-value
## Collapsed Deviance 955.997 210 0
## Hosmer-Lemeshow 42.007 8 0
## Rost Deviance 1652.285 32753 1
## Casewise Deviance 5276.261 6722 1
##
## R-Squared Measures
## Pearson R2: 0.499
## Sum-of-Squares R2: 0.498
## McFadden R2: 0.754
##
## Classifier Results - Confusion Matrix (relative frequencies)
## observed
## predicted 0 1
## 0 0.347 0.081
## 1 0.092 0.480
##
## Accuracy: 0.826
## Sensitivity: 0.855
## Specificity: 0.79
## Area under ROC: 0.905
## Gini coefficient: 0.809
# LR-test on dichotomous Rasch model with user-defined split
(npersons <- nrow(mytest))
## [1] 450
splitvec <- rep(1:2, each = (npersons/2))
LR <- eRm::LRtest(Rasch, splitcr = splitvec, se = TRUE)
summary(LR)
##
## Andersen LR-test:
## LR-value: 21.383
## Chi-square df: 14
## p-value: 0.092
##
##
## Subject Subgroup: splitvec 1:
## Log-likelihood: -1087.3
##
## Beta Parameters:
## beta V1 beta V2 beta V3 beta V4 beta V5 beta V6 beta V7
## Estimate -2.55876 1.87395 1.43133 -2.32592 -0.31564 0.21637 1.25568
## Std.Err. 0.20085 0.20627 0.18629 0.18872 0.14833 0.15345 0.17967
## beta V8 beta V9 beta V10 beta V11 beta V12 beta V13 beta V14
## Estimate 0.46625 -0.64868 2.92533 1.74386 -2.0223 -0.96216 -1.8705
## Std.Err. 0.15782 0.14816 0.27771 0.19987 0.1756 0.15027 0.1701
## beta V15
## Estimate 0.79121
## Std.Err. 0.16531
##
##
## Subject Subgroup: splitvec 2:
## Log-likelihood: -971.31
##
## Beta Parameters:
## beta V1 beta V2 beta V3 beta V4 beta V5 beta V6 beta V7 beta V8
## Estimate -2.8206 1.96040 1.58708 -2.86304 -0.85751 0.10634 1.7185 0.49627
## Std.Err. 0.2060 0.22305 0.20261 0.20831 0.15430 0.15859 0.2093 0.16530
## beta V9 beta V10 beta V11 beta V12 beta V13 beta V14 beta V15
## Estimate -0.6045 3.99490 2.18111 -2.15117 -1.49850 -2.09111 0.84185
## Std.Err. 0.1538 0.44615 0.23737 0.17712 0.16101 0.17519 0.17394
# Computation of Andersen's LR-test with mean split
(lrres.rasch <- LRtest(Rasch, splitcr = "mean"))
##
## Andersen LR-test:
## LR-value: 402.59
## Chi-square df: 14
## p-value: 0
# We expect the difference to be close to zero
LR.matrix <- cbind(high = lrres.rasch$etalist$high,
low = lrres.rasch$etalist$low,
dif = lrres.rasch$etalist$high -
lrres.rasch$etalist$low)
LR.matrix
## high low dif
## V2 -2.27315 -1.923722 -0.34943
## V3 -2.97297 -1.391662 -1.58131
## V4 2.89481 2.493805 0.40100
## V5 2.44486 -1.462141 3.90700
## V6 -0.36566 0.032572 -0.39824
## V7 -1.14709 -1.583065 0.43598
## V8 -1.14709 -0.226234 -0.92085
## V9 0.54898 1.104688 -0.55571
## V10 -2.97297 -3.434553 0.46158
## V11 -2.97297 -1.895872 -1.07710
## V12 2.48451 1.636600 0.84791
## V13 1.52177 1.104688 0.41708
## V14 2.12569 2.790355 -0.66466
## V15 -1.06352 -0.742050 -0.32147
# potentially problematic items
# (the difference between high and low is larger than 1)
subset(LR.matrix, abs(LR.matrix[, "dif"]) > 1)
## high low dif
## V3 -2.9730 -1.3917 -1.5813
## V5 2.4449 -1.4621 3.9070
## V11 -2.9730 -1.8959 -1.0771
# color scheme
numitems <- ncol(mytest)
mycolors <- rainbow(numitems)
# Goodness of Fit Plot
eRm::plotGOF(lrres.rasch,
set_par = TRUE,
tlab = "number",
col = mycolors,
type = "p",
ctrline = list(gamma = 0.95,
col = "black",
lty = 2,
cex = .5),
conf = list(ia = FALSE,
col = mycolors,
lty = 3,
cex = .5))
# Item 5 tracing lines
abline(h = lrres.rasch$etalist$high[4],
v = lrres.rasch$etalist$low[4],
col = mycolors[5],
lty = 3)
Ideally, we would like to see a range of item difficulties over the latent trait continuum (-3, 3) without any gaps. Items’ confidence ellipses should not overlap substantially (i.e., an item’s confidence ellipse should not include another item’s center). However, the confidence ellipses should include the black diagonal split line between groups, and their centers should lie within the black-dotted 95% confidence line.
For example, there is a large gap between items 6 and 9 (a theta range of about 0 to 1), and similarly between items 10 and 2. The confidence ellipses for items 2 and 11 include each other’s centers, so they do about the same job, covering the same region of the latent trait continuum, with item 2 doing slightly better because its center lies on the 95% confidence line and its ellipse extends to both sides of the diagonal split line. Similarly, items 9 and 13 have the same theta value. On the other hand, although the confidence ellipse of item 1 includes the center of item 4, the two items cover different sections of the latent trait continuum (about theta 2.5 to 3.5). There are also issues with items 3, 5, 8, 9, 12, and 14 by the above criteria, but item 5 clearly stands out: for the group with raw scores above the mean, its estimated difficulty is about 2.5, whereas for the group with raw scores below the mean, it is about -1.5. The difficulty parameter of item 5 is therefore clearly not invariant across the two groups. It may have been a negatively coded item, as its ICC plot above also suggests.
Item exposure can compromise test security: administering items on more than one occasion and to more than one examinee group can lead to overexposure. Exposure can be limited by using alternate test forms; however, multiple forms lead to multiple score scales that measure the construct of interest at differing levels of difficulty. These differences in difficulty across alternate forms of a test can be adjusted by equating to produce comparable score scales.
Raw scores often are transformed to scale scores to enhance interpretability, so that a scale score has the same meaning regardless of the test form administered or the group of examinees tested (Kolen M. J. and Brennan 2013, 4). Score scales typically are established using a single test form and maintained through an equating process that places raw scores from subsequent forms on the established score scale.
Although their purposes are different, similar statistical procedures often are used in linking and equating. Equating is used to adjust scores on test forms that are built to be as similar as possible in content and statistical characteristics, whereas tests that are purposefully built to be different are linked (Kolen M. J. and Brennan 2013, 3).
Create two data sets with unique and common items
# CONSTANTS
npersons <- 1000
numitems <- 30
numanchoritems <- 10
Population P and Form X
# Population P true theta (ability)
set.seed(26543476)
P <- rnorm(npersons, mean = -0.5, sd = 1.0)
summary(P)
## Min. 1st Qu. Median Mean 3rd Qu. Max.
## -3.580 -1.150 -0.488 -0.502 0.160 2.700
# Form X items
set.seed(5687845)
X <- cbind(a = runif(numitems, 0.5, 2.5), # discrimination
b = rnorm(numitems, mean = -.1, sd = 1.0), # difficulty
c = runif(numitems, 0.0, 0.2) # guessing
)
head(X)
## a b c
## [1,] 1.81275 -1.198700 0.14298
## [2,] 1.89450 0.200355 0.15389
## [3,] 2.24316 -0.035753 0.11639
## [4,] 1.31774 -0.961044 0.14435
## [5,] 0.75002 2.263847 0.11434
## [6,] 1.94931 -1.213571 0.13678
summary(X)
## a b c
## Min. :0.528 Min. :-2.201 Min. :0.000314
## 1st Qu.:1.057 1st Qu.:-0.751 1st Qu.:0.064189
## Median :1.676 Median :-0.290 Median :0.108290
## Mean :1.526 Mean :-0.202 Mean :0.105860
## 3rd Qu.:1.937 3rd Qu.: 0.296 3rd Qu.:0.151507
## Max. :2.433 Max. : 2.586 Max. :0.189391
# Simulate responses for population P and Form X
set.seed(609678)
XP <- data.frame(irtoys::sim(ip = X, x = P))
names(XP) <- c(paste("X", 1:numitems, sep = ""))
str(XP)
## 'data.frame': 1000 obs. of 30 variables:
## $ X1 : num 0 0 0 0 1 1 1 1 1 1 ...
## $ X2 : num 0 0 0 0 0 1 0 1 0 0 ...
## $ X3 : num 0 0 0 0 1 1 1 1 0 0 ...
## $ X4 : num 0 0 0 0 0 1 1 1 1 1 ...
## $ X5 : num 1 0 0 0 0 0 0 0 0 0 ...
## $ X6 : num 0 1 1 0 1 1 0 1 1 1 ...
## $ X7 : num 0 0 0 1 0 1 0 1 0 0 ...
## $ X8 : num 0 0 1 0 1 1 0 1 1 0 ...
## $ X9 : num 0 0 0 1 1 1 0 1 1 0 ...
## $ X10: num 0 0 0 0 1 1 1 1 1 1 ...
## $ X11: num 0 0 1 0 0 1 1 1 1 0 ...
## $ X12: num 0 0 0 0 1 1 1 1 1 1 ...
## $ X13: num 0 0 1 1 0 1 0 1 1 0 ...
## $ X14: num 0 0 0 1 0 1 0 1 0 1 ...
## $ X15: num 0 0 0 0 0 0 0 0 0 0 ...
## $ X16: num 0 1 1 1 1 1 1 1 0 1 ...
## $ X17: num 0 0 1 0 0 1 0 0 1 0 ...
## $ X18: num 0 0 0 1 0 0 0 1 1 0 ...
## $ X19: num 0 0 0 0 0 1 0 1 1 0 ...
## $ X20: num 0 0 0 0 0 1 0 1 0 0 ...
## $ X21: num 0 0 1 1 1 1 0 0 1 1 ...
## $ X22: num 0 0 0 0 1 1 1 1 1 1 ...
## $ X23: num 1 0 0 0 0 1 0 1 1 1 ...
## $ X24: num 0 1 0 0 0 1 1 1 0 0 ...
## $ X25: num 0 0 1 1 0 1 1 1 0 1 ...
## $ X26: num 1 0 0 0 0 1 0 1 1 1 ...
## $ X27: num 0 0 0 1 0 1 0 1 0 0 ...
## $ X28: num 0 1 0 1 0 1 0 1 0 0 ...
## $ X29: num 1 0 1 1 1 1 0 1 1 1 ...
## $ X30: num 0 0 0 1 0 1 0 1 1 0 ...
Population Q and Form Y
# Population Q true theta (ability)
set.seed(3412543)
Q <- rnorm(npersons, mean = .5, sd = 1)
summary(Q)
## Min. 1st Qu. Median Mean 3rd Qu. Max.
## -2.840 -0.105 0.578 0.540 1.190 3.800
# Form Y items
set.seed(6377344)
Y <- cbind(a = runif(numitems, 0.5, 2.5), # discrimination
b = rnorm(numitems, mean = .1, sd = 1.0), # difficulty
c = runif(numitems, 0.0, 0.2) # guessing
)
head(Y)
## a b c
## [1,] 2.22504 -0.567791 0.16561076
## [2,] 0.50177 0.076983 0.00617487
## [3,] 1.18109 0.707334 0.07757834
## [4,] 1.75398 -0.929852 0.19578527
## [5,] 1.75312 1.741684 0.00054576
## [6,] 0.87359 1.893410 0.05537028
summary(Y)
## a b c
## Min. :0.502 Min. :-1.9301 Min. :0.000546
## 1st Qu.:1.093 1st Qu.:-0.6312 1st Qu.:0.015809
## Median :1.635 Median :-0.1090 Median :0.092555
## Mean :1.511 Mean : 0.0809 Mean :0.095483
## 3rd Qu.:1.877 3rd Qu.: 0.7039 3rd Qu.:0.164322
## Max. :2.225 Max. : 2.4160 Max. :0.199548
# Simulate responses for population Q and Form Y
set.seed(112524541)
YQ <- data.frame(irtoys::sim(ip = Y, x = Q))
names(YQ) <- c(paste("Y", 1:numitems, sep = ""))
str(YQ)
## 'data.frame': 1000 obs. of 30 variables:
## $ Y1 : num 0 0 1 1 1 1 1 1 1 1 ...
## $ Y2 : num 1 1 0 0 0 1 0 0 1 1 ...
## $ Y3 : num 1 0 0 0 0 1 1 0 0 1 ...
## $ Y4 : num 1 0 1 0 0 1 1 1 1 1 ...
## $ Y5 : num 0 0 0 0 0 0 1 0 0 0 ...
## $ Y6 : num 1 0 0 0 0 0 1 0 0 0 ...
## $ Y7 : num 0 0 0 1 0 1 1 1 0 1 ...
## $ Y8 : num 0 0 1 1 1 1 1 1 0 1 ...
## $ Y9 : num 0 0 0 0 1 0 1 0 0 1 ...
## $ Y10: num 1 0 0 1 0 0 1 0 1 0 ...
## $ Y11: num 1 1 1 1 1 1 1 1 1 1 ...
## $ Y12: num 1 0 0 1 0 1 1 1 0 1 ...
## $ Y13: num 1 0 0 0 0 1 1 1 1 1 ...
## $ Y14: num 1 1 0 0 0 1 1 0 0 1 ...
## $ Y15: num 1 1 0 0 1 1 1 0 1 1 ...
## $ Y16: num 1 1 1 1 1 1 1 1 1 1 ...
## $ Y17: num 0 0 0 0 0 1 1 1 0 1 ...
## $ Y18: num 1 0 0 0 0 0 1 1 0 1 ...
## $ Y19: num 0 0 0 0 0 1 1 0 0 0 ...
## $ Y20: num 0 0 0 1 0 1 1 0 0 0 ...
## $ Y21: num 0 0 0 0 1 0 0 0 0 1 ...
## $ Y22: num 1 1 1 0 0 1 1 1 0 1 ...
## $ Y23: num 1 0 0 0 0 1 1 1 0 1 ...
## $ Y24: num 1 1 0 0 0 0 1 0 0 1 ...
## $ Y25: num 0 1 0 1 1 1 1 1 0 1 ...
## $ Y26: num 0 0 0 0 0 1 1 0 0 1 ...
## $ Y27: num 1 0 0 0 1 1 1 1 1 1 ...
## $ Y28: num 0 1 1 0 0 1 1 1 0 1 ...
## $ Y29: num 1 1 0 1 0 1 1 1 1 1 ...
## $ Y30: num 0 0 0 0 0 0 1 1 0 1 ...
Anchor Items for both populations, P and Q
# Anchor test items, V
set.seed(569698)
V <- cbind(a = runif(numanchoritems, 0.5, 2.5), # discrimination
b = rnorm(numanchoritems, mean = 0, sd = 1.0), # difficulty
c = runif(numanchoritems, 0.0, 0.2) # guessing
)
head(V)
## a b c
## [1,] 0.58718 0.31063 0.098580
## [2,] 1.33322 0.44685 0.125070
## [3,] 1.19667 -1.71764 0.091262
## [4,] 1.09101 2.11134 0.015697
## [5,] 1.59992 0.15976 0.032924
## [6,] 1.25632 -0.42496 0.111658
summary(V)
## a b c
## Min. :0.587 Min. :-1.718 Min. :0.0054
## 1st Qu.:0.704 1st Qu.:-1.075 1st Qu.:0.0462
## Median :1.041 Median :-0.133 Median :0.0949
## Mean :1.014 Mean :-0.204 Mean :0.0862
## 3rd Qu.:1.241 3rd Qu.: 0.293 3rd Qu.:0.1217
## Max. :1.600 Max. : 2.111 Max. :0.1486
# Simulate responses to the anchor items by population P
set.seed(31245)
VP <- data.frame(irtoys::sim(ip = V, x = P))
names(VP) <- c(paste("V", 1:numanchoritems, sep = ""))
str(VP)
## 'data.frame': 1000 obs. of 10 variables:
## $ V1 : num 0 1 0 1 0 1 0 1 0 1 ...
## $ V2 : num 0 0 0 0 0 1 1 1 0 1 ...
## $ V3 : num 1 0 0 1 1 1 0 1 1 1 ...
## $ V4 : num 0 0 0 0 0 0 0 1 0 0 ...
## $ V5 : num 0 0 0 0 0 1 0 1 0 0 ...
## $ V6 : num 0 0 0 0 1 1 0 1 1 1 ...
## $ V7 : num 0 0 1 0 0 1 1 0 0 0 ...
## $ V8 : num 0 1 1 1 0 1 1 1 1 0 ...
## $ V9 : num 0 1 0 1 0 1 1 1 1 1 ...
## $ V10: num 1 0 0 1 0 0 0 0 0 0 ...
# Simulate responses to the anchor items by population Q
set.seed(367458)
VQ <- data.frame(irtoys::sim(ip = V, x = Q))
names(VQ) <- c(paste("V", 1:numanchoritems, sep = ""))
str(VQ)
## 'data.frame': 1000 obs. of 10 variables:
## $ V1 : num 0 0 1 1 0 0 0 1 1 1 ...
## $ V2 : num 0 0 0 0 1 1 1 0 0 0 ...
## $ V3 : num 0 1 1 1 1 1 1 1 1 1 ...
## $ V4 : num 0 0 0 0 0 0 0 0 0 0 ...
## $ V5 : num 0 0 0 0 0 0 1 1 1 1 ...
## $ V6 : num 1 0 0 1 1 1 1 0 1 1 ...
## $ V7 : num 1 0 0 0 0 1 0 0 1 1 ...
## $ V8 : num 0 1 0 0 1 1 1 1 0 1 ...
## $ V9 : num 1 1 0 1 1 1 1 1 1 1 ...
## $ V10: num 0 0 1 0 1 1 0 1 0 0 ...
# The anchor item scores are included in the total scores
XVP <- data.frame(total = rowSums(cbind(XP, VP)),
anchor = rowSums(VP))
head(XVP)
## total anchor
## 1 6 2
## 2 7 3
## 3 11 2
## 4 17 5
## 5 13 2
## 6 35 8
library(equate, warn.conflicts = FALSE, quietly = TRUE)
x <- equate::freqtab(XVP)
plot(x)
xs <- loglinear(x,
degrees = c(4, 1),
stepup = TRUE,
showWarnings = FALSE)
plot(x, xs, lwd = 2)
# The anchor item scores are included in the total scores
YQV <- data.frame(total_score = rowSums(cbind(YQ, VQ)),
anchor_score = rowSums(VQ))
head(YQV)
## total_score anchor_score
## 1 20 3
## 2 13 3
## 3 10 3
## 4 14 4
## 5 15 6
## 6 29 7
y <- equate::freqtab(YQV)
plot(y)
ys <- loglinear(y, stepup = TRUE, showWarnings = FALSE)
plot(y, ys, lwd = 2)
Test Scores Under a Nonequivalent-Groups Anchor Test (NEAT) Design
The NEAT design is a data collection design involving two populations of test-takers, P and Q, and a sample of examinees from each. The sample from P takes test X, the sample from Q takes test Y, and both samples take an anchor test, V, which is used to link X and Y.
Vertical equating is the process of equating tests administered to groups of students with different abilities, such as students in different grades (years of schooling).
Horizontal equating is the equating of tests administered to groups with similar abilities. Different tests are used to avoid practice effects.
Six equating types and corresponding linking methods are supported.
The equating design is implied by the method argument, where “none” (default) indicates that no method is needed (because examinees taking forms X and Y are assumed to be the same or equivalent). The nominal weights, Tucker, Levine observed score, Levine true score, frequency estimation, Braun/Holland, and chained equating methods are supported for the nonequivalent-groups with anchor test design.
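For instance, a Tucker linear equating under the NEAT design can be requested through the method argument; a minimal sketch (not run elsewhere in this document), using the anchor-score frequency tables x and y built above:
# a minimal sketch: Tucker linear equating under the NEAT design,
# using the bivariate (total, anchor) frequency tables x and y from above
tucker.lin <- equate(x, y, type = "linear", method = "tucker")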
In mean equating, scores on the two forms that are an equal (signed) distance away from their respective means are set equal:
\[ \begin{aligned} x_{i}-\mu(X) &= y_{i}-\mu(Y) \\ y_{i} &= x_{i}- \mu(X) + \mu(Y) \end{aligned} \]
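As a quick worked illustration of this formula (hypothetical numbers, not the simulated forms above):
# illustrative only: mean equating by hand on hypothetical score vectors
xscores <- c(10, 12, 15, 20) # hypothetical form-X scores
yscores <- c(14, 16, 18, 24) # hypothetical form-Y scores
xscores - mean(xscores) + mean(yscores) # form-Y equivalents
## [1] 13.75 15.75 18.75 23.75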
General linear equating is a new approach to estimating a linear linking or equating function. The linear conversion is defined by setting standardized deviation scores (z-scores) on the two forms to be equal such that
\[ \begin{aligned} \frac{x_{i}-\mu(X)}{\sigma(X)} &= \frac{y_{i}-\mu(Y)}{\sigma(Y)} \\ y_{i} &= \sigma(Y) \big[ \frac{x_{i}-\mu(X)}{\sigma(X)} \big] + \mu(Y) \end{aligned} \]
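Continuing the same hypothetical vectors, the linear conversion reduces to a slope of \(\sigma(Y)/\sigma(X)\) and an intercept of \(\mu(Y)\) minus the slope times \(\mu(X)\):
# illustrative only: linear equating by hand
slope <- sd(yscores) / sd(xscores)
intercept <- mean(yscores) - slope * mean(xscores)
round(c(slope = slope, intercept = intercept), 3)
## slope intercept
## 0.993 3.844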
library(equate, warn.conflicts = FALSE, quietly = TRUE)
linear <- equate(x, y,
type = "general linear",
smooth = "none")
linear
##
## General Linear Equating: x to y
##
## Design: nonequivalent groups
##
## Summary Statistics:
## mean sd skew kurt min max n
## x 20.10 7.89 0.15 -0.87 4 39 1000
## y 25.78 6.95 -0.49 -0.29 3 40 1000
## yx 20.02 8.35 0.15 -0.87 3 40 1000
##
## Coefficients:
## intercept slope cx cy sx sy
## -1.2286 1.0571 21.5000 21.5000 35.0000 37.0000
Equipercentile linking and equating define a nonlinear relationship between score scales by setting the cumulative distribution functions of the two forms equal. Braun and Holland (1982) indicated that the following function is an equipercentile equating function when X and Y are continuous random variables:
\[ y_{i} = G^{-1}[F(x_{i})], \]
and by symmetry
\[ x_{i} = F^{-1}[G(y_{i})] \]
where \(G^{-1}\) is the inverse of the cumulative distribution function G.
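A minimal sketch of this composition, using ecdf() for F and quantile() for \(G^{-1}\) on hypothetical continuous scores (discrete test scores additionally require presmoothing and continuization, which the equate() call below handles):
# illustrative only: y = G^(-1)[F(x)] with empirical distribution functions
set.seed(1)
xc <- rnorm(1000, mean = 20, sd = 5) # hypothetical continuous form-X scores
yc <- rnorm(1000, mean = 25, sd = 4) # hypothetical continuous form-Y scores
Fx <- ecdf(xc) # F, the form-X CDF
yx <- quantile(yc, probs = Fx(xc), names = FALSE) # G^(-1)[F(x)]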
Observed-Score Linking and Equating (Albano 2015):
The equipercentile equivalent of a form-X score on the Y scale is calculated by finding the percentile rank in X of a particular score, and then finding the form-Y score associated with that percentile rank.
Equipercentile equating is appropriate when X and Y differ nonlinearly in difficulty, that is, when difficulty differences fluctuate across the score scale, potentially at each score point. Each coordinate on the equipercentile curve is estimated using information from the distributions of X and Y. Thus, compared to identity, mean, and linear equating, equipercentile equating is more susceptible to sampling error because it involves estimating as many parameters as there are unique score points on X.
Smoothing methods are typically used to reduce irregularities due to sampling error in either the score distributions or the equipercentile equating function itself. Two commonly used smoothing methods are polynomial loglinear presmoothing (Holland and Thayer 2000) and cubic-spline postsmoothing (Kolen 1984).
library(equate, warn.conflicts = FALSE, quietly = TRUE)
equiper <- equate(x, y,
type = "equipercentile",
smoothmethod = "loglinear",
internal = TRUE,
boot = TRUE)
equiper
##
## Equipercentile Equating: x to y
##
## Design: nonequivalent groups
##
## Smoothing Method: loglinear presmoothing
##
## Summary Statistics:
## mean sd skew kurt min max n
## x 20.10 7.89 0.15 -0.87 4.00 39.00 1000
## y 25.78 6.95 -0.49 -0.29 3.00 40.00 1000
## yx 25.79 6.94 -0.50 -0.29 5.62 40.11 1000
Observed-Score Linking and Equating (Albano 2015):
The simplified approach to circle-arc equating demonstrated by Livingston and Kim (2009) combines a circle-arc with the identity function. When the low and high scores differ for the X and Y scales, this becomes the identity linking function. The linear component can be omitted, and symmetric circle-arc equating used instead, with simple = FALSE. The result is an equating function based only on the circle-arc that passes through the points lowp, highp, and the estimated midpoint.
library(equate, warn.conflicts = FALSE, quietly = TRUE)
circ.arc <- equate(x, y,
type = "circle-arc",
method = "braun/holland")
circ.arc
##
## Braun/Holland Circle-Arc Equating: x to y
##
## Design: nonequivalent groups
##
## Summary Statistics:
## mean sd skew kurt min max n
## x 20.10 7.89 0.15 -0.87 4 39 1000
## y 25.78 6.95 -0.49 -0.29 3 40 1000
## yx 20.54 8.37 0.09 -0.88 3 40 1000
##
## Coefficients:
## intercept slope xcenter ycenter r
## -1.2286 1.0571 21.5000 -234.9046 235.5556
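The symmetric variant described above, which drops the linear component, is a one-argument change; a minimal sketch:
# symmetric circle-arc equating: arc through lowp, highp and the midpoint only
circ.sym <- equate(x, y,
type = "circle-arc",
method = "braun/holland",
simple = FALSE)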
A composite of mean and identity
library(equate, warn.conflicts = FALSE, quietly = TRUE)
id.mean <- composite(list(equate(x, y, type = "i", boot = TRUE, reps = 5),
equate(x, y, type = "m", boot = TRUE, reps = 5)),
wc = .5, symmetric = TRUE)
id.mean
## [[1]]
##
## Composite Equating: x to y
##
## Design: nonequivalent groups
##
## Summary Statistics:
## mean sd skew kurt min max n
## x 20.10 7.89 0.15 -0.87 4.00 39.00 1000
## y 25.78 6.95 -0.49 -0.29 3.00 40.00 1000
## yx 22.90 8.35 0.15 -0.87 5.88 42.88 1000
##
##
## [[2]]
##
## Identity Equating: x to y
##
## Design: nonequivalent groups
##
## Summary Statistics:
## mean sd skew kurt min max n
## x 20.10 7.89 0.15 -0.87 4 39 1000
## y 25.78 6.95 -0.49 -0.29 3 40 1000
## yx 20.02 8.35 0.15 -0.87 3 40 1000
##
## Coefficients:
## intercept slope cx cy sx sy
## -1.2286 1.0571 21.5000 21.5000 35.0000 37.0000
##
##
## [[3]]
##
## Mean Equating: x to y
##
## Design: nonequivalent groups
##
## Summary Statistics:
## mean sd skew kurt min max n
## x 20.10 7.89 0.15 -0.87 4.00 39.00 1000
## y 25.78 6.95 -0.49 -0.29 3.00 40.00 1000
## yx 25.78 8.35 0.15 -0.87 8.76 45.76 1000
##
## Coefficients:
## intercept slope cx cy sx sy
## 4.5303 1.0571 21.5000 21.5000 35.0000 37.0000
##
##
## attr(,"class")
## [1] "equate.list"
Plot all six equating functions
# Conversion table containing scores on X with their form Y equivalents.
score.table <- round(data.frame(scale = equiper$concordance$scale,
equiper = equiper$concordance$yx,
linear = linear$concordance$yx,
circ.arc = circ.arc$concordance$yx,
composit = id.mean[[1]]$concordance$yx,
identity = id.mean[[2]]$concordance$yx,
mean.eq = id.mean[[3]]$concordance$yx), 2)
knitr::kable(score.table,
caption = "Score Conversion Table",
col.names = c("Scale",
"Equipercentile",
"Linear",
"Circle-arc",
"Composite",
"Identity",
"Mean")
)
Scale | Equipercentile | Linear | Circle-arc | Composite | Identity | Mean |
---|---|---|---|---|---|---|
4 | 5.62 | 3.00 | 3.00 | 5.88 | 3.00 | 8.76 |
5 | 7.86 | 4.06 | 4.13 | 6.94 | 4.06 | 9.82 |
6 | 9.67 | 5.11 | 5.25 | 7.99 | 5.11 | 10.87 |
7 | 11.34 | 6.17 | 6.38 | 9.05 | 6.17 | 11.93 |
8 | 12.95 | 7.23 | 7.49 | 10.11 | 7.23 | 12.99 |
9 | 14.55 | 8.29 | 8.60 | 11.17 | 8.29 | 14.04 |
10 | 16.07 | 9.34 | 9.71 | 12.22 | 9.34 | 15.10 |
11 | 17.57 | 10.40 | 10.82 | 13.28 | 10.40 | 16.16 |
12 | 18.96 | 11.46 | 11.92 | 14.34 | 11.46 | 17.22 |
13 | 20.27 | 12.51 | 13.01 | 15.39 | 12.51 | 18.27 |
14 | 21.49 | 13.57 | 14.10 | 16.45 | 13.57 | 19.33 |
15 | 22.59 | 14.63 | 15.19 | 17.51 | 14.63 | 20.39 |
16 | 23.60 | 15.69 | 16.27 | 18.57 | 15.69 | 21.44 |
17 | 24.54 | 16.74 | 17.35 | 19.62 | 16.74 | 22.50 |
18 | 25.39 | 17.80 | 18.42 | 20.68 | 17.80 | 23.56 |
19 | 26.18 | 18.86 | 19.49 | 21.74 | 18.86 | 24.62 |
20 | 26.93 | 19.91 | 20.56 | 22.79 | 19.91 | 25.67 |
21 | 27.64 | 20.97 | 21.62 | 23.85 | 20.97 | 26.73 |
22 | 28.31 | 22.03 | 22.68 | 24.91 | 22.03 | 27.79 |
23 | 28.97 | 23.09 | 23.73 | 25.97 | 23.09 | 28.84 |
24 | 29.61 | 24.14 | 24.78 | 27.02 | 24.14 | 29.90 |
25 | 30.24 | 25.20 | 25.82 | 28.08 | 25.20 | 30.96 |
26 | 30.87 | 26.26 | 26.87 | 29.14 | 26.26 | 32.02 |
27 | 31.49 | 27.31 | 27.90 | 30.19 | 27.31 | 33.07 |
28 | 32.14 | 28.37 | 28.93 | 31.25 | 28.37 | 34.13 |
29 | 32.79 | 29.43 | 29.96 | 32.31 | 29.43 | 35.19 |
30 | 33.44 | 30.49 | 30.98 | 33.37 | 30.49 | 36.24 |
31 | 34.13 | 31.54 | 32.00 | 34.42 | 31.54 | 37.30 |
32 | 34.82 | 32.60 | 33.02 | 35.48 | 32.60 | 38.36 |
33 | 35.49 | 33.66 | 34.03 | 36.54 | 33.66 | 39.42 |
34 | 36.24 | 34.71 | 35.03 | 37.59 | 34.71 | 40.47 |
35 | 36.99 | 35.77 | 36.04 | 38.65 | 35.77 | 41.53 |
36 | 37.71 | 36.83 | 37.03 | 39.71 | 36.83 | 42.59 |
37 | 38.44 | 37.89 | 38.03 | 40.77 | 37.89 | 43.64 |
38 | 39.27 | 38.94 | 39.02 | 41.82 | 38.94 | 44.70 |
39 | 40.11 | 40.00 | 40.00 | 42.88 | 40.00 | 45.76 |
# plots
plot(equiper,
linear,
circ.arc,
id.mean[[1]],
id.mean[[2]],
id.mean[[3]],
legendplace = "top",
addident = FALSE)
library(equateIRT, warn.conflicts = FALSE, quietly = TRUE)
require(ltm, warn.conflicts = FALSE, quietly = TRUE)
# simulation of IRT tests with anchor items
A <- cbind(XP, VP)
str(A)
## 'data.frame': 1000 obs. of 40 variables:
## $ X1 : num 0 0 0 0 1 1 1 1 1 1 ...
## $ X2 : num 0 0 0 0 0 1 0 1 0 0 ...
## $ X3 : num 0 0 0 0 1 1 1 1 0 0 ...
## $ X4 : num 0 0 0 0 0 1 1 1 1 1 ...
## $ X5 : num 1 0 0 0 0 0 0 0 0 0 ...
## $ X6 : num 0 1 1 0 1 1 0 1 1 1 ...
## $ X7 : num 0 0 0 1 0 1 0 1 0 0 ...
## $ X8 : num 0 0 1 0 1 1 0 1 1 0 ...
## $ X9 : num 0 0 0 1 1 1 0 1 1 0 ...
## $ X10: num 0 0 0 0 1 1 1 1 1 1 ...
## $ X11: num 0 0 1 0 0 1 1 1 1 0 ...
## $ X12: num 0 0 0 0 1 1 1 1 1 1 ...
## $ X13: num 0 0 1 1 0 1 0 1 1 0 ...
## $ X14: num 0 0 0 1 0 1 0 1 0 1 ...
## $ X15: num 0 0 0 0 0 0 0 0 0 0 ...
## $ X16: num 0 1 1 1 1 1 1 1 0 1 ...
## $ X17: num 0 0 1 0 0 1 0 0 1 0 ...
## $ X18: num 0 0 0 1 0 0 0 1 1 0 ...
## $ X19: num 0 0 0 0 0 1 0 1 1 0 ...
## $ X20: num 0 0 0 0 0 1 0 1 0 0 ...
## $ X21: num 0 0 1 1 1 1 0 0 1 1 ...
## $ X22: num 0 0 0 0 1 1 1 1 1 1 ...
## $ X23: num 1 0 0 0 0 1 0 1 1 1 ...
## $ X24: num 0 1 0 0 0 1 1 1 0 0 ...
## $ X25: num 0 0 1 1 0 1 1 1 0 1 ...
## $ X26: num 1 0 0 0 0 1 0 1 1 1 ...
## $ X27: num 0 0 0 1 0 1 0 1 0 0 ...
## $ X28: num 0 1 0 1 0 1 0 1 0 0 ...
## $ X29: num 1 0 1 1 1 1 0 1 1 1 ...
## $ X30: num 0 0 0 1 0 1 0 1 1 0 ...
## $ V1 : num 0 1 0 1 0 1 0 1 0 1 ...
## $ V2 : num 0 0 0 0 0 1 1 1 0 1 ...
## $ V3 : num 1 0 0 1 1 1 0 1 1 1 ...
## $ V4 : num 0 0 0 0 0 0 0 1 0 0 ...
## $ V5 : num 0 0 0 0 0 1 0 1 0 0 ...
## $ V6 : num 0 0 0 0 1 1 0 1 1 1 ...
## $ V7 : num 0 0 1 0 0 1 1 0 0 0 ...
## $ V8 : num 0 1 1 1 0 1 1 1 1 0 ...
## $ V9 : num 0 1 0 1 0 1 1 1 1 1 ...
## $ V10: num 1 0 0 1 0 0 0 0 0 0 ...
B <- cbind(YQ, VQ)
str(B)
## 'data.frame': 1000 obs. of 40 variables:
## $ Y1 : num 0 0 1 1 1 1 1 1 1 1 ...
## $ Y2 : num 1 1 0 0 0 1 0 0 1 1 ...
## $ Y3 : num 1 0 0 0 0 1 1 0 0 1 ...
## $ Y4 : num 1 0 1 0 0 1 1 1 1 1 ...
## $ Y5 : num 0 0 0 0 0 0 1 0 0 0 ...
## $ Y6 : num 1 0 0 0 0 0 1 0 0 0 ...
## $ Y7 : num 0 0 0 1 0 1 1 1 0 1 ...
## $ Y8 : num 0 0 1 1 1 1 1 1 0 1 ...
## $ Y9 : num 0 0 0 0 1 0 1 0 0 1 ...
## $ Y10: num 1 0 0 1 0 0 1 0 1 0 ...
## $ Y11: num 1 1 1 1 1 1 1 1 1 1 ...
## $ Y12: num 1 0 0 1 0 1 1 1 0 1 ...
## $ Y13: num 1 0 0 0 0 1 1 1 1 1 ...
## $ Y14: num 1 1 0 0 0 1 1 0 0 1 ...
## $ Y15: num 1 1 0 0 1 1 1 0 1 1 ...
## $ Y16: num 1 1 1 1 1 1 1 1 1 1 ...
## $ Y17: num 0 0 0 0 0 1 1 1 0 1 ...
## $ Y18: num 1 0 0 0 0 0 1 1 0 1 ...
## $ Y19: num 0 0 0 0 0 1 1 0 0 0 ...
## $ Y20: num 0 0 0 1 0 1 1 0 0 0 ...
## $ Y21: num 0 0 0 0 1 0 0 0 0 1 ...
## $ Y22: num 1 1 1 0 0 1 1 1 0 1 ...
## $ Y23: num 1 0 0 0 0 1 1 1 0 1 ...
## $ Y24: num 1 1 0 0 0 0 1 0 0 1 ...
## $ Y25: num 0 1 0 1 1 1 1 1 0 1 ...
## $ Y26: num 0 0 0 0 0 1 1 0 0 1 ...
## $ Y27: num 1 0 0 0 1 1 1 1 1 1 ...
## $ Y28: num 0 1 1 0 0 1 1 1 0 1 ...
## $ Y29: num 1 1 0 1 0 1 1 1 1 1 ...
## $ Y30: num 0 0 0 0 0 0 1 1 0 1 ...
## $ V1 : num 0 0 1 1 0 0 0 1 1 1 ...
## $ V2 : num 0 0 0 0 1 1 1 0 0 0 ...
## $ V3 : num 0 1 1 1 1 1 1 1 1 1 ...
## $ V4 : num 0 0 0 0 0 0 0 0 0 0 ...
## $ V5 : num 0 0 0 0 0 0 1 1 1 1 ...
## $ V6 : num 1 0 0 1 1 1 1 0 1 1 ...
## $ V7 : num 1 0 0 0 0 1 0 0 1 1 ...
## $ V8 : num 0 1 0 0 1 1 1 1 0 1 ...
## $ V9 : num 1 1 0 1 1 1 1 1 1 1 ...
## $ V10: num 0 0 1 0 1 1 0 1 0 0 ...
# Birnbaum’s three parameter model
modA <- tpm(A)
modB <- tpm(B)
# Import Item Parameters Estimates and Covariance Matrices
estA <- import.ltm(modA, display = FALSE)
estB <- import.ltm(modB, display = FALSE)
# Estimated Coefficients and Covariance Matrix
forms <- c("A", "B")
mod3pl <- modIRT(coef = list(estA$coef, estB$coef),
var = list(estA$var, estB$var),
names = forms,
display = FALSE)
# Direct Equating Coefficients Between All Pairs of a List of Forms
direclist3pl <- alldirec(mods = mod3pl, method = "Stocking-Lord")
# equating coefficients with standard errors
summary(direclist3pl)
## Link: A.B
## Method: Stocking-Lord
## Equating coefficients:
## Estimate StdErr
## A 1.0708 0.071867
## B -1.0685 0.077642
##
##
## Link: B.A
## Method: Stocking-Lord
## Equating coefficients:
## Estimate StdErr
## A 0.89723 0.065510
## B 0.98093 0.069162
# extract equating coefficients
eqc(direclist3pl)
## link A B
## 1 A.B 1.07082 -1.06847
## 2 B.A 0.89723 0.98093
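These coefficients place Form A parameters on the Form B scale through the usual linear rescaling, \(b^{*} = A \cdot b + B\) and \(a^{*} = a / A\), with the guessing parameter unchanged; the A.as.B columns further below follow this transformation. A quick hand check against anchor item V1 (Form A estimates: Dffclt 0.422116, Dscrmn 0.43104):
# hand check of the A.B rescaling for item V1
A.coef <- 1.07082 # slope A from eqc() above
B.coef <- -1.06847 # intercept B from eqc() above
A.coef * 0.422116 + B.coef # rescaled difficulty (A.as.B.Dffclt)
## [1] -0.61646
0.43104 / A.coef # rescaled discrimination (A.as.B.Dscrmn)
## [1] 0.40253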
# number of common items between a list of forms
linkp(list(estA$coef, estB$coef))
## [,1] [,2]
## [1,] 40 10
## [2,] 10 40
# Extract Item Parameters
tmp <- itm(direclist3pl, link = "A.B")
tmp$itempar <- substr(tmp$Item, 1, 6)
tmp$Item <- substr(tmp$Item, 8, 10)
# Form A
mat1 <- tmp[!is.na(tmp$A), c(1, 2, 5)]
reshape(mat1, direction = "wide", idvar = "Item", timevar = "itempar")
## Item A.Dffclt A.Dscrmn A.Gussng
## 1 V1 0.422116 0.43104 6.1271e-03
## 2 V10 0.991853 0.84509 1.6509e-01
## 3 V2 0.838824 1.23117 1.0458e-01
## 4 V3 -0.837738 1.38078 2.7466e-01
## 5 V4 2.684473 0.94429 6.5336e-06
## 6 V5 0.662318 1.62360 4.0806e-02
## 7 V6 -0.046055 1.11618 5.8524e-02
## 8 V7 -0.530318 0.89275 1.5240e-04
## 9 V8 0.207880 0.86609 2.5398e-01
## 10 V9 0.337597 1.21252 4.5905e-01
## 11 X1 -0.884817 1.45239 1.7487e-06
## 12 X10 -0.190790 1.77562 2.0855e-02
## 13 X11 1.524136 1.19727 2.4625e-01
## 14 X12 -0.661942 0.38182 3.0688e-04
## 15 X13 0.131799 2.18559 2.1564e-01
## 16 X14 0.924188 1.57276 3.8722e-02
## 17 X15 5.610634 0.73482 9.7568e-02
## 18 X16 -1.272777 0.53284 3.8520e-04
## 19 X17 0.395360 1.75936 9.4906e-03
## 20 X18 -0.169342 1.21423 2.2778e-02
## 21 X19 0.409623 2.23656 2.1343e-01
## 22 X2 0.674090 1.84595 1.4137e-01
## 23 X20 0.675596 1.26547 6.4229e-02
## 24 X21 -1.040938 1.58276 8.4884e-04
## 25 X22 -0.505662 1.71170 1.5437e-01
## 26 X23 0.538178 1.52053 1.4344e-01
## 27 X24 0.811186 1.23958 3.5889e-01
## 28 X25 -0.113873 2.70814 1.6996e-01
## 29 X26 -0.813283 0.55902 4.4732e-03
## 30 X27 0.881435 1.79706 1.9876e-01
## 31 X28 0.719707 1.13011 5.0312e-02
## 32 X29 -1.755385 1.69346 2.7889e-06
## 33 X3 0.406914 1.78274 5.2853e-02
## 34 X30 1.247810 1.91359 1.5131e-01
## 35 X4 -0.281351 1.57484 2.0812e-01
## 36 X5 2.733458 1.05947 1.5907e-01
## 37 X6 -0.740226 1.99988 6.0270e-02
## 38 X7 0.771116 1.54082 1.5272e-05
## 39 X8 0.561965 1.52754 2.1799e-01
## 40 X9 -0.211952 1.83230 1.1225e-01
# Form B
mat2 <- tmp[!is.na(tmp$B), c(1, 3, 5)]
reshape(mat2, direction = "wide", idvar = "Item", timevar = "itempar")
## Item B.Dffclt B.Dscrmn B.Gussng
## 1 V1 -0.167139 0.66157 1.3864e-01
## 2 V10 0.508466 0.88488 3.0075e-01
## 3 V2 -0.360708 1.12825 3.1941e-05
## 4 V3 -2.640483 0.96036 1.7159e-02
## 5 V4 1.707322 0.88791 2.0523e-04
## 6 V5 -0.346325 1.69339 1.1691e-03
## 7 V6 -1.393890 0.96345 1.6622e-03
## 8 V7 -1.801173 0.78621 1.8426e-03
## 9 V8 -1.774730 0.60125 8.0395e-03
## 10 V9 0.469689 1.81590 6.6483e-01
## 41 Y1 -1.301897 1.89190 9.0930e-02
## 42 Y10 1.054646 1.46101 2.0437e-01
## 43 Y11 -2.094884 2.40664 4.0629e-01
## 44 Y12 -2.056745 1.81677 3.1551e-03
## 45 Y13 -1.609218 0.61307 2.8410e-03
## 46 Y14 0.879878 1.13359 1.6988e-01
## 47 Y15 -1.088558 1.49848 2.2485e-04
## 48 Y16 -1.423389 1.96028 2.3433e-01
## 49 Y17 -0.908722 1.34721 4.5256e-05
## 50 Y18 -0.625701 1.49970 1.0931e-01
## 51 Y19 1.776612 1.51252 7.8699e-02
## 52 Y2 -0.418983 0.39383 1.3990e-02
## 53 Y20 0.497080 0.86399 9.1645e-02
## 54 Y21 0.442053 1.25839 3.2329e-05
## 55 Y22 -0.329911 1.58202 2.1055e-01
## 56 Y23 -0.923238 1.70497 2.1728e-01
## 57 Y24 -0.035186 2.29109 1.4483e-01
## 58 Y25 -0.991288 1.98788 1.7145e-01
## 59 Y26 1.217851 1.80673 1.6195e-01
## 60 Y27 -1.098266 2.21189 3.9811e-01
## 61 Y28 -1.389828 0.70759 2.1910e-03
## 62 Y29 -1.693509 2.37092 4.3126e-01
## 63 Y3 -0.023495 1.02670 2.9180e-02
## 64 Y30 -0.309548 2.08274 1.1108e-01
## 65 Y4 -1.750460 1.64563 8.9029e-04
## 66 Y5 1.276924 1.76154 1.0803e-02
## 67 Y6 1.235279 0.79923 5.3317e-03
## 68 Y7 -0.485583 1.15999 1.0771e-01
## 69 Y8 -0.876836 1.92049 2.5824e-01
## 70 Y9 -0.370329 1.04369 4.5011e-04
# Form A as form B
mat3 <- tmp[!is.na(tmp$A.as.B), c(1, 4, 5)]
reshape(mat3, direction = "wide", idvar = "Item", timevar = "itempar")
## Item A.as.B.Dffclt A.as.B.Dscrmn A.as.B.Gussng
## 1 V1 -0.6164556 0.40254 6.1271e-03
## 2 V10 -0.0063678 0.78919 1.6509e-01
## 3 V2 -0.1702353 1.14974 1.0458e-01
## 4 V3 -1.9655354 1.28946 2.7466e-01
## 5 V4 1.8061277 0.88184 6.5336e-06
## 6 V5 -0.3592418 1.51622 4.0806e-02
## 7 V6 -1.1177831 1.04236 5.8524e-02
## 8 V7 -1.6363430 0.83371 1.5240e-04
## 9 V8 -0.8458641 0.80881 2.5398e-01
## 10 V9 -0.7069598 1.13233 4.5905e-01
## 11 X1 -2.0159486 1.35633 1.7487e-06
## 12 X10 -1.2727689 1.65819 2.0855e-02
## 13 X11 0.5636125 1.11808 2.4625e-01
## 14 X12 -1.7772888 0.35657 3.0688e-04
## 15 X13 -0.9273328 2.04104 2.1564e-01
## 16 X14 -0.0788250 1.46874 3.8722e-02
## 17 X15 4.9395278 0.68622 9.7568e-02
## 18 X16 -2.4313847 0.49760 3.8520e-04
## 19 X17 -0.6451059 1.64300 9.4906e-03
## 20 X18 -1.2498014 1.13392 2.2778e-02
## 21 X19 -0.6298330 2.08864 2.1343e-01
## 22 X2 -0.3466360 1.72386 1.4137e-01
## 23 X20 -0.3450236 1.18177 6.4229e-02
## 24 X21 -2.1831264 1.47808 8.4884e-04
## 25 X22 -1.6099408 1.59849 1.5437e-01
## 26 X23 -0.4921739 1.41996 1.4344e-01
## 27 X24 -0.1998306 1.15760 3.5889e-01
## 28 X25 -1.1904043 2.52903 1.6996e-01
## 29 X26 -1.9393483 0.52204 4.4732e-03
## 30 X27 -0.1246064 1.67821 1.9876e-01
## 31 X28 -0.2977877 1.05537 5.0312e-02
## 32 X29 -2.9481724 1.58145 2.7889e-06
## 33 X3 -0.6327338 1.66483 5.2853e-02
## 34 X30 0.2677162 1.78703 1.5131e-01
## 35 X4 -1.3697440 1.47068 2.0812e-01
## 36 X5 1.8585819 0.98940 1.5907e-01
## 37 X6 -1.8611169 1.86761 6.0270e-02
## 38 X7 -0.2427381 1.43891 1.5272e-05
## 39 X8 -0.4667015 1.42651 2.1799e-01
## 40 X9 -1.2954291 1.71112 1.1225e-01
# two parameter model
modA <- ltm(A ~ z1)
modB <- ltm(B ~ z1)
# Import Item Parameters Estimates and Covariance Matrices
estA <- import.ltm(modA, display = FALSE)
estB <- import.ltm(modB, display = FALSE)
# Estimated Coefficients and Covariance Matrix
mod2pl <- modIRT(coef = list(estA$coef, estB$coef),
var = list(estA$var, estB$var),
names = forms,
display = FALSE)
# Direct equating coefficients between forms A and B
direclist2pl <- direc(mod1 = mod2pl[1],
mod2 = mod2pl[2],
method = "Haebara")
summary(direclist2pl)
## Link: A.B
## Method: Haebara
## Equating coefficients:
## Estimate StdErr
## A 0.95617 0.056912
## B -1.01774 0.064032
# extract equating coefficients
eqc(direclist2pl)
## link A B
## 1 A.B 0.95617 -1.0177
# number of common items between a list of forms
linkp(list(estA$coef, estB$coef))
## [,1] [,2]
## [1,] 40 10
## [2,] 10 40
# Extract Item Parameters
tmp <- itm(direclist2pl, link = "A.B")
tmp$itempar <- substr(tmp$Item, 1, 6)
tmp$Item <- substr(tmp$Item, 8, 10)
# Form A
mat1 <- tmp[!is.na(tmp$A), c(1, 2, 5)]
reshape(mat1, direction = "wide", idvar = "Item", timevar = "itempar")
## Item A.Dffclt A.Dscrmn
## 1 V1 0.391899 0.42580
## 2 V10 0.455193 0.59281
## 3 V2 0.600175 0.92286
## 4 V3 -1.351524 1.21008
## 5 V4 2.834631 0.88467
## 6 V5 0.576782 1.39534
## 7 V6 -0.201031 1.03596
## 8 V7 -0.538938 0.89954
## 9 V8 -0.682445 0.64422
## 10 V9 -1.356634 0.63271
## 11 X1 -0.885108 1.49454
## 12 X10 -0.257940 1.72703
## 13 X11 0.931837 0.50711
## 14 X12 -0.658961 0.38484
## 15 X13 -0.319755 1.42780
## 16 X14 0.865919 1.31392
## 17 X15 15.939050 0.12776
## 18 X16 -1.284484 0.52835
## 19 X17 0.351786 1.69471
## 20 X18 -0.238268 1.18022
## 21 X19 -0.052517 1.24506
## 22 X2 0.381138 1.17541
## 23 X20 0.531390 1.05539
## 24 X21 -1.026379 1.65946
## 25 X22 -0.782701 1.52739
## 26 X23 0.203627 1.08101
## 27 X24 -0.447387 0.60408
## 28 X25 -0.426561 1.95273
## 29 X26 -0.819296 0.56819
## 30 X27 0.463117 0.90292
## 31 X28 0.603448 0.98137
## 32 X29 -1.663698 1.81691
## 33 X3 0.284591 1.53113
## 34 X30 1.085668 0.88473
## 35 X4 -0.713111 1.29051
## 36 X5 3.639485 0.34811
## 37 X6 -0.828014 1.99710
## 38 X7 0.764366 1.49847
## 39 X8 0.011767 0.94208
## 40 X9 -0.439206 1.57915
# Form B
mat2 <- tmp[!is.na(tmp$B), c(1, 3, 5)]
reshape(mat2, direction = "wide", idvar = "Item", timevar = "itempar")
## Item B.Dffclt B.Dscrmn
## 1 V1 -0.69552 0.57591
## 2 V10 -0.64842 0.56876
## 3 V2 -0.37741 1.12464
## 4 V3 -2.57932 0.99967
## 5 V4 1.74065 0.86367
## 6 V5 -0.37168 1.69134
## 7 V6 -1.38411 0.98459
## 8 V7 -1.78606 0.79990
## 9 V8 -1.78908 0.60571
## 10 V9 -2.32283 0.61063
## 41 Y1 -1.41477 1.83973
## 42 Y10 0.61972 0.72129
## 43 Y11 -2.36531 2.34790
## 44 Y12 -1.96625 1.96463
## 45 Y13 -1.60501 0.62096
## 46 Y14 0.44079 0.72165
## 47 Y15 -1.09297 1.52755
## 48 Y16 -1.70046 1.82250
## 49 Y17 -0.91678 1.36916
## 50 Y18 -0.84478 1.36230
## 51 Y19 2.05734 0.79022
## 52 Y2 -0.49830 0.38793
## 53 Y20 0.21760 0.73540
## 54 Y21 0.43322 1.23709
## 55 Y22 -0.79583 1.21534
## 56 Y23 -1.29792 1.47136
## 57 Y24 -0.32464 1.63951
## 58 Y25 -1.24500 1.79039
## 59 Y26 1.04726 0.81666
## 60 Y27 -1.69425 1.73836
## 61 Y28 -1.39051 0.71509
## 62 Y29 -2.11917 2.14348
## 63 Y3 -0.10919 0.97900
## 64 Y30 -0.52699 1.73425
## 65 Y4 -1.69618 1.74984
## 66 Y5 1.31619 1.54471
## 67 Y6 1.23027 0.77786
## 68 Y7 -0.74907 1.03857
## 69 Y8 -1.31004 1.58012
## 70 Y9 -0.38532 1.04586
# Form A as form B
mat3 <- tmp[!is.na(tmp$A.as.B), c(1, 4, 5)]
reshape(mat3, direction = "wide", idvar = "Item", timevar = "itempar")
## Item A.as.B.Dffclt A.as.B.Dscrmn
## 1 V1 -0.643018 0.44532
## 2 V10 -0.582498 0.61998
## 3 V2 -0.443871 0.96516
## 4 V3 -2.310028 1.26554
## 5 V4 1.692651 0.92522
## 6 V5 -0.466238 1.45930
## 7 V6 -1.209961 1.08345
## 8 V7 -1.533057 0.94077
## 9 V8 -1.670274 0.67375
## 10 V9 -2.314914 0.66171
## 11 X1 -1.864055 1.56305
## 12 X10 -1.264375 1.80620
## 13 X11 -0.126745 0.53035
## 14 X12 -1.647819 0.40249
## 15 X13 -1.323480 1.49324
## 16 X14 -0.189774 1.37414
## 17 X15 14.222714 0.13361
## 18 X16 -2.245926 0.55257
## 19 X17 -0.681372 1.77239
## 20 X18 -1.245565 1.23432
## 21 X19 -1.067956 1.30213
## 22 X2 -0.653308 1.22929
## 23 X20 -0.509641 1.10377
## 24 X21 -1.999134 1.73553
## 25 X22 -1.766136 1.59741
## 26 X23 -0.823038 1.13056
## 27 X24 -1.445519 0.63177
## 28 X25 -1.425606 2.04224
## 29 X26 -1.801127 0.59424
## 30 X27 -0.574922 0.94431
## 31 X28 -0.440741 1.02636
## 32 X29 -2.608519 1.90020
## 33 X3 -0.745623 1.60131
## 34 X30 0.020343 0.92528
## 35 X4 -1.699596 1.34967
## 36 X5 2.462228 0.36407
## 37 X6 -1.809463 2.08864
## 38 X7 -0.286876 1.56716
## 39 X8 -1.006489 0.98527
## 40 X9 -1.437696 1.65154
# Rasch model
modA <- rasch(A)
modB <- rasch(B)
# Import Item Parameters Estimates and Covariance Matrices
estA <- import.ltm(modA, display = FALSE)
estB <- import.ltm(modB, display = FALSE)
# Estimated Coefficients and Covariance Matrix
modrasch <- modIRT(coef = list(estA$coef, estB$coef),
var = list(estA$var, estB$var),
names = forms,
display = FALSE)
# Direct equating coefficients between forms A and B
direclistrasch <- direc(mod1 = modrasch[1],
mod2 = modrasch[2],
method = "mean-mean")
summary(direclistrasch)
## Link: A.B
## Method: mean-mean
## Equating coefficients:
## Estimate StdErr
## A 0.97951 0.056944
## B -1.09270 0.091556
# extract equating coefficients
eqc(direclistrasch)
## link A B
## 1 A.B 0.97951 -1.0927
# number of common items between a list of forms
linkp(list(estA$coef, estB$coef))
## [,1] [,2]
## [1,] 40 10
## [2,] 10 40
# Extract Item Parameters
tmp <- itm(direclistrasch, link = "A.B")
tmp$itempar <- substr(tmp$Item, 1, 6)
tmp$Item <- substr(tmp$Item, 8, 10)
# Form A
mat1 <- tmp[!is.na(tmp$A), c(1, 2, 5)]
reshape(mat1, direction = "wide", idvar = "Item", timevar = "itempar")
## Item A.Dffclt A.Dscrmn
## 1 V1 0.391899 0.42580
## 2 V10 0.455193 0.59281
## 3 V2 0.600175 0.92286
## 4 V3 -1.351524 1.21008
## 5 V4 2.834631 0.88467
## 6 V5 0.576782 1.39534
## 7 V6 -0.201031 1.03596
## 8 V7 -0.538938 0.89954
## 9 V8 -0.682445 0.64422
## 10 V9 -1.356634 0.63271
## 11 X1 -0.885108 1.49454
## 12 X10 -0.257940 1.72703
## 13 X11 0.931837 0.50711
## 14 X12 -0.658961 0.38484
## 15 X13 -0.319755 1.42780
## 16 X14 0.865919 1.31392
## 17 X15 15.939050 0.12776
## 18 X16 -1.284484 0.52835
## 19 X17 0.351786 1.69471
## 20 X18 -0.238268 1.18022
## 21 X19 -0.052517 1.24506
## 22 X2 0.381138 1.17541
## 23 X20 0.531390 1.05539
## 24 X21 -1.026379 1.65946
## 25 X22 -0.782701 1.52739
## 26 X23 0.203627 1.08101
## 27 X24 -0.447387 0.60408
## 28 X25 -0.426561 1.95273
## 29 X26 -0.819296 0.56819
## 30 X27 0.463117 0.90292
## 31 X28 0.603448 0.98137
## 32 X29 -1.663698 1.81691
## 33 X3 0.284591 1.53113
## 34 X30 1.085668 0.88473
## 35 X4 -0.713111 1.29051
## 36 X5 3.639485 0.34811
## 37 X6 -0.828014 1.99710
## 38 X7 0.764366 1.49847
## 39 X8 0.011767 0.94208
## 40 X9 -0.439206 1.57915
# Form B
mat2 <- tmp[!is.na(tmp$B), c(1, 3, 5)]
reshape(mat2, direction = "wide", idvar = "Item", timevar = "itempar")
## Item B.Dffclt B.Dscrmn
## 1 V1 -0.69552 0.57591
## 2 V10 -0.64842 0.56876
## 3 V2 -0.37741 1.12464
## 4 V3 -2.57932 0.99967
## 5 V4 1.74065 0.86367
## 6 V5 -0.37168 1.69134
## 7 V6 -1.38411 0.98459
## 8 V7 -1.78606 0.79990
## 9 V8 -1.78908 0.60571
## 10 V9 -2.32283 0.61063
## 41 Y1 -1.41477 1.83973
## 42 Y10 0.61972 0.72129
## 43 Y11 -2.36531 2.34790
## 44 Y12 -1.96625 1.96463
## 45 Y13 -1.60501 0.62096
## 46 Y14 0.44079 0.72165
## 47 Y15 -1.09297 1.52755
## 48 Y16 -1.70046 1.82250
## 49 Y17 -0.91678 1.36916
## 50 Y18 -0.84478 1.36230
## 51 Y19 2.05734 0.79022
## 52 Y2 -0.49830 0.38793
## 53 Y20 0.21760 0.73540
## 54 Y21 0.43322 1.23709
## 55 Y22 -0.79583 1.21534
## 56 Y23 -1.29792 1.47136
## 57 Y24 -0.32464 1.63951
## 58 Y25 -1.24500 1.79039
## 59 Y26 1.04726 0.81666
## 60 Y27 -1.69425 1.73836
## 61 Y28 -1.39051 0.71509
## 62 Y29 -2.11917 2.14348
## 63 Y3 -0.10919 0.97900
## 64 Y30 -0.52699 1.73425
## 65 Y4 -1.69618 1.74984
## 66 Y5 1.31619 1.54471
## 67 Y6 1.23027 0.77786
## 68 Y7 -0.74907 1.03857
## 69 Y8 -1.31004 1.58012
## 70 Y9 -0.38532 1.04586
# Form A as form B
mat3 <- tmp[!is.na(tmp$A.as.B), c(1, 4, 5)]
reshape(mat3, direction = "wide", idvar = "Item", timevar = "itempar")
## Item A.as.B.Dffclt A.as.B.Dscrmn
## 1 V1 -0.708829 0.43471
## 2 V10 -0.646832 0.60521
## 3 V2 -0.504822 0.94217
## 4 V3 -2.416525 1.23539
## 5 V4 1.683845 0.90317
## 6 V5 -0.527735 1.42453
## 7 V6 -1.289609 1.05763
## 8 V7 -1.620591 0.91836
## 9 V8 -1.761157 0.65770
## 10 V9 -2.421531 0.64595
## 11 X1 -1.959667 1.52581
## 12 X10 -1.345352 1.76317
## 13 X11 -0.179956 0.51772
## 14 X12 -1.738154 0.39290
## 15 X13 -1.405899 1.45767
## 16 X14 -0.244523 1.34140
## 17 X15 14.519721 0.13043
## 18 X16 -2.350859 0.53940
## 19 X17 -0.748120 1.73017
## 20 X18 -1.326082 1.20491
## 21 X19 -1.144138 1.27111
## 22 X2 -0.719370 1.20000
## 23 X20 -0.572197 1.07747
## 24 X21 -2.098044 1.69418
## 25 X22 -1.859359 1.55935
## 26 X23 -0.893244 1.10363
## 27 X24 -1.530916 0.61671
## 28 X25 -1.510517 1.99359
## 29 X26 -1.895204 0.58008
## 30 X27 -0.639071 0.92181
## 31 X28 -0.501616 1.00190
## 32 X29 -2.722302 1.85492
## 33 X3 -0.813939 1.56316
## 34 X30 -0.029278 0.90324
## 35 X4 -1.791195 1.31751
## 36 X5 2.472205 0.35540
## 37 X6 -1.903743 2.03888
## 38 X7 -0.343995 1.52982
## 39 X8 -1.081172 0.96179
## 40 X9 -1.522903 1.61219
Theory and Practice of Item Response Theory Methodology in the Social Sciences (Ayala 2009):
Uniform DIF is the simplest type of DIF where the magnitude of conditional dependency is relatively invariant across the latent trait continuum \((\Theta)\). The item of interest consistently gives one group an advantage across all levels of ability. Within an item response theory (IRT) framework this would be evidenced when both item characteristic curves (ICC) are equally discriminating yet exhibit differences in the difficulty parameters (i.e., \(a_r = a_f\) and \(b_r < b_f\)) as depicted below.
# parameter matrix for uniform DIF
parameter.matrix <- matrix(c(1, -.5, 0,
1, .5, 0), 2, 3, byrow = TRUE)
source("IRF.R")
IRF(parameter.matrix, 1, irf.plot = TRUE)
## $probabilities
## [,1] [,2]
## [1,] 0.81757 0.62246
##
## $expected.score
## [1] 72.002
# set color scheme
palette(rainbow(2)) # set the plotting palette first
mycolors <- palette() # then capture the current palette colors
# legend
legend(-4, 1, c("Reference Group", "Focal Group"),
col = c(mycolors[1], mycolors[2]),
text.col = c(mycolors[1], mycolors[2]),
lty = 1,
lwd = 5,
bty = "n",
merge = TRUE)
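IRF.R is a local helper script not included in this document. Assuming it plots standard (unscaled) logistic ICCs, which matches the probabilities printed above, a self-contained sketch of the same two curves is:
# self-contained sketch of the two ICCs (uniform DIF: equal a, different b)
icc <- function(theta, a, b, c = 0) c + (1 - c) / (1 + exp(-a * (theta - b)))
theta <- seq(-4, 4, by = 0.1)
plot(theta, icc(theta, a = 1, b = -0.5), type = "l",
col = mycolors[1], lwd = 5, ylim = c(0, 1),
xlab = expression(Theta), ylab = expression(P(Theta)))
lines(theta, icc(theta, a = 1, b = 0.5), col = mycolors[2], lwd = 5)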
Theory and Practice of Item Response Theory Methodology in the Social Sciences (Ayala 2009):
In nonuniform DIF, rather than a consistent advantage being given to the reference group across the ability continuum, the conditional dependency moves and changes direction at different locations on the \(\Theta\) continuum. For instance, an item may give the reference group a minor advantage at the lower end of the continuum and a major advantage at the higher end. Also, unlike uniform DIF, an item can simultaneously vary in discrimination for the two groups while also varying in difficulty (i.e., \(a_r \neq a_f\) and \(b_r < b_f\)).
# parameter matrix for nonuniform DIF
parameter.matrix <- matrix(c(1.3, 0, 0,
0.7, 0, 0), 2, 3, byrow = TRUE)
source("IRF.R")
IRF(parameter.matrix, irf.plot = TRUE)
## $probabilities
## [,1] [,2]
## [1,] 0.5 0.5
##
## $expected.score
## [1] 50
# legend
legend(-4, 1, c("Reference Group", "Focal Group"),
col = c(mycolors[1], mycolors[2]),
text.col = c(mycolors[1], mycolors[2]),
lty = 1,
lwd = 5,
bty = "n",
merge = TRUE)
# import data
dif.data <- read.csv("test2.csv", header = TRUE, sep = ",")
# check
head(dif.data)
## Gender V1 V2 V3 V4 V5 V6 V7 V8 V9 V10 V11 V12 V13 V14 V15 V16 V17 V18
## 1 1 1 1 1 1 0 1 1 0 0 1 1 0 0 0 1 1 1 1
## 2 1 1 1 1 1 0 1 1 1 1 1 1 1 0 1 1 0 0 1
## 3 1 1 1 0 0 1 1 1 0 0 1 1 0 1 1 0 0 0 0
## 4 1 0 1 0 1 1 1 0 0 0 0 0 0 0 0 0 1 0 0
## 5 1 1 1 1 1 1 1 0 1 1 1 1 1 1 1 1 1 1 1
## 6 1 0 1 1 0 0 1 0 0 0 0 1 1 1 0 0 0 0 0
## V19 V20 V21 V22 V23 V24 V25
## 1 1 1 1 1 1 1 1
## 2 1 1 1 1 0 1 1
## 3 1 0 1 1 0 1 0
## 4 0 0 0 1 1 1 0
## 5 1 1 1 1 1 1 1
## 6 1 0 0 1 0 1 0
str(dif.data)
## 'data.frame': 1720 obs. of 26 variables:
## $ Gender: int 1 1 1 1 1 1 1 1 1 1 ...
## $ V1 : int 1 1 1 0 1 0 0 1 0 1 ...
## $ V2 : int 1 1 1 1 1 1 0 1 1 1 ...
## $ V3 : int 1 1 0 0 1 1 1 1 1 1 ...
## $ V4 : int 1 1 0 1 1 0 1 1 0 1 ...
## $ V5 : int 0 0 1 1 1 0 0 1 0 1 ...
## $ V6 : int 1 1 1 1 1 1 1 1 1 1 ...
## $ V7 : int 1 1 1 0 0 0 1 1 1 1 ...
## $ V8 : int 0 1 0 0 1 0 0 0 0 0 ...
## $ V9 : int 0 1 0 0 1 0 1 1 0 0 ...
## $ V10 : int 1 1 1 0 1 0 0 1 1 1 ...
## $ V11 : int 1 1 1 0 1 1 1 1 1 0 ...
## $ V12 : int 0 1 0 0 1 1 1 1 0 1 ...
## $ V13 : int 0 0 1 0 1 1 0 1 0 1 ...
## $ V14 : int 0 1 1 0 1 0 1 1 0 1 ...
## $ V15 : int 1 1 0 0 1 0 1 1 1 0 ...
## $ V16 : int 1 0 0 1 1 0 0 0 0 0 ...
## $ V17 : int 1 0 0 0 1 0 0 1 1 0 ...
## $ V18 : int 1 1 0 0 1 0 0 1 0 0 ...
## $ V19 : int 1 1 1 0 1 1 0 1 1 1 ...
## $ V20 : int 1 1 0 0 1 0 1 1 0 1 ...
## $ V21 : int 1 1 1 0 1 0 0 1 0 0 ...
## $ V22 : int 1 1 1 1 1 1 1 1 0 1 ...
## $ V23 : int 1 0 0 1 1 0 0 0 0 1 ...
## $ V24 : int 1 1 1 1 1 1 1 1 1 1 ...
## $ V25 : int 1 1 0 0 1 0 0 1 1 1 ...
test.data <- dif.data[,-1]
# Add a column of raw scores
dif.data$scores <- rowSums(test.data)
summary(dif.data$scores)
## Min. 1st Qu. Median Mean 3rd Qu. Max.
## 3.0 11.0 14.0 14.8 19.0 25.0
# Add a column of quartile-based score groups (stored in a column named 'deciles')
quantile(dif.data$scores, probs = seq(0.25, 1, by = 0.25))
## 25% 50% 75% 100%
## 11 14 19 25
dif.data$deciles <- cut(dif.data$scores,
breaks = c(-Inf, quantile(dif.data$scores,
probs = seq(0.25, 1, by = 0.25)),
Inf))
summary(dif.data$deciles)
## (-Inf,11] (11,14] (14,19] (19,25] (25, Inf]
## 537 342 466 375 0
difresults <- difR::difMH(dif.data[, 1:26],
group = "Gender",
focal.name = 1,
alpha = 0.01)
difresults
##
## Detection of Differential Item Functioning using Mantel-Haenszel method
## with continuity correction and without item purification
##
## Results based on asymptotic inference
##
## No set of anchor items was provided
##
## Mantel-Haenszel Chi-square statistic:
##
## Stat. P-value
## V1 1.6940 0.1931
## V2 0.6207 0.4308
## V3 1.0023 0.3168
## V4 0.0426 0.8364
## V5 0.0019 0.9653
## V6 0.8315 0.3618
## V7 3.1715 0.0749 .
## V8 2.5334 0.1115
## V9 0.0035 0.9531
## V10 4.3188 0.0377 *
## V11 102.7551 0.0000 ***
## V12 0.0832 0.7730
## V13 3.3598 0.0668 .
## V14 1.8200 0.1773
## V15 0.0532 0.8176
## V16 0.0414 0.8388
## V17 0.1296 0.7189
## V18 0.7640 0.3821
## V19 0.0238 0.8773
## V20 0.0033 0.9544
## V21 0.8746 0.3497
## V22 1.2326 0.2669
## V23 0.3873 0.5337
## V24 0.8383 0.3599
## V25 6.7209 0.0095 **
##
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## Detection threshold: 6.6349 (significance level: 0.01)
##
## Items detected as DIF items:
##
## V11
## V25
##
##
## Effect size (ETS Delta scale):
##
## Effect size code:
## 'A': negligible effect
## 'B': moderate effect
## 'C': large effect
##
## alphaMH deltaMH
## V1 1.1980 -0.4246 A
## V2 1.1048 -0.2342 A
## V3 0.8805 0.2990 A
## V4 1.0325 -0.0753 A
## V5 0.9987 0.0031 A
## V6 1.6431 -1.1670 B
## V7 1.2160 -0.4596 A
## V8 1.1955 -0.4196 A
## V9 1.0119 -0.0277 A
## V10 1.2798 -0.5798 A
## V11 0.3034 2.8025 C
## V12 1.0408 -0.0941 A
## V13 1.2322 -0.4906 A
## V14 1.1818 -0.3926 A
## V15 1.0313 -0.0724 A
## V16 0.9719 0.0670 A
## V17 1.0461 -0.1059 A
## V18 0.8985 0.2515 A
## V19 1.0276 -0.0640 A
## V20 1.0131 -0.0307 A
## V21 1.1292 -0.2855 A
## V22 1.3152 -0.6439 A
## V23 1.0751 -0.1701 A
## V24 0.8065 0.5054 A
## V25 0.7244 0.7577 A
##
## Effect size codes: 0 'A' 1.0 'B' 1.5 'C'
## (for absolute values of 'deltaMH')
plot(difresults)
difBD <- difR::difBD(dif.data[, 1:26],
group = "Gender",
focal.name = 2,
purify = FALSE,
alpha = 0.01)
difBD
##
## Detection of Differential Item Functioning using Breslow-Day method
## without item purification
##
## No set of anchor items was provided
##
## Breslow-Day statistic:
##
## Stat. df P-value
## V1 13.9049 18.0000 0.7353
## V2 29.5451 20.0000 0.0776 .
## V3 21.5465 17.0000 0.2028
## V4 15.4914 18.0000 0.6280
## V5 19.5482 20.0000 0.4865
## V6 7.6583 7.0000 0.3637
## V7 19.1563 19.0000 0.4469
## V8 28.3599 19.0000 0.0767 .
## V9 17.8732 20.0000 0.5958
## V10 12.4255 18.0000 0.8245
## V11 37.9025 17.0000 0.0025 **
## V12 19.6895 19.0000 0.4135
## V13 19.0885 19.0000 0.4512
## V14 29.8974 19.0000 0.0531 .
## V15 15.7983 20.0000 0.7291
## V16 15.0052 18.0000 0.6616
## V17 24.9896 19.0000 0.1609
## V18 15.9941 18.0000 0.5930
## V19 10.0502 17.0000 0.9015
## V20 33.0567 20.0000 0.0333 *
## V21 9.4499 16.0000 0.8937
## V22 13.0381 12.0000 0.3663
## V23 126.4894 19.0000 0.0000 ***
## V24 14.9394 13.0000 0.3112
## V25 17.5889 16.0000 0.3485
##
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## Significance level: 0.01
##
## Items detected as DIF items:
##
## V11
## V23
plot(difBD)
logistic.dif <- difR::difLogistic(dif.data[c(1:26)],
group = "Gender",
focal.name = 2,
purify = FALSE)
logistic.dif
##
## Detection of both types of Differential Item Functioning
## using Logistic regression method, without item purification
## and with LRT DIF statistic
##
## Matching variable: test score
##
## No set of anchor items was provided
##
## Logistic regression DIF statistic:
##
## Stat. P-value
## V1 3.2193 0.2000
## V2 2.0670 0.3558
## V3 1.6569 0.4367
## V4 0.0922 0.9549
## V5 4.0223 0.1338
## V6 1.8693 0.3927
## V7 3.9788 0.1368
## V8 5.1314 0.0769 .
## V9 0.6488 0.7230
## V10 5.8281 0.0543 .
## V11 109.9090 0.0000 ***
## V12 0.3015 0.8601
## V13 6.3557 0.0417 *
## V14 8.4932 0.0143 *
## V15 1.8673 0.3931
## V16 1.2384 0.5384
## V17 2.8499 0.2405
## V18 1.4416 0.4864
## V19 0.0354 0.9824
## V20 1.9955 0.3687
## V21 1.3598 0.5067
## V22 1.8036 0.4058
## V23 103.7505 0.0000 ***
## V24 1.2068 0.5470
## V25 6.1843 0.0454 *
##
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## Detection threshold: 5.9915 (significance level: 0.05)
##
## Items detected as DIF items:
##
## V11
## V13
## V14
## V23
## V25
##
##
## Effect size (Nagelkerke's R^2):
##
## Effect size code:
## 'A': negligible effect
## 'B': moderate effect
## 'C': large effect
##
## R^2 ZT JG
## V1 0.0014 A A
## V2 0.0010 A A
## V3 0.0007 A A
## V4 0.0000 A A
## V5 0.0018 A A
## V6 0.0082 A A
## V7 0.0020 A A
## V8 0.0026 A A
## V9 0.0004 A A
## V10 0.0026 A A
## V11 0.0473 A B
## V12 0.0001 A A
## V13 0.0033 A A
## V14 0.0037 A A
## V15 0.0010 A A
## V16 0.0006 A A
## V17 0.0014 A A
## V18 0.0006 A A
## V19 0.0000 A A
## V20 0.0009 A A
## V21 0.0005 A A
## V22 0.0027 A A
## V23 0.0472 A B
## V24 0.0016 A A
## V25 0.0025 A A
##
## Effect size codes:
## Zumbo & Thomas (ZT): 0 'A' 0.13 'B' 0.26 'C' 1
## Jodoin & Gierl (JG): 0 'A' 0.035 'B' 0.07 'C' 1
plot(logistic.dif)
# Males
male.data <- subset(dif.data, subset = Gender == 1)
aggregate.male <- aggregate(data.matrix(male.data),
by = list(male.data$scores),
FUN = mean,
simplify = TRUE)
thick.male <- aggregate(data.matrix(male.data),
by = list(male.data$deciles),
FUN = mean,
simplify = TRUE)
# Females
female.data <- subset(dif.data, subset = Gender == 2)
aggregate.female <- aggregate(data.matrix(female.data),
by = list(female.data$scores),
FUN = mean,
simplify = TRUE)
thick.female <- aggregate(data.matrix(female.data),
by = list(female.data$deciles),
FUN = mean,
simplify = TRUE)
Item 11 (uniform DIF)
dif.plot <-
function(group1score, group2score, group1item, group2item){
# graph boundaries
xmin <- min(c(group1score, group2score))
xmax <- max(c(group1score, group2score))
ymin <- min(c(group1item, group2item))
ymax <- max(c(group1item, group2item))
# group 1
plot(group1score,
group1item,
type = "l",
xlim = c(xmin, xmax),
ylim = c(ymin, ymax),
col = adjustcolor("dodgerblue", alpha.f = .6),
lwd = 6,
axes = FALSE,
xlab = "",
ylab = "")
# group 2
lines(group2score,
group2item,
type = "l",
col = adjustcolor("tomato", alpha.f = .6),
lwd = 6)
# x-axis
axis(1, pretty(c(xmin, xmax), 10))
mtext("Score",
side = 1,
col = "black",
line = 2.5)
box()
# y-axis
axis(2, pretty(c(ymin, ymax), 10),
las = 1,
col = "black",
col.axis = "black")
mtext(expression(paste("Probability of correct response, ", P(Theta))),
side = 2,
col = "black",
line = 2.5)
# legend
legend(xmin, ymax,
c("male", "female"),
col = c("dodgerblue", "tomato"),
text.col = c("dodgerblue", "tomato"),
lty = 1,
lwd = 5,
bty = "n",
merge = TRUE)
}
dif.plot(thick.male$scores, thick.female$scores, thick.male$V11, thick.female$V11)
title(main = "Differential item functioning by gender\n (Item 11)")
Mantel-Haenszel chi-squared test with continuity correction
mantelhaen.test(table(dif.data$Gender, dif.data$V11, dif.data$scores))
##
## Mantel-Haenszel chi-squared test with continuity correction
##
## data: table(dif.data$Gender, dif.data$V11, dif.data$scores)
## Mantel-Haenszel X-squared = 103, df = 1, p-value <2e-16
## alternative hypothesis: true common odds ratio is not equal to 1
## 95 percent confidence interval:
## 0.23941 0.38461
## sample estimates:
## common odds ratio
## 0.30344
Woolf-test on Homogeneity of Odds Ratios (a significant result indicates that the odds ratios vary across score levels, which is consistent with nonuniform DIF)
vcd::woolf_test(table(dif.data$Gender, dif.data$V11, dif.data$deciles))
##
## Woolf-test on Homogeneity of Odds Ratios (no 3-Way assoc.)
##
## data: table(dif.data$Gender, dif.data$V11, dif.data$deciles)
## X-squared = 12.4, df = 4, p-value = 0.015
Item 23 (non-uniform DIF)
dif.plot(thick.male$scores, thick.female$scores, thick.male$V23, thick.female$V23)
title(main = "Differential item functioning by gender\n (Item 23)")
# Mantel-Haenszel chi-squared test with continuity correction
mantelhaen.test(table(dif.data$Gender, dif.data$V23, dif.data$scores))
##
## Mantel-Haenszel chi-squared test with continuity correction
##
## data: table(dif.data$Gender, dif.data$V23, dif.data$scores)
## Mantel-Haenszel X-squared = 0.387, df = 1, p-value = 0.53
## alternative hypothesis: true common odds ratio is not equal to 1
## 95 percent confidence interval:
## 0.86699 1.33311
## sample estimates:
## common odds ratio
## 1.0751
# Woolf-test on Homogeneity of Odds Ratios
vcd::woolf_test(table(dif.data$Gender, dif.data$V23, dif.data$deciles))
##
## Woolf-test on Homogeneity of Odds Ratios (no 3-Way assoc.)
##
## data: table(dif.data$Gender, dif.data$V23, dif.data$deciles)
## X-squared = 63.3, df = 4, p-value = 5.8e-13
Taken together: item 11 shows uniform DIF (a common odds ratio well below 1, with the Woolf test not rejecting homogeneity at the 0.01 level), whereas item 23 shows nonuniform DIF (a common odds ratio near 1, but strongly heterogeneous odds ratios across score levels).
Albano, A. 2015. Observed-Score Linking and Equating. R package version 2.0-3.
Ayala, R. J. de. 2009. Theory and Practice of Item Response Theory Methodology in the Social Sciences. New York, NY: Guilford Publications.
Braun, H. I., and P. W. Holland. 1982. "Observed-Score Test Equating: A Mathematical Analysis of Some ETS Equating Procedures." In P. W. Holland and D. B. Rubin (Eds.), Test Equating. New York: Academic Press.
DeMars, C. 2010. Item Response Theory. New York: Oxford University Press.
Holland, P. W., and D. T. Thayer. 2000. “Univariate and Bivariate Loglinear Models for Discrete Test Score Distributions.” Journal of Educational and Behavioral Statistics 25: 133–83.
Kolen, M. J. 1984. “Effectiveness of Analytic Smoothing in Equipercentile Equating.” Journal of Educational Statistics 9: 25–44.
Kolen, M. J., and R. L. Brennan. 2013. Test Equating, Scaling, and Linking. 3rd ed. New York: Springer.
Livingston, S. A., and S. Kim. 2009. "The Circle-Arc Method for Equating in Small Samples." Journal of Educational Measurement 46: 330–43.
Rasch, G. 1960. Probabilistic Models for Some Intelligence and Attainment Tests. Copenhagen: Danish Institute for Educational Research.
"Rasch Measurement: The Dichotomous Model." 2004. In E. V. Smith, Jr., and R. M. Smith (Eds.), Introduction to Rasch Measurement: Theory, Models, and Applications, 226–57. Maple Grove, MN: JAM Press.
Wright, B. D. 1997. “A History of Social Science Measurement.” Educational Measurement: Issues and Practice 16 (4): 33–45, 52.
Wright, B. D., and M. H. Stone. 1979. Best Test Design. Chicago: MESA Press.