rm(list=ls()); gc(full=T); objects()
set.seed(10012)
# Total scores, matched to the item sets used below:
# polknowMC from the pk_ items, polknowTF from the pkT_/pkF_ items
gmo$polknowMC<-(gmo$pk_medmarij+gmo$pk_nafta+gmo$pk_parisagree+
gmo$pk_guantanamo+gmo$pk_gdpgrow+gmo$pk_deficit2018)
table(gmo$polknowMC)
gmo$polknowTF<-(gmo$pkT_medmarij+gmo$pkF_nafta+gmo$pkT_parisagree+
gmo$pkF_guantanamo+gmo$pkT_gdpgrow+gmo$pkF_deficit2018)
table(gmo$polknowTF)
polMC<-subset(gmo, select=c(pk_medmarij, pk_nafta, pk_parisagree,
pk_guantanamo, pk_deficit2018, pk_gdpgrow,
polknowMC))
psych::describe(polMC); polMC<-na.omit(polMC) # Remove NAs
polTF<-subset(gmo, select=c(pkT_medmarij, pkF_nafta, pkT_parisagree,
pkF_guantanamo, pkF_deficit2018, pkT_gdpgrow, polknowTF))
psych::describe(polTF); polTF<-na.omit(polTF) # Remove NAs
pol.corMC <- cor(polMC, use="complete.obs")
pol.corMC
pol.corTF <- cor(polTF, use="complete.obs")
pol.corTF
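# Optional sketch (not part of the original workflow): the items are binary, so
# Pearson correlations understate the associations; tetrachoric correlations from
# the psych package are often preferred for 0/1 knowledge items.
psych::tetrachoric(polMC[,1:6])$rho
psych::tetrachoric(polTF[,1:6])$rho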
# Basic descriptive information about the items; the psychometric package
# provides all of it via item.exam() and alpha().
psychometric::item.exam(polMC[,1:6], discrim=TRUE, y=polMC[,7])
psychometric::item.exam(polTF[,1:6], discrim=TRUE, y=polTF[,7])
psychometric::alpha(polMC[,1:6]) # Cronbach's alpha on the items only (total-score column excluded)
psychometric::alpha(polTF[,1:6])
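# Optional sketch: psych::alpha() (a different function than psychometric::alpha())
# also reports alpha-if-item-dropped, handy for spotting items that lower reliability.
# The total-score column is excluded here.
psych::alpha(polMC[,1:6])
psych::alpha(polTF[,1:6])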
# Item.total: correlation between items and the total score.
# Item.Tot.woi: correlation between items and the total score by omitting the item.
# Difficulty: proportion of correct responses.
# Discrimination: values above 0.40 are satisfactory and values below 0.19 suggest removal.
?psychometric::item.exam # help page describes each output column
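# A minimal sketch of how the 0.19 rule of thumb could be applied, assuming the
# Discrimination column name in the item.exam() output; examMC is a new helper object.
examMC <- psychometric::item.exam(polMC[,1:6], discrim=TRUE, y=polMC[,7])
names(polMC)[1:6][examMC$Discrimination < 0.19] # candidates for removal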
polMC<-subset(gmo, select=c(pk_medmarij, pk_nafta, pk_parisagree,
pk_guantanamo, pk_deficit2018, pk_gdpgrow))
psych::describe(polMC); polMC<-na.omit(polMC)
polTF<-subset(gmo, select=c(pkT_medmarij, pkF_nafta, pkT_parisagree,
pkF_guantanamo, pkF_deficit2018, pkT_gdpgrow))
psych::describe(polTF); polTF<-na.omit(polTF)
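# Quick sanity check (a sketch): the Rasch/IRT fits below assume 0/1 coding,
# so verify that every item takes only the values 0 and 1.
sapply(polMC, function(x) sort(unique(x)))
sapply(polTF, function(x) sort(unique(x)))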
# "ltm" package
# # --------------------------------------------------------------------------------------
# Discrimination fixed at 1 (not allowed to vary)
fitMC <- ltm::rasch(polMC, constraint=cbind(ncol(polMC) +1, 1)) # N of cols, +1, 1 (fixed)
summary(fitMC) # 'value' column is each item difficulty + std.err
# Model Summary: logLik, AIC, and BIC are model fit stats
# Rasch for binary variables (0 or 1)
fitTF <- ltm::rasch(polTF, constraint=cbind(ncol(polTF) +1, 1)) # N of cols, +1, 1 (fixed)
summary(fitTF) # 'value' column is each item difficulty + std.err
# ICC plot
# # --------------------------------------------------------------------------------------
# the discrimination (slope) parameter was fixed at 1 above, so all items share the same slope.
plot(fitMC, type=c("ICC"))
plot(fitTF, type=c("ICC"))
# the curves are parallel because the common slope was imposed during estimation
# difficulty coefficients ordered from lowest to highest
coef(fitMC, prob=TRUE, order=TRUE)
coef(fitTF, prob=TRUE, order=TRUE)
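# A sketch of what prob=TRUE reports, assuming the default IRT parameterization
# (coefficient columns "Dffclt" and "Dscrmn"): P(correct | theta = 0) =
# plogis(Dscrmn * (0 - Dffclt)), which reduces to plogis(-Dffclt) when Dscrmn = 1.
cfMC <- coef(fitMC) # cfMC is a new helper object
plogis(cfMC[, "Dscrmn"] * (0 - cfMC[, "Dffclt"]))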
# Test of absolute fit: we want to fail to reject the null.
# # --------------------------------------------------------------------------------------
# Null: the model fits the data (chi-square test)
ltm::GoF.rasch(fitMC, B=1000)
ltm::GoF.rasch(fitTF, B=1000)
ltm::item.fit(fitMC, simulate.p.value = T)
ltm::item.fit(fitTF, simulate.p.value = T)
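# Optional sketch: person.fit() is the respondent-level counterpart of item.fit(),
# flagging response patterns that are unlikely under the fitted Rasch model.
ltm::person.fit(fitMC, simulate.p.value = T)
ltm::person.fit(fitTF, simulate.p.value = T)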
# Latent trait estimates for Rasch: theta scores for each individual in the sample.
# # --------------------------------------------------------------------------------------
theta.raschMC<-ltm::factor.scores(fitMC)
plot(theta.raschMC)
summary(theta.raschMC$score.dat$z1)
sqrt(var(theta.raschMC$score.dat$z1))
theta.raschTF<-ltm::factor.scores(fitTF)
plot(theta.raschTF)
summary(theta.raschTF$score.dat$z1)
sqrt(var(theta.raschTF$score.dat$z1))
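# A sketch: factor.scores() above returns one theta per observed response pattern;
# passing resp.patterns scores every row of the data instead, giving a theta score
# for each respondent in the analysis sample. thetaMC.all is a new helper object.
thetaMC.all <- ltm::factor.scores(fitMC, resp.patterns=polMC)
head(thetaMC.all$score.dat$z1)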
# Rasch with the common discrimination estimated rather than fixed at 1
fit1MC <- ltm::rasch(polMC)
summary(fit1MC)
plot(fit1MC, type=c("ICC"))
fit2MC <- ltm::ltm(polMC ~ z1)
summary(fit2MC) # it provides item difficulty and item discrimination
coef(fit2MC, order = TRUE)
# the discrimination parameters differ substantially across items,
# which also shifts the estimated difficulties relative to the Rasch fit.
fit2TF <- ltm::ltm(polTF ~ z1)
summary(fit2TF) # it provides item difficulty and item discrimination
coef(fit2TF, order = TRUE)
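# A sketch: ltm provides an anova() method for nested IRT fits, so the constrained
# Rasch models can be tested against the two-parameter models with a likelihood-ratio test.
anova(fitMC, fit2MC)
anova(fitTF, fit2TF)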
# Plot Item Characteristic Curves for two-parameter model
# # --------------------------------------------------------------------------------------
par(mfrow=c(2,1)); plot(fit2MC, type=c("ICC")); plot(fit2TF, type=c("ICC"))
# Compare with the Rasch Model above...
par(mfrow=c(2,1)); plot(fitMC, type=c("ICC")); plot(fit2MC, type=c("ICC"))
par(mfrow=c(2,1)); plot(fitTF, type=c("ICC")); plot(fit2TF, type=c("ICC"))
# Plot Item Characteristic Curves
plot(fit2MC, type=c("ICC"), items=c(1,6)) # bad items (prob better to exclude)
plot(fit2MC, type=c("ICC"), items=c(2,3,4,5)) # good items
plot(fit2TF, type=c("ICC"), items=c(1,6)) # bad items (prob better to exclude)
plot(fit2TF, type=c("ICC"), items=c(2,3,4,5)) # good items
# Plot Item Information curves (IIC)
# # --------------------------------------------------------------------------------------
# The IIC shows how much information each item contributes and where along the
# latent trait it contributes it. Higher curves are more informative; an item with
# a shallow slope adds little because it barely distinguishes between respondents.
par(mfrow=c(2,1))
plot(fit2MC, type=c("IIC"), items=c(1,6))
plot(fit2TF, type=c("IIC"), items=c(1,6))
plot(fit2MC, type=c("IIC"), items=c(2,3,4,5))
plot(fit2TF, type=c("IIC"), items=c(2,3,4,5))
# Overall, we want a curve that stays high over a substantial range of theta (x-axis).
# items=0 plots the Test Information Function (all items combined).
plot(fit2MC, type=c("IIC"), items=0)
plot(fit2TF, type=c("IIC"), items=0)
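# A sketch: information() summarizes the test information curve numerically,
# reporting how much of the total information falls within a given theta range.
ltm::information(fit2MC, c(-4, 4))
ltm::information(fit2MC, c(-1, 1))
ltm::information(fit2TF, c(-1, 1))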
# The 3-parameter model adds a guessing (lower-asymptote) parameter, typically used
# when respondents can answer difficult items correctly by guessing.
fit3MC <- ltm::tpm(polMC, type=c("latent.trait"), max.guessing=.25)
fit3MC
plot(fit3MC, type=c("ICC"))
fit3TF <- ltm::tpm(polTF, type=c("latent.trait"), max.guessing=.25)
fit3TF
plot(fit3TF, type=c("ICC"))
par(mfrow=c(2,1))
plot(fit3MC, type=c("IIC"))
plot(fit3TF, type=c("IIC"))
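# A sketch, assuming anova() accepts the nested ltm/tpm pair in the installed
# ltm version: likelihood-ratio test of the two-parameter fits against the
# three-parameter fits (i.e., whether the guessing parameters improve fit).
anova(fit2MC, fit3MC)
anova(fit2TF, fit3TF)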
# Warning: The 3-parameter model is known to have numerical problems such as non-convergence,
# especially for the guessing parameters. These problems usually show up as a zero estimate
# for some guessing parameters and/or a non-positive-definite Hessian matrix.
### For mathematical details: https://www.jstatsoft.org/article/view/v017i05