#HTML file with output details is here:
# https://rpubs.com/rkap786/701341
#install.packages("dplyr")
library(tidyr)
library(dplyr)
##
## Attaching package: 'dplyr'
## The following objects are masked from 'package:stats':
##
## filter, lag
## The following objects are masked from 'package:base':
##
## intersect, setdiff, setequal, union
library(stringr)
library(ggplot2)
library(readr)
library(norm)
library(knitr)
#install.packages("psych")
library(psych)
##
## Attaching package: 'psych'
## The following objects are masked from 'package:ggplot2':
##
## %+%, alpha
library(mirt)
## Loading required package: stats4
## Loading required package: lattice
#install.packages("lmtest")
library(lmtest)
## Loading required package: zoo
##
## Attaching package: 'zoo'
## The following objects are masked from 'package:base':
##
## as.Date, as.Date.numeric
1. Simulate a response dataset, using 1PL, 2 PL and 3PL
2. Estimate each model and check for fit : a. Estimate item parameters using mirt for 1PL, 2PL and 3PL models b. Compare model fit using LR (nested models), AIC and BIC (not nested) - (using statistics from MIRT) c. Estimate RSME for data generation and estimation for 1PL, 2PL and 3PL - I can’t replicate results from paper here d. Run 50 simulations to see which model gives the best item parametrics
*3. Prediction following Ben’s work
#Set the seed and generate the parameters
#nitem=20
#sample.size=1000
#model="2PL"
##function to generate dataset
fun_simulate_data= function(nitem, sample.size, model,a, b,c, ability) {
#Simulate response data
if (model == "1PL"){
dat <- simdata(a = a,
d = b,
N = sample.size,
itemtype = '2PL',
Theta = ability)
}
if (model == "2PL"){
dat <- simdata(a = a,
d = b,
N = sample.size,
itemtype = '2PL',
Theta = ability)
}
if (model == "3PL"){
dat <- simdata(a = a,
d = b,
N = sample.size,
itemtype = '3PL',
guess = c,
Theta = ability)
}
return(dat)
}
Tried this out but didn’t end up using it, it didnt change the results
# fun_simulate_1PL= function(nitem, sample.size, a, b, c, ability) {
# dat <- simdata(a = a,
# d = b,
# N = sample.size,
# itemtype = 'dich',
# guess = c
# Theta = ability)
# return(dat)
# }
#
#
# fun_simulate_2PL= function(nitem, sample.size, a, b, ability) {
# dat <- simdata(a = a,
# d = b,
# N = sample.size,
# itemtype = '2PL',
# guess = c,
# Theta = ability)
# return(dat)
# }
#
# fun_simulate_3PL= function(nitem, sample.size, a, b, c, ability) {
# dat <- simdata(a = a,
# d = b,
# N = sample.size,
# itemtype = '3PL',
# guess = c,
# Theta = ability)
# return(dat)
#}
fun_model_fit= function(model1PL, model2PL, model3PL) {
#extract fit statistics
aic1PL = extract.mirt(model1PL, 'AIC')
bic1PL = extract.mirt(model1PL, 'BIC')
Gsq_1PL = extract.mirt(model1PL, 'G2')
loglik_1PL=extract.mirt(model1PL, "logLik")
parameters= cbind("model"=c("1PL"), "AIC"=round(aic1PL,3), "BIC"=round(bic1PL,3), "G-sq"=round(Gsq_1PL,3), "LL"=round(loglik_1PL, 3))
aic2PL = extract.mirt(model2PL, 'AIC')
bic2PL = extract.mirt(model2PL, 'BIC')
Gsq_2PL = extract.mirt(model2PL, 'G2')
loglik_2PL=extract.mirt(model2PL, "logLik")
parameters1= cbind("model"=c("2PL"), "AIC"=round(aic2PL,3), "BIC"=round(bic2PL,3), "G-sq"=round(Gsq_2PL,3), "LL"=round(loglik_2PL, 3))
aic3PL = extract.mirt(model3PL, 'AIC')
bic3PL = extract.mirt(model3PL, 'BIC')
Gsq_3PL = extract.mirt(model3PL, 'G2')
loglik_3PL=extract.mirt(model3PL, "logLik")
parameters2= cbind("model"=c("3PL"), "AIC"=round(aic3PL,3), "BIC"=round(bic3PL,3), "G-sq"=round(Gsq_3PL,3), "LL"=round(loglik_3PL, 3))
#combine fit statistics
parameters=rbind(parameters, parameters1, parameters2)
return(parameters)
}
#parameters <- as.data.frame(coef(model1PL, simplify=TRUE)$items)
For data generated from 3 PL- AIC and G-squared finds 3PL to be the best fit, and BIC finds 2PL to be the best fit
# Generate datasets similar to Kang Cohen
## Study 1 - Block 4 has 21 multiple choice items, 1000 students. the paper said 3PM was the best fit, so generating data using 3PM
nitem= 21
sample.size=1000
a <- as.matrix(round(rlnorm(nitem, meanlog = 0, sdlog = 1),3), ncol=1) #lognormal
b <- as.matrix(round(rnorm(nitem, mean = 0, sd = 1),3), ncol=1) #normal
c <- as.matrix(round(rbeta(nitem, shape1 = 5, shape2 = 17),3), ncol=1) #beta
ability <- as.matrix(round(rnorm(sample.size, mean = 0, sd = 1),3), ncol=1) #normal
dat = fun_simulate_data(21, 1000, "3PL",a, b,c,ability)
# Estimate model fit
#estimate IRT parameters
model1PL <- mirt(data=dat, 1, itemtype='Rasch', SE=TRUE, verbose=FALSE)
model2PL <- mirt(data=dat, 1, itemtype='2PL', SE=TRUE, verbose=FALSE)
model3PL <- mirt(data=dat, 1, itemtype='3PL', SE=TRUE, verbose=FALSE)
## EM cycles terminated after 500 iterations.
## Warning: Could not invert information matrix; model likely is not empirically
## identified.
fit_statistics = fun_model_fit(model1PL, model2PL, model3PL)
fit_statistics
## model AIC BIC G-sq LL
## [1,] "1PL" "24700.258" "24808.229" "11087.021" "-12328.129"
## [2,] "2PL" "23858.139" "24064.265" "10204.902" "-11887.07"
## [3,] "3PL" "23783.26" "24092.449" "10088.023" "-11828.63"
## 3PL gives the best fit
## Study 2 -
# 2 test lengths, 20 and 40 items.
# 2 sample sizes, 500 and 1000 examinees
Steps followed in this section:
Taken the case for number of items as 20 and sample size as 500
Generate parameters (a, d, c).
Using parameters, generate 1PL, 2 PL and 3 PL datasets.
Estimate parameters for each of the datasets, using the correct model (i.e. estimate b for 1PL, a&b for 2PL and a&b&c for 3PL). Estimated parameters are names a_est, b_est, c_est
Compare estimations using RSME
# Simulate data for Study 2
## Test length = 20 items, sample size = 500
## Item difficulty b ~ N(0,1), a∼ln(0,0.5), c~B(5,17)
## 3 distributions of ability: N(-1,1) = low ability, N(0,1) = ability at item difficulty level and N(1,1) = high ability
nitem=20
sample.size=1000
a <- as.matrix(round(rlnorm(20, meanlog = 0, sdlog = 0.5),3), ncol=1) #lognormal
a1<- matrix(rep( 1, len=20), ncol = 1)
b <- as.matrix(round(rnorm(20, mean = 0, sd = 1),3), ncol=1) #normal
c <- as.matrix(round(rbeta(20, shape1 = 5, shape2 = 17),3), ncol=1) #beta
c1<- matrix(rep( 0, len=20), ncol = 1)
low_ability <- as.matrix(round(rnorm(1000, mean = -1, sd = 1),3), ncol=1) #normal
med_ability <- as.matrix(round(rnorm(1000, mean = 0, sd = 1),3), ncol=1) #normal
high_ability <- as.matrix(round(rnorm(1000, mean = 1, sd = 1),3), ncol=1) #normal
# Generate datasets, low ability
#dat_1PL= fun_simulate_1PL(nitem, sample.size, a, b, low_ability)
#dat_2PL= fun_simulate_2PL(nitem, sample.size, a, b, low_ability)
#dat_3PL= fun_simulate_3PL(nitem, sample.size, a, b, c, low_ability)
dat_1PL = fun_simulate_data(20, 1000, "1PL", a1, b,c1, med_ability)
dat_2PL = fun_simulate_data(20, 1000, "2PL", a, b,c1, med_ability)
dat_3PL = fun_simulate_data(20, 1000, "3PL", a, b,c, med_ability)
# estimate IRT parameters
## estimate parameters for 1PL
model1PL <- mirt(data=dat_1PL, 1,itemtype='Rasch', SE=FALSE, verbose=FALSE)
coef = as.data.frame(coef(model1PL, simplify=T)$items[,2]) %>%
tibble::rownames_to_column(., "Item no") %>%
mutate("b"= b) %>%
rename(b_est= "coef(model1PL, simplify = T)$items[, 2]")
plot(coef$b,coef$b_est)
## estimate parameters for 2PL
model2PL <- mirt(data=dat_2PL, 1, itemtype='2PL', SE=TRUE, verbose=FALSE)
b_est= coef(model2PL, simplify=T)$items[,2]
a_est=coef(model2PL, simplify=T)$items[,1]
coef_2PL = data.frame(b_est, b, a_est, a)
coef_2PL = tibble::rownames_to_column(coef_2PL, "Item no")
plot(coef_2PL$b,coef_2PL$b_est)
## estimate parameters for 3PL
model3PL <- mirt(data=dat_3PL, 1, itemtype='3PL', SE=TRUE, verbose=FALSE)
b_est= coef(model3PL, simplify=T)$items[,2]
a_est=coef(model3PL, simplify=T)$items[,1]
c_est=coef(model3PL, simplify=T)$items[,3]
coef_3PL = data.frame(b_est, b, a_est, a, c_est, c)
plot(coef_3PL$b,coef_3PL$b_est)
## Plot original and estimates
coef_3PL %>%
ggplot(aes(x=b,y=b_est)) +geom_point()
coef_2PL %>%
ggplot(aes(x=b,y=b_est)) +geom_point()
coef %>%
ggplot(aes(x=b,y=b_est)) +geom_point()
# Estimate RSME
rsme = sqrt(mean((coef$b - coef$b_est)^2))
rsme_b_2PL = sqrt(mean((coef_2PL$b - coef_2PL$b_est)^2))
rsme_a_2PL = sqrt(mean((coef_2PL$a - coef_2PL$a_est)^2))
rsme_b_3PL = sqrt(mean((coef_3PL$b - coef_3PL$b_est)^2))
rsme_a_3PL = sqrt(mean((coef_3PL$a - coef_3PL$a_est)^2))
# Estimate correlation
## Paper says product moment correlation, I think this is Pearson?
### All highly correlated (>.9) and significant
cor.test(coef$b, coef$b_est, method="pearson",alternative="two.sided",na.action)
##
## Pearson's product-moment correlation
##
## data: coef$b and coef$b_est
## t = 45.25, df = 18, p-value < 2.2e-16
## alternative hypothesis: true correlation is not equal to 0
## 95 percent confidence interval:
## 0.9887396 0.9983101
## sample estimates:
## cor
## 0.9956332
cor.test(coef_2PL$b, coef_2PL$b_est, method="pearson",alternative="two.sided",na.action)
##
## Pearson's product-moment correlation
##
## data: coef_2PL$b and coef_2PL$b_est
## t = 43.031, df = 18, p-value < 2.2e-16
## alternative hypothesis: true correlation is not equal to 0
## 95 percent confidence interval:
## 0.9875619 0.9981325
## sample estimates:
## cor
## 0.9951747
cor.test(coef_3PL$b, coef_3PL$b_est, method="pearson",alternative="two.sided",na.action)
##
## Pearson's product-moment correlation
##
## data: coef_3PL$b and coef_3PL$b_est
## t = 7.0844, df = 18, p-value = 1.323e-06
## alternative hypothesis: true correlation is not equal to 0
## 95 percent confidence interval:
## 0.6696153 0.9425887
## sample estimates:
## cor
## 0.8579187
# Compare model fit - b
rsme
## [1] 0.100307
rsme_b_2PL
## [1] 0.104282
rsme_b_3PL
## [1] 0.4914497
# Compare model fit - a
rsme_a_2PL
## [1] 0.1031682
rsme_a_3PL
## [1] 0.3388465
## rsme is lowest for 2PL
This replication is only conducted for the case of low ability, 20 items, 500 sample size i.e. the first 3 rows of Table 9. The tests explored are AIC, BIC, Log likelihood and G-squared.
I generate parameters (a,b,c) only once. Using this, data is generated. Using generated datasets, parameters are estimated for 50 iterations, and we see which model performed best. Is this understanding right
I find that the model that generated the dataset is not the best performing model, in contrast to Kang & Cohen’s findings:
Case examined: 20 items, 500 sample size, low ability
When data is generated by 1PL: AIC, BIC and G-square are not the lowest for 1PL for all 50 iterations. 1PL is the best performing for all 50 iterations for log likelihood
When data is generated by 2PL: AIC, LL and G-square are not the lowest for 1PL for all 50 iterations. 2PL is the best performing dataset for all 50 iterations for BIC
When data is generated by 3PL: BIC and LL are not the lowest for 1PL for all 50 iterations. AIC and G-sq is the best performing dataset for all 50 iterations for log likelihood
# Iterate for 50 loops to see which model fits best
# Data generated using 1PL, low ability
# Generate parameters
iter=50
nitem=20
sample.size=500
a <- as.matrix(round(rlnorm(20, meanlog = 0, sdlog = 1),3), ncol=1) #lognormal
b <- as.matrix(round(rnorm(20, mean = 0, sd = 1),3), ncol=1) #normal
c <- as.matrix(round(rbeta(20, shape1 = 5, shape2 = 17),3), ncol=1) #beta
low_ability <- as.matrix(round(rnorm(1000, mean = -1, sd = 1),3), ncol=1) #normal
med_ability <- as.matrix(round(rnorm(1000, mean = 0, sd = 1),3), ncol=1) #normal
high_ability <- as.matrix(round(rnorm(1000, mean = 1, sd = 1),3), ncol=1) #normal
c1<- matrix(rep( 0, len=20), ncol = 1)
a1<- matrix(rep( 1, len=20), ncol = 1)
# Generate datasets, low ability
dat_1PL = fun_simulate_data(40, 1000, "1PL", a1, b,c1, med_ability)
#dat_1PL= fun_simulate_1PL(nitem, sample.size, a, b, low_ability)
# Create vectors to record results from iterations
bestfit_1PL= data.frame(iteration=1:50, "AIC"=0, "BIC"=0, "G-sq"=0, "LL"=0)
# Create vectors to record final summary
for (i in 1:iter){
#Estimate
model1PL <- mirt(data=dat_1PL, 1, itemtype='Rasch', SE=TRUE, verbose=FALSE)
model2PL <- mirt(data=dat_1PL, 1, itemtype='2PL', SE=TRUE, verbose=FALSE)
model3PL <- mirt(data=dat_1PL, 1, itemtype='3PL', SE=TRUE, verbose=FALSE)
compare = as.data.frame(fun_model_fit(model1PL, model2PL, model3PL))
if((which.min(compare$AIC))==1) {
bestfit_1PL[i,2]=1
}
if((which.min(compare$BIC))==1) {
bestfit_1PL[i,3]=1
}
if((which.min(compare$`G-sq`))==1) {
bestfit_1PL[i,4]=1
}
if((which.max(compare$LL))==1) {
bestfit_1PL[i,5]=1
}
}
summary_1PL = bestfit_1PL %>%
replace(is.na(.), 0) %>%
select(-"iteration") %>%
summarise_all(funs(sum)) %>%
mutate(model= "1PL")
## Warning: `funs()` is deprecated as of dplyr 0.8.0.
## Please use a list of either functions or lambdas:
##
## # Simple named list:
## list(mean = mean, median = median)
##
## # Auto named with `tibble::lst()`:
## tibble::lst(mean, median)
##
## # Using lambdas
## list(~ mean(., trim = .2), ~ median(., na.rm = TRUE))
## This warning is displayed once every 8 hours.
## Call `lifecycle::last_warnings()` to see where this warning was generated.
summary_1PL
## AIC BIC G.sq LL model
## 1 50 50 0 0 1PL
# Generate datasets, low ability
nitem=40
sample.size=1000
dat_2PL = fun_simulate_data(nitem, sample.size, "2PL", a, b,c1, low_ability)
# Create vectors to record results from iterations
bestfit_2PL= data.frame(iteration=1:50, "AIC"=0, "BIC"=0, "G-sq"=0, "LL"=0)
for (i in 1:iter){
#Estimate
model1PL <- mirt(data=dat_2PL, 1, itemtype='Rasch', SE=TRUE, verbose=FALSE)
model2PL <- mirt(data=dat_2PL, 1, itemtype='2PL', SE=TRUE, verbose=FALSE)
model3PL <- mirt(data=dat_2PL, 1, itemtype='3PL', SE=TRUE, verbose=FALSE)
compare = as.data.frame(fun_model_fit(model1PL, model2PL, model3PL))
if((which.min(compare$AIC))==2) {
bestfit_2PL[i,2]=1
}
if((which.min(compare$BIC))==2) {
bestfit_2PL[i,3]=1
}
if((which.min(compare$`G-sq`))==2) {
bestfit_2PL[i,4]=1
}
if((which.max(compare$LL))==2) {
bestfit_2PL[i,5]=1
}
}
summary_2PL = bestfit_2PL %>%
replace(is.na(.), 0) %>%
select(-"iteration") %>%
summarise_all(funs(sum)) %>%
mutate(model= "2PL")
summary_2PL
## AIC BIC G.sq LL model
## 1 50 50 0 0 2PL
# Generate datasets, low ability
dat_3PL = fun_simulate_data(nitem, sample.size, "3PL", a, b,c, low_ability)
# Create vectors to record results from iterations
bestfit_3PL= data.frame(iteration=1:50, "AIC"=0, "BIC"=0, "G-sq"=0, "LL"=0)
for (i in 1:iter){
#Estimate
model1PL <- mirt(data=dat_3PL, 1, itemtype='Rasch', SE=F, verbose=FALSE)
model2PL <- mirt(data=dat_3PL, 1, itemtype='2PL', SE=F, verbose=FALSE)
model3PL <- mirt(data=dat_3PL, 1, itemtype='3PL', SE=F, verbose=FALSE)
compare = as.data.frame(fun_model_fit(model1PL, model2PL, model3PL))
if((which.min(compare$AIC))==3) {
bestfit_3PL[i,2]=1
}
if((which.min(compare$BIC))==3) {
bestfit_3PL[i,3]=1
}
if((which.min(compare$`G-sq`))==3) {
bestfit_3PL[i,4]=1
}
if((which.max(compare$LL))==3) {
bestfit_3PL[i,5]=1
}
}
summary_3PL = bestfit_3PL %>%
replace(is.na(.), 0) %>%
select(-"iteration") %>%
summarise_all(funs(sum)) %>%
mutate(model= "3PL")
summary_3PL
## AIC BIC G.sq LL model
## 1 50 0 50 50 3PL