The purpose of this report is to demonstrate a CAT simulation study. A CAT algorithm chooses items close to estimated candidate ability, evaluates probability of candidate success, given candidate latent ability, and calculates candidate estimated ability. This procedures is repeated under different assumed candidate latent abilities numerous times. The estimated theta and standard error of measurement is captured at the end of each simulated exam. The data are summarized.
This is the irt scoring procedures, using EAP then MLE, when response vector has more than one unique response.
Show the code
###*** Following is algorithm for estimating candidate theta from response vector. Algorithm is applied in simulation# Procedure for EAP to MLE estimation- EAP -------------------------------------## From Irtoys: https://github.com/cran/irtoys###*** EAP first, when responses all 0 or 1## Normal quadraturenormal.qu =function(Q =15, # Number of quadrature points to be selectedlower=-4, # Boundariesupper=4, mu=0, # mean and Sdsigma=1,scaling="points") # Quadrature points rescaled {if (upper<=lower || sigma<=0|| Q<3) stop("bad argument")# Q quadrature points evenly distributed between boundaries qp=seq(lower,upper,length.out = Q)if(scaling=="points") {# Should always be this condition, rather than "else" qw=dnorm(qp,mean =0, sd =1) # Probability density standard normal qw=qw/sum(qw) # For qw to add up to unity qp=qp*sigma+mu # Returns qp for standard normal } else {# Only useful for another value of mu, sigma other than 0, 1 qw=dnorm(qp,mu,sigma) qw=qw/sum(qw) }# A list of probabilities (quad.weights) of quad.pointsreturn(list(quad.points=qp, quad.weights=qw))}log_likelihood_function =function( qp, # Quadrature points r, # Response matrix p, # Rasch parameters mu, sigma,method ="ML") # Maximum likelihood {# Calculate probabilities, generalized here for multiple parameter IRT, but Rasch parameters# will condense to Rasch model pr = p[,3] + (1.0- p[,3])/(1.0+exp(p[,1]*(p[,2] - qp)))# Get maximimum and minimum probabilities or defined max min pr =pmax(pr, .00001); pr =pmin(pr, .99999)# Log likelihood- response_vector time log probabilities plus variance ll = r*log(pr) + (1-r)*log(1.0-pr) lf =sum(ll)if (method !="ML") lf = lf +log(dnorm(qp,mu,sigma)) return(lf)} eap.one =function(r, p, qp, qw) {# Remove na values from response matrix and probabilities cc =!is.na(r) r = r[cc] p = p[cc,,drop=FALSE] n =length(r)if (n <1) return(c(NA, NA, 0)) ll =sapply(qp, log_likelihood_function, r=r, p=p, mu=NULL, sigma=NULL, method="ML")# Weighted loglikelihood wl =exp(ll)*qw swl =sum(wl)# Standardized weighted likelihood quad points x =sum(wl*qp)/swl# Deviance dev = qp - x sem =sqrt(sum(wl*dev*dev)/swl)return(c(x,sem,n))}eap =function(resp, ip, qu) {if (is.list(ip)) ip = ip$estif (is.null(dim(resp))) dim(resp) =c(1,length(resp))# Error handlingif (is.null(dim(ip))) stop("item parameters not a matrix")if (nrow(ip) !=ncol(resp)) stop("responses - item parameters mismatch") np =nrow(resp) qp = qu$quad.points qw = qu$quad.weights o =sapply(1:np, function(i) eap.one(r=resp[i,], p=ip, qp, qw),USE.NAMES=FALSE)rownames(o) =c("est","sem","n")return(t(o))}# Define the Rasch parameters. Will be a L x 3 matrix. Column 1 is a, discrimination, is L# length vector of 1. Column 2 is difficulties, passed to list, Column 3 is c, set to 0.# Function that takes difficulties and response vector and returns EAP theta and semeap_estimation_func <-function(difficulties, response_vector){item_parameters<-list(est =matrix(c(rep(1,length(difficulties)), # a difficulties, # brep(0,length(difficulties))), # cncol =3 ) )response_matrix <-matrix( response_vector, nrow =1 )eap(resp = response_matrix, ip = item_parameters, qu =normal.qu())}# MLE ---------------------------------------------------------------------###*** MLE- to be used after responses are all not 0 or 1# Item Response Function, or Item Characteristic Curve; picture logistic curve of response# for each theta estimateirf =function( ip, # item parametersitems=NULL,x=NULL# Values of latent variable at which will be evaluated, if NULL 99 equal spaces# between interval ) {if (is.null(x)) x =seq(-4, 4, length =101)if (is.list(ip)) ip = ip$estif (is.null(dim(ip))) dim(ip) =c(1, 3)if (!is.null(items)) ip = ip[items, , drop=FALSE]# Returns array of x by nrow of parameters of x, abilities, subtracted from difficulties f =sweep(outer(x, ip[,2], "-"), 2, ip[,1], "*") f =1/ (1+exp(-f))if (any(ip[,3]!=0)) f =sweep(sweep(f, 2, 1-ip[,3], "*"), 2, ip[,3], "+") r =list(x = x, f = f)class(r) ="irf"return(r)}# Item Information Function- gets item information for each item, using item response# function from above; this function used in Test Information Functio belowiif =function(ip, items=NULL, x=NULL) {if (is.null(x)) x =seq(-4, 4, length =101) # Ability interval# Check dataif (is.list(ip)) ip = ip$estif (is.null(dim(ip))) dim(ip) =c(1, 3)if (!is.null(items)) ip = ip[items, ,drop=FALSE] p =irf(ip, items=NULL, x)$f # Item response function from aboveif (any(ip[, 3] !=0)) { ip[,3] =0 q =irf(ip, items=NULL, x)$f f = q^2*(1-p)/p # standardized probability } else f = p*(1-p) f =sweep(f, 2, ip[,1]^2, "*") r =list(x = x, f = f)class(r) ="iif"return(r)}# Test Information Function- Applies item information function to items;# used in mle.one belowtif =function(ip, x=NULL) { i =iif(ip=ip, items=NULL, x=x) # item information function aboveif (is.null(dim(i$f))) dim(i$f) =c(length(i$x),length(i$f)) f =apply(i$f, 1, sum) # apply across columns, down rows r =list(x=i$x, f=f, ni=ncol(i$f))class(r) ="tif"return(r)}## Function to conduct maximum likelihood estimationmle.one =function( resp, # response matrix ip, # item parametersmu=mu, sigma=sigma, method=method) { # Remove na cc =!is.na(resp) resp = resp[cc] ip = ip[cc, , drop=FALSE] n =length(resp) if (n <1) return(c(NA, NA, 0))# Finds maximum of log-likelihood for given constraints est =optimize(log_likelihood_function, lower =-4, upper =4, maximum =TRUE, r = resp, p = ip, mu = mu, sigma = sigma, method = method)$maximum# Test information function ti =tif(ip, est)$fif (method !="ML") ti = ti +1/(sigma * sigma) # ti plus variance sem =sqrt(1/ti)return(c(est, sem, n))}## Checks data and applies mle.one function, returns estimate, sem, and nmlebme =function(resp, ip, mu=0, sigma=1, method="ML") {# Check for invalid dataif (is.list(ip)) ip = ip$estif (is.null(dim(resp))) dim(resp) =c(1,length(resp))if (is.null(dim(ip))) stop("item parameters not a matrix")if (nrow(ip) !=ncol(resp)) stop("responses - item parameters mismatch") np =nrow(resp)# Apply mle.one o =sapply(1:np, function(i) mle.one(resp=resp[i,], ip=ip, mu=mu, sigma=sigma, method=method))rownames(o) =c("est","sem","n")return(t(o)) }## Prepares data to apply above mlebme functionmle_estimation_func <-function(difficulties, response_vector){# Convert rasch difficulties to matrix, which function requires item_parameters<-list(est =matrix(c(rep(1,length(difficulties)), # a- in Rasch all 1's difficulties, # brep(0,length(difficulties))), # c- in Rasch all 0'sncol =3 ) )# Convert response_vector to matrix, which function requires response_matrix <-matrix( response_vector, nrow =1 )mlebme(resp = response_matrix, ip = item_parameters)}####**** ALL ABOVE ARE ESTIMATION FUNCTIONS. BELOW APPLIES# EAP and MLE Algorithm ---------------------------------------------------###*** Integrating above functions into algorithm## Function to choose between EAP and MLE. Start with EAP until not all 0 or 1, then MLEapply_estimation_function <-function(set_responses){# Check if responses are all either 0 or 1if(length(unique(set_responses$responses)) ==1){eap_estimation_func(difficulties = set_responses$difficulties, response_vector = set_responses$responses ) } else{mle_estimation_func(difficulties = set_responses$difficulties, response_vector = set_responses$responses ) }}
This is the simulation, where the scoring algorithm and item selection is applied across different latent abilities.
Show the code
# Simulation ---------------------------------------------------------###*** Data for simulating###* In real use, use Rasch item parameters and candidate response vector## Sample data, 200 items, random item difficulties from -3 to 3. You can supply your own.difficulties_items <- tibble::tibble(question_number =1:200,difficulties =runif(200, -3, 3))###*** Generate 0 or 1 response based on logistic function and candidate ability###* This will be used for each selected item to generate a responseis_response_correct_func <-function( item_difficulty, current_candidate_latent_ability){ # This is assumed candidate ability# rbinom chooses 0 or 1 for given probabilityrbinom(n =1, size =1, prob =# logistic function solved for Pexp(1.7*(current_candidate_latent_ability - item_difficulty)) / (1+exp(1.7*(current_candidate_latent_ability - item_difficulty))) )}## Selects from unadministered items, chooses 15 closest items, randomly selects items, removes from unadministered items and places in administered, chooses if item is correct or not based on probability, calculates candidate thetaadministration_function <-function(){ unadministered_items %>%# finds distance from current estimated theta to item difficultesmutate(current_closest_item_difference =abs(current_theta_estimate$est - difficulties)) %>%slice_min(current_closest_item_difference, n =15) %>%# 15 closest slice_sample %>%# randomly chooses one {. -> tmp # Chosen item# adds item to administered items administered_items <<-bind_rows(administered_items, tmp %>% dplyr::select(difficulties, responses) )# Removes item from unadministered unadministered_items <<-anti_join(unadministered_items, tmp, by ="question_number") }# Decides if item is correct, then calculates candidate theta current_theta_estimate <<-apply_estimation_function(administered_items) %>%as.data.frame()}###*** Initializes all holder variables, df's at beginning of each testinitialize_var_func <-function(latent_ability){ administered_items <<-tibble() unadministered_items <<- difficulties_items %>%rowwise() %>%mutate(responses =is_response_correct_func(difficulties, latent_ability)) %>%ungroup() current_theta_estimate <<-tibble(est =-1)}###*** Apply simulation across different latent abilities, multiple examinations, capture output as dataframe with candidate latent ability, estimated ability, semsimulation_output <- purrr::map_dfr(.x =seq(-2, 1, .5), # Different latent abilities.f =function(latent_ability){ # list for each ability purrr::map_dfr(.x =1:20, # number of administrations of each test per latent ability.f =~{initialize_var_func(latent_ability) # Initialize all holder varsreplicate(50, administration_function()) # Number of items per exam, can change current_theta_estimate # return theta_estimate df } ) %>% tibble::add_column(latent_ability, .before =1) })
Simulation Summary
Summary stastics of output captured from each candidate exam, grouped by latent ability.
Boxplots of estimated ability by latent ability and sem for each latent ability. What we are lookin for is that estimates should be centered on latent ability and grouping should be narrow–that means estimates are close to what candidate’s true ability is. For SEM, we don’t want to see that vary across scores and closer to 0 is better, though it probably asymptotes around .2 or .3.
Show the code
simulation_output %>% dplyr::select(-n) %>%pivot_longer(., cols =- latent_ability, names_to ="measure", values_to ="values") %>%mutate(latent_ability =as_factor(latent_ability)) %>%ggplot(., aes(x = latent_ability, y = values, color = measure)) +geom_boxplot() +ggtitle("Boxplot of Simulation by Theta")