Rasch CAT Simulation Study

Author

Brian Syzdek–Email

Published

December 6, 2022

The purpose of this report is to demonstrate a CAT simulation study. A CAT algorithm chooses items close to estimated candidate ability, evaluates probability of candidate success, given candidate latent ability, and calculates candidate estimated ability. This procedures is repeated under different assumed candidate latent abilities numerous times. The estimated theta and standard error of measurement is captured at the end of each simulated exam. The data are summarized.

This is the irt scoring procedures, using EAP then MLE, when response vector has more than one unique response.

Show the code

###*** Following is algorithm for estimating candidate theta from response vector. Algorithm is applied in simulation

# Procedure for EAP to MLE estimation- EAP -------------------------------------
## From Irtoys: https://github.com/cran/irtoys

###*** EAP first, when responses all 0 or 1

## Normal quadrature
normal.qu = function(
    Q = 15, # Number of quadrature points to be selected
    lower=-4, # Boundaries
    upper=4, 
    mu=0, # mean and Sd
    sigma=1,
    scaling="points") # Quadrature points rescaled
  {
  if (upper<=lower || sigma<=0 || Q<3) stop("bad argument")
  # Q quadrature points evenly distributed between boundaries
  qp=seq(lower,upper,length.out = Q)

  if(scaling=="points") {
    # Should always be this condition, rather than "else"
    qw=dnorm(qp,mean = 0, sd = 1) # Probability density standard normal
    qw=qw/sum(qw) # For qw to add up to unity
    qp=qp*sigma+mu # Returns qp for standard normal
  } else {
    # Only useful for another value of mu, sigma other than 0, 1
    qw=dnorm(qp,mu,sigma)
    qw=qw/sum(qw)
  }
  # A list of probabilities (quad.weights) of quad.points
  return(list(quad.points=qp, quad.weights=qw))
}

log_likelihood_function = function(
    qp, # Quadrature points
    r, # Response matrix
    p, # Rasch parameters
    mu,
    sigma,
    method = "ML") # Maximum likelihood
  {
  # Calculate probabilities, generalized here for multiple parameter IRT, but Rasch parameters
  # will condense to Rasch model
  pr = p[,3] + (1.0 - p[,3])/(1.0 + exp(p[,1]*(p[,2] - qp)))
  # Get maximimum and minimum probabilities or defined max min
  pr = pmax(pr, .00001); pr = pmin(pr, .99999)
  # Log likelihood- response_vector time log probabilities plus variance
  ll = r*log(pr) + (1-r)*log(1.0-pr)
  lf = sum(ll)
  if (method != "ML") lf = lf + log(dnorm(qp,mu,sigma)) 
  return(lf)
} 

eap.one = function(r, p, qp, qw) {
  # Remove na values from response matrix and probabilities
  cc = !is.na(r)
  r  = r[cc]
  p  = p[cc,,drop=FALSE]
  n  = length(r)
  if (n < 1) return(c(NA, NA, 0))
  ll = sapply(qp, log_likelihood_function, r=r, p=p, mu=NULL, sigma=NULL, method="ML")
  # Weighted loglikelihood
  wl = exp(ll)*qw
  swl = sum(wl)
  # Standardized weighted likelihood quad points
  x  = sum(wl*qp)/swl
  # Deviance
  dev = qp - x
  sem = sqrt(sum(wl*dev*dev)/swl)
  return(c(x,sem,n))
}

eap = function(resp, ip, qu) {
  if (is.list(ip)) ip = ip$est
  if (is.null(dim(resp))) dim(resp) = c(1,length(resp))
  # Error handling
  if (is.null(dim(ip))) stop("item parameters not a matrix")
  if (nrow(ip) != ncol(resp)) stop("responses - item parameters mismatch")
  np = nrow(resp)
  qp = qu$quad.points
  qw = qu$quad.weights
  o  = sapply(1:np, function(i) eap.one(r=resp[i,], p=ip, qp, qw),USE.NAMES=FALSE)
  rownames(o) = c("est","sem","n")
  return(t(o))
}

# Define the Rasch parameters. Will be a L x 3 matrix. Column 1 is a, discrimination, is L
# length vector of 1. Column 2 is difficulties, passed to list, Column 3 is c, set to 0.

# Function that takes difficulties and response vector and returns EAP theta and sem

eap_estimation_func <- function(difficulties, response_vector){
item_parameters<- list(
  est = matrix(
    c(rep(1,length(difficulties)), # a
      difficulties, # b
      rep(0,length(difficulties))), # c
    ncol = 3
    )
  )

response_matrix <- matrix(
  response_vector, nrow = 1
  )

eap(resp = response_matrix, ip = item_parameters, qu = normal.qu())
}


# MLE ---------------------------------------------------------------------

###*** MLE- to be used after responses are all not 0 or 1

# Item Response Function, or Item Characteristic Curve; picture logistic curve of response
# for each theta estimate
irf = function(
    ip, # item parameters
    items=NULL,
    x=NULL # Values of latent variable at which will be evaluated, if NULL 99 equal spaces
    # between interval
    ) 
  {
  if (is.null(x)) 
    x = seq(-4, 4, length = 101)
  if (is.list(ip)) ip = ip$est
  if (is.null(dim(ip))) 
    dim(ip) = c(1, 3)
  if (!is.null(items)) ip = ip[items, , drop=FALSE]
  # Returns array  of x by nrow of parameters of x, abilities, subtracted from difficulties
  f = sweep(outer(x, ip[,2], "-"), 2, ip[,1], "*")
  f = 1 / (1 + exp(-f))
  if (any(ip[,3]!=0)) 
    f = sweep(sweep(f, 2, 1-ip[,3], "*"), 2, ip[,3], "+")
  r = list(x = x, f = f)
  class(r) = "irf"
  return(r)
}

# Item Information Function- gets item information for each item, using item response
# function from above; this function used in Test Information Functio below
iif = function(ip, items=NULL, x=NULL) {
  if (is.null(x)) 
    x = seq(-4, 4, length = 101) # Ability interval
  # Check data
  if (is.list(ip)) ip = ip$est
  if (is.null(dim(ip))) 
    dim(ip) = c(1, 3)
  if (!is.null(items)) ip = ip[items, ,drop=FALSE]
  p = irf(ip, items=NULL, x)$f  # Item response function from above
  if (any(ip[, 3] != 0)) {
    ip[,3] = 0
    q = irf(ip, items=NULL, x)$f
    f = q^2*(1-p)/p # standardized probability
  } else 
    f = p*(1-p)
  f = sweep(f, 2, ip[,1]^2, "*")  
  r = list(x = x, f = f)
  class(r) = "iif"
  return(r)
}

# Test Information Function- Applies item information function to items;
# used in mle.one below
tif = function(ip, x=NULL) {
  i = iif(ip=ip, items=NULL, x=x) # item information function above
  if (is.null(dim(i$f))) dim(i$f) = c(length(i$x),length(i$f))
  f = apply(i$f, 1, sum) # apply across columns, down rows
  r = list(x=i$x, f=f, ni=ncol(i$f))
  class(r) = "tif"
  return(r)
}

## Function to conduct maximum likelihood estimation
mle.one = function(
    resp, # response matrix
    ip, # item parameters
    mu=mu, 
    sigma=sigma, 
    method=method) { 
  # Remove na
  cc = !is.na(resp)                                        
  resp = resp[cc]                                          
  ip = ip[cc, , drop=FALSE]                                             
  n = length(resp)                                         
  if (n < 1) return(c(NA, NA, 0))
  # Finds maximum of log-likelihood for given constraints
  est = optimize(log_likelihood_function, lower = -4, upper = 4, maximum = TRUE, 
                 r = resp, p = ip, mu = mu, sigma = sigma, method = method)$maximum
  # Test information function
  ti = tif(ip, est)$f
  if (method != "ML") ti = ti + 1/(sigma * sigma) # ti plus variance
  sem = sqrt(1/ti)
  return(c(est, sem, n))
}

## Checks data and applies mle.one function, returns estimate, sem, and n
mlebme = function(resp, ip, mu=0, sigma=1, method="ML") {
  # Check for invalid data
  if (is.list(ip)) ip = ip$est
  if (is.null(dim(resp))) dim(resp) = c(1,length(resp))
  if (is.null(dim(ip))) stop("item parameters not a matrix")
  if (nrow(ip) != ncol(resp)) stop("responses - item parameters mismatch")
  np = nrow(resp)
  # Apply mle.one
  o = sapply(1:np, function(i) mle.one(resp=resp[i,], 
                                       ip=ip, mu=mu, sigma=sigma, method=method))
  rownames(o) = c("est","sem","n")
  return(t(o)) 
}

## Prepares data to apply above mlebme function
mle_estimation_func <- function(difficulties, response_vector){
  # Convert rasch difficulties to matrix, which function requires
  item_parameters<- list(
    est = matrix(
      c(rep(1,length(difficulties)), # a- in Rasch all 1's
        difficulties, # b
        rep(0,length(difficulties))), # c- in Rasch all 0's
      ncol = 3
    )
  )
  # Convert response_vector to matrix, which function requires
  response_matrix <- matrix(
    response_vector, nrow = 1
  )
  mlebme(resp = response_matrix, ip = item_parameters)
}

####**** ALL ABOVE ARE ESTIMATION FUNCTIONS. BELOW APPLIES

# EAP and MLE Algorithm ---------------------------------------------------

###*** Integrating above functions into algorithm
## Function to choose between EAP and MLE. Start with EAP until not all 0 or 1, then MLE
apply_estimation_function <- function(set_responses){
  # Check if responses are all either 0 or 1
  if(length(unique(set_responses$responses)) == 1){
    eap_estimation_func(
      difficulties = set_responses$difficulties, 
      response_vector = set_responses$responses
    )
  } else{
    mle_estimation_func(
      difficulties = set_responses$difficulties, 
      response_vector = set_responses$responses
    )
  }
}

This is the simulation, where the scoring algorithm and item selection is applied across different latent abilities.

Show the code

# Simulation ---------------------------------------------------------

###*** Data for simulating
###* In real use, use Rasch item parameters and candidate response vector

## Sample data, 200 items, random item difficulties from -3 to 3. You can supply your own.
difficulties_items <- tibble::tibble(
  question_number = 1:200,
  difficulties = runif(200, -3, 3)
)

###*** Generate 0 or 1 response based on logistic function and candidate ability
###* This will be used for each selected item to generate a response
is_response_correct_func <- function(
    item_difficulty, 
    current_candidate_latent_ability){ # This is assumed candidate ability
  # rbinom chooses 0 or 1 for given probability
  rbinom(n = 1, size = 1, prob = 
           # logistic function solved for P
  exp(1.7*(current_candidate_latent_ability - item_difficulty)) / 
    (1 + exp(1.7*(current_candidate_latent_ability - item_difficulty)))
  )
}

## Selects from unadministered items, chooses 15 closest items, randomly selects items, removes from unadministered items and places in administered, chooses if item is correct or not based on probability, calculates candidate theta

administration_function <- function(){
  unadministered_items %>% 
    # finds distance from current estimated theta to item difficultes
    mutate(current_closest_item_difference = abs(current_theta_estimate$est - difficulties)) %>% 
    slice_min(current_closest_item_difference, n = 15)  %>% # 15 closest
    slice_sample %>% # randomly chooses one
    {. -> tmp # Chosen item
      # adds item to administered items
      administered_items <<- bind_rows(administered_items, 
                                       tmp %>% 
                                        dplyr::select(difficulties, responses)
    )
      # Removes item from unadministered
      unadministered_items <<- anti_join(unadministered_items, 
                                         tmp, 
                                         by = "question_number")
    }
  # Decides if item is correct, then calculates candidate theta
  current_theta_estimate <<- apply_estimation_function(administered_items) %>% 
    as.data.frame()
}

###*** Initializes all holder variables, df's at beginning of each test
initialize_var_func <- function(latent_ability){
    administered_items <<- tibble()

    unadministered_items <<- difficulties_items %>%
      rowwise() %>%
      mutate(responses = is_response_correct_func(difficulties, latent_ability)) %>%
      ungroup()

    current_theta_estimate <<- tibble(est = -1)
}

###*** Apply simulation across different latent abilities, multiple examinations, capture output as dataframe with candidate latent ability, estimated ability, sem
simulation_output <- purrr::map_dfr(
  .x = seq(-2, 1, .5), # Different latent abilities
  .f = function(latent_ability){ # list for each ability
    
    purrr::map_dfr(
      .x = 1:20, # number of administrations of each test per latent ability
      .f = ~{
    initialize_var_func(latent_ability) # Initialize all holder vars
    
    replicate(50, administration_function()) # Number of items per exam, can change
    current_theta_estimate # return theta_estimate df
      }
    ) %>% 
      tibble::add_column(latent_ability, .before = 1) 
  })

Simulation Summary

Summary stastics of output captured from each candidate exam, grouped by latent ability.

Show the code

simulation_output %>% 
  group_by(latent_ability) %>% 
  summarize(
    estimate_mean = mean(est),
    estimate_sd = sd(est),
    sem_mean = mean(sem),
    sem_sd = sd(sem)
  ) %>% 
  mutate(across(where(is.numeric), round, 2)) %>% 
  table_print(., caption = "Simulation Summary Output by Theta")

Simulation Summary Output by Theta
Latent Ability	Estimate Mean	Estimate Sd	Sem Mean
-2.0	-2.07	0.23	0.29
-1.5	-1.51	0.22	0.29
-1.0	-1.03	0.10	0.29
-0.5	-0.44	0.19	0.29
0.0	-0.08	0.15	0.29
0.5	0.56	0.20	0.29
1.0	1.09	0.19	0.29

Simulation Plot

Boxplots of estimated ability by latent ability and sem for each latent ability. What we are lookin for is that estimates should be centered on latent ability and grouping should be narrow–that means estimates are close to what candidate’s true ability is. For SEM, we don’t want to see that vary across scores and closer to 0 is better, though it probably asymptotes around .2 or .3.

Show the code

simulation_output %>% 
  dplyr::select(-n) %>% 
  pivot_longer(., cols = - latent_ability, names_to = "measure", values_to = "values") %>% 
  mutate(latent_ability = as_factor(latent_ability)) %>% 
  ggplot(., aes(x = latent_ability, y = values, color = measure)) +
  geom_boxplot() + 
  ggtitle("Boxplot of Simulation by Theta")