The aim of this analysis is to transform YPT knowledge scores to T scores based on the Item Response Theory (IRT) model.
Among other things, the purpose of IRT is to provide a framework for evaluating how well assessments (such as our measure of transplant knowledge) work, and how well individual question items on assessments work. The most common application of IRT is in education, where psychometricians use it for developing and designing exams, maintaining banks of items for exams, and equating the difficulties of items for successive versions of exams (for example, to allow comparisons between results over time).
library(pacman)
p_load(ltm, mirt, tidyverse, psych, htmlTable, dplyr, plyr, beepr)
data <- read.csv(file="./ypt long format v2.csv", header=TRUE, sep=",", stringsAsFactors = FALSE, na.strings = "")
data2 <- data %>%
filter(eval_time == 0) %>%
select(starts_with("know")) # only select baseline scores
# get rid of missingness as a possible pattern: keep complete cases only
data2[,1:18] <- sapply(data2[,1:18], as.integer) # convert character responses to integer
data3 <- data2[complete.cases(data2), ]
A graded response model is fit below, as suggested by the statistical analysis plan, and summarized with summary(polymodel).
Notes: F1 is the factor loading, i.e., how strongly each item is related to the latent variable; items with loadings below 0.4 are removed (a programmatic check is sketched after the model summary below).
# the first and full model
polymodel <- mirt(data3, model = 1, itemtype = "graded")
summary(polymodel)
## F1 h2
## know1 0.514 0.264
## know2 0.406 0.164
## know3 0.539 0.291
## know4 0.367 0.135
## know5 0.488 0.238
## know7 0.515 0.265
## know8 0.479 0.229
## know9 0.411 0.169
## know10 0.469 0.220
## know11 0.446 0.199
## know12 0.597 0.356
## know13 0.419 0.175
## know14 0.507 0.257
## know15 0.602 0.363
## know16 0.524 0.274
## know17 0.546 0.298
## know18 0.617 0.381
## know19 0.579 0.336
##
## SS loadings: 4.615
## Proportion Var: 0.256
##
## Factor correlations:
##
## F1
## F1 1
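The loading cutoff can also be checked programmatically rather than by eye. A minimal sketch, assuming summary() on a mirt model returns (invisibly) a list whose rotF element holds the loading matrix printed above (true for recent mirt versions):
# flag items whose F1 loading falls below the 0.4 cutoff
loading_tab <- summary(polymodel) # prints the table again and returns the loadings invisibly
low_items <- rownames(loading_tab$rotF)[loading_tab$rotF[, "F1"] < 0.4]
low_items # should flag know4, matching the table above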
Compare model fit after the low-loading item is removed. The results show that model 2 (without know4) has smaller AIC and BIC; the X2, df, and p columns are NaN or degenerate because the two models are fit to different item sets, so only the information criteria are informative here.
# remove know4 with F1 < 0.4
data4 <- data3 %>%
select(know1:know3, know5:know19)
polymodel2 <- mirt(data4, model = 1, itemtype = "graded") # graded response model, as per Devin: thresholds contrast category 1 vs. rest and category 2 vs. rest
summary(polymodel2)
## F1 h2
## know1 0.514 0.264
## know2 0.404 0.163
## know3 0.536 0.288
## know5 0.475 0.225
## know7 0.507 0.257
## know8 0.482 0.233
## know9 0.404 0.163
## know10 0.462 0.214
## know11 0.442 0.195
## know12 0.600 0.361
## know13 0.417 0.174
## know14 0.507 0.257
## know15 0.609 0.370
## know16 0.520 0.271
## know17 0.546 0.299
## know18 0.626 0.392
## know19 0.588 0.345
##
## SS loadings: 4.471
## Proportion Var: 0.263
##
## Factor correlations:
##
## F1
## F1 1
# compare model fit
anova(polymodel, polymodel2)
##
## Model 1: mirt(data = data3, model = 1, itemtype = "graded")
## Model 2: mirt(data = data4, model = 1, itemtype = "graded")
## AIC AICc SABIC HQ BIC logLik X2
## 1 26196.82 26204.78 26278.45 26294.04 26449.93 -13044.41 NaN
## 2 24532.17 24539.24 24609.26 24623.99 24771.21 -12215.08 1658.654
## df p
## 1 NaN NaN
## 2 258280323 1
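The information criteria can also be pulled straight from each fitted model for a side-by-side table; a hedged sketch, assuming mirt's extract.mirt() accepts the "AIC" and "BIC" keywords as documented:
# AIC/BIC for the full (18-item) vs. reduced (17-item) model; should match the anova() output above
data.frame(
  model = c("full (18 items)", "reduced (17 items)"),
  AIC = c(extract.mirt(polymodel, "AIC"), extract.mirt(polymodel2, "AIC")),
  BIC = c(extract.mirt(polymodel, "BIC"), extract.mirt(polymodel2, "BIC"))
)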
# IRT item parameters: discrimination (a) and category thresholds (b1, b2) on the ability (theta) scale
IRT_item.par <- coef(polymodel2, IRTpars = T)
IRT_item.par
## $know1
## a b1 b2
## par 1.02 -2.048 -0.233
##
## $know2
## a b1 b2
## par 0.752 -3.744 -0.341
##
## $know3
## a b1 b2
## par 1.082 -2.022 -1.181
##
## $know5
## a b1 b2
## par 0.918 -1.563 -0.181
##
## $know7
## a b1 b2
## par 1.001 -1.719 -0.718
##
## $know8
## a b1 b2
## par 0.937 -2.544 -1.693
##
## $know9
## a b1 b2
## par 0.752 -1.404 0.244
##
## $know10
## a b1 b2
## par 0.887 -1.681 -0.297
##
## $know11
## a b1 b2
## par 0.838 -2.461 0.355
##
## $know12
## a b1 b2
## par 1.278 -0.882 0.792
##
## $know13
## a b1 b2
## par 0.78 -2.757 1.424
##
## $know14
## a b1 b2
## par 1.002 -1.011 1.679
##
## $know15
## a b1 b2
## par 1.306 -1.226 0.522
##
## $know16
## a b1 b2
## par 1.037 -0.308 2.157
##
## $know17
## a b1 b2
## par 1.11 -1.21 -0.356
##
## $know18
## a b1 b2
## par 1.367 -0.813 0.497
##
## $know19
## a b1 b2
## par 1.236 -1.667 1.033
##
## $GroupPars
## MEAN_1 COV_11
## par 0 1
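To make the a (discrimination) and b (threshold) parameters concrete: under the graded response model, P(X >= k | theta) = 1 / (1 + exp(-a * (theta - b_k))), and category probabilities are obtained by differencing. A hand-rolled sketch (grm_probs is a hypothetical helper, not part of mirt), using the know1 parameters above:
# category probabilities for a 0/1/2 item under the graded response model
grm_probs <- function(theta, a, b) {
  p_ge <- vapply(b, function(bk) plogis(a * (theta - bk)), numeric(length(theta))) # P(X >= 1), P(X >= 2)
  p_ge <- matrix(p_ge, nrow = length(theta))
  cbind(P0 = 1 - p_ge[, 1], P1 = p_ge[, 1] - p_ge[, 2], P2 = p_ge[, 2])
}
round(grm_probs(theta = c(-2, 0, 2), a = 1.02, b = c(-2.048, -0.233)), 3) # know1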
# factor loadings of items
factorloading <- summary(polymodel2) # combine the loadings with the IRT parameters above to build the final item table
## F1 h2
## know1 0.514 0.264
## know2 0.404 0.163
## know3 0.536 0.288
## know5 0.475 0.225
## know7 0.507 0.257
## know8 0.482 0.233
## know9 0.404 0.163
## know10 0.462 0.214
## know11 0.442 0.195
## know12 0.600 0.361
## know13 0.417 0.174
## know14 0.507 0.257
## know15 0.609 0.370
## know16 0.520 0.271
## know17 0.546 0.299
## know18 0.626 0.392
## know19 0.588 0.345
##
## SS loadings: 4.471
## Proportion Var: 0.263
##
## Factor correlations:
##
## F1
## F1 1
# plot all traces
plot.all.trace <- plot(polymodel2, type = "trace")
plot.all.trace
# information plot
plot.all.info <- plot(polymodel2, type = "info")
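Individual items can be inspected the same way; a brief sketch, assuming mirt's itemplot() accepts an item name and the same "trace"/"info" plot types:
itemplot(polymodel2, item = "know1", type = "trace") # category characteristic curves for know1
itemplot(polymodel2, item = "know1", type = "info") # item information curve for know1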
rely.alpha <- alpha(data4) # raw_alpha: Cronbach's alpha (values >= .7 or .8 indicate good reliability; Kline, 1999)
rely.alpha
# expected score curve
plot(polymodel2) # S curve
##
## Reliability analysis
## Call: alpha(x = data4)
##
## raw_alpha std.alpha G6(smc) average_r S/N ase mean sd median_r
## 0.81 0.81 0.81 0.2 4.2 0.0098 1.3 0.37 0.2
##
## lower alpha upper 95% confidence boundaries
## 0.79 0.81 0.83
##
## Reliability if an item is dropped:
## raw_alpha std.alpha G6(smc) average_r S/N alpha se var.r med.r
## know1 0.80 0.80 0.80 0.20 3.9 0.010 0.0042 0.19
## know2 0.80 0.80 0.81 0.20 4.1 0.010 0.0040 0.20
## know3 0.80 0.80 0.80 0.20 3.9 0.010 0.0041 0.19
## know5 0.80 0.80 0.80 0.20 4.0 0.010 0.0043 0.19
## know7 0.80 0.80 0.80 0.20 3.9 0.010 0.0042 0.19
## know8 0.80 0.80 0.80 0.20 4.0 0.010 0.0040 0.20
## know9 0.80 0.80 0.80 0.20 4.1 0.010 0.0041 0.20
## know10 0.80 0.80 0.80 0.20 4.0 0.010 0.0042 0.20
## know11 0.80 0.80 0.80 0.20 4.0 0.010 0.0041 0.20
## know12 0.80 0.80 0.80 0.20 3.9 0.010 0.0036 0.20
## know13 0.81 0.81 0.81 0.21 4.1 0.010 0.0038 0.20
## know14 0.80 0.80 0.80 0.20 4.0 0.010 0.0039 0.20
## know15 0.80 0.80 0.80 0.20 3.9 0.010 0.0039 0.19
## know16 0.80 0.80 0.80 0.20 4.0 0.010 0.0041 0.20
## know17 0.80 0.80 0.80 0.20 3.9 0.010 0.0042 0.19
## know18 0.79 0.79 0.80 0.19 3.9 0.011 0.0037 0.19
## know19 0.80 0.80 0.80 0.20 3.9 0.010 0.0040 0.19
##
## Item statistics
## n raw.r std.r r.cor r.drop mean sd
## know1 802 0.53 0.53 0.49 0.43 1.42 0.73
## know2 802 0.41 0.43 0.36 0.32 1.49 0.62
## know3 802 0.52 0.52 0.48 0.43 1.61 0.72
## know5 802 0.51 0.50 0.45 0.41 1.33 0.81
## know7 802 0.53 0.52 0.47 0.43 1.48 0.78
## know8 802 0.46 0.46 0.41 0.37 1.69 0.66
## know9 802 0.46 0.44 0.38 0.34 1.19 0.84
## know10 802 0.50 0.49 0.44 0.40 1.35 0.81
## know11 802 0.47 0.47 0.42 0.37 1.31 0.70
## know12 802 0.54 0.53 0.50 0.44 1.05 0.78
## know13 802 0.39 0.41 0.34 0.30 1.15 0.61
## know14 802 0.48 0.48 0.43 0.38 0.91 0.69
## know15 802 0.56 0.56 0.52 0.47 1.17 0.76
## know16 802 0.47 0.48 0.42 0.38 0.71 0.69
## know17 802 0.53 0.52 0.48 0.43 1.35 0.84
## know18 802 0.58 0.57 0.54 0.48 1.11 0.82
## know19 802 0.51 0.52 0.48 0.43 1.12 0.65
##
## Non missing response frequency for each item
## 0 1 2 miss
## know1 0.14 0.30 0.56 0
## know2 0.07 0.37 0.56 0
## know3 0.14 0.12 0.75 0
## know5 0.22 0.23 0.55 0
## know7 0.18 0.16 0.66 0
## know8 0.11 0.09 0.80 0
## know9 0.27 0.26 0.47 0
## know10 0.21 0.22 0.56 0
## know11 0.14 0.42 0.44 0
## know12 0.28 0.39 0.33 0
## know13 0.12 0.60 0.28 0
## know14 0.29 0.51 0.20 0
## know15 0.22 0.40 0.39 0
## know16 0.43 0.44 0.13 0
## know17 0.24 0.16 0.59 0
## know18 0.29 0.32 0.39 0
## know19 0.16 0.56 0.28 0
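The overall estimate can be pulled out of the psych::alpha() object for a quick benchmark check; a small sketch, assuming the object stores its summary row in the total element as documented by psych:
rely.alpha$total$raw_alpha # 0.81 in the output above
rely.alpha$total$raw_alpha >= 0.7 # TRUE: acceptable-to-good internal consistency (Kline, 1999)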
f_scores <- round(fscores(polymodel2, method = "EAPsum", full.scores = F), digits = 2)
## df X2 p.X2 rxx_Theta.1
## stats 30 67.79619 9.517606e-05 0.7965514
f_scores # the z scores (theta estimates)
## Sum.Scores Theta SE.Theta observed expected
## 0 0 -3.26 0.57 2 0.04
## 1 1 -3.01 0.54 2 0.17
## 2 2 -2.80 0.52 2 0.44
## 3 3 -2.60 0.51 2 0.90
## 4 4 -2.43 0.49 2 1.60
## 5 5 -2.26 0.48 4 2.56
## 6 6 -2.10 0.47 6 3.83
## 7 7 -1.95 0.46 7 5.41
## 8 8 -1.80 0.45 6 7.31
## 9 9 -1.66 0.45 11 9.54
## 10 10 -1.52 0.44 10 12.08
## 11 11 -1.39 0.44 12 14.91
## 12 12 -1.25 0.43 13 18.01
## 13 13 -1.12 0.43 16 21.34
## 14 14 -0.99 0.43 16 24.85
## 15 15 -0.86 0.43 23 28.48
## 16 16 -0.74 0.43 22 32.14
## 17 17 -0.61 0.43 34 35.74
## 18 18 -0.48 0.43 40 39.18
## 19 19 -0.35 0.43 41 42.32
## 20 20 -0.21 0.43 31 45.04
## 21 21 -0.08 0.44 44 47.19
## 22 22 0.06 0.44 47 48.62
## 23 23 0.20 0.44 47 49.20
## 24 24 0.34 0.45 64 48.82
## 25 25 0.50 0.45 66 47.39
## 26 26 0.65 0.46 54 44.84
## 27 27 0.82 0.47 64 41.18
## 28 28 0.99 0.48 39 36.46
## 29 29 1.18 0.49 29 30.83
## 30 30 1.38 0.51 24 24.50
## 31 31 1.59 0.52 12 17.86
## 32 32 1.83 0.54 7 11.43
## 33 33 2.10 0.57 3 5.88
## 34 34 2.42 0.60 0 1.93
# convert z scores to T scores, equation provided in the analysis plan/grant
f_scores$t_scores <- (f_scores[,2]*10) + 50
# The lookup table
f_scores
## Sum.Scores Theta SE.Theta observed expected t_scores
## 0 0 -3.26 0.57 2 0.04 17.4
## 1 1 -3.01 0.54 2 0.17 19.9
## 2 2 -2.80 0.52 2 0.44 22.0
## 3 3 -2.60 0.51 2 0.90 24.0
## 4 4 -2.43 0.49 2 1.60 25.7
## 5 5 -2.26 0.48 4 2.56 27.4
## 6 6 -2.10 0.47 6 3.83 29.0
## 7 7 -1.95 0.46 7 5.41 30.5
## 8 8 -1.80 0.45 6 7.31 32.0
## 9 9 -1.66 0.45 11 9.54 33.4
## 10 10 -1.52 0.44 10 12.08 34.8
## 11 11 -1.39 0.44 12 14.91 36.1
## 12 12 -1.25 0.43 13 18.01 37.5
## 13 13 -1.12 0.43 16 21.34 38.8
## 14 14 -0.99 0.43 16 24.85 40.1
## 15 15 -0.86 0.43 23 28.48 41.4
## 16 16 -0.74 0.43 22 32.14 42.6
## 17 17 -0.61 0.43 34 35.74 43.9
## 18 18 -0.48 0.43 40 39.18 45.2
## 19 19 -0.35 0.43 41 42.32 46.5
## 20 20 -0.21 0.43 31 45.04 47.9
## 21 21 -0.08 0.44 44 47.19 49.2
## 22 22 0.06 0.44 47 48.62 50.6
## 23 23 0.20 0.44 47 49.20 52.0
## 24 24 0.34 0.45 64 48.82 53.4
## 25 25 0.50 0.45 66 47.39 55.0
## 26 26 0.65 0.46 54 44.84 56.5
## 27 27 0.82 0.47 64 41.18 58.2
## 28 28 0.99 0.48 39 36.46 59.9
## 29 29 1.18 0.49 29 30.83 61.8
## 30 30 1.38 0.51 24 24.50 63.8
## 31 31 1.59 0.52 12 17.86 65.9
## 32 32 1.83 0.54 7 11.43 68.3
## 33 33 2.10 0.57 3 5.88 71.0
## 34 34 2.42 0.60 0 1.93 74.2
# write.csv(f_scores, file = "./T score convertion.csv") # export to csv for saving and editing
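An alternative to the summed-score lookup table is to score each respondent directly and apply the same transformation; a hedged sketch, assuming fscores() with full.scores = TRUE returns one EAP theta per row of data4:
theta_person <- fscores(polymodel2, method = "EAP", full.scores = TRUE) # per-respondent theta
t_person <- theta_person[, 1] * 10 + 50 # same T-score equation as above
head(round(t_person, 1))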
# calculate sum scores
data[,2:19] <- sapply(data[,2:19], as.integer)
data$sum_score <- rowSums(data[,2:19])
# merge T score to original dataset by summed scores
data$Sum.Scores <- data$sum_score
ypt.tscore <- merge(data, f_scores, by = "Sum.Scores", all = TRUE, sort = FALSE) # full join on the summed score (all = TRUE keeps unmatched rows from both sides; use all.x = TRUE for a strict left join)
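For a left join that keeps exactly one row per participant, the dplyr equivalent is sketched below; it assumes f_scores coerces cleanly to a data frame with the Sum.Scores column shown above:
ypt.tscore2 <- dplyr::left_join(data, as.data.frame(f_scores), by = "Sum.Scores") # T score attached where the summed score matches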
# a nested ifelse() lookup would be too complicated here; merging on the summed score is simpler
Mixed effect model on T scores