#global options
options(
digits = 2,
contrasts = c("contr.treatment", "contr.treatment")
)
#packages
library(kirkegaard)
## Loading required package: tidyverse
## ── Attaching core tidyverse packages ──────────────────────── tidyverse 2.0.0 ──
## ✔ dplyr 1.1.4 ✔ readr 2.1.5
## ✔ forcats 1.0.0 ✔ stringr 1.5.1
## ✔ ggplot2 3.5.0 ✔ tibble 3.2.1
## ✔ lubridate 1.9.3 ✔ tidyr 1.3.1
## ✔ purrr 1.0.2
## ── Conflicts ────────────────────────────────────────── tidyverse_conflicts() ──
## ✖ dplyr::filter() masks stats::filter()
## ✖ dplyr::lag() masks stats::lag()
## ℹ Use the conflicted package (<http://conflicted.r-lib.org/>) to force all conflicts to become errors
## Loading required package: magrittr
##
##
## Attaching package: 'magrittr'
##
##
## The following object is masked from 'package:purrr':
##
## set_names
##
##
## The following object is masked from 'package:tidyr':
##
## extract
##
##
## Loading required package: weights
##
## Loading required package: Hmisc
##
##
## Attaching package: 'Hmisc'
##
##
## The following objects are masked from 'package:dplyr':
##
## src, summarize
##
##
## The following objects are masked from 'package:base':
##
## format.pval, units
##
##
## Loading required package: assertthat
##
##
## Attaching package: 'assertthat'
##
##
## The following object is masked from 'package:tibble':
##
## has_name
##
##
## Loading required package: psych
##
##
## Attaching package: 'psych'
##
##
## The following object is masked from 'package:Hmisc':
##
## describe
##
##
## The following objects are masked from 'package:ggplot2':
##
## %+%, alpha
##
##
##
## Attaching package: 'kirkegaard'
##
##
## The following object is masked from 'package:psych':
##
## rescale
##
##
## The following object is masked from 'package:assertthat':
##
## are_equal
##
##
## The following object is masked from 'package:purrr':
##
## is_logical
##
##
## The following object is masked from 'package:base':
##
## +
load_packages(
mirt,
future, furrr
)
## Loading required package: stats4
## Loading required package: lattice
#ggplot2
theme_set(theme_bw())
#get factor loadings from mirt fit
get_loadings = function(x) {
x@Fit$`F` %>% as.vector()
}
#internal function to fit mirt
fit_mirt = function(items, mirt_args) {
local_mirt_fit = rlang::exec(
.fn = mirt::mirt,
data = items,
!!!mirt_args
)
#get scores
local_scores = mirt::fscores(local_mirt_fit, full.scores.SE = T)
#return fit and scores
list(
fit = local_mirt_fit,
scores = local_scores,
reliability = empirical_rxx(local_scores)
)
}
#make a list of items indexes with one removed per set
make_leave_one_out_sets = function(item_idx) {
#make a list of items indexes with one removed per set
purrr::map(
seq(item_idx),
function(i) {
item_idx[-i]
}
)
}
#make forwards item selection sets
make_forward_sets = function(
all_items,
current_items = NULL,
full_fit,
min_items = 3,
start_method = "highest_loading"
) {
#make a list of items indexes with one removed per set
#if we have none, it's easy
if (is.null(current_items)) {
#if we have no items, we can either try all 3-way combinations, which is often not possible (explosion)
#or we can begin with a best guess of the 3 items with highest loadings from full analysis (fast)
if (start_method == "highest_loading") {
#get highest loading items
item_loadings = tibble(
item = 1:length(get_loadings(full_fit)),
loading = get_loadings(full_fit)
)
start_items = item_loadings %>% arrange(loading) %>% tail(min_items) %>% pull(item)
sets = list(start_items)
message(str_glue("Starting with the {min_items} items with highest loadings: {str_c(start_items, collapse = ', ')}"))
} else if (start_method == "combination") {
#if we have no items, we can either try all 3-way combinations, which is often not possible (explosion)
#or we can begin with a best guess of the 3 items with highest loadings from full analysis (fast)
sets = combn(1:ncol(all_items), m = min_items, simplify = F)
}
} else {
#if we have some, we need to add one different item to each set
sets = purrr::map(
setdiff(seq(ncol(all_items)), current_items),
function(i) {
c(current_items, i)
}
)
}
sets
}
#backwards drop function (drop 1 item)
backwards_drop = function(
all_items,
current_selection = NULL,
criterion_vars,
criterion_cors_full,
selection_method,
full_fit,
full_reliability,
mirt_args = NULL,
save_fits = T
) {
#if no current selection, select all
if (is.null(current_selection)) {
current_selection = seq(ncol(all_items))
}
#main loop
reductions = furrr::future_map_dfr(
make_leave_one_out_sets(current_selection),
function(item_set) {
#fit mirt
fit = fit_mirt(all_items[, item_set, drop = F], mirt_args)
#get scores and their full set cors
cors = wtd.cors(
bind_cols(
score = fit$scores[, 1],
criterion_vars
)
)
#mean fraction of criterion cors
item_set = list(item_set)
criterion_cors = cors[-1, 1]
criterion_cors_frac = criterion_cors / criterion_cors_full[-1, 1]
mean_criterion_cors_frac = mean(criterion_cors_frac)
reliability_frac = (fit$reliability / full_reliability) %>% unname()
#determine what to maximize
criterion_value = switch(selection_method,
"rc" = mean(c(criterion_cors_frac, reliability_frac)),
"r" = reliability_frac,
"c" = mean(criterion_cors_frac)
)
y = tibble(
items_in_scale = length(item_set[[1]]),
item_set = item_set,
criterion_cors_frac = criterion_cors_frac,
mean_criterion_cors_frac = mean_criterion_cors_frac,
reliability = fit$reliability,
reliability_frac = reliability_frac,
criterion_value = criterion_value
)
#add criterion cors
for (i in seq_along(criterion_cors)) {
y[[str_glue("r_{rownames(cors)[i+1]}")]] = criterion_cors[i]
}
#save fits and scores?
if (save_fits) {
y$fit = list(fit)
y$scores = list(fit$scores)
}
y
}
)
#return results
reductions
}
#forwards pick function
forwards_pick = function(
items,
current_items,
criterion_vars,
criterion_cors_full,
selection_method,
full_fit,
full_reliability,
mirt_args = NULL,
save_fits = T
) {
#main loop
additions = furrr::future_map_dfr(
make_forward_sets(items, current_items, full_fit = full_fit),
function(item_set) {
#fit mirt
fit = fit_mirt(items[, item_set, drop = F], mirt_args)
#get scores and their full set cors
cors = wtd.cors(
bind_cols(
score = fit$scores[, 1],
criterion_vars
)
)
#mean fraction of criterion cors
item_set = list(item_set)
criterion_cors = cors[-1, 1]
criterion_cors_frac = criterion_cors / criterion_cors_full[-1, 1]
mean_criterion_cors_frac = mean(criterion_cors_frac)
reliability_frac = (fit$reliability / full_reliability) %>% unname()
#determine what to maximize
criterion_value = switch(selection_method,
"rc" = mean(c(criterion_cors_frac, reliability_frac)),
"r" = reliability_frac,
"c" = mean(criterion_cors_frac)
)
y = tibble(
items_in_scale = length(item_set[[1]]),
item_set = item_set,
criterion_cors_frac = criterion_cors_frac,
mean_criterion_cors_frac = mean_criterion_cors_frac,
reliability = fit$reliability,
reliability_frac = reliability_frac,
criterion_value = criterion_value
)
#add criterion cors
for (i in seq_along(criterion_cors)) {
y[[str_glue("r_{rownames(cors)[i+1]}")]] = criterion_cors[i]
}
#save fits and scores?
if (save_fits) {
y$fit = list(fit)
y$scores = list(fit$scores)
}
y
}
)
#return results
additions
}
#make loadings based item list
make_item_list_for_loadings_method = function(x, max_items) {
#return a list with vectors of first 3 items, then first 4, then first 5, until max
purrr::map(
3:max_items,
function(last_item) {
x[1:last_item]
}
)
}
#use highest loadings
highest_loadings_method = function(
items,
max_items,
criterion_vars,
criterion_cors_full,
full_fit,
full_reliability,
mirt_args = NULL,
save_fits = T,
selection_method
) {
#get loadings
item_loadings = tibble(
item = 1:length(get_loadings(full_fit)),
loading = get_loadings(full_fit)
) %>% arrange(-loading)
#fit models with items from 3 to max
#main loop
additions = furrr::future_map(
make_item_list_for_loadings_method(item_loadings$item, max_items),
function(item_set) {
#fit mirt
fit = fit_mirt(items[, item_set, drop = F], mirt_args)
#get scores and their full set cors
cors = wtd.cors(
bind_cols(
score = fit$scores[, 1],
criterion_vars
)
)
#mean fraction of criterion cors
item_set = list(item_set)
criterion_cors = cors[-1, 1]
criterion_cors_frac = criterion_cors / criterion_cors_full[-1, 1]
mean_criterion_cors_frac = mean(criterion_cors_frac)
reliability_frac = (fit$reliability / full_reliability) %>% unname()
#determine what to maximize
criterion_value = switch(selection_method,
"rc" = mean(c(criterion_cors_frac, reliability_frac)),
"r" = reliability_frac,
"c" = mean(criterion_cors_frac)
)
y = tibble(
items_in_scale = length(item_set[[1]]),
item_set = item_set,
criterion_cors_frac = criterion_cors_frac,
mean_criterion_cors_frac = mean_criterion_cors_frac,
reliability = fit$reliability,
reliability_frac = reliability_frac,
criterion_value = criterion_value
)
#add criterion cors
for (i in seq_along(criterion_cors)) {
y[[str_glue("r_{rownames(cors)[i+1]}")]] = criterion_cors[i]
}
#save fits and scores?
if (save_fits) {
y$fit = list(fit)
y$scores = list(fit$scores)
}
y
}
)
additions
}
#abbreviation scale
abbreviate_scale = function(
items,
criterion_vars = NULL,
item_target = 10,
method = "forwards",
selection_method = "rc",
mirt_args = NULL,
save_fits = T
) {
#start timer
tictoc::tic()
#stop timer on exit
on.exit({
tictoc::toc()
})
#check methods
assert_that(is.data.frame(items) | is.matrix(items))
items = as.matrix(items) #make sure its a matrix to avoid some dropping issues
assert_that(method %in% c("backwards", "forwards", "max_loading"))
if (selection_method == "cr") selection_method = "rc"
assert_that(selection_method %in% c("rc", "r", "c", "cr"))
message(str_glue("Abbreviating scale using {mapvalues(selection_method, from = c('c', 'rc', 'r'), to = c('correlation with criterion variable(s)', 'average of correlation with creiterion variable(s) and reliability', 'reliability'), warn_missing = F)} method"))
message(str_glue("Using the {method} method"))
assert_that(item_target >= 3)
assert_that(item_target < ncol(items))
#use default mirt args if none given
if (is.null(mirt_args)) {
mirt_args = list(
model = 1,
itemtype = "2PL",
technical = list(NCYCLES = 5000),
verbose = F
)
}
#fit full scale mirt
full_fit = rlang::exec(
.fn = mirt::mirt,
data = items,
!!!mirt_args
)
#get scores and their full set cors
full_scores = mirt::fscores(full_fit, full.scores.SE = T)
#reliability
full_reliability = empirical_rxx(full_scores) %>% unname()
#make set of criterion vars
if (is.null(criterion_vars)) {
criterion_vars = tibble(
full_score = full_scores[, 1]
)
}
#criterion cors with full set
full_cors = wtd.cors(
bind_cols(
score = full_scores[, 1],
criterion_vars
)
)
#main loop
#prep a list
item_set_results_all = list()
if (method == "backwards") {
#drop first, then based on results, drop a second etc. until desired size is reached
items_to_drop = ncol(items) - item_target
items_to_drop_seq = seq(ncol(items) - item_target)
for (i in items_to_drop_seq) {
message(str_glue("removing item {i} out of {items_to_drop} ({ncol(items) - i} remaining)"))
#if the first round, just drop one item
#dont need to use the prior item set
if (i == 1) {
item_set_results_all[[i]] = backwards_drop(
#begin with all items
all_items = items,
current_selection = seq(ncol(items)),
criterion_vars = criterion_vars,
criterion_cors_full = full_cors,
selection_method = selection_method,
mirt_args = mirt_args,
save_fits = save_fits,
full_fit = full_fit,
full_reliability = full_reliability
)
next
} else {
#best prior set
best_prior_i = which.max(item_set_results_all[[i - 1]]$criterion_value)
best_prior_set = item_set_results_all[[i - 1]]$item_set[[best_prior_i]]
#use prior best set
item_set_results_all[[i]] = backwards_drop(
all_items = items,
current_selection = best_prior_set,
criterion_vars = criterion_vars,
criterion_cors_full = full_cors,
selection_method = selection_method,
mirt_args = mirt_args,
save_fits = save_fits,
full_fit = full_fit,
full_reliability = full_reliability
)
next
}
}
} else if (method == "forwards") {
#pick first, then based on results, pick a second etc. until desired size is reached
items_to_pick = item_target
items_to_pick_seq = seq(item_target - 2) #because we start with 3 items (minimum for IRT)
for (i in items_to_pick_seq) {
if (i != 1) {
message(str_glue("adding item {i + 2} out of {items_to_pick}"))
}
#if the first round, try each item, one at a time
#dont need to use the prior item set
if (i == 1) {
item_set_results_all[[i]] = forwards_pick(
#begin with all items
items = items,
current_items = c(),
criterion_vars = criterion_vars,
criterion_cors_full = full_cors,
selection_method = selection_method,
mirt_args = mirt_args,
save_fits = save_fits,
full_fit = full_fit,
full_reliability = full_reliability
)
next
} else {
#best prior set
best_prior_i = which.max(item_set_results_all[[i - 1]]$criterion_value)
best_prior_set = item_set_results_all[[i - 1]]$item_set[[best_prior_i]]
#use prior best set
item_set_results_all[[i]] = forwards_pick(
items = items,
current_items = best_prior_set,
criterion_vars = criterion_vars,
criterion_cors_full = full_cors,
selection_method = selection_method,
mirt_args = mirt_args,
save_fits = save_fits,
full_fit = full_fit,
full_reliability = full_reliability
)
next
}
}
}
#loadings method
if (method == "max_loading") {
item_set_results_all = highest_loadings_method(
items = items,
max_items = item_target,
criterion_vars = criterion_vars,
criterion_cors_full = full_cors,
mirt_args = mirt_args,
save_fits = save_fits,
full_fit = full_fit,
full_reliability = full_reliability,
selection_method = selection_method
)
}
#return results
full_results = item_set_results_all %>% ldf_to_df(by_name = "set") %>% as_tibble()
list(
full_results = full_results,
best_sets = full_results %>%
filter(criterion_value == max(criterion_value), .by = set)
)
}
GG_scale_abbreviation = function(x, vars = c("reliability_frac", "mean_criterion_cors_frac", "r_full_score", "reliability")) {
x$best_sets %>%
#display both reliability and criterion value
pivot_longer(
cols = all_of(vars),
names_to = "criterion",
values_to = "value"
) %>%
ggplot(aes(x = items_in_scale, y = value, color = criterion)) +
geom_point() +
geom_line() +
scale_x_continuous(breaks = seq(0, ncol(d_sim), 5)) +
scale_y_continuous(limits = c(NA, 1)) +
theme_minimal() +
labs(
x = "Number of items in scale",
y = "Criterion value"
)
}
#helper function
get_best_item_set = function(x) {
x$best_sets %>%
tail(1) %>%
pull(item_set) %>%
extract2(1) %>%
sort()
}
The function can optimize for correlation with an arbitrary variable, including the full score (default). Or it can try to optimize the reliability. Here we use the first option.
#no multi-threading
plan(sequential)
#simulate some data
set.seed(1)
n <- 1000
n_items = 25
#simulate items
d_sim = simdata(
N = n,
itemtype = "2PL",
d = runif(n_items, -2, 2),
a = runif(n_items, 0.5, 2)
)
#run a global fit
sim_fit = mirt(
data = d_sim,
model = 1,
itemtype = "2PL",
verbose = F
)
#abbreviate scale
res_c_forwards = abbreviate_scale(
items = d_sim,
item_target = 10,
selection_method = "c",
method = "forwards"
)
## Abbreviating scale using correlation with criterion variable(s) method
## Using the forwards method
## Starting with the 3 items with highest loadings: 13, 21, 7
## adding item 4 out of 10
## adding item 5 out of 10
## adding item 6 out of 10
## adding item 7 out of 10
## adding item 8 out of 10
## adding item 9 out of 10
## adding item 10 out of 10
## 15.36 sec elapsed
res_c_backwards = abbreviate_scale(
items = d_sim,
item_target = 10,
selection_method = "c",
method = "backwards"
)
## Abbreviating scale using correlation with criterion variable(s) method
## Using the backwards method
## removing item 1 out of 15 (24 remaining)
## removing item 2 out of 15 (23 remaining)
## removing item 3 out of 15 (22 remaining)
## removing item 4 out of 15 (21 remaining)
## removing item 5 out of 15 (20 remaining)
## removing item 6 out of 15 (19 remaining)
## removing item 7 out of 15 (18 remaining)
## removing item 8 out of 15 (17 remaining)
## removing item 9 out of 15 (16 remaining)
## removing item 10 out of 15 (15 remaining)
## removing item 11 out of 15 (14 remaining)
## removing item 12 out of 15 (13 remaining)
## removing item 13 out of 15 (12 remaining)
## removing item 14 out of 15 (11 remaining)
## removing item 15 out of 15 (10 remaining)
## 62.685 sec elapsed
#plot results
res_c_forwards %>%
GG_scale_abbreviation()
res_c_backwards %>%
GG_scale_abbreviation()
#compare results
res_c_forwards$best_sets %>%
tail(1)
res_c_backwards$best_sets %>%
tail(1)
#almost the same, but not entirely, why?
#compare the items in best sets
res_c_backwards %>%
get_best_item_set()
## [1] 4 6 7 9 13 17 18 20 21 23
res_c_forwards %>%
get_best_item_set()
## [1] 4 6 7 8 9 13 18 20 21 23
symdiff(
res_c_backwards %>%
get_best_item_set(),
res_c_forwards %>%
get_best_item_set()
)
## [1] 17 8
We try optimizing for reliability, full score correlation, and just picking the items with highest loadings.
#no multi-threading
plan(multisession(workers = 7))
#simulate some data
set.seed(1)
n <- 1000
n_items = 100
#simulate items
d_sim = simdata(
N = n,
itemtype = "2PL",
d = runif(n_items, -2, 2),
a = runif(n_items, 0.5, 2)
)
#run a global fit
sim_fit = mirt(
data = d_sim,
model = 1,
itemtype = "2PL",
verbose = F
)
#abbreviate scale
res_c_forwards = abbreviate_scale(
items = d_sim,
item_target = 20,
selection_method = "c",
method = "forwards"
)
## Abbreviating scale using correlation with criterion variable(s) method
## Using the forwards method
## Starting with the 3 items with highest loadings: 7, 80, 18
## adding item 4 out of 20
## adding item 5 out of 20
## adding item 6 out of 20
## adding item 7 out of 20
## adding item 8 out of 20
## adding item 9 out of 20
## adding item 10 out of 20
## adding item 11 out of 20
## adding item 12 out of 20
## adding item 13 out of 20
## adding item 14 out of 20
## adding item 15 out of 20
## adding item 16 out of 20
## adding item 17 out of 20
## adding item 18 out of 20
## adding item 19 out of 20
## adding item 20 out of 20
## 187.505 sec elapsed
#plot results
res_c_forwards %>%
GG_scale_abbreviation()
#compare results
res_c_forwards$best_sets %>%
tail(1)
#alternatively, we can optimize for reliability
res_r_forwards = abbreviate_scale(
items = d_sim,
item_target = 20,
selection_method = "r",
method = "forwards"
)
## Abbreviating scale using reliability method
## Using the forwards method
## Starting with the 3 items with highest loadings: 7, 80, 18
## adding item 4 out of 20
## adding item 5 out of 20
## adding item 6 out of 20
## adding item 7 out of 20
## adding item 8 out of 20
## adding item 9 out of 20
## adding item 10 out of 20
## adding item 11 out of 20
## adding item 12 out of 20
## adding item 13 out of 20
## adding item 14 out of 20
## adding item 15 out of 20
## adding item 16 out of 20
## adding item 17 out of 20
## adding item 18 out of 20
## adding item 19 out of 20
## adding item 20 out of 20
## 168.722 sec elapsed
#plot results
res_r_forwards %>%
GG_scale_abbreviation()
#compare results
res_r_forwards$best_sets %>%
tail(1)
#highest loading method
res_max_loading = abbreviate_scale(
items = d_sim,
item_target = 20,
selection_method = "c",
method = "max_loading"
)
## Abbreviating scale using correlation with criterion variable(s) method
## Using the max_loading method
## 4.736 sec elapsed
#plot results
res_max_loading %>%
GG_scale_abbreviation()
#compare results
res_max_loading$best_sets %>%
tail(1)
#a single plot of results at 20 items from each method
combined_results = bind_rows(
res_max_loading$best_sets %>%
mutate("method" = "max_loading"),
res_r_forwards$best_sets %>%
mutate("method" = "reliability"),
res_c_forwards$best_sets %>%
mutate("method" = "r full score")
) %>%
select(reliability, r_full_score, method, items_in_scale) %>%
pivot_longer(
cols = c("reliability", "r_full_score"),
names_to = "criterion",
values_to = "value"
)
combined_results %>%
ggplot(aes(items_in_scale, value, color = method)) +
geom_line() +
facet_wrap("criterion", scales = "free_y")
#simulate some data
set.seed(1)
n <- 1000
n_items = 100
max_items = 25
#simulate norms to use for item pars
item_pars = MASS::mvrnorm(
n = n_items,
mu = c(0, 0),
Sigma = matrix(c(1, -0.5, -0.5, 1), nrow = 2)
)
#simulate items
d_sim = simdata(
N = n,
itemtype = "2PL",
d = item_pars[, 1],
a = item_pars[, 2] %>% rescale(0.5, 2)
)
#run a global fit
sim_fit = mirt(
data = d_sim,
model = 1,
itemtype = "2PL",
verbose = F
)
#abbreviate scale
res_c_forwards = abbreviate_scale(
items = d_sim,
item_target = max_items,
selection_method = "c",
method = "forwards"
)
## Abbreviating scale using correlation with criterion variable(s) method
## Using the forwards method
## Starting with the 3 items with highest loadings: 50, 61, 70
## adding item 4 out of 25
## adding item 5 out of 25
## adding item 6 out of 25
## adding item 7 out of 25
## adding item 8 out of 25
## adding item 9 out of 25
## adding item 10 out of 25
## adding item 11 out of 25
## adding item 12 out of 25
## adding item 13 out of 25
## adding item 14 out of 25
## adding item 15 out of 25
## adding item 16 out of 25
## adding item 17 out of 25
## adding item 18 out of 25
## adding item 19 out of 25
## adding item 20 out of 25
## adding item 21 out of 25
## adding item 22 out of 25
## adding item 23 out of 25
## adding item 24 out of 25
## adding item 25 out of 25
## 269.946 sec elapsed
#plot results
res_c_forwards %>%
GG_scale_abbreviation()
#compare results
res_c_forwards$best_sets %>%
tail(1)
#alternatively, we can optimize for reliability
res_r_forwards = abbreviate_scale(
items = d_sim,
item_target = max_items,
selection_method = "r",
method = "forwards"
)
## Abbreviating scale using reliability method
## Using the forwards method
## Starting with the 3 items with highest loadings: 50, 61, 70
## adding item 4 out of 25
## adding item 5 out of 25
## adding item 6 out of 25
## adding item 7 out of 25
## adding item 8 out of 25
## adding item 9 out of 25
## adding item 10 out of 25
## adding item 11 out of 25
## adding item 12 out of 25
## adding item 13 out of 25
## adding item 14 out of 25
## adding item 15 out of 25
## adding item 16 out of 25
## adding item 17 out of 25
## adding item 18 out of 25
## adding item 19 out of 25
## adding item 20 out of 25
## adding item 21 out of 25
## adding item 22 out of 25
## adding item 23 out of 25
## adding item 24 out of 25
## adding item 25 out of 25
## 272.19 sec elapsed
#plot results
res_r_forwards %>%
GG_scale_abbreviation()
#compare results
res_r_forwards$best_sets %>%
tail(1)
#max both r and c
res_rc_forwards = abbreviate_scale(
items = d_sim,
item_target = max_items,
selection_method = "rc",
method = "forwards"
)
## Abbreviating scale using average of correlation with creiterion variable(s) and reliability method
## Using the forwards method
## Starting with the 3 items with highest loadings: 50, 61, 70
## adding item 4 out of 25
## adding item 5 out of 25
## adding item 6 out of 25
## adding item 7 out of 25
## adding item 8 out of 25
## adding item 9 out of 25
## adding item 10 out of 25
## adding item 11 out of 25
## adding item 12 out of 25
## adding item 13 out of 25
## adding item 14 out of 25
## adding item 15 out of 25
## adding item 16 out of 25
## adding item 17 out of 25
## adding item 18 out of 25
## adding item 19 out of 25
## adding item 20 out of 25
## adding item 21 out of 25
## adding item 22 out of 25
## adding item 23 out of 25
## adding item 24 out of 25
## adding item 25 out of 25
## 275.849 sec elapsed
#plot results
res_rc_forwards %>%
GG_scale_abbreviation()
#compare results
res_rc_forwards$best_sets %>%
tail(1)
#highest loading method
res_max_loading = abbreviate_scale(
items = d_sim,
item_target = max_items,
selection_method = "c",
method = "max_loading"
)
## Abbreviating scale using correlation with criterion variable(s) method
## Using the max_loading method
## 6.479 sec elapsed
#plot results
res_max_loading %>%
GG_scale_abbreviation()
#compare results
res_max_loading$best_sets %>%
tail(1)
#a single plot of results at 20 items from each method
combined_results = bind_rows(
res_max_loading$best_sets %>%
mutate("method" = "max_loading"),
res_r_forwards$best_sets %>%
mutate("method" = "reliability"),
res_c_forwards$best_sets %>%
mutate("method" = "r full score"),
res_rc_forwards$best_sets %>%
mutate("method" = "r full score and reliability")
) %>%
select(reliability, r_full_score, method, items_in_scale) %>%
pivot_longer(
cols = c("reliability", "r_full_score"),
names_to = "criterion",
values_to = "value"
)
combined_results %>%
ggplot(aes(items_in_scale, value, color = method)) +
geom_line() +
facet_wrap("criterion", scales = "free_y")
When item parameter correlations are present (common), optimizing for both full scale correlation and reliability is a good idea. It gets almost the same full score correlation and reliability as the pure strategies. The highest loading method is not as good in this case.
These functions will be implemented in Kirkegaard package shortly and easily available for use.
write_sessioninfo()
## R version 4.3.3 (2024-02-29)
## Platform: x86_64-pc-linux-gnu (64-bit)
## Running under: Linux Mint 21.1
##
## Matrix products: default
## BLAS: /usr/lib/x86_64-linux-gnu/blas/libblas.so.3.10.0
## LAPACK: /usr/lib/x86_64-linux-gnu/lapack/liblapack.so.3.10.0
##
## locale:
## [1] LC_CTYPE=en_DK.UTF-8 LC_NUMERIC=C
## [3] LC_TIME=en_DK.UTF-8 LC_COLLATE=en_DK.UTF-8
## [5] LC_MONETARY=en_DK.UTF-8 LC_MESSAGES=en_DK.UTF-8
## [7] LC_PAPER=en_DK.UTF-8 LC_NAME=C
## [9] LC_ADDRESS=C LC_TELEPHONE=C
## [11] LC_MEASUREMENT=en_DK.UTF-8 LC_IDENTIFICATION=C
##
## time zone: Europe/Copenhagen
## tzcode source: system (glibc)
##
## attached base packages:
## [1] stats4 stats graphics grDevices utils datasets methods
## [8] base
##
## other attached packages:
## [1] furrr_0.3.1 future_1.33.2 mirt_1.41.8
## [4] lattice_0.22-5 kirkegaard_2024-04-01 psych_2.4.3
## [7] assertthat_0.2.1 weights_1.0.4 Hmisc_5.1-2
## [10] magrittr_2.0.3 lubridate_1.9.3 forcats_1.0.0
## [13] stringr_1.5.1 dplyr_1.1.4 purrr_1.0.2
## [16] readr_2.1.5 tidyr_1.3.1 tibble_3.2.1
## [19] ggplot2_3.5.0 tidyverse_2.0.0
##
## loaded via a namespace (and not attached):
## [1] mnormt_2.1.1 pbapply_1.7-2 gridExtra_2.3
## [4] permute_0.9-7 rlang_1.1.3 compiler_4.3.3
## [7] mgcv_1.9-1 gdata_3.0.0 vctrs_0.6.5
## [10] pkgconfig_2.0.3 shape_1.4.6.1 fastmap_1.1.1
## [13] backports_1.4.1 labeling_0.4.3 utf8_1.2.4
## [16] rmarkdown_2.26 tzdb_0.4.0 nloptr_2.0.3
## [19] xfun_0.43 glmnet_4.1-8 jomo_2.7-6
## [22] cachem_1.0.8 jsonlite_1.8.8 highr_0.10
## [25] tictoc_1.2.1 pan_1.9 Deriv_4.1.3
## [28] broom_1.0.5 parallel_4.3.3 cluster_2.1.6
## [31] R6_2.5.1 bslib_0.7.0 stringi_1.8.3
## [34] parallelly_1.37.1 boot_1.3-30 rpart_4.1.23
## [37] jquerylib_0.1.4 Rcpp_1.0.12 iterators_1.0.14
## [40] knitr_1.45 base64enc_0.1-3 Matrix_1.6-5
## [43] splines_4.3.3 nnet_7.3-19 timechange_0.3.0
## [46] tidyselect_1.2.1 rstudioapi_0.16.0 yaml_2.3.8
## [49] vegan_2.6-4 codetools_0.2-19 dcurver_0.9.2
## [52] listenv_0.9.1 withr_3.0.0 evaluate_0.23
## [55] foreign_0.8-86 survival_3.5-8 pillar_1.9.0
## [58] mice_3.16.0 checkmate_2.3.1 foreach_1.5.2
## [61] generics_0.1.3 hms_1.1.3 munsell_0.5.1
## [64] scales_1.3.0 minqa_1.2.6 gtools_3.9.5
## [67] globals_0.16.3 glue_1.7.0 tools_4.3.3
## [70] data.table_1.15.4 lme4_1.1-35.2 grid_4.3.3
## [73] colorspace_2.1-0 nlme_3.1-163 htmlTable_2.4.2
## [76] Formula_1.2-5 cli_3.6.2 fansi_1.0.6
## [79] gtable_0.3.4 sass_0.4.9 digest_0.6.35
## [82] GPArotation_2024.3-1 htmlwidgets_1.6.4 farver_2.1.1
## [85] htmltools_0.5.8.1 lifecycle_1.0.4 mitml_0.4-5
## [88] MASS_7.3-60
#upload to OSF
#avoid uploading the data in case they freak out again
if (F) {
library(osfr)
#auth
osf_auth(readr::read_lines("~/.config/osf_token"))
#the project we will use
osf_proj = osf_retrieve_node("https://osf.io/XXX/")
#upload files
#overwrite existing (versioning)
osf_upload(osf_proj, conflicts = "overwrite",
path = c(
"figs",
"data",
"notebook.html",
"notebook.Rmd",
))
}