I don’t have the raw data, so these norms are based on summary statistics about pass rates and average item intercorrelation.
options(digits = 3)
library(pacman)
p_load(kirkegaard, readxl)
theme_set(theme_bw())
#show and plot norms
show_norms = function(x) {
#score by row
x_scores = rowSums(x)
#fit ecdf
x_cdf = ecdf(x_scores)
#show norms
possible_x = 0:ncol(x)
x_centiles = x_cdf(possible_x) %>% set_names(possible_x)
x_IQs = qnorm(x_centiles, 100, 15)
list(
desc = describe(x_scores),
centiles = x_centiles,
IQ_norms = x_IQs,
plot = ggplot(tibble(x = x_scores), aes(x)) +
geom_bar() +
scale_x_continuous(breaks = possible_x)
)
}
#item simulator
sim_items = function(n, mean_latent_r = 0) {
#with average correlation matching the paper
sim_mat = matrix(rep(mean_latent_r, nrow(items)^2), nrow = nrow(items)); diag(sim_mat) = 1
sim_r_match = MASS::mvrnorm(n = n,
mu = rep(0, nrow(items)),
Sigma = sim_mat)
#dichotomize to match pass rates
for (i in seq_along_rows(items)) {
#z value that results in this pass rate
thres = qnorm(items$pass_rate[i])
sim_r_match[, i] = sim_r_match[, i] < thres
}
sim_r_match %>%
set_colnames("item_" + 1:ncol(.)) %>%
as_tibble()
}
items = read_xlsx("data/items.xlsx") %>%
df_legalize_names() %>%
mutate(pass_rate = pass_rate / 100)
items
Make approximate norms
#with item cors of 0, independent items
n_sim = 10000
set.seed(1)
#with no intercorrelation
sim_r0 = map_dfc(items$pass_rate, ~tibble(a = sample(c(T, F), size = n_sim, replace = T, prob = c(., 1-.))))
#show norms
sim_r0 %>% show_norms()
## $desc
## vars n mean sd median trimmed mad min max range skew kurtosis se
## X1 1 10000 11.3 1.7 11 11.3 1.48 5 18 13 -0.09 -0.04 0.02
##
## $centiles
## 0 1 2 3 4 5 6 7 8 9 10
## 0.0000 0.0000 0.0000 0.0000 0.0000 0.0006 0.0023 0.0131 0.0490 0.1443 0.3102
## 11 12 13 14 15 16 17 18
## 0.5360 0.7558 0.9056 0.9751 0.9946 0.9996 0.9999 1.0000
##
## $IQ_norms
## 0 1 2 3 4 5 6 7 8 9 10 11 12
## -Inf -Inf -Inf -Inf -Inf 51.4 57.5 66.7 75.2 84.1 92.6 101.4 110.4
## 13 14 15 16 17 18
## 119.7 129.4 138.2 150.3 155.8 Inf
##
## $plot
#but we can also use the fancier function
sim_r0b = sim_items(n_sim, mean_latent_r = 0)
sim_r0b %>% show_norms()
## $desc
## vars n mean sd median trimmed mad min max range skew kurtosis se
## X1 1 10000 11.3 1.72 11 11.3 1.48 4 17 13 -0.09 -0.06 0.02
##
## $centiles
## 0 1 2 3 4 5 6 7 8 9 10
## 0.0000 0.0000 0.0000 0.0000 0.0001 0.0003 0.0023 0.0144 0.0527 0.1449 0.3136
## 11 12 13 14 15 16 17 18
## 0.5372 0.7592 0.9045 0.9723 0.9954 0.9996 1.0000 1.0000
##
## $IQ_norms
## 0 1 2 3 4 5 6 7 8 9 10 11 12
## -Inf -Inf -Inf -Inf 44.2 48.5 57.5 67.2 75.7 84.1 92.7 101.4 110.6
## 13 14 15 16 17 18
## 119.6 128.7 139.1 150.3 Inf Inf
##
## $plot
#check if matches approxiately
colMeans(sim_r0b)
## item_1 item_2 item_3 item_4 item_5 item_6 item_7 item_8 item_9 item_10
## 0.9570 0.9460 0.9274 0.9134 0.8842 0.8443 0.7437 0.7356 0.7207 0.7063
## item_11 item_12 item_13 item_14 item_15 item_16 item_17 item_18
## 0.5852 0.5543 0.5272 0.3429 0.3207 0.3175 0.1817 0.0954
colMeans(sim_r0b) - items$pass_rate
## item_1 item_2 item_3 item_4 item_5 item_6 item_7 item_8 item_9 item_10
## -0.0040 0.0010 0.0004 0.0014 0.0032 0.0013 -0.0053 -0.0074 0.0087 0.0003
## item_11 item_12 item_13 item_14 item_15 item_16 item_17 item_18
## 0.0012 0.0013 -0.0008 -0.0021 -0.0023 -0.0015 -0.0053 -0.0026
## attr(,"label")
## [1] "pass rate"
#with average correlation matching the paper
#trial and error until it matches
sim_r_match = sim_items(n_sim, mean_latent_r = .50)
#check if matches approxiately
colMeans(sim_r_match)
## item_1 item_2 item_3 item_4 item_5 item_6 item_7 item_8 item_9 item_10
## 0.962 0.944 0.931 0.914 0.887 0.845 0.747 0.745 0.718 0.705
## item_11 item_12 item_13 item_14 item_15 item_16 item_17 item_18
## 0.589 0.554 0.534 0.347 0.329 0.327 0.190 0.102
colMeans(sim_r_match) - items$pass_rate
## item_1 item_2 item_3 item_4 item_5 item_6 item_7 item_8 item_9 item_10
## 0.0009 -0.0010 0.0043 0.0019 0.0057 0.0022 -0.0016 0.0017 0.0060 -0.0008
## item_11 item_12 item_13 item_14 item_15 item_16 item_17 item_18
## 0.0051 0.0014 0.0062 0.0023 0.0058 0.0081 0.0025 0.0044
## attr(,"label")
## [1] "pass rate"
#test stats
alpha(sim_r_match)
##
## Reliability analysis
## Call: alpha(x = sim_r_match)
##
## raw_alpha std.alpha G6(smc) average_r S/N ase mean sd median_r
## 0.85 0.85 0.85 0.24 5.6 0.0021 0.63 0.21 0.24
##
## lower alpha upper 95% confidence boundaries
## 0.85 0.85 0.85
##
## Reliability if an item is dropped:
## raw_alpha std.alpha G6(smc) average_r S/N alpha se var.r med.r
## item_1 0.85 0.85 0.84 0.24 5.5 0.0021 0.0039 0.25
## item_2 0.85 0.84 0.84 0.24 5.4 0.0021 0.0042 0.25
## item_3 0.85 0.84 0.84 0.24 5.4 0.0021 0.0043 0.25
## item_4 0.84 0.84 0.84 0.24 5.4 0.0022 0.0044 0.24
## item_5 0.84 0.84 0.84 0.24 5.3 0.0022 0.0045 0.24
## item_6 0.84 0.84 0.84 0.24 5.2 0.0022 0.0045 0.23
## item_7 0.84 0.84 0.83 0.23 5.1 0.0022 0.0044 0.23
## item_8 0.84 0.84 0.83 0.23 5.1 0.0022 0.0044 0.23
## item_9 0.84 0.84 0.83 0.23 5.1 0.0022 0.0044 0.23
## item_10 0.84 0.84 0.83 0.23 5.1 0.0022 0.0043 0.23
## item_11 0.84 0.84 0.83 0.23 5.1 0.0023 0.0042 0.23
## item_12 0.84 0.84 0.83 0.23 5.1 0.0022 0.0043 0.23
## item_13 0.84 0.84 0.83 0.23 5.1 0.0023 0.0042 0.23
## item_14 0.84 0.84 0.84 0.23 5.2 0.0022 0.0042 0.23
## item_15 0.84 0.84 0.84 0.23 5.2 0.0022 0.0043 0.23
## item_16 0.84 0.84 0.84 0.23 5.2 0.0022 0.0042 0.23
## item_17 0.84 0.84 0.84 0.24 5.3 0.0022 0.0041 0.24
## item_18 0.85 0.85 0.84 0.24 5.5 0.0021 0.0036 0.25
##
## Item statistics
## n raw.r std.r r.cor r.drop mean sd
## item_1 10000 0.34 0.40 0.34 0.30 0.96 0.19
## item_2 10000 0.38 0.43 0.37 0.33 0.94 0.23
## item_3 10000 0.41 0.45 0.40 0.35 0.93 0.25
## item_4 10000 0.44 0.48 0.42 0.38 0.91 0.28
## item_5 10000 0.48 0.50 0.45 0.41 0.89 0.32
## item_6 10000 0.52 0.54 0.50 0.45 0.85 0.36
## item_7 10000 0.59 0.58 0.55 0.50 0.75 0.43
## item_8 10000 0.58 0.58 0.54 0.50 0.74 0.44
## item_9 10000 0.59 0.59 0.55 0.51 0.72 0.45
## item_10 10000 0.61 0.60 0.57 0.53 0.71 0.46
## item_11 10000 0.62 0.60 0.57 0.53 0.59 0.49
## item_12 10000 0.61 0.59 0.56 0.52 0.55 0.50
## item_13 10000 0.62 0.60 0.57 0.53 0.53 0.50
## item_14 10000 0.59 0.56 0.53 0.50 0.35 0.48
## item_15 10000 0.57 0.55 0.51 0.48 0.33 0.47
## item_16 10000 0.58 0.55 0.52 0.49 0.33 0.47
## item_17 10000 0.49 0.48 0.43 0.41 0.19 0.39
## item_18 10000 0.40 0.41 0.35 0.33 0.10 0.30
##
## Non missing response frequency for each item
## 0 1 miss
## item_1 0.04 0.96 0
## item_2 0.06 0.94 0
## item_3 0.07 0.93 0
## item_4 0.09 0.91 0
## item_5 0.11 0.89 0
## item_6 0.15 0.85 0
## item_7 0.25 0.75 0
## item_8 0.26 0.74 0
## item_9 0.28 0.72 0
## item_10 0.29 0.71 0
## item_11 0.41 0.59 0
## item_12 0.45 0.55 0
## item_13 0.47 0.53 0
## item_14 0.65 0.35 0
## item_15 0.67 0.33 0
## item_16 0.67 0.33 0
## item_17 0.81 0.19 0
## item_18 0.90 0.10 0
#show norms
sim_r_match %>% show_norms()
## $desc
## vars n mean sd median trimmed mad min max range skew kurtosis se
## X1 1 10000 11.4 3.83 12 11.6 4.45 0 18 18 -0.42 -0.45 0.04
##
## $centiles
## 0 1 2 3 4 5 6 7 8 9 10
## 0.0010 0.0056 0.0146 0.0304 0.0542 0.0833 0.1219 0.1697 0.2308 0.3018 0.3851
## 11 12 13 14 15 16 17 18
## 0.4743 0.5723 0.6698 0.7648 0.8520 0.9236 0.9737 1.0000
##
## $IQ_norms
## 0 1 2 3 4 5 6 7 8 9 10 11 12
## 53.6 62.0 67.3 71.9 75.9 79.3 82.5 85.7 89.0 92.2 95.6 99.0 102.7
## 13 14 15 16 17 18
## 106.6 110.8 115.7 121.4 129.1 Inf
##
## $plot