About

https://onlinelibrary.wiley.com/doi/full/10.1002/bdm.1751

I don’t have the raw data, so these norms are based on summary statistics about pass rates and average item intercorrelation.

Init

options(digits = 3)
library(pacman)
p_load(kirkegaard, readxl)
theme_set(theme_bw())

#show and plot norms
show_norms = function(x) {
  #score by row
  x_scores = rowSums(x)
  
  #fit ecdf
  x_cdf = ecdf(x_scores)
  
  #show norms
  possible_x = 0:ncol(x)
  
  x_centiles = x_cdf(possible_x) %>% set_names(possible_x)
  x_IQs = qnorm(x_centiles, 100, 15)
  
  list(
    desc = describe(x_scores),
    centiles = x_centiles,
    IQ_norms = x_IQs,
    plot = ggplot(tibble(x = x_scores), aes(x)) +
      geom_bar() +
      scale_x_continuous(breaks = possible_x)
  )
  
}

#item simulator
sim_items = function(n, mean_latent_r = 0) {
  #with average correlation matching the paper
  sim_mat = matrix(rep(mean_latent_r, nrow(items)^2), nrow = nrow(items)); diag(sim_mat) = 1
  sim_r_match = MASS::mvrnorm(n = n, 
                              mu = rep(0, nrow(items)),
                              Sigma = sim_mat)
  #dichotomize to match pass rates
  for (i in seq_along_rows(items)) {
    #z value that results in this pass rate
    thres = qnorm(items$pass_rate[i])
    sim_r_match[, i] = sim_r_match[, i] < thres
  }
  
  sim_r_match %>% 
    set_colnames("item_" + 1:ncol(.)) %>% 
    as_tibble()
}

Data

items = read_xlsx("data/items.xlsx") %>% 
  df_legalize_names() %>% 
  mutate(pass_rate = pass_rate / 100)

items

Simulate

Make approximate norms

#with item cors of 0, independent items
n_sim = 10000
set.seed(1)

#with no intercorrelation
sim_r0 = map_dfc(items$pass_rate, ~tibble(a = sample(c(T, F), size = n_sim, replace = T, prob = c(., 1-.))))

#show norms
sim_r0 %>% show_norms()

## $desc
##    vars     n mean  sd median trimmed  mad min max range  skew kurtosis   se
## X1    1 10000 11.3 1.7     11    11.3 1.48   5  18    13 -0.09    -0.04 0.02
## 
## $centiles
##      0      1      2      3      4      5      6      7      8      9     10 
## 0.0000 0.0000 0.0000 0.0000 0.0000 0.0006 0.0023 0.0131 0.0490 0.1443 0.3102 
##     11     12     13     14     15     16     17     18 
## 0.5360 0.7558 0.9056 0.9751 0.9946 0.9996 0.9999 1.0000 
## 
## $IQ_norms
##     0     1     2     3     4     5     6     7     8     9    10    11    12 
##  -Inf  -Inf  -Inf  -Inf  -Inf  51.4  57.5  66.7  75.2  84.1  92.6 101.4 110.4 
##    13    14    15    16    17    18 
## 119.7 129.4 138.2 150.3 155.8   Inf 
## 
## $plot

#but we can also use the fancier function
sim_r0b = sim_items(n_sim, mean_latent_r = 0)

sim_r0b %>% show_norms()

## $desc
##    vars     n mean   sd median trimmed  mad min max range  skew kurtosis   se
## X1    1 10000 11.3 1.72     11    11.3 1.48   4  17    13 -0.09    -0.06 0.02
## 
## $centiles
##      0      1      2      3      4      5      6      7      8      9     10 
## 0.0000 0.0000 0.0000 0.0000 0.0001 0.0003 0.0023 0.0144 0.0527 0.1449 0.3136 
##     11     12     13     14     15     16     17     18 
## 0.5372 0.7592 0.9045 0.9723 0.9954 0.9996 1.0000 1.0000 
## 
## $IQ_norms
##     0     1     2     3     4     5     6     7     8     9    10    11    12 
##  -Inf  -Inf  -Inf  -Inf  44.2  48.5  57.5  67.2  75.7  84.1  92.7 101.4 110.6 
##    13    14    15    16    17    18 
## 119.6 128.7 139.1 150.3   Inf   Inf 
## 
## $plot

#check if matches approxiately
colMeans(sim_r0b)

##  item_1  item_2  item_3  item_4  item_5  item_6  item_7  item_8  item_9 item_10 
##  0.9570  0.9460  0.9274  0.9134  0.8842  0.8443  0.7437  0.7356  0.7207  0.7063 
## item_11 item_12 item_13 item_14 item_15 item_16 item_17 item_18 
##  0.5852  0.5543  0.5272  0.3429  0.3207  0.3175  0.1817  0.0954

colMeans(sim_r0b) - items$pass_rate

##  item_1  item_2  item_3  item_4  item_5  item_6  item_7  item_8  item_9 item_10 
## -0.0040  0.0010  0.0004  0.0014  0.0032  0.0013 -0.0053 -0.0074  0.0087  0.0003 
## item_11 item_12 item_13 item_14 item_15 item_16 item_17 item_18 
##  0.0012  0.0013 -0.0008 -0.0021 -0.0023 -0.0015 -0.0053 -0.0026 
## attr(,"label")
## [1] "pass rate"

#with average correlation matching the paper
#trial and error until it matches
sim_r_match = sim_items(n_sim, mean_latent_r = .50)

#check if matches approxiately
colMeans(sim_r_match)

##  item_1  item_2  item_3  item_4  item_5  item_6  item_7  item_8  item_9 item_10 
##   0.962   0.944   0.931   0.914   0.887   0.845   0.747   0.745   0.718   0.705 
## item_11 item_12 item_13 item_14 item_15 item_16 item_17 item_18 
##   0.589   0.554   0.534   0.347   0.329   0.327   0.190   0.102

colMeans(sim_r_match) - items$pass_rate

##  item_1  item_2  item_3  item_4  item_5  item_6  item_7  item_8  item_9 item_10 
##  0.0009 -0.0010  0.0043  0.0019  0.0057  0.0022 -0.0016  0.0017  0.0060 -0.0008 
## item_11 item_12 item_13 item_14 item_15 item_16 item_17 item_18 
##  0.0051  0.0014  0.0062  0.0023  0.0058  0.0081  0.0025  0.0044 
## attr(,"label")
## [1] "pass rate"

#test stats
alpha(sim_r_match)

## 
## Reliability analysis   
## Call: alpha(x = sim_r_match)
## 
##   raw_alpha std.alpha G6(smc) average_r S/N    ase mean   sd median_r
##       0.85      0.85    0.85      0.24 5.6 0.0021 0.63 0.21     0.24
## 
##  lower alpha upper     95% confidence boundaries
## 0.85 0.85 0.85 
## 
##  Reliability if an item is dropped:
##         raw_alpha std.alpha G6(smc) average_r S/N alpha se  var.r med.r
## item_1       0.85      0.85    0.84      0.24 5.5   0.0021 0.0039  0.25
## item_2       0.85      0.84    0.84      0.24 5.4   0.0021 0.0042  0.25
## item_3       0.85      0.84    0.84      0.24 5.4   0.0021 0.0043  0.25
## item_4       0.84      0.84    0.84      0.24 5.4   0.0022 0.0044  0.24
## item_5       0.84      0.84    0.84      0.24 5.3   0.0022 0.0045  0.24
## item_6       0.84      0.84    0.84      0.24 5.2   0.0022 0.0045  0.23
## item_7       0.84      0.84    0.83      0.23 5.1   0.0022 0.0044  0.23
## item_8       0.84      0.84    0.83      0.23 5.1   0.0022 0.0044  0.23
## item_9       0.84      0.84    0.83      0.23 5.1   0.0022 0.0044  0.23
## item_10      0.84      0.84    0.83      0.23 5.1   0.0022 0.0043  0.23
## item_11      0.84      0.84    0.83      0.23 5.1   0.0023 0.0042  0.23
## item_12      0.84      0.84    0.83      0.23 5.1   0.0022 0.0043  0.23
## item_13      0.84      0.84    0.83      0.23 5.1   0.0023 0.0042  0.23
## item_14      0.84      0.84    0.84      0.23 5.2   0.0022 0.0042  0.23
## item_15      0.84      0.84    0.84      0.23 5.2   0.0022 0.0043  0.23
## item_16      0.84      0.84    0.84      0.23 5.2   0.0022 0.0042  0.23
## item_17      0.84      0.84    0.84      0.24 5.3   0.0022 0.0041  0.24
## item_18      0.85      0.85    0.84      0.24 5.5   0.0021 0.0036  0.25
## 
##  Item statistics 
##             n raw.r std.r r.cor r.drop mean   sd
## item_1  10000  0.34  0.40  0.34   0.30 0.96 0.19
## item_2  10000  0.38  0.43  0.37   0.33 0.94 0.23
## item_3  10000  0.41  0.45  0.40   0.35 0.93 0.25
## item_4  10000  0.44  0.48  0.42   0.38 0.91 0.28
## item_5  10000  0.48  0.50  0.45   0.41 0.89 0.32
## item_6  10000  0.52  0.54  0.50   0.45 0.85 0.36
## item_7  10000  0.59  0.58  0.55   0.50 0.75 0.43
## item_8  10000  0.58  0.58  0.54   0.50 0.74 0.44
## item_9  10000  0.59  0.59  0.55   0.51 0.72 0.45
## item_10 10000  0.61  0.60  0.57   0.53 0.71 0.46
## item_11 10000  0.62  0.60  0.57   0.53 0.59 0.49
## item_12 10000  0.61  0.59  0.56   0.52 0.55 0.50
## item_13 10000  0.62  0.60  0.57   0.53 0.53 0.50
## item_14 10000  0.59  0.56  0.53   0.50 0.35 0.48
## item_15 10000  0.57  0.55  0.51   0.48 0.33 0.47
## item_16 10000  0.58  0.55  0.52   0.49 0.33 0.47
## item_17 10000  0.49  0.48  0.43   0.41 0.19 0.39
## item_18 10000  0.40  0.41  0.35   0.33 0.10 0.30
## 
## Non missing response frequency for each item
##            0    1 miss
## item_1  0.04 0.96    0
## item_2  0.06 0.94    0
## item_3  0.07 0.93    0
## item_4  0.09 0.91    0
## item_5  0.11 0.89    0
## item_6  0.15 0.85    0
## item_7  0.25 0.75    0
## item_8  0.26 0.74    0
## item_9  0.28 0.72    0
## item_10 0.29 0.71    0
## item_11 0.41 0.59    0
## item_12 0.45 0.55    0
## item_13 0.47 0.53    0
## item_14 0.65 0.35    0
## item_15 0.67 0.33    0
## item_16 0.67 0.33    0
## item_17 0.81 0.19    0
## item_18 0.90 0.10    0

#show norms
sim_r_match %>% show_norms()

## $desc
##    vars     n mean   sd median trimmed  mad min max range  skew kurtosis   se
## X1    1 10000 11.4 3.83     12    11.6 4.45   0  18    18 -0.42    -0.45 0.04
## 
## $centiles
##      0      1      2      3      4      5      6      7      8      9     10 
## 0.0010 0.0056 0.0146 0.0304 0.0542 0.0833 0.1219 0.1697 0.2308 0.3018 0.3851 
##     11     12     13     14     15     16     17     18 
## 0.4743 0.5723 0.6698 0.7648 0.8520 0.9236 0.9737 1.0000 
## 
## $IQ_norms
##     0     1     2     3     4     5     6     7     8     9    10    11    12 
##  53.6  62.0  67.3  71.9  75.9  79.3  82.5  85.7  89.0  92.2  95.6  99.0 102.7 
##    13    14    15    16    17    18 
## 106.6 110.8 115.7 121.4 129.1   Inf 
## 
## $plot

Numeracy scale test norms

About

Init

Data

Simulate