Start

library(pacman)
p_load(kirkegaard, rms, caret, furrr, doFuture, mirt)
options(digits = 3)

Parallel

#register backend
registerDoFuture()

#activate parallel
plan(multiprocess(workers = 6))
## Warning: [ONE-TIME WARNING] Forked processing ('multicore') is disabled
## in future (>= 1.13.0) when running R from RStudio, because it is
## considered unstable. Because of this, plan("multicore") will fall
## back to plan("sequential"), and plan("multiprocess") will fall back to
## plan("multisession") - not plan("multicore") as in the past. For more details,
## how to control forked processing or not, and how to silence this warning in
## future R sessions, see ?future::supportsMulticore
#max size of exported object
options(future.globals.maxSize = 2000 * 1024^2)

Functions

#recode items and scoring key
#a limitation of the mirt package requires this
relevel_items = function(x, key) {
  #convert to factor, drop unused levels
  xx = map_df(x, function(xi) {
    xi %>% as.factor() %>% fct_drop()
  })

  #which factor level is the key?
  key2 = map2_int(xx, key, function(i, k) {
    #which level is the right one?
    (levels(i) == k) %>% which()
  })
  
  list(
    data = map_df(xx, as.numeric),
    key = key2
  )
}

#test
if (F) {
  ex_items = data.frame(
    i1 = c(1, 3, 5, 9, 11) %>% factor(levels = 1:11),
    i2 = c(1, 2, 5, 7, 3) %>% factor(levels = 1:11),
    i3 = c(3, 7, 1, 4, 4) %>% factor(levels = 1:11)
  )
  ex_key = c(1, 7, 3)
  
  relevel_items(ex_items, ex_key)
}
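For reference, the toy example recodes as follows: each item's observed levels collapse to consecutive integers, and the key becomes the index of the correct level. A hand-derived sanity check, not part of the original script:

#hand-derived expected key positions for the toy example above
if (F) {
  #i1: levels 1,3,5,9,11 -> 1:5, so key 1 is the 1st level
  #i2: levels 1,2,3,5,7  -> 1:5, so key 7 is the 5th level
  #i3: levels 1,3,4,7    -> 1:4, so key 3 is the 2nd level
  assert_that(all(relevel_items(ex_items, ex_key)$key == c(1, 5, 2)))
}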

Data

#read
vocab = read_tsv("data/VIQT_data.csv")
## Parsed with column specification:
## cols(
##   .default = col_double(),
##   country = col_character()
## )
## See spec(...) for full column specifications.
vocab_vars = read_lines("data/codebook.txt")
vocab_keys = readxl::read_xlsx("data/vocab_keys.xlsx")

#education as ordered factor
vocab$education_ord = ordered(vocab$education)

#score items
#responses are stored as decimals encoding the selected options as bits, so we need converters
options_to_binary = function(x, y) {
  #base string
  z = rep(0, 5)
  
  #loop and replace
  for (i in 1:5) {
    #check
    if (i %in% c(x, y)) {
      z[i] = 1
    }
  }
  
  str_c(z, collapse = "")
}

#binary to decimal
binary_to_decimal = function(x, reverse = F) {
  if (reverse) return(stringi::stri_reverse(x) %>% base::strtoi(base = 2))
  x %>% base::strtoi(base = 2)
}

#decimal to response options
#tricky: the stored bit order is reversed, hence the stri_reverse calls
decimal_to_options_inner = function(x) {
  if (x == -1) return("DK")
  map_chr(x, ~binaryLogic::as.binary(.) %>% str_c(collapse = "") %>% str_pad(width = 5, side = "left", pad = "0")) %>% stringi::stri_reverse()
}
decimal_to_options_inner = memoise::memoise(decimal_to_options_inner)
decimal_to_options = function(x) map_chr(x, decimal_to_options_inner)
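A quick round trip shows how the three helpers fit together: option positions become a 5-bit string, which is stored as a decimal with the bit order reversed. A hand-derived check of the intended encoding, not in the original:

#round-trip sanity check of the encoding helpers (hand-derived, not in the original)
if (F) {
  options_to_binary(1, 3)                 #"10100": options 1 and 3 selected
  binary_to_decimal("10100", reverse = T) #5: bits are stored in reverse order
  decimal_to_options(5)                   #"10100": intended inverse, back to the bitstring
}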

#recode option keys from option pairs to the decimal bit encoding
vocab_keys$true_option_manual = vocab_keys %>%
  dplyr::select(tidyselect::starts_with("Option")) %$%
  map2(Option1, Option2, .f = options_to_binary) %>%
  map_dbl(.f = binary_to_decimal, reverse = T)

#true keys from the codebook
vocab_keys$true_option = vocab_vars[15:59] %>% str_match("\\t\\d+") %>% str_replace("\\t", "") %>% as.numeric()
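To make the extraction concrete: the regex grabs the first tab-followed run of digits from each codebook line. A hypothetical line (the real file's layout may differ), using only stringr behavior:

#hypothetical codebook line; the real file's layout may differ
if (F) {
  "Q1. cursory\t24" %>% str_match("\\t\\d+") %>% str_replace("\\t", "") %>% as.numeric() #24
}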

#keys match?
assert_that(all(vocab_keys$true_option == vocab_keys$true_option_manual))
## [1] TRUE
#score data
scored_items = score_items(vocab %>% select(starts_with("Q")), key = vocab_keys$true_option)
names(scored_items) = "Item_" + 1:45
vocab_preds = "Q" + 1:45

#join to main
vocab = cbind(vocab, scored_items)
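score_items() comes from the kirkegaard package; conceptually it just compares each response column against its key. A minimal sketch of the idea, not the package's actual implementation:

#minimal sketch of key-based scoring; not kirkegaard's actual implementation
if (F) {
  scored_manually = map2_df(vocab %>% select(starts_with("Q")),
                            vocab_keys$true_option,
                            function(responses, key) as.numeric(responses == key))
}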
  
#choices to factor coding
#impossible selections are pooled into -1 ("DK")
#some respondents managed to submit only a single selected option
possible_options = c("11000", "10100", "10010", "10001", "01100", "01010", "01001", "00110", "00101", "00011") %>% binary_to_decimal(reverse = T)
assert_that(all(vocab_keys$true_option %in% possible_options))
## [1] TRUE
#recode impossible options and convert to factor
for (q in "Q" + 1:45) {
  #impossible option? recode to -1
  vocab[[q]] = case_when((!vocab[[q]] %in% c(possible_options)) ~ -1,
                         TRUE ~ vocab[[q]])
  
  #to factor
  vocab[[q]] = as.factor(vocab[[q]])
}
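With the factors in place, decimal_to_options() can translate an item's raw levels back into readable option patterns. An optional hand-rolled check, not in the original:

#optional: view the selected-option patterns behind one item's levels
if (F) {
  vocab$Q1 %>% levels() %>% as.numeric() %>% decimal_to_options()
}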

#option diversity by item
map_int("Q" + 1:45, ~vocab[[.]] %>% unique() %>% length())
##  [1] 11 11 11 11 11 11 11 11 11 10 11 11 11 11 11 11 11 11 11 11 11 11 11 11 11
## [26] 11 11 11 11 11 11 11 11 11 11 11 11 11 11 11 11 11 11 11 11
#subset to native English speakers
vocab_all = vocab
vocab = vocab %>% filter(engnat == 1)

Item response theory

#standard approaches
#unsupervised predictor
#difficulty (pass rate) and sex difference by item
item_stats = tibble(
  item = vocab %>% select(starts_with("Item_")) %>% colnames(),
  pass_rate = vocab %>% select(starts_with("Item_")) %>% colMeans(),
  #male advantage in pass rate on the probit (z) scale
  male_d = (vocab %>% filter(gender == 1) %>% select(starts_with("Item_")) %>% colMeans() %>% qnorm()) -
    (vocab %>% filter(gender == 2) %>% select(starts_with("Item_")) %>% colMeans() %>% qnorm())
)

#simple scoring
vocab$simple_score = vocab %>% select(starts_with("Item_")) %>% rowSums()
vocab %>% select(starts_with("Item_")) %>% alpha()
## 
## Reliability analysis   
## Call: alpha(x = .)
## 
##   raw_alpha std.alpha G6(smc) average_r S/N    ase mean   sd median_r
##       0.91      0.92    0.93      0.21  12 0.0012 0.78 0.17      0.2
## 
##  lower alpha upper     95% confidence boundaries
## 0.91 0.91 0.92 
## 
##  Reliability if an item is dropped:
##         raw_alpha std.alpha G6(smc) average_r S/N alpha se var.r med.r
## Item_1       0.91      0.92    0.93      0.21  12   0.0012 0.012  0.20
## Item_2       0.91      0.92    0.93      0.21  12   0.0012 0.013  0.20
## Item_3       0.91      0.92    0.93      0.21  12   0.0012 0.012  0.20
## Item_4       0.92      0.92    0.93      0.21  12   0.0012 0.013  0.21
## Item_5       0.91      0.92    0.93      0.21  12   0.0012 0.012  0.20
## Item_6       0.91      0.92    0.93      0.21  12   0.0012 0.012  0.20
## Item_7       0.92      0.92    0.93      0.22  12   0.0011 0.012  0.21
## Item_8       0.91      0.92    0.93      0.21  12   0.0012 0.012  0.20
## Item_9       0.91      0.92    0.93      0.21  12   0.0012 0.012  0.20
## Item_10      0.91      0.92    0.93      0.21  12   0.0012 0.012  0.20
## Item_11      0.91      0.92    0.93      0.21  12   0.0012 0.012  0.20
## Item_12      0.91      0.92    0.93      0.21  12   0.0012 0.013  0.20
## Item_13      0.91      0.92    0.93      0.21  12   0.0012 0.013  0.20
## Item_14      0.91      0.92    0.93      0.21  12   0.0012 0.013  0.20
## Item_15      0.91      0.92    0.93      0.21  12   0.0012 0.013  0.20
## Item_16      0.91      0.92    0.93      0.21  12   0.0012 0.012  0.20
## Item_17      0.91      0.92    0.93      0.21  12   0.0012 0.012  0.20
## Item_18      0.91      0.92    0.93      0.21  12   0.0012 0.013  0.20
## Item_19      0.91      0.92    0.93      0.21  12   0.0012 0.013  0.20
## Item_20      0.91      0.92    0.93      0.21  12   0.0012 0.012  0.20
## Item_21      0.91      0.92    0.93      0.21  12   0.0012 0.012  0.20
## Item_22      0.91      0.92    0.93      0.21  12   0.0012 0.013  0.20
## Item_23      0.91      0.92    0.93      0.21  12   0.0012 0.013  0.20
## Item_24      0.91      0.92    0.93      0.21  12   0.0012 0.012  0.20
## Item_25      0.91      0.92    0.93      0.21  12   0.0012 0.013  0.20
## Item_26      0.92      0.92    0.93      0.22  12   0.0012 0.012  0.21
## Item_27      0.91      0.92    0.93      0.21  12   0.0012 0.012  0.20
## Item_28      0.91      0.92    0.93      0.21  12   0.0012 0.012  0.20
## Item_29      0.91      0.92    0.93      0.21  12   0.0012 0.013  0.20
## Item_30      0.91      0.92    0.93      0.21  12   0.0012 0.013  0.20
## Item_31      0.91      0.92    0.93      0.21  12   0.0012 0.012  0.20
## Item_32      0.91      0.92    0.93      0.21  12   0.0012 0.013  0.20
## Item_33      0.91      0.92    0.93      0.21  12   0.0012 0.012  0.20
## Item_34      0.91      0.92    0.93      0.21  12   0.0012 0.012  0.20
## Item_35      0.91      0.92    0.93      0.21  12   0.0012 0.013  0.20
## Item_36      0.91      0.92    0.93      0.21  12   0.0012 0.012  0.20
## Item_37      0.91      0.92    0.93      0.21  12   0.0012 0.013  0.20
## Item_38      0.91      0.92    0.93      0.21  12   0.0012 0.012  0.20
## Item_39      0.91      0.92    0.93      0.21  12   0.0012 0.012  0.20
## Item_40      0.91      0.92    0.93      0.21  12   0.0012 0.012  0.20
## Item_41      0.91      0.92    0.93      0.21  12   0.0012 0.012  0.20
## Item_42      0.91      0.92    0.93      0.21  12   0.0012 0.012  0.20
## Item_43      0.91      0.92    0.93      0.21  12   0.0012 0.012  0.20
## Item_44      0.91      0.92    0.93      0.21  12   0.0012 0.013  0.20
## Item_45      0.91      0.92    0.93      0.21  12   0.0012 0.013  0.21
## 
##  Item statistics 
##            n raw.r std.r r.cor r.drop mean   sd
## Item_1  9368  0.34  0.48  0.47  0.328 0.98 0.12
## Item_2  9368  0.46  0.51  0.49  0.437 0.94 0.25
## Item_3  9368  0.35  0.49  0.48  0.339 0.98 0.13
## Item_4  9368  0.37  0.33  0.30  0.310 0.49 0.50
## Item_5  9368  0.33  0.44  0.42  0.310 0.97 0.16
## Item_6  9368  0.60  0.59  0.58  0.570 0.84 0.36
## Item_7  9368  0.15  0.15  0.11  0.088 0.49 0.50
## Item_8  9368  0.59  0.58  0.57  0.559 0.85 0.36
## Item_9  9368  0.59  0.57  0.56  0.556 0.82 0.39
## Item_10 9368  0.27  0.37  0.34  0.247 0.96 0.20
## Item_11 9368  0.36  0.48  0.47  0.343 0.98 0.15
## Item_12 9368  0.56  0.59  0.58  0.535 0.91 0.29
## Item_13 9368  0.55  0.57  0.55  0.522 0.89 0.31
## Item_14 9368  0.36  0.45  0.43  0.335 0.97 0.18
## Item_15 9368  0.45  0.45  0.43  0.407 0.84 0.37
## Item_16 9368  0.27  0.36  0.34  0.241 0.96 0.19
## Item_17 9368  0.40  0.54  0.53  0.386 0.98 0.13
## Item_18 9368  0.53  0.48  0.46  0.486 0.57 0.49
## Item_19 9368  0.57  0.52  0.50  0.521 0.64 0.48
## Item_20 9368  0.62  0.59  0.58  0.589 0.79 0.41
## Item_21 9368  0.62  0.55  0.54  0.577 0.58 0.49
## Item_22 9368  0.36  0.46  0.44  0.339 0.97 0.17
## Item_23 9368  0.31  0.39  0.37  0.281 0.96 0.20
## Item_24 9368  0.59  0.61  0.61  0.565 0.91 0.29
## Item_25 9368  0.44  0.40  0.37  0.384 0.60 0.49
## Item_26 9368  0.16  0.22  0.18  0.119 0.90 0.30
## Item_27 9368  0.51  0.44  0.42  0.463 0.30 0.46
## Item_28 9368  0.62  0.57  0.56  0.584 0.68 0.46
## Item_29 9368  0.49  0.49  0.47  0.449 0.85 0.36
## Item_30 9368  0.41  0.43  0.41  0.379 0.89 0.32
## Item_31 9368  0.37  0.50  0.49  0.351 0.98 0.13
## Item_32 9368  0.28  0.34  0.32  0.250 0.93 0.26
## Item_33 9368  0.60  0.59  0.58  0.568 0.88 0.33
## Item_34 9368  0.63  0.60  0.59  0.598 0.82 0.39
## Item_35 9368  0.51  0.47  0.46  0.463 0.76 0.43
## Item_36 9368  0.57  0.51  0.49  0.525 0.57 0.50
## Item_37 9368  0.56  0.58  0.57  0.530 0.91 0.29
## Item_38 9368  0.52  0.45  0.43  0.467 0.43 0.49
## Item_39 9368  0.59  0.56  0.55  0.557 0.78 0.41
## Item_40 9368  0.57  0.51  0.49  0.525 0.52 0.50
## Item_41 9368  0.63  0.59  0.58  0.596 0.72 0.45
## Item_42 9368  0.61  0.55  0.54  0.570 0.61 0.49
## Item_43 9368  0.49  0.42  0.41  0.443 0.31 0.46
## Item_44 9368  0.42  0.38  0.35  0.369 0.49 0.50
## Item_45 9368  0.38  0.37  0.34  0.334 0.79 0.41
## 
## Non missing response frequency for each item
##            0    1 miss
## Item_1  0.02 0.98    0
## Item_2  0.06 0.94    0
## Item_3  0.02 0.98    0
## Item_4  0.51 0.49    0
## Item_5  0.03 0.97    0
## Item_6  0.16 0.84    0
## Item_7  0.51 0.49    0
## Item_8  0.15 0.85    0
## Item_9  0.18 0.82    0
## Item_10 0.04 0.96    0
## Item_11 0.02 0.98    0
## Item_12 0.09 0.91    0
## Item_13 0.11 0.89    0
## Item_14 0.03 0.97    0
## Item_15 0.16 0.84    0
## Item_16 0.04 0.96    0
## Item_17 0.02 0.98    0
## Item_18 0.43 0.57    0
## Item_19 0.36 0.64    0
## Item_20 0.21 0.79    0
## Item_21 0.42 0.58    0
## Item_22 0.03 0.97    0
## Item_23 0.04 0.96    0
## Item_24 0.09 0.91    0
## Item_25 0.40 0.60    0
## Item_26 0.10 0.90    0
## Item_27 0.70 0.30    0
## Item_28 0.32 0.68    0
## Item_29 0.15 0.85    0
## Item_30 0.11 0.89    0
## Item_31 0.02 0.98    0
## Item_32 0.07 0.93    0
## Item_33 0.12 0.88    0
## Item_34 0.18 0.82    0
## Item_35 0.24 0.76    0
## Item_36 0.43 0.57    0
## Item_37 0.09 0.91    0
## Item_38 0.57 0.43    0
## Item_39 0.22 0.78    0
## Item_40 0.48 0.52    0
## Item_41 0.28 0.72    0
## Item_42 0.39 0.61    0
## Item_43 0.69 0.31    0
## Item_44 0.51 0.49    0
## Item_45 0.21 0.79    0
#plot
GG_denhist(vocab$simple_score, "simple_score")
## `stat_bin()` using `bins = 30`. Pick better value with `binwidth`.

GG_save("figs/simple_scores_dist.png")
## `stat_bin()` using `bins = 30`. Pick better value with `binwidth`.
#remove left tail outliers
vocab = filter(vocab, simple_score >= 10)

#IRT scoring - binary
#old code with psych package
if (F) {
  IRT_fit = psych::irt.fa(vocab %>% select(starts_with("Item_")), nfactors = 1)
  IRT_fit$fa
  item_stats$difficulty = IRT_fit$irt$difficulty[[1]]
  item_stats$loading = IRT_fit$fa$loadings %>% as.vector()
  
  IRT_scores = psych::scoreIrt(IRT_fit, items = vocab %>% select(starts_with("Item_")))
  vocab$IRT_score = IRT_scores$theta1 %>% standardize()
  vocab$g = vocab$IRT_score
  
  #plot
  GG_denhist(vocab, "IRT_score")
  GG_save("figs/IRT_scores_dist.png")
}

#mirt binary (2PL) model
vocab_irt_binary = mirt(vocab %>% select(starts_with("Item_")), model = 1, itemtype = "2PL")
## Iteration: 1, Log-Lik: -145098.581, Max-Change: 3.58195
## Iteration: 2, Log-Lik: -139893.780, Max-Change: 1.24209
## ...
## Iteration: 77, Log-Lik: -137946.148, Max-Change: 0.00018
## Iteration: 78, Log-Lik: -137946.148, Max-Change: 0.00010
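Before scoring, it can be useful to inspect the fitted item parameters; mirt's coef() with IRTpars = TRUE reports the familiar discrimination/difficulty parametrization. An optional inspection step, not part of the original analysis:

#optional: inspect 2PL item parameters (a = discrimination, b = difficulty)
if (F) {
  coef(vocab_irt_binary, simplify = T, IRTpars = T)$items
}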
#score
vocab_irt_binary_scores = fscores(vocab_irt_binary)
vocab$irt_binary = vocab_irt_binary_scores[, 1] %>% as.numeric() %>% standardize()

#plot
GG_denhist(vocab, "irt_binary")
## `stat_bin()` using `bins = 30`. Pick better value with `binwidth`.

#mirt categorical
#items must be recoded due to a limitation of mirt (see relevel_items above)
vocab_cat_recoded = relevel_items(vocab %>% select(Q1:Q45),
                                  vocab_keys$true_option)
#other item types that fit worse
if (F) {
  vocab_irt_cat = mirt(vocab_cat_recoded$data, model = 1, itemtype = "nominal")
  vocab_irt_cat = mirt(vocab_cat_recoded$data, model = 1, itemtype = "graded")
  vocab_irt_cat = mirt(vocab_cat_recoded$data, model = 1, itemtype = "gpcm")
  vocab_irt_cat = mirt(vocab_cat_recoded$data, model = 1, itemtype = "gpcmIRT")
}

vocab_irt_cat = mirt(vocab_cat_recoded$data, model = 1, itemtype = "2PLNRM", key = vocab_cat_recoded$key,
                     method = "EM", TOL = .0003) #would not converge with default tolerance
## Iteration: 1, Log-Lik: -331206.779, Max-Change: 11.43198
## Iteration: 2, Log-Lik: -277489.550, Max-Change: 3.57211
## ...
## Iteration: 99, Log-Lik: -255262.749, Max-Change: 0.00037
## Iteration: 100, Log-Lik: -255262.749, Max-Change: 0.00030
#score
vocab_irt_cat_scores = fscores(vocab_irt_cat)
vocab$irt_cat = vocab_irt_cat_scores[, 1] %>% as.numeric() %>% standardize()
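As an optional comparison of the two models (not in the original), mirt's marginal_rxx() returns the marginal reliability of each model's trait estimates:

#optional: marginal reliabilities of the two IRT models
if (F) {
  marginal_rxx(vocab_irt_binary)
  marginal_rxx(vocab_irt_cat)
}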

#plot
GG_denhist(vocab, "irt_cat")
## `stat_bin()` using `bins = 30`. Pick better value with `binwidth`.

#relation between the two IRT scorings?
GG_scatter(vocab, "irt_binary", "irt_cat")
## `geom_smooth()` using formula 'y ~ x'

#criterion relations
vocab %>% select(education, age, simple_score:irt_cat) %>% wtd.cors()
##              education   age simple_score irt_binary irt_cat
## education        1.000 0.353        0.434      0.441   0.438
## age              0.353 1.000        0.380      0.403   0.404
## simple_score     0.434 0.380        1.000      0.967   0.955
## irt_binary       0.441 0.403        0.967      1.000   0.985
## irt_cat          0.438 0.404        0.955      0.985   1.000
#output data for reuse
write_csv(vocab, "data/vocab_prepped.csv")