Start

options(
  digits = 2,
  contrasts = c("contr.treatment", "contr.treatment")
)

library(pacman)
p_load(
  kirkegaard,
  readr,
  dplyr,
  rms,
  polycor,
  mirt,
  ggeffects
  )

Data

#load
vars = readr::read_csv2("data/question_data.csv")
## ℹ Using ',' as decimal and '.' as grouping mark. Use `read_delim()` for more control.
## Warning: Missing column names filled in: 'X1' [1]
## 
## ── Column specification ────────────────────────────────────────────────────────
## cols(
##   X1 = col_character(),
##   text = col_character(),
##   option_1 = col_character(),
##   option_2 = col_character(),
##   option_3 = col_character(),
##   option_4 = col_character(),
##   N = col_double(),
##   Type = col_character(),
##   Order = col_character(),
##   Keywords = col_character()
## )
d = readr::read_rds("data/parsed_data.rds")
d_orig = d

Recode

#Canadian provinces (collapsed to Canada below)
Canada_units = c("Ontario", "British Columbia", "Alberta", "Manitoba", "Nova Scotia", "Saskatchewan", "New Brunswick", "Prince Edward Island")

#rename and recode
d %<>% mutate(
  #simple stuff
  age = d_age,
  sex = gender,
  male = sex == "Man",
  country = d_country,
  sexual_orientation = gender_orientation,
  pol_beliefs = q212813 %>% fct_relevel("Conservative / Right-wing"),
  body_type = d_bodytype %>% fct_relevel("Average"),
  
  #funny indicators
  unnatural_hair_color = q513 %>% fct_rev(),
  vegetarian = q179268 %>% fct_rev(),
  have_tattoos = q128 %>% fct_rev(),
  
  #polyamory
  poly1 = q48278 %>% fct_rev(),
  poly2 = q33107 %>% fct_rev(),
  poly3 = q37772,
  
  #recode country
  country2 = country %>% 
    mapvalues(from = state.abb, to = rep("USA", length(state.abb))) %>% 
    mapvalues(from = Canada_units, to = rep("Canada", length(Canada_units))),
  
  #anglophone = US states, regions of Canada (not Quebec), or a few other countries
  anglophone = (country %in% c("Ontario", "British Columbia", "Alberta", "Manitoba", "Nova Scotia", "Saskatchewan", "New Brunswick", "Prince Edward Island", "Australia", "Singapore", "Ireland", "New Zealand")) | str_length(country) == 2,
  
  western = (country %in% c("Ontario", "British Columbia", "Alberta", "Manitoba", "Nova Scotia", "Saskatchewan", "New Brunswick", "Prince Edward Island", "Australia", "Ireland", "New Zealand")) | (str_length(country) == 2) | (country %in% c("Germany", "Netherlands", "Denmark", "Italy", "Sweden", "France", "Finland", "Israel", "Belgium", "Spain", "Austria", "Switzerland", "Norway", "Greece", "Portugal", "Iceland", "Luxembourg"))
)
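
#quick spot check of the recode (a sketch; the exact values depend on the data):
#US state abbreviations should collapse to USA, Canadian provinces to Canada
d %>% filter(country %in% c("NY", "CA", "Ontario")) %>% count(country, country2)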

Functions

#logical to numeric
df_lgl_to_num = function(x) {
  for (v in seq_along(x)) {
    if (is.logical(x[[v]])) x[[v]] = x[[v]] %>% as.numeric()
  }
  
  x
}
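
#tiny sanity check of df_lgl_to_num on made-up data (illustrative only):
#the logical column becomes 0/1 numeric, the character column is left alone
tibble(a = c(T, F, NA), b = letters[1:3]) %>% df_lgl_to_num() %>% sapply(class)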

#redefine describe
describe = function(x) {
  # browser()
  y = x %>% 
    as.data.frame() %>% 
    df_lgl_to_num() %>% 
    psych::describe()
  
  class(y) = "data.frame"
  
  y$var = colnames(x)
  rownames(y) = NULL
  y %>% select(-vars) %>% select(var, n, mean, median, sd, mad, min, max, skew)
}
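
#quick demo of the redefined describe on built-in data (illustrative only)
iris %>% select(-Species) %>% describe()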

#find identical columns
df_find_dup_cols = function(x, col, simplify = T) {
  
  #compare every other column to `col`; TRUE where the values are identical
  x %>% select(-!!col) %>% map_lgl(function(v) {
    isTRUE(all.equal(x[[col]], v, check.attributes = F))
  }) ->
    y
  
  #return the names of the duplicate columns
  names(y[y])
}

iris$Species2 = iris$Species

df_find_dup_cols(iris, "Species")
## [1] "Species2"
#ez life: look up question info by variable name
get_question_info = function(x, which = "text") {
  #look up the variable directly in the question metadata
  y = vars %>% filter(X1 == x) %>% pull(!!which)
  
  #if no hits, try finding a duplicate variable
  if (length(y) > 0) return(y)
  dup = df_find_dup_cols(d, x)
  
  if (length(dup) > 0) {
    return(vars %>% filter(X1 == dup[1]) %>% pull(!!which))
  } else {
    return(y)
  }
}

get_question_info("q2")
## [1] "Breast implants?"
get_question_info("q2", which = "N")
## [1] 24839
#slow
get_question_info("unnatural_hair_color")
## [1] "Have you ever dyed your hair a real crazy, unnatural color?"
#print question table
question_table = function(x) {
  map_df(x, function(v) {
    # browser()
    #basics
    y = vars %>% filter(X1 %in% v) %>% select(X1, text, option_1:option_4, N)
    
    if (nrow(y) != 1) stop(str_glue("Invalid id: {v}, there were {nrow(y)} matched rows in `vars`"))
    
    #add distributions in %
    dist = table2(d[[v]], include_NA = F, sort_descending = NULL)
    
    #fill in %s
    for (i in seq_along_rows(dist)) {
      i_which = which(dist$Group[i] == (y[1, c("option_" + 1:4)]))
      y[1, "option_" + i_which] = y[1, "option_" + i_which] + str_glue(" ({format_digits(dist$Percent[i], digits = 1)}%)")
    }
    
    y
  })
}

question_table("q2")

Mental health

#reverse coding on one item
d$q50 %<>% fct_rev()

#items
mental_health_items = c(
  seen_therapist = "q50",
  much_depression = "q1552",
  emotional_diversity = "q6021",
  happy_with_life = "q4018",
  exp_mental_illness = "q1287"
)

#output var data
vars %>% filter(X1 %in% (!!mental_health_items)) %>% select(X1, text, option_1:option_4, N)
#copy to new names
for (v in mental_health_items) d[[names(mental_health_items)[which(v == mental_health_items)]]] = d[[v]]

#print items
for (v in mental_health_items) {
  print(str_glue("{v} - {names(mental_health_items)[which(v == mental_health_items)]}: {get_question_info(v)}"))
  print(table2(d[[v]], include_NA = F, sort_descending = NULL))
  cat("\n")
}
## q50 - seen_therapist: Have you ever seen a therapist?
## # A tibble: 2 x 3
##   Group Count Percent
##   <chr> <dbl>   <dbl>
## 1 No     3826    40.2
## 2 Yes    5681    59.8
## 
## q1552 - much_depression: Do you get depressed much?
## # A tibble: 3 x 3
##   Group                          Count Percent
##   <chr>                          <dbl>   <dbl>
## 1 Almost never, I'm happy!        3261   32.7 
## 2 Sometimes, when it's a bad day  6295   63.2 
## 3 Yeah, despair is my life         402    4.04
## 
## q6021 - emotional_diversity: How would you describe your emotional diversity?
## # A tibble: 4 x 3
##   Group                                          Count Percent
##   <chr>                                          <dbl>   <dbl>
## 1 I get extremely happy but rarely depressed      1428   32.7 
## 2 I get extremely depressed and I'm rarely happy   202    4.63
## 3 I don't feel much of either                      731   16.7 
## 4 I feel both often                               2006   45.9 
## 
## q4018 - happy_with_life: Are you happy with your life?
## # A tibble: 2 x 3
##   Group Count Percent
##   <chr> <dbl>   <dbl>
## 1 Yes   49586   92.5 
## 2 No     4039    7.53
## 
## q1287 - exp_mental_illness: Have you experienced mental illness?
## # A tibble: 4 x 3
##   Group           Count Percent
##   <chr>           <dbl>   <dbl>
## 1 No              10501   58.5 
## 2 I'm not sure     2540   14.2 
## 3 Yes - low grade  3843   21.4 
## 4 Yes - severely   1061    5.91
#factor analyze with mirt: the two binary items as 2PL, the three multi-category items as nominal
mh_mirt = mirt(
  data = d %>% select(!!mental_health_items) %>% map_df(as.numeric),
  model = 1,
  itemtype = c("2PL", "nominal", "nominal", "2PL", "nominal"),
  technical = list(removeEmptyRows = T)
)
## 
Iteration: 1, Log-Lik: -67060.043, Max-Change: 2.38811
Iteration: 2, Log-Lik: -53004.128, Max-Change: 1.49423
Iteration: 3, Log-Lik: -51422.869, Max-Change: 0.75200
Iteration: 4, Log-Lik: -51096.832, Max-Change: 0.87695
Iteration: 5, Log-Lik: -50907.603, Max-Change: 0.55862
Iteration: 6, Log-Lik: -50797.116, Max-Change: 0.53455
Iteration: 7, Log-Lik: -50725.979, Max-Change: 0.39774
Iteration: 8, Log-Lik: -50680.333, Max-Change: 0.18697
Iteration: 9, Log-Lik: -50650.731, Max-Change: 0.17559
Iteration: 10, Log-Lik: -50630.990, Max-Change: 0.08875
Iteration: 11, Log-Lik: -50618.058, Max-Change: 0.12700
Iteration: 12, Log-Lik: -50609.103, Max-Change: 0.08946
Iteration: 13, Log-Lik: -50603.099, Max-Change: 0.05713
Iteration: 14, Log-Lik: -50598.924, Max-Change: 0.03379
Iteration: 15, Log-Lik: -50596.081, Max-Change: 0.03510
Iteration: 16, Log-Lik: -50592.013, Max-Change: 0.02334
Iteration: 17, Log-Lik: -50590.756, Max-Change: 0.02099
Iteration: 18, Log-Lik: -50590.010, Max-Change: 0.01844
Iteration: 19, Log-Lik: -50588.529, Max-Change: 0.00654
Iteration: 20, Log-Lik: -50588.415, Max-Change: 0.00544
Iteration: 21, Log-Lik: -50588.354, Max-Change: 0.00406
Iteration: 22, Log-Lik: -50588.302, Max-Change: 0.00391
Iteration: 23, Log-Lik: -50588.273, Max-Change: 0.00342
Iteration: 24, Log-Lik: -50588.252, Max-Change: 0.00339
Iteration: 25, Log-Lik: -50588.171, Max-Change: 0.00172
Iteration: 26, Log-Lik: -50588.167, Max-Change: 0.00344
Iteration: 27, Log-Lik: -50588.158, Max-Change: 0.00067
Iteration: 28, Log-Lik: -50588.158, Max-Change: 0.00049
Iteration: 29, Log-Lik: -50588.156, Max-Change: 0.00189
Iteration: 30, Log-Lik: -50588.152, Max-Change: 0.00009
mh_mirt
## 
## Call:
## mirt(data = d %>% select(!!mental_health_items) %>% map_df(as.numeric), 
##     model = 1, itemtype = c("2PL", "nominal", "nominal", "2PL", 
##         "nominal"), technical = list(removeEmptyRows = T))
## 
## Full-information item factor analysis with 1 factor(s).
## Converged within 1e-04 tolerance after 30 EM iterations.
## mirt version: 1.33.2 
## M-step optimizer: BFGS 
## EM acceleration: Ramsay 
## Number of rectangular quadrature: 61
## Latent density type: Gaussian 
## 
## Log-likelihood = -50588
## Estimated parameters: 20 
## AIC = 1e+05; AICc = 1e+05
## BIC = 1e+05; SABIC = 1e+05
mh_mirt %>% summary()
##                        F1    h2
## seen_therapist      0.483 0.233
## much_depression     0.881 0.776
## emotional_diversity 0.505 0.255
## happy_with_life     0.630 0.397
## exp_mental_illness  0.433 0.188
## 
## SS loadings:  1.8 
## Proportion Var:  0.37 
## 
## Factor correlations: 
## 
##    F1
## F1  1
#score
mh_mirt_scores = fscores(mh_mirt, full.scores = T, full.scores.SE = T)

#reliability
empirical_rxx(mh_mirt_scores)
##   F1 
## 0.26
marginal_rxx(mh_mirt)
## [1] 0.72
#save scores
mh_mirt_scores_item_notNA = miss_by_case(d %>% select(!!mental_health_items), reverse = T)
mh_mirt_scores_item_notNA %>% table2(sort_descending = NULL)
d$mh_score = NA
d$mh_score[mh_mirt_scores_item_notNA > 0] = mh_mirt_scores[, 1] %>% standardize() %>% multiply_by(-1)
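
#quick sanity check of the score direction (a sketch, assuming the recoded items above):
#inspect the mean mh_score within the happy_with_life groups
d %>% filter(!is.na(mh_score), !is.na(happy_with_life)) %>% group_by(happy_with_life) %>% summarise(mean_mh = mean(mh_score), n = n())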

Extract intelligence

#test items
iq_items_meta = read_csv("data/test_items.csv")
## Warning: Missing column names filled in: 'X1' [1]
## 
## ── Column specification ────────────────────────────────────────────────────────
## cols(
##   X1 = col_character(),
##   ID = col_double(),
##   text = col_character(),
##   option_1 = col_character(),
##   option_2 = col_character(),
##   option_3 = col_character(),
##   option_4 = col_character(),
##   option_correct = col_double()
## )
#count IQ items answered per person
CA_items = intersect(iq_items_meta$X1, names(d))
d$CA_items = d[CA_items] %>% miss_by_case(reverse = T)

#get scoring key
v_correct_answers_text = apply(iq_items_meta %>% filter(X1 %in% CA_items), MARGIN = 1, function(item) {
  item["option_" + 1:4][item["option_correct"] %>% as.numeric()]
})

#score items
CA_scored_items = score_items(d[CA_items], key = v_correct_answers_text)
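
#conceptually, keyed scoring just compares each response to the keyed answer text for that item
#a minimal sketch of the idea (hypothetical helper, not the kirkegaard::score_items implementation):
#1 if the response equals the keyed answer, 0 otherwise, NA preserved
score_items_sketch = function(responses, key) {
  map2_dfc(responses, key, function(col, correct) as.numeric(as.character(col) == correct))
}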

#factor analysis with IRT
irt_CA = mirt(
  CA_scored_items,
  model = 1,
  technical = list(removeEmptyRows = T)
  )
## 
Iteration: 1, Log-Lik: -188768.194, Max-Change: 5.04127
Iteration: 2, Log-Lik: -167586.674, Max-Change: 0.93778
Iteration: 3, Log-Lik: -163943.799, Max-Change: 0.34531
Iteration: 4, Log-Lik: -162360.344, Max-Change: 0.28657
Iteration: 5, Log-Lik: -161709.787, Max-Change: 0.22768
Iteration: 6, Log-Lik: -161353.642, Max-Change: 0.16053
Iteration: 7, Log-Lik: -161173.781, Max-Change: 0.09737
Iteration: 8, Log-Lik: -161064.493, Max-Change: 0.22971
Iteration: 9, Log-Lik: -160991.363, Max-Change: 0.12791
Iteration: 10, Log-Lik: -160959.392, Max-Change: 0.07594
Iteration: 11, Log-Lik: -160939.094, Max-Change: 0.04297
Iteration: 12, Log-Lik: -160925.798, Max-Change: 0.04225
Iteration: 13, Log-Lik: -160916.474, Max-Change: 0.02643
Iteration: 14, Log-Lik: -160913.117, Max-Change: 0.01116
Iteration: 15, Log-Lik: -160909.698, Max-Change: 0.00764
Iteration: 16, Log-Lik: -160907.840, Max-Change: 0.00427
Iteration: 17, Log-Lik: -160907.466, Max-Change: 0.00338
Iteration: 18, Log-Lik: -160907.281, Max-Change: 0.00242
Iteration: 19, Log-Lik: -160907.163, Max-Change: 0.00138
Iteration: 20, Log-Lik: -160907.136, Max-Change: 0.00128
Iteration: 21, Log-Lik: -160907.121, Max-Change: 0.00118
Iteration: 22, Log-Lik: -160907.111, Max-Change: 0.00045
Iteration: 23, Log-Lik: -160907.109, Max-Change: 0.00028
Iteration: 24, Log-Lik: -160907.108, Max-Change: 0.00025
Iteration: 25, Log-Lik: -160907.108, Max-Change: 0.00024
Iteration: 26, Log-Lik: -160907.107, Max-Change: 0.00008
irt_CA
## 
## Call:
## mirt(data = CA_scored_items, model = 1, technical = list(removeEmptyRows = T))
## 
## Full-information item factor analysis with 1 factor(s).
## Converged within 1e-04 tolerance after 26 EM iterations.
## mirt version: 1.33.2 
## M-step optimizer: BFGS 
## EM acceleration: Ramsay 
## Number of rectangular quadrature: 61
## Latent density type: Gaussian 
## 
## Log-likelihood = -160907
## Estimated parameters: 28 
## AIC = 321870; AICc = 321870
## BIC = 322121; SABIC = 322032
irt_CA %>% summary()
##           F1    h2
## q178   0.716 0.513
## q255   0.699 0.489
## q1201  0.748 0.560
## q14835 0.363 0.131
## q8672  0.713 0.508
## q18154 0.747 0.558
## q12625 0.586 0.344
## q477   0.607 0.369
## q256   0.374 0.140
## q43639 0.752 0.565
## q267   0.481 0.231
## q18698 0.701 0.491
## q511   0.685 0.470
## q57844 0.699 0.489
## 
## SS loadings:  5.9 
## Proportion Var:  0.42 
## 
## Factor correlations: 
## 
##    F1
## F1  1
#save
irt_CA_scores = fscores(irt_CA, full.scores = T, full.scores.SE = T)
d$CA = NA
d$CA[d$CA_items > 0] = irt_CA_scores[, 1] %>% standardize()
d$intelligence = d$CA
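
#rough validity check (a sketch): the IRT ability score should track the raw number of correct answers closely
cor(d$CA, rowSums(CA_scored_items, na.rm = T), use = "pairwise.complete.obs")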

#estimate reliability
empirical_rxx(irt_CA_scores)
##   F1 
## 0.55
marginal_rxx(irt_CA)
## [1] 0.76

Polyamory

#items
poly_items = c(
  "q48278",
  "q33107",
  "q37772"
)

#item counts
poly_item_counts = d %>% select(poly1:poly3) %>% miss_by_case(reverse = T)
poly_item_counts %>% table2(sort_descending = NULL)
#poly fit, all items are ordinal
poly_fit = mirt(
  data = d %>% select(poly1:poly3) %>% map_df(as.numeric),
  model = 1,
  technical = list(removeEmptyRows=T)
)
## 
Iteration: 1, Log-Lik: -40715.500, Max-Change: 3.00885
Iteration: 2, Log-Lik: -31171.766, Max-Change: 0.94973
Iteration: 3, Log-Lik: -29536.243, Max-Change: 0.78633
Iteration: 4, Log-Lik: -28803.071, Max-Change: 0.64791
Iteration: 5, Log-Lik: -28455.886, Max-Change: 0.50791
Iteration: 6, Log-Lik: -28289.084, Max-Change: 0.43317
Iteration: 7, Log-Lik: -28205.852, Max-Change: 0.36511
Iteration: 8, Log-Lik: -28162.202, Max-Change: 0.30753
Iteration: 9, Log-Lik: -28138.177, Max-Change: 0.26023
Iteration: 10, Log-Lik: -28124.395, Max-Change: 0.22190
Iteration: 11, Log-Lik: -28116.197, Max-Change: 0.19104
Iteration: 12, Log-Lik: -28111.144, Max-Change: 0.16633
Iteration: 13, Log-Lik: -28107.904, Max-Change: 0.14659
Iteration: 14, Log-Lik: -28105.729, Max-Change: 0.13084
Iteration: 15, Log-Lik: -28104.192, Max-Change: 0.11827
Iteration: 16, Log-Lik: -28099.682, Max-Change: 0.06424
Iteration: 17, Log-Lik: -28099.215, Max-Change: 0.06302
Iteration: 18, Log-Lik: -28098.818, Max-Change: 0.06234
Iteration: 19, Log-Lik: -28097.031, Max-Change: 0.06560
Iteration: 20, Log-Lik: -28096.814, Max-Change: 0.06011
Iteration: 21, Log-Lik: -28096.623, Max-Change: 0.05507
Iteration: 22, Log-Lik: -28095.752, Max-Change: 0.05175
Iteration: 23, Log-Lik: -28095.593, Max-Change: 0.05450
Iteration: 24, Log-Lik: -28095.465, Max-Change: 0.05495
Iteration: 25, Log-Lik: -28094.819, Max-Change: 0.04408
Iteration: 26, Log-Lik: -28094.722, Max-Change: 0.04591
Iteration: 27, Log-Lik: -28094.646, Max-Change: 0.03808
Iteration: 28, Log-Lik: -28094.383, Max-Change: 0.04595
Iteration: 29, Log-Lik: -28094.317, Max-Change: 0.04457
Iteration: 30, Log-Lik: -28094.255, Max-Change: 0.04092
Iteration: 31, Log-Lik: -28093.947, Max-Change: 0.04026
Iteration: 32, Log-Lik: -28093.899, Max-Change: 0.04170
Iteration: 33, Log-Lik: -28093.852, Max-Change: 0.03338
Iteration: 34, Log-Lik: -28093.710, Max-Change: 0.02614
Iteration: 35, Log-Lik: -28093.684, Max-Change: 0.03615
Iteration: 36, Log-Lik: -28093.649, Max-Change: 0.03967
Iteration: 37, Log-Lik: -28093.446, Max-Change: 0.03151
Iteration: 38, Log-Lik: -28093.416, Max-Change: 0.02439
Iteration: 39, Log-Lik: -28093.396, Max-Change: 0.02576
Iteration: 40, Log-Lik: -28093.284, Max-Change: 0.02493
Iteration: 41, Log-Lik: -28093.265, Max-Change: 0.02069
Iteration: 42, Log-Lik: -28093.251, Max-Change: 0.02687
Iteration: 43, Log-Lik: -28093.181, Max-Change: 0.03167
Iteration: 44, Log-Lik: -28093.158, Max-Change: 0.02304
Iteration: 45, Log-Lik: -28093.144, Max-Change: 0.00120
Iteration: 46, Log-Lik: -28093.144, Max-Change: 0.00110
Iteration: 47, Log-Lik: -28093.143, Max-Change: 0.00092
Iteration: 48, Log-Lik: -28093.143, Max-Change: 0.03171
Iteration: 49, Log-Lik: -28093.123, Max-Change: 0.01810
Iteration: 50, Log-Lik: -28093.113, Max-Change: 0.02383
Iteration: 51, Log-Lik: -28093.098, Max-Change: 0.02293
Iteration: 52, Log-Lik: -28093.036, Max-Change: 0.02796
Iteration: 53, Log-Lik: -28093.014, Max-Change: 0.01765
Iteration: 54, Log-Lik: -28093.003, Max-Change: 0.02335
Iteration: 55, Log-Lik: -28092.957, Max-Change: 0.01348
Iteration: 56, Log-Lik: -28092.949, Max-Change: 0.02316
Iteration: 57, Log-Lik: -28092.939, Max-Change: 0.03043
Iteration: 58, Log-Lik: -28092.908, Max-Change: 0.01780
Iteration: 59, Log-Lik: -28092.898, Max-Change: 0.00074
Iteration: 60, Log-Lik: -28092.898, Max-Change: 0.00017
Iteration: 61, Log-Lik: -28092.898, Max-Change: 0.00015
Iteration: 62, Log-Lik: -28092.897, Max-Change: 0.01583
Iteration: 63, Log-Lik: -28092.890, Max-Change: 0.02099
Iteration: 64, Log-Lik: -28092.868, Max-Change: 0.02155
Iteration: 65, Log-Lik: -28092.852, Max-Change: 0.01553
Iteration: 66, Log-Lik: -28092.844, Max-Change: 0.00074
Iteration: 67, Log-Lik: -28092.844, Max-Change: 0.02294
Iteration: 68, Log-Lik: -28092.835, Max-Change: 0.01188
Iteration: 69, Log-Lik: -28092.831, Max-Change: 0.02204
Iteration: 70, Log-Lik: -28092.815, Max-Change: 0.00077
Iteration: 71, Log-Lik: -28092.815, Max-Change: 0.01922
Iteration: 72, Log-Lik: -28092.807, Max-Change: 0.00061
Iteration: 73, Log-Lik: -28092.807, Max-Change: 0.00061
Iteration: 74, Log-Lik: -28092.807, Max-Change: 0.00060
Iteration: 75, Log-Lik: -28092.806, Max-Change: 0.00017
Iteration: 76, Log-Lik: -28092.806, Max-Change: 0.00014
Iteration: 77, Log-Lik: -28092.806, Max-Change: 0.00059
Iteration: 78, Log-Lik: -28092.806, Max-Change: 0.00065
Iteration: 79, Log-Lik: -28092.806, Max-Change: 0.00015
Iteration: 80, Log-Lik: -28092.806, Max-Change: 0.00059
Iteration: 81, Log-Lik: -28092.806, Max-Change: 0.00065
Iteration: 82, Log-Lik: -28092.805, Max-Change: 0.00016
Iteration: 83, Log-Lik: -28092.805, Max-Change: 0.00058
Iteration: 84, Log-Lik: -28092.805, Max-Change: 0.00014
Iteration: 85, Log-Lik: -28092.805, Max-Change: 0.00061
Iteration: 86, Log-Lik: -28092.805, Max-Change: 0.00021
Iteration: 87, Log-Lik: -28092.805, Max-Change: 0.00058
Iteration: 88, Log-Lik: -28092.805, Max-Change: 0.01688
Iteration: 89, Log-Lik: -28092.798, Max-Change: 0.00083
Iteration: 90, Log-Lik: -28092.798, Max-Change: 0.00063
Iteration: 91, Log-Lik: -28092.798, Max-Change: 0.00057
Iteration: 92, Log-Lik: -28092.798, Max-Change: 0.00019
Iteration: 93, Log-Lik: -28092.798, Max-Change: 0.00058
Iteration: 94, Log-Lik: -28092.798, Max-Change: 0.01590
Iteration: 95, Log-Lik: -28092.794, Max-Change: 0.01517
Iteration: 96, Log-Lik: -28092.786, Max-Change: 0.00021
Iteration: 97, Log-Lik: -28092.786, Max-Change: 0.00020
Iteration: 98, Log-Lik: -28092.786, Max-Change: 0.00012
Iteration: 99, Log-Lik: -28092.786, Max-Change: 0.00055
Iteration: 100, Log-Lik: -28092.786, Max-Change: 0.00058
Iteration: 101, Log-Lik: -28092.786, Max-Change: 0.00013
Iteration: 102, Log-Lik: -28092.786, Max-Change: 0.00055
Iteration: 103, Log-Lik: -28092.785, Max-Change: 0.00015
Iteration: 104, Log-Lik: -28092.785, Max-Change: 0.00056
Iteration: 105, Log-Lik: -28092.785, Max-Change: 0.00011
Iteration: 106, Log-Lik: -28092.785, Max-Change: 0.00011
Iteration: 107, Log-Lik: -28092.785, Max-Change: 0.00055
Iteration: 108, Log-Lik: -28092.785, Max-Change: 0.00055
Iteration: 109, Log-Lik: -28092.785, Max-Change: 0.00054
Iteration: 110, Log-Lik: -28092.784, Max-Change: 0.00011
Iteration: 111, Log-Lik: -28092.784, Max-Change: 0.00055
Iteration: 112, Log-Lik: -28092.784, Max-Change: 0.00014
Iteration: 113, Log-Lik: -28092.784, Max-Change: 0.00054
Iteration: 114, Log-Lik: -28092.784, Max-Change: 0.00011
Iteration: 115, Log-Lik: -28092.784, Max-Change: 0.00011
Iteration: 116, Log-Lik: -28092.784, Max-Change: 0.00055
Iteration: 117, Log-Lik: -28092.784, Max-Change: 0.00054
Iteration: 118, Log-Lik: -28092.783, Max-Change: 0.00055
Iteration: 119, Log-Lik: -28092.783, Max-Change: 0.00018
Iteration: 120, Log-Lik: -28092.783, Max-Change: 0.00054
Iteration: 121, Log-Lik: -28092.783, Max-Change: 0.00023
Iteration: 122, Log-Lik: -28092.783, Max-Change: 0.00055
Iteration: 123, Log-Lik: -28092.783, Max-Change: 0.00018
Iteration: 124, Log-Lik: -28092.783, Max-Change: 0.00015
Iteration: 125, Log-Lik: -28092.783, Max-Change: 0.00054
Iteration: 126, Log-Lik: -28092.782, Max-Change: 0.00012
Iteration: 127, Log-Lik: -28092.782, Max-Change: 0.00055
Iteration: 128, Log-Lik: -28092.782, Max-Change: 0.00019
Iteration: 129, Log-Lik: -28092.782, Max-Change: 0.00054
Iteration: 130, Log-Lik: -28092.782, Max-Change: 0.00024
Iteration: 131, Log-Lik: -28092.782, Max-Change: 0.00055
Iteration: 132, Log-Lik: -28092.782, Max-Change: 0.00019
Iteration: 133, Log-Lik: -28092.782, Max-Change: 0.00016
Iteration: 134, Log-Lik: -28092.782, Max-Change: 0.00054
Iteration: 135, Log-Lik: -28092.781, Max-Change: 0.00013
Iteration: 136, Log-Lik: -28092.781, Max-Change: 0.00055
Iteration: 137, Log-Lik: -28092.781, Max-Change: 0.00020
Iteration: 138, Log-Lik: -28092.781, Max-Change: 0.00054
Iteration: 139, Log-Lik: -28092.781, Max-Change: 0.01755
Iteration: 140, Log-Lik: -28092.775, Max-Change: 0.00057
Iteration: 141, Log-Lik: -28092.775, Max-Change: 0.00013
Iteration: 142, Log-Lik: -28092.775, Max-Change: 0.00011
Iteration: 143, Log-Lik: -28092.774, Max-Change: 0.00054
Iteration: 144, Log-Lik: -28092.774, Max-Change: 0.00055
Iteration: 145, Log-Lik: -28092.774, Max-Change: 0.00015
Iteration: 146, Log-Lik: -28092.774, Max-Change: 0.00054
Iteration: 147, Log-Lik: -28092.774, Max-Change: 0.00011
Iteration: 148, Log-Lik: -28092.774, Max-Change: 0.00054
Iteration: 149, Log-Lik: -28092.774, Max-Change: 0.00020
Iteration: 150, Log-Lik: -28092.774, Max-Change: 0.00054
Iteration: 151, Log-Lik: -28092.774, Max-Change: 0.03120
Iteration: 152, Log-Lik: -28092.763, Max-Change: 0.00055
Iteration: 153, Log-Lik: -28092.762, Max-Change: 0.00011
Iteration: 154, Log-Lik: -28092.762, Max-Change: 0.00051
Iteration: 155, Log-Lik: -28092.762, Max-Change: 0.00020
Iteration: 156, Log-Lik: -28092.762, Max-Change: 0.00053
Iteration: 157, Log-Lik: -28092.762, Max-Change: 0.01867
Iteration: 158, Log-Lik: -28092.756, Max-Change: 0.00054
Iteration: 159, Log-Lik: -28092.755, Max-Change: 0.00051
Iteration: 160, Log-Lik: -28092.755, Max-Change: 0.00052
Iteration: 161, Log-Lik: -28092.755, Max-Change: 0.00015
Iteration: 162, Log-Lik: -28092.755, Max-Change: 0.00051
Iteration: 163, Log-Lik: -28092.755, Max-Change: 0.00020
Iteration: 164, Log-Lik: -28092.755, Max-Change: 0.00052
Iteration: 165, Log-Lik: -28092.755, Max-Change: 0.00016
Iteration: 166, Log-Lik: -28092.755, Max-Change: 0.00014
Iteration: 167, Log-Lik: -28092.754, Max-Change: 0.00051
Iteration: 168, Log-Lik: -28092.754, Max-Change: 0.00011
Iteration: 169, Log-Lik: -28092.754, Max-Change: 0.00052
Iteration: 170, Log-Lik: -28092.754, Max-Change: 0.00018
Iteration: 171, Log-Lik: -28092.754, Max-Change: 0.00051
Iteration: 172, Log-Lik: -28092.754, Max-Change: 0.02987
Iteration: 173, Log-Lik: -28092.744, Max-Change: 0.00052
Iteration: 174, Log-Lik: -28092.744, Max-Change: 0.00012
Iteration: 175, Log-Lik: -28092.744, Max-Change: 0.00011
Iteration: 176, Log-Lik: -28092.744, Max-Change: 0.00050
Iteration: 177, Log-Lik: -28092.744, Max-Change: 0.00051
Iteration: 178, Log-Lik: -28092.743, Max-Change: 0.00012
Iteration: 179, Log-Lik: -28092.743, Max-Change: 0.00050
Iteration: 180, Log-Lik: -28092.743, Max-Change: 0.00050
Iteration: 181, Log-Lik: -28092.743, Max-Change: 0.00013
Iteration: 182, Log-Lik: -28092.743, Max-Change: 0.00050
Iteration: 183, Log-Lik: -28092.743, Max-Change: 0.00011
Iteration: 184, Log-Lik: -28092.743, Max-Change: 0.00050
Iteration: 185, Log-Lik: -28092.743, Max-Change: 0.00018
Iteration: 186, Log-Lik: -28092.743, Max-Change: 0.00050
Iteration: 187, Log-Lik: -28092.743, Max-Change: 0.02844
Iteration: 188, Log-Lik: -28092.733, Max-Change: 0.00051
Iteration: 189, Log-Lik: -28092.733, Max-Change: 0.00013
Iteration: 190, Log-Lik: -28092.733, Max-Change: 0.00011
Iteration: 191, Log-Lik: -28092.733, Max-Change: 0.00049
Iteration: 192, Log-Lik: -28092.733, Max-Change: 0.00050
Iteration: 193, Log-Lik: -28092.733, Max-Change: 0.00011
Iteration: 194, Log-Lik: -28092.733, Max-Change: 0.00049
Iteration: 195, Log-Lik: -28092.732, Max-Change: 0.00050
Iteration: 196, Log-Lik: -28092.732, Max-Change: 0.00012
Iteration: 197, Log-Lik: -28092.732, Max-Change: 0.00049
Iteration: 198, Log-Lik: -28092.732, Max-Change: 0.00010
Iteration: 199, Log-Lik: -28092.732, Max-Change: 0.00049
Iteration: 200, Log-Lik: -28092.732, Max-Change: 0.00017
Iteration: 201, Log-Lik: -28092.732, Max-Change: 0.00049
Iteration: 202, Log-Lik: -28092.732, Max-Change: 0.02680
Iteration: 203, Log-Lik: -28092.723, Max-Change: 0.00050
Iteration: 204, Log-Lik: -28092.723, Max-Change: 0.00012
Iteration: 205, Log-Lik: -28092.723, Max-Change: 0.00011
Iteration: 206, Log-Lik: -28092.723, Max-Change: 0.00049
Iteration: 207, Log-Lik: -28092.723, Max-Change: 0.00049
Iteration: 208, Log-Lik: -28092.723, Max-Change: 0.00011
Iteration: 209, Log-Lik: -28092.723, Max-Change: 0.00049
Iteration: 210, Log-Lik: -28092.722, Max-Change: 0.00049
Iteration: 211, Log-Lik: -28092.722, Max-Change: 0.00012
Iteration: 212, Log-Lik: -28092.722, Max-Change: 0.00048
Iteration: 213, Log-Lik: -28092.722, Max-Change: 0.00010
#summary
poly_fit
## 
## Call:
## mirt(data = d %>% select(poly1:poly3) %>% map_df(as.numeric), 
##     model = 1, technical = list(removeEmptyRows = T))
## 
## Full-information item factor analysis with 1 factor(s).
## Converged within 1e-04 tolerance after 213 EM iterations.
## mirt version: 1.33.2 
## M-step optimizer: BFGS 
## EM acceleration: Ramsay 
## Number of rectangular quadrature: 61
## Latent density type: Gaussian 
## 
## Log-likelihood = -28093
## Estimated parameters: 10 
## AIC = 56205; AICc = 56205
## BIC = 56285; SABIC = 56253
poly_fit %>% summary()
##          F1    h2
## poly1 0.987 0.973
## poly2 0.901 0.813
## poly3 0.823 0.678
## 
## SS loadings:  2.5 
## Proportion Var:  0.82 
## 
## Factor correlations: 
## 
##    F1
## F1  1
#scores
poly_fit_scores = fscores(poly_fit, full.scores = T, full.scores.SE = T)

#save
d$poly_score = NA
d$poly_score[poly_item_counts > 0] = poly_fit_scores[, 1] %>% standardize()
d$polyamory = d$poly_score
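
#as with the mental health score, a quick direction check for the saved poly score (a sketch)
d %>% filter(!is.na(poly_score), !is.na(poly1)) %>% group_by(poly1) %>% summarise(mean_poly = mean(poly_score), n = n())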

Subsets

#whites only
d_white = d %>% filter(race == "White")

#anglos only
d_anglo = d %>% filter(anglophone)

#white Americans only
d_white_amer = d %>% filter(race == "White", country %in% state.abb)

#main subset: cases with at least 2 mental health items answered
d2 = d[mh_mirt_scores_item_notNA >= 2, ]

#stricter alternative: at least 3 items
d3 = d[mh_mirt_scores_item_notNA >= 3, ]
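
#for reference, case counts for the full data and the analysis subsets (a quick sketch)
c(all = nrow(d), white = nrow(d_white), anglo = nrow(d_anglo), mh_2plus = nrow(d2), mh_3plus = nrow(d3))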

Basic descriptives

#numerics
d %>% 
  select(
    age,
    male
  ) %>% 
  describe()
#basics
d$sex %>% table2(include_NA = F)
d$sexual_orientation %>% table2(include_NA = F)
d$country2 %>% table2(include_NA = F)
d$anglophone %>% table2(include_NA = F)
d$western %>% table2(include_NA = F)
#question table
other_items = c("q513", "q128", "q179268", "q212813")
question_table(c(mental_health_items, poly_items, other_items))

Simple results

#hair color
SMD_matrix(
  d2$mh_score,
  group = d2$unnatural_hair_color,
  extended_output = T
)
## $d
##       No  Yes
## No    NA 0.27
## Yes 0.27   NA
## 
## $d_string
##     No                 Yes               
## No  NA                 "0.27 [0.23 0.30]"
## Yes "0.27 [0.23 0.30]" NA                
## 
## $CI_lower
##       No  Yes
## No    NA 0.23
## Yes 0.23   NA
## 
## $CI_upper
##      No Yes
## No   NA 0.3
## Yes 0.3  NA
## 
## $se_z
## [1] 2
## 
## $se
##        No   Yes
## No     NA 0.017
## Yes 0.017    NA
## 
## $p
##          No     Yes
## No       NA 8.7e-55
## Yes 8.7e-55      NA
## 
## $pairwise_n
##        No   Yes
## No     NA 14421
## Yes 14421    NA
#vegan/vegetarian
SMD_matrix(
  d2$mh_score,
  group = d2$vegetarian,
  extended_output = T
)
## $d
##       No  Yes
## No    NA 0.25
## Yes 0.25   NA
## 
## $d_string
##     No                 Yes               
## No  NA                 "0.25 [0.20 0.30]"
## Yes "0.25 [0.20 0.30]" NA                
## 
## $CI_lower
##      No Yes
## No   NA 0.2
## Yes 0.2  NA
## 
## $CI_upper
##      No Yes
## No   NA 0.3
## Yes 0.3  NA
## 
## $se_z
## [1] 2
## 
## $se
##        No   Yes
## No     NA 0.026
## Yes 0.026    NA
## 
## $p
##          No     Yes
## No       NA 5.6e-22
## Yes 5.6e-22      NA
## 
## $pairwise_n
##        No   Yes
## No     NA 21739
## Yes 21739    NA
#tattoos
SMD_matrix(
  d2$mh_score,
  group = d2$have_tattoos,
  extended_output = T
)
## $d
##                                 I have no tattoos
## I have no tattoos                              NA
## I have 1 or more LITTLE tattoos              0.12
## I have 1 or more BIG tattoos                 0.13
##                                 I have 1 or more LITTLE tattoos
## I have no tattoos                                         0.116
## I have 1 or more LITTLE tattoos                              NA
## I have 1 or more BIG tattoos                              0.012
##                                 I have 1 or more BIG tattoos
## I have no tattoos                                      0.128
## I have 1 or more LITTLE tattoos                        0.012
## I have 1 or more BIG tattoos                              NA
## 
## $d_string
##                                 I have no tattoos  
## I have no tattoos               NA                 
## I have 1 or more LITTLE tattoos "0.12 [0.081 0.15]"
## I have 1 or more BIG tattoos    "0.13 [0.09 0.17]" 
##                                 I have 1 or more LITTLE tattoos
## I have no tattoos               "0.12 [0.081 0.15]"            
## I have 1 or more LITTLE tattoos NA                             
## I have 1 or more BIG tattoos    "0.012 [-0.033 0.057]"         
##                                 I have 1 or more BIG tattoos
## I have no tattoos               "0.13 [0.09 0.17]"          
## I have 1 or more LITTLE tattoos "0.012 [-0.033 0.057]"      
## I have 1 or more BIG tattoos    NA                          
## 
## $CI_lower
##                                 I have no tattoos
## I have no tattoos                              NA
## I have 1 or more LITTLE tattoos             0.081
## I have 1 or more BIG tattoos                0.090
##                                 I have 1 or more LITTLE tattoos
## I have no tattoos                                         0.081
## I have 1 or more LITTLE tattoos                              NA
## I have 1 or more BIG tattoos                             -0.033
##                                 I have 1 or more BIG tattoos
## I have no tattoos                                      0.090
## I have 1 or more LITTLE tattoos                       -0.033
## I have 1 or more BIG tattoos                              NA
## 
## $CI_upper
##                                 I have no tattoos
## I have no tattoos                              NA
## I have 1 or more LITTLE tattoos              0.15
## I have 1 or more BIG tattoos                 0.17
##                                 I have 1 or more LITTLE tattoos
## I have no tattoos                                         0.150
## I have 1 or more LITTLE tattoos                              NA
## I have 1 or more BIG tattoos                              0.057
##                                 I have 1 or more BIG tattoos
## I have no tattoos                                      0.165
## I have 1 or more LITTLE tattoos                        0.057
## I have 1 or more BIG tattoos                              NA
## 
## $se_z
## [1] 2
## 
## $se
##                                 I have no tattoos
## I have no tattoos                              NA
## I have 1 or more LITTLE tattoos             0.017
## I have 1 or more BIG tattoos                0.019
##                                 I have 1 or more LITTLE tattoos
## I have no tattoos                                         0.017
## I have 1 or more LITTLE tattoos                              NA
## I have 1 or more BIG tattoos                              0.023
##                                 I have 1 or more BIG tattoos
## I have no tattoos                                      0.019
## I have 1 or more LITTLE tattoos                        0.023
## I have 1 or more BIG tattoos                              NA
## 
## $p
##                                 I have no tattoos
## I have no tattoos                              NA
## I have 1 or more LITTLE tattoos           3.1e-11
## I have 1 or more BIG tattoos              3.2e-11
##                                 I have 1 or more LITTLE tattoos
## I have no tattoos                                       3.1e-11
## I have 1 or more LITTLE tattoos                              NA
## I have 1 or more BIG tattoos                            6.0e-01
##                                 I have 1 or more BIG tattoos
## I have no tattoos                                    3.2e-11
## I have 1 or more LITTLE tattoos                      6.0e-01
## I have 1 or more BIG tattoos                              NA
## 
## $pairwise_n
##                                 I have no tattoos
## I have no tattoos                              NA
## I have 1 or more LITTLE tattoos             18143
## I have 1 or more BIG tattoos                17163
##                                 I have 1 or more LITTLE tattoos
## I have no tattoos                                         18143
## I have 1 or more LITTLE tattoos                              NA
## I have 1 or more BIG tattoos                               7736
##                                 I have 1 or more BIG tattoos
## I have no tattoos                                      17163
## I have 1 or more LITTLE tattoos                         7736
## I have 1 or more BIG tattoos                              NA
#poly1
SMD_matrix(
  d2$mh_score,
  group = d2$poly1,
  extended_output = T
)
## $d
##       No. Yes.
## No.    NA 0.26
## Yes. 0.26   NA
## 
## $d_string
##      No.                Yes.              
## No.  NA                 "0.26 [0.23 0.30]"
## Yes. "0.26 [0.23 0.30]" NA                
## 
## $CI_lower
##       No. Yes.
## No.    NA 0.23
## Yes. 0.23   NA
## 
## $CI_upper
##      No. Yes.
## No.   NA  0.3
## Yes. 0.3   NA
## 
## $se_z
## [1] 2
## 
## $se
##        No.  Yes.
## No.     NA 0.019
## Yes. 0.019    NA
## 
## $p
##          No.    Yes.
## No.       NA 2.3e-44
## Yes. 2.3e-44      NA
## 
## $pairwise_n
##        No.  Yes.
## No.     NA 11822
## Yes. 11822    NA
#latent correlations
d %>% 
  select(
    mh_score,
    unnatural_hair_color,
    vegetarian,
    have_tattoos,
    poly_score,
    poly1:poly3,
    CA
    ) %>% polycor::hetcor()
## Warning in polyserial(y, x, ML = ML, std.err = std.err, bins = bins): initial
## correlation inadmissible, 1.17311560644192, set to 0.9999
## Warning in polyserial(y, x, ML = ML, std.err = std.err, bins = bins): initial
## correlation inadmissible, 1.02946419249337, set to 0.9999
## Warning in hetcor.data.frame(.): could not compute polyserial correlation between variables 7 and 5
##     Message: Error in optim(rho, f, control = control, hessian = TRUE, method = "BFGS") : 
##   initial value in 'vmmin' is not finite
## 
## Two-Step Estimates
## 
## Correlations/Type of Correlation:
##                      mh_score unnatural_hair_color vegetarian have_tattoos
## mh_score                    1           Polyserial Polyserial   Polyserial
## unnatural_hair_color   -0.161                    1 Polychoric   Polychoric
## vegetarian             -0.105                0.182          1   Polychoric
## have_tattoos          -0.0655                  0.4     0.0406            1
## poly_score             -0.152                0.129      0.108       0.0188
## poly1                   -0.17                0.142     0.0823      0.00957
## poly2                  -0.163                0.151       0.13       0.0251
## poly3                  -0.151                0.124      0.167       0.0267
## CA                     -0.012              -0.0401      0.122       -0.205
##                      poly_score      poly1      poly2      poly3         CA
## mh_score                Pearson Polyserial Polyserial Polyserial    Pearson
## unnatural_hair_color Polyserial Polychoric Polychoric Polychoric Polyserial
## vegetarian           Polyserial Polychoric Polychoric Polychoric Polyserial
## have_tattoos         Polyserial Polychoric Polychoric Polychoric Polyserial
## poly_score                    1 Polyserial Polyserial Polyserial    Pearson
## poly1                     0.997          1 Polychoric Polychoric Polyserial
## poly2                      <NA>      0.859          1 Polychoric Polyserial
## poly3                     0.806      0.781      0.733          1 Polyserial
## CA                         0.11      0.133      0.113     0.0913          1
## 
## Standard Errors:
##                      mh_score unnatural_hair_color vegetarian have_tattoos
## mh_score                                                                  
## unnatural_hair_color   0.0158                                             
## vegetarian             0.0229               0.0291                        
## have_tattoos           0.0155               0.0171     0.0285             
## poly_score             0.0125               0.0159     0.0224       0.0155
## poly1                  0.0158               0.0204     0.0298       0.0198
## poly2                  0.0143               0.0186     0.0266       0.0181
## poly3                   0.016               0.0208      0.029       0.0202
## CA                     0.0128               0.0163      0.024       0.0147
##                      poly_score   poly1   poly2  poly3
## mh_score                                              
## unnatural_hair_color                                  
## vegetarian                                            
## have_tattoos                                          
## poly_score                                            
## poly1                  0.000404                       
## poly2                         0 0.00614               
## poly3                   0.00569 0.00902 0.00885       
## CA                       0.0126  0.0161  0.0147 0.0166
## 
## n = 6112 
## 
## P-values for Tests of Bivariate Normality:
##                                   mh_score unnatural_hair_color vegetarian
## mh_score                                                                  
## unnatural_hair_color              2.99e-25                                
## vegetarian                        2.87e-26                 <NA>           
## have_tattoos                      1.06e-23              0.00078      0.787
## poly_score                               0                    0          0
## poly1                             4.64e-26                 <NA>       <NA>
## poly2                9.52999999999999e-227               0.0328     0.0131
## poly3                                    0             1.66e-05     0.0496
## CA                                7.28e-33             1.04e-11   2.95e-11
##                      have_tattoos poly_score     poly1     poly2 poly3
## mh_score                                                              
## unnatural_hair_color                                                  
## vegetarian                                                            
## have_tattoos                                                          
## poly_score                      0                                     
## poly1                     0.00681        NaN                          
## poly2                       0.163          0  3.09e-59                
## poly3                      0.0798          0 1.12e-121    3e-143      
## CA                       1.82e-12          0  2.28e-11 1.24e-212     0

Models

#modeling function
run_models = function(x, sums = T) {
  #models, summarized
  list(
    simple = ols(mh_score ~ unnatural_hair_color + vegetarian + have_tattoos, data = x),
    add_basic = ols(mh_score ~ unnatural_hair_color + vegetarian + have_tattoos + rcs(age) + sexual_orientation, data = x),
    add_race = ols(mh_score ~ unnatural_hair_color + vegetarian + have_tattoos + rcs(age) + sexual_orientation + race, data = x),
    add_body = ols(mh_score ~ unnatural_hair_color + vegetarian + have_tattoos + rcs(age) + sexual_orientation + race + body_type, data = x),
    final = ols(mh_score ~ unnatural_hair_color + vegetarian + have_tattoos + rcs(age) + sexual_orientation + race + body_type + intelligence + polyamory + pol_beliefs, data = x)
  ) -> mods
  
  if (sums) return(summarize_models(mods)) else return(mods)
}

#main subset, 2+ MH items
(d2_results = run_models(d2))
#more strict, 3+ items
(d3_results = run_models(d3))
#all data
(d_results = run_models(d))
#visualize
jtools::plot_summs(
  ols(mh_score ~ unnatural_hair_color + vegetarian + have_tattoos + rcs(age) + sexual_orientation + race + body_type + intelligence + polyamory + pol_beliefs, data = d2) %>% summary.lm(),
  omit.coefs = c("Intercept", "age" + c("", "'", "''", "'''"))
  )
## Loading required namespace: broom.mixed
## Registered S3 method overwritten by 'broom.mixed':
##   method      from 
##   tidy.gamlss broom

GG_save("figs/model_summary.png")

jtools::plot_summs(
  run_models(d2, sums = F) %>% map(summary.lm),
  omit.coefs = c("Intercept", "age" + c("", "'", "''", "'''"))
  )

GG_save("figs/model_summary_all.png")