Start
options(
digits = 2,
contrasts=c("contr.treatment", "contr.treatment")
)
library(pacman)
p_load(
kirkegaard,
readr,
dplyr,
rms,
polycor,
mirt,
ggeffects
)
Data
#load
vars = readr::read_csv2("data/question_data.csv")
## ℹ Using ',' as decimal and '.' as grouping mark. Use `read_delim()` for more control.
## Warning: Missing column names filled in: 'X1' [1]
##
## ── Column specification ────────────────────────────────────────────────────────
## cols(
## X1 = col_character(),
## text = col_character(),
## option_1 = col_character(),
## option_2 = col_character(),
## option_3 = col_character(),
## option_4 = col_character(),
## N = col_double(),
## Type = col_character(),
## Order = col_character(),
## Keywords = col_character()
## )
d = readr::read_rds("data/parsed_data.rds")
d_orig = d
Recode
#US states
Canada_units = c("Ontario", "British Columbia", "Alberta", "Manitoba", "Nova Scotia", "Saskatchewan", "New Brunswick", "Prince Edward Island")
#rename
d %<>% mutate(
#simple stuff
age = d_age,
sex = gender,
male = sex == "Man",
country = d_country,
sexual_orientation = gender_orientation,
pol_beliefs = q212813 %>% fct_relevel("Conservative / Right-wing"),
body_type = d_bodytype %>% fct_relevel("Average"),
#funny indicators
unnatural_hair_color = q513 %>% fct_rev(),
vegetarian = q179268 %>% fct_rev(),
have_tattoos = q128 %>% fct_rev(),
#polyamory
poly1 = q48278 %>% fct_rev(),
poly2 = q33107 %>% fct_rev(),
poly3 = q37772,
#recode country
country2 = country %>%
mapvalues(from = state.abb, to = rep("USA", length(state.abb))) %>%
mapvalues(from = Canada_units, to = rep("Canada", length(Canada_units))),
#anglophone = USA states, or regions or Canada (not Quebec) or a few others
anglophone = (country %in% c("Ontario", "British Columbia", "Alberta", "Manitoba", "Nova Scotia", "Saskatchewan", "New Brunswick", "Prince Edward Island", "Australia", "Singapore", "Ireland", "New Zealand")) | str_length(country) == 2,
western = (country %in% c("Ontario", "British Columbia", "Alberta", "Manitoba", "Nova Scotia", "Saskatchewan", "New Brunswick", "Prince Edward Island", "Australia", "Ireland", "New Zealand")) | (str_length(country) == 2) | (country %in% c("Germany", "Netherlands", "Denmark", "Italy", "Sweden", "France", "Finland", "Israel", "Belgium", "Spain", "Austria", "Switzerland", "Norway", "Greece", "Portugal", "Iceland", "Luxembourg"))
)
Functions
#logical to numeric
df_lgl_to_num = function(x) {
for (v in seq_along(x)) {
if (is.logical(x[[v]])) x[[v]] = x[[v]] %>% as.numeric()
}
x
}
#redefine describe
describe = function(x) {
# browser()
y = x %>%
as.data.frame() %>%
df_lgl_to_num() %>%
psych::describe()
class(y) = "data.frame"
y$var = colnames(x)
rownames(y) = NULL
y %>% select(-vars) %>% select(var, n, mean, median, sd, mad, min, max, skew)
}
#find identical columns
df_find_dup_cols = function(x, col, simplify = T) {
col
#find dups
x %>% select(-!!col) %>% map_lgl(function(v) {
res = all.equal(x[[col]], v, check.attributes = F)
if (isTRUE(res)) return(T) else return(F)
}) ->
y
names(y[y])
}
iris$Species2 = iris$Species
df_find_dup_cols(iris, "Species")
## [1] "Species2"
#ez life
get_question_info = function(x, which = "text") {
#try
y = vars %>% filter(X1 == x) %>% pull(!!which)
#if no hits, try finding a duplicate variable
if (length(y) > 0) return(y)
dup = df_find_dup_cols(d, x)
if (length(dup > 0)) {
return(vars %>% filter(X1 == dup[1]) %>% pull(!!which))
} else {
return(y)
}
}
get_question_info("q2")
## [1] "Breast implants?"
get_question_info("q2", which = "N")
## [1] 24839
#slow
get_question_info("unnatural_hair_color")
## [1] "Have you ever dyed your hair a real crazy, unnatural color?"
#print question table
question_table = function(x) {
map_df(x, function(v) {
# browser()
#basics
y = vars %>% filter(X1 %in% v) %>% select(X1, text, option_1:option_4, N)
if (nrow(y) != 1) stop(str_glue("Invalid id: {v}, there were {nrow(y)} matched rows in `vars`"))
#add distributions in %
dist = table2(d[[v]], include_NA = F, sort_descending = NULL)
#fill in %s
for (i in seq_along_rows(dist)) {
i_which = which(dist$Group[i] == (y[1, c("option_" + 1:4)]))
y[1, "option_" + i_which] = y[1, "option_" + i_which] + str_glue(" ({format_digits(dist$Percent[i], digits = 1)}%)")
}
y
})
}
question_table("q2")
Mental health
#reverse coding on one item
d$q50 %<>% fct_rev()
#items
mental_health_items = c(
seen_therapist = "q50",
much_depression = "q1552",
emotional_diversity = "q6021",
happy_with_life = "q4018",
exp_mental_illness = "q1287"
)
#output var data
vars %>% filter(X1 %in% (!!mental_health_items)) %>% select(X1, text, option_1:option_4, N)
#copy to new names
for (v in mental_health_items) d[[names(mental_health_items)[which(v == mental_health_items)]]] = d[[v]]
#print items
for (v in mental_health_items) {
print(str_glue("{v} - {names(mental_health_items)[which(v == mental_health_items)]}: {get_question_info(v)}"))
print(table2(d[[v]], include_NA = F, sort_descending = NULL))
cat("\n")
}
## q50 - seen_therapist: Have you ever seen a therapist?
## # A tibble: 2 x 3
## Group Count Percent
## <chr> <dbl> <dbl>
## 1 No 3826 40.2
## 2 Yes 5681 59.8
##
## q1552 - much_depression: Do you get depressed much?
## # A tibble: 3 x 3
## Group Count Percent
## <chr> <dbl> <dbl>
## 1 Almost never, I'm happy! 3261 32.7
## 2 Sometimes, when it's a bad day 6295 63.2
## 3 Yeah, despair is my life 402 4.04
##
## q6021 - emotional_diversity: How would you describe your emotional diversity?
## # A tibble: 4 x 3
## Group Count Percent
## <chr> <dbl> <dbl>
## 1 I get extremely happy but rarely depressed 1428 32.7
## 2 I get extremely depressed and I'm rarely happy 202 4.63
## 3 I don't feel much of either 731 16.7
## 4 I feel both often 2006 45.9
##
## q4018 - happy_with_life: Are you happy with your life?
## # A tibble: 2 x 3
## Group Count Percent
## <chr> <dbl> <dbl>
## 1 Yes 49586 92.5
## 2 No 4039 7.53
##
## q1287 - exp_mental_illness: Have you experienced mental illness?
## # A tibble: 4 x 3
## Group Count Percent
## <chr> <dbl> <dbl>
## 1 No 10501 58.5
## 2 I'm not sure 2540 14.2
## 3 Yes - low grade 3843 21.4
## 4 Yes - severely 1061 5.91
#factor analyze with default mirt settings, some items are binary, some are ordinal, one is categorical
mh_mirt = mirt(
data = d %>% select(!!mental_health_items) %>% map_df(as.numeric),
model = 1,
itemtype = c("2PL", "nominal", "nominal", "2PL", "nominal"),
technical = list(removeEmptyRows = T)
)
##
Iteration: 1, Log-Lik: -67060.043, Max-Change: 2.38811
Iteration: 2, Log-Lik: -53004.128, Max-Change: 1.49423
Iteration: 3, Log-Lik: -51422.869, Max-Change: 0.75200
Iteration: 4, Log-Lik: -51096.832, Max-Change: 0.87695
Iteration: 5, Log-Lik: -50907.603, Max-Change: 0.55862
Iteration: 6, Log-Lik: -50797.116, Max-Change: 0.53455
Iteration: 7, Log-Lik: -50725.979, Max-Change: 0.39774
Iteration: 8, Log-Lik: -50680.333, Max-Change: 0.18697
Iteration: 9, Log-Lik: -50650.731, Max-Change: 0.17559
Iteration: 10, Log-Lik: -50630.990, Max-Change: 0.08875
Iteration: 11, Log-Lik: -50618.058, Max-Change: 0.12700
Iteration: 12, Log-Lik: -50609.103, Max-Change: 0.08946
Iteration: 13, Log-Lik: -50603.099, Max-Change: 0.05713
Iteration: 14, Log-Lik: -50598.924, Max-Change: 0.03379
Iteration: 15, Log-Lik: -50596.081, Max-Change: 0.03510
Iteration: 16, Log-Lik: -50592.013, Max-Change: 0.02334
Iteration: 17, Log-Lik: -50590.756, Max-Change: 0.02099
Iteration: 18, Log-Lik: -50590.010, Max-Change: 0.01844
Iteration: 19, Log-Lik: -50588.529, Max-Change: 0.00654
Iteration: 20, Log-Lik: -50588.415, Max-Change: 0.00544
Iteration: 21, Log-Lik: -50588.354, Max-Change: 0.00406
Iteration: 22, Log-Lik: -50588.302, Max-Change: 0.00391
Iteration: 23, Log-Lik: -50588.273, Max-Change: 0.00342
Iteration: 24, Log-Lik: -50588.252, Max-Change: 0.00339
Iteration: 25, Log-Lik: -50588.171, Max-Change: 0.00172
Iteration: 26, Log-Lik: -50588.167, Max-Change: 0.00344
Iteration: 27, Log-Lik: -50588.158, Max-Change: 0.00067
Iteration: 28, Log-Lik: -50588.158, Max-Change: 0.00049
Iteration: 29, Log-Lik: -50588.156, Max-Change: 0.00189
Iteration: 30, Log-Lik: -50588.152, Max-Change: 0.00009
mh_mirt
##
## Call:
## mirt(data = d %>% select(!!mental_health_items) %>% map_df(as.numeric),
## model = 1, itemtype = c("2PL", "nominal", "nominal", "2PL",
## "nominal"), technical = list(removeEmptyRows = T))
##
## Full-information item factor analysis with 1 factor(s).
## Converged within 1e-04 tolerance after 30 EM iterations.
## mirt version: 1.33.2
## M-step optimizer: BFGS
## EM acceleration: Ramsay
## Number of rectangular quadrature: 61
## Latent density type: Gaussian
##
## Log-likelihood = -50588
## Estimated parameters: 20
## AIC = 1e+05; AICc = 1e+05
## BIC = 1e+05; SABIC = 1e+05
mh_mirt %>% summary()
## F1 h2
## seen_therapist 0.483 0.233
## much_depression 0.881 0.776
## emotional_diversity 0.505 0.255
## happy_with_life 0.630 0.397
## exp_mental_illness 0.433 0.188
##
## SS loadings: 1.8
## Proportion Var: 0.37
##
## Factor correlations:
##
## F1
## F1 1
#score
mh_mirt_scores = fscores(mh_mirt, full.scores = T, full.scores.SE = T)
#reliability
empirical_rxx(mh_mirt_scores)
## F1
## 0.26
marginal_rxx(mh_mirt)
## [1] 0.72
#save scores
mh_mirt_scores_item_notNA = miss_by_case(d %>% select(!!mental_health_items), reverse = T)
mh_mirt_scores_item_notNA %>% table2(sort_descending = NULL)
d$mh_score = NA
d$mh_score[mh_mirt_scores_item_notNA > 0] = mh_mirt_scores[, 1] %>% standardize() %>% multiply_by(-1)
Polyamory
#items
poly_items = c(
"q48278",
"q33107",
"q37772"
)
#item counts
poly_item_counts = d %>% select(poly1:poly3) %>% miss_by_case(reverse = T)
poly_item_counts %>% table2(sort_descending = NULL)
#poly fit, these are all ordinals
poly_fit = mirt(
data = d %>% select(poly1:poly3) %>% map_df(as.numeric),
model = 1,
technical = list(removeEmptyRows=T)
)
##
Iteration: 1, Log-Lik: -40715.500, Max-Change: 3.00885
Iteration: 2, Log-Lik: -31171.766, Max-Change: 0.94973
Iteration: 3, Log-Lik: -29536.243, Max-Change: 0.78633
Iteration: 4, Log-Lik: -28803.071, Max-Change: 0.64791
Iteration: 5, Log-Lik: -28455.886, Max-Change: 0.50791
Iteration: 6, Log-Lik: -28289.084, Max-Change: 0.43317
Iteration: 7, Log-Lik: -28205.852, Max-Change: 0.36511
Iteration: 8, Log-Lik: -28162.202, Max-Change: 0.30753
Iteration: 9, Log-Lik: -28138.177, Max-Change: 0.26023
Iteration: 10, Log-Lik: -28124.395, Max-Change: 0.22190
Iteration: 11, Log-Lik: -28116.197, Max-Change: 0.19104
Iteration: 12, Log-Lik: -28111.144, Max-Change: 0.16633
Iteration: 13, Log-Lik: -28107.904, Max-Change: 0.14659
Iteration: 14, Log-Lik: -28105.729, Max-Change: 0.13084
Iteration: 15, Log-Lik: -28104.192, Max-Change: 0.11827
Iteration: 16, Log-Lik: -28099.682, Max-Change: 0.06424
Iteration: 17, Log-Lik: -28099.215, Max-Change: 0.06302
Iteration: 18, Log-Lik: -28098.818, Max-Change: 0.06234
Iteration: 19, Log-Lik: -28097.031, Max-Change: 0.06560
Iteration: 20, Log-Lik: -28096.814, Max-Change: 0.06011
Iteration: 21, Log-Lik: -28096.623, Max-Change: 0.05507
Iteration: 22, Log-Lik: -28095.752, Max-Change: 0.05175
Iteration: 23, Log-Lik: -28095.593, Max-Change: 0.05450
Iteration: 24, Log-Lik: -28095.465, Max-Change: 0.05495
Iteration: 25, Log-Lik: -28094.819, Max-Change: 0.04408
Iteration: 26, Log-Lik: -28094.722, Max-Change: 0.04591
Iteration: 27, Log-Lik: -28094.646, Max-Change: 0.03808
Iteration: 28, Log-Lik: -28094.383, Max-Change: 0.04595
Iteration: 29, Log-Lik: -28094.317, Max-Change: 0.04457
Iteration: 30, Log-Lik: -28094.255, Max-Change: 0.04092
Iteration: 31, Log-Lik: -28093.947, Max-Change: 0.04026
Iteration: 32, Log-Lik: -28093.899, Max-Change: 0.04170
Iteration: 33, Log-Lik: -28093.852, Max-Change: 0.03338
Iteration: 34, Log-Lik: -28093.710, Max-Change: 0.02614
Iteration: 35, Log-Lik: -28093.684, Max-Change: 0.03615
Iteration: 36, Log-Lik: -28093.649, Max-Change: 0.03967
Iteration: 37, Log-Lik: -28093.446, Max-Change: 0.03151
Iteration: 38, Log-Lik: -28093.416, Max-Change: 0.02439
Iteration: 39, Log-Lik: -28093.396, Max-Change: 0.02576
Iteration: 40, Log-Lik: -28093.284, Max-Change: 0.02493
Iteration: 41, Log-Lik: -28093.265, Max-Change: 0.02069
Iteration: 42, Log-Lik: -28093.251, Max-Change: 0.02687
Iteration: 43, Log-Lik: -28093.181, Max-Change: 0.03167
Iteration: 44, Log-Lik: -28093.158, Max-Change: 0.02304
Iteration: 45, Log-Lik: -28093.144, Max-Change: 0.00120
Iteration: 46, Log-Lik: -28093.144, Max-Change: 0.00110
Iteration: 47, Log-Lik: -28093.143, Max-Change: 0.00092
Iteration: 48, Log-Lik: -28093.143, Max-Change: 0.03171
Iteration: 49, Log-Lik: -28093.123, Max-Change: 0.01810
Iteration: 50, Log-Lik: -28093.113, Max-Change: 0.02383
Iteration: 51, Log-Lik: -28093.098, Max-Change: 0.02293
Iteration: 52, Log-Lik: -28093.036, Max-Change: 0.02796
Iteration: 53, Log-Lik: -28093.014, Max-Change: 0.01765
Iteration: 54, Log-Lik: -28093.003, Max-Change: 0.02335
Iteration: 55, Log-Lik: -28092.957, Max-Change: 0.01348
Iteration: 56, Log-Lik: -28092.949, Max-Change: 0.02316
Iteration: 57, Log-Lik: -28092.939, Max-Change: 0.03043
Iteration: 58, Log-Lik: -28092.908, Max-Change: 0.01780
Iteration: 59, Log-Lik: -28092.898, Max-Change: 0.00074
Iteration: 60, Log-Lik: -28092.898, Max-Change: 0.00017
Iteration: 61, Log-Lik: -28092.898, Max-Change: 0.00015
Iteration: 62, Log-Lik: -28092.897, Max-Change: 0.01583
Iteration: 63, Log-Lik: -28092.890, Max-Change: 0.02099
Iteration: 64, Log-Lik: -28092.868, Max-Change: 0.02155
Iteration: 65, Log-Lik: -28092.852, Max-Change: 0.01553
Iteration: 66, Log-Lik: -28092.844, Max-Change: 0.00074
Iteration: 67, Log-Lik: -28092.844, Max-Change: 0.02294
Iteration: 68, Log-Lik: -28092.835, Max-Change: 0.01188
Iteration: 69, Log-Lik: -28092.831, Max-Change: 0.02204
Iteration: 70, Log-Lik: -28092.815, Max-Change: 0.00077
Iteration: 71, Log-Lik: -28092.815, Max-Change: 0.01922
Iteration: 72, Log-Lik: -28092.807, Max-Change: 0.00061
Iteration: 73, Log-Lik: -28092.807, Max-Change: 0.00061
Iteration: 74, Log-Lik: -28092.807, Max-Change: 0.00060
Iteration: 75, Log-Lik: -28092.806, Max-Change: 0.00017
Iteration: 76, Log-Lik: -28092.806, Max-Change: 0.00014
Iteration: 77, Log-Lik: -28092.806, Max-Change: 0.00059
Iteration: 78, Log-Lik: -28092.806, Max-Change: 0.00065
Iteration: 79, Log-Lik: -28092.806, Max-Change: 0.00015
Iteration: 80, Log-Lik: -28092.806, Max-Change: 0.00059
Iteration: 81, Log-Lik: -28092.806, Max-Change: 0.00065
Iteration: 82, Log-Lik: -28092.805, Max-Change: 0.00016
Iteration: 83, Log-Lik: -28092.805, Max-Change: 0.00058
Iteration: 84, Log-Lik: -28092.805, Max-Change: 0.00014
Iteration: 85, Log-Lik: -28092.805, Max-Change: 0.00061
Iteration: 86, Log-Lik: -28092.805, Max-Change: 0.00021
Iteration: 87, Log-Lik: -28092.805, Max-Change: 0.00058
Iteration: 88, Log-Lik: -28092.805, Max-Change: 0.01688
Iteration: 89, Log-Lik: -28092.798, Max-Change: 0.00083
Iteration: 90, Log-Lik: -28092.798, Max-Change: 0.00063
Iteration: 91, Log-Lik: -28092.798, Max-Change: 0.00057
Iteration: 92, Log-Lik: -28092.798, Max-Change: 0.00019
Iteration: 93, Log-Lik: -28092.798, Max-Change: 0.00058
Iteration: 94, Log-Lik: -28092.798, Max-Change: 0.01590
Iteration: 95, Log-Lik: -28092.794, Max-Change: 0.01517
Iteration: 96, Log-Lik: -28092.786, Max-Change: 0.00021
Iteration: 97, Log-Lik: -28092.786, Max-Change: 0.00020
Iteration: 98, Log-Lik: -28092.786, Max-Change: 0.00012
Iteration: 99, Log-Lik: -28092.786, Max-Change: 0.00055
Iteration: 100, Log-Lik: -28092.786, Max-Change: 0.00058
Iteration: 101, Log-Lik: -28092.786, Max-Change: 0.00013
Iteration: 102, Log-Lik: -28092.786, Max-Change: 0.00055
Iteration: 103, Log-Lik: -28092.785, Max-Change: 0.00015
Iteration: 104, Log-Lik: -28092.785, Max-Change: 0.00056
Iteration: 105, Log-Lik: -28092.785, Max-Change: 0.00011
Iteration: 106, Log-Lik: -28092.785, Max-Change: 0.00011
Iteration: 107, Log-Lik: -28092.785, Max-Change: 0.00055
Iteration: 108, Log-Lik: -28092.785, Max-Change: 0.00055
Iteration: 109, Log-Lik: -28092.785, Max-Change: 0.00054
Iteration: 110, Log-Lik: -28092.784, Max-Change: 0.00011
Iteration: 111, Log-Lik: -28092.784, Max-Change: 0.00055
Iteration: 112, Log-Lik: -28092.784, Max-Change: 0.00014
Iteration: 113, Log-Lik: -28092.784, Max-Change: 0.00054
Iteration: 114, Log-Lik: -28092.784, Max-Change: 0.00011
Iteration: 115, Log-Lik: -28092.784, Max-Change: 0.00011
Iteration: 116, Log-Lik: -28092.784, Max-Change: 0.00055
Iteration: 117, Log-Lik: -28092.784, Max-Change: 0.00054
Iteration: 118, Log-Lik: -28092.783, Max-Change: 0.00055
Iteration: 119, Log-Lik: -28092.783, Max-Change: 0.00018
Iteration: 120, Log-Lik: -28092.783, Max-Change: 0.00054
Iteration: 121, Log-Lik: -28092.783, Max-Change: 0.00023
Iteration: 122, Log-Lik: -28092.783, Max-Change: 0.00055
Iteration: 123, Log-Lik: -28092.783, Max-Change: 0.00018
Iteration: 124, Log-Lik: -28092.783, Max-Change: 0.00015
Iteration: 125, Log-Lik: -28092.783, Max-Change: 0.00054
Iteration: 126, Log-Lik: -28092.782, Max-Change: 0.00012
Iteration: 127, Log-Lik: -28092.782, Max-Change: 0.00055
Iteration: 128, Log-Lik: -28092.782, Max-Change: 0.00019
Iteration: 129, Log-Lik: -28092.782, Max-Change: 0.00054
Iteration: 130, Log-Lik: -28092.782, Max-Change: 0.00024
Iteration: 131, Log-Lik: -28092.782, Max-Change: 0.00055
Iteration: 132, Log-Lik: -28092.782, Max-Change: 0.00019
Iteration: 133, Log-Lik: -28092.782, Max-Change: 0.00016
Iteration: 134, Log-Lik: -28092.782, Max-Change: 0.00054
Iteration: 135, Log-Lik: -28092.781, Max-Change: 0.00013
Iteration: 136, Log-Lik: -28092.781, Max-Change: 0.00055
Iteration: 137, Log-Lik: -28092.781, Max-Change: 0.00020
Iteration: 138, Log-Lik: -28092.781, Max-Change: 0.00054
Iteration: 139, Log-Lik: -28092.781, Max-Change: 0.01755
Iteration: 140, Log-Lik: -28092.775, Max-Change: 0.00057
Iteration: 141, Log-Lik: -28092.775, Max-Change: 0.00013
Iteration: 142, Log-Lik: -28092.775, Max-Change: 0.00011
Iteration: 143, Log-Lik: -28092.774, Max-Change: 0.00054
Iteration: 144, Log-Lik: -28092.774, Max-Change: 0.00055
Iteration: 145, Log-Lik: -28092.774, Max-Change: 0.00015
Iteration: 146, Log-Lik: -28092.774, Max-Change: 0.00054
Iteration: 147, Log-Lik: -28092.774, Max-Change: 0.00011
Iteration: 148, Log-Lik: -28092.774, Max-Change: 0.00054
Iteration: 149, Log-Lik: -28092.774, Max-Change: 0.00020
Iteration: 150, Log-Lik: -28092.774, Max-Change: 0.00054
Iteration: 151, Log-Lik: -28092.774, Max-Change: 0.03120
Iteration: 152, Log-Lik: -28092.763, Max-Change: 0.00055
Iteration: 153, Log-Lik: -28092.762, Max-Change: 0.00011
Iteration: 154, Log-Lik: -28092.762, Max-Change: 0.00051
Iteration: 155, Log-Lik: -28092.762, Max-Change: 0.00020
Iteration: 156, Log-Lik: -28092.762, Max-Change: 0.00053
Iteration: 157, Log-Lik: -28092.762, Max-Change: 0.01867
Iteration: 158, Log-Lik: -28092.756, Max-Change: 0.00054
Iteration: 159, Log-Lik: -28092.755, Max-Change: 0.00051
Iteration: 160, Log-Lik: -28092.755, Max-Change: 0.00052
Iteration: 161, Log-Lik: -28092.755, Max-Change: 0.00015
Iteration: 162, Log-Lik: -28092.755, Max-Change: 0.00051
Iteration: 163, Log-Lik: -28092.755, Max-Change: 0.00020
Iteration: 164, Log-Lik: -28092.755, Max-Change: 0.00052
Iteration: 165, Log-Lik: -28092.755, Max-Change: 0.00016
Iteration: 166, Log-Lik: -28092.755, Max-Change: 0.00014
Iteration: 167, Log-Lik: -28092.754, Max-Change: 0.00051
Iteration: 168, Log-Lik: -28092.754, Max-Change: 0.00011
Iteration: 169, Log-Lik: -28092.754, Max-Change: 0.00052
Iteration: 170, Log-Lik: -28092.754, Max-Change: 0.00018
Iteration: 171, Log-Lik: -28092.754, Max-Change: 0.00051
Iteration: 172, Log-Lik: -28092.754, Max-Change: 0.02987
Iteration: 173, Log-Lik: -28092.744, Max-Change: 0.00052
Iteration: 174, Log-Lik: -28092.744, Max-Change: 0.00012
Iteration: 175, Log-Lik: -28092.744, Max-Change: 0.00011
Iteration: 176, Log-Lik: -28092.744, Max-Change: 0.00050
Iteration: 177, Log-Lik: -28092.744, Max-Change: 0.00051
Iteration: 178, Log-Lik: -28092.743, Max-Change: 0.00012
Iteration: 179, Log-Lik: -28092.743, Max-Change: 0.00050
Iteration: 180, Log-Lik: -28092.743, Max-Change: 0.00050
Iteration: 181, Log-Lik: -28092.743, Max-Change: 0.00013
Iteration: 182, Log-Lik: -28092.743, Max-Change: 0.00050
Iteration: 183, Log-Lik: -28092.743, Max-Change: 0.00011
Iteration: 184, Log-Lik: -28092.743, Max-Change: 0.00050
Iteration: 185, Log-Lik: -28092.743, Max-Change: 0.00018
Iteration: 186, Log-Lik: -28092.743, Max-Change: 0.00050
Iteration: 187, Log-Lik: -28092.743, Max-Change: 0.02844
Iteration: 188, Log-Lik: -28092.733, Max-Change: 0.00051
Iteration: 189, Log-Lik: -28092.733, Max-Change: 0.00013
Iteration: 190, Log-Lik: -28092.733, Max-Change: 0.00011
Iteration: 191, Log-Lik: -28092.733, Max-Change: 0.00049
Iteration: 192, Log-Lik: -28092.733, Max-Change: 0.00050
Iteration: 193, Log-Lik: -28092.733, Max-Change: 0.00011
Iteration: 194, Log-Lik: -28092.733, Max-Change: 0.00049
Iteration: 195, Log-Lik: -28092.732, Max-Change: 0.00050
Iteration: 196, Log-Lik: -28092.732, Max-Change: 0.00012
Iteration: 197, Log-Lik: -28092.732, Max-Change: 0.00049
Iteration: 198, Log-Lik: -28092.732, Max-Change: 0.00010
Iteration: 199, Log-Lik: -28092.732, Max-Change: 0.00049
Iteration: 200, Log-Lik: -28092.732, Max-Change: 0.00017
Iteration: 201, Log-Lik: -28092.732, Max-Change: 0.00049
Iteration: 202, Log-Lik: -28092.732, Max-Change: 0.02680
Iteration: 203, Log-Lik: -28092.723, Max-Change: 0.00050
Iteration: 204, Log-Lik: -28092.723, Max-Change: 0.00012
Iteration: 205, Log-Lik: -28092.723, Max-Change: 0.00011
Iteration: 206, Log-Lik: -28092.723, Max-Change: 0.00049
Iteration: 207, Log-Lik: -28092.723, Max-Change: 0.00049
Iteration: 208, Log-Lik: -28092.723, Max-Change: 0.00011
Iteration: 209, Log-Lik: -28092.723, Max-Change: 0.00049
Iteration: 210, Log-Lik: -28092.722, Max-Change: 0.00049
Iteration: 211, Log-Lik: -28092.722, Max-Change: 0.00012
Iteration: 212, Log-Lik: -28092.722, Max-Change: 0.00048
Iteration: 213, Log-Lik: -28092.722, Max-Change: 0.00010
#summary
poly_fit
##
## Call:
## mirt(data = d %>% select(poly1:poly3) %>% map_df(as.numeric),
## model = 1, technical = list(removeEmptyRows = T))
##
## Full-information item factor analysis with 1 factor(s).
## Converged within 1e-04 tolerance after 213 EM iterations.
## mirt version: 1.33.2
## M-step optimizer: BFGS
## EM acceleration: Ramsay
## Number of rectangular quadrature: 61
## Latent density type: Gaussian
##
## Log-likelihood = -28093
## Estimated parameters: 10
## AIC = 56205; AICc = 56205
## BIC = 56285; SABIC = 56253
poly_fit %>% summary()
## F1 h2
## poly1 0.987 0.973
## poly2 0.901 0.813
## poly3 0.823 0.678
##
## SS loadings: 2.5
## Proportion Var: 0.82
##
## Factor correlations:
##
## F1
## F1 1
#scores
poly_fit_scores = fscores(poly_fit, full.scores = T, full.scores.SE = T)
#save
d$poly_score = NA
d$poly_score[poly_item_counts > 0] = poly_fit_scores[, 1] %>% standardize()
d$polyamory = d$poly_score
Subsets
#whites only
d_white = d %>% filter(race == "White")
#anglos only
d_anglo = d %>% filter(anglophone)
#white Americans only
d_white_amer = d %>% filter(race == "White", country %in% state.abb)
#our subset with some number of items at least
d2 = d[mh_mirt_scores_item_notNA >= 2, ]
#alternative more strict
d3 = d[mh_mirt_scores_item_notNA >= 3, ]
Basic descriptives
#numerics
d %>%
select(
age,
male
) %>%
describe()
#basics
d$sex %>% table2(include_NA = F)
d$sexual_orientation %>% table2(include_NA = F)
d$country2 %>% table2(include_NA = F)
d$anglophone %>% table2(include_NA = F)
d$western %>% table2(include_NA = F)
#question table
other_items = c("q513", "q128", "q179268", "q212813")
question_table(c(mental_health_items, poly_items, other_items))
Simple results
#hair color
SMD_matrix(
d2$mh_score,
group = d2$unnatural_hair_color,
extended_output = T
)
## $d
## No Yes
## No NA 0.27
## Yes 0.27 NA
##
## $d_string
## No Yes
## No NA "0.27 [0.23 0.30]"
## Yes "0.27 [0.23 0.30]" NA
##
## $CI_lower
## No Yes
## No NA 0.23
## Yes 0.23 NA
##
## $CI_upper
## No Yes
## No NA 0.3
## Yes 0.3 NA
##
## $se_z
## [1] 2
##
## $se
## No Yes
## No NA 0.017
## Yes 0.017 NA
##
## $p
## No Yes
## No NA 8.7e-55
## Yes 8.7e-55 NA
##
## $pairwise_n
## No Yes
## No NA 14421
## Yes 14421 NA
#vegan/vegeratian
SMD_matrix(
d2$mh_score,
group = d2$vegetarian,
extended_output = T
)
## $d
## No Yes
## No NA 0.25
## Yes 0.25 NA
##
## $d_string
## No Yes
## No NA "0.25 [0.20 0.30]"
## Yes "0.25 [0.20 0.30]" NA
##
## $CI_lower
## No Yes
## No NA 0.2
## Yes 0.2 NA
##
## $CI_upper
## No Yes
## No NA 0.3
## Yes 0.3 NA
##
## $se_z
## [1] 2
##
## $se
## No Yes
## No NA 0.026
## Yes 0.026 NA
##
## $p
## No Yes
## No NA 5.6e-22
## Yes 5.6e-22 NA
##
## $pairwise_n
## No Yes
## No NA 21739
## Yes 21739 NA
#vegan/vegeratian
SMD_matrix(
d2$mh_score,
group = d2$have_tattoos,
extended_output = T
)
## $d
## I have no tattoos
## I have no tattoos NA
## I have 1 or more LITTLE tattoos 0.12
## I have 1 or more BIG tattoos 0.13
## I have 1 or more LITTLE tattoos
## I have no tattoos 0.116
## I have 1 or more LITTLE tattoos NA
## I have 1 or more BIG tattoos 0.012
## I have 1 or more BIG tattoos
## I have no tattoos 0.128
## I have 1 or more LITTLE tattoos 0.012
## I have 1 or more BIG tattoos NA
##
## $d_string
## I have no tattoos
## I have no tattoos NA
## I have 1 or more LITTLE tattoos "0.12 [0.081 0.15]"
## I have 1 or more BIG tattoos "0.13 [0.09 0.17]"
## I have 1 or more LITTLE tattoos
## I have no tattoos "0.12 [0.081 0.15]"
## I have 1 or more LITTLE tattoos NA
## I have 1 or more BIG tattoos "0.012 [-0.033 0.057]"
## I have 1 or more BIG tattoos
## I have no tattoos "0.13 [0.09 0.17]"
## I have 1 or more LITTLE tattoos "0.012 [-0.033 0.057]"
## I have 1 or more BIG tattoos NA
##
## $CI_lower
## I have no tattoos
## I have no tattoos NA
## I have 1 or more LITTLE tattoos 0.081
## I have 1 or more BIG tattoos 0.090
## I have 1 or more LITTLE tattoos
## I have no tattoos 0.081
## I have 1 or more LITTLE tattoos NA
## I have 1 or more BIG tattoos -0.033
## I have 1 or more BIG tattoos
## I have no tattoos 0.090
## I have 1 or more LITTLE tattoos -0.033
## I have 1 or more BIG tattoos NA
##
## $CI_upper
## I have no tattoos
## I have no tattoos NA
## I have 1 or more LITTLE tattoos 0.15
## I have 1 or more BIG tattoos 0.17
## I have 1 or more LITTLE tattoos
## I have no tattoos 0.150
## I have 1 or more LITTLE tattoos NA
## I have 1 or more BIG tattoos 0.057
## I have 1 or more BIG tattoos
## I have no tattoos 0.165
## I have 1 or more LITTLE tattoos 0.057
## I have 1 or more BIG tattoos NA
##
## $se_z
## [1] 2
##
## $se
## I have no tattoos
## I have no tattoos NA
## I have 1 or more LITTLE tattoos 0.017
## I have 1 or more BIG tattoos 0.019
## I have 1 or more LITTLE tattoos
## I have no tattoos 0.017
## I have 1 or more LITTLE tattoos NA
## I have 1 or more BIG tattoos 0.023
## I have 1 or more BIG tattoos
## I have no tattoos 0.019
## I have 1 or more LITTLE tattoos 0.023
## I have 1 or more BIG tattoos NA
##
## $p
## I have no tattoos
## I have no tattoos NA
## I have 1 or more LITTLE tattoos 3.1e-11
## I have 1 or more BIG tattoos 3.2e-11
## I have 1 or more LITTLE tattoos
## I have no tattoos 3.1e-11
## I have 1 or more LITTLE tattoos NA
## I have 1 or more BIG tattoos 6.0e-01
## I have 1 or more BIG tattoos
## I have no tattoos 3.2e-11
## I have 1 or more LITTLE tattoos 6.0e-01
## I have 1 or more BIG tattoos NA
##
## $pairwise_n
## I have no tattoos
## I have no tattoos NA
## I have 1 or more LITTLE tattoos 18143
## I have 1 or more BIG tattoos 17163
## I have 1 or more LITTLE tattoos
## I have no tattoos 18143
## I have 1 or more LITTLE tattoos NA
## I have 1 or more BIG tattoos 7736
## I have 1 or more BIG tattoos
## I have no tattoos 17163
## I have 1 or more LITTLE tattoos 7736
## I have 1 or more BIG tattoos NA
#poly1
SMD_matrix(
d2$mh_score,
group = d2$poly1,
extended_output = T
)
## $d
## No. Yes.
## No. NA 0.26
## Yes. 0.26 NA
##
## $d_string
## No. Yes.
## No. NA "0.26 [0.23 0.30]"
## Yes. "0.26 [0.23 0.30]" NA
##
## $CI_lower
## No. Yes.
## No. NA 0.23
## Yes. 0.23 NA
##
## $CI_upper
## No. Yes.
## No. NA 0.3
## Yes. 0.3 NA
##
## $se_z
## [1] 2
##
## $se
## No. Yes.
## No. NA 0.019
## Yes. 0.019 NA
##
## $p
## No. Yes.
## No. NA 2.3e-44
## Yes. 2.3e-44 NA
##
## $pairwise_n
## No. Yes.
## No. NA 11822
## Yes. 11822 NA
#latent correlations
d %>%
select(
mh_score,
unnatural_hair_color,
vegetarian,
have_tattoos,
poly_score,
poly1:poly3,
CA
) %>% polycor::hetcor()
## Warning in polyserial(y, x, ML = ML, std.err = std.err, bins = bins): initial
## correlation inadmissible, 1.17311560644192, set to 0.9999
## Warning in polyserial(y, x, ML = ML, std.err = std.err, bins = bins): initial
## correlation inadmissible, 1.02946419249337, set to 0.9999
## Warning in hetcor.data.frame(.): could not compute polyserial correlation between variables 7 and 5
## Message: Error in optim(rho, f, control = control, hessian = TRUE, method = "BFGS") :
## initial value in 'vmmin' is not finite
##
## Two-Step Estimates
##
## Correlations/Type of Correlation:
## mh_score unnatural_hair_color vegetarian have_tattoos
## mh_score 1 Polyserial Polyserial Polyserial
## unnatural_hair_color -0.161 1 Polychoric Polychoric
## vegetarian -0.105 0.182 1 Polychoric
## have_tattoos -0.0655 0.4 0.0406 1
## poly_score -0.152 0.129 0.108 0.0188
## poly1 -0.17 0.142 0.0823 0.00957
## poly2 -0.163 0.151 0.13 0.0251
## poly3 -0.151 0.124 0.167 0.0267
## CA -0.012 -0.0401 0.122 -0.205
## poly_score poly1 poly2 poly3 CA
## mh_score Pearson Polyserial Polyserial Polyserial Pearson
## unnatural_hair_color Polyserial Polychoric Polychoric Polychoric Polyserial
## vegetarian Polyserial Polychoric Polychoric Polychoric Polyserial
## have_tattoos Polyserial Polychoric Polychoric Polychoric Polyserial
## poly_score 1 Polyserial Polyserial Polyserial Pearson
## poly1 0.997 1 Polychoric Polychoric Polyserial
## poly2 <NA> 0.859 1 Polychoric Polyserial
## poly3 0.806 0.781 0.733 1 Polyserial
## CA 0.11 0.133 0.113 0.0913 1
##
## Standard Errors:
## mh_score unnatural_hair_color vegetarian have_tattoos
## mh_score
## unnatural_hair_color 0.0158
## vegetarian 0.0229 0.0291
## have_tattoos 0.0155 0.0171 0.0285
## poly_score 0.0125 0.0159 0.0224 0.0155
## poly1 0.0158 0.0204 0.0298 0.0198
## poly2 0.0143 0.0186 0.0266 0.0181
## poly3 0.016 0.0208 0.029 0.0202
## CA 0.0128 0.0163 0.024 0.0147
## poly_score poly1 poly2 poly3
## mh_score
## unnatural_hair_color
## vegetarian
## have_tattoos
## poly_score
## poly1 0.000404
## poly2 0 0.00614
## poly3 0.00569 0.00902 0.00885
## CA 0.0126 0.0161 0.0147 0.0166
##
## n = 6112
##
## P-values for Tests of Bivariate Normality:
## mh_score unnatural_hair_color vegetarian
## mh_score
## unnatural_hair_color 2.99e-25
## vegetarian 2.87e-26 <NA>
## have_tattoos 1.06e-23 0.00078 0.787
## poly_score 0 0 0
## poly1 4.64e-26 <NA> <NA>
## poly2 9.52999999999999e-227 0.0328 0.0131
## poly3 0 1.66e-05 0.0496
## CA 7.28e-33 1.04e-11 2.95e-11
## have_tattoos poly_score poly1 poly2 poly3
## mh_score
## unnatural_hair_color
## vegetarian
## have_tattoos
## poly_score 0
## poly1 0.00681 NaN
## poly2 0.163 0 3.09e-59
## poly3 0.0798 0 1.12e-121 3e-143
## CA 1.82e-12 0 2.28e-11 1.24e-212 0
Models
#modeling function
run_models = function(x, sums = T) {
#models, summarized
list(
simple = ols(mh_score ~ unnatural_hair_color + vegetarian + have_tattoos, data = x),
add_basic = ols(mh_score ~ unnatural_hair_color + vegetarian + have_tattoos + rcs(age) + sexual_orientation, data = x),
add_race = ols(mh_score ~ unnatural_hair_color + vegetarian + have_tattoos + rcs(age) + sexual_orientation + race, data = x),
add_body = ols(mh_score ~ unnatural_hair_color + vegetarian + have_tattoos + rcs(age) + sexual_orientation + race + body_type, data = x),
final = ols(mh_score ~ unnatural_hair_color + vegetarian + have_tattoos + rcs(age) + sexual_orientation + race + body_type + intelligence + polyamory + pol_beliefs, data = x)
) -> mods
if (sums) return(summarize_models(mods)) else return(mods)
}
#main subset, 2+ MH items
(d2_results = run_models(d2))
#more strict, 3+ items
(d3_results = run_models(d3))
#all data
(d_results = run_models(d))
#visualize
jtools::plot_summs(
ols(mh_score ~ unnatural_hair_color + vegetarian + have_tattoos + rcs(age) + sexual_orientation + race + body_type + intelligence + polyamory + pol_beliefs, data = d2) %>% summary.lm(),
omit.coefs = c("Intercept", "age" + c("", "'", "''", "'''"))
)
## Loading required namespace: broom.mixed
## Registered S3 method overwritten by 'broom.mixed':
## method from
## tidy.gamlss broom

GG_save("figs/model_summary.png")
jtools::plot_summs(
run_models(d2, sums = F) %>% map(summary.lm),
omit.coefs = c("Intercept", "age" + c("", "'", "''", "'''"))
)

GG_save("figs/model_summary_all.png")