library(pacman)
p_load(kirkegaard, readr, dplyr, rms, polycor, mirt)
#redefine describe
describe = function(...) psych::describe(...) %>% as.matrix()
#load
vars = readr::read_csv2("data/question_data.csv")
## Using ',' as decimal and '.' as grouping mark. Use read_delim() for more control.
## Warning: Missing column names filled in: 'X1' [1]
## Parsed with column specification:
## cols(
## X1 = col_character(),
## text = col_character(),
## option_1 = col_character(),
## option_2 = col_character(),
## option_3 = col_character(),
## option_4 = col_character(),
## N = col_double(),
## Type = col_character(),
## Order = col_character(),
## Keywords = col_character()
## )
d = readr::read_rds("data/parsed_data.rds")
d_orig = d
#test items
iq_items_meta = read_csv("data/test_items.csv")
## Warning: Missing column names filled in: 'X1' [1]
## Parsed with column specification:
## cols(
## X1 = col_character(),
## ID = col_double(),
## text = col_character(),
## option_1 = col_character(),
## option_2 = col_character(),
## option_3 = col_character(),
## option_4 = col_character(),
## option_correct = col_double()
## )
#limited to persons with at last 5 IQ items
#included IQ items, some were removed due to ~0 datapoints
CA_items = intersect(iq_items_meta$X1, names(d))
d$CA_items = d[CA_items] %>% miss_by_case(reverse = T)
d = filter(d, CA_items >= 5)
#rename
d$age = d$d_age
d$sex = d$gender
d$country = d$d_country
d$sexual_orientation = d$gender_orientation
#anglophone = USA states, or regions or Canada (not Quebec) or a few others
d$anglophone = (d$country %in% c("Ontario", "British Columbia", "Alberta", "Manitoba", "Nova Scotia", "Saskatchewan", "New Brunswick", "Prince Edward Island", "Australia", "Singapore", "Ireland", "New Zealand")) | str_length(d$country) == 2
d$western = (d$country %in% c("Ontario", "British Columbia", "Alberta", "Manitoba", "Nova Scotia", "Saskatchewan", "New Brunswick", "Prince Edward Island", "Australia", "Ireland", "New Zealand")) | (str_length(d$country) == 2) | (d$country %in% c("Germany", "Netherlands", "Denmark", "Italy", "Sweden", "France", "Finland", "Israel", "Belgium", "Spain", "Austria", "Switzerland", "Norway", "Greece", "Portugal", "Iceland", "Luxembourg"))
#chronobio or sleeping related
d$wake_up_speed = d$q19652 %>% fct_rev()
d$wake_up_speed2 = d$q42209 %>% fct_rev()
d$weekends_up_late = d$q28831 %>% fct_rev()
d$sleep_strat = d$q85867 %>% fct_rev()
d$no_obligation_get_up = d$q33170
d$typical_sleep = d$q17342
d$ideal_schedule_late = d$q18644 %>% fct_rev()
d$trouble_sleeping = d$q79134 %>% fct_rev()
d$number_pillows = d$q67024
d$sleep_tv = d$q49461 %>% fct_rev()
d$sleep_tv2 = d$q42266
d$sleep_stuffed_animal = d$q212 %>% fct_rev()
d$pref_sleep_duration = d$q8079 %>% fct_rev()
d$sleep_longer_if_possible = d$q27141
d$talk_in_sleep = d$q28206
d$sleep_others = d$q31840
d$sleep_room_temp = d$q35862
d$sleep_lights = d$q36622
d$sleep_unfamiliar_environ_good = d$q38249 %>% fct_rev()
d$longest_no_sleep = d$q51833
d$read_before_sleep = d$q53910 %>% fct_rev()
d$sleep_music = d$q71315 %>% fct_rev()
#sleep vars
sleep_vars = d %>% select(wake_up_speed:sleep_music) %>% names()
#helper
fixNA = function(x) {
x[str_detect(x, "NA%")] = NA
x
}
#descriptive
sleep_summary = map_df(sleep_vars, function(x) {
# browser()
xvar = d[[x]] %>% na.omit()
xvar_uniq = xvar %>% levels()
#backlook
# if (x == "no_obligation_get_up") browser()
xvar_id = map_lgl(d, function(v) {
identical(v, d[[x]]) || identical(v, fct_rev(d[[x]]))
}) %>% which() %>% names() %>% .[1]
tibble(
var = x,
n = xvar %>% length(),
#meta
question = vars %>% filter(X1 == xvar_id) %>% pull(text),
option_1 = str_glue("{xvar_uniq[1]} - {(mean(xvar == xvar_uniq[1])*100) %>% format_digits()}%"),
option_2 = str_glue("{xvar_uniq[2]} - {(mean(xvar == xvar_uniq[2])*100) %>% format_digits()}%"),
option_3 = str_glue("{xvar_uniq[3]} - {(mean(xvar == xvar_uniq[3])*100) %>% format_digits()}%"),
option_4 = str_glue("{xvar_uniq[4]} - {(mean(xvar == xvar_uniq[4])*100) %>% format_digits()}%")
)
}) %>% map_df(fixNA)
## Warning in bind_rows_(x, .id): Vectorizing 'glue' elements may not preserve
## their attributes
## Warning in bind_rows_(x, .id): Vectorizing 'glue' elements may not preserve
## their attributes
## Warning in bind_rows_(x, .id): Vectorizing 'glue' elements may not preserve
## their attributes
## Warning in bind_rows_(x, .id): Vectorizing 'glue' elements may not preserve
## their attributes
## Warning in bind_rows_(x, .id): Vectorizing 'glue' elements may not preserve
## their attributes
## Warning in bind_rows_(x, .id): Vectorizing 'glue' elements may not preserve
## their attributes
## Warning in bind_rows_(x, .id): Vectorizing 'glue' elements may not preserve
## their attributes
## Warning in bind_rows_(x, .id): Vectorizing 'glue' elements may not preserve
## their attributes
## Warning in bind_rows_(x, .id): Vectorizing 'glue' elements may not preserve
## their attributes
## Warning in bind_rows_(x, .id): Vectorizing 'glue' elements may not preserve
## their attributes
## Warning in bind_rows_(x, .id): Vectorizing 'glue' elements may not preserve
## their attributes
## Warning in bind_rows_(x, .id): Vectorizing 'glue' elements may not preserve
## their attributes
## Warning in bind_rows_(x, .id): Vectorizing 'glue' elements may not preserve
## their attributes
## Warning in bind_rows_(x, .id): Vectorizing 'glue' elements may not preserve
## their attributes
## Warning in bind_rows_(x, .id): Vectorizing 'glue' elements may not preserve
## their attributes
## Warning in bind_rows_(x, .id): Vectorizing 'glue' elements may not preserve
## their attributes
## Warning in bind_rows_(x, .id): Vectorizing 'glue' elements may not preserve
## their attributes
## Warning in bind_rows_(x, .id): Vectorizing 'glue' elements may not preserve
## their attributes
## Warning in bind_rows_(x, .id): Vectorizing 'glue' elements may not preserve
## their attributes
## Warning in bind_rows_(x, .id): Vectorizing 'glue' elements may not preserve
## their attributes
## Warning in bind_rows_(x, .id): Vectorizing 'glue' elements may not preserve
## their attributes
## Warning in bind_rows_(x, .id): Vectorizing 'glue' elements may not preserve
## their attributes
## Warning in bind_rows_(x, .id): Vectorizing 'glue' elements may not preserve
## their attributes
## Warning in bind_rows_(x, .id): Vectorizing 'glue' elements may not preserve
## their attributes
## Warning in bind_rows_(x, .id): Vectorizing 'glue' elements may not preserve
## their attributes
## Warning in bind_rows_(x, .id): Vectorizing 'glue' elements may not preserve
## their attributes
## Warning in bind_rows_(x, .id): Vectorizing 'glue' elements may not preserve
## their attributes
## Warning in bind_rows_(x, .id): Vectorizing 'glue' elements may not preserve
## their attributes
## Warning in bind_rows_(x, .id): Vectorizing 'glue' elements may not preserve
## their attributes
## Warning in bind_rows_(x, .id): Vectorizing 'glue' elements may not preserve
## their attributes
## Warning in bind_rows_(x, .id): Vectorizing 'glue' elements may not preserve
## their attributes
## Warning in bind_rows_(x, .id): Vectorizing 'glue' elements may not preserve
## their attributes
## Warning in bind_rows_(x, .id): Vectorizing 'glue' elements may not preserve
## their attributes
## Warning in bind_rows_(x, .id): Vectorizing 'glue' elements may not preserve
## their attributes
## Warning in bind_rows_(x, .id): Vectorizing 'glue' elements may not preserve
## their attributes
## Warning in bind_rows_(x, .id): Vectorizing 'glue' elements may not preserve
## their attributes
## Warning in bind_rows_(x, .id): Vectorizing 'glue' elements may not preserve
## their attributes
## Warning in bind_rows_(x, .id): Vectorizing 'glue' elements may not preserve
## their attributes
## Warning in bind_rows_(x, .id): Vectorizing 'glue' elements may not preserve
## their attributes
## Warning in bind_rows_(x, .id): Vectorizing 'glue' elements may not preserve
## their attributes
## Warning in bind_rows_(x, .id): Vectorizing 'glue' elements may not preserve
## their attributes
## Warning in bind_rows_(x, .id): Vectorizing 'glue' elements may not preserve
## their attributes
## Warning in bind_rows_(x, .id): Vectorizing 'glue' elements may not preserve
## their attributes
## Warning in bind_rows_(x, .id): Vectorizing 'glue' elements may not preserve
## their attributes
## Warning in bind_rows_(x, .id): Vectorizing 'glue' elements may not preserve
## their attributes
## Warning in bind_rows_(x, .id): Vectorizing 'glue' elements may not preserve
## their attributes
## Warning in bind_rows_(x, .id): Vectorizing 'glue' elements may not preserve
## their attributes
## Warning in bind_rows_(x, .id): Vectorizing 'glue' elements may not preserve
## their attributes
## Warning in bind_rows_(x, .id): Vectorizing 'glue' elements may not preserve
## their attributes
## Warning in bind_rows_(x, .id): Vectorizing 'glue' elements may not preserve
## their attributes
## Warning in bind_rows_(x, .id): Vectorizing 'glue' elements may not preserve
## their attributes
## Warning in bind_rows_(x, .id): Vectorizing 'glue' elements may not preserve
## their attributes
## Warning in bind_rows_(x, .id): Vectorizing 'glue' elements may not preserve
## their attributes
## Warning in bind_rows_(x, .id): Vectorizing 'glue' elements may not preserve
## their attributes
## Warning in bind_rows_(x, .id): Vectorizing 'glue' elements may not preserve
## their attributes
## Warning in bind_rows_(x, .id): Vectorizing 'glue' elements may not preserve
## their attributes
## Warning in bind_rows_(x, .id): Vectorizing 'glue' elements may not preserve
## their attributes
## Warning in bind_rows_(x, .id): Vectorizing 'glue' elements may not preserve
## their attributes
## Warning in bind_rows_(x, .id): Vectorizing 'glue' elements may not preserve
## their attributes
## Warning in bind_rows_(x, .id): Vectorizing 'glue' elements may not preserve
## their attributes
## Warning in bind_rows_(x, .id): Vectorizing 'glue' elements may not preserve
## their attributes
## Warning in bind_rows_(x, .id): Vectorizing 'glue' elements may not preserve
## their attributes
## Warning in bind_rows_(x, .id): Vectorizing 'glue' elements may not preserve
## their attributes
## Warning in bind_rows_(x, .id): Vectorizing 'glue' elements may not preserve
## their attributes
## Warning in bind_rows_(x, .id): Vectorizing 'glue' elements may not preserve
## their attributes
## Warning in bind_rows_(x, .id): Vectorizing 'glue' elements may not preserve
## their attributes
## Warning in bind_rows_(x, .id): Vectorizing 'glue' elements may not preserve
## their attributes
## Warning in bind_rows_(x, .id): Vectorizing 'glue' elements may not preserve
## their attributes
## Warning in bind_rows_(x, .id): Vectorizing 'glue' elements may not preserve
## their attributes
## Warning in bind_rows_(x, .id): Vectorizing 'glue' elements may not preserve
## their attributes
## Warning in bind_rows_(x, .id): Vectorizing 'glue' elements may not preserve
## their attributes
## Warning in bind_rows_(x, .id): Vectorizing 'glue' elements may not preserve
## their attributes
## Warning in bind_rows_(x, .id): Vectorizing 'glue' elements may not preserve
## their attributes
## Warning in bind_rows_(x, .id): Vectorizing 'glue' elements may not preserve
## their attributes
## Warning in bind_rows_(x, .id): Vectorizing 'glue' elements may not preserve
## their attributes
## Warning in bind_rows_(x, .id): Vectorizing 'glue' elements may not preserve
## their attributes
## Warning in bind_rows_(x, .id): Vectorizing 'glue' elements may not preserve
## their attributes
## Warning in bind_rows_(x, .id): Vectorizing 'glue' elements may not preserve
## their attributes
## Warning in bind_rows_(x, .id): Vectorizing 'glue' elements may not preserve
## their attributes
## Warning in bind_rows_(x, .id): Vectorizing 'glue' elements may not preserve
## their attributes
## Warning in bind_rows_(x, .id): Vectorizing 'glue' elements may not preserve
## their attributes
## Warning in bind_rows_(x, .id): Vectorizing 'glue' elements may not preserve
## their attributes
## Warning in bind_rows_(x, .id): Vectorizing 'glue' elements may not preserve
## their attributes
## Warning in bind_rows_(x, .id): Vectorizing 'glue' elements may not preserve
## their attributes
## Warning in bind_rows_(x, .id): Vectorizing 'glue' elements may not preserve
## their attributes
## Warning in bind_rows_(x, .id): Vectorizing 'glue' elements may not preserve
## their attributes
## Warning in bind_rows_(x, .id): Vectorizing 'glue' elements may not preserve
## their attributes
## Warning in bind_rows_(x, .id): Vectorizing 'glue' elements may not preserve
## their attributes
sleep_summary
#main chronobio questions
#pure and typical questions
chrono = d %>% select(ideal_schedule_late, no_obligation_get_up, weekends_up_late, typical_sleep)
#correlations
chrono_fa = mirt::mirt(chrono %>% map_df(as.numeric), model = 1, technical = list(removeEmptyRows=T))
##
Iteration: 1, Log-Lik: -88944.294, Max-Change: 1.10688
Iteration: 2, Log-Lik: -83722.452, Max-Change: 0.44536
Iteration: 3, Log-Lik: -82684.278, Max-Change: 0.34000
Iteration: 4, Log-Lik: -82301.901, Max-Change: 0.26723
Iteration: 5, Log-Lik: -82081.811, Max-Change: 0.16361
Iteration: 6, Log-Lik: -82008.658, Max-Change: 0.15209
Iteration: 7, Log-Lik: -81981.475, Max-Change: 0.07756
Iteration: 8, Log-Lik: -81967.009, Max-Change: 0.07967
Iteration: 9, Log-Lik: -81961.068, Max-Change: 0.04190
Iteration: 10, Log-Lik: -81957.469, Max-Change: 0.03491
Iteration: 11, Log-Lik: -81955.724, Max-Change: 0.02863
Iteration: 12, Log-Lik: -81954.747, Max-Change: 0.02330
Iteration: 13, Log-Lik: -81952.969, Max-Change: 0.00993
Iteration: 14, Log-Lik: -81952.847, Max-Change: 0.00720
Iteration: 15, Log-Lik: -81952.774, Max-Change: 0.00590
Iteration: 16, Log-Lik: -81952.694, Max-Change: 0.00328
Iteration: 17, Log-Lik: -81952.675, Max-Change: 0.00562
Iteration: 18, Log-Lik: -81952.653, Max-Change: 0.00299
Iteration: 19, Log-Lik: -81952.628, Max-Change: 0.00277
Iteration: 20, Log-Lik: -81952.617, Max-Change: 0.00247
Iteration: 21, Log-Lik: -81952.606, Max-Change: 0.00247
Iteration: 22, Log-Lik: -81952.568, Max-Change: 0.00047
Iteration: 23, Log-Lik: -81952.568, Max-Change: 0.00029
Iteration: 24, Log-Lik: -81952.567, Max-Change: 0.00026
Iteration: 25, Log-Lik: -81952.567, Max-Change: 0.00112
Iteration: 26, Log-Lik: -81952.566, Max-Change: 0.00018
Iteration: 27, Log-Lik: -81952.566, Max-Change: 0.00018
Iteration: 28, Log-Lik: -81952.566, Max-Change: 0.00089
Iteration: 29, Log-Lik: -81952.565, Max-Change: 0.00008
chrono_fa@Fit$F
## F1
## ideal_schedule_late 0.8201320
## no_obligation_get_up 0.8299519
## weekends_up_late 0.4819559
## typical_sleep 0.6484487
#scores
chrono_fa_scores = fscores(chrono_fa, full.scores = T, full.scores.SE = T)
d$chronobio = NA
d$chronobio[miss_by_case(chrono, reverse = T) > 0] = chrono_fa_scores[, 1] %>% standardize()
(miss_by_case(chrono, reverse = T) > 0) %>% sum()
## [1] 29637
nrow(chrono_fa_scores)
## [1] 29637
#estimate reliability
empirical_rxx(chrono_fa_scores)
## F1
## 0.6337424
marginal_rxx(chrono_fa)
## [1] 0.7223729
#get scoring key
v_correct_answers_text = apply(iq_items_meta %>% filter(X1 %in% CA_items), MARGIN = 1, function(item) {
item["option_" + 1:4][item["option_correct"] %>% as.numeric()]
})
#score items
CA_scored_items = score_items(d[CA_items], key = v_correct_answers_text)
#factor analysis with IRT
irt_CA = mirt(CA_scored_items, model = 1)
##
Iteration: 1, Log-Lik: -151544.753, Max-Change: 3.37241
Iteration: 2, Log-Lik: -137049.017, Max-Change: 0.45960
Iteration: 3, Log-Lik: -135942.068, Max-Change: 0.28963
Iteration: 4, Log-Lik: -135346.752, Max-Change: 0.23959
Iteration: 5, Log-Lik: -135000.070, Max-Change: 0.16686
Iteration: 6, Log-Lik: -134805.781, Max-Change: 0.25781
Iteration: 7, Log-Lik: -134681.766, Max-Change: 0.09668
Iteration: 8, Log-Lik: -134606.564, Max-Change: 0.24591
Iteration: 9, Log-Lik: -134548.208, Max-Change: 0.14237
Iteration: 10, Log-Lik: -134519.358, Max-Change: 0.08380
Iteration: 11, Log-Lik: -134499.564, Max-Change: 0.06449
Iteration: 12, Log-Lik: -134485.274, Max-Change: 0.05566
Iteration: 13, Log-Lik: -134474.237, Max-Change: 0.03508
Iteration: 14, Log-Lik: -134468.174, Max-Change: 0.04288
Iteration: 15, Log-Lik: -134463.962, Max-Change: 0.03212
Iteration: 16, Log-Lik: -134461.212, Max-Change: 0.04777
Iteration: 17, Log-Lik: -134455.203, Max-Change: 0.01162
Iteration: 18, Log-Lik: -134454.203, Max-Change: 0.00664
Iteration: 19, Log-Lik: -134453.895, Max-Change: 0.00367
Iteration: 20, Log-Lik: -134453.713, Max-Change: 0.00224
Iteration: 21, Log-Lik: -134453.625, Max-Change: 0.00139
Iteration: 22, Log-Lik: -134453.609, Max-Change: 0.00142
Iteration: 23, Log-Lik: -134453.582, Max-Change: 0.00074
Iteration: 24, Log-Lik: -134453.567, Max-Change: 0.00094
Iteration: 25, Log-Lik: -134453.551, Max-Change: 0.00071
Iteration: 26, Log-Lik: -134453.547, Max-Change: 0.00026
Iteration: 27, Log-Lik: -134453.545, Max-Change: 0.00025
Iteration: 28, Log-Lik: -134453.544, Max-Change: 0.00020
Iteration: 29, Log-Lik: -134453.544, Max-Change: 0.00018
Iteration: 30, Log-Lik: -134453.543, Max-Change: 0.00019
Iteration: 31, Log-Lik: -134453.543, Max-Change: 0.00014
Iteration: 32, Log-Lik: -134453.543, Max-Change: 0.00016
Iteration: 33, Log-Lik: -134453.543, Max-Change: 0.00015
Iteration: 34, Log-Lik: -134453.543, Max-Change: 0.00012
Iteration: 35, Log-Lik: -134453.543, Max-Change: 0.00011
Iteration: 36, Log-Lik: -134453.542, Max-Change: 0.00011
Iteration: 37, Log-Lik: -134453.542, Max-Change: 0.00010
Iteration: 38, Log-Lik: -134453.542, Max-Change: 0.00009
irt_CA
##
## Call:
## mirt(data = CA_scored_items, model = 1)
##
## Full-information item factor analysis with 1 factor(s).
## Converged within 1e-04 tolerance after 38 EM iterations.
## mirt version: 1.31
## M-step optimizer: BFGS
## EM acceleration: Ramsay
## Number of rectangular quadrature: 61
## Latent density type: Gaussian
##
## Log-likelihood = -134453.5
## Estimated parameters: 28
## AIC = 268963.1; AICc = 268963.1
## BIC = 269202.1; SABIC = 269113.2
irt_CA@Fit
## $G2
## [1] NaN
##
## $p
## [1] NaN
##
## $TLI
## [1] NaN
##
## $CFI
## [1] NaN
##
## $RMSEA
## [1] NaN
##
## $df
## [1] 16355
##
## $AIC
## [1] 268963.1
##
## $AICc
## [1] 268963.1
##
## $BIC
## [1] 269202.1
##
## $SABIC
## [1] 269113.2
##
## $DIC
## [1] 268963.1
##
## $HQ
## [1] 269039
##
## $logLik
## [1] -134453.5
##
## $logPrior
## [1] 0
##
## $SElogLik
## [1] 0
##
## $F
## F1
## q178 0.7354377
## q255 0.7041587
## q1201 0.7519587
## q14835 0.3786183
## q8672 0.7202241
## q18154 0.7435125
## q12625 0.5823432
## q477 0.6024514
## q256 0.3699111
## q43639 0.7487329
## q267 0.4796624
## q18698 0.6924156
## q511 0.6799004
## q57844 0.6927200
##
## $h2
## q178 q255 q1201 q14835 q8672 q18154 q12625 q477
## 0.5408686 0.4958394 0.5654419 0.1433518 0.5187228 0.5528109 0.3391236 0.3629477
## q256 q43639 q267 q18698 q511 q57844
## 0.1368342 0.5606009 0.2300760 0.4794394 0.4622646 0.4798610
#save
irt_CA_scores = fscores(irt_CA, full.scores = T, full.scores.SE = T)
d$CA_old = d$CA %>% standardize()
d$CA = irt_CA_scores[, 1] %>% standardize()
#plot scores
GG_scatter(d, "CA", "CA_old")
#estimate reliability
empirical_rxx(irt_CA_scores)
## F1
## 0.6275554
marginal_rxx(irt_CA)
## [1] 0.7536288
#whites only
d_white = d %>% filter(race == "White")
#anglos only
d_anglo = d %>% filter(anglophone)
#white Americans only
d_white_amer = d %>% filter(race == "White", country %in% state.abb)
#Sex
d$sex %>% table2()
#sexual orientation
d$sexual_orientation %>% table2()
#age
d$age %>% describe()
## vars n mean sd median trimmed mad min max range skew
## X1 1 36975 33.06339 7.790711 32 32.47456 7.413 18 100 82 0.9250802
## kurtosis se
## X1 1.999387 0.04051567
#country / state
d$country %>% table2()
#anglo
d$anglophone %>% table2()
#western
d$western %>% table2()
#each ordinal variable
d[sleep_vars] %>% map_chr(~class(.)[1])
## wake_up_speed wake_up_speed2
## "ordered" "factor"
## weekends_up_late sleep_strat
## "ordered" "ordered"
## no_obligation_get_up typical_sleep
## "ordered" "ordered"
## ideal_schedule_late trouble_sleeping
## "ordered" "ordered"
## number_pillows sleep_tv
## "ordered" "ordered"
## sleep_tv2 sleep_stuffed_animal
## "ordered" "ordered"
## pref_sleep_duration sleep_longer_if_possible
## "ordered" "ordered"
## talk_in_sleep sleep_others
## "ordered" "ordered"
## sleep_room_temp sleep_lights
## "ordered" "ordered"
## sleep_unfamiliar_environ_good longest_no_sleep
## "ordered" "ordered"
## read_before_sleep sleep_music
## "ordered" "ordered"
sleep_vars2 = d[sleep_vars] %>% map_chr(~class(.)[1]) %>% {
.[. == "ordered"] %>% names()
}
#table of latent correlations
sleep_cors = map_df(sleep_vars2, function(x) {
#cor
xsym = as.symbol(x)
xcor = polycor::hetcor(d %>% select(CA, !!xsym))
tibble(
var = x,
n = xcor$n,
r = xcor$correlations[1, 2],
p = xcor$tests[1, 2]
)
})
sleep_cors
#scatterplots
GG_scatter(d, "CA", "chronobio")
GG_scatter(d_white, "CA", "chronobio")
GG_scatter(d_anglo, "CA", "chronobio")
GG_scatter(d_white_amer, "CA", "chronobio")
list(
ols(chronobio ~ CA, data = d),
ols(chronobio ~ CA + rcs(age), data = d),
ols(chronobio ~ CA + gender_orientation + rcs(age), data = d),
ols(chronobio ~ CA + race + rcs(age), data = d),
ols(chronobio ~ CA + gender_orientation + race + rcs(age), data = d),
ols(chronobio ~ CA + gender_orientation + rcs(age), data = d_white_amer)
) %>%
print() %>%
summarize_models()
## [[1]]
## Frequencies of Missing Values Due to Each Variable
## chronobio CA
## 8077 0
##
## Linear Regression Model
##
## ols(formula = chronobio ~ CA, data = d)
##
##
## Model Likelihood Discrimination
## Ratio Test Indexes
## Obs 29637 LR chi2 113.19 R2 0.004
## sigma0.9981 d.f. 1 R2 adj 0.004
## d.f. 29635 Pr(> chi2) 0.0000 g 0.070
##
## Residuals
##
## Min 1Q Median 3Q Max
## -3.169724 -0.790109 0.005972 0.743512 2.609451
##
##
## Coef S.E. t Pr(>|t|)
## Intercept -0.0062 0.0058 -1.07 0.2852
## CA 0.0633 0.0059 10.65 <0.0001
##
##
## [[2]]
## Frequencies of Missing Values Due to Each Variable
## chronobio CA age
## 8077 0 739
##
## Linear Regression Model
##
## ols(formula = chronobio ~ CA + rcs(age), data = d)
##
##
## Model Likelihood Discrimination
## Ratio Test Indexes
## Obs 28958 LR chi2 1034.76 R2 0.035
## sigma0.9813 d.f. 5 R2 adj 0.035
## d.f. 28952 Pr(> chi2) 0.0000 g 0.206
##
## Residuals
##
## Min 1Q Median 3Q Max
## -3.398397 -0.767895 -0.009533 0.689115 2.966520
##
##
## Coef S.E. t Pr(>|t|)
## Intercept 1.2247 0.1509 8.12 <0.0001
## CA 0.0599 0.0060 10.05 <0.0001
## age -0.0412 0.0061 -6.75 <0.0001
## age' 0.1070 0.0410 2.61 0.0091
## age'' -0.3653 0.1589 -2.30 0.0215
## age''' 0.3545 0.1816 1.95 0.0510
##
##
## [[3]]
## Frequencies of Missing Values Due to Each Variable
## chronobio CA gender_orientation age
## 8077 0 1114 739
##
## Linear Regression Model
##
## ols(formula = chronobio ~ CA + gender_orientation + rcs(age),
## data = d)
##
##
## Model Likelihood Discrimination
## Ratio Test Indexes
## Obs 28604 LR chi2 1184.86 R2 0.041
## sigma0.9775 d.f. 10 R2 adj 0.040
## d.f. 28593 Pr(> chi2) 0.0000 g 0.221
##
## Residuals
##
## Min 1Q Median 3Q Max
## -3.485306 -0.764214 -0.004807 0.682016 3.066751
##
##
## Coef S.E. t Pr(>|t|)
## Intercept 0.9612 0.1545 6.22 <0.0001
## CA 0.0537 0.0060 8.96 <0.0001
## gender_orientation=Bisexual_female 0.2618 0.0281 9.33 <0.0001
## gender_orientation=Gay_female 0.1737 0.0532 3.27 0.0011
## gender_orientation=Gay_male 0.0529 0.0284 1.86 0.0624
## gender_orientation=Bisexual_male 0.3941 0.0425 9.27 <0.0001
## gender_orientation=Hetero_male 0.1150 0.0142 8.09 <0.0001
## age -0.0348 0.0062 -5.60 <0.0001
## age' 0.0812 0.0414 1.96 0.0497
## age'' -0.2995 0.1599 -1.87 0.0610
## age''' 0.3138 0.1823 1.72 0.0852
##
##
## [[4]]
## Frequencies of Missing Values Due to Each Variable
## chronobio CA race age
## 8077 0 3783 739
##
## Linear Regression Model
##
## ols(formula = chronobio ~ CA + race + rcs(age), data = d)
##
##
## Model Likelihood Discrimination
## Ratio Test Indexes
## Obs 27197 LR chi2 1030.05 R2 0.037
## sigma0.9816 d.f. 14 R2 adj 0.037
## d.f. 27182 Pr(> chi2) 0.0000 g 0.212
##
## Residuals
##
## Min 1Q Median 3Q Max
## -3.40247 -0.76724 -0.01159 0.68615 2.98545
##
##
## Coef S.E. t Pr(>|t|)
## Intercept 1.2599 0.1566 8.05 <0.0001
## CA 0.0551 0.0062 8.86 <0.0001
## race=Mixed 0.0502 0.0202 2.49 0.0128
## race=Asian -0.0179 0.0311 -0.57 0.5660
## race=Hispanic / Latin -0.0829 0.0316 -2.62 0.0088
## race=Black -0.0001 0.0317 0.00 0.9966
## race=Other 0.0323 0.0376 0.86 0.3900
## race=Indian -0.2385 0.0664 -3.59 0.0003
## race=Middle Eastern -0.1367 0.0994 -1.38 0.1691
## race=Native American -0.1727 0.1260 -1.37 0.1703
## race=Pacific Islander -0.1248 0.1390 -0.90 0.3695
## age -0.0424 0.0063 -6.69 <0.0001
## age' 0.1123 0.0425 2.64 0.0083
## age'' -0.3842 0.1647 -2.33 0.0197
## age''' 0.3717 0.1882 1.98 0.0483
##
##
## [[5]]
## Frequencies of Missing Values Due to Each Variable
## chronobio CA gender_orientation race
## 8077 0 1114 3783
## age
## 739
##
## Linear Regression Model
##
## ols(formula = chronobio ~ CA + gender_orientation + race + rcs(age),
## data = d)
##
##
## Model Likelihood Discrimination
## Ratio Test Indexes
## Obs 26872 LR chi2 1174.93 R2 0.043
## sigma0.9778 d.f. 19 R2 adj 0.042
## d.f. 26852 Pr(> chi2) 0.0000 g 0.227
##
## Residuals
##
## Min 1Q Median 3Q Max
## -3.472286 -0.760818 -0.005071 0.680982 3.076929
##
##
## Coef S.E. t Pr(>|t|)
## Intercept 0.9862 0.1604 6.15 <0.0001
## CA 0.0488 0.0063 7.80 <0.0001
## gender_orientation=Bisexual_female 0.2813 0.0294 9.58 <0.0001
## gender_orientation=Gay_female 0.1919 0.0558 3.44 0.0006
## gender_orientation=Gay_male 0.0664 0.0291 2.28 0.0227
## gender_orientation=Bisexual_male 0.3849 0.0448 8.60 <0.0001
## gender_orientation=Hetero_male 0.1193 0.0148 8.09 <0.0001
## race=Mixed 0.0448 0.0203 2.20 0.0276
## race=Asian 0.0052 0.0311 0.17 0.8664
## race=Hispanic / Latin -0.0831 0.0316 -2.62 0.0087
## race=Black 0.0048 0.0318 0.15 0.8795
## race=Other 0.0279 0.0377 0.74 0.4588
## race=Indian -0.2382 0.0665 -3.58 0.0003
## race=Middle Eastern -0.1354 0.0990 -1.37 0.1715
## race=Native American -0.1845 0.1255 -1.47 0.1416
## race=Pacific Islander -0.1181 0.1385 -0.85 0.3941
## age -0.0358 0.0064 -5.55 <0.0001
## age' 0.0856 0.0429 1.99 0.0461
## age'' -0.3166 0.1656 -1.91 0.0560
## age''' 0.3309 0.1889 1.75 0.0798
##
##
## [[6]]
## Frequencies of Missing Values Due to Each Variable
## chronobio CA gender_orientation age
## 2547 0 162 0
##
## Linear Regression Model
##
## ols(formula = chronobio ~ CA + gender_orientation + rcs(age),
## data = d_white_amer)
##
##
## Model Likelihood Discrimination
## Ratio Test Indexes
## Obs 15259 LR chi2 739.24 R2 0.047
## sigma0.9944 d.f. 10 R2 adj 0.047
## d.f. 15248 Pr(> chi2) 0.0000 g 0.242
##
## Residuals
##
## Min 1Q Median 3Q Max
## -3.51120 -0.78477 -0.01054 0.69515 3.11008
##
##
## Coef S.E. t Pr(>|t|)
## Intercept 1.3789 0.1989 6.93 <0.0001
## CA 0.0546 0.0083 6.55 <0.0001
## gender_orientation=Bisexual_female 0.2755 0.0417 6.60 <0.0001
## gender_orientation=Gay_female 0.1734 0.0758 2.29 0.0222
## gender_orientation=Gay_male 0.0479 0.0393 1.22 0.2235
## gender_orientation=Bisexual_male 0.4291 0.0637 6.73 <0.0001
## gender_orientation=Hetero_male 0.0981 0.0199 4.92 <0.0001
## age -0.0497 0.0076 -6.53 <0.0001
## age' 0.2376 0.0727 3.27 0.0011
## age'' -0.7570 0.2427 -3.12 0.0018
## age''' 0.7346 0.2584 2.84 0.0045
##
write_sessioninfo()
## R version 3.6.1 (2019-07-05)
## Platform: x86_64-pc-linux-gnu (64-bit)
## Running under: Linux Mint 19.1
##
## Matrix products: default
## BLAS: /usr/lib/x86_64-linux-gnu/blas/libblas.so.3.7.1
## LAPACK: /usr/lib/x86_64-linux-gnu/lapack/liblapack.so.3.7.1
##
## locale:
## [1] LC_CTYPE=en_US.UTF-8 LC_NUMERIC=C
## [3] LC_TIME=en_US.UTF-8 LC_COLLATE=en_US.UTF-8
## [5] LC_MONETARY=en_US.UTF-8 LC_MESSAGES=en_US.UTF-8
## [7] LC_PAPER=en_US.UTF-8 LC_NAME=C
## [9] LC_ADDRESS=C LC_TELEPHONE=C
## [11] LC_MEASUREMENT=en_US.UTF-8 LC_IDENTIFICATION=C
##
## attached base packages:
## [1] stats4 stats graphics grDevices utils datasets methods
## [8] base
##
## other attached packages:
## [1] mirt_1.31 polycor_0.7-10 rms_5.1-3.1 SparseM_1.77
## [5] kirkegaard_2018.05 metafor_2.1-0 Matrix_1.2-17 psych_1.8.12
## [9] magrittr_1.5 assertthat_0.2.1 weights_1.0 mice_3.6.0
## [13] gdata_2.18.0 Hmisc_4.3-0 Formula_1.2-3 survival_3.1-7
## [17] lattice_0.20-38 forcats_0.4.0 stringr_1.4.0 dplyr_0.8.3
## [21] purrr_0.3.3 readr_1.3.1 tidyr_1.0.0 tibble_2.1.3
## [25] ggplot2_3.2.1 tidyverse_1.2.1 pacman_0.5.1
##
## loaded via a namespace (and not attached):
## [1] TH.data_1.0-10 minqa_1.2.4 colorspace_1.4-1
## [4] htmlTable_1.13.2 base64enc_0.1-3 rstudioapi_0.10
## [7] Deriv_3.9.0 MatrixModels_0.4-1 mvtnorm_1.0-11
## [10] lubridate_1.7.4 xml2_1.2.2 codetools_0.2-16
## [13] splines_3.6.1 mnormt_1.5-5 knitr_1.26
## [16] zeallot_0.1.0 jsonlite_1.6 nloptr_1.2.1
## [19] broom_0.5.2 cluster_2.1.0 compiler_3.6.1
## [22] httr_1.4.1 backports_1.1.5 lazyeval_0.2.2
## [25] cli_1.1.0 acepack_1.4.1 htmltools_0.4.0
## [28] quantreg_5.52 tools_3.6.1 gtable_0.3.0
## [31] glue_1.3.1 Rcpp_1.0.3 cellranger_1.1.0
## [34] vctrs_0.2.0 nlme_3.1-142 multilevel_2.6
## [37] xfun_0.11 lme4_1.1-21 rvest_0.3.5
## [40] lifecycle_0.1.0 dcurver_0.9.1 gtools_3.8.1
## [43] polspline_1.1.17 pan_1.6 MASS_7.3-51.4
## [46] zoo_1.8-6 scales_1.0.0 hms_0.5.2
## [49] parallel_3.6.1 sandwich_2.5-1 RColorBrewer_1.1-2
## [52] psychometric_2.2 yaml_2.2.0 gridExtra_2.3
## [55] rpart_4.1-15 latticeExtra_0.6-28 stringi_1.4.3
## [58] checkmate_1.9.4 permute_0.9-5 boot_1.3-23
## [61] rlang_0.4.1 pkgconfig_2.0.3 evaluate_0.14
## [64] htmlwidgets_1.5.1 labeling_0.3 tidyselect_0.2.5
## [67] R6_2.4.1 generics_0.0.2 mitml_0.3-7
## [70] multcomp_1.4-10 pillar_1.4.2 haven_2.2.0
## [73] foreign_0.8-72 withr_2.1.2 mgcv_1.8-31
## [76] nnet_7.3-12 modelr_0.1.5 crayon_1.3.4
## [79] jomo_2.6-10 rmarkdown_1.17 grid_3.6.1
## [82] readxl_1.3.1 data.table_1.12.6 vegan_2.5-6
## [85] digest_0.6.22 GPArotation_2014.11-1 munsell_0.5.0
d %>% write_rds("data/data_out.rds", compress = "xz")
d %>% write_csv("data/data_out.csv.gz")