Start

library(pacman)
p_load(kirkegaard, readr, dplyr, rms, polycor, mirt)

#redefine describe
describe = function(...) psych::describe(...) %>% as.matrix()

Data

#load
vars = readr::read_csv2("data/question_data.csv")
## Using ',' as decimal and '.' as grouping mark. Use read_delim() for more control.
## Warning: Missing column names filled in: 'X1' [1]
## Parsed with column specification:
## cols(
##   X1 = col_character(),
##   text = col_character(),
##   option_1 = col_character(),
##   option_2 = col_character(),
##   option_3 = col_character(),
##   option_4 = col_character(),
##   N = col_double(),
##   Type = col_character(),
##   Order = col_character(),
##   Keywords = col_character()
## )
d = readr::read_rds("data/parsed_data.rds")
d_orig = d

#test items
iq_items_meta = read_csv("data/test_items.csv")
## Warning: Missing column names filled in: 'X1' [1]
## Parsed with column specification:
## cols(
##   X1 = col_character(),
##   ID = col_double(),
##   text = col_character(),
##   option_1 = col_character(),
##   option_2 = col_character(),
##   option_3 = col_character(),
##   option_4 = col_character(),
##   option_correct = col_double()
## )
#limited to persons with at last 5 IQ items
#included IQ items, some were removed due to ~0 datapoints
CA_items = intersect(iq_items_meta$X1, names(d))
d$CA_items = d[CA_items] %>% miss_by_case(reverse = T)
d = filter(d, CA_items >= 5)

Recode

#rename
d$age = d$d_age
d$sex = d$gender
d$country = d$d_country
d$sexual_orientation = d$gender_orientation

#anglophone = USA states, or regions or Canada (not Quebec) or a few others
d$anglophone = (d$country %in% c("Ontario", "British Columbia", "Alberta", "Manitoba", "Nova Scotia", "Saskatchewan", "New Brunswick", "Prince Edward Island", "Australia", "Singapore", "Ireland", "New Zealand")) | str_length(d$country) == 2
d$western = (d$country %in% c("Ontario", "British Columbia", "Alberta", "Manitoba", "Nova Scotia", "Saskatchewan", "New Brunswick", "Prince Edward Island", "Australia", "Ireland", "New Zealand")) | (str_length(d$country) == 2) | (d$country %in% c("Germany", "Netherlands", "Denmark", "Italy", "Sweden", "France", "Finland", "Israel", "Belgium", "Spain", "Austria", "Switzerland", "Norway", "Greece", "Portugal", "Iceland", "Luxembourg"))

#chronobio or sleeping related
d$wake_up_speed = d$q19652 %>% fct_rev()
d$wake_up_speed2 = d$q42209 %>% fct_rev()
d$weekends_up_late = d$q28831 %>% fct_rev()
d$sleep_strat = d$q85867 %>% fct_rev()
d$no_obligation_get_up = d$q33170
d$typical_sleep = d$q17342
d$ideal_schedule_late = d$q18644 %>% fct_rev()
d$trouble_sleeping = d$q79134 %>% fct_rev()
d$number_pillows = d$q67024
d$sleep_tv = d$q49461 %>% fct_rev()
d$sleep_tv2 = d$q42266
d$sleep_stuffed_animal = d$q212 %>% fct_rev()
d$pref_sleep_duration = d$q8079 %>% fct_rev()
d$sleep_longer_if_possible = d$q27141
d$talk_in_sleep = d$q28206
d$sleep_others = d$q31840
d$sleep_room_temp = d$q35862
d$sleep_lights = d$q36622
d$sleep_unfamiliar_environ_good = d$q38249 %>% fct_rev()
d$longest_no_sleep = d$q51833
d$read_before_sleep = d$q53910 %>% fct_rev()
d$sleep_music = d$q71315 %>% fct_rev()

#sleep vars
sleep_vars = d %>% select(wake_up_speed:sleep_music) %>% names()

Chronobiolgy

#helper
fixNA = function(x) {
  x[str_detect(x, "NA%")] = NA
  x
}

#descriptive
sleep_summary = map_df(sleep_vars, function(x) {
  # browser()
  xvar = d[[x]] %>% na.omit()
  xvar_uniq = xvar %>% levels()
  #backlook
  # if (x == "no_obligation_get_up") browser()
  xvar_id = map_lgl(d, function(v) {
    identical(v, d[[x]]) || identical(v, fct_rev(d[[x]]))
  }) %>% which() %>% names() %>% .[1]
  
  tibble(
    var = x,
    n = xvar %>% length(),
    
    #meta
    question = vars %>% filter(X1 == xvar_id) %>% pull(text),
    option_1 = str_glue("{xvar_uniq[1]} - {(mean(xvar == xvar_uniq[1])*100) %>% format_digits()}%"),
    option_2 = str_glue("{xvar_uniq[2]} - {(mean(xvar == xvar_uniq[2])*100) %>% format_digits()}%"),
    option_3 = str_glue("{xvar_uniq[3]} - {(mean(xvar == xvar_uniq[3])*100) %>% format_digits()}%"),
    option_4 = str_glue("{xvar_uniq[4]} - {(mean(xvar == xvar_uniq[4])*100) %>% format_digits()}%")
  )
}) %>% map_df(fixNA)
## Warning in bind_rows_(x, .id): Vectorizing 'glue' elements may not preserve
## their attributes

## Warning in bind_rows_(x, .id): Vectorizing 'glue' elements may not preserve
## their attributes

## Warning in bind_rows_(x, .id): Vectorizing 'glue' elements may not preserve
## their attributes

## Warning in bind_rows_(x, .id): Vectorizing 'glue' elements may not preserve
## their attributes

## Warning in bind_rows_(x, .id): Vectorizing 'glue' elements may not preserve
## their attributes

## Warning in bind_rows_(x, .id): Vectorizing 'glue' elements may not preserve
## their attributes

## Warning in bind_rows_(x, .id): Vectorizing 'glue' elements may not preserve
## their attributes

## Warning in bind_rows_(x, .id): Vectorizing 'glue' elements may not preserve
## their attributes

## Warning in bind_rows_(x, .id): Vectorizing 'glue' elements may not preserve
## their attributes

## Warning in bind_rows_(x, .id): Vectorizing 'glue' elements may not preserve
## their attributes

## Warning in bind_rows_(x, .id): Vectorizing 'glue' elements may not preserve
## their attributes

## Warning in bind_rows_(x, .id): Vectorizing 'glue' elements may not preserve
## their attributes

## Warning in bind_rows_(x, .id): Vectorizing 'glue' elements may not preserve
## their attributes

## Warning in bind_rows_(x, .id): Vectorizing 'glue' elements may not preserve
## their attributes

## Warning in bind_rows_(x, .id): Vectorizing 'glue' elements may not preserve
## their attributes

## Warning in bind_rows_(x, .id): Vectorizing 'glue' elements may not preserve
## their attributes

## Warning in bind_rows_(x, .id): Vectorizing 'glue' elements may not preserve
## their attributes

## Warning in bind_rows_(x, .id): Vectorizing 'glue' elements may not preserve
## their attributes

## Warning in bind_rows_(x, .id): Vectorizing 'glue' elements may not preserve
## their attributes

## Warning in bind_rows_(x, .id): Vectorizing 'glue' elements may not preserve
## their attributes

## Warning in bind_rows_(x, .id): Vectorizing 'glue' elements may not preserve
## their attributes

## Warning in bind_rows_(x, .id): Vectorizing 'glue' elements may not preserve
## their attributes

## Warning in bind_rows_(x, .id): Vectorizing 'glue' elements may not preserve
## their attributes

## Warning in bind_rows_(x, .id): Vectorizing 'glue' elements may not preserve
## their attributes

## Warning in bind_rows_(x, .id): Vectorizing 'glue' elements may not preserve
## their attributes

## Warning in bind_rows_(x, .id): Vectorizing 'glue' elements may not preserve
## their attributes

## Warning in bind_rows_(x, .id): Vectorizing 'glue' elements may not preserve
## their attributes

## Warning in bind_rows_(x, .id): Vectorizing 'glue' elements may not preserve
## their attributes

## Warning in bind_rows_(x, .id): Vectorizing 'glue' elements may not preserve
## their attributes

## Warning in bind_rows_(x, .id): Vectorizing 'glue' elements may not preserve
## their attributes

## Warning in bind_rows_(x, .id): Vectorizing 'glue' elements may not preserve
## their attributes

## Warning in bind_rows_(x, .id): Vectorizing 'glue' elements may not preserve
## their attributes

## Warning in bind_rows_(x, .id): Vectorizing 'glue' elements may not preserve
## their attributes

## Warning in bind_rows_(x, .id): Vectorizing 'glue' elements may not preserve
## their attributes

## Warning in bind_rows_(x, .id): Vectorizing 'glue' elements may not preserve
## their attributes

## Warning in bind_rows_(x, .id): Vectorizing 'glue' elements may not preserve
## their attributes

## Warning in bind_rows_(x, .id): Vectorizing 'glue' elements may not preserve
## their attributes

## Warning in bind_rows_(x, .id): Vectorizing 'glue' elements may not preserve
## their attributes

## Warning in bind_rows_(x, .id): Vectorizing 'glue' elements may not preserve
## their attributes

## Warning in bind_rows_(x, .id): Vectorizing 'glue' elements may not preserve
## their attributes

## Warning in bind_rows_(x, .id): Vectorizing 'glue' elements may not preserve
## their attributes

## Warning in bind_rows_(x, .id): Vectorizing 'glue' elements may not preserve
## their attributes

## Warning in bind_rows_(x, .id): Vectorizing 'glue' elements may not preserve
## their attributes

## Warning in bind_rows_(x, .id): Vectorizing 'glue' elements may not preserve
## their attributes

## Warning in bind_rows_(x, .id): Vectorizing 'glue' elements may not preserve
## their attributes

## Warning in bind_rows_(x, .id): Vectorizing 'glue' elements may not preserve
## their attributes

## Warning in bind_rows_(x, .id): Vectorizing 'glue' elements may not preserve
## their attributes

## Warning in bind_rows_(x, .id): Vectorizing 'glue' elements may not preserve
## their attributes

## Warning in bind_rows_(x, .id): Vectorizing 'glue' elements may not preserve
## their attributes

## Warning in bind_rows_(x, .id): Vectorizing 'glue' elements may not preserve
## their attributes

## Warning in bind_rows_(x, .id): Vectorizing 'glue' elements may not preserve
## their attributes

## Warning in bind_rows_(x, .id): Vectorizing 'glue' elements may not preserve
## their attributes

## Warning in bind_rows_(x, .id): Vectorizing 'glue' elements may not preserve
## their attributes

## Warning in bind_rows_(x, .id): Vectorizing 'glue' elements may not preserve
## their attributes

## Warning in bind_rows_(x, .id): Vectorizing 'glue' elements may not preserve
## their attributes

## Warning in bind_rows_(x, .id): Vectorizing 'glue' elements may not preserve
## their attributes

## Warning in bind_rows_(x, .id): Vectorizing 'glue' elements may not preserve
## their attributes

## Warning in bind_rows_(x, .id): Vectorizing 'glue' elements may not preserve
## their attributes

## Warning in bind_rows_(x, .id): Vectorizing 'glue' elements may not preserve
## their attributes

## Warning in bind_rows_(x, .id): Vectorizing 'glue' elements may not preserve
## their attributes

## Warning in bind_rows_(x, .id): Vectorizing 'glue' elements may not preserve
## their attributes

## Warning in bind_rows_(x, .id): Vectorizing 'glue' elements may not preserve
## their attributes

## Warning in bind_rows_(x, .id): Vectorizing 'glue' elements may not preserve
## their attributes

## Warning in bind_rows_(x, .id): Vectorizing 'glue' elements may not preserve
## their attributes

## Warning in bind_rows_(x, .id): Vectorizing 'glue' elements may not preserve
## their attributes

## Warning in bind_rows_(x, .id): Vectorizing 'glue' elements may not preserve
## their attributes

## Warning in bind_rows_(x, .id): Vectorizing 'glue' elements may not preserve
## their attributes

## Warning in bind_rows_(x, .id): Vectorizing 'glue' elements may not preserve
## their attributes

## Warning in bind_rows_(x, .id): Vectorizing 'glue' elements may not preserve
## their attributes

## Warning in bind_rows_(x, .id): Vectorizing 'glue' elements may not preserve
## their attributes

## Warning in bind_rows_(x, .id): Vectorizing 'glue' elements may not preserve
## their attributes

## Warning in bind_rows_(x, .id): Vectorizing 'glue' elements may not preserve
## their attributes

## Warning in bind_rows_(x, .id): Vectorizing 'glue' elements may not preserve
## their attributes

## Warning in bind_rows_(x, .id): Vectorizing 'glue' elements may not preserve
## their attributes

## Warning in bind_rows_(x, .id): Vectorizing 'glue' elements may not preserve
## their attributes

## Warning in bind_rows_(x, .id): Vectorizing 'glue' elements may not preserve
## their attributes

## Warning in bind_rows_(x, .id): Vectorizing 'glue' elements may not preserve
## their attributes

## Warning in bind_rows_(x, .id): Vectorizing 'glue' elements may not preserve
## their attributes

## Warning in bind_rows_(x, .id): Vectorizing 'glue' elements may not preserve
## their attributes

## Warning in bind_rows_(x, .id): Vectorizing 'glue' elements may not preserve
## their attributes

## Warning in bind_rows_(x, .id): Vectorizing 'glue' elements may not preserve
## their attributes

## Warning in bind_rows_(x, .id): Vectorizing 'glue' elements may not preserve
## their attributes

## Warning in bind_rows_(x, .id): Vectorizing 'glue' elements may not preserve
## their attributes

## Warning in bind_rows_(x, .id): Vectorizing 'glue' elements may not preserve
## their attributes

## Warning in bind_rows_(x, .id): Vectorizing 'glue' elements may not preserve
## their attributes

## Warning in bind_rows_(x, .id): Vectorizing 'glue' elements may not preserve
## their attributes

## Warning in bind_rows_(x, .id): Vectorizing 'glue' elements may not preserve
## their attributes

## Warning in bind_rows_(x, .id): Vectorizing 'glue' elements may not preserve
## their attributes
sleep_summary
#main chronobio questions
#pure and typical questions
chrono = d %>% select(ideal_schedule_late, no_obligation_get_up, weekends_up_late, typical_sleep)

#correlations
chrono_fa = mirt::mirt(chrono %>% map_df(as.numeric), model = 1, technical = list(removeEmptyRows=T))
## 
Iteration: 1, Log-Lik: -88944.294, Max-Change: 1.10688
Iteration: 2, Log-Lik: -83722.452, Max-Change: 0.44536
Iteration: 3, Log-Lik: -82684.278, Max-Change: 0.34000
Iteration: 4, Log-Lik: -82301.901, Max-Change: 0.26723
Iteration: 5, Log-Lik: -82081.811, Max-Change: 0.16361
Iteration: 6, Log-Lik: -82008.658, Max-Change: 0.15209
Iteration: 7, Log-Lik: -81981.475, Max-Change: 0.07756
Iteration: 8, Log-Lik: -81967.009, Max-Change: 0.07967
Iteration: 9, Log-Lik: -81961.068, Max-Change: 0.04190
Iteration: 10, Log-Lik: -81957.469, Max-Change: 0.03491
Iteration: 11, Log-Lik: -81955.724, Max-Change: 0.02863
Iteration: 12, Log-Lik: -81954.747, Max-Change: 0.02330
Iteration: 13, Log-Lik: -81952.969, Max-Change: 0.00993
Iteration: 14, Log-Lik: -81952.847, Max-Change: 0.00720
Iteration: 15, Log-Lik: -81952.774, Max-Change: 0.00590
Iteration: 16, Log-Lik: -81952.694, Max-Change: 0.00328
Iteration: 17, Log-Lik: -81952.675, Max-Change: 0.00562
Iteration: 18, Log-Lik: -81952.653, Max-Change: 0.00299
Iteration: 19, Log-Lik: -81952.628, Max-Change: 0.00277
Iteration: 20, Log-Lik: -81952.617, Max-Change: 0.00247
Iteration: 21, Log-Lik: -81952.606, Max-Change: 0.00247
Iteration: 22, Log-Lik: -81952.568, Max-Change: 0.00047
Iteration: 23, Log-Lik: -81952.568, Max-Change: 0.00029
Iteration: 24, Log-Lik: -81952.567, Max-Change: 0.00026
Iteration: 25, Log-Lik: -81952.567, Max-Change: 0.00112
Iteration: 26, Log-Lik: -81952.566, Max-Change: 0.00018
Iteration: 27, Log-Lik: -81952.566, Max-Change: 0.00018
Iteration: 28, Log-Lik: -81952.566, Max-Change: 0.00089
Iteration: 29, Log-Lik: -81952.565, Max-Change: 0.00008
chrono_fa@Fit$F
##                             F1
## ideal_schedule_late  0.8201320
## no_obligation_get_up 0.8299519
## weekends_up_late     0.4819559
## typical_sleep        0.6484487
#scores
chrono_fa_scores = fscores(chrono_fa, full.scores = T, full.scores.SE = T)
d$chronobio = NA
d$chronobio[miss_by_case(chrono, reverse = T) > 0] = chrono_fa_scores[, 1] %>% standardize()
(miss_by_case(chrono, reverse = T) > 0) %>% sum()
## [1] 29637
nrow(chrono_fa_scores)
## [1] 29637
#estimate reliability
empirical_rxx(chrono_fa_scores)
##        F1 
## 0.6337424
marginal_rxx(chrono_fa)
## [1] 0.7223729

Extract intelligence

#get scoring key
v_correct_answers_text = apply(iq_items_meta %>% filter(X1 %in% CA_items), MARGIN = 1, function(item) {
  item["option_" + 1:4][item["option_correct"] %>% as.numeric()]
})

#score items
CA_scored_items = score_items(d[CA_items], key = v_correct_answers_text)

#factor analysis with IRT
irt_CA = mirt(CA_scored_items, model = 1)
## 
Iteration: 1, Log-Lik: -151544.753, Max-Change: 3.37241
Iteration: 2, Log-Lik: -137049.017, Max-Change: 0.45960
Iteration: 3, Log-Lik: -135942.068, Max-Change: 0.28963
Iteration: 4, Log-Lik: -135346.752, Max-Change: 0.23959
Iteration: 5, Log-Lik: -135000.070, Max-Change: 0.16686
Iteration: 6, Log-Lik: -134805.781, Max-Change: 0.25781
Iteration: 7, Log-Lik: -134681.766, Max-Change: 0.09668
Iteration: 8, Log-Lik: -134606.564, Max-Change: 0.24591
Iteration: 9, Log-Lik: -134548.208, Max-Change: 0.14237
Iteration: 10, Log-Lik: -134519.358, Max-Change: 0.08380
Iteration: 11, Log-Lik: -134499.564, Max-Change: 0.06449
Iteration: 12, Log-Lik: -134485.274, Max-Change: 0.05566
Iteration: 13, Log-Lik: -134474.237, Max-Change: 0.03508
Iteration: 14, Log-Lik: -134468.174, Max-Change: 0.04288
Iteration: 15, Log-Lik: -134463.962, Max-Change: 0.03212
Iteration: 16, Log-Lik: -134461.212, Max-Change: 0.04777
Iteration: 17, Log-Lik: -134455.203, Max-Change: 0.01162
Iteration: 18, Log-Lik: -134454.203, Max-Change: 0.00664
Iteration: 19, Log-Lik: -134453.895, Max-Change: 0.00367
Iteration: 20, Log-Lik: -134453.713, Max-Change: 0.00224
Iteration: 21, Log-Lik: -134453.625, Max-Change: 0.00139
Iteration: 22, Log-Lik: -134453.609, Max-Change: 0.00142
Iteration: 23, Log-Lik: -134453.582, Max-Change: 0.00074
Iteration: 24, Log-Lik: -134453.567, Max-Change: 0.00094
Iteration: 25, Log-Lik: -134453.551, Max-Change: 0.00071
Iteration: 26, Log-Lik: -134453.547, Max-Change: 0.00026
Iteration: 27, Log-Lik: -134453.545, Max-Change: 0.00025
Iteration: 28, Log-Lik: -134453.544, Max-Change: 0.00020
Iteration: 29, Log-Lik: -134453.544, Max-Change: 0.00018
Iteration: 30, Log-Lik: -134453.543, Max-Change: 0.00019
Iteration: 31, Log-Lik: -134453.543, Max-Change: 0.00014
Iteration: 32, Log-Lik: -134453.543, Max-Change: 0.00016
Iteration: 33, Log-Lik: -134453.543, Max-Change: 0.00015
Iteration: 34, Log-Lik: -134453.543, Max-Change: 0.00012
Iteration: 35, Log-Lik: -134453.543, Max-Change: 0.00011
Iteration: 36, Log-Lik: -134453.542, Max-Change: 0.00011
Iteration: 37, Log-Lik: -134453.542, Max-Change: 0.00010
Iteration: 38, Log-Lik: -134453.542, Max-Change: 0.00009
irt_CA
## 
## Call:
## mirt(data = CA_scored_items, model = 1)
## 
## Full-information item factor analysis with 1 factor(s).
## Converged within 1e-04 tolerance after 38 EM iterations.
## mirt version: 1.31 
## M-step optimizer: BFGS 
## EM acceleration: Ramsay 
## Number of rectangular quadrature: 61
## Latent density type: Gaussian 
## 
## Log-likelihood = -134453.5
## Estimated parameters: 28 
## AIC = 268963.1; AICc = 268963.1
## BIC = 269202.1; SABIC = 269113.2
irt_CA@Fit
## $G2
## [1] NaN
## 
## $p
## [1] NaN
## 
## $TLI
## [1] NaN
## 
## $CFI
## [1] NaN
## 
## $RMSEA
## [1] NaN
## 
## $df
## [1] 16355
## 
## $AIC
## [1] 268963.1
## 
## $AICc
## [1] 268963.1
## 
## $BIC
## [1] 269202.1
## 
## $SABIC
## [1] 269113.2
## 
## $DIC
## [1] 268963.1
## 
## $HQ
## [1] 269039
## 
## $logLik
## [1] -134453.5
## 
## $logPrior
## [1] 0
## 
## $SElogLik
## [1] 0
## 
## $F
##               F1
## q178   0.7354377
## q255   0.7041587
## q1201  0.7519587
## q14835 0.3786183
## q8672  0.7202241
## q18154 0.7435125
## q12625 0.5823432
## q477   0.6024514
## q256   0.3699111
## q43639 0.7487329
## q267   0.4796624
## q18698 0.6924156
## q511   0.6799004
## q57844 0.6927200
## 
## $h2
##      q178      q255     q1201    q14835     q8672    q18154    q12625      q477 
## 0.5408686 0.4958394 0.5654419 0.1433518 0.5187228 0.5528109 0.3391236 0.3629477 
##      q256    q43639      q267    q18698      q511    q57844 
## 0.1368342 0.5606009 0.2300760 0.4794394 0.4622646 0.4798610
#save
irt_CA_scores = fscores(irt_CA, full.scores = T, full.scores.SE = T)
d$CA_old = d$CA %>% standardize()
d$CA = irt_CA_scores[, 1] %>% standardize()

#plot scores
GG_scatter(d, "CA", "CA_old")

#estimate reliability
empirical_rxx(irt_CA_scores)
##        F1 
## 0.6275554
marginal_rxx(irt_CA)
## [1] 0.7536288

Subsets

#whites only
d_white = d %>% filter(race == "White")
#anglos only
d_anglo = d %>% filter(anglophone)
#white Americans only
d_white_amer = d %>% filter(race == "White", country %in% state.abb)

Analyses

Descriptive

#Sex
d$sex %>% table2()
#sexual orientation
d$sexual_orientation %>% table2()
#age
d$age %>% describe()
##    vars     n     mean       sd median  trimmed   mad min max range      skew
## X1    1 36975 33.06339 7.790711     32 32.47456 7.413  18 100    82 0.9250802
##    kurtosis         se
## X1 1.999387 0.04051567
#country / state
d$country %>% table2()
#anglo
d$anglophone %>% table2()
#western
d$western %>% table2()

Chronobiology questions and intelligence

#each ordinal variable
d[sleep_vars] %>% map_chr(~class(.)[1])
##                 wake_up_speed                wake_up_speed2 
##                     "ordered"                      "factor" 
##              weekends_up_late                   sleep_strat 
##                     "ordered"                     "ordered" 
##          no_obligation_get_up                 typical_sleep 
##                     "ordered"                     "ordered" 
##           ideal_schedule_late              trouble_sleeping 
##                     "ordered"                     "ordered" 
##                number_pillows                      sleep_tv 
##                     "ordered"                     "ordered" 
##                     sleep_tv2          sleep_stuffed_animal 
##                     "ordered"                     "ordered" 
##           pref_sleep_duration      sleep_longer_if_possible 
##                     "ordered"                     "ordered" 
##                 talk_in_sleep                  sleep_others 
##                     "ordered"                     "ordered" 
##               sleep_room_temp                  sleep_lights 
##                     "ordered"                     "ordered" 
## sleep_unfamiliar_environ_good              longest_no_sleep 
##                     "ordered"                     "ordered" 
##             read_before_sleep                   sleep_music 
##                     "ordered"                     "ordered"
sleep_vars2 = d[sleep_vars] %>% map_chr(~class(.)[1]) %>% {
  .[. == "ordered"] %>% names()
}

#table of latent correlations
sleep_cors = map_df(sleep_vars2, function(x) {
  #cor
  xsym = as.symbol(x)
  xcor = polycor::hetcor(d %>% select(CA, !!xsym))
  
  tibble(
    var = x,
    n = xcor$n,
    r = xcor$correlations[1, 2],
    p = xcor$tests[1, 2]
  )
})

sleep_cors
#scatterplots
GG_scatter(d, "CA", "chronobio")

GG_scatter(d_white, "CA", "chronobio")

GG_scatter(d_anglo, "CA", "chronobio")

GG_scatter(d_white_amer, "CA", "chronobio")

Models

list(
  ols(chronobio ~ CA, data = d),
  ols(chronobio ~ CA + rcs(age), data = d),
  ols(chronobio ~ CA + gender_orientation + rcs(age), data = d),
  ols(chronobio ~ CA + race + rcs(age), data = d),
  ols(chronobio ~ CA + gender_orientation + race + rcs(age), data = d),
  ols(chronobio ~ CA + gender_orientation + rcs(age), data = d_white_amer)
  ) %>% 
  print() %>% 
  summarize_models()
## [[1]]
## Frequencies of Missing Values Due to Each Variable
## chronobio        CA 
##      8077         0 
## 
## Linear Regression Model
##  
##  ols(formula = chronobio ~ CA, data = d)
##  
##  
##                 Model Likelihood     Discrimination    
##                    Ratio Test           Indexes        
##  Obs   29637    LR chi2    113.19    R2       0.004    
##  sigma0.9981    d.f.            1    R2 adj   0.004    
##  d.f.  29635    Pr(> chi2) 0.0000    g        0.070    
##  
##  Residuals
##  
##        Min        1Q    Median        3Q       Max 
##  -3.169724 -0.790109  0.005972  0.743512  2.609451 
##  
##  
##            Coef    S.E.   t     Pr(>|t|)
##  Intercept -0.0062 0.0058 -1.07 0.2852  
##  CA         0.0633 0.0059 10.65 <0.0001 
##  
## 
## [[2]]
## Frequencies of Missing Values Due to Each Variable
## chronobio        CA       age 
##      8077         0       739 
## 
## Linear Regression Model
##  
##  ols(formula = chronobio ~ CA + rcs(age), data = d)
##  
##  
##                  Model Likelihood     Discrimination    
##                     Ratio Test           Indexes        
##  Obs   28958    LR chi2    1034.76    R2       0.035    
##  sigma0.9813    d.f.             5    R2 adj   0.035    
##  d.f.  28952    Pr(> chi2)  0.0000    g        0.206    
##  
##  Residuals
##  
##        Min        1Q    Median        3Q       Max 
##  -3.398397 -0.767895 -0.009533  0.689115  2.966520 
##  
##  
##            Coef    S.E.   t     Pr(>|t|)
##  Intercept  1.2247 0.1509  8.12 <0.0001 
##  CA         0.0599 0.0060 10.05 <0.0001 
##  age       -0.0412 0.0061 -6.75 <0.0001 
##  age'       0.1070 0.0410  2.61 0.0091  
##  age''     -0.3653 0.1589 -2.30 0.0215  
##  age'''     0.3545 0.1816  1.95 0.0510  
##  
## 
## [[3]]
## Frequencies of Missing Values Due to Each Variable
##          chronobio                 CA gender_orientation                age 
##               8077                  0               1114                739 
## 
## Linear Regression Model
##  
##  ols(formula = chronobio ~ CA + gender_orientation + rcs(age), 
##      data = d)
##  
##  
##                  Model Likelihood     Discrimination    
##                     Ratio Test           Indexes        
##  Obs   28604    LR chi2    1184.86    R2       0.041    
##  sigma0.9775    d.f.            10    R2 adj   0.040    
##  d.f.  28593    Pr(> chi2)  0.0000    g        0.221    
##  
##  Residuals
##  
##        Min        1Q    Median        3Q       Max 
##  -3.485306 -0.764214 -0.004807  0.682016  3.066751 
##  
##  
##                                     Coef    S.E.   t     Pr(>|t|)
##  Intercept                           0.9612 0.1545  6.22 <0.0001 
##  CA                                  0.0537 0.0060  8.96 <0.0001 
##  gender_orientation=Bisexual_female  0.2618 0.0281  9.33 <0.0001 
##  gender_orientation=Gay_female       0.1737 0.0532  3.27 0.0011  
##  gender_orientation=Gay_male         0.0529 0.0284  1.86 0.0624  
##  gender_orientation=Bisexual_male    0.3941 0.0425  9.27 <0.0001 
##  gender_orientation=Hetero_male      0.1150 0.0142  8.09 <0.0001 
##  age                                -0.0348 0.0062 -5.60 <0.0001 
##  age'                                0.0812 0.0414  1.96 0.0497  
##  age''                              -0.2995 0.1599 -1.87 0.0610  
##  age'''                              0.3138 0.1823  1.72 0.0852  
##  
## 
## [[4]]
## Frequencies of Missing Values Due to Each Variable
## chronobio        CA      race       age 
##      8077         0      3783       739 
## 
## Linear Regression Model
##  
##  ols(formula = chronobio ~ CA + race + rcs(age), data = d)
##  
##  
##                  Model Likelihood     Discrimination    
##                     Ratio Test           Indexes        
##  Obs   27197    LR chi2    1030.05    R2       0.037    
##  sigma0.9816    d.f.            14    R2 adj   0.037    
##  d.f.  27182    Pr(> chi2)  0.0000    g        0.212    
##  
##  Residuals
##  
##       Min       1Q   Median       3Q      Max 
##  -3.40247 -0.76724 -0.01159  0.68615  2.98545 
##  
##  
##                        Coef    S.E.   t     Pr(>|t|)
##  Intercept              1.2599 0.1566  8.05 <0.0001 
##  CA                     0.0551 0.0062  8.86 <0.0001 
##  race=Mixed             0.0502 0.0202  2.49 0.0128  
##  race=Asian            -0.0179 0.0311 -0.57 0.5660  
##  race=Hispanic / Latin -0.0829 0.0316 -2.62 0.0088  
##  race=Black            -0.0001 0.0317  0.00 0.9966  
##  race=Other             0.0323 0.0376  0.86 0.3900  
##  race=Indian           -0.2385 0.0664 -3.59 0.0003  
##  race=Middle Eastern   -0.1367 0.0994 -1.38 0.1691  
##  race=Native American  -0.1727 0.1260 -1.37 0.1703  
##  race=Pacific Islander -0.1248 0.1390 -0.90 0.3695  
##  age                   -0.0424 0.0063 -6.69 <0.0001 
##  age'                   0.1123 0.0425  2.64 0.0083  
##  age''                 -0.3842 0.1647 -2.33 0.0197  
##  age'''                 0.3717 0.1882  1.98 0.0483  
##  
## 
## [[5]]
## Frequencies of Missing Values Due to Each Variable
##          chronobio                 CA gender_orientation               race 
##               8077                  0               1114               3783 
##                age 
##                739 
## 
## Linear Regression Model
##  
##  ols(formula = chronobio ~ CA + gender_orientation + race + rcs(age), 
##      data = d)
##  
##  
##                  Model Likelihood     Discrimination    
##                     Ratio Test           Indexes        
##  Obs   26872    LR chi2    1174.93    R2       0.043    
##  sigma0.9778    d.f.            19    R2 adj   0.042    
##  d.f.  26852    Pr(> chi2)  0.0000    g        0.227    
##  
##  Residuals
##  
##        Min        1Q    Median        3Q       Max 
##  -3.472286 -0.760818 -0.005071  0.680982  3.076929 
##  
##  
##                                     Coef    S.E.   t     Pr(>|t|)
##  Intercept                           0.9862 0.1604  6.15 <0.0001 
##  CA                                  0.0488 0.0063  7.80 <0.0001 
##  gender_orientation=Bisexual_female  0.2813 0.0294  9.58 <0.0001 
##  gender_orientation=Gay_female       0.1919 0.0558  3.44 0.0006  
##  gender_orientation=Gay_male         0.0664 0.0291  2.28 0.0227  
##  gender_orientation=Bisexual_male    0.3849 0.0448  8.60 <0.0001 
##  gender_orientation=Hetero_male      0.1193 0.0148  8.09 <0.0001 
##  race=Mixed                          0.0448 0.0203  2.20 0.0276  
##  race=Asian                          0.0052 0.0311  0.17 0.8664  
##  race=Hispanic / Latin              -0.0831 0.0316 -2.62 0.0087  
##  race=Black                          0.0048 0.0318  0.15 0.8795  
##  race=Other                          0.0279 0.0377  0.74 0.4588  
##  race=Indian                        -0.2382 0.0665 -3.58 0.0003  
##  race=Middle Eastern                -0.1354 0.0990 -1.37 0.1715  
##  race=Native American               -0.1845 0.1255 -1.47 0.1416  
##  race=Pacific Islander              -0.1181 0.1385 -0.85 0.3941  
##  age                                -0.0358 0.0064 -5.55 <0.0001 
##  age'                                0.0856 0.0429  1.99 0.0461  
##  age''                              -0.3166 0.1656 -1.91 0.0560  
##  age'''                              0.3309 0.1889  1.75 0.0798  
##  
## 
## [[6]]
## Frequencies of Missing Values Due to Each Variable
##          chronobio                 CA gender_orientation                age 
##               2547                  0                162                  0 
## 
## Linear Regression Model
##  
##  ols(formula = chronobio ~ CA + gender_orientation + rcs(age), 
##      data = d_white_amer)
##  
##  
##                 Model Likelihood     Discrimination    
##                    Ratio Test           Indexes        
##  Obs   15259    LR chi2    739.24    R2       0.047    
##  sigma0.9944    d.f.           10    R2 adj   0.047    
##  d.f.  15248    Pr(> chi2) 0.0000    g        0.242    
##  
##  Residuals
##  
##       Min       1Q   Median       3Q      Max 
##  -3.51120 -0.78477 -0.01054  0.69515  3.11008 
##  
##  
##                                     Coef    S.E.   t     Pr(>|t|)
##  Intercept                           1.3789 0.1989  6.93 <0.0001 
##  CA                                  0.0546 0.0083  6.55 <0.0001 
##  gender_orientation=Bisexual_female  0.2755 0.0417  6.60 <0.0001 
##  gender_orientation=Gay_female       0.1734 0.0758  2.29 0.0222  
##  gender_orientation=Gay_male         0.0479 0.0393  1.22 0.2235  
##  gender_orientation=Bisexual_male    0.4291 0.0637  6.73 <0.0001 
##  gender_orientation=Hetero_male      0.0981 0.0199  4.92 <0.0001 
##  age                                -0.0497 0.0076 -6.53 <0.0001 
##  age'                                0.2376 0.0727  3.27 0.0011  
##  age''                              -0.7570 0.2427 -3.12 0.0018  
##  age'''                              0.7346 0.2584  2.84 0.0045  
## 

Meta

write_sessioninfo()
## R version 3.6.1 (2019-07-05)
## Platform: x86_64-pc-linux-gnu (64-bit)
## Running under: Linux Mint 19.1
## 
## Matrix products: default
## BLAS:   /usr/lib/x86_64-linux-gnu/blas/libblas.so.3.7.1
## LAPACK: /usr/lib/x86_64-linux-gnu/lapack/liblapack.so.3.7.1
## 
## locale:
##  [1] LC_CTYPE=en_US.UTF-8       LC_NUMERIC=C              
##  [3] LC_TIME=en_US.UTF-8        LC_COLLATE=en_US.UTF-8    
##  [5] LC_MONETARY=en_US.UTF-8    LC_MESSAGES=en_US.UTF-8   
##  [7] LC_PAPER=en_US.UTF-8       LC_NAME=C                 
##  [9] LC_ADDRESS=C               LC_TELEPHONE=C            
## [11] LC_MEASUREMENT=en_US.UTF-8 LC_IDENTIFICATION=C       
## 
## attached base packages:
## [1] stats4    stats     graphics  grDevices utils     datasets  methods  
## [8] base     
## 
## other attached packages:
##  [1] mirt_1.31          polycor_0.7-10     rms_5.1-3.1        SparseM_1.77      
##  [5] kirkegaard_2018.05 metafor_2.1-0      Matrix_1.2-17      psych_1.8.12      
##  [9] magrittr_1.5       assertthat_0.2.1   weights_1.0        mice_3.6.0        
## [13] gdata_2.18.0       Hmisc_4.3-0        Formula_1.2-3      survival_3.1-7    
## [17] lattice_0.20-38    forcats_0.4.0      stringr_1.4.0      dplyr_0.8.3       
## [21] purrr_0.3.3        readr_1.3.1        tidyr_1.0.0        tibble_2.1.3      
## [25] ggplot2_3.2.1      tidyverse_1.2.1    pacman_0.5.1      
## 
## loaded via a namespace (and not attached):
##  [1] TH.data_1.0-10        minqa_1.2.4           colorspace_1.4-1     
##  [4] htmlTable_1.13.2      base64enc_0.1-3       rstudioapi_0.10      
##  [7] Deriv_3.9.0           MatrixModels_0.4-1    mvtnorm_1.0-11       
## [10] lubridate_1.7.4       xml2_1.2.2            codetools_0.2-16     
## [13] splines_3.6.1         mnormt_1.5-5          knitr_1.26           
## [16] zeallot_0.1.0         jsonlite_1.6          nloptr_1.2.1         
## [19] broom_0.5.2           cluster_2.1.0         compiler_3.6.1       
## [22] httr_1.4.1            backports_1.1.5       lazyeval_0.2.2       
## [25] cli_1.1.0             acepack_1.4.1         htmltools_0.4.0      
## [28] quantreg_5.52         tools_3.6.1           gtable_0.3.0         
## [31] glue_1.3.1            Rcpp_1.0.3            cellranger_1.1.0     
## [34] vctrs_0.2.0           nlme_3.1-142          multilevel_2.6       
## [37] xfun_0.11             lme4_1.1-21           rvest_0.3.5          
## [40] lifecycle_0.1.0       dcurver_0.9.1         gtools_3.8.1         
## [43] polspline_1.1.17      pan_1.6               MASS_7.3-51.4        
## [46] zoo_1.8-6             scales_1.0.0          hms_0.5.2            
## [49] parallel_3.6.1        sandwich_2.5-1        RColorBrewer_1.1-2   
## [52] psychometric_2.2      yaml_2.2.0            gridExtra_2.3        
## [55] rpart_4.1-15          latticeExtra_0.6-28   stringi_1.4.3        
## [58] checkmate_1.9.4       permute_0.9-5         boot_1.3-23          
## [61] rlang_0.4.1           pkgconfig_2.0.3       evaluate_0.14        
## [64] htmlwidgets_1.5.1     labeling_0.3          tidyselect_0.2.5     
## [67] R6_2.4.1              generics_0.0.2        mitml_0.3-7          
## [70] multcomp_1.4-10       pillar_1.4.2          haven_2.2.0          
## [73] foreign_0.8-72        withr_2.1.2           mgcv_1.8-31          
## [76] nnet_7.3-12           modelr_0.1.5          crayon_1.3.4         
## [79] jomo_2.6-10           rmarkdown_1.17        grid_3.6.1           
## [82] readxl_1.3.1          data.table_1.12.6     vegan_2.5-6          
## [85] digest_0.6.22         GPArotation_2014.11-1 munsell_0.5.0
d %>% write_rds("data/data_out.rds", compress = "xz")
d %>% write_csv("data/data_out.csv.gz")