Init

options(digits = 3)
library(pacman)
p_load(kirkegaard, rms, haven)
theme_set(theme_bw())

Data

#Main file
d = read_rds("data/VES_dataset.rds")

#Nerve file
ncv = read_sav("data/nerve conduction_workfile_3.sav")

#variable list
vars_d = read_csv("data/VES_dataset_variables.csv")
## Parsed with column specification:
## cols(
##   number = col_double(),
##   code = col_character(),
##   name = col_character()
## )
vars_ncv = df_var_table(ncv)

#merge
d = full_join(d, ncv %>% select(AOP_ID, !!(ncv %>% names() %>% setdiff(names(d)))), by = "AOP_ID")

Recode

#modify variables
d %<>% mutate(
  #rename tests for clarity
  VE_time1 = VE,
  AR_time1 = AR,
  VE_time2 = VESS,
  AR_time2 = ARSS,
  GT = GT2,

  #pupil size variables: these are in neurology.pdf
  pupil_size_right = NE01A15,
  pupil_size_left = NE01A16,
  pupil_size_mean = cbind(pupil_size_left, pupil_size_right) %>% rowMeans(na.rm=T),
  pupil_hour_measured = NE01003A,
  
  #covariates: these are in medical history.pdf
  drugs_past_year = (MH07188 == 2),
  smoker = c(MH07179 == 2),
  drinks_per_month = (MH06174),
  drinks_per_day = (MH06175)
)

g factor

#g tests
g_tests_early = c("VE_time1", "AR_time1", "PA", "GIT", "AFQT")
g_tests_later = c("VE_time2", "AR_time2", "WAIS_BD", "WAIS_GI", "WRAT", "PASAT", "WLGT", "copy_direct", "copy_immediate", "copy_delayed", "CVLT", "WCST", "GPT_left", "GPT_right")
g_tests = c(g_tests_early, g_tests_later)

#test correlations
wtd.cors(d[g_tests])
##                VE_time1 AR_time1    PA   GIT  AFQT VE_time2 AR_time2 WAIS_BD
## VE_time1          1.000    0.699 0.516 0.659 0.714    0.824    0.642   0.437
## AR_time1          0.699    1.000 0.576 0.589 0.737    0.658    0.785   0.502
## PA                0.516    0.576 1.000 0.467 0.728    0.484    0.545   0.634
## GIT               0.659    0.589 0.467 1.000 0.645    0.620    0.548   0.418
## AFQT              0.714    0.737 0.728 0.645 1.000    0.670    0.688   0.629
## VE_time2          0.824    0.658 0.484 0.620 0.670    1.000    0.691   0.453
## AR_time2          0.642    0.785 0.545 0.548 0.688    0.691    1.000   0.532
## WAIS_BD           0.437    0.502 0.634 0.418 0.629    0.453    0.532   1.000
## WAIS_GI           0.725    0.635 0.482 0.582 0.626    0.719    0.622   0.453
## WRAT              0.746    0.589 0.412 0.517 0.578    0.766    0.585   0.382
## PASAT             0.408    0.521 0.371 0.365 0.432    0.440    0.562   0.388
## WLGT              0.443    0.370 0.289 0.310 0.360    0.463    0.365   0.281
## copy_direct       0.290    0.333 0.380 0.254 0.372    0.325    0.384   0.398
## copy_immediate    0.303    0.335 0.456 0.311 0.452    0.316    0.387   0.490
## copy_delayed      0.301    0.337 0.452 0.308 0.451    0.315    0.385   0.492
## CVLT              0.317    0.331 0.264 0.250 0.312    0.333    0.356   0.269
## WCST              0.327    0.360 0.331 0.282 0.368    0.361    0.396   0.356
## GPT_left          0.208    0.212 0.263 0.192 0.269    0.226    0.236   0.307
## GPT_right         0.204    0.201 0.261 0.174 0.253    0.220    0.240   0.301
##                WAIS_GI  WRAT PASAT  WLGT copy_direct copy_immediate
## VE_time1         0.725 0.746 0.408 0.443       0.290          0.303
## AR_time1         0.635 0.589 0.521 0.370       0.333          0.335
## PA               0.482 0.412 0.371 0.289       0.380          0.456
## GIT              0.582 0.517 0.365 0.310       0.254          0.311
## AFQT             0.626 0.578 0.432 0.360       0.372          0.452
## VE_time2         0.719 0.766 0.440 0.463       0.325          0.316
## AR_time2         0.622 0.585 0.562 0.365       0.384          0.387
## WAIS_BD          0.453 0.382 0.388 0.281       0.398          0.490
## WAIS_GI          1.000 0.652 0.366 0.414       0.278          0.343
## WRAT             0.652 1.000 0.417 0.504       0.269          0.269
## PASAT            0.366 0.417 1.000 0.357       0.247          0.282
## WLGT             0.414 0.504 0.357 1.000       0.176          0.213
## copy_direct      0.278 0.269 0.247 0.176       1.000          0.474
## copy_immediate   0.343 0.269 0.282 0.213       0.474          1.000
## copy_delayed     0.345 0.271 0.281 0.216       0.482          0.915
## CVLT             0.329 0.309 0.289 0.278       0.205          0.318
## WCST             0.330 0.292 0.285 0.209       0.291          0.268
## GPT_left         0.186 0.204 0.216 0.157       0.223          0.227
## GPT_right        0.173 0.196 0.226 0.168       0.223          0.194
##                copy_delayed  CVLT  WCST GPT_left GPT_right
## VE_time1              0.301 0.317 0.327    0.208     0.204
## AR_time1              0.337 0.331 0.360    0.212     0.201
## PA                    0.452 0.264 0.331    0.263     0.261
## GIT                   0.308 0.250 0.282    0.192     0.174
## AFQT                  0.451 0.312 0.368    0.269     0.253
## VE_time2              0.315 0.333 0.361    0.226     0.220
## AR_time2              0.385 0.356 0.396    0.236     0.240
## WAIS_BD               0.492 0.269 0.356    0.307     0.301
## WAIS_GI               0.345 0.329 0.330    0.186     0.173
## WRAT                  0.271 0.309 0.292    0.204     0.196
## PASAT                 0.281 0.289 0.285    0.216     0.226
## WLGT                  0.216 0.278 0.209    0.157     0.168
## copy_direct           0.482 0.205 0.291    0.223     0.223
## copy_immediate        0.915 0.318 0.268    0.227     0.194
## copy_delayed          1.000 0.327 0.268    0.227     0.199
## CVLT                  0.327 1.000 0.192    0.115     0.117
## WCST                  0.268 0.192 1.000    0.196     0.186
## GPT_left              0.227 0.115 0.196    1.000     0.634
## GPT_right             0.199 0.117 0.186    0.634     1.000
#all tests g
fa_g = fa(d[g_tests])
fa_g
## Factor Analysis using method =  minres
## Call: fa(r = d[g_tests])
## Standardized loadings (pattern matrix) based upon correlation matrix
##                 MR1   h2   u2 com
## VE_time1       0.82 0.67 0.33   1
## AR_time1       0.81 0.66 0.34   1
## PA             0.70 0.50 0.50   1
## GIT            0.69 0.47 0.53   1
## AFQT           0.85 0.73 0.27   1
## VE_time2       0.82 0.67 0.33   1
## AR_time2       0.82 0.67 0.33   1
## WAIS_BD        0.67 0.45 0.55   1
## WAIS_GI        0.76 0.58 0.42   1
## WRAT           0.73 0.53 0.47   1
## PASAT          0.57 0.32 0.68   1
## WLGT           0.49 0.24 0.76   1
## copy_direct    0.47 0.22 0.78   1
## copy_immediate 0.55 0.30 0.70   1
## copy_delayed   0.55 0.30 0.70   1
## CVLT           0.42 0.18 0.82   1
## WCST           0.46 0.21 0.79   1
## GPT_left       0.34 0.12 0.88   1
## GPT_right      0.33 0.11 0.89   1
## 
##                 MR1
## SS loadings    7.93
## Proportion Var 0.42
## 
## Mean item complexity =  1
## Test of the hypothesis that 1 factor is sufficient.
## 
## The degrees of freedom for the null model are  171  and the objective function was  12.5 with Chi Square of  55829
## The degrees of freedom for the model are 152  and the objective function was  3.86 
## 
## The root mean square of the residuals (RMSR) is  0.09 
## The df corrected root mean square of the residuals is  0.1 
## 
## The harmonic number of observations is  4426 with the empirical chi square  12980  with prob <  0 
## The total number of observations was  4462  with Likelihood Chi Square =  17178  with prob <  0 
## 
## Tucker Lewis Index of factoring reliability =  0.656
## RMSEA index =  0.158  and the 90 % confidence intervals are  0.156 0.16
## BIC =  15901
## Fit based upon off diagonal values = 0.95
## Measures of factor score adequacy             
##                                                    MR1
## Correlation of (regression) scores with factors   0.97
## Multiple R square of scores with factors          0.95
## Minimum correlation of possible factor scores     0.89
fa_g$loadings[, 1] %>% describe() %>% as.matrix()
##    vars  n  mean    sd median trimmed   mad   min   max range   skew kurtosis
## X1    1 19 0.624 0.172  0.673   0.628 0.212 0.331 0.853 0.522 -0.233    -1.44
##        se
## X1 0.0394
#earlier tests only
fa_g_time1 = fa(d[g_tests_early])
fa_g_time1
## Factor Analysis using method =  minres
## Call: fa(r = d[g_tests_early])
## Standardized loadings (pattern matrix) based upon correlation matrix
##           MR1   h2   u2 com
## VE_time1 0.82 0.67 0.33   1
## AR_time1 0.82 0.68 0.32   1
## PA       0.70 0.49 0.51   1
## GIT      0.73 0.53 0.47   1
## AFQT     0.92 0.84 0.16   1
## 
##                 MR1
## SS loadings    3.20
## Proportion Var 0.64
## 
## Mean item complexity =  1
## Test of the hypothesis that 1 factor is sufficient.
## 
## The degrees of freedom for the null model are  10  and the objective function was  3.1 with Chi Square of  13826
## The degrees of freedom for the model are 5  and the objective function was  0.15 
## 
## The root mean square of the residuals (RMSR) is  0.04 
## The df corrected root mean square of the residuals is  0.06 
## 
## The harmonic number of observations is  4377 with the empirical chi square  169  with prob <  9.5e-35 
## The total number of observations was  4462  with Likelihood Chi Square =  690  with prob <  8.8e-147 
## 
## Tucker Lewis Index of factoring reliability =  0.901
## RMSEA index =  0.175  and the 90 % confidence intervals are  0.164 0.186
## BIC =  648
## Fit based upon off diagonal values = 1
## Measures of factor score adequacy             
##                                                    MR1
## Correlation of (regression) scores with factors   0.96
## Multiple R square of scores with factors          0.92
## Minimum correlation of possible factor scores     0.85
fa_g_time1$loadings[, 1] %>% describe() %>% as.matrix()
##    vars n  mean     sd median trimmed   mad   min   max range  skew kurtosis
## X1    1 5 0.797 0.0867  0.816   0.797 0.135 0.701 0.918 0.217 0.181    -1.84
##        se
## X1 0.0388
#later tests only
fa_g_time2 = fa(d[g_tests_later])
fa_g_time2
## Factor Analysis using method =  minres
## Call: fa(r = d[g_tests_later])
## Standardized loadings (pattern matrix) based upon correlation matrix
##                 MR1   h2   u2 com
## VE_time2       0.78 0.61 0.39   1
## AR_time2       0.79 0.62 0.38   1
## WAIS_BD        0.67 0.45 0.55   1
## WAIS_GI        0.72 0.52 0.48   1
## WRAT           0.71 0.50 0.50   1
## PASAT          0.58 0.33 0.67   1
## WLGT           0.50 0.25 0.75   1
## copy_direct    0.51 0.26 0.74   1
## copy_immediate 0.61 0.37 0.63   1
## copy_delayed   0.61 0.37 0.63   1
## CVLT           0.45 0.20 0.80   1
## WCST           0.47 0.22 0.78   1
## GPT_left       0.37 0.14 0.86   1
## GPT_right      0.36 0.13 0.87   1
## 
##                 MR1
## SS loadings    5.00
## Proportion Var 0.36
## 
## Mean item complexity =  1
## Test of the hypothesis that 1 factor is sufficient.
## 
## The degrees of freedom for the null model are  91  and the objective function was  7.16 with Chi Square of  31892
## The degrees of freedom for the model are 77  and the objective function was  2.8 
## 
## The root mean square of the residuals (RMSR) is  0.11 
## The df corrected root mean square of the residuals is  0.12 
## 
## The harmonic number of observations is  4457 with the empirical chi square  9373  with prob <  0 
## The total number of observations was  4462  with Likelihood Chi Square =  12463  with prob <  0 
## 
## Tucker Lewis Index of factoring reliability =  0.54
## RMSEA index =  0.19  and the 90 % confidence intervals are  0.187 0.193
## BIC =  11816
## Fit based upon off diagonal values = 0.92
## Measures of factor score adequacy             
##                                                    MR1
## Correlation of (regression) scores with factors   0.95
## Multiple R square of scores with factors          0.90
## Minimum correlation of possible factor scores     0.80
fa_g_time2$loadings[, 1] %>% describe() %>% as.matrix()
##    vars  n  mean    sd median trimmed   mad   min   max range    skew kurtosis
## X1    1 14 0.582 0.141  0.592   0.583 0.173 0.364 0.791 0.426 -0.0247    -1.42
##        se
## X1 0.0378
#save scores, standardize to white
d$g = fa_g$scores[, 1] %>% standardize(focal_group = d$race == "White")
d$g_time1 = fa_g_time1$scores[, 1] %>% standardize(focal_group = d$race == "White")
d$g_time2 = fa_g_time2$scores[, 1] %>% standardize(focal_group = d$race == "White")

Pupil size

#plot
GG_scatter(d, "pupil_size_right", "pupil_size_left")
## `geom_smooth()` using formula 'y ~ x'

ggplot(d, aes(pupil_size_right, pupil_size_left)) +
  geom_count() +
  geom_smooth(se=F, method = "lm")
## Warning: Removed 7 rows containing non-finite values (stat_sum).
## `geom_smooth()` using formula 'y ~ x'
## Warning: Removed 7 rows containing non-finite values (stat_smooth).

#model
ols(pupil_size_right ~ pupil_size_left, data = d)
## Frequencies of Missing Values Due to Each Variable
## pupil_size_right  pupil_size_left 
##                3                4 
## 
## Linear Regression Model
##  
##  ols(formula = pupil_size_right ~ pupil_size_left, data = d)
##  
##  
##                    Model Likelihood    Discrimination    
##                          Ratio Test           Indexes    
##  Obs    4455    LR chi2    13137.73    R2       0.948    
##  sigma0.2207    d.f.              1    R2 adj   0.948    
##  d.f.   4453    Pr(> chi2)   0.0000    g        0.950    
##  
##  Residuals
##  
##       Min       1Q   Median       3Q      Max 
##  -5.84077 -0.01403 -0.01403  0.02063  2.02063 
##  
##  
##                  Coef   S.E.   t      Pr(>|t|)
##  Intercept       0.1180 0.0124   9.53 <0.0001 
##  pupil_size_left 0.9653 0.0034 283.79 <0.0001 
## 
#numerics
d %>% select(contains("pupil_size")) %>% describe() %>% as.matrix()
##                  vars    n mean    sd median trimmed  mad min max range skew
## pupil_size_right    1 4459 3.50 0.964      3    3.44 1.48   1   9     8 1.03
## pupil_size_left     2 4458 3.51 0.972      3    3.44 1.48   1   9     8 1.02
## pupil_size_mean     3 4462 3.51 0.961      3    3.44 1.48   1   9     8 1.02
##                  kurtosis     se
## pupil_size_right     2.31 0.0144
## pupil_size_left      2.36 0.0146
## pupil_size_mean      2.30 0.0144
#table of values, since they are quasi-discrete
d$pupil_size_left %>% table2()
d$pupil_size_right %>% table2()
#plot
d %>% select(AOP_ID, pupil_size_left, pupil_size_right) %>% 
  gather("key", "size", pupil_size_left, pupil_size_right) %>% 
  mutate(key = str_replace(key, "pupil_size_", "")) %>% 
  ggplot(aes(size)) +
  geom_bar() +
  scale_x_continuous(breaks = 1:9) +
  facet_wrap("key") +
  theme(text = element_text(size = 15))
## Warning: Removed 7 rows containing non-finite values (stat_count).

GG_save("figs/histogram_by_eye.png")
## Warning: Removed 7 rows containing non-finite values (stat_count).
#measurement time?
ggplot(d, aes(pupil_hour_measured, pupil_size_mean)) +
  geom_count() +
  geom_smooth()
## `geom_smooth()` using method = 'gam' and formula 'y ~ s(x, bs = "cs")'

Correlation matrix

#correlations to all tests
wtd.cors(d %>% select(g_tests, g, g_time1, g_time2),
         d %>% select(pupil_size_right, pupil_size_left, pupil_size_mean))
## Note: Using an external vector in selections is ambiguous.
## ℹ Use `all_of(g_tests)` instead of `g_tests` to silence this message.
## ℹ See <https://tidyselect.r-lib.org/reference/faq-external-vector.html>.
## This message is displayed once per session.
##                pupil_size_right pupil_size_left pupil_size_mean
## VE_time1                0.03597         0.03621         0.03625
## AR_time1                0.03879         0.03731         0.03805
## PA                      0.03404         0.03511         0.03455
## GIT                     0.03779         0.03808         0.03818
## AFQT                    0.05115         0.04957         0.05050
## VE_time2                0.05438         0.05580         0.05553
## AR_time2                0.05187         0.04719         0.04976
## WAIS_BD                 0.03730         0.03588         0.03679
## WAIS_GI                 0.02158         0.01933         0.02062
## WRAT                    0.02128         0.02189         0.02173
## PASAT                   0.03455         0.03228         0.03357
## WLGT                   -0.01249        -0.00910        -0.01073
## copy_direct             0.03236         0.03069         0.03173
## copy_immediate          0.03713         0.03756         0.03769
## copy_delayed            0.04095         0.04202         0.04179
## CVLT                    0.04117         0.04045         0.04076
## WCST                    0.03903         0.03521         0.03740
## GPT_left                0.01364         0.01723         0.01557
## GPT_right               0.00382         0.00271         0.00331
## g                       0.05153         0.05127         0.05165
## g_time1                 0.04928         0.04861         0.04908
## g_time2                 0.05235         0.05160         0.05231
#p values very small
wtd.cor(d %>% select(pupil_size_right, pupil_size_left, pupil_size_mean),
        d %>% select(g_tests, g, g_time1, g_time2))
## $correlation
##                pupil_size_right pupil_size_left pupil_size_mean
## VE_time1                0.03597         0.03621         0.03625
## AR_time1                0.03879         0.03731         0.03805
## PA                      0.03404         0.03511         0.03455
## GIT                     0.03779         0.03808         0.03818
## AFQT                    0.05115         0.04957         0.05050
## VE_time2                0.05438         0.05580         0.05553
## AR_time2                0.05187         0.04719         0.04976
## WAIS_BD                 0.03730         0.03588         0.03679
## WAIS_GI                 0.02158         0.01933         0.02062
## WRAT                    0.02128         0.02189         0.02173
## PASAT                   0.03455         0.03228         0.03357
## WLGT                   -0.01249        -0.00910        -0.01073
## copy_direct             0.03236         0.03069         0.03173
## copy_immediate          0.03713         0.03756         0.03769
## copy_delayed            0.04095         0.04202         0.04179
## CVLT                    0.04117         0.04045         0.04076
## WCST                    0.03903         0.03521         0.03740
## GPT_left                0.01364         0.01723         0.01557
## GPT_right               0.00382         0.00271         0.00331
## g                       0.05153         0.05127         0.05165
## g_time1                 0.04928         0.04861         0.04908
## g_time2                 0.05235         0.05160         0.05231
## 
## $std.err
##                pupil_size_right pupil_size_left pupil_size_mean
## VE_time1                 0.0151          0.0151          0.0151
## AR_time1                 0.0151          0.0151          0.0151
## PA                       0.0151          0.0151          0.0151
## GIT                      0.0151          0.0151          0.0151
## AFQT                     0.0150          0.0150          0.0150
## VE_time2                 0.0150          0.0150          0.0150
## AR_time2                 0.0150          0.0150          0.0150
## WAIS_BD                  0.0150          0.0150          0.0150
## WAIS_GI                  0.0150          0.0150          0.0150
## WRAT                     0.0150          0.0150          0.0150
## PASAT                    0.0150          0.0150          0.0150
## WLGT                     0.0150          0.0150          0.0150
## copy_direct              0.0150          0.0150          0.0150
## copy_immediate           0.0150          0.0150          0.0150
## copy_delayed             0.0150          0.0150          0.0150
## CVLT                     0.0150          0.0150          0.0150
## WCST                     0.0150          0.0150          0.0150
## GPT_left                 0.0150          0.0150          0.0150
## GPT_right                0.0150          0.0150          0.0150
## g                        0.0152          0.0152          0.0152
## g_time1                  0.0151          0.0151          0.0151
## g_time2                  0.0150          0.0150          0.0150
## 
## $t.value
##                pupil_size_right pupil_size_left pupil_size_mean
## VE_time1                  2.382           2.397           2.401
## AR_time1                  2.569           2.471           2.521
## PA                        2.255           2.325           2.289
## GIT                       2.500           2.519           2.527
## AFQT                      3.411           3.305           3.369
## VE_time2                  3.636           3.731           3.714
## AR_time2                  3.467           3.154           3.327
## WAIS_BD                   2.492           2.397           2.459
## WAIS_GI                   1.441           1.291           1.377
## WRAT                      1.421           1.462           1.451
## PASAT                     2.305           2.153           2.240
## WLGT                     -0.834          -0.608          -0.716
## copy_direct               2.162           2.050           2.120
## copy_immediate            2.481           2.509           2.519
## copy_delayed              2.736           2.807           2.793
## CVLT                      2.751           2.702           2.724
## WCST                      2.608           2.352           2.500
## GPT_left                  0.910           1.149           1.038
## GPT_right                 0.254           0.181           0.221
## g                         3.390           3.372           3.399
## g_time1                   3.254           3.209           3.242
## g_time2                   3.485           3.435           3.484
## 
## $p.value
##                pupil_size_right pupil_size_left pupil_size_mean
## VE_time1               0.017267        0.016560        0.016386
## AR_time1               0.010231        0.013521        0.011735
## PA                     0.024206        0.020112        0.022145
## GIT                    0.012458        0.011789        0.011535
## AFQT                   0.000653        0.000957        0.000761
## VE_time2               0.000280        0.000193        0.000207
## AR_time2               0.000530        0.001623        0.000884
## WAIS_BD                0.012749        0.016584        0.013989
## WAIS_GI                0.149702        0.196829        0.168455
## WRAT                   0.155477        0.143928        0.146842
## PASAT                  0.021238        0.031354        0.025129
## WLGT                   0.404322        0.543355        0.473728
## copy_direct            0.030687        0.040458        0.034039
## copy_immediate         0.013145        0.012150        0.011805
## copy_delayed           0.006237        0.005020        0.005243
## CVLT                   0.005969        0.006912        0.006473
## WCST                   0.009141        0.018736        0.012471
## GPT_left               0.363132        0.250735        0.299302
## GPT_right              0.799149        0.856696        0.825338
## g                      0.000706        0.000753        0.000684
## g_time1                0.001146        0.001339        0.001196
## g_time2                0.000496        0.000597        0.000498

Plots

#all
ggplot(d, aes(g, pupil_size_mean)) +
  geom_point() +
  geom_smooth(method = lm) +
  ylab("Pupil size (average)") +
  theme(text = element_text(size = 15))
## `geom_smooth()` using formula 'y ~ x'
## Warning: Removed 142 rows containing non-finite values (stat_smooth).
## Warning: Removed 142 rows containing missing values (geom_point).

GG_save("figs/scatter_all.png")
## `geom_smooth()` using formula 'y ~ x'
## Warning: Removed 142 rows containing non-finite values (stat_smooth).

## Warning: Removed 142 rows containing missing values (geom_point).
#by race
ggplot(d, aes(g, pupil_size_mean)) +
  geom_point() +
  geom_smooth(method = lm) +
  facet_wrap("race") +
  ylab("Pupil size (average)") +
  theme(text = element_text(size = 15))
## `geom_smooth()` using formula 'y ~ x'
## Warning: Removed 142 rows containing non-finite values (stat_smooth).

## Warning: Removed 142 rows containing missing values (geom_point).

GG_save("figs/scatter_by_race.png")
## `geom_smooth()` using formula 'y ~ x'
## Warning: Removed 142 rows containing non-finite values (stat_smooth).

## Warning: Removed 142 rows containing missing values (geom_point).

Regressions

#subsets
d_whites = d %>% filter(race == "White")
d_blacks = d %>% filter(race == "Black")
d_hispanics = d %>% filter(race == "Hispanic")

#fit models
models = list(
  all = ols(pupil_size_mean ~ g, data = d),
  whites = ols(pupil_size_mean ~ g, data = d_whites),
  blacks = ols(pupil_size_mean ~ g, data = d_blacks),
  hispanics = ols(pupil_size_mean ~ g, data = d_hispanics),
  all_covariates = ols(pupil_size_mean ~ g + age + race + drugs_past_year + smoker + drinks_per_month + drinks_per_day + rcs(pupil_hour_measured), data = d),
  whites_covariates = ols(pupil_size_mean ~ g + age + drugs_past_year + smoker + drinks_per_month + drinks_per_day + rcs(pupil_hour_measured), data = d_whites),
  blacks_covariates = ols(pupil_size_mean ~ g + age + drugs_past_year + smoker + drinks_per_month + drinks_per_day + rcs(pupil_hour_measured), data = d_blacks),
  hispanics_covariates = ols(pupil_size_mean ~ g + age + drugs_past_year + smoker + drinks_per_month + drinks_per_day + rcs(pupil_hour_measured), data = d_hispanics)
)

#print summaries
models
## $all
## Frequencies of Missing Values Due to Each Variable
## pupil_size_mean               g 
##               0             142 
## 
## Linear Regression Model
##  
##  ols(formula = pupil_size_mean ~ g, data = d)
##  
##  
##                  Model Likelihood    Discrimination    
##                        Ratio Test           Indexes    
##  Obs    4320    LR chi2     11.54    R2       0.003    
##  sigma0.9622    d.f.            1    R2 adj   0.002    
##  d.f.   4318    Pr(> chi2) 0.0007    g        0.057    
##  
##  Residuals
##  
##      Min      1Q  Median      3Q     Max 
##  -2.5783 -0.5347 -0.4497  0.4943  5.5434 
##  
##  
##            Coef   S.E.   t      Pr(>|t|)
##  Intercept 3.5185 0.0149 236.79 <0.0001 
##  g         0.0465 0.0137   3.40 0.0007  
##  
## 
## $whites
## Frequencies of Missing Values Due to Each Variable
## pupil_size_mean               g 
##               0              99 
## 
## Linear Regression Model
##  
##  ols(formula = pupil_size_mean ~ g, data = d_whites)
##  
##  
##                  Model Likelihood    Discrimination    
##                        Ratio Test           Indexes    
##  Obs    3555    LR chi2      0.19    R2       0.000    
##  sigma0.9585    d.f.            1    R2 adj   0.000    
##  d.f.   3553    Pr(> chi2) 0.6665    g        0.008    
##  
##  Residuals
##  
##      Min      1Q  Median      3Q     Max 
##  -2.5698 -0.5637 -0.5507  0.4409  5.4483 
##  
##  
##            Coef   S.E.   t      Pr(>|t|)
##  Intercept 3.5609 0.0161 221.50 <0.0001 
##  g         0.0069 0.0161   0.43 0.6666  
##  
## 
## $blacks
## Frequencies of Missing Values Due to Each Variable
## pupil_size_mean               g 
##               0              23 
## 
## Linear Regression Model
##  
##  ols(formula = pupil_size_mean ~ g, data = d_blacks)
##  
##  
##                  Model Likelihood    Discrimination    
##                        Ratio Test           Indexes    
##  Obs     502    LR chi2      0.24    R2       0.000    
##  sigma0.9721    d.f.            1    R2 adj  -0.002    
##  d.f.    500    Pr(> chi2) 0.6215    g        0.024    
##  
##  Residuals
##  
##      Min      1Q  Median      3Q     Max 
##  -2.2537 -0.2546 -0.2278  0.7521  4.7520 
##  
##  
##            Coef    S.E.   t     Pr(>|t|)
##  Intercept  3.1986 0.0773 41.38 <0.0001 
##  g         -0.0249 0.0505 -0.49 0.6224  
##  
## 
## $hispanics
## Frequencies of Missing Values Due to Each Variable
## pupil_size_mean               g 
##               0              19 
## 
## Linear Regression Model
##  
##  ols(formula = pupil_size_mean ~ g, data = d_hispanics)
##  
##  
##                  Model Likelihood    Discrimination    
##                        Ratio Test           Indexes    
##  Obs     181    LR chi2      0.01    R2       0.000    
##  sigma0.8791    d.f.            1    R2 adj  -0.006    
##  d.f.    179    Pr(> chi2) 0.9161    g        0.008    
##  
##  Residuals
##  
##      Min      1Q  Median      3Q     Max 
##  -1.8424 -0.3452 -0.3361  0.6560  3.6625 
##  
##  
##            Coef    S.E.   t     Pr(>|t|)
##  Intercept  3.3338 0.0869 38.37 <0.0001 
##  g         -0.0077 0.0738 -0.10 0.9167  
##  
## 
## $all_covariates
## Frequencies of Missing Values Due to Each Variable
##     pupil_size_mean                   g                 age                race 
##                   0                 142                   0                   0 
##     drugs_past_year              smoker    drinks_per_month      drinks_per_day 
##                   0                1163                  12                1120 
## pupil_hour_measured 
##                   0 
## 
## Linear Regression Model
##  
##  ols(formula = pupil_size_mean ~ g + age + race + drugs_past_year + 
##      smoker + drinks_per_month + drinks_per_day + rcs(pupil_hour_measured), 
##      data = d)
##  
##  
##                  Model Likelihood    Discrimination    
##                        Ratio Test           Indexes    
##  Obs    2467    LR chi2    238.55    R2       0.092    
##  sigma0.9090    d.f.           14    R2 adj   0.087    
##  d.f.   2452    Pr(> chi2) 0.0000    g        0.316    
##  
##  Residuals
##  
##      Min      1Q  Median      3Q     Max 
##  -3.1499 -0.4830 -0.2246  0.4951  4.7363 
##  
##  
##                         Coef    S.E.   t     Pr(>|t|)
##  Intercept               4.4097 0.3036 14.53 <0.0001 
##  g                       0.0038 0.0198  0.19 0.8470  
##  age                    -0.0312 0.0074 -4.21 <0.0001 
##  race=Black             -0.3542 0.0616 -5.75 <0.0001 
##  race=Hispanic          -0.2039 0.0990 -2.06 0.0396  
##  race=Asian             -0.3862 0.2284 -1.69 0.0910  
##  race=Native             0.0864 0.1834  0.47 0.6375  
##  drugs_past_year        -0.1892 0.0926 -2.04 0.0412  
##  smoker                 -0.0397 0.0394 -1.01 0.3137  
##  drinks_per_month        0.0018 0.0018  0.99 0.3223  
##  drinks_per_day          0.0051 0.0056  0.91 0.3609  
##  pupil_hour_measured     0.0664 0.0478  1.39 0.1656  
##  pupil_hour_measured'   -0.4604 0.6674 -0.69 0.4904  
##  pupil_hour_measured''   0.5201 0.8225  0.63 0.5272  
##  pupil_hour_measured'''  0.7561 0.5363  1.41 0.1587  
##  
## 
## $whites_covariates
## Frequencies of Missing Values Due to Each Variable
##     pupil_size_mean                   g                 age     drugs_past_year 
##                   0                  99                   0                   0 
##              smoker    drinks_per_month      drinks_per_day pupil_hour_measured 
##                 939                   6                 927                   0 
## 
## Linear Regression Model
##  
##  ols(formula = pupil_size_mean ~ g + age + drugs_past_year + smoker + 
##      drinks_per_month + drinks_per_day + rcs(pupil_hour_measured), 
##      data = d_whites)
##  
##  
##                  Model Likelihood    Discrimination    
##                        Ratio Test           Indexes    
##  Obs    2030    LR chi2    147.63    R2       0.070    
##  sigma0.9157    d.f.           10    R2 adj   0.066    
##  d.f.   2019    Pr(> chi2) 0.0000    g        0.273    
##  
##  Residuals
##  
##      Min      1Q  Median      3Q     Max 
##  -3.1221 -0.4999 -0.2751  0.5065  4.7602 
##  
##  
##                         Coef    S.E.   t     Pr(>|t|)
##  Intercept               4.2941 0.3421 12.55 <0.0001 
##  g                       0.0049 0.0217  0.23 0.8203  
##  age                    -0.0286 0.0083 -3.43 0.0006  
##  drugs_past_year        -0.1249 0.1017 -1.23 0.2193  
##  smoker                 -0.0525 0.0433 -1.21 0.2255  
##  drinks_per_month        0.0017 0.0020  0.86 0.3874  
##  drinks_per_day          0.0041 0.0069  0.59 0.5556  
##  pupil_hour_measured     0.0766 0.0528  1.45 0.1469  
##  pupil_hour_measured'   -0.6332 0.8476 -0.75 0.4552  
##  pupil_hour_measured''   0.7234 1.0424  0.69 0.4878  
##  pupil_hour_measured'''  0.6944 0.6410  1.08 0.2788  
##  
## 
## $blacks_covariates
## Frequencies of Missing Values Due to Each Variable
##     pupil_size_mean                   g                 age     drugs_past_year 
##                   0                  23                   0                   0 
##              smoker    drinks_per_month      drinks_per_day pupil_hour_measured 
##                 124                   4                 124                   0 
## 
## Linear Regression Model
##  
##  ols(formula = pupil_size_mean ~ g + age + drugs_past_year + smoker + 
##      drinks_per_month + drinks_per_day + rcs(pupil_hour_measured), 
##      data = d_blacks)
##  
##  
##                  Model Likelihood    Discrimination    
##                        Ratio Test           Indexes    
##  Obs     306    LR chi2     45.39    R2       0.138    
##  sigma0.8891    d.f.           10    R2 adj   0.109    
##  d.f.    295    Pr(> chi2) 0.0000    g        0.375    
##  
##  Residuals
##  
##       Min       1Q   Median       3Q      Max 
##  -2.56973 -0.43031 -0.03574  0.23678  3.82439 
##  
##  
##                         Coef    S.E.   t     Pr(>|t|)
##  Intercept               4.5516 0.7971  5.71 <0.0001 
##  g                      -0.0511 0.0624 -0.82 0.4134  
##  age                    -0.0450 0.0194 -2.31 0.0215  
##  drugs_past_year        -0.7196 0.3071 -2.34 0.0198  
##  smoker                  0.0736 0.1210  0.61 0.5438  
##  drinks_per_month        0.0000 0.0054  0.00 0.9976  
##  drinks_per_day         -0.0023 0.0108 -0.22 0.8291  
##  pupil_hour_measured     0.0353 0.1310  0.27 0.7876  
##  pupil_hour_measured'   -0.8596 1.8282 -0.47 0.6386  
##  pupil_hour_measured''   1.1491 2.2539  0.51 0.6105  
##  pupil_hour_measured''' -0.3386 1.4674 -0.23 0.8177  
##  
## 
## $hispanics_covariates
## Frequencies of Missing Values Due to Each Variable
##     pupil_size_mean                   g                 age     drugs_past_year 
##                   0                  19                   0                   0 
##              smoker    drinks_per_month      drinks_per_day pupil_hour_measured 
##                  75                   2                  45                   0 
## 
## Linear Regression Model
##  
##  ols(formula = pupil_size_mean ~ g + age + drugs_past_year + smoker + 
##      drinks_per_month + drinks_per_day + rcs(pupil_hour_measured), 
##      data = d_hispanics)
##  
##  
##                  Model Likelihood    Discrimination    
##                        Ratio Test           Indexes    
##  Obs      90    LR chi2     23.21    R2       0.227    
##  sigma0.7598    d.f.           10    R2 adj   0.130    
##  d.f.     79    Pr(> chi2) 0.0100    g        0.437    
##  
##  Residuals
##  
##       Min       1Q   Median       3Q      Max 
##  -1.65758 -0.40713 -0.06056  0.41528  2.24875 
##  
##  
##                         Coef    S.E.   t     Pr(>|t|)
##  Intercept               6.9279 1.4396  4.81 <0.0001 
##  g                       0.0798 0.0933  0.85 0.3952  
##  age                    -0.1004 0.0344 -2.92 0.0046  
##  drugs_past_year         0.0317 0.4713  0.07 0.9465  
##  smoker                 -0.2619 0.1813 -1.44 0.1525  
##  drinks_per_month        0.0093 0.0083  1.12 0.2659  
##  drinks_per_day          0.0484 0.0231  2.10 0.0393  
##  pupil_hour_measured     0.0756 0.2336  0.32 0.7469  
##  pupil_hour_measured'   -3.9678 3.8258 -1.04 0.3028  
##  pupil_hour_measured''   5.3761 4.7404  1.13 0.2602  
##  pupil_hour_measured''' -7.1715 3.9629 -1.81 0.0741  
## 
#summarize to a table
models %>% summarize_models()

Descriptive table

#by group
describeBy(d %>% select(pupil_size_left, pupil_size_right, pupil_size_mean, g), group = d$race) %>% 
  {
    tibble(
      variable = .$White %>% rownames(),
      whites = str_glue("{.$White$mean %>% format_digits()} ({.$White$sd %>% format_digits()})"),
      blacks = str_glue("{.$Black$mean %>% format_digits()} ({.$Black$sd %>% format_digits()})"),
      hispanics = str_glue("{.$Hispanic$mean %>% format_digits()} ({.$Hispanic$sd %>% format_digits()})")
    )
  }
#Cohen d
SMD_matrix(d$pupil_size_mean, d$race)
##           White   Black Hispanic   Asian  Native
## White        NA  0.3419    0.220  0.3687  0.0606
## Black    0.3419      NA   -0.122  0.0268 -0.2813
## Hispanic 0.2203 -0.1216       NA  0.1483 -0.1597
## Asian    0.3687  0.0268    0.148      NA -0.3080
## Native   0.0606 -0.2813   -0.160 -0.3080      NA

Meta

write_sessioninfo()
## R version 4.0.0 (2020-04-24)
## Platform: x86_64-pc-linux-gnu (64-bit)
## Running under: Linux Mint 19.3
## 
## Matrix products: default
## BLAS:   /usr/lib/x86_64-linux-gnu/blas/libblas.so.3.7.1
## LAPACK: /usr/lib/x86_64-linux-gnu/lapack/liblapack.so.3.7.1
## 
## locale:
##  [1] LC_CTYPE=en_US.UTF-8       LC_NUMERIC=C              
##  [3] LC_TIME=en_US.UTF-8        LC_COLLATE=en_US.UTF-8    
##  [5] LC_MONETARY=de_DE.UTF-8    LC_MESSAGES=en_US.UTF-8   
##  [7] LC_PAPER=de_DE.UTF-8       LC_NAME=C                 
##  [9] LC_ADDRESS=C               LC_TELEPHONE=C            
## [11] LC_MEASUREMENT=de_DE.UTF-8 LC_IDENTIFICATION=C       
## 
## attached base packages:
## [1] stats     graphics  grDevices utils     datasets  methods   base     
## 
## other attached packages:
##  [1] haven_2.2.0           rms_5.1-4             SparseM_1.78         
##  [4] kirkegaard_2020-05-18 metafor_2.4-0         Matrix_1.2-18        
##  [7] psych_1.9.12.31       magrittr_1.5          assertthat_0.2.1     
## [10] weights_1.0.1         mice_3.9.0            gdata_2.18.0         
## [13] Hmisc_4.4-0           Formula_1.2-3         survival_3.1-12      
## [16] lattice_0.20-41       forcats_0.5.0         stringr_1.4.0        
## [19] dplyr_0.8.99.9003     purrr_0.3.4           readr_1.3.1          
## [22] tidyr_1.0.3           tibble_3.0.1          ggplot2_3.3.0        
## [25] tidyverse_1.3.0       pacman_0.5.1         
## 
## loaded via a namespace (and not attached):
##  [1] nlme_3.1-147        fs_1.4.1            lubridate_1.7.8    
##  [4] RColorBrewer_1.1-2  httr_1.4.1          tools_4.0.0        
##  [7] backports_1.1.7     R6_2.4.1            rpart_4.1-15       
## [10] mgcv_1.8-31         DBI_1.1.0           colorspace_1.4-1   
## [13] nnet_7.3-14         withr_2.2.0         tidyselect_1.1.0   
## [16] gridExtra_2.3       mnormt_1.5-7        compiler_4.0.0     
## [19] quantreg_5.55       cli_2.0.2           rvest_0.3.5        
## [22] htmlTable_1.13.3    xml2_1.3.2          sandwich_2.5-1     
## [25] labeling_0.3        scales_1.1.1        checkmate_2.0.0    
## [28] mvtnorm_1.1-0       polspline_1.1.19    multilevel_2.6     
## [31] digest_0.6.25       foreign_0.8-76      rmarkdown_2.1      
## [34] base64enc_0.1-3     jpeg_0.1-8.1        pkgconfig_2.0.3    
## [37] htmltools_0.4.0     dbplyr_1.4.3        htmlwidgets_1.5.1  
## [40] rlang_0.4.6         readxl_1.3.1        rstudioapi_0.11    
## [43] farver_2.0.3        generics_0.0.2      zoo_1.8-8          
## [46] jsonlite_1.6.1      gtools_3.8.2        acepack_1.4.1      
## [49] Rcpp_1.0.4.6        munsell_0.5.0       fansi_0.4.1        
## [52] lifecycle_0.2.0     multcomp_1.4-13     stringi_1.4.6      
## [55] yaml_2.2.1          MASS_7.3-51.6       plyr_1.8.6         
## [58] psychometric_2.2    grid_4.0.0          parallel_4.0.0     
## [61] crayon_1.3.4        splines_4.0.0       hms_0.5.3          
## [64] knitr_1.28          pillar_1.4.4        codetools_0.2-16   
## [67] reprex_0.3.0        glue_1.4.1          evaluate_0.14      
## [70] latticeExtra_0.6-29 data.table_1.12.8   modelr_0.1.7       
## [73] png_0.1-7           vctrs_0.3.0         MatrixModels_0.4-1 
## [76] cellranger_1.1.0    gtable_0.3.0        xfun_0.13          
## [79] broom_0.5.6         cluster_2.1.0       TH.data_1.0-10     
## [82] ellipsis_0.3.1
#data out
d %>% write_rds("data/data_out.rds", compress = "xz")