Init
options(digits = 3)
library(pacman)
p_load(kirkegaard, rms, haven)
theme_set(theme_bw())
Data
#Main file
d = read_rds("data/VES_dataset.rds")
#Nerve file
ncv = read_sav("data/nerve conduction_workfile_3.sav")
#variable list
vars_d = read_csv("data/VES_dataset_variables.csv")
## Parsed with column specification:
## cols(
## number = col_double(),
## code = col_character(),
## name = col_character()
## )
vars_ncv = df_var_table(ncv)
#merge
d = full_join(d, ncv %>% select(AOP_ID, !!(ncv %>% names() %>% setdiff(names(d)))), by = "AOP_ID")
Recode
#modify variables
d %<>% mutate(
#rename tests for clarity
VE_time1 = VE,
AR_time1 = AR,
VE_time2 = VESS,
AR_time2 = ARSS,
GT = GT2,
#pupil size variables: these are in neurology.pdf
pupil_size_right = NE01A15,
pupil_size_left = NE01A16,
pupil_size_mean = cbind(pupil_size_left, pupil_size_right) %>% rowMeans(na.rm=T),
pupil_hour_measured = NE01003A,
#covariates: these are in medical history.pdf
drugs_past_year = (MH07188 == 2),
smoker = c(MH07179 == 2),
drinks_per_month = (MH06174),
drinks_per_day = (MH06175)
)
g factor
#g tests
g_tests_early = c("VE_time1", "AR_time1", "PA", "GIT", "AFQT")
g_tests_later = c("VE_time2", "AR_time2", "WAIS_BD", "WAIS_GI", "WRAT", "PASAT", "WLGT", "copy_direct", "copy_immediate", "copy_delayed", "CVLT", "WCST", "GPT_left", "GPT_right")
g_tests = c(g_tests_early, g_tests_later)
#test correlations
wtd.cors(d[g_tests])
## VE_time1 AR_time1 PA GIT AFQT VE_time2 AR_time2 WAIS_BD
## VE_time1 1.000 0.699 0.516 0.659 0.714 0.824 0.642 0.437
## AR_time1 0.699 1.000 0.576 0.589 0.737 0.658 0.785 0.502
## PA 0.516 0.576 1.000 0.467 0.728 0.484 0.545 0.634
## GIT 0.659 0.589 0.467 1.000 0.645 0.620 0.548 0.418
## AFQT 0.714 0.737 0.728 0.645 1.000 0.670 0.688 0.629
## VE_time2 0.824 0.658 0.484 0.620 0.670 1.000 0.691 0.453
## AR_time2 0.642 0.785 0.545 0.548 0.688 0.691 1.000 0.532
## WAIS_BD 0.437 0.502 0.634 0.418 0.629 0.453 0.532 1.000
## WAIS_GI 0.725 0.635 0.482 0.582 0.626 0.719 0.622 0.453
## WRAT 0.746 0.589 0.412 0.517 0.578 0.766 0.585 0.382
## PASAT 0.408 0.521 0.371 0.365 0.432 0.440 0.562 0.388
## WLGT 0.443 0.370 0.289 0.310 0.360 0.463 0.365 0.281
## copy_direct 0.290 0.333 0.380 0.254 0.372 0.325 0.384 0.398
## copy_immediate 0.303 0.335 0.456 0.311 0.452 0.316 0.387 0.490
## copy_delayed 0.301 0.337 0.452 0.308 0.451 0.315 0.385 0.492
## CVLT 0.317 0.331 0.264 0.250 0.312 0.333 0.356 0.269
## WCST 0.327 0.360 0.331 0.282 0.368 0.361 0.396 0.356
## GPT_left 0.208 0.212 0.263 0.192 0.269 0.226 0.236 0.307
## GPT_right 0.204 0.201 0.261 0.174 0.253 0.220 0.240 0.301
## WAIS_GI WRAT PASAT WLGT copy_direct copy_immediate
## VE_time1 0.725 0.746 0.408 0.443 0.290 0.303
## AR_time1 0.635 0.589 0.521 0.370 0.333 0.335
## PA 0.482 0.412 0.371 0.289 0.380 0.456
## GIT 0.582 0.517 0.365 0.310 0.254 0.311
## AFQT 0.626 0.578 0.432 0.360 0.372 0.452
## VE_time2 0.719 0.766 0.440 0.463 0.325 0.316
## AR_time2 0.622 0.585 0.562 0.365 0.384 0.387
## WAIS_BD 0.453 0.382 0.388 0.281 0.398 0.490
## WAIS_GI 1.000 0.652 0.366 0.414 0.278 0.343
## WRAT 0.652 1.000 0.417 0.504 0.269 0.269
## PASAT 0.366 0.417 1.000 0.357 0.247 0.282
## WLGT 0.414 0.504 0.357 1.000 0.176 0.213
## copy_direct 0.278 0.269 0.247 0.176 1.000 0.474
## copy_immediate 0.343 0.269 0.282 0.213 0.474 1.000
## copy_delayed 0.345 0.271 0.281 0.216 0.482 0.915
## CVLT 0.329 0.309 0.289 0.278 0.205 0.318
## WCST 0.330 0.292 0.285 0.209 0.291 0.268
## GPT_left 0.186 0.204 0.216 0.157 0.223 0.227
## GPT_right 0.173 0.196 0.226 0.168 0.223 0.194
## copy_delayed CVLT WCST GPT_left GPT_right
## VE_time1 0.301 0.317 0.327 0.208 0.204
## AR_time1 0.337 0.331 0.360 0.212 0.201
## PA 0.452 0.264 0.331 0.263 0.261
## GIT 0.308 0.250 0.282 0.192 0.174
## AFQT 0.451 0.312 0.368 0.269 0.253
## VE_time2 0.315 0.333 0.361 0.226 0.220
## AR_time2 0.385 0.356 0.396 0.236 0.240
## WAIS_BD 0.492 0.269 0.356 0.307 0.301
## WAIS_GI 0.345 0.329 0.330 0.186 0.173
## WRAT 0.271 0.309 0.292 0.204 0.196
## PASAT 0.281 0.289 0.285 0.216 0.226
## WLGT 0.216 0.278 0.209 0.157 0.168
## copy_direct 0.482 0.205 0.291 0.223 0.223
## copy_immediate 0.915 0.318 0.268 0.227 0.194
## copy_delayed 1.000 0.327 0.268 0.227 0.199
## CVLT 0.327 1.000 0.192 0.115 0.117
## WCST 0.268 0.192 1.000 0.196 0.186
## GPT_left 0.227 0.115 0.196 1.000 0.634
## GPT_right 0.199 0.117 0.186 0.634 1.000
#all tests g
fa_g = fa(d[g_tests])
fa_g
## Factor Analysis using method = minres
## Call: fa(r = d[g_tests])
## Standardized loadings (pattern matrix) based upon correlation matrix
## MR1 h2 u2 com
## VE_time1 0.82 0.67 0.33 1
## AR_time1 0.81 0.66 0.34 1
## PA 0.70 0.50 0.50 1
## GIT 0.69 0.47 0.53 1
## AFQT 0.85 0.73 0.27 1
## VE_time2 0.82 0.67 0.33 1
## AR_time2 0.82 0.67 0.33 1
## WAIS_BD 0.67 0.45 0.55 1
## WAIS_GI 0.76 0.58 0.42 1
## WRAT 0.73 0.53 0.47 1
## PASAT 0.57 0.32 0.68 1
## WLGT 0.49 0.24 0.76 1
## copy_direct 0.47 0.22 0.78 1
## copy_immediate 0.55 0.30 0.70 1
## copy_delayed 0.55 0.30 0.70 1
## CVLT 0.42 0.18 0.82 1
## WCST 0.46 0.21 0.79 1
## GPT_left 0.34 0.12 0.88 1
## GPT_right 0.33 0.11 0.89 1
##
## MR1
## SS loadings 7.93
## Proportion Var 0.42
##
## Mean item complexity = 1
## Test of the hypothesis that 1 factor is sufficient.
##
## The degrees of freedom for the null model are 171 and the objective function was 12.5 with Chi Square of 55829
## The degrees of freedom for the model are 152 and the objective function was 3.86
##
## The root mean square of the residuals (RMSR) is 0.09
## The df corrected root mean square of the residuals is 0.1
##
## The harmonic number of observations is 4426 with the empirical chi square 12980 with prob < 0
## The total number of observations was 4462 with Likelihood Chi Square = 17178 with prob < 0
##
## Tucker Lewis Index of factoring reliability = 0.656
## RMSEA index = 0.158 and the 90 % confidence intervals are 0.156 0.16
## BIC = 15901
## Fit based upon off diagonal values = 0.95
## Measures of factor score adequacy
## MR1
## Correlation of (regression) scores with factors 0.97
## Multiple R square of scores with factors 0.95
## Minimum correlation of possible factor scores 0.89
fa_g$loadings[, 1] %>% describe() %>% as.matrix()
## vars n mean sd median trimmed mad min max range skew kurtosis
## X1 1 19 0.624 0.172 0.673 0.628 0.212 0.331 0.853 0.522 -0.233 -1.44
## se
## X1 0.0394
#earlier tests only
fa_g_time1 = fa(d[g_tests_early])
fa_g_time1
## Factor Analysis using method = minres
## Call: fa(r = d[g_tests_early])
## Standardized loadings (pattern matrix) based upon correlation matrix
## MR1 h2 u2 com
## VE_time1 0.82 0.67 0.33 1
## AR_time1 0.82 0.68 0.32 1
## PA 0.70 0.49 0.51 1
## GIT 0.73 0.53 0.47 1
## AFQT 0.92 0.84 0.16 1
##
## MR1
## SS loadings 3.20
## Proportion Var 0.64
##
## Mean item complexity = 1
## Test of the hypothesis that 1 factor is sufficient.
##
## The degrees of freedom for the null model are 10 and the objective function was 3.1 with Chi Square of 13826
## The degrees of freedom for the model are 5 and the objective function was 0.15
##
## The root mean square of the residuals (RMSR) is 0.04
## The df corrected root mean square of the residuals is 0.06
##
## The harmonic number of observations is 4377 with the empirical chi square 169 with prob < 9.5e-35
## The total number of observations was 4462 with Likelihood Chi Square = 690 with prob < 8.8e-147
##
## Tucker Lewis Index of factoring reliability = 0.901
## RMSEA index = 0.175 and the 90 % confidence intervals are 0.164 0.186
## BIC = 648
## Fit based upon off diagonal values = 1
## Measures of factor score adequacy
## MR1
## Correlation of (regression) scores with factors 0.96
## Multiple R square of scores with factors 0.92
## Minimum correlation of possible factor scores 0.85
fa_g_time1$loadings[, 1] %>% describe() %>% as.matrix()
## vars n mean sd median trimmed mad min max range skew kurtosis
## X1 1 5 0.797 0.0867 0.816 0.797 0.135 0.701 0.918 0.217 0.181 -1.84
## se
## X1 0.0388
#later tests only
fa_g_time2 = fa(d[g_tests_later])
fa_g_time2
## Factor Analysis using method = minres
## Call: fa(r = d[g_tests_later])
## Standardized loadings (pattern matrix) based upon correlation matrix
## MR1 h2 u2 com
## VE_time2 0.78 0.61 0.39 1
## AR_time2 0.79 0.62 0.38 1
## WAIS_BD 0.67 0.45 0.55 1
## WAIS_GI 0.72 0.52 0.48 1
## WRAT 0.71 0.50 0.50 1
## PASAT 0.58 0.33 0.67 1
## WLGT 0.50 0.25 0.75 1
## copy_direct 0.51 0.26 0.74 1
## copy_immediate 0.61 0.37 0.63 1
## copy_delayed 0.61 0.37 0.63 1
## CVLT 0.45 0.20 0.80 1
## WCST 0.47 0.22 0.78 1
## GPT_left 0.37 0.14 0.86 1
## GPT_right 0.36 0.13 0.87 1
##
## MR1
## SS loadings 5.00
## Proportion Var 0.36
##
## Mean item complexity = 1
## Test of the hypothesis that 1 factor is sufficient.
##
## The degrees of freedom for the null model are 91 and the objective function was 7.16 with Chi Square of 31892
## The degrees of freedom for the model are 77 and the objective function was 2.8
##
## The root mean square of the residuals (RMSR) is 0.11
## The df corrected root mean square of the residuals is 0.12
##
## The harmonic number of observations is 4457 with the empirical chi square 9373 with prob < 0
## The total number of observations was 4462 with Likelihood Chi Square = 12463 with prob < 0
##
## Tucker Lewis Index of factoring reliability = 0.54
## RMSEA index = 0.19 and the 90 % confidence intervals are 0.187 0.193
## BIC = 11816
## Fit based upon off diagonal values = 0.92
## Measures of factor score adequacy
## MR1
## Correlation of (regression) scores with factors 0.95
## Multiple R square of scores with factors 0.90
## Minimum correlation of possible factor scores 0.80
fa_g_time2$loadings[, 1] %>% describe() %>% as.matrix()
## vars n mean sd median trimmed mad min max range skew kurtosis
## X1 1 14 0.582 0.141 0.592 0.583 0.173 0.364 0.791 0.426 -0.0247 -1.42
## se
## X1 0.0378
#save scores, standardize to white
d$g = fa_g$scores[, 1] %>% standardize(focal_group = d$race == "White")
d$g_time1 = fa_g_time1$scores[, 1] %>% standardize(focal_group = d$race == "White")
d$g_time2 = fa_g_time2$scores[, 1] %>% standardize(focal_group = d$race == "White")
Pupil size
#plot
GG_scatter(d, "pupil_size_right", "pupil_size_left")
## `geom_smooth()` using formula 'y ~ x'

ggplot(d, aes(pupil_size_right, pupil_size_left)) +
geom_count() +
geom_smooth(se=F, method = "lm")
## Warning: Removed 7 rows containing non-finite values (stat_sum).
## `geom_smooth()` using formula 'y ~ x'
## Warning: Removed 7 rows containing non-finite values (stat_smooth).

#model
ols(pupil_size_right ~ pupil_size_left, data = d)
## Frequencies of Missing Values Due to Each Variable
## pupil_size_right pupil_size_left
## 3 4
##
## Linear Regression Model
##
## ols(formula = pupil_size_right ~ pupil_size_left, data = d)
##
##
## Model Likelihood Discrimination
## Ratio Test Indexes
## Obs 4455 LR chi2 13137.73 R2 0.948
## sigma0.2207 d.f. 1 R2 adj 0.948
## d.f. 4453 Pr(> chi2) 0.0000 g 0.950
##
## Residuals
##
## Min 1Q Median 3Q Max
## -5.84077 -0.01403 -0.01403 0.02063 2.02063
##
##
## Coef S.E. t Pr(>|t|)
## Intercept 0.1180 0.0124 9.53 <0.0001
## pupil_size_left 0.9653 0.0034 283.79 <0.0001
##
#numerics
d %>% select(contains("pupil_size")) %>% describe() %>% as.matrix()
## vars n mean sd median trimmed mad min max range skew
## pupil_size_right 1 4459 3.50 0.964 3 3.44 1.48 1 9 8 1.03
## pupil_size_left 2 4458 3.51 0.972 3 3.44 1.48 1 9 8 1.02
## pupil_size_mean 3 4462 3.51 0.961 3 3.44 1.48 1 9 8 1.02
## kurtosis se
## pupil_size_right 2.31 0.0144
## pupil_size_left 2.36 0.0146
## pupil_size_mean 2.30 0.0144
#table of values, since they are quasi-discrete
d$pupil_size_left %>% table2()
d$pupil_size_right %>% table2()
#plot
d %>% select(AOP_ID, pupil_size_left, pupil_size_right) %>%
gather("key", "size", pupil_size_left, pupil_size_right) %>%
mutate(key = str_replace(key, "pupil_size_", "")) %>%
ggplot(aes(size)) +
geom_bar() +
scale_x_continuous(breaks = 1:9) +
facet_wrap("key") +
theme(text = element_text(size = 15))
## Warning: Removed 7 rows containing non-finite values (stat_count).

GG_save("figs/histogram_by_eye.png")
## Warning: Removed 7 rows containing non-finite values (stat_count).
#measurement time?
ggplot(d, aes(pupil_hour_measured, pupil_size_mean)) +
geom_count() +
geom_smooth()
## `geom_smooth()` using method = 'gam' and formula 'y ~ s(x, bs = "cs")'

Correlation matrix
#correlations to all tests
wtd.cors(d %>% select(g_tests, g, g_time1, g_time2),
d %>% select(pupil_size_right, pupil_size_left, pupil_size_mean))
## Note: Using an external vector in selections is ambiguous.
## ℹ Use `all_of(g_tests)` instead of `g_tests` to silence this message.
## ℹ See <https://tidyselect.r-lib.org/reference/faq-external-vector.html>.
## This message is displayed once per session.
## pupil_size_right pupil_size_left pupil_size_mean
## VE_time1 0.03597 0.03621 0.03625
## AR_time1 0.03879 0.03731 0.03805
## PA 0.03404 0.03511 0.03455
## GIT 0.03779 0.03808 0.03818
## AFQT 0.05115 0.04957 0.05050
## VE_time2 0.05438 0.05580 0.05553
## AR_time2 0.05187 0.04719 0.04976
## WAIS_BD 0.03730 0.03588 0.03679
## WAIS_GI 0.02158 0.01933 0.02062
## WRAT 0.02128 0.02189 0.02173
## PASAT 0.03455 0.03228 0.03357
## WLGT -0.01249 -0.00910 -0.01073
## copy_direct 0.03236 0.03069 0.03173
## copy_immediate 0.03713 0.03756 0.03769
## copy_delayed 0.04095 0.04202 0.04179
## CVLT 0.04117 0.04045 0.04076
## WCST 0.03903 0.03521 0.03740
## GPT_left 0.01364 0.01723 0.01557
## GPT_right 0.00382 0.00271 0.00331
## g 0.05153 0.05127 0.05165
## g_time1 0.04928 0.04861 0.04908
## g_time2 0.05235 0.05160 0.05231
#p values very small
wtd.cor(d %>% select(pupil_size_right, pupil_size_left, pupil_size_mean),
d %>% select(g_tests, g, g_time1, g_time2))
## $correlation
## pupil_size_right pupil_size_left pupil_size_mean
## VE_time1 0.03597 0.03621 0.03625
## AR_time1 0.03879 0.03731 0.03805
## PA 0.03404 0.03511 0.03455
## GIT 0.03779 0.03808 0.03818
## AFQT 0.05115 0.04957 0.05050
## VE_time2 0.05438 0.05580 0.05553
## AR_time2 0.05187 0.04719 0.04976
## WAIS_BD 0.03730 0.03588 0.03679
## WAIS_GI 0.02158 0.01933 0.02062
## WRAT 0.02128 0.02189 0.02173
## PASAT 0.03455 0.03228 0.03357
## WLGT -0.01249 -0.00910 -0.01073
## copy_direct 0.03236 0.03069 0.03173
## copy_immediate 0.03713 0.03756 0.03769
## copy_delayed 0.04095 0.04202 0.04179
## CVLT 0.04117 0.04045 0.04076
## WCST 0.03903 0.03521 0.03740
## GPT_left 0.01364 0.01723 0.01557
## GPT_right 0.00382 0.00271 0.00331
## g 0.05153 0.05127 0.05165
## g_time1 0.04928 0.04861 0.04908
## g_time2 0.05235 0.05160 0.05231
##
## $std.err
## pupil_size_right pupil_size_left pupil_size_mean
## VE_time1 0.0151 0.0151 0.0151
## AR_time1 0.0151 0.0151 0.0151
## PA 0.0151 0.0151 0.0151
## GIT 0.0151 0.0151 0.0151
## AFQT 0.0150 0.0150 0.0150
## VE_time2 0.0150 0.0150 0.0150
## AR_time2 0.0150 0.0150 0.0150
## WAIS_BD 0.0150 0.0150 0.0150
## WAIS_GI 0.0150 0.0150 0.0150
## WRAT 0.0150 0.0150 0.0150
## PASAT 0.0150 0.0150 0.0150
## WLGT 0.0150 0.0150 0.0150
## copy_direct 0.0150 0.0150 0.0150
## copy_immediate 0.0150 0.0150 0.0150
## copy_delayed 0.0150 0.0150 0.0150
## CVLT 0.0150 0.0150 0.0150
## WCST 0.0150 0.0150 0.0150
## GPT_left 0.0150 0.0150 0.0150
## GPT_right 0.0150 0.0150 0.0150
## g 0.0152 0.0152 0.0152
## g_time1 0.0151 0.0151 0.0151
## g_time2 0.0150 0.0150 0.0150
##
## $t.value
## pupil_size_right pupil_size_left pupil_size_mean
## VE_time1 2.382 2.397 2.401
## AR_time1 2.569 2.471 2.521
## PA 2.255 2.325 2.289
## GIT 2.500 2.519 2.527
## AFQT 3.411 3.305 3.369
## VE_time2 3.636 3.731 3.714
## AR_time2 3.467 3.154 3.327
## WAIS_BD 2.492 2.397 2.459
## WAIS_GI 1.441 1.291 1.377
## WRAT 1.421 1.462 1.451
## PASAT 2.305 2.153 2.240
## WLGT -0.834 -0.608 -0.716
## copy_direct 2.162 2.050 2.120
## copy_immediate 2.481 2.509 2.519
## copy_delayed 2.736 2.807 2.793
## CVLT 2.751 2.702 2.724
## WCST 2.608 2.352 2.500
## GPT_left 0.910 1.149 1.038
## GPT_right 0.254 0.181 0.221
## g 3.390 3.372 3.399
## g_time1 3.254 3.209 3.242
## g_time2 3.485 3.435 3.484
##
## $p.value
## pupil_size_right pupil_size_left pupil_size_mean
## VE_time1 0.017267 0.016560 0.016386
## AR_time1 0.010231 0.013521 0.011735
## PA 0.024206 0.020112 0.022145
## GIT 0.012458 0.011789 0.011535
## AFQT 0.000653 0.000957 0.000761
## VE_time2 0.000280 0.000193 0.000207
## AR_time2 0.000530 0.001623 0.000884
## WAIS_BD 0.012749 0.016584 0.013989
## WAIS_GI 0.149702 0.196829 0.168455
## WRAT 0.155477 0.143928 0.146842
## PASAT 0.021238 0.031354 0.025129
## WLGT 0.404322 0.543355 0.473728
## copy_direct 0.030687 0.040458 0.034039
## copy_immediate 0.013145 0.012150 0.011805
## copy_delayed 0.006237 0.005020 0.005243
## CVLT 0.005969 0.006912 0.006473
## WCST 0.009141 0.018736 0.012471
## GPT_left 0.363132 0.250735 0.299302
## GPT_right 0.799149 0.856696 0.825338
## g 0.000706 0.000753 0.000684
## g_time1 0.001146 0.001339 0.001196
## g_time2 0.000496 0.000597 0.000498
Plots
#all
ggplot(d, aes(g, pupil_size_mean)) +
geom_point() +
geom_smooth(method = lm) +
ylab("Pupil size (average)") +
theme(text = element_text(size = 15))
## `geom_smooth()` using formula 'y ~ x'
## Warning: Removed 142 rows containing non-finite values (stat_smooth).
## Warning: Removed 142 rows containing missing values (geom_point).

GG_save("figs/scatter_all.png")
## `geom_smooth()` using formula 'y ~ x'
## Warning: Removed 142 rows containing non-finite values (stat_smooth).
## Warning: Removed 142 rows containing missing values (geom_point).
#by race
ggplot(d, aes(g, pupil_size_mean)) +
geom_point() +
geom_smooth(method = lm) +
facet_wrap("race") +
ylab("Pupil size (average)") +
theme(text = element_text(size = 15))
## `geom_smooth()` using formula 'y ~ x'
## Warning: Removed 142 rows containing non-finite values (stat_smooth).
## Warning: Removed 142 rows containing missing values (geom_point).

GG_save("figs/scatter_by_race.png")
## `geom_smooth()` using formula 'y ~ x'
## Warning: Removed 142 rows containing non-finite values (stat_smooth).
## Warning: Removed 142 rows containing missing values (geom_point).
Regressions
#subsets
d_whites = d %>% filter(race == "White")
d_blacks = d %>% filter(race == "Black")
d_hispanics = d %>% filter(race == "Hispanic")
#fit models
models = list(
all = ols(pupil_size_mean ~ g, data = d),
whites = ols(pupil_size_mean ~ g, data = d_whites),
blacks = ols(pupil_size_mean ~ g, data = d_blacks),
hispanics = ols(pupil_size_mean ~ g, data = d_hispanics),
all_covariates = ols(pupil_size_mean ~ g + age + race + drugs_past_year + smoker + drinks_per_month + drinks_per_day + rcs(pupil_hour_measured), data = d),
whites_covariates = ols(pupil_size_mean ~ g + age + drugs_past_year + smoker + drinks_per_month + drinks_per_day + rcs(pupil_hour_measured), data = d_whites),
blacks_covariates = ols(pupil_size_mean ~ g + age + drugs_past_year + smoker + drinks_per_month + drinks_per_day + rcs(pupil_hour_measured), data = d_blacks),
hispanics_covariates = ols(pupil_size_mean ~ g + age + drugs_past_year + smoker + drinks_per_month + drinks_per_day + rcs(pupil_hour_measured), data = d_hispanics)
)
#print summaries
models
## $all
## Frequencies of Missing Values Due to Each Variable
## pupil_size_mean g
## 0 142
##
## Linear Regression Model
##
## ols(formula = pupil_size_mean ~ g, data = d)
##
##
## Model Likelihood Discrimination
## Ratio Test Indexes
## Obs 4320 LR chi2 11.54 R2 0.003
## sigma0.9622 d.f. 1 R2 adj 0.002
## d.f. 4318 Pr(> chi2) 0.0007 g 0.057
##
## Residuals
##
## Min 1Q Median 3Q Max
## -2.5783 -0.5347 -0.4497 0.4943 5.5434
##
##
## Coef S.E. t Pr(>|t|)
## Intercept 3.5185 0.0149 236.79 <0.0001
## g 0.0465 0.0137 3.40 0.0007
##
##
## $whites
## Frequencies of Missing Values Due to Each Variable
## pupil_size_mean g
## 0 99
##
## Linear Regression Model
##
## ols(formula = pupil_size_mean ~ g, data = d_whites)
##
##
## Model Likelihood Discrimination
## Ratio Test Indexes
## Obs 3555 LR chi2 0.19 R2 0.000
## sigma0.9585 d.f. 1 R2 adj 0.000
## d.f. 3553 Pr(> chi2) 0.6665 g 0.008
##
## Residuals
##
## Min 1Q Median 3Q Max
## -2.5698 -0.5637 -0.5507 0.4409 5.4483
##
##
## Coef S.E. t Pr(>|t|)
## Intercept 3.5609 0.0161 221.50 <0.0001
## g 0.0069 0.0161 0.43 0.6666
##
##
## $blacks
## Frequencies of Missing Values Due to Each Variable
## pupil_size_mean g
## 0 23
##
## Linear Regression Model
##
## ols(formula = pupil_size_mean ~ g, data = d_blacks)
##
##
## Model Likelihood Discrimination
## Ratio Test Indexes
## Obs 502 LR chi2 0.24 R2 0.000
## sigma0.9721 d.f. 1 R2 adj -0.002
## d.f. 500 Pr(> chi2) 0.6215 g 0.024
##
## Residuals
##
## Min 1Q Median 3Q Max
## -2.2537 -0.2546 -0.2278 0.7521 4.7520
##
##
## Coef S.E. t Pr(>|t|)
## Intercept 3.1986 0.0773 41.38 <0.0001
## g -0.0249 0.0505 -0.49 0.6224
##
##
## $hispanics
## Frequencies of Missing Values Due to Each Variable
## pupil_size_mean g
## 0 19
##
## Linear Regression Model
##
## ols(formula = pupil_size_mean ~ g, data = d_hispanics)
##
##
## Model Likelihood Discrimination
## Ratio Test Indexes
## Obs 181 LR chi2 0.01 R2 0.000
## sigma0.8791 d.f. 1 R2 adj -0.006
## d.f. 179 Pr(> chi2) 0.9161 g 0.008
##
## Residuals
##
## Min 1Q Median 3Q Max
## -1.8424 -0.3452 -0.3361 0.6560 3.6625
##
##
## Coef S.E. t Pr(>|t|)
## Intercept 3.3338 0.0869 38.37 <0.0001
## g -0.0077 0.0738 -0.10 0.9167
##
##
## $all_covariates
## Frequencies of Missing Values Due to Each Variable
## pupil_size_mean g age race
## 0 142 0 0
## drugs_past_year smoker drinks_per_month drinks_per_day
## 0 1163 12 1120
## pupil_hour_measured
## 0
##
## Linear Regression Model
##
## ols(formula = pupil_size_mean ~ g + age + race + drugs_past_year +
## smoker + drinks_per_month + drinks_per_day + rcs(pupil_hour_measured),
## data = d)
##
##
## Model Likelihood Discrimination
## Ratio Test Indexes
## Obs 2467 LR chi2 238.55 R2 0.092
## sigma0.9090 d.f. 14 R2 adj 0.087
## d.f. 2452 Pr(> chi2) 0.0000 g 0.316
##
## Residuals
##
## Min 1Q Median 3Q Max
## -3.1499 -0.4830 -0.2246 0.4951 4.7363
##
##
## Coef S.E. t Pr(>|t|)
## Intercept 4.4097 0.3036 14.53 <0.0001
## g 0.0038 0.0198 0.19 0.8470
## age -0.0312 0.0074 -4.21 <0.0001
## race=Black -0.3542 0.0616 -5.75 <0.0001
## race=Hispanic -0.2039 0.0990 -2.06 0.0396
## race=Asian -0.3862 0.2284 -1.69 0.0910
## race=Native 0.0864 0.1834 0.47 0.6375
## drugs_past_year -0.1892 0.0926 -2.04 0.0412
## smoker -0.0397 0.0394 -1.01 0.3137
## drinks_per_month 0.0018 0.0018 0.99 0.3223
## drinks_per_day 0.0051 0.0056 0.91 0.3609
## pupil_hour_measured 0.0664 0.0478 1.39 0.1656
## pupil_hour_measured' -0.4604 0.6674 -0.69 0.4904
## pupil_hour_measured'' 0.5201 0.8225 0.63 0.5272
## pupil_hour_measured''' 0.7561 0.5363 1.41 0.1587
##
##
## $whites_covariates
## Frequencies of Missing Values Due to Each Variable
## pupil_size_mean g age drugs_past_year
## 0 99 0 0
## smoker drinks_per_month drinks_per_day pupil_hour_measured
## 939 6 927 0
##
## Linear Regression Model
##
## ols(formula = pupil_size_mean ~ g + age + drugs_past_year + smoker +
## drinks_per_month + drinks_per_day + rcs(pupil_hour_measured),
## data = d_whites)
##
##
## Model Likelihood Discrimination
## Ratio Test Indexes
## Obs 2030 LR chi2 147.63 R2 0.070
## sigma0.9157 d.f. 10 R2 adj 0.066
## d.f. 2019 Pr(> chi2) 0.0000 g 0.273
##
## Residuals
##
## Min 1Q Median 3Q Max
## -3.1221 -0.4999 -0.2751 0.5065 4.7602
##
##
## Coef S.E. t Pr(>|t|)
## Intercept 4.2941 0.3421 12.55 <0.0001
## g 0.0049 0.0217 0.23 0.8203
## age -0.0286 0.0083 -3.43 0.0006
## drugs_past_year -0.1249 0.1017 -1.23 0.2193
## smoker -0.0525 0.0433 -1.21 0.2255
## drinks_per_month 0.0017 0.0020 0.86 0.3874
## drinks_per_day 0.0041 0.0069 0.59 0.5556
## pupil_hour_measured 0.0766 0.0528 1.45 0.1469
## pupil_hour_measured' -0.6332 0.8476 -0.75 0.4552
## pupil_hour_measured'' 0.7234 1.0424 0.69 0.4878
## pupil_hour_measured''' 0.6944 0.6410 1.08 0.2788
##
##
## $blacks_covariates
## Frequencies of Missing Values Due to Each Variable
## pupil_size_mean g age drugs_past_year
## 0 23 0 0
## smoker drinks_per_month drinks_per_day pupil_hour_measured
## 124 4 124 0
##
## Linear Regression Model
##
## ols(formula = pupil_size_mean ~ g + age + drugs_past_year + smoker +
## drinks_per_month + drinks_per_day + rcs(pupil_hour_measured),
## data = d_blacks)
##
##
## Model Likelihood Discrimination
## Ratio Test Indexes
## Obs 306 LR chi2 45.39 R2 0.138
## sigma0.8891 d.f. 10 R2 adj 0.109
## d.f. 295 Pr(> chi2) 0.0000 g 0.375
##
## Residuals
##
## Min 1Q Median 3Q Max
## -2.56973 -0.43031 -0.03574 0.23678 3.82439
##
##
## Coef S.E. t Pr(>|t|)
## Intercept 4.5516 0.7971 5.71 <0.0001
## g -0.0511 0.0624 -0.82 0.4134
## age -0.0450 0.0194 -2.31 0.0215
## drugs_past_year -0.7196 0.3071 -2.34 0.0198
## smoker 0.0736 0.1210 0.61 0.5438
## drinks_per_month 0.0000 0.0054 0.00 0.9976
## drinks_per_day -0.0023 0.0108 -0.22 0.8291
## pupil_hour_measured 0.0353 0.1310 0.27 0.7876
## pupil_hour_measured' -0.8596 1.8282 -0.47 0.6386
## pupil_hour_measured'' 1.1491 2.2539 0.51 0.6105
## pupil_hour_measured''' -0.3386 1.4674 -0.23 0.8177
##
##
## $hispanics_covariates
## Frequencies of Missing Values Due to Each Variable
## pupil_size_mean g age drugs_past_year
## 0 19 0 0
## smoker drinks_per_month drinks_per_day pupil_hour_measured
## 75 2 45 0
##
## Linear Regression Model
##
## ols(formula = pupil_size_mean ~ g + age + drugs_past_year + smoker +
## drinks_per_month + drinks_per_day + rcs(pupil_hour_measured),
## data = d_hispanics)
##
##
## Model Likelihood Discrimination
## Ratio Test Indexes
## Obs 90 LR chi2 23.21 R2 0.227
## sigma0.7598 d.f. 10 R2 adj 0.130
## d.f. 79 Pr(> chi2) 0.0100 g 0.437
##
## Residuals
##
## Min 1Q Median 3Q Max
## -1.65758 -0.40713 -0.06056 0.41528 2.24875
##
##
## Coef S.E. t Pr(>|t|)
## Intercept 6.9279 1.4396 4.81 <0.0001
## g 0.0798 0.0933 0.85 0.3952
## age -0.1004 0.0344 -2.92 0.0046
## drugs_past_year 0.0317 0.4713 0.07 0.9465
## smoker -0.2619 0.1813 -1.44 0.1525
## drinks_per_month 0.0093 0.0083 1.12 0.2659
## drinks_per_day 0.0484 0.0231 2.10 0.0393
## pupil_hour_measured 0.0756 0.2336 0.32 0.7469
## pupil_hour_measured' -3.9678 3.8258 -1.04 0.3028
## pupil_hour_measured'' 5.3761 4.7404 1.13 0.2602
## pupil_hour_measured''' -7.1715 3.9629 -1.81 0.0741
##
#summarize to a table
models %>% summarize_models()
Descriptive table
#by group
describeBy(d %>% select(pupil_size_left, pupil_size_right, pupil_size_mean, g), group = d$race) %>%
{
tibble(
variable = .$White %>% rownames(),
whites = str_glue("{.$White$mean %>% format_digits()} ({.$White$sd %>% format_digits()})"),
blacks = str_glue("{.$Black$mean %>% format_digits()} ({.$Black$sd %>% format_digits()})"),
hispanics = str_glue("{.$Hispanic$mean %>% format_digits()} ({.$Hispanic$sd %>% format_digits()})")
)
}
#Cohen d
SMD_matrix(d$pupil_size_mean, d$race)
## White Black Hispanic Asian Native
## White NA 0.3419 0.220 0.3687 0.0606
## Black 0.3419 NA -0.122 0.0268 -0.2813
## Hispanic 0.2203 -0.1216 NA 0.1483 -0.1597
## Asian 0.3687 0.0268 0.148 NA -0.3080
## Native 0.0606 -0.2813 -0.160 -0.3080 NA