library(pacman)
p_load(kirkegaard, googlesheets, glue, metafor, gtools)
#authenticate
gs_auth()
#load object
gs_obj = gs_url("https://docs.google.com/spreadsheets/d/1mYxtT0ajjJR7x_BvIHnrxj_kGTqOUVOjjINsVkc28EU/edit#gid=23025499")
## Sheet-identifying info appears to be a browser URL.
## googlesheets will attempt to extract sheet key from the URL.
## Putative key: 1mYxtT0ajjJR7x_BvIHnrxj_kGTqOUVOjjINsVkc28EU
## Sheet successfully identified: "Scarr-Rowe on Race Ethnic summary data"
#get sheet
d_orig = gs_read(gs_obj, ws = 2, col_names = F)
## Accessing worksheet titled 'Data'.
## Parsed with column specification:
## cols(
## .default = col_character(),
## X5 = col_logical(),
## X47 = col_logical()
## )
## See spec(...) for full column specifications.
#fix headers
#remove empty cols
empty_cols = map_lgl(d_orig, ~all(is.na(.)))
d = d_orig[!empty_cols]
#header cols
d_header_cols = d[1:2, ]
#last observation carried forward
d_header_cols[1, ] = d_header_cols[1, ] %>% unlist() %>% zoo::na.locf(na.rm = F)
#merge headers
d_new_header = glue("{unlist(d_header_cols[1, ])}_{unlist(d_header_cols[2, ])}", .na = "") %>%
#recode space and dash and / to underscore
str_replace_all("[\\s+\\-\\/]", "_") %>%
#multiple underscores to single
str_replace_all("_+", "_") %>%
#remove dots
str_replace_all("\\.", "") %>%
#remove ending underscore
str_replace("_$", "") %>%
#rename 2 SIREs
str_replace("Asian_Other", "AsianOther") %>%
str_replace("Non_White", "NonWhite")
#remove headers
d = d[-c(1:2), ]
#add names
colnames(d) = d_new_header
#fix types
for (cn in colnames(d)[-c(1:4)]) d[[cn]] = d[[cn]] %>% as.numeric()
#summary only
dsums = d %>% plyr::ddply("Study_id", function(dd) {
dd %>% filter(Test_type == "Summary")
})
#compare size
d %>% nrow()
## [1] 89
dsums %>% nrow()
## [1] 15
#SIREs
SIRE_list = c("White", "Hispanic", "Black", "AsianOther", "Multiracial", "NonWhite")
#simple ACE x SIRE REs
simple_SIRE_ACE = expand.grid(SIRE = SIRE_list,
param = c("A", "C", "E"),
k = NA,
estimate = NA,
estimate_se = NA,
estimate_p = NA,
estimate_I2 = NA,
fit = list(NULL),
stringsAsFactors = F
) %>% as_tibble()
#loop
for (i in 1:nrow(simple_SIRE_ACE)) {
#names
i_SIRE = simple_SIRE_ACE$SIRE[i]
i_col = glue("{i_SIRE}_{simple_SIRE_ACE$param[i]}")
#rma
i_rma = suppressWarnings(
rma(dsums[[i_col]],
sei = dsums[[glue("{i_SIRE}_SE")]]
)
)
#save info
simple_SIRE_ACE$k[i] = i_rma$k
simple_SIRE_ACE$estimate[i] = i_rma$b %>% as.vector()
simple_SIRE_ACE$estimate_se[i] = i_rma$se
simple_SIRE_ACE$estimate_p[i] = i_rma$pval
simple_SIRE_ACE$estimate_I2[i] = i_rma$I2
simple_SIRE_ACE$fit[i] = list(i_rma)
}
simple_SIRE_ACE
#compute delta cols
#loop over pairs, without order
delta_pairs = gtools::combinations(length(SIRE_list), r = 2, v = SIRE_list) %>% as_tibble()
## Warning: `as_tibble.matrix()` requires a matrix with column names or a `.name_repair` argument. Using compatibility `.name_repair`.
## This warning is displayed once per session.
colnames(delta_pairs) = c("SIRE1", "SIRE2")
#more vars
delta_pairs$k = NA
delta_pairs$estimate_A = NA
delta_pairs$estimate_A_se = NA
delta_pairs$estimate_A_p = NA
delta_pairs$estimate_A_I2 = NA
delta_pairs$estimate_trait = NA
delta_pairs$estimate_trait_se = NA
delta_pairs$estimate_trait_p = NA
delta_pairs$estimate_trait_I2 = NA
delta_pairs$dd_r = NA
delta_pairs$dd_r_se = NA
delta_pairs$dd_r_p = NA
delta_pairs$dd_r_fig = list(NULL)
delta_pairs$fit_A = list(NULL)
delta_pairs$fit_trait = list(NULL)
for (i in seq_along_rows(delta_pairs)) {
i_row = delta_pairs[i, ]
i_deltas = dsums[[glue("{i_row[1]}_A")]] - dsums[[glue("{i_row[2]}_A")]]
i_hns = cbind(dsums[[glue("{i_row[1]}_Hn")]], dsums[[glue("{i_row[2]}_Hn")]]) %>% t() %>% harmonic.mean(na.rm = F)
#save k first
delta_pairs$k[i] = length(na.omit(i_deltas))
#skip if 0
if (delta_pairs$k[i] == 0) next
#RE
i_rma = suppressWarnings(rma(i_deltas, sei = 1/sqrt(i_hns)))
#save info
delta_pairs$estimate_A[i] = i_rma$b %>% as.vector()
delta_pairs$estimate_A_se[i] = i_rma$se
delta_pairs$estimate_A_p[i] = i_rma$pval
delta_pairs$estimate_A_I2[i] = i_rma$I2
delta_pairs$fit_A[i] = list(i_rma)
#add trait delta
#not coded properly in dataset, need to extract with a dictionary
i_row2 = i_row[1:2] %>% unlist() %>% plyr::mapvalues(from = SIRE_list, to = c("W", "H", "B", "A", "M", "NW"), warn_missing = F)
#make 2 alternatives, since we don't know which order John used
delta_trait_1 = glue("{i_row2[1]}_{i_row2[2]}_d")
delta_trait_2 = glue("{i_row2[2]}_{i_row2[1]}_d")
#which exists?
i_delta_trait_var = c(delta_trait_1, delta_trait_2)[c(delta_trait_1, delta_trait_2) %in% names(dsums)]
#RE for trait delta
i_rma_trait = suppressWarnings(rma(dsums[[i_delta_trait_var]], sei = 1 / sqrt(i_hns)))
#save info
delta_pairs$estimate_trait[i] = i_rma_trait$b %>% as.vector()
delta_pairs$estimate_trait_se[i] = i_rma_trait$se
delta_pairs$estimate_trait_p[i] = i_rma_trait$pval
delta_pairs$estimate_trait_I2[i] = i_rma_trait$I2
delta_pairs$fit_trait[i] = list(i_rma_trait)
#correlation for delta A and delta trait
#skip if k < 3
if (delta_pairs$k[i] < 3) next
#compute correlation test
i_cor = wtd.cor(i_deltas, dsums[[i_delta_trait_var]], weight = sqrt(i_hns))
delta_pairs$dd_r[i] = i_cor[1]
delta_pairs$dd_r_se[i] = i_cor[2]
delta_pairs$dd_r_p[i] = i_cor[4]
#plot
i_plot = GG_scatter(tibble(d_trait = dsums[[i_delta_trait_var]],
d_A = i_deltas), "d_trait", "d_A", weights = sqrt(i_hns)) +
ggtitle(glue("Scatterplot: correlation {i_delta_trait_var}"))
delta_pairs$dd_r_fig[i] = list(i_plot)
}
#print non-complex cols
delta_pairs[!map_lgl(delta_pairs, is.list)]
#plots
delta_pairs$dd_r_fig[!map_lgl(delta_pairs$dd_r_fig, is.null)]
## [[1]]
##
## [[2]]
##
## [[3]]
#versions
write_sessioninfo()
## R version 3.5.2 (2018-12-20)
## Platform: x86_64-pc-linux-gnu (64-bit)
## Running under: Linux Mint 19.1
##
## Matrix products: default
## BLAS: /usr/lib/x86_64-linux-gnu/blas/libblas.so.3.7.1
## LAPACK: /usr/lib/x86_64-linux-gnu/lapack/liblapack.so.3.7.1
##
## locale:
## [1] LC_CTYPE=en_US.UTF-8 LC_NUMERIC=C
## [3] LC_TIME=en_US.UTF-8 LC_COLLATE=en_US.UTF-8
## [5] LC_MONETARY=en_US.UTF-8 LC_MESSAGES=en_US.UTF-8
## [7] LC_PAPER=en_US.UTF-8 LC_NAME=C
## [9] LC_ADDRESS=C LC_TELEPHONE=C
## [11] LC_MEASUREMENT=en_US.UTF-8 LC_IDENTIFICATION=C
##
## attached base packages:
## [1] stats graphics grDevices utils datasets methods base
##
## other attached packages:
## [1] gtools_3.8.1 glue_1.3.0 googlesheets_0.3.0
## [4] kirkegaard_2018.05 metafor_2.0-0 Matrix_1.2-15
## [7] psych_1.8.12 magrittr_1.5 assertthat_0.2.0
## [10] weights_1.0 mice_3.3.0 gdata_2.18.0
## [13] Hmisc_4.2-0 Formula_1.2-3 survival_2.43-3
## [16] lattice_0.20-38 forcats_0.3.0 stringr_1.4.0
## [19] dplyr_0.8.0.1 purrr_0.3.0 readr_1.3.1
## [22] tidyr_0.8.2 tibble_2.0.1 ggplot2_3.1.0
## [25] tidyverse_1.2.1 pacman_0.5.0
##
## loaded via a namespace (and not attached):
## [1] httr_1.4.0 jsonlite_1.6 splines_3.5.2
## [4] modelr_0.1.3 multilevel_2.6 askpass_1.1
## [7] latticeExtra_0.6-28 cellranger_1.1.0 yaml_2.2.0
## [10] pillar_1.3.1 backports_1.1.3 digest_0.6.18
## [13] RColorBrewer_1.1-2 checkmate_1.9.1 rvest_0.3.2
## [16] minqa_1.2.4 colorspace_1.4-0 htmltools_0.3.6
## [19] plyr_1.8.4 pkgconfig_2.0.2 broom_0.5.1
## [22] haven_2.0.0 scales_1.0.0 lme4_1.1-20
## [25] openssl_1.2.1 htmlTable_1.13.1 generics_0.0.2
## [28] withr_2.1.2 pan_1.6 nnet_7.3-12
## [31] lazyeval_0.2.1 mnormt_1.5-5 cli_1.0.1
## [34] crayon_1.3.4 readxl_1.3.0 mitml_0.3-7
## [37] evaluate_0.13 nlme_3.1-137 MASS_7.3-51.1
## [40] xml2_1.2.0 foreign_0.8-70 tools_3.5.2
## [43] data.table_1.12.0 hms_0.4.2 munsell_0.5.0
## [46] cluster_2.0.7-1 compiler_3.5.2 rlang_0.3.1
## [49] grid_3.5.2 nloptr_1.2.1 rstudioapi_0.9.0
## [52] htmlwidgets_1.3 labeling_0.3 base64enc_0.1-3
## [55] rmarkdown_1.11 gtable_0.2.0 curl_3.3
## [58] R6_2.4.0 zoo_1.8-4 gridExtra_2.3
## [61] lubridate_1.7.4 psychometric_2.2 knitr_1.21
## [64] jomo_2.6-7 stringi_1.3.1 parallel_3.5.2
## [67] Rcpp_1.0.0 rpart_4.1-13 acepack_1.4.1
## [70] tidyselect_0.2.5 xfun_0.4