A large genotyped sample of the population of Wisconsin, USA (The Wisconsin Longitudinal Study, N=8,509) are examined for evidence of the Scarr-Rowe effect, an adverse gene x environment (GxE) interaction that reduces the heritability of IQ among those with low childhood socioeconomic status (SES), using the Continuous Parameter Estimation Method (CPEM):
\[\begin{equation} SES \sim PGS \cdot IQ \end{equation}\]
A large correlation indicates increasing genetic expressivity with increasing SES
library(pacman)
p_load(knitr, tidyverse, magrittr, moments, psych, corrr, pander, broom, glue, ggpubr)
PanderOpts(digits = 3, table.split.table = Inf, missing = "-", table.alignment.default = "left")
theme_set(theme_bw(base_size=14))
A data set with PGS is prepared here. See link for details about data preparation.
df <- read_delim("pgs_data.csv", delim=";") %>%
select(idpub, pgs_std, iq_std = z_iq_std, iq100 = z_iq100,
ses57, sexrsp=z_sexrsp, rtype) %>%
na.omit()
tribble(~"Sample size", df %>% nrow)| Sample size |
|---|
| 6256 |
With standardized versions of all variables
df %<>% mutate(
ses57_std = as.numeric(scale(ses57)),
pgs_iq = as.numeric(scale(pgs_std * iq_std)),
logses57_std = as.numeric(scale(log(ses57))))
| Estimate | Std. Error | t value | Pr(>|t|) | |
|---|---|---|---|---|
| ses57_std | 0.0786 | 0.0126 | 6.24 | 4.71e-10 |
ggplot(df) +
geom_point(aes(y=pgs_iq, x=ses57_std), color="grey40", size=1, alpha=0.3) +
geom_abline(aes(intercept=0, slope=coef(m)[["ses57_std"]])) +
ylab("CPE (IQxEA3)") + xlab("Parental SES")
Since the R square is low, these are essentially also the results for the residuals
plot_normality <- function(df, var){
p1 <-ggplot(df, aes_string(x=var)) +
geom_histogram(fill="turquoise4", col="black", binwidth=0.4)
p2 <- ggplot(df, aes_string(sample=var)) + stat_qq()
ggarrange(p1, p2, ncol=2)
}
plot_normality(df, "ses57_std")
| Estimate | Std. Error | t value | Pr(>|t|) | |
|---|---|---|---|---|
| (Intercept) | -6.63e-17 | 0.0126 | -5.25e-15 | 1 |
| logses57_std | 0.0504 | 0.0126 | 3.99 | 6.7e-05 |
rm_outliers <- function(df, sd){
df %>% filter_at(vars(iq_std, ses57_std, pgs_iq), all_vars(between(., -sd, sd)))
}
getregr <- function(ses, sex, outlier_rm, intercept, df){
intercept_s <- if (intercept == "0") "0 + " else ""
df <- if(sex == "both") df else df %>% filter(sexrsp == sex)
t_index <- if (intercept == "normal") 2 else 1
if (outlier_rm == "SD < 3") df %<>% rm_outliers(3)
else if (outlier_rm == "SD < 4") df %<>% rm_outliers(4)
m <- lm(as.formula(glue("pgs_iq ~ {intercept_s} {ses}")), df)
est <- tidy(m) %>% filter(term == ses)
tibble(ses, outlier_rm, sex, intercept,
estimate = est$estimate,
t.value = tidy(m)$statistic[t_index],
std.error = est$std.error,
fstat = glance(m)$statistic,
adjr2 = glance(m)$adj.r.squared,
skewness = skewness(resid(m)),
p.value = est$p.value)
}
crossing(ses = c("ses57_std", "logses57_std"),
sex = c("male", "female", "both"),
outlier_rm = c("SD < 3", "SD < 4", "none"),
intercept = c("normal", "0")) %>%
pmap(getregr, df) %>% bind_rows %>%
arrange(desc(ses), outlier_rm)| ses | outlier_rm | sex | intercept | estimate | t.value | std.error | fstat | adjr2 | skewness | p.value |
|---|---|---|---|---|---|---|---|---|---|---|
| ses57_std | none | both | 0 | 0.0786 | 6.24 | 0.0126 | 38.9 | 0.00602 | 1.45 | 4.71e-10 |
| ses57_std | none | both | normal | 0.0786 | 6.24 | 0.0126 | 38.9 | 0.00602 | 1.45 | 4.73e-10 |
| ses57_std | none | female | 0 | 0.0812 | 4.77 | 0.017 | 22.7 | 0.00667 | 1.42 | 1.93e-06 |
| ses57_std | none | female | normal | 0.0812 | 4.76 | 0.017 | 22.7 | 0.00666 | 1.42 | 1.97e-06 |
| ses57_std | none | male | 0 | 0.0757 | 4.05 | 0.0187 | 16.4 | 0.00507 | 1.48 | 5.27e-05 |
| ses57_std | none | male | normal | 0.0756 | 4.04 | 0.0187 | 16.4 | 0.00506 | 1.48 | 5.38e-05 |
| ses57_std | SD < 3 | both | 0 | 0.0574 | 5 | 0.0115 | 25 | 0.00394 | 0.703 | 5.91e-07 |
| ses57_std | SD < 3 | both | normal | 0.0538 | 4.69 | 0.0115 | 22 | 0.00346 | 0.703 | 2.76e-06 |
| ses57_std | SD < 3 | female | 0 | 0.069 | 4.37 | 0.0158 | 19.1 | 0.00573 | 0.683 | 1.29e-05 |
| ses57_std | SD < 3 | female | normal | 0.0665 | 4.22 | 0.0158 | 17.8 | 0.00533 | 0.683 | 2.52e-05 |
| ses57_std | SD < 3 | male | 0 | 0.045 | 2.69 | 0.0167 | 7.25 | 0.00213 | 0.723 | 0.00713 |
| ses57_std | SD < 3 | male | normal | 0.04 | 2.4 | 0.0167 | 5.74 | 0.00162 | 0.723 | 0.0166 |
| ses57_std | SD < 4 | both | 0 | 0.0659 | 5.47 | 0.0121 | 29.9 | 0.00464 | 0.965 | 4.8e-08 |
| ses57_std | SD < 4 | both | normal | 0.0654 | 5.42 | 0.0121 | 29.4 | 0.00456 | 0.965 | 6.15e-08 |
| ses57_std | SD < 4 | female | 0 | 0.0647 | 3.95 | 0.0164 | 15.6 | 0.00455 | 1 | 7.83e-05 |
| ses57_std | SD < 4 | female | normal | 0.0645 | 3.94 | 0.0164 | 15.6 | 0.00453 | 1 | 8.16e-05 |
| ses57_std | SD < 4 | male | 0 | 0.0672 | 3.78 | 0.0178 | 14.3 | 0.00441 | 0.927 | 0.000163 |
| ses57_std | SD < 4 | male | normal | 0.0661 | 3.71 | 0.0178 | 13.8 | 0.00426 | 0.928 | 0.000207 |
| logses57_std | none | both | 0 | 0.0504 | 3.99 | 0.0126 | 15.9 | 0.00238 | 1.46 | 6.69e-05 |
| logses57_std | none | both | normal | 0.0504 | 3.99 | 0.0126 | 15.9 | 0.00238 | 1.46 | 6.7e-05 |
| logses57_std | none | female | 0 | 0.049 | 2.84 | 0.0172 | 8.08 | 0.00218 | 1.43 | 0.0045 |
| logses57_std | none | female | normal | 0.0489 | 2.84 | 0.0172 | 8.06 | 0.00218 | 1.43 | 0.00456 |
| logses57_std | none | male | 0 | 0.0519 | 2.8 | 0.0185 | 7.84 | 0.00226 | 1.49 | 0.00515 |
| logses57_std | none | male | normal | 0.0518 | 2.8 | 0.0185 | 7.81 | 0.00225 | 1.49 | 0.00522 |
| logses57_std | SD < 3 | both | 0 | 0.0323 | 3.02 | 0.0107 | 9.15 | 0.00134 | 0.706 | 0.0025 |
| logses57_std | SD < 3 | both | normal | 0.0305 | 2.86 | 0.0107 | 8.21 | 0.00119 | 0.706 | 0.00419 |
| logses57_std | SD < 3 | female | 0 | 0.0394 | 2.68 | 0.0147 | 7.17 | 0.00196 | 0.688 | 0.00747 |
| logses57_std | SD < 3 | female | normal | 0.0386 | 2.63 | 0.0147 | 6.91 | 0.00188 | 0.688 | 0.0086 |
| logses57_std | SD < 3 | male | 0 | 0.0248 | 1.59 | 0.0156 | 2.54 | 0.000527 | 0.725 | 0.111 |
| logses57_std | SD < 3 | male | normal | 0.0218 | 1.41 | 0.0155 | 1.98 | 0.000333 | 0.725 | 0.16 |
| logses57_std | SD < 4 | both | 0 | 0.0382 | 3.27 | 0.0117 | 10.7 | 0.00156 | 0.972 | 0.00109 |
| logses57_std | SD < 4 | both | normal | 0.0379 | 3.25 | 0.0117 | 10.5 | 0.00154 | 0.972 | 0.00117 |
| logses57_std | SD < 4 | female | 0 | 0.0359 | 2.26 | 0.0159 | 5.11 | 0.00128 | 1.01 | 0.0239 |
| logses57_std | SD < 4 | female | normal | 0.036 | 2.26 | 0.0159 | 5.12 | 0.00129 | 1.01 | 0.0237 |
| logses57_std | SD < 4 | male | 0 | 0.0406 | 2.36 | 0.0172 | 5.58 | 0.00153 | 0.936 | 0.0183 |
| logses57_std | SD < 4 | male | normal | 0.0398 | 2.32 | 0.0172 | 5.38 | 0.00146 | 0.936 | 0.0205 |
df %>% rm_outliers(4) %>% mutate(decile = ntile(ses57_std, 10)) %>%
group_by(decile) %>%
summarize(
decile_sd_iq = sd(iq_std),
decile_sd_pgs = sd(pgs_std),
decile_mean_pgs_iq = mean(pgs_iq),
cor = cor(pgs_std, iq_std))| decile | decile_sd_iq | decile_sd_pgs | decile_mean_pgs_iq | cor |
|---|---|---|---|---|
| 1 | 0.959 | 0.91 | -0.0472 | 0.265 |
| 2 | 0.93 | 0.998 | -0.0304 | 0.263 |
| 3 | 0.936 | 0.974 | -0.0807 | 0.217 |
| 4 | 0.93 | 0.949 | -0.0226 | 0.301 |
| 5 | 0.928 | 0.937 | -0.131 | 0.196 |
| 6 | 0.974 | 0.962 | -0.0492 | 0.28 |
| 7 | 0.959 | 0.969 | -0.0644 | 0.262 |
| 8 | 0.941 | 0.987 | -0.0639 | 0.27 |
| 9 | 0.942 | 0.994 | 0.0214 | 0.317 |
| 10 | 0.919 | 0.995 | 0.175 | 0.277 |
show_cor <- function(df, title) {
df %>% select(iq_std, ses57_std, pgs_std) %>%
correlate(quiet=T) %>%
pander(caption=title)
}
df %>% show_cor("Full sample")
df %>% filter(sexrsp == "male") %>% show_cor("Males")
df %>% filter(sexrsp == "female") %>% show_cor("Females")| rowname | iq_std | ses57_std | pgs_std |
|---|---|---|---|
| iq_std | - | 0.3 | 0.314 |
| ses57_std | 0.3 | - | 0.156 |
| pgs_std | 0.314 | 0.156 | - |
| rowname | iq_std | ses57_std | pgs_std |
|---|---|---|---|
| iq_std | - | 0.305 | 0.305 |
| ses57_std | 0.305 | - | 0.145 |
| pgs_std | 0.305 | 0.145 | - |
| rowname | iq_std | ses57_std | pgs_std |
|---|---|---|---|
| iq_std | - | 0.296 | 0.323 |
| ses57_std | 0.296 | - | 0.167 |
| pgs_std | 0.323 | 0.167 | - |
tribble(~variable, ~code_name, ~WLS_name,
"IQ", "iq_std", "gwiiq_bm for graduates and swiiq_t for siblings",
"Polygenic score", "pgs_std", "pgs_ea3_mtag",
"Socio-economic status of family", "ses57", "ses57")
df %>% select(iq_std, pgs_std, ses57) %>% describe()| variable | code_name | WLS_name |
|---|---|---|
| IQ | iq_std | gwiiq_bm for graduates and swiiq_t for siblings |
| Polygenic score | pgs_std | pgs_ea3_mtag |
| Socio-economic status of family | ses57 | ses57 |
| vars | n | mean | sd | median | trimmed | mad | min | max | range | skew | kurtosis | se | |
|---|---|---|---|---|---|---|---|---|---|---|---|---|---|
| iq_std | 1 | 6256 | -7.11e-17 | 1 | -0.016 | -0.00493 | 1.01 | -2.81 | 2.91 | 5.72 | 0.0653 | -0.151 | 0.0126 |
| pgs_std | 2 | 6256 | 0.00261 | 0.999 | -0.0103 | -0.00772 | 0.99 | -3.27 | 4.16 | 7.43 | 0.124 | 0.068 | 0.0126 |
| ses57 | 3 | 6256 | 16.3 | 11.1 | 14 | 15 | 10.4 | 1 | 97 | 96 | 1.29 | 2.61 | 0.14 |
Under which conditions is CPEM more powerful than using linear regression and looking at the interaction term? I perform some simulations to investigate this question here