GxE Interaction affects heritability of IQ depending on socio-economic status

Introduction

A large genotyped sample of the population of Wisconsin, USA (The Wisconsin Longitudinal Study, N=8,509) are examined for evidence of the Scarr-Rowe effect, an adverse gene x environment (GxE) interaction that reduces the heritability of IQ among those with low childhood socioeconomic status (SES), using the Continuous Parameter Estimation Method (CPEM):

\[\begin{equation} SES \sim PGS \cdot IQ \end{equation}\]

A large correlation indicates increasing genetic expressivity with increasing SES

Imports and settings

library(pacman)
p_load(knitr, tidyverse, magrittr, moments, psych, corrr, pander, broom, glue, ggpubr)
PanderOpts(digits = 3, table.split.table = Inf, missing = "-", table.alignment.default = "left")
theme_set(theme_bw(base_size=14))

Read data

A data set with PGS is prepared here. See link for details about data preparation.

df <- read_delim("pgs_data.csv", delim=";") %>% 
  select(idpub, pgs_std, iq_std = z_iq_std, iq100 = z_iq100, 
         ses57, sexrsp=z_sexrsp, rtype) %>% 
  na.omit()

tribble(~"Sample size", df %>% nrow)

Sample size
6256

Find CPEM, the inner product og pgs and iq

With standardized versions of all variables

df %<>% mutate(
  ses57_std = as.numeric(scale(ses57)),
  pgs_iq = as.numeric(scale(pgs_std * iq_std)),
  logses57_std = as.numeric(scale(log(ses57))))

Regression of pgs_iq with ses

m <- df %$% lm(pgs_iq ~ 0 + ses57_std)
m

Fitting linear model: pgs_iq ~ 0 + ses57_std
	Estimate	Std. Error	t value	Pr(>\|t\|)
ses57_std	0.0786	0.0126	6.24	4.71e-10

Scatter plot of ses vs pgs_iq

ggplot(df) +
  geom_point(aes(y=pgs_iq, x=ses57_std), color="grey40", size=1, alpha=0.3) + 
  geom_abline(aes(intercept=0, slope=coef(m)[["ses57_std"]])) +
  ylab("CPE (IQxEA3)") + xlab("Parental SES")

Check normality of ses variable

Since the R square is low, these are essentially also the results for the residuals

plot_normality <- function(df, var){
  p1 <-ggplot(df, aes_string(x=var)) + 
    geom_histogram(fill="turquoise4", col="black", binwidth=0.4)
  p2 <- ggplot(df, aes_string(sample=var)) + stat_qq()
  
  ggarrange(p1, p2, ncol=2)
}

plot_normality(df, "ses57_std")

Analysis with log(ses)

df %$% lm(pgs_iq ~ logses57_std)

Fitting linear model: pgs_iq ~ logses57_std
	Estimate	Std. Error	t value	Pr(>\|t\|)
(Intercept)	-6.63e-17	0.0126	-5.25e-15	1
logses57_std	0.0504	0.0126	3.99	6.7e-05

Check normality of log ses variable

plot_normality(df, "logses57_std")

Regressions for all combinations of sex, intercept, log transformation and outlier removal

rm_outliers <- function(df, sd){
  df %>% filter_at(vars(iq_std, ses57_std, pgs_iq), all_vars(between(., -sd, sd)))
}

getregr <- function(ses, sex, outlier_rm, intercept, df){
  intercept_s <- if (intercept == "0") "0 + " else ""

  df <- if(sex == "both") df else df %>% filter(sexrsp == sex)
  t_index <- if (intercept == "normal") 2 else 1

  if (outlier_rm == "SD < 3") df %<>% rm_outliers(3)
  else if (outlier_rm == "SD < 4") df %<>% rm_outliers(4)

  m <- lm(as.formula(glue("pgs_iq ~ {intercept_s} {ses}")), df)
  est <- tidy(m) %>% filter(term == ses)
  tibble(ses, outlier_rm, sex, intercept,
         estimate = est$estimate,
         t.value = tidy(m)$statistic[t_index],
         std.error = est$std.error,
         fstat = glance(m)$statistic,
         adjr2 = glance(m)$adj.r.squared,
         skewness = skewness(resid(m)),
         p.value = est$p.value)
}

crossing(ses = c("ses57_std", "logses57_std"), 
         sex = c("male", "female", "both"),
         outlier_rm = c("SD < 3", "SD < 4", "none"),
         intercept = c("normal", "0")) %>% 
  pmap(getregr, df) %>% bind_rows %>%
  arrange(desc(ses), outlier_rm)

ses	outlier_rm	sex	intercept	estimate	t.value	std.error	fstat	adjr2	skewness	p.value
ses57_std	none	both	0	0.0786	6.24	0.0126	38.9	0.00602	1.45	4.71e-10
ses57_std	none	both	normal	0.0786	6.24	0.0126	38.9	0.00602	1.45	4.73e-10
ses57_std	none	female	0	0.0812	4.77	0.017	22.7	0.00667	1.42	1.93e-06
ses57_std	none	female	normal	0.0812	4.76	0.017	22.7	0.00666	1.42	1.97e-06
ses57_std	none	male	0	0.0757	4.05	0.0187	16.4	0.00507	1.48	5.27e-05
ses57_std	none	male	normal	0.0756	4.04	0.0187	16.4	0.00506	1.48	5.38e-05
ses57_std	SD < 3	both	0	0.0574	5	0.0115	25	0.00394	0.703	5.91e-07
ses57_std	SD < 3	both	normal	0.0538	4.69	0.0115	22	0.00346	0.703	2.76e-06
ses57_std	SD < 3	female	0	0.069	4.37	0.0158	19.1	0.00573	0.683	1.29e-05
ses57_std	SD < 3	female	normal	0.0665	4.22	0.0158	17.8	0.00533	0.683	2.52e-05
ses57_std	SD < 3	male	0	0.045	2.69	0.0167	7.25	0.00213	0.723	0.00713
ses57_std	SD < 3	male	normal	0.04	2.4	0.0167	5.74	0.00162	0.723	0.0166
ses57_std	SD < 4	both	0	0.0659	5.47	0.0121	29.9	0.00464	0.965	4.8e-08
ses57_std	SD < 4	both	normal	0.0654	5.42	0.0121	29.4	0.00456	0.965	6.15e-08
ses57_std	SD < 4	female	0	0.0647	3.95	0.0164	15.6	0.00455	1	7.83e-05
ses57_std	SD < 4	female	normal	0.0645	3.94	0.0164	15.6	0.00453	1	8.16e-05
ses57_std	SD < 4	male	0	0.0672	3.78	0.0178	14.3	0.00441	0.927	0.000163
ses57_std	SD < 4	male	normal	0.0661	3.71	0.0178	13.8	0.00426	0.928	0.000207
logses57_std	none	both	0	0.0504	3.99	0.0126	15.9	0.00238	1.46	6.69e-05
logses57_std	none	both	normal	0.0504	3.99	0.0126	15.9	0.00238	1.46	6.7e-05
logses57_std	none	female	0	0.049	2.84	0.0172	8.08	0.00218	1.43	0.0045
logses57_std	none	female	normal	0.0489	2.84	0.0172	8.06	0.00218	1.43	0.00456
logses57_std	none	male	0	0.0519	2.8	0.0185	7.84	0.00226	1.49	0.00515
logses57_std	none	male	normal	0.0518	2.8	0.0185	7.81	0.00225	1.49	0.00522
logses57_std	SD < 3	both	0	0.0323	3.02	0.0107	9.15	0.00134	0.706	0.0025
logses57_std	SD < 3	both	normal	0.0305	2.86	0.0107	8.21	0.00119	0.706	0.00419
logses57_std	SD < 3	female	0	0.0394	2.68	0.0147	7.17	0.00196	0.688	0.00747
logses57_std	SD < 3	female	normal	0.0386	2.63	0.0147	6.91	0.00188	0.688	0.0086
logses57_std	SD < 3	male	0	0.0248	1.59	0.0156	2.54	0.000527	0.725	0.111
logses57_std	SD < 3	male	normal	0.0218	1.41	0.0155	1.98	0.000333	0.725	0.16
logses57_std	SD < 4	both	0	0.0382	3.27	0.0117	10.7	0.00156	0.972	0.00109
logses57_std	SD < 4	both	normal	0.0379	3.25	0.0117	10.5	0.00154	0.972	0.00117
logses57_std	SD < 4	female	0	0.0359	2.26	0.0159	5.11	0.00128	1.01	0.0239
logses57_std	SD < 4	female	normal	0.036	2.26	0.0159	5.12	0.00129	1.01	0.0237
logses57_std	SD < 4	male	0	0.0406	2.36	0.0172	5.58	0.00153	0.936	0.0183
logses57_std	SD < 4	male	normal	0.0398	2.32	0.0172	5.38	0.00146	0.936	0.0205

Decile means (>4SD outliers removed)

df %>% rm_outliers(4) %>% mutate(decile = ntile(ses57_std, 10)) %>% 
  group_by(decile) %>%
  summarize(
    decile_sd_iq = sd(iq_std),
    decile_sd_pgs = sd(pgs_std),
    decile_mean_pgs_iq = mean(pgs_iq),
    cor = cor(pgs_std, iq_std))

decile	decile_sd_iq	decile_sd_pgs	decile_mean_pgs_iq	cor
1	0.959	0.91	-0.0472	0.265
2	0.93	0.998	-0.0304	0.263
3	0.936	0.974	-0.0807	0.217
4	0.93	0.949	-0.0226	0.301
5	0.928	0.937	-0.131	0.196
6	0.974	0.962	-0.0492	0.28
7	0.959	0.969	-0.0644	0.262
8	0.941	0.987	-0.0639	0.27
9	0.942	0.994	0.0214	0.317
10	0.919	0.995	0.175	0.277

Correlation between variables

show_cor <- function(df, title) {
  df %>% select(iq_std, ses57_std, pgs_std) %>% 
    correlate(quiet=T) %>% 
    pander(caption=title)
}

df %>% show_cor("Full sample")
df %>% filter(sexrsp == "male") %>% show_cor("Males")
df %>% filter(sexrsp == "female") %>% show_cor("Females")

Full sample
rowname	iq_std	ses57_std	pgs_std
iq_std	-	0.3	0.314
ses57_std	0.3	-	0.156
pgs_std	0.314	0.156	-

Males
rowname	iq_std	ses57_std	pgs_std
iq_std	-	0.305	0.305
ses57_std	0.305	-	0.145
pgs_std	0.305	0.145	-

Females
rowname	iq_std	ses57_std	pgs_std
iq_std	-	0.296	0.323
ses57_std	0.296	-	0.167
pgs_std	0.323	0.167	-

Descriptions of variables

tribble(~variable, ~code_name, ~WLS_name,
       "IQ", "iq_std", "gwiiq_bm for graduates and swiiq_t for siblings",
       "Polygenic score", "pgs_std", "pgs_ea3_mtag",
       "Socio-economic status of family", "ses57", "ses57")

df %>% select(iq_std, pgs_std, ses57) %>% describe()

variable	code_name	WLS_name
IQ	iq_std	gwiiq_bm for graduates and swiiq_t for siblings
Polygenic score	pgs_std	pgs_ea3_mtag
Socio-economic status of family	ses57	ses57

	vars	n	mean	sd	median	trimmed	mad	min	max	range	skew	kurtosis	se
iq_std	1	6256	-7.11e-17	1	-0.016	-0.00493	1.01	-2.81	2.91	5.72	0.0653	-0.151	0.0126
pgs_std	2	6256	0.00261	0.999	-0.0103	-0.00772	0.99	-3.27	4.16	7.43	0.124	0.068	0.0126
ses57	3	6256	16.3	11.1	14	15	10.4	1	97	96	1.29	2.61	0.14

CPEM vs linear regression

Under which conditions is CPEM more powerful than using linear regression and looking at the interaction term? I perform some simulations to investigate this question here