Analyses
### ANALYZE THE SNPS FOUND IN OKBAY ET AL 2016
#main datafile
l_OKBAY = list()
# SNP results -------------------------------------------------------------
l_OKBAY$SNP_data = read_tsv("data/EduYears_Pooled_5000.txt") %>% as.data.frame
## Parsed with column specification:
## cols(
## MarkerName = col_character(),
## CHR = col_integer(),
## POS = col_integer(),
## A1 = col_character(),
## A2 = col_character(),
## EAF = col_double(),
## Beta = col_double(),
## SE = col_double(),
## Pval = col_double()
## )
#rownames
rownames(l_OKBAY$SNP_data) = l_OKBAY$SNP_data$MarkerName
# LD pruned SNPs ----------------------------------------------------------
#this has both SNP and genotype
#pooled results
l_OKBAY$pooled_results = read_csv("data/OKBAY_total_pooled.csv") %>% as.data.frame()
## Parsed with column specification:
## cols(
## SNP = col_character(),
## Chr = col_integer(),
## Position = col_integer(),
## `Allele 1` = col_character(),
## `Frequency Allele 1` = col_double(),
## `Effect size` = col_double(),
## `P-value` = col_double(),
## `Heterogeneity I2` = col_double(),
## `Heterogeneity P-value` = col_double()
## )
rownames(l_OKBAY$pooled_results) = l_OKBAY$pooled_results$SNP
#results without UKBB
l_OKBAY$discovery_results = read_csv("data/OKBAY_total_discovery.csv") %>% as.data.frame()
## Warning: Duplicated column names deduplicated: 'Effect size' => 'Effect
## size_1' [10], 'P-value' => 'P-value_1' [11], 'Effect size' => 'Effect
## size_2' [12], 'P-value' => 'P-value_2' [13]
## Parsed with column specification:
## cols(
## SNP = col_character(),
## Chr = col_integer(),
## Position = col_integer(),
## `Allele 1` = col_character(),
## `Frequency Allele 1` = col_double(),
## `Effect size` = col_double(),
## `P-value` = col_double(),
## `Heterogeneity I2` = col_double(),
## `Heterogeneity P-value` = col_double(),
## `Effect size_1` = col_double(),
## `P-value_1` = col_double(),
## `Effect size_2` = col_double(),
## `P-value_2` = col_double()
## )
rownames(l_OKBAY$discovery_results) = l_OKBAY$discovery_results$SNP
#how many SNPs in common?
sum(l_OKBAY$pooled_results$SNP %in% d_freqs$SNP)
## [1] 154
sum(l_OKBAY$discovery_results$SNP %in% d_freqs$SNP)
## [1] 71
# subset ------------------------------------------------------------------
#subset to the ones not in LD
l_OKBAY$pop_freqs = d_freqs[d_freqs$SNP %in% l_OKBAY$pooled_results$SNP, ]
# get effect allele freqs -------------------------------------------------
l_OKBAY$effect_freqs = get_freq_effect_alleles(freq_data = l_OKBAY$pop_freqs,
SNPs = l_OKBAY$pooled_results$SNP,
alleles = l_OKBAY$pooled_results$`Allele 1`#,
#betas = l_OKBAY$pooled_results$`Effect size`
)
#output
write.csv(l_OKBAY$effect_freqs, file = "data/effect_freqs.csv")
# factor analyze ----------------------------------------------------------
#check method variance for analysis
l_OKBAY$FA_all = fa_all_methods(l_OKBAY$effect_freqs, messages = F)
wtd.cors(l_OKBAY$FA_all$scores)
## regression_minres Thurstone_minres tenBerge_minres
## regression_minres 1.0000000 1.0000000 1.0000000
## Thurstone_minres 1.0000000 1.0000000 1.0000000
## tenBerge_minres 1.0000000 1.0000000 1.0000000
## Bartlett_minres 0.9981106 0.9981106 0.9981106
## regression_pa 0.9999564 0.9999564 0.9999564
## Thurstone_pa 0.9999564 0.9999564 0.9999564
## Bartlett_pa 0.9979269 0.9979269 0.9979269
## regression_minchi 1.0000000 1.0000000 1.0000000
## Thurstone_minchi 1.0000000 1.0000000 1.0000000
## tenBerge_minchi 1.0000000 1.0000000 1.0000000
## Bartlett_minchi 0.9981106 0.9981106 0.9981106
## Bartlett_minres regression_pa Thurstone_pa Bartlett_pa
## regression_minres 0.9981106 0.9999564 0.9999564 0.9979269
## Thurstone_minres 0.9981106 0.9999564 0.9999564 0.9979269
## tenBerge_minres 0.9981106 0.9999564 0.9999564 0.9979269
## Bartlett_minres 1.0000000 0.9982762 0.9982762 0.9999953
## regression_pa 0.9982762 1.0000000 1.0000000 0.9980969
## Thurstone_pa 0.9982762 1.0000000 1.0000000 0.9980969
## Bartlett_pa 0.9999953 0.9980969 0.9980969 1.0000000
## regression_minchi 0.9981106 0.9999564 0.9999564 0.9979269
## Thurstone_minchi 0.9981106 0.9999564 0.9999564 0.9979269
## tenBerge_minchi 0.9981106 0.9999564 0.9999564 0.9979269
## Bartlett_minchi 1.0000000 0.9982762 0.9982762 0.9999953
## regression_minchi Thurstone_minchi tenBerge_minchi
## regression_minres 1.0000000 1.0000000 1.0000000
## Thurstone_minres 1.0000000 1.0000000 1.0000000
## tenBerge_minres 1.0000000 1.0000000 1.0000000
## Bartlett_minres 0.9981106 0.9981106 0.9981106
## regression_pa 0.9999564 0.9999564 0.9999564
## Thurstone_pa 0.9999564 0.9999564 0.9999564
## Bartlett_pa 0.9979269 0.9979269 0.9979269
## regression_minchi 1.0000000 1.0000000 1.0000000
## Thurstone_minchi 1.0000000 1.0000000 1.0000000
## tenBerge_minchi 1.0000000 1.0000000 1.0000000
## Bartlett_minchi 0.9981106 0.9981106 0.9981106
## Bartlett_minchi
## regression_minres 0.9981106
## Thurstone_minres 0.9981106
## tenBerge_minres 0.9981106
## Bartlett_minres 1.0000000
## regression_pa 0.9982762
## Thurstone_pa 0.9982762
## Bartlett_pa 0.9999953
## regression_minchi 0.9981106
## Thurstone_minchi 0.9981106
## tenBerge_minchi 0.9981106
## Bartlett_minchi 1.0000000
#how many to extract?
nScree(l_OKBAY$effect_freqs)
## noc naf nparallel nkaiser
## 1 11 2 11 11
#extract factors
l_OKBAY$fa = fa(l_OKBAY$effect_freqs, scores = "Bartlett")
## Warning in cor.smooth(R): Matrix was not positive definite, smoothing was
## done
## Warning in cor.smooth(R): Matrix was not positive definite, smoothing was
## done
## Warning in cor.smooth(r): Matrix was not positive definite, smoothing was
## done
## The determinant of the smoothed correlation was zero.
## This means the objective function is not defined.
## Chi square is based upon observed residuals.
## The determinant of the smoothed correlation was zero.
## This means the objective function is not defined for the null model either.
## The Chi square is thus based upon observed correlations.
## In factor.stats, I could not find the RMSEA upper bound . Sorry about that
l_OKBAY$fa_2 = fa(l_OKBAY$effect_freqs, scores = "Bartlett", nfactors = 2)
## Warning in cor.smooth(R): Matrix was not positive definite, smoothing was
## done
## Warning in cor.smooth(R): Matrix was not positive definite, smoothing was
## done
## Loading required namespace: GPArotation
## Warning in cor.smooth(r): Matrix was not positive definite, smoothing was
## done
## The determinant of the smoothed correlation was zero.
## This means the objective function is not defined.
## Chi square is based upon observed residuals.
## The determinant of the smoothed correlation was zero.
## This means the objective function is not defined for the null model either.
## The Chi square is thus based upon observed correlations.
## In factor.stats, I could not find the RMSEA upper bound . Sorry about that
l_OKBAY$fa_3 = fa(l_OKBAY$effect_freqs, scores = "Bartlett", nfactors = 3)
## Warning in cor.smooth(R): Matrix was not positive definite, smoothing was
## done
## Warning in cor.smooth(R): Matrix was not positive definite, smoothing was
## done
## Warning in cor.smooth(r): Matrix was not positive definite, smoothing was
## done
## The determinant of the smoothed correlation was zero.
## This means the objective function is not defined.
## Chi square is based upon observed residuals.
## The determinant of the smoothed correlation was zero.
## This means the objective function is not defined for the null model either.
## The Chi square is thus based upon observed correlations.
## In factor.stats, I could not find the RMSEA upper bound . Sorry about that
l_OKBAY$fa_4 = fa(l_OKBAY$effect_freqs, scores = "Bartlett", nfactors = 4)
## Warning in cor.smooth(R): Matrix was not positive definite, smoothing was
## done
## Warning in cor.smooth(R): Matrix was not positive definite, smoothing was
## done
## Warning in GPFoblq(L, Tmat = Tmat, normalize = normalize, eps = eps, maxit
## = maxit, : convergence not obtained in GPFoblq. 1000 iterations used.
## Warning in cor.smooth(r): Matrix was not positive definite, smoothing was
## done
## The determinant of the smoothed correlation was zero.
## This means the objective function is not defined.
## Chi square is based upon observed residuals.
## The determinant of the smoothed correlation was zero.
## This means the objective function is not defined for the null model either.
## The Chi square is thus based upon observed correlations.
## In factor.stats, I could not find the RMSEA upper bound . Sorry about that
l_OKBAY$fa_5 = fa(l_OKBAY$effect_freqs, scores = "Bartlett", nfactors = 5)
## Warning in cor.smooth(R): Matrix was not positive definite, smoothing was
## done
## Warning in cor.smooth(R): Matrix was not positive definite, smoothing was
## done
## Warning in GPFoblq(L, Tmat = Tmat, normalize = normalize, eps = eps, maxit
## = maxit, : convergence not obtained in GPFoblq. 1000 iterations used.
## Warning in cor.smooth(r): Matrix was not positive definite, smoothing was
## done
## The determinant of the smoothed correlation was zero.
## This means the objective function is not defined.
## Chi square is based upon observed residuals.
## The determinant of the smoothed correlation was zero.
## This means the objective function is not defined for the null model either.
## The Chi square is thus based upon observed correlations.
## In factor.stats, I could not find the RMSEA upper bound . Sorry about that
l_OKBAY$fa_11 = fa(l_OKBAY$effect_freqs, scores = "Bartlett", nfactors = 11)
## Warning in cor.smooth(R): Matrix was not positive definite, smoothing was
## done
## Warning in cor.smooth(R): Matrix was not positive definite, smoothing was
## done
## Warning in GPFoblq(L, Tmat = Tmat, normalize = normalize, eps = eps, maxit
## = maxit, : convergence not obtained in GPFoblq. 1000 iterations used.
## Warning in fac(r = r, nfactors = nfactors, n.obs = n.obs, rotate =
## rotate, : A loading greater than abs(1) was detected. Examine the loadings
## carefully.
## Warning in cor.smooth(r): Matrix was not positive definite, smoothing was
## done
## The determinant of the smoothed correlation was zero.
## This means the objective function is not defined.
## Chi square is based upon observed residuals.
## The determinant of the smoothed correlation was zero.
## This means the objective function is not defined for the null model either.
## The Chi square is thus based upon observed correlations.
## In factor.stats, I could not find the RMSEA upper bound . Sorry about that
#plot loadings
fa_plot_loadings(l_OKBAY$fa)

psych::describe(l_OKBAY$fa$loadings)
## vars n mean sd median trimmed mad min max range skew kurtosis
## MR1 1 154 0.11 0.61 0.27 0.13 0.78 -0.96 0.96 1.91 -0.21 -1.56
## se
## MR1 0.05
#does loadings match effect sizes?
l_OKBAY$included_SNPs = dimnames(l_OKBAY$fa$loadings)[[1]]
l_OKBAY$SNP_data[l_OKBAY$included_SNPs, "Beta"]
## [1] 0.016 -0.016 0.018 -0.017 0.022 0.029 0.036 -0.016 0.026 -0.029
## [11] 0.013 -0.017 -0.013 0.018 -0.014 0.014 0.020 0.018 0.029 0.016
## [21] -0.014 0.020 0.016 0.016 0.022 0.025 0.017 0.014 -0.015 -0.015
## [31] 0.025 0.035 0.017 0.022 -0.016 0.014 -0.014 0.016 -0.014 0.014
## [41] -0.012 0.014 0.021 -0.015 0.023 0.023 -0.030 -0.025 0.026 0.025
## [51] -0.024 0.021 0.038 0.022 0.018 0.012 -0.017 0.014 0.019 -0.014
## [61] 0.015 -0.017 -0.013 0.015 0.017 0.014 0.013 0.019 -0.013 -0.017
## [71] -0.021 0.016 -0.014 -0.013 -0.019 -0.013 -0.013 -0.018 0.013 -0.021
## [81] 0.013 0.016 -0.013 -0.017 0.015 -0.024 0.028 -0.015 0.014 0.012
## [91] 0.013 -0.014 -0.015 -0.014 0.016 -0.015 0.015 0.019 -0.014 0.014
## [101] -0.016 0.016 0.017 -0.016 -0.015 0.025 0.015 0.014 0.013 -0.014
## [111] 0.013 -0.014 -0.012 -0.028 0.015 -0.016 -0.017 0.016 0.014 0.014
## [121] 0.014 -0.016 -0.014 0.020 0.016 -0.019 -0.013 0.022 -0.024 -0.015
## [131] -0.016 -0.016 -0.017 -0.018 -0.015 0.017 -0.013 0.019 0.015 -0.014
## [141] 0.015 0.013 -0.013 0.018 -0.013 -0.015 -0.013 0.020 -0.029 0.016
## [151] 0.023 -0.013 -0.022 -0.014
GG_scatter(data.frame("loading" = as.vector(l_OKBAY$fa$loadings),
"Beta" = l_OKBAY$pooled_results[l_OKBAY$included_SNPs, "Effect size"])
, x_var = "Beta", y_var = "loading")

d_phenotype$fs = l_OKBAY$fa$scores
d_phenotype$fs2 = l_OKBAY$fa_2$scores[, 1]
d_phenotype$fs3 = l_OKBAY$fa_3$scores[, 1]
d_phenotype$fs4 = l_OKBAY$fa_4$scores[, 1]
d_phenotype$fs5 = l_OKBAY$fa_5$scores[, 1]
d_phenotype$fs11 = l_OKBAY$fa_11$scores[, 1]
# polygenic scores --------------------------------------------------------
#score each freq according to its reported beta = weighted scores
d_phenotype$ps = calc_polygenic_score(l_OKBAY$effect_freqs, effects = l_OKBAY$pooled_results[l_OKBAY$included_SNPs, "Effect size"])
# phenotype relationships --------------------------------------------------
#plot the genomic predictions against the phenotypic values
GG_scatter(d_phenotype, x_var = "fs", y_var = "IQ")

GG_scatter(d_phenotype, x_var = "ps", y_var = "IQ")

#others
wtd.cors(cbind(d_phenotype, l_OKBAY$fa$scores, l_OKBAY$fa_2$scores, l_OKBAY$fa_3$scores, l_OKBAY$fa_4$scores, l_OKBAY$fa_5$scores))
## Warning in wtd.cors(cbind(d_phenotype, l_OKBAY$fa$scores, l_OKBAY
## $fa_2$scores, : NAs introduced by coercion
## Warning in wtd.cors(cbind(d_phenotype, l_OKBAY$fa$scores, l_OKBAY
## $fa_2$scores, : NAs introduced by coercion
## Population Rietveld.et.al.._2013 PS_Ed_Att_Davies
## Population NaN NaN NaN
## Rietveld.et.al.._2013 NaN 1.00000000 0.8795920962
## PS_Ed_Att_Davies NaN 0.87959210 1.0000000000
## PS_Ed_Att_Okbay NaN 0.68685154 0.8497353377
## IQ NaN 0.91969359 0.8210296837
## fs NaN 0.78856747 0.5858493240
## fs2 NaN 0.84638362 0.9115220546
## fs3 NaN 0.87490220 0.9416790614
## fs4 NaN 0.78643601 0.8156488855
## fs5 NaN 0.80216597 0.9023469208
## fs11 NaN -0.03266627 -0.0492278077
## ps NaN 0.90836969 0.8926049519
## MR1 NaN 0.78856747 0.5858493240
## MR1 NaN 0.84638362 0.9115220546
## MR2 NaN 0.18881588 -0.1949026832
## MR1 NaN 0.87490220 0.9416790614
## MR2 NaN 0.30273427 -0.1088470084
## MR3 NaN -0.17003340 -0.1445074609
## MR1 NaN 0.78643601 0.8156488855
## MR2 NaN 0.28724924 -0.1238291365
## MR3 NaN -0.34523478 -0.2937744911
## MR4 NaN 0.09486742 0.1378107154
## MR1 NaN 0.80216597 0.9023469208
## MR2 NaN 0.06997185 0.0066447189
## MR4 NaN 0.04598254 -0.0491776284
## MR3 NaN -0.30478318 -0.2423539115
## MR5 NaN 0.05967042 -0.0001710936
## PS_Ed_Att_Okbay IQ fs fs2
## Population NaN NaN NaN NaN
## Rietveld.et.al.._2013 0.68685154 0.91969359 0.78856747 0.84638362
## PS_Ed_Att_Davies 0.84973534 0.82102968 0.58584932 0.91152205
## PS_Ed_Att_Okbay 1.00000000 0.65366840 0.16061663 0.64328833
## IQ 0.65366840 1.00000000 0.75454518 0.77616263
## fs 0.16061663 0.75454518 1.00000000 0.72108580
## fs2 0.64328833 0.77616263 0.72108580 1.00000000
## fs3 0.67628889 0.81969957 0.72295100 0.97146429
## fs4 0.58983960 0.71528180 0.65483399 0.89354063
## fs5 0.64666952 0.77325104 0.67075040 0.94027226
## fs11 -0.10341763 -0.03686627 0.03708935 0.06322191
## ps 0.78221849 0.83676254 0.66228301 0.79229709
## MR1 0.16061663 0.75454518 1.00000000 0.72108580
## MR1 0.64328833 0.77616263 0.72108580 1.00000000
## MR2 -0.47732895 0.26428908 0.62218729 -0.07758889
## MR1 0.67628889 0.81969957 0.72295100 0.97146429
## MR2 -0.25446889 0.35941809 0.56002378 -0.06629354
## MR3 -0.30321964 -0.25276637 0.10354522 0.03492828
## MR1 0.58983960 0.71528180 0.65483399 0.89354063
## MR2 -0.27506573 0.31181626 0.53090649 -0.07613008
## MR3 -0.57089797 -0.35401303 0.11984993 -0.02330972
## MR4 0.05565769 0.05585519 0.12777696 0.10545063
## MR1 0.64666952 0.77325104 0.67075040 0.94027226
## MR2 0.03687246 0.01361237 0.09458786 0.05699563
## MR4 -0.17407546 -0.14462436 0.21279185 0.12814594
## MR3 -0.50121970 -0.19630632 0.12837618 -0.04068641
## MR5 -0.06892137 0.14341002 0.07625123 -0.08068191
## fs3 fs4 fs5 fs11
## Population NaN NaN NaN NaN
## Rietveld.et.al.._2013 0.874902202 0.786436015 0.802165972 -0.032666269
## PS_Ed_Att_Davies 0.941679061 0.815648886 0.902346921 -0.049227808
## PS_Ed_Att_Okbay 0.676288891 0.589839601 0.646669523 -0.103417635
## IQ 0.819699569 0.715281803 0.773251043 -0.036866269
## fs 0.722950998 0.654833994 0.670750402 0.037089351
## fs2 0.971464292 0.893540631 0.940272258 0.063221912
## fs3 1.000000000 0.893611642 0.943955046 -0.026052238
## fs4 0.893611642 1.000000000 0.943719307 0.021545099
## fs5 0.943955046 0.943719307 1.000000000 0.023641276
## fs11 -0.026052238 0.021545099 0.023641276 1.000000000
## ps 0.831233385 0.660154818 0.716109231 -0.108740576
## MR1 0.722950998 0.654833994 0.670750402 0.037089351
## MR1 0.971464292 0.893540631 0.940272258 0.063221912
## MR2 -0.078188273 -0.086656002 -0.119590537 0.047733954
## MR1 1.000000000 0.893611642 0.943955046 -0.026052238
## MR2 -0.054233612 -0.075914472 -0.130102925 0.042819224
## MR3 0.052358486 -0.117120789 -0.157797793 -0.009081295
## MR1 0.893611642 1.000000000 0.943719307 0.021545099
## MR2 -0.086426422 -0.034301152 -0.085820984 0.082488021
## MR3 -0.061976693 0.045146332 0.023375287 0.171904580
## MR4 0.139773991 -0.267667156 -0.118903076 -0.104843979
## MR1 0.943955046 0.943719307 1.000000000 0.023641276
## MR2 0.001604441 -0.026340153 -0.001491502 0.181541772
## MR4 0.111908633 -0.036024953 -0.124626419 -0.157540807
## MR3 -0.111385744 0.111153985 0.112118639 0.178521796
## MR5 -0.011840236 -0.008744385 -0.023990189 -0.165770194
## ps MR1 MR1 MR2
## Population NaN NaN NaN NaN
## Rietveld.et.al.._2013 0.908369691 0.78856747 0.84638362 0.18881588
## PS_Ed_Att_Davies 0.892604952 0.58584932 0.91152205 -0.19490268
## PS_Ed_Att_Okbay 0.782218489 0.16061663 0.64328833 -0.47732895
## IQ 0.836762538 0.75454518 0.77616263 0.26428908
## fs 0.662283014 1.00000000 0.72108580 0.62218729
## fs2 0.792297085 0.72108580 1.00000000 -0.07758889
## fs3 0.831233385 0.72295100 0.97146429 -0.07818827
## fs4 0.660154818 0.65483399 0.89354063 -0.08665600
## fs5 0.716109231 0.67075040 0.94027226 -0.11959054
## fs11 -0.108740576 0.03708935 0.06322191 0.04773395
## ps 1.000000000 0.66228301 0.79229709 0.07688113
## MR1 0.662283014 1.00000000 0.72108580 0.62218729
## MR1 0.792297085 0.72108580 1.00000000 -0.07758889
## MR2 0.076881134 0.62218729 -0.07758889 1.00000000
## MR1 0.831233385 0.72295100 0.97146429 -0.07818827
## MR2 0.235541566 0.56002378 -0.06629354 0.92694960
## MR3 -0.025919694 0.10354522 0.03492828 0.10714323
## MR1 0.660154818 0.65483399 0.89354063 -0.08665600
## MR2 0.149334450 0.53090649 -0.07613008 0.88531467
## MR3 -0.398580014 0.11984993 -0.02330972 0.15508404
## MR4 0.293091339 0.12777696 0.10545063 0.07493190
## MR1 0.716109231 0.67075040 0.94027226 -0.11959054
## MR2 0.102745228 0.09458786 0.05699563 0.15901859
## MR4 0.133971929 0.21279185 0.12814594 0.19662492
## MR3 -0.445463774 0.12837618 -0.04068641 0.19325197
## MR5 -0.009929531 0.07625123 -0.08068191 0.11917071
## MR1 MR2 MR3 MR1
## Population NaN NaN NaN NaN
## Rietveld.et.al.._2013 0.874902202 0.30273427 -0.170033402 0.786436015
## PS_Ed_Att_Davies 0.941679061 -0.10884701 -0.144507461 0.815648886
## PS_Ed_Att_Okbay 0.676288891 -0.25446889 -0.303219645 0.589839601
## IQ 0.819699569 0.35941809 -0.252766373 0.715281803
## fs 0.722950998 0.56002378 0.103545217 0.654833994
## fs2 0.971464292 -0.06629354 0.034928277 0.893540631
## fs3 1.000000000 -0.05423361 0.052358486 0.893611642
## fs4 0.893611642 -0.07591447 -0.117120789 1.000000000
## fs5 0.943955046 -0.13010292 -0.157797793 0.943719307
## fs11 -0.026052238 0.04281922 -0.009081295 0.021545099
## ps 0.831233385 0.23554157 -0.025919694 0.660154818
## MR1 0.722950998 0.56002378 0.103545217 0.654833994
## MR1 0.971464292 -0.06629354 0.034928277 0.893540631
## MR2 -0.078188273 0.92694960 0.107143225 -0.086656002
## MR1 1.000000000 -0.05423361 0.052358486 0.893611642
## MR2 -0.054233612 1.00000000 0.040795012 -0.075914472
## MR3 0.052358486 0.04079501 1.000000000 -0.117120789
## MR1 0.893611642 -0.07591447 -0.117120789 1.000000000
## MR2 -0.086426422 0.91197759 -0.235635023 -0.034301152
## MR3 -0.061976693 -0.11525079 0.339863763 0.045146332
## MR4 0.139773991 0.07446248 0.561178817 -0.267667156
## MR1 0.943955046 -0.13010292 -0.157797793 0.943719307
## MR2 0.001604441 0.24847448 0.059911029 -0.026340153
## MR4 0.111908633 0.18231481 0.783828066 -0.036024953
## MR3 -0.111385744 -0.08519302 -0.023812979 0.111153985
## MR5 -0.011840236 0.05345406 -0.169207851 -0.008744385
## MR2 MR3 MR4 MR1
## Population NaN NaN NaN NaN
## Rietveld.et.al.._2013 0.287249242 -0.345234780 0.09486742 0.802165972
## PS_Ed_Att_Davies -0.123829137 -0.293774491 0.13781072 0.902346921
## PS_Ed_Att_Okbay -0.275065727 -0.570897969 0.05565769 0.646669523
## IQ 0.311816264 -0.354013027 0.05585519 0.773251043
## fs 0.530906494 0.119849932 0.12777696 0.670750402
## fs2 -0.076130079 -0.023309720 0.10545063 0.940272258
## fs3 -0.086426422 -0.061976693 0.13977399 0.943955046
## fs4 -0.034301152 0.045146332 -0.26766716 0.943719307
## fs5 -0.085820984 0.023375287 -0.11890308 1.000000000
## fs11 0.082488021 0.171904580 -0.10484398 0.023641276
## ps 0.149334450 -0.398580014 0.29309134 0.716109231
## MR1 0.530906494 0.119849932 0.12777696 0.670750402
## MR1 -0.076130079 -0.023309720 0.10545063 0.940272258
## MR2 0.885314669 0.155084041 0.07493190 -0.119590537
## MR1 -0.086426422 -0.061976693 0.13977399 0.943955046
## MR2 0.911977594 -0.115250786 0.07446248 -0.130102925
## MR3 -0.235635023 0.339863763 0.56117882 -0.157797793
## MR1 -0.034301152 0.045146332 -0.26766716 0.943719307
## MR2 1.000000000 -0.004898466 -0.12947637 -0.085820984
## MR3 -0.004898466 1.000000000 -0.13142974 0.023375287
## MR4 -0.129476367 -0.131429744 1.00000000 -0.118903076
## MR1 -0.085820984 0.023375287 -0.11890308 1.000000000
## MR2 0.141494914 -0.049762808 0.10956563 -0.001491502
## MR4 -0.025942085 0.145390340 0.60091993 -0.124626419
## MR3 0.041817000 0.667492155 -0.45093114 0.112118639
## MR5 0.168418092 -0.049737361 -0.09444756 -0.023990189
## MR2 MR4 MR3 MR5
## Population NaN NaN NaN NaN
## Rietveld.et.al.._2013 0.069971845 0.04598254 -0.30478318 0.0596704246
## PS_Ed_Att_Davies 0.006644719 -0.04917763 -0.24235391 -0.0001710936
## PS_Ed_Att_Okbay 0.036872460 -0.17407546 -0.50121970 -0.0689213736
## IQ 0.013612370 -0.14462436 -0.19630632 0.1434100195
## fs 0.094587858 0.21279185 0.12837618 0.0762512340
## fs2 0.056995629 0.12814594 -0.04068641 -0.0806819107
## fs3 0.001604441 0.11190863 -0.11138574 -0.0118402362
## fs4 -0.026340153 -0.03602495 0.11115399 -0.0087443853
## fs5 -0.001491502 -0.12462642 0.11211864 -0.0239901886
## fs11 0.181541772 -0.15754081 0.17852180 -0.1657701938
## ps 0.102745228 0.13397193 -0.44546377 -0.0099295312
## MR1 0.094587858 0.21279185 0.12837618 0.0762512340
## MR1 0.056995629 0.12814594 -0.04068641 -0.0806819107
## MR2 0.159018591 0.19662492 0.19325197 0.1191707110
## MR1 0.001604441 0.11190863 -0.11138574 -0.0118402362
## MR2 0.248474480 0.18231481 -0.08519302 0.0534540602
## MR3 0.059911029 0.78382807 -0.02381298 -0.1692078507
## MR1 -0.026340153 -0.03602495 0.11115399 -0.0087443853
## MR2 0.141494914 -0.02594208 0.04181700 0.1684180924
## MR3 -0.049762808 0.14539034 0.66749215 -0.0497373607
## MR4 0.109565626 0.60091993 -0.45093114 -0.0944475556
## MR1 -0.001491502 -0.12462642 0.11211864 -0.0239901886
## MR2 1.000000000 0.21320909 -0.10027722 -0.9349433730
## MR4 0.213209085 1.00000000 -0.28034520 -0.2802294493
## MR3 -0.100277223 -0.28034520 1.00000000 0.0587644890
## MR5 -0.934943373 -0.28022945 0.05876449 1.0000000000
# SAC analysis ------------------------------------------------------------
#which rows has missing data?
v_keeprows = !apply(d_phenotype, MARGIN = 1, anyNA)
#the functions cannot handle missing data, so we need to subset
#is there SAC in the residuals?
#get model resids
d_phenotype$IQ_resid_fs = lm("IQ ~ fs", data = d_phenotype, na.action = na.exclude) %>% resid()
d_phenotype$IQ_resid_ps = lm("IQ ~ ps", data = d_phenotype, na.action = na.exclude) %>% resid()
#measure SAC
SAC_measures(d_phenotype[v_keeprows, ], dists = m_fst[v_keeprows, v_keeprows], vars = c("IQ", "IQ_resid_fs", "IQ_resid_ps"))
## Factors were converted to characters.
## Factors were converted to characters.
## Factors were converted to characters.
## Morans_I knsn_3
## IQ 0.6579384 0.9767737
## IQ_resid_fs 0.4583273 0.9553662
## IQ_resid_ps 0.2174602 0.8575929
#correct for SAC
SAC_control(d_phenotype[v_keeprows, ], dependent = "IQ", predictors = "fs", dists = m_fst[v_keeprows, v_keeprows])
## Factors were converted to characters.
## Uncorrected KNSNR_b_k3 SLR_k3
## fs 0.7545452 0.5642874 0.6990354
SAC_control(d_phenotype[v_keeprows, ], dependent = "IQ", predictors = "fs2", dists = m_fst[v_keeprows, v_keeprows])
## Factors were converted to characters.
## Uncorrected KNSNR_b_k3 SLR_k3
## fs2 0.7761626 0.3174107 0.380124
SAC_control(d_phenotype[v_keeprows, ], dependent = "IQ", predictors = "fs3", dists = m_fst[v_keeprows, v_keeprows])
## Factors were converted to characters.
## Uncorrected KNSNR_b_k3 SLR_k3
## fs3 0.8196996 0.5391488 0.4565435
SAC_control(d_phenotype[v_keeprows, ], dependent = "IQ", predictors = "fs4", dists = m_fst[v_keeprows, v_keeprows])
## Factors were converted to characters.
## Uncorrected KNSNR_b_k3 SLR_k3
## fs4 0.7152818 0.07617423 -0.3111954
SAC_control(d_phenotype[v_keeprows, ], dependent = "IQ", predictors = "fs5", dists = m_fst[v_keeprows, v_keeprows])
## Factors were converted to characters.
## Uncorrected KNSNR_b_k3 SLR_k3
## fs5 0.773251 0.3631436 0.08125434
SAC_control(d_phenotype[v_keeprows, ], dependent = "IQ", predictors = "fs11", dists = m_fst[v_keeprows, v_keeprows])
## Factors were converted to characters.
## Uncorrected KNSNR_b_k3 SLR_k3
## fs11 -0.03686627 0.2492557 0.507478
SAC_control(d_phenotype[v_keeprows, ], dependent = "IQ", predictors = "ps", dists = m_fst[v_keeprows, v_keeprows])
## Factors were converted to characters.
## Uncorrected KNSNR_b_k3 SLR_k3
## ps 0.8367625 0.1902149 0.3051521
# random snps -------------------------------------------------------------
#resample random SNPs, extract their 1st factor and run analyses to get an idea of what chance results look like
set.seed(1) #reproducible
silence({
d_simul = ldply(1:1000, .fun = function(i) {
#pick random SNPs, same number as in study
d_tmp = d_freqs[sample(1:nrow(d_freqs), size = length(l_OKBAY$included_SNPs)), v_1kg_pops] %>% df_t()
#factor analyze them
fa_tmp = silence(fa(d_tmp, scores = "Bartlett"))
#save scores
d_tmp_pheno = d_phenotype #copy of pheno data
d_tmp_pheno$fs = fa_tmp$scores
#do SAC control
SAC_control(d_tmp_pheno[v_keeprows, ], dependent = "IQ", predictors = "fs", dists = m_fst[v_keeprows, v_keeprows], SLR_include_self = T)
})
})
GG_denhist(d_simul, var = "Uncorrected")
## `stat_bin()` using `bins = 30`. Pick better value with `binwidth`.

quantile(d_simul$Uncorrected, probs = c(seq(.05, .95, .05), .96, .97, .98, .99))
## 5% 10% 15% 20% 25% 30%
## -0.7898598 -0.7871279 -0.7853872 -0.7839151 -0.7828417 -0.7819528
## 35% 40% 45% 50% 55% 60%
## -0.7810795 -0.7802045 -0.7792233 -0.7785480 -0.7776478 -0.7766970
## 65% 70% 75% 80% 85% 90%
## -0.7757929 -0.7751406 -0.7739916 -0.7727148 -0.7712160 -0.7691741
## 95% 96% 97% 98% 99%
## -0.7662913 -0.7654121 -0.7640451 -0.7634001 -0.7619504
GG_denhist(d_simul, var = "KNSNR_b_k3")
## `stat_bin()` using `bins = 30`. Pick better value with `binwidth`.

GG_denhist(d_simul, var = "SLR_k3")
## `stat_bin()` using `bins = 30`. Pick better value with `binwidth`.

#SDs approach
#SNPs under selection should differ more in the SDs than neutral SNPs
set.seed(1) #reproducible
silence({
d_simul_sd = ldply(1:1000, .fun = function(i) {
#pick random SNPs, same number as in study
d_tmp = d_freqs[sample(1:nrow(d_freqs), size = length(l_OKBAY$included_SNPs)), v_1kg_pops] %>% df_t()
#calculate mean sd
mean_sd(d_tmp)
})
})
#from SNP hits
v_SNP_hit_sd = mean_sd(l_OKBAY$effect_freqs);v_SNP_hit_sd
## [1] 0.1148372
(v_SNP_hit_sd - mean(d_simul_sd$V1)) / sd(d_simul_sd$V1)
## [1] 8.291788
GG_denhist(d_simul_sd$V1) + geom_vline(xintercept = v_SNP_hit_sd, color = "blue", linetype = "dashed") + xlab("Mean standard deviation of SNPs. Distribution = random sets (null model). Blue line = educational attainment hits.\nDistance between mean of random sets and hits = 16.8 sd.")
## `stat_bin()` using `bins = 30`. Pick better value with `binwidth`.

ggsave("figures/SD_dist.png")
## Saving 7 x 5 in image
## `stat_bin()` using `bins = 30`. Pick better value with `binwidth`.