Analyses

### ANALYZE THE SNPS FOUND IN OKBAY ET AL 2016

#main datafile
l_OKBAY = list()


# SNP results -------------------------------------------------------------

l_OKBAY$SNP_data = read_tsv("data/EduYears_Pooled_5000.txt") %>% as.data.frame

## Parsed with column specification:
## cols(
##   MarkerName = col_character(),
##   CHR = col_integer(),
##   POS = col_integer(),
##   A1 = col_character(),
##   A2 = col_character(),
##   EAF = col_double(),
##   Beta = col_double(),
##   SE = col_double(),
##   Pval = col_double()
## )

#rownames
rownames(l_OKBAY$SNP_data) = l_OKBAY$SNP_data$MarkerName

# LD pruned SNPs ----------------------------------------------------------

#this has both SNP and genotype
#pooled results
l_OKBAY$pooled_results = read_csv("data/OKBAY_total_pooled.csv") %>% as.data.frame()

## Parsed with column specification:
## cols(
##   SNP = col_character(),
##   Chr = col_integer(),
##   Position = col_integer(),
##   `Allele 1` = col_character(),
##   `Frequency Allele 1` = col_double(),
##   `Effect size` = col_double(),
##   `P-value` = col_double(),
##   `Heterogeneity I2` = col_double(),
##   `Heterogeneity P-value` = col_double()
## )

rownames(l_OKBAY$pooled_results) = l_OKBAY$pooled_results$SNP

#results without UKBB
l_OKBAY$discovery_results = read_csv("data/OKBAY_total_discovery.csv") %>% as.data.frame()

## Warning: Duplicated column names deduplicated: 'Effect size' => 'Effect
## size_1' [10], 'P-value' => 'P-value_1' [11], 'Effect size' => 'Effect
## size_2' [12], 'P-value' => 'P-value_2' [13]

## Parsed with column specification:
## cols(
##   SNP = col_character(),
##   Chr = col_integer(),
##   Position = col_integer(),
##   `Allele 1` = col_character(),
##   `Frequency Allele 1` = col_double(),
##   `Effect size` = col_double(),
##   `P-value` = col_double(),
##   `Heterogeneity I2` = col_double(),
##   `Heterogeneity P-value` = col_double(),
##   `Effect size_1` = col_double(),
##   `P-value_1` = col_double(),
##   `Effect size_2` = col_double(),
##   `P-value_2` = col_double()
## )

rownames(l_OKBAY$discovery_results) = l_OKBAY$discovery_results$SNP

#how many SNPs in common?
sum(l_OKBAY$pooled_results$SNP %in% d_freqs$SNP)

## [1] 154

sum(l_OKBAY$discovery_results$SNP %in% d_freqs$SNP)

## [1] 71

# subset ------------------------------------------------------------------
#subset to the ones not in LD

l_OKBAY$pop_freqs = d_freqs[d_freqs$SNP %in% l_OKBAY$pooled_results$SNP, ]


# get effect allele freqs -------------------------------------------------
l_OKBAY$effect_freqs = get_freq_effect_alleles(freq_data = l_OKBAY$pop_freqs,
                                               SNPs = l_OKBAY$pooled_results$SNP, 
                                               alleles = l_OKBAY$pooled_results$`Allele 1`#,
                                               #betas = l_OKBAY$pooled_results$`Effect size`
                                               )

#output
write.csv(l_OKBAY$effect_freqs, file = "data/effect_freqs.csv")




# factor analyze ----------------------------------------------------------
#check method variance for analysis
l_OKBAY$FA_all = fa_all_methods(l_OKBAY$effect_freqs, messages = F)
wtd.cors(l_OKBAY$FA_all$scores)

##                   regression_minres Thurstone_minres tenBerge_minres
## regression_minres         1.0000000        1.0000000       1.0000000
## Thurstone_minres          1.0000000        1.0000000       1.0000000
## tenBerge_minres           1.0000000        1.0000000       1.0000000
## Bartlett_minres           0.9981106        0.9981106       0.9981106
## regression_pa             0.9999564        0.9999564       0.9999564
## Thurstone_pa              0.9999564        0.9999564       0.9999564
## Bartlett_pa               0.9979269        0.9979269       0.9979269
## regression_minchi         1.0000000        1.0000000       1.0000000
## Thurstone_minchi          1.0000000        1.0000000       1.0000000
## tenBerge_minchi           1.0000000        1.0000000       1.0000000
## Bartlett_minchi           0.9981106        0.9981106       0.9981106
##                   Bartlett_minres regression_pa Thurstone_pa Bartlett_pa
## regression_minres       0.9981106     0.9999564    0.9999564   0.9979269
## Thurstone_minres        0.9981106     0.9999564    0.9999564   0.9979269
## tenBerge_minres         0.9981106     0.9999564    0.9999564   0.9979269
## Bartlett_minres         1.0000000     0.9982762    0.9982762   0.9999953
## regression_pa           0.9982762     1.0000000    1.0000000   0.9980969
## Thurstone_pa            0.9982762     1.0000000    1.0000000   0.9980969
## Bartlett_pa             0.9999953     0.9980969    0.9980969   1.0000000
## regression_minchi       0.9981106     0.9999564    0.9999564   0.9979269
## Thurstone_minchi        0.9981106     0.9999564    0.9999564   0.9979269
## tenBerge_minchi         0.9981106     0.9999564    0.9999564   0.9979269
## Bartlett_minchi         1.0000000     0.9982762    0.9982762   0.9999953
##                   regression_minchi Thurstone_minchi tenBerge_minchi
## regression_minres         1.0000000        1.0000000       1.0000000
## Thurstone_minres          1.0000000        1.0000000       1.0000000
## tenBerge_minres           1.0000000        1.0000000       1.0000000
## Bartlett_minres           0.9981106        0.9981106       0.9981106
## regression_pa             0.9999564        0.9999564       0.9999564
## Thurstone_pa              0.9999564        0.9999564       0.9999564
## Bartlett_pa               0.9979269        0.9979269       0.9979269
## regression_minchi         1.0000000        1.0000000       1.0000000
## Thurstone_minchi          1.0000000        1.0000000       1.0000000
## tenBerge_minchi           1.0000000        1.0000000       1.0000000
## Bartlett_minchi           0.9981106        0.9981106       0.9981106
##                   Bartlett_minchi
## regression_minres       0.9981106
## Thurstone_minres        0.9981106
## tenBerge_minres         0.9981106
## Bartlett_minres         1.0000000
## regression_pa           0.9982762
## Thurstone_pa            0.9982762
## Bartlett_pa             0.9999953
## regression_minchi       0.9981106
## Thurstone_minchi        0.9981106
## tenBerge_minchi         0.9981106
## Bartlett_minchi         1.0000000

#how many to extract?
nScree(l_OKBAY$effect_freqs)

##   noc naf nparallel nkaiser
## 1  11   2        11      11

#extract factors
l_OKBAY$fa = fa(l_OKBAY$effect_freqs, scores = "Bartlett")

## Warning in cor.smooth(R): Matrix was not positive definite, smoothing was
## done

## Warning in cor.smooth(R): Matrix was not positive definite, smoothing was
## done

## Warning in cor.smooth(r): Matrix was not positive definite, smoothing was
## done

## The determinant of the smoothed correlation was zero.
## This means the objective function is not defined.
## Chi square is based upon observed residuals.

## The determinant of the smoothed correlation was zero.
## This means the objective function is not defined for the null model either.
## The Chi square is thus based upon observed correlations.

## In factor.stats, I could not find the RMSEA upper bound . Sorry about that

l_OKBAY$fa_2 = fa(l_OKBAY$effect_freqs, scores = "Bartlett", nfactors = 2)

## Warning in cor.smooth(R): Matrix was not positive definite, smoothing was
## done

## Warning in cor.smooth(R): Matrix was not positive definite, smoothing was
## done

## Loading required namespace: GPArotation

## Warning in cor.smooth(r): Matrix was not positive definite, smoothing was
## done

## The determinant of the smoothed correlation was zero.
## This means the objective function is not defined.
## Chi square is based upon observed residuals.

## The determinant of the smoothed correlation was zero.
## This means the objective function is not defined for the null model either.
## The Chi square is thus based upon observed correlations.

## In factor.stats, I could not find the RMSEA upper bound . Sorry about that

l_OKBAY$fa_3 = fa(l_OKBAY$effect_freqs, scores = "Bartlett", nfactors = 3)

## Warning in cor.smooth(R): Matrix was not positive definite, smoothing was
## done

## Warning in cor.smooth(R): Matrix was not positive definite, smoothing was
## done

## Warning in cor.smooth(r): Matrix was not positive definite, smoothing was
## done

## The determinant of the smoothed correlation was zero.
## This means the objective function is not defined.
## Chi square is based upon observed residuals.

## The determinant of the smoothed correlation was zero.
## This means the objective function is not defined for the null model either.
## The Chi square is thus based upon observed correlations.

## In factor.stats, I could not find the RMSEA upper bound . Sorry about that

l_OKBAY$fa_4 = fa(l_OKBAY$effect_freqs, scores = "Bartlett", nfactors = 4)

## Warning in cor.smooth(R): Matrix was not positive definite, smoothing was
## done

## Warning in cor.smooth(R): Matrix was not positive definite, smoothing was
## done

## Warning in GPFoblq(L, Tmat = Tmat, normalize = normalize, eps = eps, maxit
## = maxit, : convergence not obtained in GPFoblq. 1000 iterations used.

## Warning in cor.smooth(r): Matrix was not positive definite, smoothing was
## done

## The determinant of the smoothed correlation was zero.
## This means the objective function is not defined.
## Chi square is based upon observed residuals.

## The determinant of the smoothed correlation was zero.
## This means the objective function is not defined for the null model either.
## The Chi square is thus based upon observed correlations.

## In factor.stats, I could not find the RMSEA upper bound . Sorry about that

l_OKBAY$fa_5 = fa(l_OKBAY$effect_freqs, scores = "Bartlett", nfactors = 5)

## Warning in cor.smooth(R): Matrix was not positive definite, smoothing was
## done

## Warning in cor.smooth(R): Matrix was not positive definite, smoothing was
## done

## Warning in GPFoblq(L, Tmat = Tmat, normalize = normalize, eps = eps, maxit
## = maxit, : convergence not obtained in GPFoblq. 1000 iterations used.

## Warning in cor.smooth(r): Matrix was not positive definite, smoothing was
## done

## The determinant of the smoothed correlation was zero.
## This means the objective function is not defined.
## Chi square is based upon observed residuals.

## The determinant of the smoothed correlation was zero.
## This means the objective function is not defined for the null model either.
## The Chi square is thus based upon observed correlations.

## In factor.stats, I could not find the RMSEA upper bound . Sorry about that

l_OKBAY$fa_11 = fa(l_OKBAY$effect_freqs, scores = "Bartlett", nfactors = 11)

## Warning in cor.smooth(R): Matrix was not positive definite, smoothing was
## done

## Warning in cor.smooth(R): Matrix was not positive definite, smoothing was
## done

## Warning in GPFoblq(L, Tmat = Tmat, normalize = normalize, eps = eps, maxit
## = maxit, : convergence not obtained in GPFoblq. 1000 iterations used.

## Warning in fac(r = r, nfactors = nfactors, n.obs = n.obs, rotate =
## rotate, : A loading greater than abs(1) was detected. Examine the loadings
## carefully.

## Warning in cor.smooth(r): Matrix was not positive definite, smoothing was
## done

## The determinant of the smoothed correlation was zero.
## This means the objective function is not defined.
## Chi square is based upon observed residuals.

## The determinant of the smoothed correlation was zero.
## This means the objective function is not defined for the null model either.
## The Chi square is thus based upon observed correlations.

## In factor.stats, I could not find the RMSEA upper bound . Sorry about that

#plot loadings
fa_plot_loadings(l_OKBAY$fa)

psych::describe(l_OKBAY$fa$loadings)

##     vars   n mean   sd median trimmed  mad   min  max range  skew kurtosis
## MR1    1 154 0.11 0.61   0.27    0.13 0.78 -0.96 0.96  1.91 -0.21    -1.56
##       se
## MR1 0.05

#does loadings match effect sizes?
l_OKBAY$included_SNPs = dimnames(l_OKBAY$fa$loadings)[[1]]

l_OKBAY$SNP_data[l_OKBAY$included_SNPs, "Beta"]

##   [1]  0.016 -0.016  0.018 -0.017  0.022  0.029  0.036 -0.016  0.026 -0.029
##  [11]  0.013 -0.017 -0.013  0.018 -0.014  0.014  0.020  0.018  0.029  0.016
##  [21] -0.014  0.020  0.016  0.016  0.022  0.025  0.017  0.014 -0.015 -0.015
##  [31]  0.025  0.035  0.017  0.022 -0.016  0.014 -0.014  0.016 -0.014  0.014
##  [41] -0.012  0.014  0.021 -0.015  0.023  0.023 -0.030 -0.025  0.026  0.025
##  [51] -0.024  0.021  0.038  0.022  0.018  0.012 -0.017  0.014  0.019 -0.014
##  [61]  0.015 -0.017 -0.013  0.015  0.017  0.014  0.013  0.019 -0.013 -0.017
##  [71] -0.021  0.016 -0.014 -0.013 -0.019 -0.013 -0.013 -0.018  0.013 -0.021
##  [81]  0.013  0.016 -0.013 -0.017  0.015 -0.024  0.028 -0.015  0.014  0.012
##  [91]  0.013 -0.014 -0.015 -0.014  0.016 -0.015  0.015  0.019 -0.014  0.014
## [101] -0.016  0.016  0.017 -0.016 -0.015  0.025  0.015  0.014  0.013 -0.014
## [111]  0.013 -0.014 -0.012 -0.028  0.015 -0.016 -0.017  0.016  0.014  0.014
## [121]  0.014 -0.016 -0.014  0.020  0.016 -0.019 -0.013  0.022 -0.024 -0.015
## [131] -0.016 -0.016 -0.017 -0.018 -0.015  0.017 -0.013  0.019  0.015 -0.014
## [141]  0.015  0.013 -0.013  0.018 -0.013 -0.015 -0.013  0.020 -0.029  0.016
## [151]  0.023 -0.013 -0.022 -0.014

GG_scatter(data.frame("loading" = as.vector(l_OKBAY$fa$loadings),
                      "Beta" = l_OKBAY$pooled_results[l_OKBAY$included_SNPs, "Effect size"])
           , x_var = "Beta", y_var = "loading")

d_phenotype$fs = l_OKBAY$fa$scores
d_phenotype$fs2 = l_OKBAY$fa_2$scores[, 1]
d_phenotype$fs3 = l_OKBAY$fa_3$scores[, 1]
d_phenotype$fs4 = l_OKBAY$fa_4$scores[, 1]
d_phenotype$fs5 = l_OKBAY$fa_5$scores[, 1]
d_phenotype$fs11 = l_OKBAY$fa_11$scores[, 1]

# polygenic scores --------------------------------------------------------
#score each freq according to its reported beta = weighted scores
d_phenotype$ps = calc_polygenic_score(l_OKBAY$effect_freqs, effects = l_OKBAY$pooled_results[l_OKBAY$included_SNPs, "Effect size"])


# phenotype relationships --------------------------------------------------
#plot the genomic predictions against the phenotypic values
GG_scatter(d_phenotype, x_var = "fs", y_var = "IQ")

GG_scatter(d_phenotype, x_var = "ps", y_var = "IQ")

#others
wtd.cors(cbind(d_phenotype, l_OKBAY$fa$scores, l_OKBAY$fa_2$scores, l_OKBAY$fa_3$scores, l_OKBAY$fa_4$scores, l_OKBAY$fa_5$scores))

## Warning in wtd.cors(cbind(d_phenotype, l_OKBAY$fa$scores, l_OKBAY
## $fa_2$scores, : NAs introduced by coercion

## Warning in wtd.cors(cbind(d_phenotype, l_OKBAY$fa$scores, l_OKBAY
## $fa_2$scores, : NAs introduced by coercion

##                       Population Rietveld.et.al.._2013 PS_Ed_Att_Davies
## Population                   NaN                   NaN              NaN
## Rietveld.et.al.._2013        NaN            1.00000000     0.8795920962
## PS_Ed_Att_Davies             NaN            0.87959210     1.0000000000
## PS_Ed_Att_Okbay              NaN            0.68685154     0.8497353377
## IQ                           NaN            0.91969359     0.8210296837
## fs                           NaN            0.78856747     0.5858493240
## fs2                          NaN            0.84638362     0.9115220546
## fs3                          NaN            0.87490220     0.9416790614
## fs4                          NaN            0.78643601     0.8156488855
## fs5                          NaN            0.80216597     0.9023469208
## fs11                         NaN           -0.03266627    -0.0492278077
## ps                           NaN            0.90836969     0.8926049519
## MR1                          NaN            0.78856747     0.5858493240
## MR1                          NaN            0.84638362     0.9115220546
## MR2                          NaN            0.18881588    -0.1949026832
## MR1                          NaN            0.87490220     0.9416790614
## MR2                          NaN            0.30273427    -0.1088470084
## MR3                          NaN           -0.17003340    -0.1445074609
## MR1                          NaN            0.78643601     0.8156488855
## MR2                          NaN            0.28724924    -0.1238291365
## MR3                          NaN           -0.34523478    -0.2937744911
## MR4                          NaN            0.09486742     0.1378107154
## MR1                          NaN            0.80216597     0.9023469208
## MR2                          NaN            0.06997185     0.0066447189
## MR4                          NaN            0.04598254    -0.0491776284
## MR3                          NaN           -0.30478318    -0.2423539115
## MR5                          NaN            0.05967042    -0.0001710936
##                       PS_Ed_Att_Okbay          IQ         fs         fs2
## Population                        NaN         NaN        NaN         NaN
## Rietveld.et.al.._2013      0.68685154  0.91969359 0.78856747  0.84638362
## PS_Ed_Att_Davies           0.84973534  0.82102968 0.58584932  0.91152205
## PS_Ed_Att_Okbay            1.00000000  0.65366840 0.16061663  0.64328833
## IQ                         0.65366840  1.00000000 0.75454518  0.77616263
## fs                         0.16061663  0.75454518 1.00000000  0.72108580
## fs2                        0.64328833  0.77616263 0.72108580  1.00000000
## fs3                        0.67628889  0.81969957 0.72295100  0.97146429
## fs4                        0.58983960  0.71528180 0.65483399  0.89354063
## fs5                        0.64666952  0.77325104 0.67075040  0.94027226
## fs11                      -0.10341763 -0.03686627 0.03708935  0.06322191
## ps                         0.78221849  0.83676254 0.66228301  0.79229709
## MR1                        0.16061663  0.75454518 1.00000000  0.72108580
## MR1                        0.64328833  0.77616263 0.72108580  1.00000000
## MR2                       -0.47732895  0.26428908 0.62218729 -0.07758889
## MR1                        0.67628889  0.81969957 0.72295100  0.97146429
## MR2                       -0.25446889  0.35941809 0.56002378 -0.06629354
## MR3                       -0.30321964 -0.25276637 0.10354522  0.03492828
## MR1                        0.58983960  0.71528180 0.65483399  0.89354063
## MR2                       -0.27506573  0.31181626 0.53090649 -0.07613008
## MR3                       -0.57089797 -0.35401303 0.11984993 -0.02330972
## MR4                        0.05565769  0.05585519 0.12777696  0.10545063
## MR1                        0.64666952  0.77325104 0.67075040  0.94027226
## MR2                        0.03687246  0.01361237 0.09458786  0.05699563
## MR4                       -0.17407546 -0.14462436 0.21279185  0.12814594
## MR3                       -0.50121970 -0.19630632 0.12837618 -0.04068641
## MR5                       -0.06892137  0.14341002 0.07625123 -0.08068191
##                                fs3          fs4          fs5         fs11
## Population                     NaN          NaN          NaN          NaN
## Rietveld.et.al.._2013  0.874902202  0.786436015  0.802165972 -0.032666269
## PS_Ed_Att_Davies       0.941679061  0.815648886  0.902346921 -0.049227808
## PS_Ed_Att_Okbay        0.676288891  0.589839601  0.646669523 -0.103417635
## IQ                     0.819699569  0.715281803  0.773251043 -0.036866269
## fs                     0.722950998  0.654833994  0.670750402  0.037089351
## fs2                    0.971464292  0.893540631  0.940272258  0.063221912
## fs3                    1.000000000  0.893611642  0.943955046 -0.026052238
## fs4                    0.893611642  1.000000000  0.943719307  0.021545099
## fs5                    0.943955046  0.943719307  1.000000000  0.023641276
## fs11                  -0.026052238  0.021545099  0.023641276  1.000000000
## ps                     0.831233385  0.660154818  0.716109231 -0.108740576
## MR1                    0.722950998  0.654833994  0.670750402  0.037089351
## MR1                    0.971464292  0.893540631  0.940272258  0.063221912
## MR2                   -0.078188273 -0.086656002 -0.119590537  0.047733954
## MR1                    1.000000000  0.893611642  0.943955046 -0.026052238
## MR2                   -0.054233612 -0.075914472 -0.130102925  0.042819224
## MR3                    0.052358486 -0.117120789 -0.157797793 -0.009081295
## MR1                    0.893611642  1.000000000  0.943719307  0.021545099
## MR2                   -0.086426422 -0.034301152 -0.085820984  0.082488021
## MR3                   -0.061976693  0.045146332  0.023375287  0.171904580
## MR4                    0.139773991 -0.267667156 -0.118903076 -0.104843979
## MR1                    0.943955046  0.943719307  1.000000000  0.023641276
## MR2                    0.001604441 -0.026340153 -0.001491502  0.181541772
## MR4                    0.111908633 -0.036024953 -0.124626419 -0.157540807
## MR3                   -0.111385744  0.111153985  0.112118639  0.178521796
## MR5                   -0.011840236 -0.008744385 -0.023990189 -0.165770194
##                                 ps        MR1         MR1         MR2
## Population                     NaN        NaN         NaN         NaN
## Rietveld.et.al.._2013  0.908369691 0.78856747  0.84638362  0.18881588
## PS_Ed_Att_Davies       0.892604952 0.58584932  0.91152205 -0.19490268
## PS_Ed_Att_Okbay        0.782218489 0.16061663  0.64328833 -0.47732895
## IQ                     0.836762538 0.75454518  0.77616263  0.26428908
## fs                     0.662283014 1.00000000  0.72108580  0.62218729
## fs2                    0.792297085 0.72108580  1.00000000 -0.07758889
## fs3                    0.831233385 0.72295100  0.97146429 -0.07818827
## fs4                    0.660154818 0.65483399  0.89354063 -0.08665600
## fs5                    0.716109231 0.67075040  0.94027226 -0.11959054
## fs11                  -0.108740576 0.03708935  0.06322191  0.04773395
## ps                     1.000000000 0.66228301  0.79229709  0.07688113
## MR1                    0.662283014 1.00000000  0.72108580  0.62218729
## MR1                    0.792297085 0.72108580  1.00000000 -0.07758889
## MR2                    0.076881134 0.62218729 -0.07758889  1.00000000
## MR1                    0.831233385 0.72295100  0.97146429 -0.07818827
## MR2                    0.235541566 0.56002378 -0.06629354  0.92694960
## MR3                   -0.025919694 0.10354522  0.03492828  0.10714323
## MR1                    0.660154818 0.65483399  0.89354063 -0.08665600
## MR2                    0.149334450 0.53090649 -0.07613008  0.88531467
## MR3                   -0.398580014 0.11984993 -0.02330972  0.15508404
## MR4                    0.293091339 0.12777696  0.10545063  0.07493190
## MR1                    0.716109231 0.67075040  0.94027226 -0.11959054
## MR2                    0.102745228 0.09458786  0.05699563  0.15901859
## MR4                    0.133971929 0.21279185  0.12814594  0.19662492
## MR3                   -0.445463774 0.12837618 -0.04068641  0.19325197
## MR5                   -0.009929531 0.07625123 -0.08068191  0.11917071
##                                MR1         MR2          MR3          MR1
## Population                     NaN         NaN          NaN          NaN
## Rietveld.et.al.._2013  0.874902202  0.30273427 -0.170033402  0.786436015
## PS_Ed_Att_Davies       0.941679061 -0.10884701 -0.144507461  0.815648886
## PS_Ed_Att_Okbay        0.676288891 -0.25446889 -0.303219645  0.589839601
## IQ                     0.819699569  0.35941809 -0.252766373  0.715281803
## fs                     0.722950998  0.56002378  0.103545217  0.654833994
## fs2                    0.971464292 -0.06629354  0.034928277  0.893540631
## fs3                    1.000000000 -0.05423361  0.052358486  0.893611642
## fs4                    0.893611642 -0.07591447 -0.117120789  1.000000000
## fs5                    0.943955046 -0.13010292 -0.157797793  0.943719307
## fs11                  -0.026052238  0.04281922 -0.009081295  0.021545099
## ps                     0.831233385  0.23554157 -0.025919694  0.660154818
## MR1                    0.722950998  0.56002378  0.103545217  0.654833994
## MR1                    0.971464292 -0.06629354  0.034928277  0.893540631
## MR2                   -0.078188273  0.92694960  0.107143225 -0.086656002
## MR1                    1.000000000 -0.05423361  0.052358486  0.893611642
## MR2                   -0.054233612  1.00000000  0.040795012 -0.075914472
## MR3                    0.052358486  0.04079501  1.000000000 -0.117120789
## MR1                    0.893611642 -0.07591447 -0.117120789  1.000000000
## MR2                   -0.086426422  0.91197759 -0.235635023 -0.034301152
## MR3                   -0.061976693 -0.11525079  0.339863763  0.045146332
## MR4                    0.139773991  0.07446248  0.561178817 -0.267667156
## MR1                    0.943955046 -0.13010292 -0.157797793  0.943719307
## MR2                    0.001604441  0.24847448  0.059911029 -0.026340153
## MR4                    0.111908633  0.18231481  0.783828066 -0.036024953
## MR3                   -0.111385744 -0.08519302 -0.023812979  0.111153985
## MR5                   -0.011840236  0.05345406 -0.169207851 -0.008744385
##                                MR2          MR3         MR4          MR1
## Population                     NaN          NaN         NaN          NaN
## Rietveld.et.al.._2013  0.287249242 -0.345234780  0.09486742  0.802165972
## PS_Ed_Att_Davies      -0.123829137 -0.293774491  0.13781072  0.902346921
## PS_Ed_Att_Okbay       -0.275065727 -0.570897969  0.05565769  0.646669523
## IQ                     0.311816264 -0.354013027  0.05585519  0.773251043
## fs                     0.530906494  0.119849932  0.12777696  0.670750402
## fs2                   -0.076130079 -0.023309720  0.10545063  0.940272258
## fs3                   -0.086426422 -0.061976693  0.13977399  0.943955046
## fs4                   -0.034301152  0.045146332 -0.26766716  0.943719307
## fs5                   -0.085820984  0.023375287 -0.11890308  1.000000000
## fs11                   0.082488021  0.171904580 -0.10484398  0.023641276
## ps                     0.149334450 -0.398580014  0.29309134  0.716109231
## MR1                    0.530906494  0.119849932  0.12777696  0.670750402
## MR1                   -0.076130079 -0.023309720  0.10545063  0.940272258
## MR2                    0.885314669  0.155084041  0.07493190 -0.119590537
## MR1                   -0.086426422 -0.061976693  0.13977399  0.943955046
## MR2                    0.911977594 -0.115250786  0.07446248 -0.130102925
## MR3                   -0.235635023  0.339863763  0.56117882 -0.157797793
## MR1                   -0.034301152  0.045146332 -0.26766716  0.943719307
## MR2                    1.000000000 -0.004898466 -0.12947637 -0.085820984
## MR3                   -0.004898466  1.000000000 -0.13142974  0.023375287
## MR4                   -0.129476367 -0.131429744  1.00000000 -0.118903076
## MR1                   -0.085820984  0.023375287 -0.11890308  1.000000000
## MR2                    0.141494914 -0.049762808  0.10956563 -0.001491502
## MR4                   -0.025942085  0.145390340  0.60091993 -0.124626419
## MR3                    0.041817000  0.667492155 -0.45093114  0.112118639
## MR5                    0.168418092 -0.049737361 -0.09444756 -0.023990189
##                                MR2         MR4         MR3           MR5
## Population                     NaN         NaN         NaN           NaN
## Rietveld.et.al.._2013  0.069971845  0.04598254 -0.30478318  0.0596704246
## PS_Ed_Att_Davies       0.006644719 -0.04917763 -0.24235391 -0.0001710936
## PS_Ed_Att_Okbay        0.036872460 -0.17407546 -0.50121970 -0.0689213736
## IQ                     0.013612370 -0.14462436 -0.19630632  0.1434100195
## fs                     0.094587858  0.21279185  0.12837618  0.0762512340
## fs2                    0.056995629  0.12814594 -0.04068641 -0.0806819107
## fs3                    0.001604441  0.11190863 -0.11138574 -0.0118402362
## fs4                   -0.026340153 -0.03602495  0.11115399 -0.0087443853
## fs5                   -0.001491502 -0.12462642  0.11211864 -0.0239901886
## fs11                   0.181541772 -0.15754081  0.17852180 -0.1657701938
## ps                     0.102745228  0.13397193 -0.44546377 -0.0099295312
## MR1                    0.094587858  0.21279185  0.12837618  0.0762512340
## MR1                    0.056995629  0.12814594 -0.04068641 -0.0806819107
## MR2                    0.159018591  0.19662492  0.19325197  0.1191707110
## MR1                    0.001604441  0.11190863 -0.11138574 -0.0118402362
## MR2                    0.248474480  0.18231481 -0.08519302  0.0534540602
## MR3                    0.059911029  0.78382807 -0.02381298 -0.1692078507
## MR1                   -0.026340153 -0.03602495  0.11115399 -0.0087443853
## MR2                    0.141494914 -0.02594208  0.04181700  0.1684180924
## MR3                   -0.049762808  0.14539034  0.66749215 -0.0497373607
## MR4                    0.109565626  0.60091993 -0.45093114 -0.0944475556
## MR1                   -0.001491502 -0.12462642  0.11211864 -0.0239901886
## MR2                    1.000000000  0.21320909 -0.10027722 -0.9349433730
## MR4                    0.213209085  1.00000000 -0.28034520 -0.2802294493
## MR3                   -0.100277223 -0.28034520  1.00000000  0.0587644890
## MR5                   -0.934943373 -0.28022945  0.05876449  1.0000000000

# SAC analysis ------------------------------------------------------------
#which rows has missing data?
v_keeprows = !apply(d_phenotype, MARGIN = 1, anyNA)
#the functions cannot handle missing data, so we need to subset

#is there SAC in the residuals?
#get model resids
d_phenotype$IQ_resid_fs = lm("IQ ~ fs", data = d_phenotype, na.action = na.exclude) %>% resid()
d_phenotype$IQ_resid_ps = lm("IQ ~ ps", data = d_phenotype, na.action = na.exclude) %>% resid()

#measure SAC
SAC_measures(d_phenotype[v_keeprows, ], dists = m_fst[v_keeprows, v_keeprows], vars = c("IQ", "IQ_resid_fs", "IQ_resid_ps"))

## Factors were converted to characters.

## Factors were converted to characters.
## Factors were converted to characters.

##              Morans_I    knsn_3
## IQ          0.6579384 0.9767737
## IQ_resid_fs 0.4583273 0.9553662
## IQ_resid_ps 0.2174602 0.8575929

#correct for SAC
SAC_control(d_phenotype[v_keeprows, ], dependent = "IQ", predictors = "fs", dists = m_fst[v_keeprows, v_keeprows])

## Factors were converted to characters.

##    Uncorrected KNSNR_b_k3    SLR_k3
## fs   0.7545452  0.5642874 0.6990354

SAC_control(d_phenotype[v_keeprows, ], dependent = "IQ", predictors = "fs2", dists = m_fst[v_keeprows, v_keeprows])

## Factors were converted to characters.

##     Uncorrected KNSNR_b_k3   SLR_k3
## fs2   0.7761626  0.3174107 0.380124

SAC_control(d_phenotype[v_keeprows, ], dependent = "IQ", predictors = "fs3", dists = m_fst[v_keeprows, v_keeprows])

## Factors were converted to characters.

##     Uncorrected KNSNR_b_k3    SLR_k3
## fs3   0.8196996  0.5391488 0.4565435

SAC_control(d_phenotype[v_keeprows, ], dependent = "IQ", predictors = "fs4", dists = m_fst[v_keeprows, v_keeprows])

## Factors were converted to characters.

##     Uncorrected KNSNR_b_k3     SLR_k3
## fs4   0.7152818 0.07617423 -0.3111954

SAC_control(d_phenotype[v_keeprows, ], dependent = "IQ", predictors = "fs5", dists = m_fst[v_keeprows, v_keeprows])

## Factors were converted to characters.

##     Uncorrected KNSNR_b_k3     SLR_k3
## fs5    0.773251  0.3631436 0.08125434

SAC_control(d_phenotype[v_keeprows, ], dependent = "IQ", predictors = "fs11", dists = m_fst[v_keeprows, v_keeprows])

## Factors were converted to characters.

##      Uncorrected KNSNR_b_k3   SLR_k3
## fs11 -0.03686627  0.2492557 0.507478

SAC_control(d_phenotype[v_keeprows, ], dependent = "IQ", predictors = "ps", dists = m_fst[v_keeprows, v_keeprows])

## Factors were converted to characters.

##    Uncorrected KNSNR_b_k3    SLR_k3
## ps   0.8367625  0.1902149 0.3051521

# random snps -------------------------------------------------------------
#resample random SNPs, extract their 1st factor and run analyses to get an idea of what chance results look like

set.seed(1) #reproducible
silence({
  d_simul = ldply(1:1000, .fun = function(i) {
    #pick random SNPs, same number as in study
    d_tmp = d_freqs[sample(1:nrow(d_freqs), size = length(l_OKBAY$included_SNPs)), v_1kg_pops] %>% df_t()
    
    #factor analyze them
    fa_tmp = silence(fa(d_tmp, scores = "Bartlett"))
    
    #save scores
    d_tmp_pheno = d_phenotype #copy of pheno data
    d_tmp_pheno$fs = fa_tmp$scores
    
    #do SAC control
    SAC_control(d_tmp_pheno[v_keeprows, ], dependent = "IQ", predictors = "fs", dists = m_fst[v_keeprows, v_keeprows], SLR_include_self = T)
  })
})


GG_denhist(d_simul, var = "Uncorrected")

## `stat_bin()` using `bins = 30`. Pick better value with `binwidth`.

quantile(d_simul$Uncorrected, probs = c(seq(.05, .95, .05), .96, .97, .98, .99))

##         5%        10%        15%        20%        25%        30% 
## -0.7898598 -0.7871279 -0.7853872 -0.7839151 -0.7828417 -0.7819528 
##        35%        40%        45%        50%        55%        60% 
## -0.7810795 -0.7802045 -0.7792233 -0.7785480 -0.7776478 -0.7766970 
##        65%        70%        75%        80%        85%        90% 
## -0.7757929 -0.7751406 -0.7739916 -0.7727148 -0.7712160 -0.7691741 
##        95%        96%        97%        98%        99% 
## -0.7662913 -0.7654121 -0.7640451 -0.7634001 -0.7619504

GG_denhist(d_simul, var = "KNSNR_b_k3")

## `stat_bin()` using `bins = 30`. Pick better value with `binwidth`.

GG_denhist(d_simul, var = "SLR_k3")

## `stat_bin()` using `bins = 30`. Pick better value with `binwidth`.

#SDs approach
#SNPs under selection should differ more in the SDs than neutral SNPs

set.seed(1) #reproducible
silence({
  d_simul_sd = ldply(1:1000, .fun = function(i) {
    #pick random SNPs, same number as in study
    d_tmp = d_freqs[sample(1:nrow(d_freqs), size = length(l_OKBAY$included_SNPs)), v_1kg_pops] %>% df_t()
    
    #calculate mean sd
    mean_sd(d_tmp)
  })
})


#from SNP hits
v_SNP_hit_sd = mean_sd(l_OKBAY$effect_freqs);v_SNP_hit_sd

## [1] 0.1148372

(v_SNP_hit_sd - mean(d_simul_sd$V1)) / sd(d_simul_sd$V1)

## [1] 8.291788

GG_denhist(d_simul_sd$V1) + geom_vline(xintercept = v_SNP_hit_sd, color = "blue", linetype = "dashed") + xlab("Mean standard deviation of SNPs. Distribution = random sets (null model). Blue line = educational attainment hits.\nDistance between mean of random sets and hits = 16.8 sd.")

## `stat_bin()` using `bins = 30`. Pick better value with `binwidth`.

ggsave("figures/SD_dist.png")

## Saving 7 x 5 in image

## `stat_bin()` using `bins = 30`. Pick better value with `binwidth`.

Do Okbay GWASs results support a global genetic model for intelligence?

Emil O. W. Kirkegaard

3 apr 2017

Start-up code

Analyses