Init

Some packages.

options(digits = 2)
library(pacman)
p_load(kirkegaard)

Convert 23andme files to BED

Done by system call to PLINK.

#find 23andme files
list_23andme = dir("data/23andme", pattern = "\\.tsv", full.names = T)

#call plink on each
for (f in list_23andme) {
  system(sprintf("plink --23file %s --make-bed --out %s",
                   f,
                   "data/bed/" + str_filename(f) %>% str_replace("\\.tsv$", "")
                   )
           )
}

Score polygenic

We use Okbay et al meta-analyis (education).

#gwas file
gwas_file = "data/GWAS/Okbay_2016_top5000.txt"

#inspect in R too
gwas = read_tsv(gwas_file)
## Parsed with column specification:
## cols(
##   MarkerName = col_character(),
##   CHR = col_integer(),
##   POS = col_integer(),
##   A1 = col_character(),
##   A2 = col_character(),
##   EAF = col_double(),
##   Beta = col_double(),
##   SE = col_double(),
##   Pval = col_double()
## )
#gwas col specification
#these must be the cols that refer to id, allele, effect size
gwas_cols = "1 4 7"

#find persons to score
list_bed = dir("data/bed", pattern = ".bed", full.names = T) %>% 
  #remove .bed extension
  str_replace("\\.bed$", "")

#call plink on each
for (f in list_bed) {
  system(sprintf("plink --bfile %s --score %s %s header --out %s",
                 f,
                 gwas_file,
                 gwas_cols,
                 "data/scored/" + (f %>% str_filename())
                 )
           )
}

Examine polygenics

Load the scored genomes from PLINK and plot.

#polygenics
list_polygenics = dir("data/scored", pattern = "\\.profile$", full.names = T)